%%% -*-BibTeX-*-
%%% ====================================================================
%%% BibTeX-file{
%%%     author          = "Nelson H. F. Beebe",
%%%     version         = "1.92",
%%%     date            = "19 March 2024",
%%%     time            = "08:20:08 MST",
%%%     filename        = "todaes.bib",
%%%     address         = "University of Utah
%%%                        Department of Mathematics, 110 LCB
%%%                        155 S 1400 E RM 233
%%%                        Salt Lake City, UT 84112-0090
%%%                        USA",
%%%     telephone       = "+1 801 581 5254",
%%%     FAX             = "+1 801 581 4148",
%%%     URL             = "https://www.math.utah.edu/~beebe",
%%%     checksum        = "13360 57660 298829 2926533",
%%%     email           = "beebe at math.utah.edu, beebe at acm.org,
%%%                        beebe at computer.org (Internet)",
%%%     codetable       = "ISO/ASCII",
%%%     keywords        = "bibliography; BibTeX; ACM Transactions on
%%%                        Design Automation of Electronic Systems;
%%%                        TODAES",
%%%     license         = "public domain",
%%%     supported       = "yes",
%%%     docstring       = "This is a COMPLETE BibTeX bibliography for
%%%                        ACM Transactions on Design Automation of
%%%                        Electronic Systems (CODEN ATASFO, ISSN
%%%                        1084-4309 (print), 1557-7309 (electronic)),
%%%                        completely covering all issues from volume 1,
%%%                        number 1, January 1996 to date.
%%%
%%%                        The ACM maintains World Wide Web pages with
%%%                        journal tables of contents for 1996--date at
%%%
%%%                            http://www.acm.org/todaes/
%%%                            http://www.acm.org/pubs/contents/journals/todaes/
%%%                            http://portal.acm.org/browse_dl.cfm?idx=J776
%%%
%%%                        That data has been automatically converted to
%%%                        BibTeX form, corrected for spelling and page
%%%                        number errors, and merged into this file.
%%%
%%%                        At version 1.92, the COMPLETE year coverage
%%%                        looks like this:
%%%
%%%                             1996 (  20)    2006 (  44)    2016 (  73)
%%%                             1997 (  19)    2007 (  52)    2017 (  68)
%%%                             1998 (  29)    2008 (  68)    2018 (  70)
%%%                             1999 (  17)    2009 (  65)    2019 (  69)
%%%                             2000 (  35)    2010 (  35)    2020 (  56)
%%%                             2001 (  28)    2011 (  40)    2021 (  50)
%%%                             2002 (  31)    2012 (  67)    2022 (  65)
%%%                             2003 (  30)    2013 (  55)    2023 ( 105)
%%%                             2004 (  21)    2014 (  48)    2024 (  40)
%%%                             2005 (  35)    2015 (  68)
%%%
%%%                             Article:       1403
%%%
%%%                             Total entries: 1403
%%%
%%%                        Numerous errors in the sources noted above
%%%                        have been corrected.   Spelling has been
%%%                        verified with the UNIX spell and GNU ispell
%%%                        programs using the exception dictionary
%%%                        stored in the companion file with extension
%%%                        .sok.
%%%
%%%                        ACM copyrights explicitly permit abstracting
%%%                        with credit, so article abstracts, keywords,
%%%                        and subject classifications have been
%%%                        included in this bibliography wherever
%%%                        available.  Article reviews have been
%%%                        omitted, until their copyright status has
%%%                        been clarified.
%%%
%%%                        bibsource keys in the bibliography entries
%%%                        below indicate the entry originally came
%%%                        from the computer science bibliography
%%%                        archive, even though it has likely since
%%%                        been corrected and updated.
%%%
%%%                        URL keys in the bibliography point to
%%%                        World Wide Web locations of additional
%%%                        information about the entry.
%%%
%%%                        BibTeX citation tags are uniformly chosen
%%%                        as name:year:abbrev, where name is the
%%%                        family name of the first author or editor,
%%%                        year is a 4-digit number, and abbrev is a
%%%                        3-letter condensation of important title
%%%                        words. Citation tags were automatically
%%%                        generated by software developed by the
%%%                        author for the BibNet Project.
%%%
%%%                        In this bibliography, entries are sorted
%%%                        by journal, and then by publication order,
%%%                        with the help of ``bibsort -byvolume''.  The
%%%                        bibsort utility is available from
%%%                        ftp://ftp.math.utah.edu/pub/tex/bib.
%%%
%%%                        The author will be grateful for reports of
%%%                        errors of any kind in this bibliography.
%%%
%%%                        The checksum field above contains a CRC-16
%%%                        checksum as the first value, followed by the
%%%                        equivalent of the standard UNIX wc (word
%%%                        count) utility output of lines, words, and
%%%                        characters.  This is produced by Robert
%%%                        Solovay's checksum utility."
%%%     }
%%% ====================================================================
%%% TeX code passed through to the generated bibliography.  It reads
%%% bibnames.sty, then supplies fallback definitions, each guarded by an
%%% \ifx \undefined test so that it is made only when the macro is not
%%% already defined:
%%%     \circled{x}  typesets (x)
%%%     \reg         typesets \circled{R}
%%%     \TM          typesets a superscript small-caps TM
@Preamble{
    "\input bibnames.sty"
  # "\ifx \undefined \circled \def \circled #1{(#1)}\fi"
  # "\ifx \undefined \reg \def \reg {\circled{R}}\fi"
  # "\ifx \undefined \TM \def \TM {${}^{\sc TM}$} \fi"
}

%%% ====================================================================
%%% Acknowledgement abbreviations:
@string{ack-nhfb = {Nelson H. F. Beebe,
                    University of Utah,
                    Department of Mathematics, 110 LCB,
                    155 S 1400 E RM 233,
                    Salt Lake City, UT 84112-0090, USA,
                    Tel: +1 801 581 5254,
                    FAX: +1 801 581 4148,
                    e-mail: \path|beebe@math.utah.edu|,
                            \path|beebe@acm.org|,
                            \path|beebe@computer.org| (Internet),
                    URL: \path|https://www.math.utah.edu/~beebe/|}}

%%% ====================================================================
%%% Journal abbreviations:
@string{j-TODAES = {ACM Transactions on Design Automation of
                    Electronic Systems}}

%%% ====================================================================
%%% Bibliography entries from ACM Transactions on Design Automation of
%%% Electronic Systems.
@article{Pedram:1996:PMI,
  author      = {Massoud Pedram},
  title       = {Power minimization in {IC} design: principles and
                 applications},
  journal     = j-TODAES,
  year        = {1996},
  month       = jan,
  volume      = {1},
  number      = {1},
  pages       = {3--56},
  coden       = {ATASFO},
  issn        = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l      = {1084-4309},
  bibdate     = {Fri Jul 27 10:05:33 MDT 2001},
  bibsource   = {http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  url         = {http://www.acm.org/pubs/articles/journals/todaes/1996-1-1/p3-pedram/p3-pedram.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1996-1-1/p3-pedram/},
  abstract    = {Low power has emerged as a principal theme in today's
                 electronics industry. The need for low power has caused
                 a major paradigm shift in which power dissipation is as
                 important as performance and area. This article
                 presents an in-depth survey of CAD methodologies and
                 techniques for designing low power digital CMOS
                 circuits and systems and describes the many issues
                 facing designers at architectural, logical, and
                 physical levels of design abstraction. It reviews some
                 of the techniques and tools that have been proposed to
                 overcome these difficulties and outlines the future
                 challenges that must be met to design low power, high
                 performance systems.},
  acknowledgement = ack-nhfb,
  fjournal    = {ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)},
  generalterms = {Algorithms; Design; Experimentation; Performance},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords    = {adiabatic circuits; CMOS circuits; computer-aided
                 design of VLSI; dynamic power dissipation; energy-delay
                 product; gated clocks; layout; low power layout; low
                 power synthesis; lower-power design; power analysis and
                 estimation; power management; power minimization and
                 management; probabilistic analysis;
                 silicon-on-insulator technology; statistical sampling;
                 switched capacitance; switching activity; symbolic
                 simulation; synthesis; system design},
  subject     = {Hardware --- Integrated Circuits --- Types and Design
                 Styles (B.7.1): {\bf VLSI (very large scale
                 integration)}; Computer Applications --- Computer-Aided
                 Engineering (J.6): {\bf Computer-aided design (CAD)};
                 Hardware --- Integrated Circuits --- General (B.7.0)},
}

@article{Cheng:1996:AGF,
  author      = {Kwang-Ting Cheng and A. S. Krishnakumar},
  title       = {Automatic generation of functional vectors using the
                 extended finite state machine model},
  journal     = j-TODAES,
  year        = {1996},
  month       = jan,
  volume      = {1},
  number      = {1},
  pages       = {57--79},
  coden       = {ATASFO},
  issn        = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l      = {1084-4309},
  bibdate     = {Fri Jul 27 10:05:33 MDT 2001},
  bibsource   = {http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  url         = {http://www.acm.org/pubs/articles/journals/todaes/1996-1-1/p57-cheng/p57-cheng.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1996-1-1/p57-cheng/},
  abstract    = {We present a method of automatic generation of
                 functional vectors for sequential circuits. These
                 vectors can be used for design verification,
                 manufacturing testing, or power estimation. A
                 high-level description of the circuit in VHDL or C is
                 assumed available. Our method automatically transforms
                 the high-level description of a circuit in VHDL or C
                 into an extended finite state machine (EFSM) model that
                 is used to generate functional vectors. The EFSM model
                 is a generalization of the traditional state machine
                 model. It is a compact representation of models with
                 local data variables and preserves many nice properties
                 of a traditional state machine model. The theoretical
                 background of the EFSM model is addressed in this
                 article. Our method guarantees that the generated
                 vectors cover every statement in the high-level
                 description at least once. Experimental results show
                 that a set of comprehensive functional vectors for
                 sequential circuits with more than a hundred flip-flops
                 can be generated automatically in a few minutes of CPU
                 time using our prototype system.},
  acknowledgement = ack-nhfb,
  fjournal    = {ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)},
  generalterms = {Algorithms; Experimentation; Languages; Theory;
                 Verification},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords    = {automatic test generation; design verification;
                 extended finite state machines; functional testing},
  subject     = {Hardware --- Integrated Circuits --- Design Aids
                 (B.7.2): {\bf Verification}; Hardware --- Logic Design
                 --- Design Styles (B.6.1): {\bf Sequential circuits};
                 Theory of Computation --- Computation by Abstract
                 Devices --- Models of Computation (F.1.1): {\bf
                 Automata}; Mathematics of Computing --- Discrete
                 Mathematics --- Graph Theory (G.2.2): {\bf Graph
                 algorithms}; Hardware --- Integrated Circuits ---
                 Reliability and Testing** (B.7.3): {\bf Testability**};
                 Hardware --- Logic Design --- Design Aids (B.6.3): {\bf
                 Hardware description languages}; Hardware ---
                 Register-Transfer-Level Implementation --- Design Aids
                 (B.5.2)},
}

@Article{Chang:1996:USM,
  author =       "Yao-Wen Chang and D. F. Wong and C. K. Wong",
  title =        "Universal switch modules for {FPGA} design",
  journal =      j-TODAES,
  volume =       "1",
  number =       "1",
  pages =        "80--101",
  month =        jan,
  year =         "1996",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1996-1-1/p80-chang/p80-chang.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1996-1-1/p80-chang/",
  abstract =     "A switch module $M$ with $W$ terminals on each side is
                 said to be {\em universal\/} if every set of nets
                 satisfying the dimensional constraint (i.e., the number
                 of nets on each side of $M$ is at most $W$) is
                 simultaneously routable through $M$. In this article,
                 we present a class of universal switch modules. Each of
                 our switch modules has $ 6 W$ switches and {\em
                 switch-module flexibility\/} three (i.e., $ F_S = 3$).
                 We prove that no switch module with less than $ 6 W$
                 switches can be universal. We also compare our switch
                 modules with those used in the Xilinx XC4000 family
                 FPGAs and the {\em antisymmetric\/} switch modules
                 (with $ F_S = 3$) suggested by Rose and Brown [1991].
                 Although these two kinds of switch modules also have $
                 F_S = 3$ and $ 6 W$ switches, we show that they are not
                 universal. Based on combinatorial counting techniques,
                 we show that each of our universal switch modules can
                 accommodate up to 25\% more routing instances, compared
                 with the XC4000-type switch module of the same size.
                 Experimental results demonstrate that our universal
                 switch modules improve routability at the chip level.
                 Finally, our work also provides a theoretical insight
                 into the important observation by Rose and Brown [1991]
                 (based on extensive experiments) that $ F_S = 3$ is
                 often sufficient to provide high routability.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Algorithms; Design; Experimentation; Measurement;
                 Performance; Theory; Verification",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  subject =      "Hardware --- Integrated Circuits --- Types and Design
                 Styles (B.7.1): {\bf Gate arrays}; Hardware ---
                 Integrated Circuits --- Design Aids (B.7.2): {\bf
                 Placement and routing}",
}

@Article{Thakur:1996:SPF,
  author =       "Shashidhar Thakur and D. F. Wong",
  title =        "Series-parallel functions and {FPGA} logic module
                 design",
  journal =      j-TODAES,
  volume =       "1",
  number =       "1",
  pages =        "102--122",
  month =        jan,
  year =         "1996",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1996-1-1/p102-thakur/p102-thakur.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1996-1-1/p102-thakur/",
  abstract =     "The need for a two-way interaction between logic
                 synthesis and FPGA logic module design has been
                 stressed recently. Having a logic module that can
                 implement many functions is a good idea only if one can
                 also give a synthesis strategy that makes efficient use
                 of this functionality. Traditionally, technology
                 mapping algorithms have been developed after the logic
                 architecture has been designed. We follow a dual
                 approach, by focusing on a specific technology mapping
                 algorithm, namely, the structural tree-based mapping
                 algorithm, and designing a logic module that can be
                 mapped efficiently by this algorithm. It is known that
                 the tree-based mapping algorithm makes optimal use of a
                 library of functions, each of which can be represented
                 by a tree of AND, OR, and NOT gates (series-parallel or
                 SP functions). We show how to design a SP function with
                 a minimum number of inputs that can implement all
                 possible SP functions with a specified number of
                 inputs. For instance, we demonstrate a seven-input SP
                 function that can implement all four-input SP
                 functions. Mapping results show that, on an average,
                 the number of blocks of this function needed to map
                 benchmark circuits are 12\% less than those for Actel's
                 ACT1 logic modules. Our logic modules show a 4\%
                 improvement over ACT1, if the block count is scaled to
                 take into account the number of transistors needed to
                 implement different logic modules.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Algorithms; Design; Experimentation; Performance;
                 Theory; Verification",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "field programmable gate arrays; series-parallel
                 technology mapping; tree-based technology mapping
                 algorithm; universal logic modules",
  subject =      "Hardware --- Integrated Circuits --- Types and Design
                 Styles (B.7.1): {\bf Gate arrays}; Hardware --- Logic
                 Design --- Design Styles (B.6.1): {\bf Combinational
                 logic}; Hardware --- Logic Design --- Design Aids
                 (B.6.3); Computer Applications --- Computer-Aided
                 Engineering (J.6): {\bf Computer-aided design (CAD)};
                 Mathematics of Computing --- Discrete Mathematics ---
                 Graph Theory (G.2.2): {\bf Trees}",
}

@article{Thanvantri:1996:OFS,
  author      = {Venkat Thanvantri and Sartaj Sahni},
  title       = {Optimal folding of standard and custom cells},
  journal     = j-TODAES,
  year        = {1996},
  month       = jan,
  volume      = {1},
  number      = {1},
  pages       = {123--143},
  coden       = {ATASFO},
  issn        = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l      = {1084-4309},
  bibdate     = {Fri Jul 27 10:05:33 MDT 2001},
  bibsource   = {http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  url         = {http://www.acm.org/pubs/articles/journals/todaes/1996-1-1/p123-thanvantri/p123-thanvantri.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1996-1-1/p123-thanvantri/},
  abstract    = {We study the problem of folding an ordered list of
                 standard and custom cells into rows of a chip so as to
                 minimize either the routing area or the total chip
                 area. Nine versions of the folding problem are
                 formulated and fast polynomial time algorithms are
                 obtained for each. Two of our formulations correspond
                 to problems formulated in Paik and Sahni [1993] for the
                 folding of a stack of bit-slice components. Our
                 algorithms for these two formulations are
                 asymptotically superior to those of Paik and Sahni
                 [1993].},
  acknowledgement = ack-nhfb,
  fjournal    = {ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)},
  generalterms = {Algorithms; Design; Experimentation; Measurement;
                 Performance; Theory},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords    = {custom cell folding; layout area; standard cell
                 folding},
  subject     = {Hardware --- Integrated Circuits --- Types and Design
                 Styles (B.7.1): {\bf Gate arrays}; Hardware ---
                 Integrated Circuits --- Design Aids (B.7.2): {\bf
                 Layout}; Theory of Computation --- Analysis of
                 Algorithms and Problem Complexity --- Nonnumerical
                 Algorithms and Problems (F.2.2): {\bf Routing and
                 layout}},
}

@article{Cong:1996:CLS,
  author      = {Jason Cong and Yuzheng Ding},
  title       = {Combinational logic synthesis for {LUT} based field
                 programmable gate arrays},
  journal     = j-TODAES,
  year        = {1996},
  month       = apr,
  volume      = {1},
  number      = {2},
  pages       = {145--204},
  coden       = {ATASFO},
  issn        = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l      = {1084-4309},
  bibdate     = {Fri Jul 27 10:05:33 MDT 2001},
  bibsource   = {http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  url         = {http://www.acm.org/pubs/articles/journals/todaes/1996-1-2/p145-cong/p145-cong.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1996-1-2/p145-cong/},
  abstract    = {The increasing popularity of the field programmable
                 gate-array (FPGA) technology has generated a great deal
                 of interest in the algorithmic study and tool
                 development for FPGA-specific design automation
                 problems. The most widely used FPGAs are LUT based
                 FPGAs, in which the basic logic element is a $K$-input
                 one-output lookup-table (LUT) that can implement any
                 Boolean function of up to $K$ variables. This unique
                 feature of the LUT has brought new challenges to logic
                 synthesis and optimization, resulting in many new
                 techniques reported in recent years. This article
                 summarizes the research results on combinational logic
                 synthesis for LUT based FPGAs under a coherent
                 framework. These results were dispersed in various
                 conference proceedings and journals and under various
                 formulations and terminologies. We first present
                 general problem formulations, various optimization
                 objectives and measurements, then focus on a set of
                 commonly used basic concepts and techniques, and
                 finally summarize existing synthesis algorithms and
                 systems. We classify and summarize the basic techniques
                 into two categories, namely, {\em logic optimization\/}
                 and {\em technology mapping}, and describe the existing
                 algorithms and systems in terms of how they use the
                 classified basic techniques. A comprehensive list of
                 references is compiled in the attached bibliography.},
  acknowledgement = ack-nhfb,
  fjournal    = {ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)},
  generalterms = {Algorithms; Design; Experimentation; Measurement;
                 Performance; Theory},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords    = {area minimization; computer-aided design of VLSI;
                 decomposition; delay minimization; delay modeling;
                 FPGA; logic optimization; power minimization;
                 programmable logic; routing; simplification; synthesis;
                 system design; technology mapping},
  subject     = {Hardware --- Logic Design --- Design Styles (B.6.1):
                 {\bf Combinational logic}; Hardware --- Logic Design
                 --- Design Aids (B.6.3): {\bf Automatic synthesis};
                 Hardware --- Logic Design --- Design Aids (B.6.3): {\bf
                 Optimization}; Hardware --- Integrated Circuits ---
                 Types and Design Styles (B.7.1): {\bf Gate arrays};
                 Computer Applications --- Computer-Aided Engineering
                 (J.6): {\bf Computer-aided design (CAD)}},
}

@article{Middelhoek:1996:VEF,
  author      = {Peter F. A. Middelhoek and Sreeranga P. Rajan},
  title       = {From {VHDL} to efficient and first-time-right designs:
                 a formal approach},
  journal     = j-TODAES,
  year        = {1996},
  month       = apr,
  volume      = {1},
  number      = {2},
  pages       = {205--250},
  coden       = {ATASFO},
  issn        = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l      = {1084-4309},
  bibdate     = {Fri Jul 27 10:05:33 MDT 2001},
  bibsource   = {http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  url         = {http://www.acm.org/pubs/articles/journals/todaes/1996-1-2/p205-middelhoek/p205-middelhoek.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1996-1-2/p205-middelhoek/},
  abstract    = {In this article we provide a practical
                 transformational approach to the synthesis of correct
                 synchronous digital hardware designs from high-level
                 specifications. We do this while taking into account
                 the complete life cycle of a design from early
                 prototype to full custom implementation. Besides
                 time-to-market, both flexibility with respect to target
                 architecture and efficiency issues are addressed by the
                 methodology. The utilization of user-selected
                 behavior-preserving transformation steps ensures
                 first-time-right design while exploiting the
                 experience, flexibility, and creativity of the
                 designer. \par

                 To ensure that design transformations are indeed
                 behavior-preserving a novel mechanized approach to the
                 specification and verification of design
                 transformations on control data flow graphs which is
                 independent of a specific behavioral model or graph
                 size has been developed. \par

                 As a demonstration of an industrial application we use
                 a video processing algorithm needed for the conversion
                 from a line-interlaced to progressively scanned video
                 format. Both a video signal processor-based prototype
                 implementation as well as a very efficient full custom
                 implementation are developed starting from a single
                 high-level behavioral specification of the algorithm in
                 VHDL. Results are compared with those previously
                 obtained using different tools and methodologies.},
  acknowledgement = ack-nhfb,
  fjournal    = {ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)},
  generalterms = {Design; Human Factors; Languages; Theory;
                 Verification},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords    = {CDFG; correctness by construction; design methodology;
                 rapid system prototyping; SFG; transformational design;
                 VHDL},
  subject     = {Hardware --- Register-Transfer-Level Implementation
                 --- Design (B.5.1): {\bf Arithmetic and logic units};
                 Hardware --- Register-Transfer-Level Implementation ---
                 Design (B.5.1): {\bf Control design}; Hardware ---
                 Register-Transfer-Level Implementation --- Design
                 (B.5.1): {\bf Data-path design}; Hardware ---
                 Register-Transfer-Level Implementation --- Design
                 (B.5.1): {\bf Styles}; Hardware ---
                 Register-Transfer-Level Implementation --- Design Aids
                 (B.5.2): {\bf Automatic synthesis}; Hardware ---
                 Register-Transfer-Level Implementation --- Design Aids
                 (B.5.2): {\bf Hardware description languages}; Hardware
                 --- Register-Transfer-Level Implementation --- Design
                 Aids (B.5.2): {\bf Verification}; Hardware --- Logic
                 Design --- Design Aids (B.6.3): {\bf Automatic
                 synthesis}; Hardware --- Logic Design --- Design Aids
                 (B.6.3): {\bf Hardware description languages}; Hardware
                 --- Logic Design --- Design Aids (B.6.3): {\bf
                 Optimization}; Hardware --- Logic Design --- Design
                 Aids (B.6.3): {\bf Verification}; Software ---
                 Programming Languages --- Language Classifications
                 (D.3.2): {\bf Applicative (functional) languages};
                 Software --- Programming Languages --- Language
                 Classifications (D.3.2): {\bf Data-flow languages};
                 Theory of Computation --- Logics and Meanings of
                 Programs --- Specifying and Verifying and Reasoning
                 about Programs (F.3.1): {\bf Mechanical verification};
                 Theory of Computation --- Mathematical Logic and Formal
                 Languages --- Mathematical Logic (F.4.1): {\bf
                 Mechanical theorem proving}; Computer Applications ---
                 Computer-Aided Engineering (J.6): {\bf Computer-aided
                 design (CAD)}; Hardware --- Register-Transfer-Level
                 Implementation --- Design Aids (B.5.2): {\bf
                 Optimization}; Software --- Software Engineering ---
                 Software/Program Verification (D.2.4): {\bf Correctness
                 proofs}; Hardware --- Logic Design --- Design Aids
                 (B.6.3): {\bf VHDL}},
}

@Article{Kolson:1996:ORA,
  author =       "David J. Kolson and Alexandru Nicolau and Nikil Dutt
                 and Ken Kennedy",
  title =        "Optimal register assignment to loops for embedded code
                 generation",
  journal =      j-TODAES,
  volume =       "1",
  number =       "2",
  pages =        "251--279",
  month =        apr,
  year =         "1996",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1996-1-2/p251-kolson/p251-kolson.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1996-1-2/p251-kolson/",
  abstract =     "One of the challenging tasks in code generation for
                 embedded systems is register assignment. When more live
                 variables than registers exist, some variables will
                 necessarily be accessed from data memory. Because loops
                 are typically executed many times and are often
                 time-critical, good register assignment in loops is
                 exceedingly important as accessing data memory can
                 degrade performance. The issue of finding an optimal
                 register assignment to loops has been open for some
                 time. In this article, we present a technique for
                 optimal (i.e., spill minimizing) register assignment to
                 loops. First we present a technique for register
                 assignment to architecture styles that are
                 characterized by a consolidated register file. Then we
                 extend the technique to include architecture styles
                 that are characterized by distributed memories and/or a
                 combination of general- and special-purpose registers.
                 Experimental results demonstrate that although the
                 optimal algorithm may be computationally prohibitive,
                 heuristic versions obtain results with performance
                 better than that of an existing graph coloring
                 approach.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Algorithms; Languages",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "code generation; embedded systems; system design",
  subject =      "Software --- Programming Languages --- Processors
                 (D.3.4): {\bf Compilers}; Software --- Programming
                 Languages --- Processors (D.3.4): {\bf Optimization};
                 Software --- Programming Languages --- Processors
                 (D.3.4): {\bf Code generation}",
}

@Article{Prasad:1996:TRP,
  author =       "S. C. Prasad and K. Roy",
  title =        "Transistor reordering for power minimization under
                 delay constraint",
  journal =      j-TODAES,
  volume =       "1",
  number =       "2",
  pages =        "280--300",
  month =        apr,
  year =         "1996",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1996-1-2/p280-prasad/p280-prasad.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1996-1-2/p280-prasad/",
  abstract =     "In this article we address the problem of optimization
                 of VLSI circuits to minimize power consumption while
                 meeting performance goals. We present a method of
                 estimating power consumption of a basic or complex CMOS
                 gate which takes the internal capacitances of the gate
                 into account. This method is used to select an ordering
                 of series-connected transistors found in CMOS gates to
                 achieve lower power consumption. The method is very
                 efficient when used by library-based design styles. We
                 describe a multipass algorithm that makes use of
                 transistor reordering to optimize performance and power
                 consumption of circuits, has a linear time complexity
                 per pass, and converges to a solution in a small number
                 of passes. Transformations in addition to transistor
                 reordering can be used by the algorithm. The algorithm
                 has been benchmarked on several large examples and the
                 results are presented.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Algorithms; Design; Performance",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "circuit optimization; critical path enumeration; gate
                 input reordering; power estimation; transistor
                 reordering",
  subject =      "Hardware --- Logic Design --- Design Aids (B.6.3):
                 {\bf Optimization}; Hardware --- Integrated Circuits
                 --- Types and Design Styles (B.7.1): {\bf VLSI (very
                 large scale integration)}",
}

@Article{Wolf:1996:OOC,
  author =       "Wayne Wolf",
  title =        "Object-oriented cosynthesis of distributed embedded
                 systems",
  journal =      j-TODAES,
  volume =       "1",
  number =       "3",
  pages =        "301--314",
  month =        jul,
  year =         "1996",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1996-1-3/p301-wolf/p301-wolf.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1996-1-3/p301-wolf/",
  abstract =     "This article describes a new hardware-software
                 cosynthesis algorithm that takes advantage of the
                 structure inherent in an object-oriented specification.
                 The algorithm creates a distributed system
                 implementation with arbitrary topology, using the
                 object-oriented structure to partition functionality in
                 addition to scheduling and allocating processes.
                 Process partitioning is an especially important
                 optimization for such systems because the specification
                 will not, in general, take into account the process
                 structure required for efficient execution on the
                 distributed engine. The object-oriented specification
                 naturally provides both coarse-grained and fine-grained
                 partitions of the system. Our algorithm uses that
                 multilevel structure to guide synthesis. Experimental
                 results show that our algorithm takes advantage of the
                 object-oriented specification to quickly converge on
                 high-quality implementations.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Design",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "distributed embedded systems; hardware-software
                 co-design; object-oriented co-synthesis",
  subject =      "Computer Systems Organization --- Special-Purpose and
                 Application-Based Systems (C.3): {\bf
                 Microprocessor/microcomputer applications}; Computer
                 Systems Organization --- Special-Purpose and
                 Application-Based Systems (C.3): {\bf Real-time and
                 embedded systems}",
}

@Article{Chow:1996:LPR,
  author =       "Sue-Hong Chow and Yi-Cheng Ho and TingTing Hwang and
                 C. L. Liu",
  title =        "Low power realization of finite state machines --- a
                 decomposition approach",
  journal =      j-TODAES,
  volume =       "1",
  number =       "3",
  pages =        "315--340",
  month =        jul,
  year =         "1996",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1996-1-3/p315-chow/p315-chow.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1996-1-3/p315-chow/",
  abstract =     "We present in this article a new approach to the
                 synthesis problem for finite state machines with the
                 reduction of power dissipation as a design objective. A
                 finite state machine is decomposed into a number of
                 {\em coupled\/} submachines. Most of the time, only one
                 of the submachines will be activated which,
                 consequently, could lead to substantial savings in
                 power consumption. The key steps in our approach are:
                 (1) decomposition of a finite state machine into
                 submachines so that there is a high probability that
                 state transitions will be confined to the smaller of
                 the submachines most of the time, and (2) synthesis of
                 the coupled submachines to optimize the logic circuits.
                 Experimental results confirmed that our approach
                 produced very good results (in particular, for finite
                 state machines with a large number of states).",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Design; Performance",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "decomposition of finite state machines; lower power
                 design; state assignment",
  subject =      "Hardware --- Logic Design --- Design Styles (B.6.1):
                 {\bf Sequential circuits}; Hardware --- Logic Design
                 --- Design Aids (B.6.3): {\bf Automatic synthesis};
                 Hardware --- Logic Design --- Design Aids (B.6.3): {\bf
                 Optimization}; Computer Applications --- Computer-Aided
                 Engineering (J.6): {\bf Computer-aided design (CAD)}",
}

@Article{Kagaris:1996:FAM,
  author =       "Dimitrios Kagaris and Spyros Tragoudas",
  title =        "A fast algorithm for minimizing {FPGA} combinational
                 and sequential modules",
  journal =      j-TODAES,
  volume =       "1",
  number =       "3",
  pages =        "341--351",
  month =        jul,
  year =         "1996",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1996-1-3/p341-kagaris/p341-kagaris.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1996-1-3/p341-kagaris/",
  abstract =     "We present a quadratic-time algorithm for minimizing
                 the number of modules in an FPGA with combinational and
                 sequential modules (like the C-modules and S-modules of
                 the ACT2 and ACT3 architectures). The constraint is
                 that a combinational module can be combined with one
                 flip-flop in a single sequential module, only if the
                 combinational module drives no other combinational
                 modules. Our algorithm uses a minimum-cost flow
                 formulation to solve the problem with a significant
                 time improvement over a previous approach that used a
                 general linear program.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Algorithms; Design",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "field programmable gate arrays; retiming",
  subject =      "Hardware --- Logic Design --- Design Aids (B.6.3):
                 {\bf Automatic synthesis}; Hardware --- Logic Design
                 --- Design Aids (B.6.3): {\bf Optimization}; Hardware
                 --- Integrated Circuits --- Types and Design Styles
                 (B.7.1): {\bf Gate arrays}",
}

@Article{Chang:1996:OCP,
  author =       "En-Shou Chang and Daniel D. Gajski and Sanjiv
                 Narayan",
  title =        "An optimal clock period selection method based on
                 slack minimization criteria",
  journal =      j-TODAES,
  volume =       "1",
  number =       "3",
  pages =        "352--370",
  month =        jul,
  year =         "1996",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1996-1-3/p352-chang/p352-chang.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1996-1-3/p352-chang/",
  abstract =     "An important decision in synthesizing a hardware
                 implementation from a behavioral description is
                 selecting the clock period to schedule the datapath
                 operations into control steps. Prior to scheduling,
                 most existing behavioral synthesis systems either
                 require the designer to specify the clock period
                 explicitly or require that the delays of the operators
                 used in the design be specified in multiples of the
                 clock period. An unfavorable choice of clock period
                 could result in operations being idle for a large
                 portion of the clock period and, consequently, affect
                 the performance of the synthesized design. In this
                 article, we demonstrate the effect of clock slack on
                 the performance of designs and present an algorithm to
                 find a slack-minimal clock period. We prove the
                 optimality of our method and apply it to several
                 examples to demonstrate its effectiveness in maximizing
                 design performance.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Design; Measurement; Performance",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "clock period; clock slack; performance estimation;
                 scheduling",
  subject =      "Hardware --- Register-Transfer-Level Implementation
                 --- Design Aids (B.5.2)",
}

@Article{Lopez:1996:EDP,
  author =       "Mario A. Lopez and Dinesh P. Mehta",
  title =        "Efficient decomposition of polygons into {L-shapes}
                 with application to {VLSI} layouts",
  journal =      j-TODAES,
  volume =       "1",
  number =       "3",
  pages =        "371--395",
  month =        jul,
  year =         "1996",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1996-1-3/p371-lopez/p371-lopez.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1996-1-3/p371-lopez/",
  abstract =     "We present two practical algorithms for partitioning
                 circuit components represented by rectilinear polygons
                 so that they can be stored using the L-shaped corner
                 stitching data structure; that is, our algorithms
                 decompose a simple polygon into a set of nonoverlapping
                 L-shapes and rectangles by using horizontal cuts only.
                 The more general of our algorithms computes an optimal
                 configuration for a wide variety of optimization
                 functions, whereas the other computes a minimum
                 configuration of rectangles and L-shapes. Both
                 algorithms run in $ O(n + h \log h) $ time, where $n$
                 is the number of vertices in the polygon and $h$ is the
                 number of H-pairs. Because for VLSI data $h$ is small,
                 in practice these algorithms are linear in $n$.
                 Experimental results on actual VLSI data compare our
                 algorithms and demonstrate the gains in performance for
                 corner stitching (as measured by different objective
                 functions) obtained by using them instead of more
                 traditional rectangular partitioning algorithms.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Algorithms",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "corner stitching; L-shapes; partition; rectangle;
                 rectilinear polygons",
  subject =      "Hardware --- Integrated Circuits --- Design Aids
                 (B.7.2): {\bf Layout}; Theory of Computation ---
                 Analysis of Algorithms and Problem Complexity ---
                 Nonnumerical Algorithms and Problems (F.2.2): {\bf
                 Geometrical problems and computations}; Mathematics of
                 Computing --- Discrete Mathematics --- Graph Theory
                 (G.2.2): {\bf Graph algorithms}",
}

@Article{Moreno:1996:REU,
  author =       "R. Moreno and R. Hermida and M. Fern{\'a}ndez",
  title =        "Register estimation in unscheduled dataflow graphs",
  journal =      j-TODAES,
  volume =       "1",
  number =       "3",
  pages =        "396--403",
  month =        jul,
  year =         "1996",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1996-1-3/p396-moreno/p396-moreno.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1996-1-3/p396-moreno/",
  abstract =     "A method for register number estimation in unscheduled
                 or partially scheduled dataflow graphs is presented.
                 The strategy consists of studying the probability that
                 an edge between two nodes crosses the boundary between
                 two control steps, and it is based on a model that
                 associates probabilities with the different scheduling
                 alternatives of each node. These probabilities are
                 computed by means of an analytic method that takes into
                 account the distribution of operations in the dataflow
                 graph and the hardware modules available in the
                 library. The results highlight that the estimation
                 method is very accurate because the error between the
                 estimated value and the real value is always within a
                 narrow margin.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Design",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "area estimation; high-level synthesis; probabilities;
                 register estimation; scheduling",
  subject =      "Hardware --- Register-Transfer-Level Implementation
                 --- Design (B.5.1): {\bf Data-path design}",
}

@Article{Cheng:1996:GLT,
  author =       "Kwang-Ting Cheng",
  title =        "Gate-level test generation for sequential circuits",
  journal =      j-TODAES,
  volume =       "1",
  number =       "4",
  pages =        "405--442",
  month =        oct,
  year =         "1996",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1996-1-4/p405-cheng/p405-cheng.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1996-1-4/p405-cheng/",
  abstract =     "This paper discusses the gate-level automatic test
                 pattern generation (ATPG) methods and techniques for
                 sequential circuits. The basic concepts, examples,
                 advantages, and limitations of representative methods
                 are reviewed in detail. The relationship between
                 gate-level sequential circuit ATPG and the partial scan
                 design is also discussed.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Algorithms; Reliability; Verification",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "automatic test generation; IC testing; sequential
                 circuit test generation; testing",
  subject =      "Hardware --- Integrated Circuits --- Reliability and
                 Testing** (B.7.3); Hardware --- Integrated Circuits ---
                 Types and Design Styles (B.7.1)",
}

@Article{Langevin:1996:RTC,
  author =       "M. Langevin and E. Cerny",
  title =        "A recursive technique for computing lower-bound
                 performance of schedules",
  journal =      j-TODAES,
  volume =       "1",
  number =       "4",
  pages =        "443--455",
  month =        oct,
  year =         "1996",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1996-1-4/p443-langevin/p443-langevin.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1996-1-4/p443-langevin/",
  abstract =     "We present a fast recursive technique for estimating
                 lower-bound performance of data path schedules. The
                 method relies on the determination of an ASAPUC (As
                 Soon As Possible Under Constraint) time-step value for
                 each node of the DFG (Data-Flow Graph) that is based on
                 the ASAPUC values of its predecessor nodes. That is,
                 the lower-bound estimation is applied to each subgraph
                 permitting the derivation of a tight lower bound on the
                 performance of the complete DFG. Applying the greedy
                 lower-bound estimator of Rim and Jain [1994] to each
                 subgraph improves the complete lower bound in more than
                 50\% of the experiments reported in Rim and Jain
                 [1994], and the CPU time is only about twice as long.
                 The recursive methodology can be extended to exploit
                 other lower-bound techniques, for example, considering
                 other constraints such as the number of busses or
                 registers.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "dataflow graph; lower-bound on performance; microcode
                 optimization; resource constraints; scheduling",
  subject =      "Hardware --- Register-Transfer-Level Implementation
                 --- Design Aids (B.5.2): {\bf Optimization}; Hardware
                 --- Register-Transfer-Level Implementation --- Design
                 Aids (B.5.2): {\bf Automatic synthesis}",
}

@Article{Sosic:1996:UAF,
  author =       "Rok Sosi{\=c} and Jun Gu and Robert R. Johnson",
  title =        "The {Unison} algorithm: fast evaluation of {Boolean}
                 expressions",
  journal =      j-TODAES,
  volume =       "1",
  number =       "4",
  pages =        "456--477",
  month =        oct,
  year =         "1996",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Oct 22 15:33:01 2002",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1996-1-4/p456-sosic/p456-sosic.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1996-1-4/p456-sosic/",
  abstract =     "We present a Unison algorithm to evaluate arbitrarily
                 complex Boolean expressions. This novel algorithm,
                 based on the total differential of a Boolean function,
                 enables fast evaluation of Boolean expressions in
                 software. Any combination of Boolean operations can be
                 packed into the bits of one computer word and evaluated
                 in parallel by bitwise logical operations. Sample runs
                 of the Unison algorithm show that many Boolean
                 operations can be evaluated in one clock cycle. The Unison
                 algorithm is able to evaluate Boolean expressions at an
                 execution speed that is comparable to compiled
                 evaluation while retaining the flexibility of
                 interpreted approaches. The algorithm lends itself well
                 to many practical applications.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Algorithms; Design; Performance; Reliability;
                 Verification",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Boolean differential; Boolean evaluation; Boolean
                 expressions; Unison algorithm",
  subject =      "Hardware --- Logic Design --- General (B.6.0); Theory
                 of Computation --- Analysis of Algorithms and Problem
                 Complexity --- Nonnumerical Algorithms and Problems
                 (F.2.2)",
}

@Article{Cong:1996:OWI,
  author =       "Jason Cong and Lei He",
  title =        "Optimal wiresizing for interconnects with multiple
                 sources",
  journal =      j-TODAES,
  volume =       "1",
  number =       "4",
  pages =        "478--511",
  month =        oct,
  year =         "1996",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1996-1-4/p478-cong/p478-cong.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1996-1-4/p478-cong/",
  abstract =     "In this paper, we study the optimal wiresizing problem
                 for nets with multiple sources under the RC tree model
                 and the Elmore delay model. We decompose the routing
                 tree for a multisource net into the source subtree
                 (SST) and a set of loading subtrees (LSTs), and show
                 that the optimal wiresizing solution satisfies a number
                 of interesting properties, including: LST separability,
                 the LST monotone property, the SST local monotone
                 property, and the dominance property. Furthermore, we
                 study the optimal wiresizing problem using a variable
                 segment-division rather than an a priori fixed
                 segment-division as in all previous works and reveal
                 the bundled refinement property. These properties lead
                 to efficient algorithms to compute the optimal
                 solutions. We have tested our algorithm on nets
                 extracted from the multilayer layout for a
                 high-performance Intel microprocessor. Accurate SPICE
                 simulation shows that our methods reduce the average
                 delay by up to 23.5\% and the maximum delay by up to
                 37.8\%, respectively, for the submicron CMOS technology
                 when compared to the minimal wire width solution. In
                 addition, the algorithm based on the variable
                 segment-division yields a speedup of over 100$ \times $
                 time and does not lose any accuracy, when compared with
                 the algorithm based on the a priori fixed
                 segment-division.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Algorithms; Design; Experimentation; Performance",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "bundled refinement; decomposition of multi-source
                 routing tree; dominance property; Elmore delay;
                 fidelity; high performance; interconnect optimization;
                 layout optimization; local refinement; multi-source
                 net; multi-source routing tree; optimal wiresizing;
                 variable segment-division",
  subject =      "Hardware --- Integrated Circuits --- Design Aids
                 (B.7.2): {\bf Placement and routing}; Hardware ---
                 Integrated Circuits --- Design Aids (B.7.2): {\bf
                 Simulation}; Computer Applications --- Computer-Aided
                 Engineering (J.6): {\bf Computer-aided design (CAD)};
                 Hardware --- Integrated Circuits --- Design Aids
                 (B.7.2): {\bf Layout}; Mathematics of Computing ---
                 Discrete Mathematics --- Graph Theory (G.2.2); Hardware
                 --- Integrated Circuits --- Types and Design Styles
                 (B.7.1); Hardware --- Integrated Circuits --- Design
                 Aids (B.7.2): {\bf SPICE}; Hardware --- Input/Output
                 and Data Communications --- Interconnections
                 (Subsystems) (B.4.3)",
}

%%% TODAES 1(4), October 1996.  The abstract uses the term ``full-set
%%% decomposition'', matching the keywords field of this entry.
@Article{Ganley:1996:RST,
  author =       "Joseph L. Ganley and James P. Cohoon",
  title =        "Rectilinear {Steiner} trees on a checkerboard",
  journal =      j-TODAES,
  volume =       "1",
  number =       "4",
  pages =        "512--522",
  month =        oct,
  year =         "1996",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1996-1-4/p512-ganley/p512-ganley.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1996-1-4/p512-ganley/",
  abstract =     "The rectilinear Steiner tree problem is to find a
                 minimum-length set of horizontal and vertical line
                 segments that interconnect a given set of points in the
                 plane. Here we study the {\em thumbnail rectilinear
                 Steiner tree\/} problem, where the input points are
                 drawn from a small integer grid. Specifically, we
                 devise a full-set decomposition algorithm for
                 computing optimal thumbnail rectilinear Steiner trees.
                 We then present experimental results comparing the
                 performance of this algorithm with two existing
                 algorithms for computing optimal rectilinear Steiner
                 trees. The thumbnail rectilinear Steiner tree problem
                 has applications in VLSI placement algorithms, based on
                 geometric partitioning, global routing of
                 field-programmable gate arrays, and routing estimation
                 during floorplanning.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Algorithms; Theory",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "exact algorithms; full-set decomposition; rectilinear
                 Steiner tree; routing",
  subject =      "Hardware --- Integrated Circuits --- Design Aids
                 (B.7.2): {\bf Placement and routing}; Theory of
                 Computation --- Analysis of Algorithms and Problem
                 Complexity --- Nonnumerical Algorithms and Problems
                 (F.2.2): {\bf Geometrical problems and computations};
                 Mathematics of Computing --- Discrete Mathematics ---
                 Graph Theory (G.2.2): {\bf Graph algorithms};
                 Mathematics of Computing --- Discrete Mathematics ---
                 Graph Theory (G.2.2): {\bf Trees}",
}

%%% TODAES 2(1), January 1997, pages 2--21: survey of high-level synthesis.
@article{Lin:1997:RDH,
  author =       {Youn-Long Lin},
  title =        {Recent developments in high-level synthesis},
  journal =      j-TODAES,
  volume =       {2},
  number =       {1},
  pages =        {2--21},
  month =        jan,
  year =         {1997},
  coden =        {ATASFO},
  issn =         {1084-4309 (print), 1557-7309 (electronic)},
  issn-l =       {1084-4309},
  bibdate =      {Fri Jul 27 10:05:33 MDT 2001},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  url =          {http://www.acm.org/pubs/articles/journals/todaes/1997-2-1/p2-lin/p2-lin.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1997-2-1/p2-lin/},
  abstract =     {We survey recent developments in high level synthesis
                 technology for VLSI design. The need for higher-level
                 design automation tools are discussed first. We then
                 describe some basic techniques for various subtasks of
                 high-level synthesis. Techniques that have been
                 proposed in the past few years (since 1994) for various
                 subtasks of high-level synthesis are surveyed. We also
                 survey some new synthesis objectives including
                 testability, power efficiency, and reliability.},
  acknowledgement = ack-nhfb,
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)},
  generalterms = {Design; Experimentation; Languages; Reliability},
  journal-url =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords =     {design automation; design methodology; high level
                 synthesis; VLSI design},
  subject =      {Hardware --- Register-Transfer-Level Implementation
                 --- Design (B.5.1): {\bf Data-path design}; Hardware
                 --- Register-Transfer-Level Implementation --- Design
                 Aids (B.5.2): {\bf Automatic synthesis}; Hardware ---
                 Register-Transfer-Level Implementation --- Design Aids
                 (B.5.2): {\bf Hardware description languages}; Hardware
                 --- Register-Transfer-Level Implementation --- Design
                 Aids (B.5.2): {\bf Optimization}},
}

%%% TODAES 2(1), January 1997, pages 22--41.
@article{Gong:1997:MRH,
  author =       {Jie Gong and Daniel D. Gajski and Smita Bakshi},
  title =        {Model refinement for hardware-software codesign},
  journal =      j-TODAES,
  volume =       {2},
  number =       {1},
  pages =        {22--41},
  month =        jan,
  year =         {1997},
  coden =        {ATASFO},
  issn =         {1084-4309 (print), 1557-7309 (electronic)},
  issn-l =       {1084-4309},
  bibdate =      {Fri Jul 27 10:05:33 MDT 2001},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  url =          {http://www.acm.org/pubs/articles/journals/todaes/1997-2-1/p22-gong/p22-gong.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1997-2-1/p22-gong/},
  abstract =     {Hardware-software codesign, which implements a given
                 specification with a set of system components such as
                 ASICs and processors, includes several key tasks such
                 as system component allocation, functional
                 partitioning, quality metrics estimation, and model
                 refinement. In this work, we focus on the model
                 refinement task which transforms a specification from
                 an original functional model to a refined
                 implementation model. First, we categorize several
                 commonly used implementation models and describe a set
                 of refinement procedures to transform a specification
                 to each of these implementation models. We also present
                 a set of experimental results to compare the
                 implementation models and to demonstrate how the
                 proposed approach can be used to explore different
                 implementation styles.},
  acknowledgement = ack-nhfb,
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)},
  generalterms = {Algorithms; Design; Experimentation; Languages;
                 Measurement},
  journal-url =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords =     {functional model; implementation model; model
                 refinement; software-hardware codesign},
  subject =      {Computer Applications --- Computer-Aided Engineering
                 (J.6): {\bf Computer-aided design (CAD)}; Computer
                 Systems Organization --- General (C.0): {\bf
                 Hardware/software interfaces}; Hardware ---
                 Register-Transfer-Level Implementation --- General
                 (B.5.0); Computer Systems Organization --- General
                 (C.0): {\bf Modeling of computer architecture}},
}

%%% TODAES 2(1), January 1997.  The surname ``de Abreu Moreira'' is
%%% braced so that BibTeX treats it as a single unit.
%%% NOTE(review): title colon after AGENTS restored from the published
%%% title; confirm against the ACM Digital Library record.
@Article{deAbreuMoreira:1997:ADC,
  author =       "Dilvan {de Abreu Moreira} and Les T. Walczowski",
  title =        "{AGENTS}: a distributed client-server system for leaf
                 cell generation",
  journal =      j-TODAES,
  volume =       "2",
  number =       "1",
  pages =        "42--61",
  month =        jan,
  year =         "1997",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Oct 31 06:28:35 2003",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1997-2-1/p42-moreira/p42-moreira.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1997-2-1/p42-moreira/",
  abstract =     "The AGENTS system is a set of programs designed to
                 generate automatically the mask-level layout of full
                 custom CMOS, BICMOS, and bipolar leaf cells. The system
                 is formed from four server programs: the placer, router,
                 database, and broker. \par

                 The placer places components in a cell, the router
                 wires the circuits sent to it, the database stores all
                 the information that is dependent upon the fabrication
                 process, such as the design rules, and the Broker makes
                 the services of the other servers available. \par

                 These servers communicate over a computer network using
                 the TCP/IP Internet Protocol. The Placer server
                 receives from its client the description and netlist of
                 the circuit to be generated using EDIF (Electronic
                 Design Interchange Format). The output to its client is
                 the mask layout of the circuit, again codified in EDIF.
                 The concept of agents as software components which have
                 the ability to communicate and cooperate with each
                 other is at the heart of the AGENTS system. This
                 concept is not only used at the higher level, for the
                 four servers, but at a lower level as well, inside the
                 Router and Placer servers, where small relatively
                 simple agents work together to accomplish complex
                 tasks. These small agents are responsible for all the
                 reasoning carried out by the two servers, as they hold
                 the basic inference routines and the knowledge needed
                 by the servers. The system's philosophy is that
                 competence should emerge out of the collective behavior
                 of a large number of relatively simple agents. In
                 addition and integrated to these small agents, the
                 system uses a genetic algorithm to improve components'
                 placement before routing.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Design",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "client/server model; genetic algorithms; software
                 agents",
  subject =      "Hardware --- Integrated Circuits --- Design Aids
                 (B.7.2): {\bf Placement and routing}; Hardware ---
                 Integrated Circuits --- Types and Design Styles
                 (B.7.1)",
}

%%% TODAES 2(1), January 1997, pages 62--80.
@article{Esbensen:1997:PDI,
  author =       {Henrik Esbensen and Ernest S. Kuh},
  title =        {A performance-driven {IC\slash MCM} placement
                 algorithm featuring explicit design space exploration},
  journal =      j-TODAES,
  volume =       {2},
  number =       {1},
  pages =        {62--80},
  month =        jan,
  year =         {1997},
  coden =        {ATASFO},
  issn =         {1084-4309 (print), 1557-7309 (electronic)},
  issn-l =       {1084-4309},
  bibdate =      {Fri Jul 27 10:05:33 MDT 2001},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  url =          {http://www.acm.org/pubs/articles/journals/todaes/1997-2-1/p62-esbensen/p62-esbensen.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1997-2-1/p62-esbensen/},
  abstract =     {A genetic algorithm for building-block placement of
                 ICs and MCMs is presented that simultaneously minimizes
                 layout area and an Elmore-based estimate of the maximum
                 path delay while trying to meet a target aspect ratio.
                 Explicit design space exploration is performed by using
                 a vector-valued, 3-dimensional cost function and
                 searching for a set of distinct solutions representing
                 the best trade-offs of the cost dimensions. From the
                 output solutions, the designer can choose the solution
                 with the preferred trade-off. In contrast to existing
                 approaches, the required properties of the output
                 solutions are specified without using weights or
                 bounds. Consequently, the practical problems of
                 specifying these quantities are eliminated. Promising
                 experimental results are obtained for various placement
                 problems, including a real-world design. Solution sets
                 representing good, balanced cost trade-offs are found
                 using a reasonable amount of runtime. Furthermore, the
                 performance is shown to be comparable to that of
                 simulated annealing in the special case of
                 1-dimensional optimization, in which direct comparison
                 is possible.},
  acknowledgement = ack-nhfb,
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)},
  generalterms = {Algorithms; Design},
  journal-url =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords =     {design space exploration; timing-driven building-block
                 placement},
  subject =      {Hardware --- Integrated Circuits --- Types and Design
                 Styles (B.7.1): {\bf VLSI (very large scale
                 integration)}; Hardware --- Integrated Circuits ---
                 Design Aids (B.7.2): {\bf Placement and routing};
                 Computing Methodologies --- Artificial Intelligence ---
                 Problem Solving, Control Methods, and Search (I.2.8):
                 {\bf Heuristic methods}},
}

%%% TODAES is quarterly (jan, apr, jul, oct): volume 2, number 2 is the
%%% April 1997 issue.
@Article{Lin:1997:STV,
  author =       "Yann-Rue Lin and Cheng-Tsung Hwang and Allen C.-H.
                 Wu",
  title =        "Scheduling techniques for variable voltage low power
                 designs",
  journal =      j-TODAES,
  volume =       "2",
  number =       "2",
  pages =        "81--97",
  month =        apr,
  year =         "1997",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1997-2-2/p81-lin/p81-lin.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1997-2-2/p81-lin/",
  abstract =     "This paper presents an integer linear programming
                 (ILP) model and a heuristic for the variable voltage
                 scheduling problem. We present the variable voltage
                 scheduling techniques that consider in turn timing
                 constraints alone, resource constraints alone, and
                 timing and resource constraints together for design
                 space exploration. Experimental results show that our
                 heuristic produces results competitive with those of
                 the ILP method in a fraction of the run-time. The
                 results also show that a wide range of design
                 alternatives can be generated using our design space
                 exploration method. Using different cost/delay
                 combinations, power consumption in a single design can
                 differ by as much as a factor of 6 when using mixed
                 3.3V and 5V supply voltages.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Algorithms; Design; Performance",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "high-level synthesis; lower power design; scheduling;
                 variable voltage",
  subject =      "Hardware --- Register-Transfer-Level Implementation
                 --- Design Aids (B.5.2); Hardware ---
                 Register-Transfer-Level Implementation --- Design
                 (B.5.1): {\bf Styles}; Hardware --- Integrated Circuits
                 --- Types and Design Styles (B.7.1): {\bf VLSI (very
                 large scale integration)}",
}

%%% TODAES is quarterly (jan, apr, jul, oct): volume 2, number 2 is the
%%% April 1997 issue.
@Article{Fummi:1997:FDT,
  author =       "F. Fummi and U. Rovati and D. Sciuto",
  title =        "Functional design for testability of control-dominated
                 architectures",
  journal =      j-TODAES,
  volume =       "2",
  number =       "2",
  pages =        "98--122",
  month =        apr,
  year =         "1997",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1997-2-2/p98-fummi/p98-fummi.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1997-2-2/p98-fummi/",
  abstract =     "Control-dominated architectures are usually described
                 in a hardware description language (HDL) by means of
                 interacting FSMs. A VHDL or Verilog specification can
                 be translated into an interacting FSM (IFSM)
                 representation as described here. The IFSM model allows
                 us to approach the testable synthesis problem at the
                 level of each FSM. The functionality is modified by the
                 addition of transparency to data flow. The complete
                 testability of the IFSM implementation is thus achieved
                 by connecting fully testable implementations of each
                 modified FSM. In this way, test sequences separately
                 generated for each FSM are directly applied to the IFSM
                 to achieve complete fault coverage. The addition of
                 test functionality to each FSM description, and its
                 simultaneous synthesis with the FSM functionality,
                 produces a lower area overhead than that necessary for
                 the application of a partial-scan technique. Moreover,
                 the test generation problem is highly simplified since
                 it is reduced to the test generation for each separate
                 FSM.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Measurement; Performance; Reliability",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "functional testing; interacting FSMs",
  subject =      "Hardware --- Register-Transfer-Level Implementation
                 --- Reliability and Testing** (B.5.3): {\bf Test
                 generation**}; Hardware --- Register-Transfer-Level
                 Implementation --- Reliability and Testing** (B.5.3):
                 {\bf Testability**}; Hardware --- Logic Design ---
                 Design Aids (B.6.3): {\bf Hardware description
                 languages}",
}

%%% TODAES is quarterly (jan, apr, jul, oct): volume 2, number 2 is the
%%% April 1997 issue.
@Article{Kormicki:1997:PLS,
  author =       "Maciek Kormicki and Ausif Mahmood and Bradley S.
                 Carlson",
  title =        "Parallel logic simulation on a network of workstations
                 using parallel virtual machine",
  journal =      j-TODAES,
  volume =       "2",
  number =       "2",
  pages =        "123--134",
  month =        apr,
  year =         "1997",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1997-2-2/p123-kormicki/p123-kormicki.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1997-2-2/p123-kormicki/",
  abstract =     "This paper explores parallel logic simulation on a
                 network of workstations using a parallel virtual
                 machine (PVM). A novel parallel implementation of the
                 centralized-time event-driven logic simulation
                 algorithm is carried out such that no global
                 controlling workstation is needed to synchronize the
                 advance of simulation time. Further advantages of our
                 new approach include a random partitioning of the
                 circuit onto available workstations and a pipelined
                 execution of the different phases of the simulation
                 algorithm. To achieve a better load balance, we employ
                 a semioptimistic scheme for gate evaluations (in
                 conjunction with a centralized-time algorithm) such
                 that no rollback is required. The performance of this
                 implementation has been evaluated using the ISCAS
                 benchmark circuits. Speedups improve with the size of
                 the circuit and the activity level in the circuit.
                 Analyses of the communication overhead show that the
                 techniques developed here will yield even higher gains
                 as newer networking technologies like ATM are employed
                 to connect workstations.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Algorithms; Performance; Verification",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "distributed computing; parallel logic simulation; PVM;
                 synchronous simulation",
  subject =      "Hardware --- Logic Design --- Design Aids (B.6.3):
                 {\bf Simulation}; Hardware --- Integrated Circuits ---
                 Design Aids (B.7.2): {\bf Simulation}",
}

%%% TODAES is quarterly (jan, apr, jul, oct): volume 2, number 2 is the
%%% April 1997 issue.
@Article{Yang:1997:HFM,
  author =       "Cheng-Hsing Yang and Chia-Chun Tsai and Jan-Ming Ho
                 and Sao-Jie Chen",
  title =        "Hmap: a fast mapper for {EPGAs} using extended {GBDD}
                 hash tables",
  journal =      j-TODAES,
  volume =       "2",
  number =       "2",
  pages =        "135--150",
  month =        apr,
  year =         "1997",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1997-2-2/p135-yang/p135-yang.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1997-2-2/p135-yang/",
  abstract =     "A fast and efficient algorithm for technology mapping
                 of electrically programmable gate arrays (EPGAs) is
                 proposed. This Hmap algorithm covers the Boolean
                 network with programmed logic modules bottom-up. The
                 covering operation is based on collapsing the fanins of
                 a node to form a bigger supernode such that fewer
                 clusters are needed to be detected. Then Boolean
                 matching is used to detect whether the collapsed
                 supernode can be mapped into a logic module by looking
                 up an extended GBDD hash table. The use of this table
                 look-up matching can shorten the matching time
                 significantly. As shown in the experiments, the average
                 running time of Hmap is 20 times faster than that of
                 MIS-pga2.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Design",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  subject =      "Hardware --- Integrated Circuits --- Design Aids
                 (B.7.2); Hardware --- Integrated Circuits --- Types and
                 Design Styles (B.7.1): {\bf Gate arrays}",
}

%%% TODAES is quarterly (jan, apr, jul, oct): volume 2, number 2 is the
%%% April 1997 issue.
@Article{Mak:1997:BLM,
  author =       "Wai-Kei Mak and D. F. Wong",
  title =        "Board-level multiterminal net routing for {FPGA-based}
                 logic emulation",
  journal =      j-TODAES,
  volume =       "2",
  number =       "2",
  pages =        "151--167",
  month =        apr,
  year =         "1997",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1997-2-2/p151-mak/p151-mak.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1997-2-2/p151-mak/",
  abstract =     "We consider a board-level routing problem applicable
                 to FPGA-based logic emulation systems such as the
                 Realizer System [Varghese et al. 1993] and the
                 Enterprise Emulation System [Maliniak 1992]
                 manufactured by Quickturn Design Systems. Optimal
                 algorithms have been proposed for the case where all
                 nets are two-terminal nets [Chan and Schlag 1993; Mak
                 and Wong 1995]. We show how multiterminal nets can be
                 handled by decomposition into two-terminal nets. We
                 show that the multiterminal net decomposition problem
                 can be modeled as a bounded-degree hypergraph-to-graph
                 transformation problem where hyperedges are transformed
                 to spanning trees. A network flow-based algorithm that
                 solves both problems is proposed. It determines if
                 there is a feasible decomposition and gives one
                 whenever such a decomposition exists.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Algorithms; Verification",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "board-level routing; crossbars; field programmable
                 gate arrays; logic emulation; multi-terminal net
                 decomposition",
  subject =      "Hardware --- Integrated Circuits --- Types and Design
                 Styles (B.7.1): {\bf Gate arrays}; Hardware ---
                 Integrated Circuits --- Design Aids (B.7.2): {\bf
                 Placement and routing}; Hardware --- Integrated
                 Circuits --- Design Aids (B.7.2): {\bf Verification}",
}

%%% TODAES is quarterly (jan, apr, jul, oct): volume 2, number 2 is the
%%% April 1997 issue.
@Article{Kahng:1997:ARI,
  author =       "Andrew B. Kahng and Sudhakar Muddu",
  title =        "Analysis of {RC} interconnections under ramp input",
  journal =      j-TODAES,
  volume =       "2",
  number =       "2",
  pages =        "168--192",
  month =        apr,
  year =         "1997",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1997-2-2/p168-kahng/p168-kahng.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1997-2-2/p168-kahng/",
  abstract =     "We give new methods for calculating the time-domain
                 response for a finite-length distributed {\em RC\/}
                 line that is stimulated by a ramp input. The following
                 are our contributions. First, we obtain the solution of
                 the diffusion equation for a semi-infinite distributed
                 {\em RC\/} line with ramp input. We then present a
                 general and, in the limit, {\em exact\/} approach to
                 compute the time-domain response for finite-length {\em
                 RC\/} lines under ramp input by summing distinct
                 diffusions starting at either end of the line. Next, we
                 obtain analytical expressions for the finite
                 time-domain voltage response for an open-ended finite
                 {\em RC\/} line and for a finite {\em RC\/} line with
                 capacitive load. The delay estimates using this method
                 are very close to SPICE-computed delays. Finally, we
                 present a general recursive equation for computing the
                 higher-order diffusion components due to reflections at
                 the source and load ends. Future work extends our
                 method to response computations in general
                 interconnection trees by modeling both reflection and
                 transmission coefficients at discontinuities.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Algorithms; Design; Performance; Theory;
                 Verification",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "diffusion equation analysis; ramp input response; VLSI
                 interconnects",
  subject =      "Hardware --- Integrated Circuits --- Types and Design
                 Styles (B.7.1): {\bf VLSI (very large scale
                 integration)}; Hardware --- Integrated Circuits ---
                 Design Aids (B.7.2): {\bf Layout}",
}

%%% TODAES is quarterly (jan, apr, jul, oct): volume 2, number 3 is the
%%% July 1997 issue.
@Article{Benini:1997:SBM,
  author =       "Luca Benini and Giovanni {De Micheli}",
  title =        "A survey of {Boolean} matching techniques for library
                 binding",
  journal =      j-TODAES,
  volume =       "2",
  number =       "3",
  pages =        "193--226",
  month =        jul,
  year =         "1997",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1997-2-3/p193-benini/p193-benini.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1997-2-3/p193-benini/",
  abstract =     "When binding a logic network to a set of cells, a
                 fundamental problem is recognizing whether a cell can
                 implement a portion of the network. Boolean matching
                 means solving this task using a formalism based on
                 Boolean algebra. In its simplest form, Boolean matching
                 can be posed as a tautology check. We review several
                 approaches to Boolean matching as well as to its
                 generalization to cases involving {\em don't care\/}
                 conditions and its restriction to specific libraries
                 such as those typical of anti-fuse based FPGAs. We then
                 present a general formulation of Boolean matching
                 supporting multiple-output logic cells.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Design; Measurement; Performance",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  subject =      "Hardware --- Logic Design --- Design Styles (B.6.1)",
}

@Article{Johnson:1997:DSM,
  author =       "Mark C. Johnson and Kaushik Roy",
  title =        "Datapath scheduling with multiple supply voltages and
                 level converters",
  journal =      j-TODAES,
  volume =       "2",
  number =       "3",
  pages =        "227--248",
  month =        jul,
  year =         "1997",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1997-2-3/p227-johnson/p227-johnson.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1997-2-3/p227-johnson/",
  abstract =     "We present an algorithm called MOVER (Multiple
                 Operating Voltage Energy Reduction) to minimize
                 datapath energy dissipation through use of multiple
                 supply voltages. In a single voltage design, the
                 critical path length, clock period, and number of
                 control steps limit minimization of voltage and power.
                 Multiple supply voltages permit localized voltage
                 reductions to take up remaining schedule slack. MOVER
                 initially finds one minimum voltage for an entire
                 datapath. It then determines a second voltage for
                 operations where there is still schedule slack. New
                 voltages can be introduced and minimized until no
                 schedule slack remains. MOVER was exercised for a
                 variety of DSP datapath examples. Energy savings ranged
                 from 0\% to 50\% when comparing dual to single voltage
                 results. The benefit of going from two to three
                 voltages never exceeded 15\%. Power supply costs are
                 not reflected in these savings, but a simple analysis
                 shows that energy savings can be achieved even with
                 relatively inefficient DC-DC converters. Datapath
                 resource requirements were found to vary greatly with
                 respect to number of supplies. Area penalties ranged
                 from 0\% to 170\%. Implications of multiple voltage
                 design for IC layout and power supply requirements are
                 discussed.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Algorithms; Design",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "datapath scheduling; DSP; high-level synthesis; level
                 conversion; low power design; multiple voltage; power
                 optimization; scheduling",
  subject =      "Hardware --- Register-Transfer-Level Implementation
                 --- Design (B.5.1): {\bf Data-path design}; Hardware
                 --- Register-Transfer-Level Implementation --- Design
                 Aids (B.5.2): {\bf Optimization}; Mathematics of
                 Computing --- Numerical Analysis --- Optimization
                 (G.1.6): {\bf Integer programming}",
}

@Article{Yalcin:1997:EPC,
  author =       "Hakan Yalcin and John P. Hayes",
  title =        "Event propagation conditions in circuit delay
                 computation",
  journal =      j-TODAES,
  volume =       "2",
  number =       "3",
  pages =        "249--280",
  month =        jul,
  year =         "1997",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1997-2-3/p249-yalcin/p249-yalcin.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1997-2-3/p249-yalcin/",
  abstract =     "Accurate and efficient computation of delays is a
                 central problem in computer-aided design of complex
                 VLSI circuits. Delays are determined by events (signal
                 transitions) propagated from the inputs of a circuit to
                 its outputs, so precise characterization of event
                 propagation is required for accurate delay computation.
                 Although many different propagation conditions (PCs)
                 have been proposed for delay computation, their
                 properties and relationships have been far from clear.
                 We present a systematic analysis of delay computation
                 based on a series of waveform models that capture
                 signal behavior rigorously at different levels of
                 details. The most general model, called the exact or W0
                 model, specifies each event occurring in a circuit
                 signal. A novel method is presented that generates
                 approximate waveforms by progressively eliminating
                 signal values from the exact model. For each waveform
                 model, we derive the PCs that correctly capture the
                 requirements under which an event propagates along a
                 path. The waveform models and their PCs are shown to
                 form a well-defined hierarchy, which provides a means
                 to trade accuracy for computational effort. The
                 relationships among the derived PCs and existing ones
                 are analyzed in depth. It is proven that though many
                 PCs, such as the popular floating mode condition,
                 produce a correct upper bound on the circuit delay,
                 they can fail to recognize event propagation in some
                 instances. This analysis further enables us to derive
                 new and useful PCs. We describe such a PC, called safe
                 static. Experimental results demonstrate that safe
                 static provides an excellent accuracy/efficiency
                 tradeoff.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Performance; Theory; Verification",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "delay computation; event propagation; false path; path
                 sensitization; propagation condition; timing analysis;
                 waveform modeling",
  subject =      "Hardware --- Integrated Circuits --- Design Aids
                 (B.7.2): {\bf Verification}; Hardware --- Logic Design
                 --- Design Aids (B.6.3): {\bf Verification}",
}

@Article{Thadikaran:1997:ACB,
  author =       "Paul Thadikaran and Sreejit Chakravarty and Janak
                 Patel",
  title =        "Algorithms to compute bridging fault coverage of
                 {IDDQ} test sets",
  journal =      j-TODAES,
  volume =       "2",
  number =       "3",
  pages =        "281--305",
  month =        jul,
  year =         "1997",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1997-2-3/p281-thadikaran/p281-thadikaran.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1997-2-3/p281-thadikaran/",
  abstract =     "We present two algorithms, called list-based scheme
                 and tree-based scheme, to compute bridging fault (BF)
                 coverage of $ I_{DDQ} $ tests. These algorithms use
                 the novel idea of ``indistinguishable pairs,'' which
                 makes it more efficient and versatile than known fault
                 simulation algorithms. Unlike known algorithms, the two
                 algorithms can be used for combinational as well as
                 sequential circuits and for arbitrary sets of BFs.
                 Experiments show that the tree-based scheme is, in
                 general, better than the list-based scheme. But the
                 list-based scheme is better for some classes of
                 faults.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Algorithms; Design",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  subject =      "Hardware --- Logic Design --- Design Aids (B.6.3):
                 {\bf Simulation}",
}

%%% Check page gap: 306--311 between issues 3 and 4 of volume 2 ??
@Article{Xu:1997:LDR,
  author =       "Min Xu and Fadi J. Kurdahi",
  title =        "Layout-driven {RTL} binding techniques for high-level
                 synthesis using accurate estimators",
  journal =      j-TODAES,
  volume =       "2",
  number =       "4",
  pages =        "312--343",
  month =        oct,
  year =         "1997",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1997-2-4/p312-xu/p312-xu.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1997-2-4/p312-xu/",
  abstract =     "The importance of effective and efficient accounting
                 of layout effects is well established in High-Level
                 Synthesis (HLS), since it allows more realistic
                 exploration of the design space and the generation of
                 solutions with predictable metrics. This feature is
                 highly desirable in order to avoid unnecessary
                 iterations through the design process. In this article,
                 we address the problem of layout-driven
                 register-transfer-level (RTL) binding as this step has
                 a direct relevance to the final performance of the
                 design. By producing not only an RTL design but also an
                 approximate physical topology of the chip-level
                 implementation, we ensure that the solution will
                 perform at the predicted metric once implemented, thus
                 avoiding unnecessary delays in the design process.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Algorithms; Design; Measurement; Performance",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  subject =      "Hardware --- Register-Transfer-Level Implementation
                 --- Design (B.5.1); Hardware --- Integrated Circuits
                 --- Types and Design Styles (B.7.1): {\bf Gate arrays};
                 Hardware --- Integrated Circuits --- Design Aids
                 (B.7.2): {\bf Layout}",
}

@Article{Munch:1997:EIB,
  author =       "Michael M{\"u}nch and Norbert Wehn and Manfred
                 Glesner",
  title =        "An efficient {ILP-based} scheduling algorithm for
                 control-dominated {VHDL} descriptions",
  journal =      j-TODAES,
  volume =       "2",
  number =       "4",
  pages =        "344--364",
  month =        oct,
  year =         "1997",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1997-2-4/p344-munch/p344-munch.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1997-2-4/p344-munch/",
  abstract =     "To adopt behavioral synthesis techniques in existing
                 design flows, the synthesis methodology must provide
                 the designer with a mechanism to specify a component's
                 interface timing. This will permit pre- and
                 postsynthesis validation through cosimulation with
                 other subsystems or even through formal verification.
                 In control-flow dominated designs, additional timing
                 constraints will result in a complex
                 specification/constraint system for which the
                 scheduling problem has been shown to be NP-complete. In
                 this article, we present a mathematical framework for
                 solving a special instance of the scheduling problem in
                 control-flow dominated behavioral VHDL descriptions
                 given that the timing of I/O signals has been
                 completely or partially specified. It is based on a
                 code-transformation approach that fully preserves the
                 VHDL semantics. The scheduling problem is mapped onto
                 an integer linear program (ILP) solvable in polynomial
                 time assuming a restricted partial order on selected
                 statements. It captures both control-flow and timing
                 constraints in a single model and also exploits
                 dataflow information to optimize the statement sequence
                 across basic block boundaries.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Algorithms",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  subject =      "Hardware --- Register-Transfer-Level Implementation
                 --- Design (B.5.1): {\bf Control design}; Hardware ---
                 Register-Transfer-Level Implementation --- Design Aids
                 (B.5.2): {\bf Automatic synthesis}; Hardware ---
                 Register-Transfer-Level Implementation --- Design Aids
                 (B.5.2): {\bf Optimization}",
}

@Article{Freund:1997:CEA,
  author =       "L. Freund and M. Israel and F. Rousseau and J. M.
                 Berg{\'e} and M. Auguin and C. Belleudy and G.
                 Gogniat",
  title =        "A codesign experiment in acoustic echo cancellation
                 {GMDF}",
  journal =      j-TODAES,
  volume =       "2",
  number =       "4",
  pages =        "365--383",
  month =        oct,
  year =         "1997",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1997-2-4/p365-freund/p365-freund.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1997-2-4/p365-freund/",
  abstract =     "Continuous advances in processor and ASIC technologies
                 enable the integration of more and more complex
                 embedded systems. Embedded systems have become
                 commonplace in recent years. Since their
                 implementations generally require the use of
                 heterogeneous resources (e.g., processor cores, ASICs)
                 in one system with hard design constraints, the
                 importance of hardware/software codesign methodologies
                 increases steadily. HW/SW codesign approaches consist
                 generally of HW/SW partitioning and scheduling,
                 constrained code generation, and hardware and interface
                 synthesis. This article presents the codesign of an
                 industrial experiment in acoustic echo cancellation
                 (GMDF algorithm); and emphasizes the partitioning and
                 communication synthesis steps. This experiment brings
                 to light interesting problems such as data and program
                 distribution between system memories and the modeling
                 of communications in the partitioning process.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Design; Experimentation",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  subject =      "Computer Systems Organization --- Special-Purpose and
                 Application-Based Systems (C.3)",
}

@Article{Panda:1997:MDO,
  author =       "Preeti Ranjan Panda and Nikil D. Dutt and Alexandru
                 Nicolau",
  title =        "Memory data organization for improved cache
                 performance in embedded processor applications",
  journal =      j-TODAES,
  volume =       "2",
  number =       "4",
  pages =        "384--409",
  month =        oct,
  year =         "1997",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1997-2-4/p384-panda/p384-panda.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1997-2-4/p384-panda/",
  abstract =     "Code generation for embedded processors opens up the
                 possibility for several performance optimization
                 techniques that have been ignored by traditional
                 compilers due to compilation time constraints. We
                 present techniques that take into account the
                 parameters of the data caches for organizing scalar and
                 array variables declared in embedded code into memory,
                 with the objective of improving data cache performance.
                 We present techniques for clustering variables to
                 minimize compulsory cache misses, and for solving the
                 memory assignment problem to minimize conflict cache
                 misses. Our experiments with benchmark code kernels
                 from DSP and other domains on the CW4001 embedded
                 processor from LSI Logic indicate significant
                 improvements in data cache performance by the
                 application of our memory organization technique.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Performance; Verification",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "cache memory; data cache; memory synthesis; system
                 design; system synthesis",
  subject =      "Hardware --- Memory Structures --- Design Styles
                 (B.3.2): {\bf Cache memories}; Software --- Programming
                 Languages --- Processors (D.3.4): {\bf Compilers}",
}

@Article{Tomiyama:1997:CPT,
  author =       "Hiroyuki Tomiyama and Hiroto Yasuura",
  title =        "Code placement techniques for cache miss rate
                 reduction",
  journal =      j-TODAES,
  volume =       "2",
  number =       "4",
  pages =        "410--429",
  month =        oct,
  year =         "1997",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1997-2-4/p410-tomiyama/p410-tomiyama.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1997-2-4/p410-tomiyama/",
  abstract =     "In the design of embedded systems with cache memories,
                 it is important to minimize the cache miss rates to
                 reduce power consumption of the systems as well as
                 improve the performance. In this article, we propose
                 two code placement methods (a simplified method and a
                 refined one) to reduce miss rates of instruction
                 caches. We first define a simplified code placement
                 problem without an attempt to minimize the code size.
                 The problem is formulated as an integer linear
                 programming (ILP) problem, by which an optimal
                 placement can be found. Experimental results show that
                 the simplified method reduces cache misses by an
                 average of 30\% (max. 77\%). However, the code size
                 obtained by the simplified method tends to be large,
                 which inevitably leads to a larger memory size. In
                 order to overcome this limitation, we further propose a
                 refined code placement method in which the code size
                 provided by the system designers must be satisfied. The
                 effectiveness of the refined method is also
                 demonstrated.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Design; Performance",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  subject =      "Software --- Programming Languages --- Processors
                 (D.3.4): {\bf Code generation}; Hardware --- Control
                 Structures and Microprogramming --- Microprogram Design
                 Aids (B.1.4): {\bf Languages and compilers}; Software
                 --- Programming Languages --- Processors (D.3.4): {\bf
                 Optimization}; Hardware --- Control Structures and
                 Microprogramming --- Microprogram Design Aids (B.1.4):
                 {\bf Optimization}",
}

%%% TODAES 3(1), January 1998, pages 1--20.
@Article{Johnson:1998:MAS,
  author =       {E. W. Johnson and J. B. Brockman},
  title =        {Measurement and analysis of sequential design processes},
  journal =      j-TODAES,
  volume =       {3},
  number =       {1},
  pages =        {1--20},
  month =        jan,
  year =         {1998},
  CODEN =        {ATASFO},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  bibdate =      {Fri Jul 27 10:05:33 MDT 2001},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL =          {http://www.acm.org/pubs/articles/journals/todaes/1998-3-1/p1-johnson/p1-johnson.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1998-3-1/p1-johnson/},
  abstract =     {As design processes continue to increase in
                 complexity it is important to base
                 process-improvement decisions on quantitative
                 analysis. We describe the development of an
                 analytical approach for evaluating sequential
                 design-process completion time and for determining
                 the sensitivities of design time with respect to
                 individual task durations and transition
                 probabilities. Techniques are also detailed for
                 collecting process metadata and calibrating a design
                 process model. Example applications illustrate the
                 use of the methodology in analyzing and improving
                 software and hardware design processes.},
  acknowledgement = ack-nhfb,
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)},
  generalterms = {Design; Documentation; Human Factors; Management;
                 Measurement},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords =     {management science; sensitivity analysis; workflow},
  subject =      {Computer Applications --- Computer-Aided Engineering
                 (J.6); Computing Milieux --- Computers and Education
                 --- Computer and Information Science Education
                 (K.3.2)},
}

@Article{Khordoc:1998:SVA,
  author =       "K. Khordoc and E. Cerny",
  title =        "Semantics and verification of action diagrams with
                 linear timing",
  journal =      j-TODAES,
  volume =       "3",
  number =       "1",
  pages =        "21--50",
  month =        jan,
  year =         "1998",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1998-3-1/p21-khordoc/p21-khordoc.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1998-3-1/p21-khordoc/",
  abstract =     "Specifications containing linear timing constraints,
                 such as found in action diagrams (timing diagrams)
                 defining interface behaviors, are often used in
                 practice. Although efficient $ O(n^3) $ shortest path
                 algorithms exist for computing the minimum and maximum
                 time distances between actions, subject to the timing
                 constraints, there is so far no accurate method that
                 can decide (a) whether a specification of this kind is
                 realizable (i.e., can be simulated by a causal system),
                 and (b) given the action diagrams of the interfaces of
                 two or more communicating systems, whether the systems
                 implementing such independent specifications will
                 correctly interoperate (i.e., satisfy the respective
                 protocols and timing assumptions). First we illustrate
                 the weakness of existing action diagram verification
                 techniques: the causality issue is not addressed, and
                 the proposed methods to answer the compatibility
                 (interoperability) question yield false negative
                 answers in many practical situations. We then define
                 the meaning of causality in an action diagram
                 specification and state a set of sufficient conditions
                 for causality to hold. This development then leads to
                 an exact procedure for the verification of the
                 interface compatibility of communicating action
                 diagrams. The results are illustrated on a practical
                 example.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Algorithms; Design; Theory; Verification",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "causality; compatibility of interfaces; hardware
                 interfaces; timing diagrams; timing verification",
  subject =      "Hardware --- Integrated Circuits --- Design Aids
                 (B.7.2); Software --- Software Engineering ---
                 Requirements/Specifications (D.2.1)",
}

%%% TODAES 3(1), January 1998, pages 51--75.
@Article{Liao:1998:NVC,
  author =       {S. Liao and K. Keutzer and S. Tjiang and S. Devadas},
  title =        {A new viewpoint on code generation for directed acyclic
                 graphs},
  journal =      j-TODAES,
  volume =       {3},
  number =       {1},
  pages =        {51--75},
  month =        jan,
  year =         {1998},
  CODEN =        {ATASFO},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  bibdate =      {Fri Jul 27 10:05:33 MDT 2001},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL =          {http://www.acm.org/pubs/articles/journals/todaes/1998-3-1/p51-liao/p51-liao.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1998-3-1/p51-liao/},
  abstract =     {We present a new viewpoint on code generation for
                 directed acyclic graphs (DAGs). Our formulation is
                 based on {\em binate covering}, the problem of
                 satisfying, with minimum cost, a set of disjunctive
                 clauses, and can take into account commutativity of
                 operators and of the machine model. An important
                 contribution of this work is a set of necessary and
                 sufficient conditions for a valid schedule to be
                 derived, based on the notion of {\em worms\/} and
                 {\em worm-partitions}. This set of conditions can be
                 compactly expressed with clauses that relate
                 scheduling to code selection. For the case of
                 one-register machines, we can derive clauses that
                 lead to generation of optimal code for the DAG.
                 Recent advances in exact binate covering algorithms
                 allows us to use this strategy to generate optimal
                 code for large basic blocks. The optimal code
                 generated by our algorithm results in significant
                 reductions in overall code size.},
  acknowledgement = ack-nhfb,
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)},
  generalterms = {Algorithms; Design; Theory},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords =     {binate covering; code generation; directed acyclic
                 graphs},
  subject =      {Software --- Programming Languages --- Processors
                 (D.3.4); Mathematics of Computing --- Discrete
                 Mathematics --- Graph Theory (G.2.2)},
}

@Article{Shi:1998:CCT,
  author =       "C.-J. Shi and J. A. Brzozowski",
  title =        "Cluster-cover: a theoretical framework for a class of
                 {VLSI-CAD} optimization problems",
  journal =      j-TODAES,
  volume =       "3",
  number =       "1",
  pages =        "76--107",
  month =        jan,
  year =         "1998",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1998-3-1/p76-shi/p76-shi.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1998-3-1/p76-shi/",
  abstract =     "This article introduces a mathematical framework
                 called cluster-cover. We show that this framework
                 captures the combinatorial structure of a class of VLSI
                 design optimization problems, including two-level logic
                 minimization, constrained encoding, multilayer
                 topological planar routing, application timing
                 assignment for delay-fault testing, and minimization of
                 monitoring logic for BIST enhancement. These apparently
                 unrelated problems can all be cast into two
                 metaproblems in our framework: finding a maximum
                 cluster and finding a minimum cover. We describe
                 paradigms for developing algorithms for these problems.
                 First, a simple heuristic called greedy peeling is
                 presented and characterized. We derive sufficient
                 conditions that guarantee optimum solutions by greedy
                 peeling. We generalize the performance analysis of a
                 multilayer topological planar routing heuristic to
                 greedy peeling for the general cluster-cover problems.
                 We propose a performance bound of greedy set covering
                 that can be computed efficiently for a given problem
                 instance; this bound is much tighter than the
                 previously known bounds. Second, prime covering ---
                 originally developed for logic minimization --- is
                 generalized to finding exact solutions for
                 cluster-cover problems. Previously, only the connection
                 between logic minimization and constrained encoding was
                 known.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Design; Theory",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "cluster-cover; logic minimization; NP-completeness;
                 self-checking logic design; state assignment;
                 topological routing",
  subject =      "Hardware --- Logic Design --- Design Aids (B.6.3);
                 Hardware --- Integrated Circuits --- General (B.7.0);
                 Theory of Computation --- Analysis of Algorithms and
                 Problem Complexity --- Nonnumerical Algorithms and
                 Problems (F.2.2)",
}

@Article{Hsiung:1998:IIC,
  author =       "Pao-Ann Hsiung and Chung-Hwang Chen and Trong-Yen Lee
                 and Sao-Jie Chen",
  title =        "{ICOS}: an intelligent concurrent object-oriented
                 synthesis methodology for multiprocessor systems",
  journal =      j-TODAES,
  volume =       "3",
  number =       "2",
  pages =        "109--135",
  month =        apr,
  year =         "1998",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1998-3-2/p109-hsiung/p109-hsiung.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1998-3-2/p109-hsiung/",
  abstract =     "The design of multiprocessor architectures differs
                 from uniprocessor systems in that the number of
                 processors and their interconnection must be
                 considered. This leads to an enormous increase in the
                 design-space exploration time, which is exponential in
                 the total number of system components. The methodology
                 proposed here, called {\em Intelligent Concurrent
                 Object-Oriented Synthesis\/} (ICOS) methodology, makes
                 feasible the synthesis of complex multiprocessor
                 systems through the application of several techniques
                 that speed up the design process. ICOS is based on {\em
                 Performance Synthesis Methodology\/} (PSM), a recently
                 proposed object-oriented system-level design
                 methodology. Four major techniques: object-oriented
                 design, fuzzy design-space exploration, concurrent
                 design, and intelligent reuse of complete subsystems
                 are integrated in ICOS. First, object-oriented modeling
                 and design, through the use of object-oriented
                 relationships and operators, make the whole design
                 process manageable and maintainable in ICOS. Second,
                 fuzzy comparison applied to the specializations or
                 instances of components reduces the exponential growth
                 of design-space exploration in ICOS. Third, independent
                 components from different design alternatives are
                 synthesized in parallel; this design concurrency
                 shortens the overall design time. Lastly, the
                 resynthesis of complete subsystems can be avoided
                 through the application of learning, thus making the
                 methodology intelligent enough to reuse previous design
                 configurations. Experiments show that all these applied
                 techniques contribute to the synthesis efficiency and
                 the degree of automation in ICOS.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Design",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "concurrent object-oriented system-level synthesis;
                 fuzzy design-space exploration; learning",
  subject =      "Computer Applications --- Computer-Aided Engineering
                 (J.6): {\bf Computer-aided design (CAD)}; Computing
                 Methodologies --- Artificial Intelligence --- Learning
                 (I.2.6): {\bf Knowledge acquisition}; Computing
                 Methodologies --- Artificial Intelligence --- Learning
                 (I.2.6): {\bf Analogies}; Computing Methodologies ---
                 Artificial Intelligence --- Deduction and Theorem
                 Proving (I.2.3): {\bf Deduction}; Computer Systems
                 Organization --- Processor Architectures --- Multiple
                 Data Stream Architectures (Multiprocessors) (C.1.2)",
}

@Article{Araujo:1998:CGF,
  author =       "Guido Araujo and Sharad Malik",
  title =        "Code generation for fixed-point {DSPs}",
  journal =      j-TODAES,
  volume =       "3",
  number =       "2",
  pages =        "136--161",
  month =        apr,
  year =         "1998",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1998-3-2/p136-araujo/p136-araujo.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1998-3-2/p136-araujo/",
  abstract =     "This paper examines the problem of code-generation for
                 Digital Signal Processors (DSPs). We make two major
                 contributions. First, for an important class of DSP
                 architectures, we propose an optimal $ O(n) $ algorithm
                 for the tasks of register allocation and instruction
                 scheduling for expression trees. Optimality is
                 guaranteed by sufficient conditions derived from a
                 structural representation of the processor Instruction
                 Set Architecture (ISA). Second, we develop heuristics
                 for the case when basic blocks are Directed Acyclic
                 Graphs (DAGs).",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Algorithms",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "code generation; register allocation; scheduling",
  subject =      "Software --- Programming Languages --- Processors
                 (D.3.4): {\bf Optimization}; Software --- Programming
                 Languages --- Processors (D.3.4): {\bf Code
                 generation}",
}

@Article{Tiruvuri:1998:ELB,
  author =       "Giri Tiruvuri and Moon Chung",
  title =        "Estimation of lower bounds in scheduling algorithms
                 for high-level synthesis",
  journal =      j-TODAES,
  volume =       "3",
  number =       "2",
  pages =        "162--180",
  month =        apr,
  year =         "1998",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1998-3-2/p162-tiruvuri/p162-tiruvuri.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1998-3-2/p162-tiruvuri/",
  abstract =     "To produce efficient design, a high-level synthesis
                 system should be able to analyze a variety of
                 cost-performance tradeoffs. The system can use
                 lower-bound performance estimate methods to identify
                 and prune inferior designs without producing complete
                 designs. We present a lower-bound performance estimate
                 method that is not only faster than existing methods,
                 but also produces better lower bounds. In most cases,
                 the lower bound produced by our algorithm is tight.
                 \par

                 Scheduling algorithms such as branch-and-bound need
                 fast and effective lower-bound estimate methods, often
                 for a large number of partially scheduled dataflow
                 graphs, to reduce the search space. We extend our
                 method to efficiently estimate completion time of
                 partial schedules. This problem is not addressed by
                 existing methods in the literature. Our lower-bound
                 estimate is shown to be very effective in reducing the
                 size of the search space when used in a
                 branch-and-bound scheduling algorithm. \par

                 Our methods can handle multicycle operations, pipelined
                 functional units, and chaining of operations. We also
                 present an extension to handle conditional branches. A
                 salient feature of the extended method is its
                 applicability to speculative execution as well as
                 C-select implementation of conditional branches.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Algorithms; Design; Experimentation; Measurement;
                 Performance",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "dynamic programming; high-level synthesis; lower-bound
                 estimate; scheduling",
  subject =      "Hardware --- Register-Transfer-Level Implementation
                 --- General (B.5.0); Hardware --- Performance and
                 Reliability --- Performance Analysis and Design Aids
                 (B.8.2); Theory of Computation --- Analysis of
                 Algorithms and Problem Complexity --- Nonnumerical
                 Algorithms and Problems (F.2.2): {\bf Sequencing and
                 scheduling}; Hardware --- Integrated Circuits ---
                 General (B.7.0)",
}

@Article{Vahid:1998:FPI,
  author =       "Frank Vahid and Thuy Dm Le and Yu-Chin Hsu",
  title =        "Functional partitioning improvements over structural
                 partitioning for packaging constraints and synthesis:
                 tool performance",
  journal =      j-TODAES,
  volume =       "3",
  number =       "2",
  pages =        "181--208",
  month =        apr,
  year =         "1998",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/290833.290841",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1998-3-2/p181-vahid/p181-vahid.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1998-3-2/p181-vahid/",
  abstract =     "Incorporating functional partitioning into a synthesis
                 methodology leads to several important advantages. In
                 functional partitioning, we first partition a
                 functional specification into smaller subspecifications
                 and then synthesize structure for each, in contrast to
                 the current approach of first synthesizing structure
                 for the entire specification and then partitioning that
                 structure. One advantage is the improvement in I/O
                 performance and package count, when partitioning among
                 hardware blocks with size and I/O constraints, such as
                 FPGAs or blocks within an ASIC. A second advantage is
                 reduction in synthesis runtimes. We describe these
                 important advantages, concluding that further research
                 on functional partitioning can lead to improved results
                 from synthesis environments.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Design",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "behavioral synthesis; functional partitioning;
                 system-level design",
  subject =      "Hardware --- Register-Transfer-Level Implementation
                 --- Design Aids (B.5.2): {\bf Automatic synthesis};
                 Hardware --- Register-Transfer-Level Implementation ---
                 Design Aids (B.5.2): {\bf Hardware description
                 languages}; Hardware --- Register-Transfer-Level
                 Implementation --- Design Aids (B.5.2): {\bf
                 Optimization}; Computer Applications --- Computer-Aided
                 Engineering (J.6): {\bf Computer-aided design (CAD)}",
}

@Article{Koch:1998:BBD,
  author =       "Gernot H. Koch and W. Rosenstiel and U. Kebschull",
  title =        "Breakpoints and breakpoint detection in source-level
                 emulation",
  journal =      j-TODAES,
  volume =       "3",
  number =       "2",
  pages =        "209--230",
  month =        apr,
  year =         "1998",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1998-3-2/p209-koch/p209-koch.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1998-3-2/p209-koch/",
  abstract =     "We present an approach for accelerating the validation
                 speed of behavioral system descriptions through
                 hardware emulation. The method allows source-level
                 debugging of running hardware specified in behavioral
                 VHDL in a way similar to source-level debugging in
                 software programming languages. We discuss breakpoints
                 in source-level emulation and how the circuit generated
                 by high-level synthesis has to be modified to work with
                 breakpoints. Breakpoint encoding and detection are
                 shown in detail. Our approach allows breakpoint
                 detection by hardware without seriously slowing the
                 circuit or dramatically increasing its size.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Design; Performance; Verification",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "debugging; emulation; high-level synthesis",
  subject =      "Hardware --- Integrated Circuits --- Design Aids
                 (B.7.2): {\bf Simulation}; Hardware --- Logic Design
                 --- Design Aids (B.6.3): {\bf VHDL}",
}

@Article{Pomeranz:1998:FTG,
  author =       "Irith Pomeranz and Sudhakar M. Reddy",
  title =        "Functional test generation for delay faults in
                 combinational circuits",
  journal =      j-TODAES,
  volume =       "3",
  number =       "2",
  pages =        "231--248",
  month =        apr,
  year =         "1998",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1998-3-2/p231-pomeranz/p231-pomeranz.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1998-3-2/p231-pomeranz/",
  abstract =     "We propose a functional fault model for delay faults
                 in combinational circuits and describe a functional
                 test generation procedure based on this model. The
                 proposed method is most suitable when a gate-level
                 description of the circuit-under-test, necessary for
                 employing existing gate-level delay fault test
                 generators, is not available or does not accurately
                 describe the circuit. It is also suitable for
                 generating tests in early design stages of a circuit,
                 before a gate-level implementation is selected. In
                 addition, it can potentially be employed to supplement
                 conventional test generators for gate-level circuits to
                 reduce the cost of handling large numbers of paths. A
                 parameter called $ \Delta $ is used to control the
                 number of functional faults targeted and thus the
                 number of tests generated. If $ \Delta $ is unlimited,
                 the functional test set detects every robustly testable
                 path delay fault in any gate-level implementation of
                 the given circuit. An appropriate subset of tests can
                 be selected once the implementation is known. The test
                 sets generated for various values of $ \Delta $ are
                 fault simulated on gate-level
                 realizations to demonstrate their effectiveness. The
                 experiments indicate that functional test sets may be
                 able to identify functions whose realizations have low
                 path delay fault coverage.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "delay faults; function-robust tests; functional delay
                 fault model; path delay faults; robust tests",
  subject =      "Hardware --- Performance and Reliability ---
                 Reliability, Testing, and Fault-Tolerance (B.8.1);
                 Hardware --- Integrated Circuits --- Types and Design
                 Styles (B.7.1)",
}

@Article{Chen:1998:SDI,
  author =       "X. T. Chen and F. J. Meyer and F. Lombardi",
  title =        "Structural diagnosis of interconnects by coloring",
  journal =      j-TODAES,
  volume =       "3",
  number =       "2",
  pages =        "249--271",
  month =        apr,
  year =         "1998",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1998-3-2/p249-chen/p249-chen.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1998-3-2/p249-chen/",
  abstract =     "This paper presents a new approach for diagnosing
                 shorts in interconnects in which the adjacencies
                 between nets are known. This structural approach
                 exploits different graph coloring techniques to
                 generate a test set with no aliasing and confounding,
                 i.e., full diagnosis (detection and location) is
                 accomplished. Initially, a simple coloring approach
                 based on a greedy condition of the adjacency graph is
                 proposed for fault detection. Then, the conditions for
                 aliasing and confounding are analyzed with respect to
                 the sizes of the possible shorts. These results are
                 used to generate new colors using a process called
                 color mixing. Color mixing guarantees that additional
                 tests, required in order to avoid aliasing/confounding,
                 will use appropriate codes. The characteristics of
                 unbalanced/balanced codes for encoding the colors in
                 the vector-generation process of interconnect diagnosis
                 are discussed and are proved to yield full diagnosis
                 using a novel method. An algorithm for full diagnosis
                 is then presented; this algorithm has an execution
                 complexity of $ O(\max (N^2, N \times D^3)) $ where $N$
                 is the number of nets and $D$ is the maximum degree of
                 the nodes in the adjacency graph. Simulation results
                 show that the proposed approach requires a smaller
                 number of test vectors than previous approaches.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "balanced code; diagnosis; graph coloring;
                 interconnect; syndrome",
  subject =      "Mathematics of Computing --- Discrete Mathematics ---
                 Graph Theory (G.2.2); Computer Applications ---
                 Computer-Aided Engineering (J.6); Hardware ---
                 Performance and Reliability --- Reliability, Testing,
                 and Fault-Tolerance (B.8.1)",
}

@Article{Mehta:1998:ESR,
  author =       "Dinesh P. Mehta",
  title =        "Estimating the storage requirements of the rectangular
                 and {L-shaped} corner stitching data structures",
  journal =      j-TODAES,
  volume =       "3",
  number =       "2",
  pages =        "272--284",
  month =        apr,
  year =         "1998",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1998-3-2/p272-mehta/p272-mehta.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1998-3-2/p272-mehta/",
  abstract =     "This paper proposes a technique for estimating the
                 storage requirements of the Rectangular Corner
                 Stitching (RCS) data structure [Ousterhout 1984] and
                 the L-shaped Corner Stitching (LCS) data structure
                 [Mehta and Blust 1997] on a given circuit by studying
                 its (the circuit's) geometric properties. This provides
                 a method for estimating the storage requirements of a
                 circuit without having to implement the corner
                 stitching data structure, which is a tedious and
                 time-consuming task. This technique can also be used to
                 estimate the amount of space saved by employing the LCS
                 data structure over the RCS data structure on a given
                 circuit.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Algorithms",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "corner stitching; data structures; L-shapes; memory
                 requirements analysis; rectangle; rectilinear
                 polygons",
  subject =      "Hardware --- Integrated Circuits --- Design Aids
                 (B.7.2): {\bf Layout}; Data --- Data Storage
                 Representations (E.2): {\bf Linked representations};
                 Theory of Computation --- Analysis of Algorithms and
                 Problem Complexity --- Nonnumerical Algorithms and
                 Problems (F.2.2): {\bf Geometrical problems and
                 computations}",
}

@Article{Bhattacharya:1998:ERS,
  author =       "Subhrajit Bhattacharya and Sujit Dey and Franc
                 Brglez",
  title =        "Effects of resource sharing on circuit delay: an
                 assignment algorithm for clock period optimization",
  journal =      j-TODAES,
  volume =       "3",
  number =       "2",
  pages =        "285--307",
  month =        apr,
  year =         "1998",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1998-3-2/p285-bhattacharya/p285-bhattacharya.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1998-3-2/p285-bhattacharya/",
  abstract =     "This paper analyzes the effect of resource sharing and
                 assignment on the clock period of the synthesized
                 circuit. The assignment phase assigns or binds
                 operations of the scheduled behavioral description to a
                 set of allocated resources. We focus on control-flow
                 intensive descriptions, characterized by the presence
                 of mutually exclusive paths due to the presence of
                 nested conditional branches and loops. \par

                 We show that clustering multiple operations in the same
                 state of the schedule, possibly leading to chaining of
                 functional units (FUs) in the RTL circuit, is an
                 effective way to minimize the total number of clock
                 cycles, and hence total execution time. We present an
                 assignment algorithm that is particularly effective for
                 such design styles by minimizing data chaining and
                 hence the clock period of the circuit, thereby leading
                 to further reduction in total execution time.
                 \par

                 Existing resource sharing and assignment approaches for
                 reducing the clock period of the resulting circuit
                 either increase the resource allocation or use faster
                 modules, both leading to larger area
                 requirements. In this paper we show that even when the
                 type of available resource units and the number of
                 resource units of each type is fixed, different
                 assignments may lead to circuits with significant
                 differences in clock period. \par

                 We provide a comprehensive analysis of how resource
                 sharing and assignment introduces long paths in the
                 circuit. Based on the analysis, we develop an
                 assignment algorithm that uses a high-level delay
                 estimator to assign operations to a fixed set of
                 available resources so as to minimize the clock period
                 of the resultant circuit, with no or minimal effect on
                 the area of the circuit. Experimental results on
                 several conditional-intensive designs demonstrate the
                 effectiveness of the assignment algorithm.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "clock period; high-level synthesis; resource sharing",
  subject =      "Hardware --- Register-Transfer-Level Implementation
                 --- Design Aids (B.5.2): {\bf Optimization}",
}

@Article{Cabodi:1998:AVB,
  author          = {Gianpiero Cabodi and Paolo Camurati and Stefano Quer},
  title           = {Auxiliary variables for {BDD-based} representation and
                     manipulation of {Boolean} functions},
  journal         = j-TODAES,
  volume          = {3},
  number          = {3},
  pages           = {309--340},
  month           = jul,
  year            = {1998},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Fri Jul 27 10:05:33 MDT 2001},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL             = {http://www.acm.org/pubs/articles/journals/todaes/1998-3-3/p309-cabodi/p309-cabodi.pdf;
                     http://www.acm.org/pubs/citations/journals/todaes/1998-3-3/p309-cabodi/},
  abstract        = {BDDs are the state-of-the-art technique for
                     representing and manipulating Boolean functions. Their
                     introduction caused a major leap forward in synthesis,
                     verification, and testing. However, they are often
                     unmanageable because of the large amount of nodes. To
                     attack this problem, we insert auxiliary variables that
                     decompose monolithic BDDs in smaller ones. This method
                     works very well for Boolean function representation. As
                     far as combinational circuits are concerned,
                     representing their functions is the main issue. Going
                     into the sequential domain, we focus on traversal
                     techniques. We show that, once we have Boolean
                     functions in decomposed form, symbolic manipulations
                     are viable and efficient. We investigate the relation
                     between auxiliary variables and static and dynamic
                     ordering strategies. Experimental evidence shows that
                     we achieve a certain degree of independence from
                     variable ordering. Thus, this approach can be an
                     alternative to dynamic re-ordering. Experimental
                     results on Boolean function representation, and exact
                     and approximate forward symbolic traversal of FSMs,
                     demonstrate the benefits both in terms of memory
                     requirements and of CPU time.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  generalterms    = {Design; Verification},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {binary decision diagrams; finite state machines;
                     functional decompositions; reachability analysis},
  subject         = {Hardware --- Logic Design --- Design Aids (B.6.3):
                     {\bf Verification}},
}

@Article{Cong:1998:BSC,
  author          = {Jason Cong and Andrew B. Kahng and Cheng-Kok Koh and
                     C.-W. Albert Tsao},
  title           = {Bounded-skew clock and {Steiner} routing},
  journal         = j-TODAES,
  volume          = {3},
  number          = {3},
  pages           = {341--388},
  month           = jul,
  year            = {1998},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Fri Jul 27 10:05:33 MDT 2001},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL             = {http://www.acm.org/pubs/articles/journals/todaes/1998-3-3/p341-cong/p341-cong.pdf;
                     http://www.acm.org/pubs/citations/journals/todaes/1998-3-3/p341-cong/},
  abstract        = {We study the minimum-cost bounded-skew routing tree
                     problem under the pathlength (linear) and Elmore delay
                     models. This problem captures several engineering
                     tradeoffs in the design of routing topologies with
                     controlled skew. Our bounded-skew routing algorithm,
                     called the BST/DME algorithm, extends the DME algorithm
                     for exact zero-skew trees via the concept of {\em a
                     merging region}. For a {\em prescribed topology},
                     BST/DME constructs a bounded-skew tree (BST) in two
                     phases: (i) a bottom-up phase to construct a binary
                     tree of merging regions which represent the loci of
                     possible embedding points of the internal nodes, and
                     (ii) a top-down phase to determine the exact locations
                     of the internal nodes. We present two approaches to
                     construct the merging regions: (i) the {\em Boundary
                     Merging and Embedding\/} (BME) method which utilizes
                     merging points that are restricted to the {\em
                     boundaries\/} of merging regions, and (ii) the {\em
                     Interior Merging and Embedding\/} (IME) algorithm which
                     employs a sampling strategy and a dynamic
                     programming-based selection technique to consider
                     merging points that are {\em interior\/} to, as well as
                     on the boundary of, the merging regions. When the
                     topology is not prescribed, we propose a new {\em
                     Greedy\/}-BST/DME algorithm which combines the merging
                     region computation with topology generation. The
                     Greedy-BST/DME algorithm very closely matches the best
                     known heuristics for the zero-skew case and for the
                     unbounded-skew case (i.e., the Steiner minimal tree
                     problem). Experimental results show that our BST
                     algorithms can produce a set of routing solutions with
                     smooth skew and wire length tradeoffs.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  generalterms    = {Algorithms; Design; Experimentation; Performance},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {(inter)connection; boundary merging and embedding;
                     bounded-skew; clock tree; Elmore delay; interior
                     merging and embedding; low power; merging region;
                     merging segment; pathlength delay; Steiner tree;
                     synchronization; VLSI; zero-skew},
  subject         = {Hardware --- Integrated Circuits --- Design Aids
                     (B.7.2): {\bf Placement and routing}; Computer
                     Applications --- Computer-Aided Engineering (J.6): {\bf
                     Computer-aided design (CAD)}},
}

@Article{Jone:1998:CAD,
  author =       "Wen-Ben Jone and K. S. Tsai",
  title =        "Confidence analysis for defect-level estimation of
                 {VLSI} random testing",
  journal =      j-TODAES,
  volume =       "3",
  number =       "3",
  pages =        "389--407",
  month =        jul,
  year =         "1998",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1998-3-3/p389-jone/p389-jone.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1998-3-3/p389-jone/",
  abstract =     "The defect level in circuit testing is the percentage
                 of circuits, such as chips, that are defective and
                 shipped for use after testing. Our previously published
                 results showed that the defect level of circuit
                 fabrication and testing should be a probability
                 distribution, rather than a single value, and the
                 concept of confidence degree was proposed [Gondalia et
                 al. 1993; Jone et al. 1995]. In this work, defect level
                 is represented by a confidence interval which is more
                 conventional and easier to interpret. The point
                 estimate of defect level analysis and conditions to
                 avoid meaningless confidence intervals are also
                 investigated. Methods for adaptive random test length
                 determination driven by different confidence intervals
                 or interval length are proposed to meet both test
                 requirements and test costs tradeoff. Finally, a
                 complete test plan that can direct the test flow from
                 fabrication infancy to maturity is suggested.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Experimentation; Measurement; Performance;
                 Reliability",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "defect level analysis; random testing; test confidence
                 analysis; test quality; VLSI testing",
  subject =      "Hardware --- Performance and Reliability ---
                 Reliability, Testing, and Fault-Tolerance (B.8.1)",
}

@Article{Mathur:1998:RAE,
  author =       "Anmol Mathur and Ali Dasdan and Rajesh K. Gupta",
  title =        "Rate analysis for embedded systems",
  journal =      j-TODAES,
  volume =       "3",
  number =       "3",
  pages =        "408--436",
  month =        jul,
  year =         "1998",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1998-3-3/p408-mathur/p408-mathur.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1998-3-3/p408-mathur/",
  abstract =     "Embedded systems consist of interacting components
                 that are required to deliver a specific functionality
                 under constraints on execution rates and relative time
                 separation of the components. In this article, we model
                 an embedded system using concurrent processes
                 interacting through synchronization. We assume that
                 there are rate constraints on the execution rates of
                 processes imposed by the designer or the environment of
                 the system, where the execution rate of a process is
                 the number of its executions per unit time. We address
                 the problem of computing bounds on the execution rates
                 of processes constituting an embedded system, and
                 propose an interactive rate analysis framework. As part
                 of the rate analysis framework we present an efficient
                 algorithm for checking the consistency of the rate
                 constraints. Bounds on the execution rate of each
                 process are computed using an efficient algorithm based
                 on the relationship between the execution rate of a
                 process and the maximum mean delay cycles in the
                 process graph. Finally, if the computed rates violate
                 some of the rate constraints, some of the processes in
                 the system are redesigned using information from the
                 rate analysis step. This rate analysis framework is
                 implemented in a tool called RATAN. We illustrate by an
                 example how RATAN can be used in an embedded system
                 design.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Algorithms; Design; Performance; Theory",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "average execution rate; concurrent system modeling;
                 embedded systems; interactive rate violation debugging;
                 rate analysis; rate constraints",
  subject =      "Computer Systems Organization --- Performance of
                 Systems (C.4): {\bf Modeling techniques}; Computer
                 Systems Organization --- Performance of Systems (C.4):
                 {\bf Performance attributes}; Computer Systems
                 Organization --- Special-Purpose and Application-Based
                 Systems (C.3): {\bf Real-time and embedded systems};
                 Computer Systems Organization --- Performance of
                 Systems (C.4): {\bf Design studies}",
}

@Article{Pan:1998:OCP,
  author          = {Peichen Pan and C. L. Liu},
  title           = {Optimal clock period {FPGA} technology mapping for
                     sequential circuits},
  journal         = j-TODAES,
  volume          = {3},
  number          = {3},
  pages           = {437--462},
  month           = jul,
  year            = {1998},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Fri Jul 27 10:05:33 MDT 2001},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL             = {http://www.acm.org/pubs/articles/journals/todaes/1998-3-3/p437-pan/p437-pan.pdf;
                     http://www.acm.org/pubs/citations/journals/todaes/1998-3-3/p437-pan/},
  abstract        = {We study the technology mapping problem for sequential
                     circuits for look-up table (LUT) based field
                     programmable gate arrays (FPGAs). Existing approaches
                     to the problem simply remove the flip-flops (FFs), then
                     map the remaining combinational logic, and finally put
                     the FFs back. These approaches ignore the sequential
                     nature of a circuit and assume the positions of the FFs
                     are fixed. However, FFs in a sequential circuit can be
                     repositioned by a functionality-preserving
                     transformation called retiming. As a result, existing
                     approaches can only consider a very small portion of
                     the available solution space. We propose in this paper
                     a novel approach to the technology mapping problem. In
                     our approach, retiming is integrated into the
                     technology mapping process so as to consider the full
                     solution space. We then present a polynomial technology
                     mapping algorithm that, for a given circuit, produces a
                     mapping solution with the minimum clock period among
                     all possible ways of retiming. The effectiveness of the
                     algorithm is also demonstrated experimentally.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  generalterms    = {Algorithms; Performance},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {clock period; field-programmable gate arrays; FPGAs;
                     logic replication; look-up tables; retiming; sequential
                     synthesis; technology mapping},
  subject         = {Hardware --- Logic Design --- Design Styles (B.6.1):
                     {\bf Sequential circuits}; Hardware --- Logic Design
                     --- Design Aids (B.6.3): {\bf Automatic synthesis};
                     Hardware --- Logic Design --- Design Aids (B.6.3): {\bf
                     Optimization}},
}

@Article{Riepe:1998:EBD,
  author          = {Michael A. Riepe and Karem A. Sakallah},
  title           = {The edge-based design rule model revisited},
  journal         = j-TODAES,
  volume          = {3},
  number          = {3},
  pages           = {463--486},
  month           = jul,
  year            = {1998},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Fri Jul 27 10:05:33 MDT 2001},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL             = {http://www.acm.org/pubs/articles/journals/todaes/1998-3-3/p463-riepe/p463-riepe.pdf;
                     http://www.acm.org/pubs/citations/journals/todaes/1998-3-3/p463-riepe/},
  abstract        = {A model for integrated circuit design rules based on
                     rectangle edge constraints has been proposed by
                     Jeppson, Christensson, and Hedenstierna. This model
                     appears to be the most rigorous proposed to date for
                     the description of such edge-based design rules.
                     However, in certain rare circumstances their model is
                     unable to express the correct design rule when the
                     constrained edges are not adjacent in the layout. We
                     introduce a new notation, called an edge path, which
                     allows us to extend their model to allow for
                     constraints between edges separated by an arbitrary
                     number of intervening edges. Using this notation we
                     enumerate all edge paths that are required to correctly
                     model the original design rule macros of the JCH model,
                     and prove that these macros are sufficient to model the
                     most common rules. We also show how this notation
                     allows us to directly specify many kinds of conditional
                     design rules that required ad hoc specification under
                     the JCH model.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  generalterms    = {Theory; Verification},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {design rule checking; design rules; layout
                     verification},
  subject         = {Computer Applications --- Computer-Aided Engineering
                     (J.6); Hardware --- Integrated Circuits --- Design Aids
                     (B.7.2): {\bf Verification}},
}

@Article{Su:1998:EFL,
  author          = {Alan Su and Yu-Chin Hsu and Ta-Yung Liu and Mike
                     Tien-Chien Lee},
  title           = {Eliminating false loops caused by sharing in control
                     path},
  journal         = j-TODAES,
  volume          = {3},
  number          = {3},
  pages           = {487--495},
  month           = jul,
  year            = {1998},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Fri Jul 27 10:05:33 MDT 2001},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL             = {http://www.acm.org/pubs/articles/journals/todaes/1998-3-3/p487-su/p487-su.pdf;
                     http://www.acm.org/pubs/citations/journals/todaes/1998-3-3/p487-su/},
  abstract        = {In high-level synthesis, resource sharing may result
                     in a circuit containing false loops that create great
                     difficulty in timing validation during the design
                     sign-off phase. It is hence desirable to avoid
                     generating any false loops in a synthesized circuit.
                     Previous work [Stok 1992; Huang et al. 1995] considered
                     mainly data path sharing for false loop elimination.
                     However, for a complete circuit with both data path and
                     control path, false loops can be created due to control
                     logic sharing. In this article, we present a novel
                     approach to detect and eliminate the false loops caused
                     by control logic sharing. An effective filter is
                     devised to reduce the computational complexity of false
                     loop detection, which is based on checking the level
                     numbers that are propagated from data path operators to
                     inputs and outputs of the control path. Only the
                     input/output pairs of the control path identified by
                     the filter are further investigated by traversing into
                     the data path for false loop detection. A removal
                     algorithm is then applied to eliminate the detected
                     false loops, followed by logic minimization to further
                     optimize the circuit. Experimental results show that
                     for the nine example circuits we tested, the final
                     designs after false loop removal and logic minimization
                     give only slightly larger area than the original ones
                     that contain false loops.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  generalterms    = {Algorithms; Design},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {control path; false loop},
  subject         = {Hardware --- Register-Transfer-Level Implementation
                     --- Design Aids (B.5.2): {\bf Automatic synthesis};
                     Hardware --- Register-Transfer-Level Implementation ---
                     Design Aids (B.5.2): {\bf Hardware description
                     languages}; Hardware --- Register-Transfer-Level
                     Implementation --- Design Aids (B.5.2): {\bf
                     Optimization}; Hardware --- Register-Transfer-Level
                     Implementation --- Design Aids (B.5.2): {\bf
                     Verification}},
}

@Article{Zhou:1998:ORR,
  author          = {Hai Zhou and D. F. Wong},
  title           = {Optimal river routing with crosstalk constraints},
  journal         = j-TODAES,
  volume          = {3},
  number          = {3},
  pages           = {496--514},
  month           = jul,
  year            = {1998},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Fri Jul 27 10:05:33 MDT 2001},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL             = {http://www.acm.org/pubs/articles/journals/todaes/1998-3-3/p496-zhou/p496-zhou.pdf;
                     http://www.acm.org/pubs/citations/journals/todaes/1998-3-3/p496-zhou/},
  abstract        = {With the increasing density of VLSI circuits, the
                     interconnection wires are being packed even closer.
                     This has increased the effect of interaction among
                     these wires on circuit performance and hence, the
                     importance of controlling crosstalk. In this article,
                     we consider river routing with crosstalk constraints.
                     Given the positions of the pins in a single-layer
                     routing channel and the maximum tolerable crosstalk
                     between each pair of neighboring nets, we give a
                     polynomial time algorithm to decide whether there is a
                     feasible river routing solution and produce one with
                     minimum crosstalk when it is feasible.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  generalterms    = {Algorithms; Design; Experimentation; Performance},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {crosstalk; river routing},
  subject         = {Hardware --- Integrated Circuits --- Design Aids
                     (B.7.2): {\bf Placement and routing}; Mathematics of
                     Computing --- Discrete Mathematics --- Graph Theory
                     (G.2.2): {\bf Network problems}; Computer Applications
                     --- Computer-Aided Engineering (J.6): {\bf
                     Computer-aided design (CAD)}},
}

@Article{Passerone:1998:MRS,
  author =       "C. Passerone and C. Sansoe and L. Lavagno and R.
                 McGeer and J. Martin and R. Passerone and A.
                 Sangiovanni-Vincentelli",
  title =        "Modeling reactive systems in {Java}",
  journal =      j-TODAES,
  volume =       "3",
  number =       "4",
  pages =        "515--523",
  month =        oct,
  year =         "1998",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1998-3-4/p515-passerone/p515-passerone.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1998-3-4/p515-passerone/",
  abstract =     "We present an application of the Java\texttrademark{}
                 programming language to specify and implement reactive
                 real-time systems. We have developed and tested a
                 collection of classes and methods to describe
                 concurrent modules and their asynchronous communication
                 by means of signals. The control structures are closely
                 patterned after those of the synchronous language {\em
                 Esterel}, succinctly describing concurrency, sequencing
                 and preemption. We show the user-friendliness and
                 efficiency of the proposed technique by using an
                 example from the automotive domain.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Design; Languages; Theory",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "high level design; Java; prototyping; simulation",
  subject =      "Hardware --- Logic Design --- Design Aids (B.6.3):
                 {\bf Hardware description languages}; Computing
                 Methodologies --- Simulation and Modeling --- Model
                 Validation and Analysis (I.6.4); Computer Applications
                 --- Physical Sciences and Engineering (J.2): {\bf
                 Electronics}; Computer Applications --- Computer-Aided
                 Engineering (J.6): {\bf Computer-aided design (CAD)}",
}

@Article{Wang:1998:MEV,
  author          = {Li-C. Wang and Magdy S. Abadir and Jing Zeng},
  title           = {On measuring the effectiveness of various design
                     validation approaches for {PowerPC} microprocessor
                     embedded arrays},
  journal         = j-TODAES,
  volume          = {3},
  number          = {4},
  pages           = {524--532},
  month           = oct,
  year            = {1998},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Fri Jul 27 10:05:33 MDT 2001},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL             = {http://www.acm.org/pubs/articles/journals/todaes/1998-3-4/p524-wang/p524-wang.pdf;
                     http://www.acm.org/pubs/citations/journals/todaes/1998-3-4/p524-wang/},
  abstract        = {Design validation for embedded arrays remains as a
                     challenging problem in today's microprocessor design
                     environment. At Somerset, validation of array designs
                     relies on both formal verification and vector
                     simulation. Although several methods for array design
                     validation have been proposed and had great success
                     [Ganguly et al. 1996; Pandey et al. 1996, 1997; Wang
                     and Abadir 1997], little evidence has been reported for
                     the effectiveness of these methods with respect to the
                     detection of design errors. In this paper, we measure
                     the effectiveness of different validation approaches
                     based on automatic design error injection and
                     simulation. The technique provides a systematic way to
                     evaluate various validation approaches at both logic
                     and transistor levels. Experimental results on recent
                     PowerPC microprocessor arrays will be discussed and
                     reported.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  generalterms    = {Design; Verification},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {assertion test generation; ATPG; design error model;
                     logic verification; symbolic trajectory evaluation;
                     validation},
  subject         = {Hardware --- Logic Design --- Design Aids (B.6.3):
                     {\bf Simulation}; Hardware --- Logic Design --- Design
                     Aids (B.6.3): {\bf Verification}; Hardware ---
                     Integrated Circuits --- Design Aids (B.7.2): {\bf
                     Simulation}; Hardware --- Integrated Circuits ---
                     Design Aids (B.7.2): {\bf Verification}},
}

@Article{Dasdan:1998:TDD,
  author          = {Ali Dasdan and Dinesh Ramanathan and Rajesh K. Gupta},
  title           = {A timing-driven design and validation methodology for
                     embedded real-time systems},
  journal         = j-TODAES,
  volume          = {3},
  number          = {4},
  pages           = {533--553},
  month           = oct,
  year            = {1998},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Fri Jul 27 10:05:33 MDT 2001},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL             = {http://www.acm.org/pubs/articles/journals/todaes/1998-3-4/p533-dasdan/p533-dasdan.pdf;
                     http://www.acm.org/pubs/citations/journals/todaes/1998-3-4/p533-dasdan/},
  abstract        = {We address the problem of timing constraint derivation
                     and validation for reactive and real-time embedded
                     systems. We assume that such a system is structured
                     into its tasks, and the structure is modeled using a
                     task graph. Our solution uses the timing behavior
                     committed by the environment to the system first to
                     derive the timing constraints on the system's internal
                     behavior and then use them to derive and validate the
                     timing constraints on the system's external behavior.
                     Our solution consists of the following contributions: a
                     generalized task graph model, a comprehensive
                     classification of timing constraints, algorithms for
                     derivation and validation of timing constraints of the
                     system modeled in the generalized task graph model, a
                     codesign methodology that combines the model and the
                     algorithms, and the implementation of this methodology
                     in a tool called RADHA-RATAN. The main advantages of
                     our solution are that it simplifies the problem of
                     ensuring timing correctness of the system by reducing
                     the complexity of the problem from system level to task
                     level, and that it makes the codesign methodology
                     timing-driven in that our solution makes it possible to
                     maintain a handle on the system's timing correctness
                     from very early stages in the system's design flow.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  generalterms    = {Design; Performance; Verification},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {performance verification; period assignment; period
                     derivation; rate assignment; rate derivation;
                     requirements analysis; system-level design; timing
                     analysis; timing-driven codesign},
  subject         = {Computer Systems Organization --- General (C.0): {\bf
                     Systems specification methodology}; Computer Systems
                     Organization --- Special-Purpose and Application-Based
                     Systems (C.3): {\bf Real-time and embedded systems};
                     Computer Systems Organization --- Performance of
                     Systems (C.4): {\bf Modeling techniques}; Computer
                     Systems Organization --- Performance of Systems (C.4):
                     {\bf Performance attributes}; Software --- Operating
                     Systems --- Organization and Design (D.4.7): {\bf
                     Real-time systems and embedded systems}; Software ---
                     Operating Systems --- Performance (D.4.8): {\bf
                     Modeling and prediction}; Computer Applications ---
                     Computer-Aided Engineering (J.6): {\bf Computer-aided
                     design (CAD)}},
}

@Article{Rajan:1998:ASD,
  author =       "S. P. Rajan and M. Fujita and K. Yuan and M. T.-C.
                 Lee",
  title =        "{ATM} switch design by high-level modeling, formal
                 verification and high-level synthesis",
  journal =      j-TODAES,
  volume =       "3",
  number =       "4",
  pages =        "554--562",
  month =        oct,
  year =         "1998",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1998-3-4/p554-rajan/p554-rajan.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1998-3-4/p554-rajan/",
  abstract =     "Asynchronous Transfer Mode (ATM) has emerged as a
                 backbone for high-speed broadband telecommunication
                 networks. In this paper, we present ATM switch design,
                 starting from a parametric high-level model and
                 debugging the model using a combination of formal
                 verification and simulation. The model has been used to
                 synthesize ATM switches according to customers'
                 choices, by choosing concrete values for each of the
                 generic parameters. We provide a pragmatic combination
                 of simulation, model checking, and theorem proving to
                 gain confidence in the ATM switch design correctness.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Design; Verification",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "ATM switch; high-level design; synthesis;
                 verification",
  subject =      "Computer Applications --- Computer-Aided Engineering
                 (J.6): {\bf Computer-aided design (CAD)}",
}

@Article{Huggins:1998:SVP,
  author          = {James K. Huggins and David {Van Campenhout}},
  title           = {Specification and verification of pipelining in the
    {ARM2} {RISC} microprocessor},
  journal         = j-TODAES,
  volume          = {3},
  number          = {4},
  pages           = {563--580},
  month           = oct,
  year            = {1998},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Fri Jul 27 10:05:33 MDT 2001},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL             = {http://www.acm.org/pubs/articles/journals/todaes/1998-3-4/p563-huggins/p563-huggins.pdf;
    http://www.acm.org/pubs/citations/journals/todaes/1998-3-4/p563-huggins/},
  abstract        = {Gurevich Abstract State Machines (ASMs) provide a sound
    mathematical basis for the specification and verification of systems. An
    application of the ASM methodology to the verification of a pipelined
    microprocessor (an ARM2 implementation) is described. Both the sequential
    execution model and final pipelined model are formalized using ASMs. A
    series of intermediate models are introduced that gradually expose the
    complications of pipelining. The first intermediate model is proven
    equivalent to the sequential model in the absence of structural, control,
    and data hazards. In the following steps, these simplifying assumptions
    are lifted one by one, and the original proof is refined to establish the
    equivalence of each intermediate model with the sequential model, leading
    ultimately to a full proof of equivalence of the sequential and pipelined
    models.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {abstract state machines; ARM processor; design
    verification; formal verification; pipelined processors; pipelining},
  subject         = {Hardware --- Register-Transfer-Level Implementation ---
    Design Aids (B.5.2); Computer Systems Organization --- General (C.0): {\bf
    Systems specification methodology}; Computer Systems Organization ---
    Processor Architectures --- Single Data Stream Architectures (C.1.1)},
}

@Article{VanCampenhout:1998:HLD,
  author          = {D. {Van Campenhout} and H. Al-Asaad and J. P. Hayes and
    T. Mudge and R. B. Brown},
  title           = {High-level design verification of microprocessors via
    error modeling},
  journal         = j-TODAES,
  volume          = {3},
  number          = {4},
  pages           = {581--599},
  month           = oct,
  year            = {1998},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Fri Jul 27 10:05:33 MDT 2001},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL             = {http://www.acm.org/pubs/articles/journals/todaes/1998-3-4/p581-campenhout/p581-campenhout.pdf;
    http://www.acm.org/pubs/citations/journals/todaes/1998-3-4/p581-campenhout/},
  abstract        = {A design verification methodology for microprocessor
    hardware based on modeling design errors and generating simulation vectors
    for the modeled errors via physical fault testing techniques is presented.
    We have systematically collected design error data from a number of
    microprocessor design projects. The error data is used to derive error
    models suitable for design verification testing. A class of basic error
    models is identified and shown to yield tests that provide good coverage
    of common error types. To improve coverage for more complex errors, a new
    class of conditional error models is introduced. An experiment to evaluate
    the effectiveness of our methodology is presented. Single actual design
    errors are injected into a correct design, and it is determined if the
    methodology will generate a test that detects the actual errors. The
    experiment has been conducted for two microprocessor designs and the
    results indicate that very high coverage of actual design errors can be
    obtained with test sets that are complete for a small number of synthetic
    error models.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  generalterms    = {Verification},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {design errors; design verification; error modeling},
  subject         = {Hardware --- General (B.0); Hardware ---
    Register-Transfer-Level Implementation --- Design Aids (B.5.2)},
}

@Article{Hasteer:1998:EEC,
  author =       "G. Hasteer and A. Mathur and P. Banerjee",
  title =        "Efficient equivalence checking of multi-phase designs
                 using phase abstraction and retiming",
  journal =      j-TODAES,
  volume =       "3",
  number =       "4",
  pages =        "600--625",
  month =        oct,
  year =         "1998",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1998-3-4/p600-hasteer/p600-hasteer.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1998-3-4/p600-hasteer/",
  abstract =     "Equivalence checking of finite state machines (FSMs)
                 traditionally assumes single phase machines where a
                 single clock (implicit or explicit) synchronizes the
                 state of the FSM. We extend the equivalence checking
                 paradigm to FSMs with multi-phase clocks. Such designs
                 are becoming increasingly popular in high performance
                 microprocessors since they result in lower
                 synchronization overhead. In addition, aggressive
                 pipelining and the use of ``sparse'' encodings results
                 in designs where the ratio of steady states to the
                 total state space is very low. In this paper, we show
                 that automatically transforming such designs to ones
                 that have more ``dense'' encodings can result in
                 significant benefits in using implicit BDD-based
                 techniques for their verification. We explore two such
                 techniques: {\em phase abstraction\/} and {\em
                 retiming\/} and demonstrate their utility in the
                 context of FSM equivalence checking. The main
                 contributions of our work are: \par

                 --We show that a multi-phase FSM can be transformed to
                 a functionally equivalent one phase FSM and this phase
                 abstraction leads to significant improvement in the
                 size of FSMs that can be checked for equivalence.
                 \par

                 --We show that min-latch retiming preserves equivalence
                 and can be performed efficiently in multi-phase
                 designs, even when latch borrowing and discarding is
                 allowed at the primary inputs and outputs. \par

                 --We demonstrate the utility of our approach on several
                 controller FSMs from the industry.",
  acknowledgement = ack-nhfb,
  annote =       "Article title page incorrectly has Bannerjee instead
                 of Banerjee.",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Algorithms; Design; Performance; Theory;
                 Verification",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "binary decision diagram; encoding density;
                 multi-phase FSM; product machine; sequential hardware
                 equivalence; steady states",
  subject =      "Hardware --- Logic Design --- Design Aids (B.6.3);
                 Hardware --- Logic Design --- Design Aids (B.6.3): {\bf
                 Verification}; Computer Applications --- Computer-Aided
                 Engineering (J.6): {\bf Computer-aided design (CAD)}",
}

@Article{Benso:1998:ELC,
  author =       "A. Benso and P. Prinetto and M. Rebaudengo and M.
                 {Sonza Reorda}",
  title =        "{EXFI}: a low-cost fault injection system for embedded
                 microprocessor-based boards",
  journal =      j-TODAES,
  volume =       "3",
  number =       "4",
  pages =        "626--634",
  month =        oct,
  year =         "1998",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1998-3-4/p626-benso/p626-benso.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1998-3-4/p626-benso/",
  abstract =     "Evaluating the faulty behavior of low-cost embedded
                 microprocessor-based boards is an increasingly
                 important issue, due to their adoption in many safety
                 critical systems. The architecture of a complete Fault
                 Injection environment is proposed, integrating a module
                 for generating a collapsed list of faults, and another
                 for performing their injection and gathering the
                 results. To address this issue, the paper describes a
                 software-implemented Fault Injection approach based on
                 the Trace Exception Mode available in most
                 microprocessors. The authors describe EXFI, a
                 prototypical system implementing the approach, and
                 provide data about some sample benchmark applications.
                 The main advantages of EXFI are the low cost, the good
                 portability, and the high efficiency.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Design; Experimentation; Measurement",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "fault coverage; fault injection; microprocessor
                 systems; software-implemented fault injection; trace
                 exception mode",
  subject =      "Hardware --- Performance and Reliability ---
                 Reliability, Testing, and Fault-Tolerance (B.8.1);
                 Hardware --- Performance and Reliability ---
                 Performance Analysis and Design Aids (B.8.2)",
}

@Article{Gasteier:1999:BBC,
  author          = {Michael Gasteier and Manfred Glesner},
  title           = {Bus-based communication synthesis on system level},
  journal         = j-TODAES,
  volume          = {4},
  number          = {1},
  pages           = {1--11},
  month           = jan,
  year            = {1999},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Fri Jul 27 10:05:33 MDT 2001},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL             = {http://www.acm.org/pubs/articles/journals/todaes/1999-4-1/p1-gasteier/p1-gasteier.pdf;
    http://www.acm.org/pubs/citations/journals/todaes/1999-4-1/p1-gasteier/},
  abstract        = {In this article, we present an approach to automatic
    generation of communication topologies for statically scheduled systems of
    subsystems. Given a specification containing a set of processes that
    communicate via abstract send and receive functions, we show how a
    cost-efficient communication topology consisting of one or more buses
    without arbitration scheme can be set up for such applications.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  generalterms    = {Algorithms; Design},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {bus generation; bus without arbitration; communication
    synthesis; statically scheduled systems; transfer scheduling},
  subject         = {Hardware --- Input/Output and Data Communications ---
    Interconnections (Subsystems) (B.4.3)},
}

@Article{Liao:1999:TCB,
  author =       "Stan Liao and Srinivas Devadas and Kurt Keutzer",
  title =        "A text-compression-based method for code size
                 minimization in embedded systems",
  journal =      j-TODAES,
  volume =       "4",
  number =       "1",
  pages =        "12--38",
  month =        jan,
  year =         "1999",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1999-4-1/p12-liao/p12-liao.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1999-4-1/p12-liao/",
  abstract =     "We address the problem of code-size minimization in
                 VLSI systems with embedded DSP processors. Reducing
                 code size reduces the production cost of embedded
                 systems. \par

                 We use data-compression methods to develop code-size
                 minimization strategies. In our framework, the
                 compressed program consists of a skeleton and a
                 dictionary. We show that the dictionary can be computed
                 by solving a set-covering problem derived from the
                 original program. To execute the compressed code, we
                 describe two methods that have different performance
                 characteristics and different degrees of freedom in
                 compressing the code. We also address performance
                 considerations, and show that they can be incorporated
                 easily into the set-covering formulation, and present
                 experimental results obtained with Texas Instruments'
                 optimizing TMS320C25 compiler.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Algorithms; Experimentation; Measurement;
                 Performance",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "code size optimization; compression",
  subject =      "Software --- Programming Languages --- Processors
                 (D.3.4): {\bf Compilers}; Software --- Programming
                 Languages --- Processors (D.3.4): {\bf Optimization};
                 Data --- Coding and Information Theory (E.4): {\bf Data
                 compaction and compression}",
}

@Article{Song:1999:CDP,
  author =       "Xiaoyu Song and Yuke Wang",
  title =        "On the crossing distribution problem",
  journal =      j-TODAES,
  volume =       "4",
  number =       "1",
  pages =        "39--51",
  month =        jan,
  year =         "1999",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1999-4-1/p39-song/p39-song.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1999-4-1/p39-song/",
  abstract =     "VLSI layout design is typically decomposed into four
                 steps: {\em placement, global routing, routing region
                 definition, and detailed routing}. The crossing
                 distribution problem occurs prior to detailed routing
                 [Groeneveld 1989; Marek-Sadowska and Sarrafzadeh 1995;
                 Wang and Shung 1992]. A {\em crossing\/} is defined as
                 the intersection of two nets. The problem of net
                 crossing distribution is important in layout design,
                 such as design of dense chips, multichip modules (MCM),
                 critical net routing, and analog circuits [Groeneveld
                 1989; Sarrafzadeh 1995; Wang and Shung 1992]. It is
                 observed that nets crossing each other are more
                 difficult to route than those that do not cross. The
                 layout of crossing nets has to be realized in more than
                 two layers and requires a larger number of {\em vias}.
                 In this paper we study the crossing distribution
                 problem of two-terminal nets between two regions. We
                 present an optimal $ O(n^2) $ time algorithm for
                 two-sided nets, where $n$ is the number of nets. Our
                 results are superior to previous ones [Marek-Sadowska
                 and Sarrafzadeh 1995; Wang and Shung 1992]. We give an
                 optimal $ O(n^2) $ time algorithm for the crossing
                 distribution problem with one-sided nets. We solve
                 optimally the complete version of the crossing
                 distribution problem for two-terminal nets in two
                 regions that has not been studied before.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Algorithms; Design; Experimentation",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "crossings; VLSI layout",
  subject =      "Hardware --- Integrated Circuits (B.7); Hardware ---
                 Integrated Circuits --- Design Aids (B.7.2); Theory of
                 Computation --- Analysis of Algorithms and Problem
                 Complexity (F.2); Theory of Computation --- Analysis of
                 Algorithms and Problem Complexity --- Nonnumerical
                 Algorithms and Problems (F.2.2): {\bf Sequencing and
                 scheduling}",
}

@Article{Tseng:1999:TLL,
  author          = {Jyh-Mou Tseng and Jing-Yang Jou},
  title           = {Two-level logic minimization for low power},
  journal         = j-TODAES,
  volume          = {4},
  number          = {1},
  pages           = {52--69},
  month           = jan,
  year            = {1999},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Fri Jul 27 10:05:33 MDT 2001},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL             = {http://www.acm.org/pubs/articles/journals/todaes/1999-4-1/p52-tseng/p52-tseng.pdf;
    http://www.acm.org/pubs/citations/journals/todaes/1999-4-1/p52-tseng/},
  abstract        = {In this paper we present a complete Boolean method for
    reducing the power consumption in two-level combinational circuits. The
    two-level logic optimizer performs the logic minimization for low power
    targeting static PLA, general logic gates, and dynamic PLA
    implementations. We modify the Espresso algorithm by adding our
    heuristics, which bias logic minimization toward lowering power
    dissipation. In our heuristics, signal probabilities and transition
    densities are two important parameters. The experimental results are
    promising.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  generalterms    = {Algorithms; Design; Performance; Theory},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {logic synthesis; low power design; programmable logic
    array; two-level logic minimization},
  subject         = {Hardware --- Logic Design --- Design Styles (B.6.1): {\bf
    Combinational logic}; Hardware --- Logic Design --- Design Styles (B.6.1):
    {\bf Logic arrays}; Hardware --- Logic Design --- Design Aids (B.6.3):
    {\bf Automatic synthesis}; Hardware --- Integrated Circuits --- Types and
    Design Styles (B.7.1): {\bf VLSI (very large scale integration)}},
}

@Article{Vahid:1999:PCT,
  author          = {Frank Vahid},
  title           = {Procedure cloning: a transformation for improved
    system-level functional partitioning},
  journal         = j-TODAES,
  volume          = {4},
  number          = {1},
  pages           = {70--96},
  month           = jan,
  year            = {1999},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Fri Jul 27 10:05:33 MDT 2001},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL             = {http://www.acm.org/pubs/articles/journals/todaes/1999-4-1/p70-vahid/p70-vahid.pdf;
    http://www.acm.org/pubs/citations/journals/todaes/1999-4-1/p70-vahid/},
  abstract        = {Functional partitioning assigns the functions of a
    system's program-like specification among system components, such as
    standard-software and custom-hardware processors. We introduce a new
    transformation, called procedure cloning, that significantly improves
    functional partitioning results. The transformation creates a clone of a
    procedure for sole use by a particular procedure caller, so the clone can
    be assigned to the caller's processor, which in turn improves performance
    through reduced communication. Heuristics are used to prevent the
    exponential size increase that could occur if cloning were done
    indiscriminately. We introduce a variety of cloning heuristics, highlight
    experiments demonstrating the improvements obtained using cloning, and
    compare the various cloning heuristics.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  generalterms    = {Design},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {behavioral synthesis; embedded systems; functional
    partitioning; hardware/software codesign; replication; system-level
    design; system-on-a-chip; transformations},
  subject         = {Hardware --- General (B.0); Hardware ---
    Register-Transfer-Level Implementation --- Design Aids (B.5.2): {\bf
    Automatic synthesis}; Hardware --- Register-Transfer-Level Implementation
    --- Design Aids (B.5.2): {\bf Hardware description languages}; Hardware
    --- Register-Transfer-Level Implementation --- Design Aids (B.5.2): {\bf
    Optimization}; Computer Applications --- Computer-Aided Engineering (J.6):
    {\bf Computer-aided design (CAD)}},
}

@Article{Wang:1999:PRP,
  author          = {Qi Wang and Sarma B. K. Vrudhula and Gary Yeap and
    Shantanu Ganguly},
  title           = {Power reduction and power-delay trade-offs using logic
    transformations},
  journal         = j-TODAES,
  volume          = {4},
  number          = {1},
  pages           = {97--121},
  month           = jan,
  year            = {1999},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Fri Jul 27 10:05:33 MDT 2001},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL             = {http://www.acm.org/pubs/articles/journals/todaes/1999-4-1/p97-wang/p97-wang.pdf;
    http://www.acm.org/pubs/citations/journals/todaes/1999-4-1/p97-wang/},
  abstract        = {We present an efficient technique to reduce the switching
    activity in a technology-mapped CMOS combinational circuit based on local
    logic transformations. The transformations consist of adding redundant
    connections or gates so as to reduce switching activity. We describe
    simple and efficient procedures, based on logic implication, for
    identifying the sources and targets of the redundant connections.
    Additionally, we give procedures that permit the designer to trade-off
    power and delay after the transformations. Results of experiments on both
    the MCNC benchmark circuits and the circuits of a PowerPC microprocessor
    chip are given. The results indicate that significant power reduction of a
    CMOS combinational circuit can be achieved with very low area overhead,
    delay penalty, and computational cost.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  generalterms    = {Design; Experimentation; Performance},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {CMOS logic; logic optimization; logic synthesis; low
    power; power estimation},
  subject         = {Hardware --- Integrated Circuits --- General (B.7.0);
    Hardware --- Logic Design --- Design Styles (B.6.1): {\bf Combinational
    logic}},
}

@Article{Kern:1999:FVH,
  author          = {Christoph Kern and Mark R. Greenstreet},
  title           = {Formal verification in hardware design: a survey},
  journal         = j-TODAES,
  volume          = {4},
  number          = {2},
  pages           = {123--193},
  month           = apr,
  year            = {1999},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Fri Jul 27 10:05:33 MDT 2001},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL             = {http://www.acm.org/pubs/articles/journals/todaes/1999-4-2/p123-kern/p123-kern.pdf;
    http://www.acm.org/pubs/citations/journals/todaes/1999-4-2/p123-kern/},
  abstract        = {In recent years, formal methods have emerged as an
    alternative approach to ensuring the quality and correctness of hardware
    designs, overcoming some of the limitations of traditional validation
    techniques such as simulation and testing. \par

    There are two main aspects to the application of formal methods in a
    design process: the formal framework used to specify desired properties of
    a design and the verification techniques and tools used to reason about
    the relationship between a specification and a corresponding
    implementation. We survey a variety of frameworks and techniques proposed
    in the literature and applied to actual designs. The specification
    frameworks we describe include temporal logics, predicate logic,
    abstraction and refinement, as well as containment between
    $ \omega $-regular languages. The verification techniques presented
    include model checking, automata-theoretic techniques, automated theorem
    proving, and approaches that integrate the above methods. \par

    In order to provide insight into the scope and limitations of currently
    available techniques, we present a selection of case studies where formal
    methods were applied to industrial-scale designs, such as microprocessors,
    floating-point hardware, protocols, memory subsystems, and communications
    hardware.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  generalterms    = {Design; Verification},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {case studies; formal methods; formal verification;
    hardware verification; language containment; model checking; survey;
    theorem proving},
  subject         = {General Literature --- Introductory and Survey (A.1);
    Hardware --- Integrated Circuits --- Design Aids (B.7.2): {\bf
    Verification}},
}

@Article{Lee:1999:BBI,
  author =       "Kuen-Jong Lee and Jing-Jou Tang and Tsung-Chu Huang",
  title =        "{BIFEST}: a built-in intermediate fault effect sensing
                 and test generation system for {CMOS} bridging faults",
  journal =      j-TODAES,
  volume =       "4",
  number =       "2",
  pages =        "194--218",
  month =        apr,
  year =         "1999",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1999-4-2/p194-lee/p194-lee.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1999-4-2/p194-lee/",
  abstract =     "This paper presents BIFEST, an ATPG system that
                 employs the built-in intermediate voltage test
                 technique in an efficient ATPG process to deal with
                 CMOS bridging faults. Fast and accurate calculations of
                 the intermediate bridging voltages and the variant
                 threshold tolerance margins on a resistive bridging
                 fault model are presented. A PODEM-like, PPSFP-based
                 ATPG process is developed to generate test patterns for
                 faults that are conventionally logic-testable. The
                 remaining faults are then dealt with by special
                 circuits, called built-in intermediate voltage sensors
                 (BIVSs). By this methodology, almost the same fault
                 coverage as that employing $ I_{DDQ} $ testing can be
                 achieved with only logic monitoring required.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Design; Experimentation; Reliability",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  subject =      "Hardware --- Integrated Circuits --- General (B.7.0)",
}

@Article{Thornton:1999:BSC,
  author =       "M. A. Thornton and V. S. S. Nair",
  title =        "Behavioral synthesis of combinational logic using
                 spectral-based heuristics",
  journal =      j-TODAES,
  volume =       "4",
  number =       "2",
  pages =        "219--230",
  month =        apr,
  year =         "1999",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1999-4-2/p219-thornton/p219-thornton.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1999-4-2/p219-thornton/",
  abstract =     "A prototype system developed to convert a behavioral
                 representation of a Boolean function in OBDD form into
                 an initial structural representation is described and
                 experimental results are given. The system produces a
                 multilevel circuit using heuristic rules based on
                 properties of a subset of spectral coefficients. Since
                 the behavioral description is in OBDD form, efficient
                 methods are used to quickly compute the small subset of
                 spectral coefficients needed for the application of the
                 heuristics. The heuristics guide subsequent
                 decompositions of the OBDD, resulting in an iterative
                 construction of the structural form. At each stage of
                 the translation, the form of the decomposition is
                 chosen in order to achieve optimization goals.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Design",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "automatic synthesis; decision diagram; decision
                 diagrams; design aids; logic design; spectral methods",
  subject =      "Hardware --- Logic Design --- Design Aids (B.6.3)",
}

@Article{Cheng:1999:CGN,
  author =       "Wei-Kai Cheng and Youn-Long Lin",
  title =        "Code generation of nested loops for {DSP} processors
                 with heterogeneous registers and structural
                 pipelining",
  journal =      j-TODAES,
  volume =       "4",
  number =       "3",
  pages =        "231--256",
  month =        jul,
  year =         "1999",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1999-4-3/p231-cheng/p231-cheng.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1999-4-3/p231-cheng/",
  abstract =     "We propose a microcode-optimizing method targeting a
                 programmable DSP processor. Efficient generation of
                 microcodes is essential to better utilize the
                 computation power of a DSP processor. Since most
                 state-of-the-art DSP processors feature some sort of
                 irregular architectures and most DSP applications have
                 nested loop constructs, their code generation is a
                 nontrivial task. In this paper, we consider two
                 features frequently found in contemporary DSP
                 processors --- structural pipelining and heterogeneous
                 registers. We propose a code generator that performs
                 instruction scheduling and register allocation
                 simultaneously. The proposed approach has been
                 implemented and evaluated using a set of benchmark core
                 algorithms. Simulation of the generated codes targeted
                 towards the TI TMS320C40 DSP processor shows that our
                 system is indeed more effective compared with a
                 commercial optimizing DSP compiler.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "code generation; DSP",
  subject =      "Computer Systems Organization --- Special-Purpose and
                 Application-Based Systems (C.3): {\bf Real-time and
                 embedded systems}",
}

@Article{Li:1999:PEE,
  author =       "Yau-Tsun Steven Li and Sharad Malik and Andrew Wolfe",
  title =        "Performance estimation of embedded software with
                 instruction cache modeling",
  journal =      j-TODAES,
  volume =       "4",
  number =       "3",
  pages =        "257--279",
  month =        jul,
  year =         "1999",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1999-4-3/p257-li/p257-li.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1999-4-3/p257-li/",
  abstract =     "Embedded systems generally interact in some way with
                 the outside world. This may involve measuring sensors
                 and controlling actuators, communicating with other
                 systems, or interacting with users. These functions
                 impose real-time constraints on system design.
                 Verification of these specifications requires computing
                 an upper bound on the worst-case execution time (WCET)
                 of a hardware/software system. Furthermore, it is
                 critical to derive a tight upper bound on WCET in order
                 to make efficient use of system resources. \par

                 The problem of bounding WCET is particularly difficult
                 on modern processors. These processors use cache-based
                 memory systems that vary memory access time based on
                 the dynamic memory access pattern of the program. This
                 must be accurately modeled in order to tightly bound
                 WCET. Several analysis methods have been proposed to
                 bound WCET on processors with instruction caches.
                 Existing approaches either search all possible program
                 paths, an intractable problem, or they use highly
                 pessimistic assumptions to limit the search space. In
                 this paper we present a more effective method for
                 modeling instruction cache activity and computing a
                 tight bound on WCET. The method uses an integer linear
                 programming formulation and does not require explicit
                 enumeration of program paths. The method is implemented
                 in the program {\tt cinderella} and we present some
                 experimental results of this implementation.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Design; Experimentation; Performance; Theory",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  subject =      "Computer Systems Organization --- Performance of
                 Systems (C.4): {\bf Modeling techniques}; Computer
                 Systems Organization --- Special-Purpose and
                 Application-Based Systems (C.3): {\bf Real-time and
                 embedded systems}",
}

@Article{Shi:1999:SSL,
  author =       "C.-J. Richard Shi and Michael W. Tian",
  title =        "Simulation and sensitivity of linear analog circuits
                 under parameter variations by {Robust} interval
                 analysis",
  journal =      j-TODAES,
  volume =       "4",
  number =       "3",
  pages =        "280--312",
  month =        jul,
  year =         "1999",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1999-4-3/p280-shi/p280-shi.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1999-4-3/p280-shi/",
  abstract =     "An interval-mathematic approach is presented for
                 frequency-domain simulation and sensitivity analysis of
                 linear analog circuits under parameter variations. With
                 uncertain parameters represented as intervals, bounding
                 frequency-domain responses is formulated as the problem
                 of solving systems of linear interval equations. The
                 formulation is based on a variant of modified nodal
                 analysis, and is particularly amenable to interval
                 analysis. Some characterizations of the solution sets of
                 systems of linear interval equations are derived. With
                 these characterizations, an elegant and efficient
                 algorithm is proposed to solve systems of linear
                 interval equations. While the widely used Monte Carlo
                 approach requires many circuit simulations to achieve
                 even moderate accuracy, the computational cost of the
                 proposed approach is about twice that of one circuit
                 simulation. The computed response bounds contain
                 provably, or are usually very close to, the actual
                 response bounds. Further, sensitivity under parameter
                 variations can be computed from the response bounds at
                 minor computational cost. The algorithms are
                 implemented in SPICE3F5, using sparse-matrix techniques
                 and tested on several practical analog circuits.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Algorithms; Design; Verification",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "interval mathematics; process variations; sensitivity;
                 uncertainty; worst-case analysis",
  subject =      "Hardware --- Integrated Circuits --- Design Aids
                 (B.7.2): {\bf Simulation}; Hardware --- Integrated
                 Circuits --- Design Aids (B.7.2): {\bf Verification};
                 Mathematics of Computing --- Numerical Analysis ---
                 Numerical Linear Algebra (G.1.3): {\bf Linear systems
                 (direct and iterative methods)}; Mathematics of
                 Computing --- Numerical Analysis --- Numerical Linear
                 Algebra (G.1.3): {\bf Sparse, structured, and very
                 large systems (direct and iterative methods)}; Computer
                 Applications --- Computer-Aided Engineering (J.6): {\bf
                 Computer-aided manufacturing (CAM)}",
}

@Article{Wurth:1999:FMO,
  author =       "Bernd Wurth and Ulf Schlichtmann and Klaus Eckl and
                 Kurt J. Antreich",
  title =        "Functional multiple-output decomposition with
                 application to technology mapping for lookup
                 table-based {FPGAs}",
  journal =      j-TODAES,
  volume =       "4",
  number =       "3",
  pages =        "313--350",
  month =        jul,
  year =         "1999",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1999-4-3/p313-wurth/p313-wurth.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1999-4-3/p313-wurth/",
  abstract =     "Functional decomposition is an important technique for
                 technology mapping to lookup table-based FPGA
                 architectures. We present the theory of and a novel
                 approach to functional disjoint decomposition of
                 multiple-output functions, in which common subfunctions
                 are extracted during technology mapping. \par

                 While a Boolean function usually has a very large
                 number of subfunctions, we show that not all of them
                 are useful for multiple-output decomposition. We use a
                 partition of the set of bound set vertices as the basis
                 to compute {\em preferable\/} decomposition functions,
                 which are sufficient for an optimal multiple-output
                 decomposition. \par

                 We propose several new algorithms that deal with
                 central issues of functional multiple-output
                 decomposition. First, an efficient algorithm to solve
                 the variable partitioning problem is described. Second,
                 we show how to implicitly compute all preferable
                 functions of a single-output function and how to
                 identify all common preferable functions of a
                 multiple-output function. Due to implicit computation
                 in the crucial steps, the algorithm is very efficient.
                 Experimental results show significant reductions in
                 area.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Algorithms; Design; Experimentation; Performance;
                 Theory",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "assignable functions; Boolean functions;
                 computer-aided design of VLSI; decomposition; FPGA
                 technology; implicit BDD-based methods; mapping
                 synthesis; multiple-output decomposition; preferable
                 functions; subfunction sharing gain; subfunction
                 sharing potential; TOS; variable partitioning for
                 decomposition",
  subject =      "Computer Applications --- Computer-Aided Engineering
                 (J.6); Hardware --- Integrated Circuits --- Types and
                 Design Styles (B.7.1): {\bf Gate arrays}",
}

@Article{Benini:1999:SSC,
  author =       "L. Benini and G. {De Micheli} and E. Macii and M.
                 Poncino and R. Scarsi",
  title =        "Symbolic synthesis of clock-gating logic for power
                 optimization of synchronous controllers",
  journal =      j-TODAES,
  volume =       "4",
  number =       "4",
  pages =        "351--375",
  month =        oct,
  year =         "1999",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1999-4-4/p351-benini/p351-benini.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1999-4-4/p351-benini/",
  abstract =     "Recent results have shown that dynamic power
                 management is effective in reducing the total power
                 consumption of sequential circuits. In this paper, we
                 propose a bottom-up approach for the automatic
                 extraction and synthesis of dynamic power management
                 circuitry starting from structural logic-level
                 specifications. Our techniques leverage the compact
                 BDD-based representation of Boolean and pseudo-Boolean
                 functions to detect idle conditions where the clock can
                 be stopped without compromising functional correctness.
                 Moreover, symbolic techniques allow accurate
                 probabilistic computations; in particular, they enable
                 the use of non-equiprobable primary input
                 distributions, a key step in the construction of models
                 that match the behavior of real hardware devices with a
                 high degree of fidelity. The results are encouraging,
                 since power savings of up to 34\% have been obtained on
                 standard benchmark circuits.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Design",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  subject =      "Hardware --- Logic Design --- Design Styles (B.6.1):
                 {\bf Sequential circuits}; Hardware --- Logic Design
                 --- Design Aids (B.6.3): {\bf Automatic synthesis};
                 Hardware --- Logic Design --- Design Aids (B.6.3): {\bf
                 Optimization}",
}

@Article{Choi:1999:FDA,
  author =       "Kyumyung Choi and Steven P. Levitan",
  title =        "A flexible datapath allocation method for
                 architectural synthesis",
  journal =      j-TODAES,
  volume =       "4",
  number =       "4",
  pages =        "376--404",
  month =        oct,
  year =         "1999",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1999-4-4/p376-choi/p376-choi.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1999-4-4/p376-choi/",
  abstract =     "We present a robust datapath allocation method that is
                 flexible enough to handle constraints imposed by a
                 variety of target architectures. Key features of this
                 method are its ability to handle accurate modeling of
                 datapath units and the simultaneous optimization of
                 direct objective functions. The proposed method
                 consists of a new binding model construction scheme and
                 an optimization technique based on simulated annealing.
                 To illustrate the flexibility of this method, two
                 datapath allocation procedures have been developed for
                 two problem environments: (1) a procedure that
                 incorporates interconnection area and delay estimates,
                 where floor-planning is tightly integrated into
                 datapath allocation; and (2) a procedure that handles
                 registers, register files, and multiport memories for
                 data storage, as well as random and linear topologies
                 for interconnection architectures. Results from these
                 two applications show our method produces competitive
                 designs for benchmark circuits, as well as being
                 flexible enough to be used for a variety of different
                 domains.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "allocation and binding; high-level synthesis",
  subject =      "Hardware --- Register-Transfer-Level Implementation
                 --- Design Aids (B.5.2): {\bf Automatic synthesis};
                 Hardware --- Register-Transfer-Level Implementation ---
                 Design Aids (B.5.2): {\bf Optimization}; Hardware ---
                 Integrated Circuits --- Design Aids (B.7.2): {\bf
                 Placement and routing}; Mathematics of Computing ---
                 Numerical Analysis --- Optimization (G.1.6); Computer
                 Applications --- Computer-Aided Engineering (J.6): {\bf
                 Computer-aided design (CAD)}",
}

@Article{Hong:1999:POU,
  author =       "Inki Hong and Miodrag Potkonjak and Ramesh Karri",
  title =        "Power optimization using divide-and-conquer techniques
                 for minimization of the number of operations",
  journal =      j-TODAES,
  volume =       "4",
  number =       "4",
  pages =        "405--429",
  month =        oct,
  year =         "1999",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1999-4-4/p405-hong/p405-hong.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1999-4-4/p405-hong/",
  abstract =     "We introduce an approach for power optimization using
                 a set of compilation and architectural techniques. The
                 key technical innovation is a novel divide-and-conquer
                 compilation technique to minimize the number of
                 operations for general computations. Our technique
                 optimizes not only a significantly wider set of
                 computations than the previously published techniques,
                 but also outperforms (or performs at least as well as)
                 other techniques on all examples. Along the
                 architectural dimension, we investigate coordinated
                 impact of compilation techniques on the number of
                 processors which provide optimal trade-off between cost
                 and power. We demonstrate that proper compilation
                 techniques can significantly reduce power with bounded
                 hardware cost. The effectiveness of all techniques and
                 algorithms is documented on numerous real-life
                 designs.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Algorithms",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "code generation; transformations",
  subject =      "Software --- Programming Languages --- Processors
                 (D.3.4): {\bf Compilers}; Software --- Programming
                 Languages --- Processors (D.3.4): {\bf Optimization}",
}

@Article{Potkonjak:1999:MAD,
  author =       "Miodrag Potkonjak and Wayne Wolf",
  title =        "A methodology and algorithms for the design of hard
                 real-time multitasking {ASICs}",
  journal =      j-TODAES,
  volume =       "4",
  number =       "4",
  pages =        "430--459",
  month =        oct,
  year =         "1999",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1999-4-4/p430-potkonjak/p430-potkonjak.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1999-4-4/p430-potkonjak/",
  abstract =     "Traditional high-level synthesis concentrates on the
                 implementation of a single task (e.g. filter, linear
                 controller, A/D converter). However, many
                 applications---multifunctional embedded controllers,
                 intelligent wireless end-points, and DSP and multimedia
                 servers---are defined as sets of several computational
                 tasks. This paper describes new techniques for the
                 synthesis of ASIC implementations that realize multiple
                 computational processes under hard real-time
                 constraints. Our synthesis methodology establishes
                 connections between two important computer engineering
                 domains: operating systems and behavioral synthesis.
                 Our hierarchical approach starts from an
                 incompletely-specified preliminary solution and uses,
                 interchangeably, operating system and behavioral
                 synthesis techniques to derive increasingly more
                 detailed and accurate design solutions. We have
                 experimented with both optimal and heuristic algorithms
                 to implement this methodology. The optimal algorithm
                 uses several heuristics to speed up the average run
                 time of an exhaustive branch-and-bound search.
                 Force-directed optimization is the core of the
                 heuristic synthesis method. Analysis of the proposed
                 algorithms and the experiments shows that matching the
                 number of bits and type of operations in tasks
                 assigned to the same application-specific processor was
                 the most important factor in obtaining area-efficient
                 designs.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Algorithms; Design; Performance",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  subject =      "Hardware --- Integrated Circuits --- Types and Design
                 Styles (B.7.1): {\bf Algorithms implemented in
                 hardware}",
}

@Article{DosSantos:2000:CMP,
  author =       "Luiz C. V. {Dos Santos} and M. J. M. Heijligers and C.
                 A. J. {Van Eijk} and J. {Van Eijndhoven} and J. A. G.
                 Jess",
  title =        "A code-motion pruning technique for global
                 scheduling",
  journal =      j-TODAES,
  volume =       "5",
  number =       "1",
  pages =        "1--33",
  month =        jan,
  year =         "2000",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 09:50:12 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2000-5-1/p1-dos_santos/p1-dos_santos.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2000-5-1/p1-dos_santos/",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Fang:2000:MFP,
  author =       "Wen-Jong Fang and Allen C.-H. Wu",
  title =        "Multiway {FPGA} partitioning by fully exploiting
                 design hierarchy",
  journal =      j-TODAES,
  volume =       "5",
  number =       "1",
  pages =        "34--50",
  month =        jan,
  year =         "2000",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2000-5-1/p34-fang/p34-fang.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2000-5-1/p34-fang/",
  abstract =     "In this paper, we present a new integrated synthesis
                 and partitioning method for multiple-FPGA applications.
                 Our approach bridges the gap between HDL synthesis and
                 physical partitioning by fully exploiting the design
                 hierarchy. We propose a novel multiple-FPGA synthesis
                 and partitioning method which is performed in three
                 phases: (1) fine-grained synthesis, (2)
                 functional-based clustering, and (3) hierarchical
                 set-covering partitioning. This method first
                 synthesizes a design specification in a fine-grained
                 way so that functional clusters can be preserved based
                 on the structural nature of the design specification.
                 Then, it applies a hierarchical set-covering
                 partitioning method to form the final FPGA partitions.
                 Experimental results on a number of benchmarks and
                 industrial designs demonstrate that IO limits are the
                 bottleneck for CLB utilization when applying a
                 traditional multiple-FPGA synthesis method on flattened
                 netlists. In contrast, by fully exploiting the design
                 structural hierarchy during the multiple-FPGA
                 partitioning, our proposed method produces fewer FPGA
                 partitions with higher CLB and lower IO-pin
                 utilizations.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Design; Experimentation; Performance",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "fine-grained synthesis; functional clustering;
                 multi-way partitioning; multiple-FPGA synthesis",
  subject =      "Hardware --- Integrated Circuits --- Types and Design
                 Styles (B.7.1): {\bf Gate arrays}; Hardware ---
                 Integrated Circuits --- Design Aids (B.7.2)",
}

@Article{Hsiung:2000:CCM,
  author =       "Pao-Ann Hsiung",
  title =        "{CMAPS}: a cosynthesis methodology for
                 application-oriented parallel systems",
  journal =      j-TODAES,
  volume =       "5",
  number =       "1",
  pages =        "51--81",
  month =        jan,
  year =         "2000",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2000-5-1/p51-hsiung/p51-hsiung.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2000-5-1/p51-hsiung/",
  abstract =     "Currently, a lot of research is devoted to {\em system
                 design}, and little work is done on {\em requirements
                 analysis}. Besides going from specification to design,
                 one of our main objectives is to show how an
                 application problem can be transformed into
                 specifications. Working from the hardware-software
                 codesign perspective, a system is designed starting
                 from an application problem itself, rather than the
                 detailed behavioral specifications. Given an
                 application problem specified as a directed acyclic
                 graph of elementary problems, a hardware-software
                 solution is derived such that the synthesized software,
                 a parallel pseudoprogram, can be scheduled and executed
                 on the synthesized hardware, a set of system-level
                 parallel computer specifications, with heuristically
                 optimal performance. This is known as system-level
                 cosynthesis of application-oriented general-purpose
                 parallel systems for which a novel methodology called
                 {\em Cosynthesis Methodology for Application-Oriented
                 Parallel Systems\/} (CMAPS), is presented. Since
                 parallel programs and multiprocessor architectures are
                 largely interdependent, CMAPS explores the relationship
                 between hardware designs and software algorithms by
                 interleaving the modeling phases and the synthesis
                 phases of both hardware and software. High scalability
                 in terms of problem complexity and easy upgradability
                 to new technologies are achieved through modularization
                 of the input problem specification, of the software
                 algorithms, and of the hardware subsystem models. The
                 work presented in this paper will be beneficial to
                 designers of general-purpose parallel computer systems
                 which must be oriented toward solving some
                 user-specified problem such as the global controller of
                 an industry automation process or a multiprocessor
                 video server. Some application examples are given to
                 illustrate various codesign phases of CMAPS and its
                 feasibility.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Design",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "application-oriented general-purpose multiprocessors;
                 hardware-software modeling and cosynthesis;
                 requirements analysis",
  subject =      "Computer Applications --- Computer-Aided Engineering
                 (J.6): {\bf Computer-aided design (CAD)}; Computer
                 Systems Organization --- General (C.0): {\bf System
                 architectures}; Computer Systems Organization ---
                 General (C.0): {\bf Systems specification methodology};
                 Computer Systems Organization --- Processor
                 Architectures --- Multiple Data Stream Architectures
                 (Multiprocessors) (C.1.2); Computer Systems
                 Organization --- Computer System Implementation ---
                 General (C.5.0); Computer Systems Organization ---
                 Processor Architectures --- Parallel Architectures
                 (C.1.4)",
}

@Article{Mehta:2000:UFR,
  author =       "Dinesh P. Mehta and Naveed Sherwani",
  title =        "On the use of flexible, rectilinear blocks to obtain
                 minimum-area floorplans in mixed block and cell
                 designs",
  journal =      j-TODAES,
  volume =       "5",
  number =       "1",
  pages =        "82--97",
  month =        jan,
  year =         "2000",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2000-5-1/p82-mehta/p82-mehta.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2000-5-1/p82-mehta/",
  abstract =     "This paper presents three minimum-area floorplanning
                 algorithms that use flexible arbitrary rectilinear
                 shapes for the standard cell regions in MBC design. The
                 first algorithm (pure HCST) introduces a grid traversal
                 technique which guarantees a minimum-area floorplan.
                 The second algorithm (Hybrid-BF) uses a combination of
                 HCST and Breadth First (BF) traversals to give a
                 practical solution that approximately places flexible
                 blocks at specified locations called {\em seeds}. The
                 third algorithm (Hybrid-MBF) improves on the shapes of
                 the flexible blocks generated by Hybrid-BF by using a
                 combination of HCST and a Modified Breadth First (MBF)
                 traversal. All three algorithms are polynomial in the
                 number of grid squares. Optimized implementations of
                 Hybrid-BF and Hybrid-MBF required less than two seconds
                 on a SUN SPARCstation 10.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "floorplanning; mixed block and cell designs;
                 rectilinear polygons",
  subject =      "Hardware --- Integrated Circuits --- Design Aids
                 (B.7.2): {\bf Layout}; Theory of Computation ---
                 Analysis of Algorithms and Problem Complexity ---
                 Nonnumerical Algorithms and Problems (F.2.2): {\bf
                 Routing and layout}; Mathematics of Computing ---
                 Discrete Mathematics --- Graph Theory (G.2.2): {\bf
                 Graph algorithms}",
}

%%% TODAES 5(1), January 2000, pages 98--114: Sapatnekar and Chuang.
%%% NOTE(review): the title says ``gate sizing'' while the abstract and
%%% keywords say ``transistor sizing''; presumably both are as
%%% published --- confirm against the publisher's record before
%%% normalizing either.
@Article{Sapatnekar:2000:PDO,
  author =       "Sachin S. Sapatnekar and Weitong Chuang",
  title =        "Power-delay optimizations in gate sizing",
  journal =      j-TODAES,
  volume =       "5",
  number =       "1",
  pages =        "98--114",
  month =        jan,
  year =         "2000",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2000-5-1/p98-sapatnekar/p98-sapatnekar.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2000-5-1/p98-sapatnekar/",
  abstract =     "The problem of power-delay tradeoffs in transistor
                 sizing is examined using a nonlinear optimization
                 formulation. Both the dynamic and the short-circuit
                 power are considered, and a new modeling technique is
                 used to calculate the short-circuit power. The notion
                 of transition density is used, with an enhancement that
                 considers the effect of gate delays on the transition
                 density. When the short-circuit power is neglected, the
                 minimum power circuit is identical to the minimum area
                 circuit. However, under our more realistic models, our
                 experimental results on several circuits show that the
                 minimum power circuit is not necessarily the same as
                 the minimum area circuit.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "optimization; power estimation; transistor sizing;
                 VLSI layout",
  subject =      "Hardware --- Integrated Circuits --- Design Aids
                 (B.7.2): {\bf Layout}",
}

@Article{Benini:2000:SLPa,
  author =       "Luca Benini and Giovanni {De Micheli}",
  title =        "System-level power optimization: techniques and
                 tools",
  journal =      j-TODAES,
  volume =       "5",
  number =       "2",
  pages =        "115--192",
  month =        apr,
  year =         "2000",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2000-5-2/p115-benini/p115-benini.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2000-5-2/p115-benini/",
  abstract =     "This tutorial surveys design methods for
                 energy-efficient system-level design. We consider
                 electronic systems consisting of a hardware platform
                 and software layers. We consider the three major
                 constituents of hardware that consume energy, namely
                 computation, communication, and storage units, and we
                 review methods of reducing their energy consumption. We
                 also study models for analyzing the energy cost of
                 software, and methods for energy-efficient software
                 design and compilation. This survey is organized around
                 three main phases of a system design: conceptualization
                 and modeling, design and implementation, and runtime
                 management. For each phase, we review recent techniques
                 for energy-efficient design of both hardware and
                 software.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  subject =      "Hardware --- Integrated Circuits --- Design Aids
                 (B.7.2); Hardware --- Performance and Reliability ---
                 Performance Analysis and Design Aids (B.8.2); Computer
                 Systems Organization --- Processor Architectures ---
                 General (C.1.0); Software --- Software Engineering ---
                 Design Tools and Techniques (D.2.2)",
}

@Article{Cong:2000:SGD,
  author =       "Jason Cong and Yean-Yow Hwang",
  title =        "Structural gate decomposition for depth-optimal
                 technology mapping in {LUT-based} {FPGA} designs",
  journal =      j-TODAES,
  volume =       "5",
  number =       "2",
  pages =        "193--225",
  month =        apr,
  year =         "2000",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2000-5-2/p193-cong/p193-cong.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2000-5-2/p193-cong/",
  abstract =     "In this paper we study structural gate decomposition
                 in general, simple gate networks for depth-optimal
                 technology mapping using $K$-input Lookup-Tables
                 ($K$-LUTs). We show that (1) structural gate
                 decomposition in any $K$-bounded network results in an
                 optimal mapping depth smaller than or equal to that of
                 the original network, regardless of the decomposition
                 method used; and (2) the problem of structural gate
                 decomposition for depth-optimal technology mapping is
                 NP-hard for $K$-unbounded networks when $ K \geq 3$ and
                 remains NP-hard for $K$-bounded networks when $ K \geq
                 5$. Based on these results, we propose two new
                 structural gate decomposition algorithms, named {\tt
                 DOGMA} and {\tt DOGMA-m}, which combine the
                 level-driven node-packing technique (used in FlowMap)
                 and the network flow-based labeling technique (used in
                 {\tt Chortle-d}) for depth-optimal technology mapping.
                 Experimental results show that (1) among five
                 structural gate decomposition algorithms, {\tt DOGMA-m}
                 results in the best mapping solutions; and (2) compared
                 with {\tt speed\_up} (an algebraic algorithm) and {\tt
                 TOS} (a Boolean approach), {\tt DOGMA-m} completes
                 decomposition of all tested benchmarks in a short time
                 while {\tt speed\_up} and {\tt TOS} fail in several
                 cases. However, {\tt speed\_up} results in the smallest
                 depth and area in the following technology mapping
                 steps.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "computer-aided design of VLSI; decomposition; delay
                 minimization; FPGA; logic optimization; programmable
                 logic; simplification; synthesis; system design;
                 technology mapping",
  subject =      "Hardware --- Logic Design --- Design Styles (B.6.1);
                 Hardware --- Logic Design --- Design Aids (B.6.3);
                 Hardware --- Logic Design --- Design Aids (B.6.3): {\bf
                 Automatic synthesis}; Hardware --- Integrated Circuits
                 --- Types and Design Styles (B.7.1)",
}

@Article{Hwang:2000:PSS,
  author =       "Chi-Hong Hwang and Allen C.-H. Wu",
  title =        "A predictive system shutdown method for energy saving
                 of event-driven computation",
  journal =      j-TODAES,
  volume =       "5",
  number =       "2",
  pages =        "226--241",
  month =        apr,
  year =         "2000",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2000-5-2/p226-hwang/p226-hwang.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2000-5-2/p226-hwang/",
  abstract =     "This paper presents a system-level power management
                 technique for energy savings of event-driven
                 application. We present a new predictive
                 system-shutdown method to exploit sleep mode operations
                 for energy saving. We use an exponential-average
                 approach to predict the upcoming idle period. We
                 introduce two mechanisms, prediction-miss correction
                 and prewake-up, to improve the hit ratio and to reduce
                 the delay overhead. Experiments on four different
                 event-driven applications show that our proposed method
                 achieves high hit ratios in a wide range of delay
                 overheads, which results in a high degree of energy
                 saving with low delay penalties.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "event-driven; power management; predictive; sleep mode;
                 system shutdown",
  subject =      "Computer Applications --- Computer-Aided Engineering
                 (J.6)",
}

@Article{Sudarsanam:2000:SRA,
  author =       "Ashok Sudarsanam and Sharad Malik",
  title =        "Simultaneous reference allocation in code generation
                 for dual data memory bank {ASIPs}",
  journal =      j-TODAES,
  volume =       "5",
  number =       "2",
  pages =        "242--264",
  month =        apr,
  year =         "2000",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2000-5-2/p242-sudarsanam/p242-sudarsanam.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2000-5-2/p242-sudarsanam/",
  abstract =     "We address the problem of code generation for DSP
                 systems on a chip. In such systems, the amount of
                 silicon devoted to program ROM is limited, so
                 application software must be sufficiently dense.
                 Additionally, the software must be written so as to
                 meet various high-performance constraints, which may
                 include hard real-time constraints. Unfortunately,
                 current compiler technology is unable to generate
                 high-quality code for DSPs, whose architectures are
                 highly irregular. Thus, designers often resort to
                 programming application software in assembly---a
                 time-consuming task. In this paper, we focus on
                 providing support for one architectural feature of DSPs
                 that makes code generation difficult, namely multiple
                 data memory banks. This feature increases memory
                 bandwidth by permitting multiple data memory accesses
                 to occur in parallel when the referenced variables
                 belong to different data memory banks and the registers
                 involved conform to a strict set of conditions. We
                 present an algorithm that attempts to maximize the
                 benefit of this architectural feature. While previous
                 approaches have decoupled the phases of register
                 allocation and memory bank assignment, thereby
                 compromising code quality, our algorithm performs these
                 two phases simultaneously. Experimental results
                 demonstrate that our algorithm not only generates
                 high-quality compiled code, but also improves the
                 quality of completely-referenced code.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "code generation; code optimization; graph labelling;
                 memory bank assignment; register allocation",
  subject =      "Software --- Programming Languages --- Processors
                 (D.3.4); Software --- Programming Languages ---
                 Processors (D.3.4): {\bf Code generation}; Software ---
                 Programming Languages --- Processors (D.3.4): {\bf
                 Compilers}; Software --- Programming Languages ---
                 Processors (D.3.4): {\bf Optimization}",
}

@Article{Irwin:2000:E,
  author =       "Mary Jane Irwin",
  title =        "Editorial",
  journal =      j-TODAES,
  volume =       "5",
  number =       "3",
  pages =        "265--266",
  month =        jul,
  year =         "2000",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p265-irwin/p265-irwin.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p265-irwin/",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Bahar:2000:POT,
  author =       "R. Iris Bahar and Ernest T. Lampe and Enrico Macii",
  title =        "Power optimization of technology-dependent circuits
                 based on symbolic computation of logic implications",
  journal =      j-TODAES,
  volume =       "5",
  number =       "3",
  pages =        "267--293",
  month =        jul,
  year =         "2000",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p267-bahar/p267-bahar.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p267-bahar/",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "aids; automation; design synthesis; logic design",
  subject =      "Hardware --- Logic Design --- Design Styles (B.6.1):
                 {\bf Combinational logic}; Hardware --- Control
                 Structures and Microprogramming --- Microprogram Design
                 Aids (B.1.4): {\bf Optimization}; Hardware --- Logic
                 Design --- Design Aids (B.6.3): {\bf Optimization};
                 Hardware --- Performance and Reliability --- General
                 (B.8.0); Computer Applications --- Physical Sciences
                 and Engineering (J.2): {\bf Electronics}",
}

@Article{Balakrishnan:2000:AFS,
  author =       "M. Balakrishnan and Heman Khanna",
  title =        "Allocation of {FIFO} structures in {RTL} data paths",
  journal =      j-TODAES,
  volume =       "5",
  number =       "3",
  pages =        "294--310",
  month =        jul,
  year =         "2000",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p294-balakrishnan/p294-balakrishnan.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p294-balakrishnan/",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "data path; FIFO; ILP; RTL; synthesis",
  subject =      "Hardware --- Register-Transfer-Level Implementation
                 --- Design (B.5.1); Mathematics of Computing ---
                 Probability and Statistics (G.3): {\bf Queueing
                 theory}",
}

@Article{Benini:2000:SLPb,
  author =       "Luca Benini and Giovanni {De Micheli}",
  title =        "Synthesis of low-power selectively-clocked systems
                 from high-level specification",
  journal =      j-TODAES,
  volume =       "5",
  number =       "3",
  pages =        "311--321",
  month =        jul,
  year =         "2000",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p311-benini/p311-benini.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p311-benini/",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "algorithms; design; gated clock; high-level synthesis;
                 low power",
  subject =      "Hardware --- Control Structures and Microprogramming
                 --- Control Structure Performance Analysis and Design
                 Aids (B.1.2); Hardware --- Performance and Reliability
                 --- General (B.8.0); Theory of Computation ---
                 Computation by Abstract Devices --- Models of
                 Computation (F.1.1): {\bf Unbounded-action devices}",
}

@Article{Blythe:2000:EOD,
  author =       "Stephen A. Blythe and Robert A. Walker",
  title =        "Efficient optimal design space characterization
                 methodologies",
  journal =      j-TODAES,
  volume =       "5",
  number =       "3",
  pages =        "322--336",
  month =        jul,
  year =         "2000",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p322-blythe/p322-blythe.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p322-blythe/",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "bounding; clock-length determination; design space
                 exploration; efficient searching; high-level synthesis;
                 module selection; scheduling",
  subject =      "Computer Applications --- Computer-Aided Engineering
                 (J.6); Hardware --- Performance and Reliability ---
                 General (B.8.0); Computing Methodologies --- Simulation
                 and Modeling --- General (I.6.0); Computer Applications
                 --- Physical Sciences and Engineering (J.2): {\bf
                 Electronics}",
}

@Article{Bogliolo:2000:RBR,
  author =       "Alessandro Bogliolo and Luca Benini and Giovanni {De
                 Micheli}",
  title =        "Regression-based {RTL} power modeling",
  journal =      j-TODAES,
  volume =       "5",
  number =       "3",
  pages =        "337--372",
  month =        jul,
  year =         "2000",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p337-bogliolo/p337-bogliolo.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p337-bogliolo/",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "adaptive characterization; functional macros;
                 regression models; RTL design; RTL power modeling",
  subject =      "Hardware --- Register-Transfer-Level Implementation
                 --- Design Aids (B.5.2); Hardware --- Logic Design ---
                 Design Aids (B.6.3); Hardware --- Performance and
                 Reliability --- General (B.8.0); Computing Milieux ---
                 Management of Computing and Information Systems ---
                 Installation Management (K.6.2): {\bf Benchmarks}",
}

@Article{Bommu:2000:RBF,
  author =       "Surendra Bommu and Niall O'Neill and Maciej
                 Ciesielski",
  title =        "Retiming-based factorization for sequential logic
                 optimization",
  journal =      j-TODAES,
  volume =       "5",
  number =       "3",
  pages =        "373--398",
  month =        jul,
  year =         "2000",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p373-bommu/p373-bommu.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p373-bommu/",
  abstract =     "Current sequential optimization techniques apply a
                 variety of logic transformations that mainly target the
                 combinational logic component of the circuit. Retiming
                 is typically applied as a postprocessing step to the
                 gate-level implementation obtained after technology
                 mapping. This paper introduces a new sequential logic
                 transformation which integrates retiming with logic
                 transformations at the technology-independent level.
                 This transformation is based on implicit retiming
                 across logic blocks and fanout stems during logic
                 optimization. Its application to sequential network
                 synthesis results in the optimization of logic across
                 register boundaries. It can be used in conjunction with
                 any measure of circuit quality for which a fast and
                 reliable gain estimation method can be obtained. We
                 implemented our new technique within the SIS framework
                 and demonstrated its effectiveness in terms of
                 cycle-time minimization on a set of sequential
                 benchmark circuits.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Algorithms; Design",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "finite state machines; retiming; sequential synthesis",
  subject =      "Hardware --- General (B.0); Hardware --- Logic Design
                 (B.6)",
}

@Article{Carchiolo:2000:HSS,
  author =       "Vincenza Carchiolo and Michele Malgeri and Giuseppe
                 Mangioni",
  title =        "Hardware\slash software synthesis of formal
                 specifications in codesign of embedded systems",
  journal =      j-TODAES,
  volume =       "5",
  number =       "3",
  pages =        "399--432",
  month =        jul,
  year =         "2000",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p399-carchiolo/p399-carchiolo.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p399-carchiolo/",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "codesign; embedded system; hardware and software
                 synthesis",
  subject =      "Hardware --- Register-Transfer-Level Implementation
                 --- Design Aids (B.5.2); Computer Systems Organization
                 --- Special-Purpose and Application-Based Systems
                 (C.3): {\bf Real-time and embedded systems}; Computer
                 Systems Organization --- General (C.0); Software ---
                 Software Engineering --- Requirements/Specifications
                 (D.2.1); Theory of Computation --- Mathematical Logic
                 and Formal Languages --- Formal Languages (F.4.3)",
}

@Article{Chang:2000:TDR,
  author =       "Yao-Wen Chang and Kai Zhu and D. F. Wong",
  title =        "Timing-driven routing for symmetrical array-based
                 {FPGAs}",
  journal =      j-TODAES,
  volume =       "5",
  number =       "3",
  pages =        "433--450",
  month =        jul,
  year =         "2000",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p433-chang/p433-chang.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p433-chang/",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "computer-aided design of VLSI; field-programmable gate
                 array; layout; synthesis",
  subject =      "Hardware --- Integrated Circuits --- Types and Design
                 Styles (B.7.1): {\bf Gate arrays}; Theory of
                 Computation --- Analysis of Algorithms and Problem
                 Complexity --- Nonnumerical Algorithms and Problems
                 (F.2.2): {\bf Routing and layout}; Hardware ---
                 Integrated Circuits --- Design Aids (B.7.2): {\bf
                 Placement and routing}; Computer Applications ---
                 Computer-Aided Engineering (J.6)",
}

@Article{Gelosh:2000:MLT,
  author =       "Donald S. Gelosh and Dorothy E. Setliff",
  title =        "Modeling layout tools to derive forward estimates of
                 area and delay at the {RTL} level",
  journal =      j-TODAES,
  volume =       "5",
  number =       "3",
  pages =        "451--491",
  month =        jul,
  year =         "2000",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p451-gelosh/p451-gelosh.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p451-gelosh/",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "estimation; estimation techniques; layout; machine
                 learning; VLSI CAD",
  subject =      "Hardware --- Input/Output and Data Communications ---
                 Performance Analysis and Design Aids** (B.4.4);
                 Hardware --- Register-Transfer-Level Implementation ---
                 Design Aids (B.5.2): {\bf Automatic synthesis};
                 Computer Applications --- Computer-Aided Engineering
                 (J.6); Hardware --- Integrated Circuits --- Types and
                 Design Styles (B.7.1): {\bf VLSI (very large scale
                 integration)}; Computing Methodologies --- Artificial
                 Intelligence --- Learning (I.2.6): {\bf Concept
                 learning}; Computing Methodologies --- Simulation and
                 Modeling --- Simulation Output Analysis (I.6.6)",
}

@Article{Gogniat:2000:CBE,
  author =       "G. Gogniat and M. Auguin and L. Bianco and A.
                 Pegatoquet",
  title =        "A codesign back-end approach for embedded system
                 design",
  journal =      j-TODAES,
  volume =       "5",
  number =       "3",
  pages =        "492--509",
  month =        jul,
  year =         "2000",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p492-gogniat/p492-gogniat.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p492-gogniat/",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "codesign; communications synthesis; HW\slash SW
                 integration; template architecture",
  subject =      "Computer Systems Organization --- Special-Purpose and
                 Application-Based Systems (C.3): {\bf Real-time and
                 embedded systems}; Computer Applications ---
                 Computer-Aided Engineering (J.6); Hardware ---
                 Integrated Circuits --- Types and Design Styles
                 (B.7.1): {\bf Advanced technologies}",
}

@Article{Gupta:2000:CIP,
  author =       "Avaneendra Gupta and John P. Hayes",
  title =        "{CLIP}: integer-programming-based optimal layout
                 synthesis of {$2$D CMOS} cells",
  journal =      j-TODAES,
  volume =       "5",
  number =       "3",
  pages =        "510--547",
  month =        jul,
  year =         "2000",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p510-gupta/p510-gupta.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p510-gupta/",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "circuit clustering; CMOS networks; diffusion sharing;
                 integer linear programming; integer programming; layout
                 optimization; leaf cell synthesis; module generation;
                 transistor chains; two-dimensional layout",
  subject =      "Hardware --- Integrated Circuits --- Types and Design
                 Styles (B.7.1): {\bf Memory technologies}; Hardware ---
                 Integrated Circuits --- Design Aids (B.7.2): {\bf
                 Layout}; Hardware --- Integrated Circuits --- Design
                 Aids (B.7.2): {\bf Simulation}; Mathematics of
                 Computing --- Numerical Analysis --- Optimization
                 (G.1.6): {\bf Integer programming}; Software ---
                 Programming Languages --- Language Classifications
                 (D.3.2): {\bf Specialized application languages};
                 Computer Applications --- Computer-Aided Engineering
                 (J.6)",
}

@Article{Hsiao:2000:DST,
  author =       "Michael S. Hsiao and Elizabeth M. Rudnick and Janak H.
                 Patel",
  title =        "Dynamic state traversal for sequential circuit test
                 generation",
  journal =      j-TODAES,
  volume =       "5",
  number =       "3",
  pages =        "548--565",
  month =        jul,
  year =         "2000",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p548-hsiao/p548-hsiao.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p548-hsiao/",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "automatic test pattern generation (ATPG);
                 finite-state-machine traversal; genetic algorithms;
                 sequential circuits; simulation-based; testing",
  subject =      "Hardware --- Performance and Reliability ---
                 Reliability, Testing, and Fault-Tolerance (B.8.1);
                 Hardware --- Logic Design --- Design Styles (B.6.1):
                 {\bf Sequential circuits}; Computer Applications ---
                 Computer-Aided Engineering (J.6); Computing
                 Methodologies --- Artificial Intelligence --- Problem
                 Solving, Control Methods, and Search (I.2.8): {\bf
                 Heuristic methods}",
}

@Article{Jha:2000:HLL,
  author =       "Pradip K. Jha and Nikil D. Dutt",
  title =        "High-level library mapping for memories",
  journal =      j-TODAES,
  volume =       "5",
  number =       "3",
  pages =        "566--603",
  month =        jul,
  year =         "2000",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p566-jha/p566-jha.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p566-jha/",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "high-level synthesis; memory libraries;
                 technology-mapping",
  subject =      "Hardware --- Memory Structures --- Design Styles
                 (B.3.2): {\bf Primary memory}; Hardware ---
                 Register-Transfer-Level Implementation --- Design
                 (B.5.1): {\bf Memory design}; Computer Applications ---
                 Computer-Aided Engineering (J.6); Hardware ---
                 Register-Transfer-Level Implementation --- Design Aids
                 (B.5.2): {\bf Automatic synthesis}",
}

@Article{Lalgudi:2000:OCE,
  author =       "Kumar N. Lalgudi and Marios C. Papaefthymiou and
                 Miodrag Potkonjak",
  title =        "Optimizing computations for effective
                 block-processing",
  journal =      j-TODAES,
  volume =       "5",
  number =       "3",
  pages =        "604--630",
  month =        jul,
  year =         "2000",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p604-lalgudi/p604-lalgudi.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p604-lalgudi/",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "combinatorial optimization; computation dataflow
                 graphs; embedded systems; high-level synthesis; integer
                 linear programming; retiming; scheduling;
                 vectorization",
  subject =      "Computer Systems Organization --- Special-Purpose and
                 Application-Based Systems (C.3): {\bf Signal processing
                 systems}; Computing Methodologies --- Pattern
                 Recognition --- Applications (I.5.4): {\bf Signal
                 processing}; Mathematics of Computing --- Numerical
                 Analysis --- Optimization (G.1.6): {\bf Integer
                 programming}; Mathematics of Computing --- Discrete
                 Mathematics --- General (G.2.0); Theory of Computation
                 --- Analysis of Algorithms and Problem Complexity ---
                 Nonnumerical Algorithms and Problems (F.2.2): {\bf
                 Sequencing and scheduling}; Computing Methodologies ---
                 Artificial Intelligence --- Problem Solving, Control
                 Methods, and Search (I.2.8): {\bf Scheduling}; Computer
                 Applications --- Computer-Aided Engineering (J.6)",
}

@Article{Long:2000:FFA,
  author =       "David E. Long and Mahesh A. Iyer and Miron
                 Abramovici",
  title =        "{FILL} and {FUNI}: algorithms to identify illegal
                 states and sequentially untestable faults",
  journal =      j-TODAES,
  volume =       "5",
  number =       "3",
  pages =        "631--657",
  month =        jul,
  year =         "2000",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p631-long/p631-long.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p631-long/",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "automatic test generation; illegal states; sequential
                 circuits; untestable faults",
  subject =      "Hardware --- Performance and Reliability --- General
                 (B.8.0); Hardware --- Control Structures and
                 Microprogramming --- General (B.1.0); Hardware ---
                 Arithmetic and Logic Structures --- General (B.2.0);
                 Computer Applications --- Computer-Aided Engineering
                 (J.6); Hardware --- Arithmetic and Logic Structures ---
                 High-Speed Arithmetic (B.2.4): {\bf Algorithms};
                 Hardware --- Logic Design --- Design Styles (B.6.1):
                 {\bf Sequential circuits}; Hardware --- Logic Design
                 --- Design Aids (B.6.3); Hardware --- Integrated
                 Circuits --- Types and Design Styles (B.7.1)",
}

@Article{Marculescu:2000:SSM,
  author =       "Diana Marculescu and Radu Marculescu and Massoud
                 Pedram",
  title =        "Stochastic sequential machine synthesis with
                 application to constrained sequence generation",
  journal =      j-TODAES,
  volume =       "5",
  number =       "3",
  pages =        "658--681",
  month =        jul,
  year =         "2000",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p658-marculescu/p658-marculescu.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p658-marculescu/",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "algorithms; design; performance; theory",
  subject =      "Data --- Coding and Information Theory (E.4): {\bf
                 Data compaction and compression}; Computer Applications
                 --- Computer-Aided Engineering (J.6); Hardware ---
                 Logic Design --- Design Aids (B.6.3); Hardware ---
                 Integrated Circuits --- Types and Design Styles
                 (B.7.1): {\bf VLSI (very large scale integration)};
                 Hardware --- Performance and Reliability --- General
                 (B.8.0); Theory of Computation --- Computation by
                 Abstract Devices --- Models of Computation (F.1.1);
                 Mathematics of Computing --- Probability and Statistics
                 (G.3): {\bf Stochastic processes}",
}

@Article{Panda:2000:CVC,
  author =       "Preeti Ranjan Panda and Nikil D. Dutt and Alexandru
                 Nicolau",
  title =        "On-chip vs. off-chip memory: the data partitioning
                 problem in embedded processor-based systems",
  journal =      j-TODAES,
  volume =       "5",
  number =       "3",
  pages =        "682--704",
  month =        jul,
  year =         "2000",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p682-panda/p682-panda.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p682-panda/",
  abstract =     "Efficient utilization of on-chip memory space is
                 extremely important in modern embedded system
                 applications based on processor cores. In addition to a
                 data cache that interfaces with slower off-chip memory,
                 a fast on-chip SRAM, called Scratch-Pad memory, is
                 often used in several applications, so that critical
                 data can be stored there with a guaranteed fast access
                 time. We present a technique for efficiently exploiting
                 on-chip Scratch-Pad memory by partitioning the
                 application's scalar and arrayed variables into
                 off-chip DRAM and on-chip Scratch-Pad SRAM, with the
                 goal of minimizing the total execution time of embedded
                 applications. We also present extensions of our
                 proposed memory assignment strategy to handle context
                 switching between multiple programs, as well as a
                 generalized memory hierarchy. Our experiments on code
                 kernels from typical applications show that our
                 technique results in significant performance
                 improvements.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Algorithms; Measurement; Performance",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "data cache; data partitioning; memory synthesis;
                 on-chip memory; scratch-pad memory; system design;
                 system synthesis",
  subject =      "Hardware --- Memory Structures --- Design Styles
                 (B.3.2): {\bf Cache memories}; Software --- Programming
                 Languages --- Processors (D.3.4): {\bf Compilers}",
}

@Article{Raimi:2000:EML,
  author =       "Richard Raimi and Ramin Hojati and Kedar S. Namjoshi",
  title =        "Environment modeling and language universality",
  journal =      j-TODAES,
  volume =       "5",
  number =       "3",
  pages =        "705--725",
  month =        jul,
  year =         "2000",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p705-raimi/p705-raimi.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p705-raimi/",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "abstraction; environment modeling; language
                 universality; model checking",
  subject =      "Hardware --- Performance and Reliability ---
                 Reliability, Testing, and Fault-Tolerance (B.8.1);
                 Computer Systems Organization --- Performance of
                 Systems (C.4); Computer Applications --- Computer-Aided
                 Engineering (J.6); Theory of Computation ---
                 Computation by Abstract Devices --- Models of
                 Computation (F.1.1): {\bf Automata}; Software ---
                 Software Engineering --- Software/Program Verification
                 (D.2.4): {\bf Model checking}; Theory of Computation
                 --- Computation by Abstract Devices --- Models of
                 Computation (F.1.1): {\bf Unbounded-action devices}",
}

@Article{Yan:2000:TLB,
  author =       "Jin-Tai Yan",
  title =        "Three-layer bubble-sorting-based {nonManhattan}
                 channel routing",
  journal =      j-TODAES,
  volume =       "5",
  number =       "3",
  pages =        "726--734",
  month =        jul,
  year =         "2000",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p726-yan/p726-yan.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p726-yan/",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "bubble-sorting algorithm; channel routing; three-layer
                 nonManhattan routing model",
  subject =      "Theory of Computation --- Analysis of Algorithms and
                 Problem Complexity --- Nonnumerical Algorithms and
                 Problems (F.2.2): {\bf Routing and layout}; Hardware
                 --- Integrated Circuits --- Design Aids (B.7.2): {\bf
                 Placement and routing}; Hardware --- Integrated
                 Circuits --- Design Aids (B.7.2): {\bf Verification};
                 Hardware --- Performance and Reliability --- General
                 (B.8.0); Computer Applications --- Computer-Aided
                 Engineering (J.6); Hardware --- Input/Output and Data
                 Communications --- Input/Output Devices (B.4.2): {\bf
                 Channels and controllers}",
}

@Article{Yang:2000:ERC,
  author =       "Cheng-Hsing Yang and Sao-Jie Chen and Jan-Ming Ho and
                 Chia-Chun Tsai",
  title =        "Efficient routability check algorithms for segmented
                 channel routing",
  journal =      j-TODAES,
  volume =       "5",
  number =       "3",
  pages =        "735--747",
  month =        jul,
  year =         "2000",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p735-yang/p735-yang.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p735-yang/",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "field programmable gate arrays (FPGAs); routing;
                 segmented channel",
  subject =      "Hardware --- Input/Output and Data Communications ---
                 Input/Output Devices (B.4.2): {\bf Channels and
                 controllers}; Hardware --- Integrated Circuits ---
                 Types and Design Styles (B.7.1): {\bf Gate arrays};
                 Computer Applications --- Computer-Aided Engineering
                 (J.6); Hardware --- Integrated Circuits --- Design Aids
                 (B.7.2): {\bf Placement and routing}; Theory of
                 Computation --- Analysis of Algorithms and Problem
                 Complexity --- Nonnumerical Algorithms and Problems
                 (F.2.2): {\bf Routing and layout}",
}

@Article{Marwedel:2000:GE,
  author =       "Peter Marwedel",
  title =        "Guest {Editorial}",
  journal =      j-TODAES,
  volume =       "5",
  number =       "4",
  pages =        "749--751",
  month =        oct,
  year =         "2000",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2000-5-4/p749-marwedel/p749-marwedel.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2000-5-4/p749-marwedel/",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  subject =      "Computing Milieux --- Computers and Society ---
                 Organizational Impacts (K.4.3)",
}

@Article{Aditya:2000:CSM,
  author =       "Shail Aditya and Scott A. Mahlke and B. Ramakrishna
                 Rau",
  title =        "Code size minimization and retargetable assembly for
                 custom {EPIC} and {VLIW} instruction formats",
  journal =      j-TODAES,
  volume =       "5",
  number =       "4",
  pages =        "752--773",
  month =        oct,
  year =         "2000",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2000-5-4/p752-aditya/p752-aditya.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2000-5-4/p752-aditya/",
  abstract =     "PICO is a fully automated system for designing the
                 architecture and the microarchitecture of VLIW and EPIC
                 processors. A serious concern with this class of
                 processors, due to their very long instructions, is
                 their code size. One focus of this paper is to describe
                 a series of code size minimization techniques used
                 within PICO, some of which are applied during the
                 automatic design of the instruction format, while
                 others are applied during program assembly. The design
                 of a retargetable assembler to support these techniques
                 also poses certain novel challenges, which constitute
                 the second focus of this paper. Contrary to widely held
                 perceptions, we demonstrate that it is entirely
                 possible to design VLIW and EPIC processors that are
                 capable of issuing large numbers of operations per
                 cycle, but whose code size is only moderately larger
                 than that for a sequential CISC processor.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Design; Experimentation; Measurement",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "code size minimization; custom templates; design
                 automation; EPIC; instruction format design; noop
                 compression; retargetable assembly; VLIW",
  subject =      "Computer Systems Organization --- Processor
                 Architectures --- Single Data Stream Architectures
                 (C.1.1): {\bf RISC/CISC, VLIW architectures}; Software
                 --- Programming Languages --- Processors (D.3.4): {\bf
                 Code generation}; Software --- Programming Languages
                 --- Processors (D.3.4): {\bf Retargetable compilers};
                 Hardware --- Control Structures and Microprogramming
                 --- Control Structure Performance Analysis and Design
                 Aids (B.1.2)",
}

@Article{VanEijk:2000:CAC,
  author =       "Koen {Van Eijk} and Bart Mesman and Carlos A. Alba
                 Pinto and Qin Zhao and Marco Bekooij and Jef {Van
                 Meerbergen} and Jochen Jess",
  title =        "Constraint analysis for code generation: basic
                 techniques and applications in {FACTS}",
  journal =      j-TODAES,
  volume =       "5",
  number =       "4",
  pages =        "774--793",
  month =        oct,
  year =         "2000",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 09:50:12 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2000-5-4/p774-van_eijk/p774-van_eijk.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2000-5-4/p774-van_eijk/",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

%%% TODAES volume 5, number 4 is the October 2000 issue of this
%%% quarterly journal, so the month macro for this entry is oct.
@Article{Leupers:2000:GBC,
  author =       "Rainer Leupers and Steven Bashford",
  title =        "Graph-based code selection techniques for embedded
                 processors",
  journal =      j-TODAES,
  volume =       "5",
  number =       "4",
  pages =        "794--814",
  month =        oct,
  year =         "2000",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2000-5-4/p794-leupers/p794-leupers.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2000-5-4/p794-leupers/",
  abstract =     "Code selection is an important task in code generation
                 for programmable processors, where the goal is to find
                 an efficient mapping of machine-independent
                 intermediate code to processor-specific machine
                 instructions. Traditional approaches to code selection
                 are based on tree parsing which enables fast and
                 optimal code selection for intermediate code given as a
                 set of data-flow trees. While this approach is
                 generally useful in compilers for general-purpose
                 processors, it may lead to poor code quality in the
                 case of embedded processors. The reason is that the
                 special architectural features of embedded processors
                 require performing code selection on data-flow graphs,
                 which are a more general representation of intermediate
                 code. In this paper, we present data-flow graph-based
                 code selection techniques for two architectural
                 families of embedded processors: media processors with
                 support for SIMD instructions and fixed-point DSPs with
                 irregular data paths. Both techniques exploit the fact
                 that, in the area of embedded systems, high code
                 quality is a much more important goal than high
                 compilation speed. We demonstrate that certain
                 architectural features can only be utilized by
                 graph-based code selection, while in other cases this
                 approach leads to a significant increase in code
                 quality as compared to tree-based code selection.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Algorithms; Experimentation",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "code selection; data-flow graphs; embedded processors;
                 irregular data paths; SIMD instructions",
  subject =      "Software --- Programming Languages --- Processors
                 (D.3.4): {\bf Code generation}",
}

%%% TODAES volume 5, number 4 is the October 2000 issue of this
%%% quarterly journal, so the month macro for this entry is oct.
%%% In the abstract, TMS320C62x is a device-family name: the trailing
%%% x is a literal letter, not a multiplication sign, unlike the two
%%% speed-up factors later in the same sentence.
@Article{Pees:2000:RCS,
  author =       "Stefan Pees and Andreas Hoffmann and Heinrich Meyr",
  title =        "Retargetable compiled simulation of embedded
                 processors using a machine description language",
  journal =      j-TODAES,
  volume =       "5",
  number =       "4",
  pages =        "815--834",
  month =        oct,
  year =         "2000",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2000-5-4/p815-pees/p815-pees.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2000-5-4/p815-pees/",
  abstract =     "Fast processor simulators are needed for the software
                 development of embedded processors, for HW/SW
                 cosimulation systems, and for profiling and design of
                 application-specific processors. Such fast simulators
                 can be generated based on the machine description
                 language LISA. Using this language to model processor
                 architectures enables the generation of compiled
                 simulators on various abstraction levels, assemblers,
                 and compiler back ends. The article discusses the
                 requirements of software development tools on processor
                 models and presents the approach based on the LISA
                 language. Furthermore, the implementation of a
                 retargetable environment consisting of compiled
                 simulator, debugger, and assembler is presented.
                 Measurements for a verified, cycle-based LISA model of
                 the TI TMS320C62x DSP show that this approach
                 achieves between 37$ \times $ and 170$ \times $ higher
                 simulation speed compared to a commercial simulator
                 using a standard technique and the same accuracy
                 level.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Design; Languages; Performance; Verification",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "compiled simulation; DSP processors; HW/SW
                 cosimulation; instruction set simulators; machine
                 description languages; processor modeling and
                 simulation; system-on-chip",
  subject =      "Computing Methodologies --- Simulation and Modeling
                 --- Model Development (I.6.5): {\bf Modeling
                 methodologies}; Computer Systems Organization ---
                 Special-Purpose and Application-Based Systems (C.3):
                 {\bf Real-time and embedded systems}; Hardware ---
                 Control Structures and Microprogramming --- Control
                 Structure Performance Analysis and Design Aids (B.1.2):
                 {\bf Simulation**}",
}

%%% First article (pages 1--25) of TODAES volume 6, number 1, which
%%% is the January 2001 issue, so the month macro is jan.
@Article{Bakshi:2001:PCH,
  author =       "Smita Bakshi and Daniel D. Gajski",
  title =        "Performance-constrained hierarchical pipelining for
                 behaviors, loops, and operations",
  journal =      j-TODAES,
  volume =       "6",
  number =       "1",
  pages =        "1--25",
  month =        jan,
  year =         "2001",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 09:50:12 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/citations/journals/todaes/2001-6-1/p1-bakshi/",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

%%% Test access architectures for core-based SOC designs
%%% (TODAES 6(1), January 2001, pages 26--49).
@Article{Chakrabarty:2001:OTA,
  author =       "Krishnendu Chakrabarty",
  title =        "Optimal test access architectures for
                 system-on-a-chip",
  journal =      j-TODAES,
  volume =       "6",
  number =       "1",
  pages =        "26--49",
  month =        jan,
  year =         "2001",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2001-6-1/p26-chakrabarty/p26-chakrabarty.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2001-6-1/p26-chakrabarty/",
  abstract =     "Test access is a major problem for core-based
                 system-on-a-chip (SOC) designs. Since embedded cores in
                 an SOC are not directly accessible via chip inputs and
                 outputs, special access mechanisms are required to test
                 them at the system level. An efficient test access
                 architecture should also reduce test cost by minimizing
                 test application time. We address several issues
                 related to the design of optimal test access
                 architectures that minimize testing time, including
                 the assignment of cores to test buses, distribution of
                 test data width between multiple test buses, and
                 analysis of test data width required to satisfy an
                 upper bound on the testing time. Even though the
                 decision versions of all these problems are shown to be
                 NP-complete, they can be solved exactly for practical
                 instances using integer linear programming (ILP). As a
                 case study, the ILP models for two hypothetical but
                 nontrivial systems are solved using a public-domain ILP
                 software package.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Design; Reliability",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  subject =      "Hardware --- Integrated Circuits --- Types and Design
                 Styles (B.7.1); Hardware --- Integrated Circuits ---
                 Design Aids (B.7.2); Hardware --- Integrated Circuits
                 --- Reliability and Testing** (B.7.3)",
}

%%% Architecture-level power estimation for processors
%%% (TODAES 6(1), January 2001, pages 50--66).
@Article{Chen:2001:ALP,
  author = {Chen, Rita Yu and Irwin, Mary Jane and Bajwa, Raminder S.},
  title = {Architecture-level power estimation and design
    experiments},
  journal = j-TODAES,
  volume = {6},
  number = {1},
  pages = {50--66},
  month = jan,
  year = {2001},
  CODEN = {ATASFO},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Fri Jul 27 10:05:33 MDT 2001},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL = {http://www.acm.org/pubs/articles/journals/todaes/2001-6-1/p50-chen/p50-chen.pdf;
    http://www.acm.org/pubs/citations/journals/todaes/2001-6-1/p50-chen/},
  abstract = {Architecture-level power estimation has received more
    attention recently because of its efficiency. This
    article presents a technique used to do power analysis
    of processors at the architecture level. It provides
    cycle-by-cycle power consumption data of the
    architecture on the basis of the instruction/data flow
    stream. To characterize the power dissipation of
    control units, a novel hierarchical method has been
    developed. Using this technique, a power estimator is
    implemented for a commercial processor. The accuracy of
    the estimator is validated by comparing the power
    values it produces against measurements made by a
    gate-level power simulator for the same benchmark set.
    Our estimation approach is shown to provide very
    efficient and accurate power analysis at the
    architecture level. The energy models built for
    first-pass estimation (such as ALU, MAC unit, register
    files) are reusable for future architecture design
    modification. In this article, we demonstrate the
    application of the technique. Furthermore, this
    technique can evaluate various kinds of software to
    achieve hardware/software codesign for low power.},
  acknowledgement = ack-nhfb,
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  generalterms = {Design; Experimentation; Performance},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {architecture tradeoff; architecture-level power
    estimation; computer-aided design of VLSI; control
    unit; energy model; energy table; functional unit;
    hardware/software codesign; instruction format
    transition; low power design; output signal transition;
    power analysis and estimation; switch capacitance},
  subject = {Computer Applications --- Computer-Aided Engineering
    (J.6)},
}

%%% POSE parallel object-oriented synthesis environment
%%% (TODAES 6(1), January 2001, pages 67--92).
@Article{Hsiung:2001:PPO,
  author =       "Pao-Ann Hsiung",
  title =        "{POSE}: a parallel object-oriented synthesis
                 environment",
  journal =      j-TODAES,
  volume =       "6",
  number =       "1",
  pages =        "67--92",
  month =        jan,
  year =         "2001",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2001-6-1/p67-hsiung/p67-hsiung.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2001-6-1/p67-hsiung/",
  abstract =     "Design automation tools and methodologies always
                 encounter a problem of how systems may be designed
                 efficiently, including issues such as static modeling
                 and dynamic manipulation of system parts. With the
                 rapid progress of design technology, the continuously
                 increasing number of different choices per system part
                 and the growing complexity of today's systems, the
                 efficiency of the design environment is not only a
                 major concern now, but will also be a demanding problem
                 in the near future. In contrast to heuristic methods, a
                 novel environment called POSE is proposed that
                 increases efficiency during design without losing
                 optimality in the final design results. System parts
                 are modeled using the popular object-oriented modeling
                 technique and are dynamically manipulated using the
                 parallel design technique. A complete integration of
                 object-oriented and parallel techniques is one of the
                 major features of POSE. Common problems related to
                 parallel design such as {\em emptiness\/} and {\em
                 deadlock\/} are also elegantly solved within POSE.
                 Experimental results and formal analysis based on POSE
                 all show its practical and theoretical usefulness. POSE
                 can be used at any level of synthesis as long as
                 off-the-shelf building-blocks manipulation is required.
                 POSE can be applied especially to {\em system-level\/}
                 synthesis, whose targets can be parallel computer
                 architectures, systems-on-chip, or embedded systems. We
                 will show how POSE has been applied to ICOS, a recently
                 proposed synthesis methodology. Furthermore, POSE can
                 be easily integrated with other heuristic design
                 methodologies to allow increased design efficiency.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Design",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "design-completion check; hardware synthesis;
                 object-oriented technology; parallel design; synthesis
                 rollback",
  subject =      "Computer Applications --- Computer-Aided Engineering
                 (J.6): {\bf Computer-aided design (CAD)}; Hardware ---
                 Miscellaneous (B.m): {\bf Design management}",
}

%%% Co-synthesis of pipelined instruction set processors with PIPER
%%% (TODAES 6(1), January 2001, pages 93--121).
%%% Keywords are separated by plain semicolons; a backslash before a
%%% semicolon would insert a TeX spacing macro instead of a separator.
%%% IID in the abstract abbreviates inter-instruction dependency.
@Article{Huang:2001:CSP,
  author =       "Ing-Jer Huang",
  title =        "Co-synthesis of pipelined structures and instruction
                 reordering constraints for instruction set processors",
  journal =      j-TODAES,
  volume =       "6",
  number =       "1",
  pages =        "93--121",
  month =        jan,
  year =         "2001",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2001-6-1/p93-huang/p93-huang.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2001-6-1/p93-huang/",
  abstract =     "This paper presents a hardware/software co-synthesis
                 approach to pipelined ISP (instruction set processor)
                 design. The approach synthesizes the pipeline structure
                 from a given instruction set architecture (behavioral)
                 specification. In addition, it generates a set of
                 reordering constraints that guides the compiler
                 back-end (reorderer) to properly schedule instructions
                 so that possible pipeline hazards are avoided and
                 throughput is improved. \par

                 Co-synthesis takes place while resolving pipeline
                 hazards, which can be attributed to inter-instruction
                 dependencies (IIDs). An extended taxonomy of IIDs has
                 been proposed for the systematic analysis of pipeline
                 hazards. Hardware/software methods are developed to
                 resolve IIDs. Algorithms based on taxonomy and
                 resolutions are constructed and integrated into the
                 pipeline synthesis process to explore hardware and
                 software design space. Application benchmarks are used
                 to evaluate possible designs and guide the design
                 decision. The power of the co-synthesis tool PIPER is
                 demonstrated through pipeline synthesis of one
                 illustrative example and two ISPs, including an
                 industrial one (TDY-43). In comparison with other
                 related approaches, our approach achieves higher
                 throughput and provides a systematic way to explore the
                 hardware/software trade-off.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Design; Theory",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "compiler instruction optimization; instruction set
                 processor; pipeline hazards; pipeline taxonomy;
                 synthesis",
  subject =      "Hardware --- Control Structures and Microprogramming
                 --- Control Structure Performance Analysis and Design
                 Aids (B.1.2): {\bf Automatic synthesis**}",
}

%%% Mapping algorithm for architectural exploration of embedded
%%% systems (TODAES 6(1), January 2001, pages 122--147).
@Article{Mariatos:2001:MAC,
  author = {Mariatos, E. P. and Birbas, A. N. and Birbas, M. K.},
  title = {A mapping algorithm for computer-assisted exploration
    in the design of embedded systems},
  journal = j-TODAES,
  volume = {6},
  number = {1},
  pages = {122--147},
  month = jan,
  year = {2001},
  CODEN = {ATASFO},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Fri Jul 27 10:05:33 MDT 2001},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  note = {See note \cite{Chen:2007:NMA}.},
  URL = {http://www.acm.org/pubs/articles/journals/todaes/2001-6-1/p122-mariatos/p122-mariatos.pdf;
    http://www.acm.org/pubs/citations/journals/todaes/2001-6-1/p122-mariatos/},
  abstract = {We present a technique for automatic exploration of
    architectural alternatives in the design of complex
    electronic embedded systems and systems-on-a-chip. The
    technique transforms the problem into a set of simple
    model-to-model operations and a mapping algorithm that
    becomes the core of the entire design process. The
    mapping algorithm is formulated as an assignment-type
    problem (ATP), which is, in turn, solved by a
    straightforward optimization method. The result is a
    design assistance tool, which is demonstrated through a
    telecommunication systems example.},
  acknowledgement = ack-nhfb,
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  generalterms = {Design; Experimentation},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {codesign; embedded system design space exploration;
    specification mapping},
  subject = {Computer Systems Organization --- Special-Purpose and
    Application-Based Systems (C.3); Software --- Software
    Engineering --- Design Tools and Techniques (D.2.2):
    {\bf Computer-aided software engineering (CASE)}},
}

%%% First article (pages 149--206) of TODAES volume 6, number 2,
%%% which is the April 2001 issue, so the month macro is apr.
@Article{Panda:2001:DMO,
  author =       "P. R. Panda and F. Catthoor and N. D. Dutt and K.
                 Danckaert and E. Brockmeyer and C. Kulkarni and A.
                 Vandercappelle and P. G. Kjeldsberg",
  title =        "Data and memory optimization techniques for embedded
                 systems",
  journal =      j-TODAES,
  volume =       "6",
  number =       "2",
  pages =        "149--206",
  month =        apr,
  year =         "2001",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2001-6-2/p149-panda/p149-panda.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2001-6-2/p149-panda/",
  abstract =     "We present a survey of the state-of-the-art techniques
                 used in performing data and memory-related
                 optimizations in embedded systems. The optimizations
                 are targeted directly or indirectly at the memory
                 subsystem, and impact one or more out of three
                 important cost metrics: area, performance, and power
                 dissipation of the resulting implementation. \par

                 We first examine architecture-independent optimizations
                 in the form of code transformations. We next cover a
                 broad spectrum of optimization techniques that address
                 memory architectures at varying levels of granularity,
                 ranging from register files to on-chip memory, data
                 caches, and dynamic memory (DRAM). We end with memory
                 addressing related issues.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Algorithms; Design; Experimentation; Performance",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "address generation; allocation; architecture
                 exploration; code transformation; data cache; data
                 optimization; DRAM; high-level synthesis; memory
                 architecture customization; memory power dissipation;
                 register file; size estimation; SRAM; survey",
  subject =      "Hardware --- Memory Structures --- General (B.3.0);
                 Hardware --- Register-Transfer-Level Implementation ---
                 Design (B.5.1): {\bf Memory design}; Hardware ---
                 Register-Transfer-Level Implementation --- Design Aids
                 (B.5.2): {\bf Optimization}; Hardware --- Integrated
                 Circuits --- Types and Design Styles (B.7.1): {\bf
                 Memory technologies}; Software --- Programming
                 Languages --- Processors (D.3.4): {\bf Optimization}",
}

%%% Synthesis algorithm for time-constrained heterogeneous adaptive
%%% systems (TODAES 6(2), April 2001, pages 207--225).
@Article{Shenoy:2001:ASL,
  author = {Shenoy, Nagaraj and Choudhary, Alok and Banerjee,
    Prithviraj},
  title = {An algorithm for synthesis of large time-constrained
    heterogeneous adaptive systems},
  journal = j-TODAES,
  volume = {6},
  number = {2},
  pages = {207--225},
  month = apr,
  year = {2001},
  CODEN = {ATASFO},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Fri Jul 27 10:05:33 MDT 2001},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL = {http://www.acm.org/pubs/articles/journals/todaes/2001-6-2/p207-shenoy/p207-shenoy.pdf;
    http://www.acm.org/pubs/citations/journals/todaes/2001-6-2/p207-shenoy/},
  abstract = {Large time-constrained applications are highly
    computer-intensive and are often implemented as a
    complex organization of pipelined data parallel tasks
    on a pool of embedded processors, DSP processors, and
    FPGAs. The large number of design alternatives
    available at each task level, the application as a
    whole, and the special needs of the reconfigurable
    devices (such as the FPGA) make the manual synthesis of
    such systems very tedious. \par

    The automatic synthesis algorithm in this paper
    combines exact (MILP-based) and heuristic techniques to
    solve this problem, which basically involves (1)
    propagation of timing constraints; (2) pipelining the
    loops to meet throughput requirements; (3) resource
    selection and scheduling, keeping the processing
    requirements and the timing constraints in view; (4)
    scheduling the resources across the tasks to ensure
    maximum utilization; and (5) hiding the reconfiguration
    delays of the FPGAs. \par

    While the use of MILP techniques helps in getting
    high-quality results, combining them with heuristics
    ensures acceptable synthesis times, striking a good
    balance between quality of results and synthesis time.
    Our experimental evaluation of the algorithm shows an
    average 40\% in resource cost reduction (compared to
    manual synthesis) with synthesis times from minutes to
    as low as a few seconds in some cases.},
  acknowledgement = ack-nhfb,
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  generalterms = {Algorithms; Design; Experimentation},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {delay/cost table; hierarchical control data-flow
    graph; list scheduling; mixed integer linear
    programming; pipelining; reconfigurable computing;
    time-constrained synthesis},
  subject = {Computer Applications --- Computer-Aided Engineering
    (J.6): {\bf Computer-aided design (CAD)}; Computer
    Systems Organization --- Special-Purpose and
    Application-Based Systems (C.3): {\bf Real-time and
    embedded systems}},
}

%%% Intrinsic response extraction for analog testability buses
%%% (TODAES 6(2), April 2001, pages 226--243).
@Article{Su:2001:IRA,
  author = {Su, Chauchin and Chen, Yue-Tsang and Jou, Shyh-Jye},
  title = {Intrinsic response for analog module testing using an
    analog testability bus},
  journal = j-TODAES,
  volume = {6},
  number = {2},
  pages = {226--243},
  month = apr,
  year = {2001},
  CODEN = {ATASFO},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Fri Jul 27 10:05:33 MDT 2001},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL = {http://www.acm.org/pubs/articles/journals/todaes/2001-6-2/p226-su/p226-su.pdf;
    http://www.acm.org/pubs/citations/journals/todaes/2001-6-2/p226-su/},
  abstract = {A parasitic effect removal methodology is proposed to
    handle the large parasitic effects in analog
    testability buses. The removal is done by an on-chip
    test generation technique and an intrinsic response
    extraction algorithm. On-chip test generation creates
    test signals on-chip to avoid the parasitic effects of
    the test application bus. The intrinsic response
    extraction cross-checks and cancels the parasitic
    effects of both test application and response
    observation paths. The tests using both SPICE
    simulation and MNABST-1 P1149.4 test chip reveal that
    the proposed algorithm can not only remove the
    parasitic effects of the test buses but also tolerate
    test signal variations. Furthermore, it is robust
    enough to handle loud environmental noise and the
    nonlinearity of the switching devices.},
  acknowledgement = ack-nhfb,
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  generalterms = {Experimentation; Theory},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {analog testability bus; analog testing; boundary scan;
    design for testability; intrinsic response},
  subject = {Hardware --- Performance and Reliability ---
    Reliability, Testing, and Fault-Tolerance (B.8.1)},
}

@article{Huang:2001:VSE,
  author          = {Shi-Yu Huang and Kwang-Ting Cheng and Kuang-Chien Chen},
  title           = {Verifying sequential equivalence using {ATPG} techniques},
  journal         = j-TODAES,
  volume          = {6},
  number          = {2},
  pages           = {244--275},
  month           = apr,
  year            = {2001},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Fri Jul 27 10:05:33 MDT 2001},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  url             = {http://www.acm.org/pubs/articles/journals/todaes/2001-6-2/p244-huang/p244-huang.pdf;
                     http://www.acm.org/pubs/citations/journals/todaes/2001-6-2/p244-huang/},
  abstract        = {In this paper we address the problem of verifying
                     the equivalence of two sequential circuits.
                     State-of-the-art sequential optimization techniques
                     such as retiming and sequential redundancy removal
                     can handle designs with up to hundreds or even
                     thousands of flip-flops. However, the BDD-based
                     approaches for verifying sequential equivalence can
                     easily run into memory explosion for such designs.
                     In an attempt to handle larger circuits, we modify
                     test pattern-generation techniques for verification.
                     The suggested approach utilizes the popular
                     efficient backward-justification technique used in
                     most sequential ATPG programs. We present several
                     techniques to enhance the efficiency of this
                     approach by (1) identifying equivalent flip-flop
                     pairs using an induction-based algorithm, and (2)
                     generalizing the idea of exploring the structural
                     similarity between circuits to perform verification
                     in stages. This ATPG-based framework is suitable for
                     verifying circuits either with or without a reset
                     state. In order to extend this approach to verify
                     retimed circuits, we introduce a
                     delay-compensation-based algorithm for preprocessing
                     the circuits. The experimental results of verifying
                     the correctness of circuits after sequential
                     redundancy removal and retiming with up to several
                     hundred flip-flops are presented.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  subject         = {Hardware --- Logic Design --- Design Aids (B.6.3):
                     {\bf Verification}; Hardware --- Logic Design ---
                     Design Styles (B.6.1): {\bf Sequential circuits}},
}

@article{VanPraet:2001:PMC,
  author          = {J. {Van Praet} and D. Lanneer and W. Geurts and
                     G. Goossens},
  title           = {Processor modeling and code selection for
                     retargetable compilation},
  journal         = j-TODAES,
  volume          = {6},
  number          = {3},
  pages           = {277--307},
  month           = jul,
  year            = {2001},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Tue Feb 19 14:35:45 MST 2002},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Kagaris:2001:NHC,
  author          = {D. Kagaris and S. Tragoudas},
  title           = {{Von Neumann} hybrid cellular automata for
                     generating deterministic test sequences},
  journal         = j-TODAES,
  volume          = {6},
  number          = {3},
  pages           = {308--321},
  month           = jul,
  year            = {2001},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Tue Feb 19 14:35:45 MST 2002},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Liao:2001:CPT,
  author          = {Swanwa Liao and Mario A. Lopez and Dinesh Mehta},
  title           = {Constrained polygon transformations for incremental
                     floorplanning},
  journal         = j-TODAES,
  volume          = {6},
  number          = {3},
  pages           = {322--342},
  month           = jul,
  year            = {2001},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Tue Feb 19 14:35:45 MST 2002},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Chu:2001:CFS,
  author          = {Chris Chu and D. F. Wong},
  title           = {Closed form solutions to simultaneous buffer
                     insertion\slash sizing and wire sizing},
  journal         = j-TODAES,
  volume          = {6},
  number          = {3},
  pages           = {343--371},
  month           = jul,
  year            = {2001},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Tue Feb 19 14:35:45 MST 2002},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Hu:2001:ELA,
  author          = {Xiaobo Sharon Hu and Danny Z. Chen and
                     Rajeshkumar Sambandam},
  title           = {Efficient list-approximation techniques for
                     floorplan area minimization},
  journal         = j-TODAES,
  volume          = {6},
  number          = {3},
  pages           = {372--400},
  month           = jul,
  year            = {2001},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Tue Feb 19 14:35:45 MST 2002},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Nourani:2001:ITI,
  author          = {Mehrdad Nourani and Joan Carletta and
                     Christos Papachristou},
  title           = {Integrated test of interacting controllers and
                     datapaths},
  journal         = j-TODAES,
  volume          = {6},
  number          = {3},
  pages           = {401--422},
  month           = jul,
  year            = {2001},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Tue Feb 19 14:35:45 MST 2002},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Parulkar:2001:IRC,
  author          = {Ishwar Parulkar and Sandeep K. Gupta and
                     Melvin A. Breuer},
  title           = {Introducing redundant computations in {RTL} data
                     paths for reducing {BIST} resources},
  journal         = j-TODAES,
  volume          = {6},
  number          = {3},
  pages           = {423--445},
  month           = jul,
  year            = {2001},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Tue Feb 19 14:35:45 MST 2002},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Dasgupta:2001:SRG,
  author          = {Parthasarathi Dasgupta and Susmita Sur-Kolay},
  title           = {Slicible rectangular graphs and their optimal
                     floorplans},
  journal         = j-TODAES,
  volume          = {6},
  number          = {4},
  pages           = {447--470},
  month           = oct,
  year            = {2001},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Tue Feb 19 14:35:44 MST 2002},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Hartanto:2001:DSS,
  author          = {Ismed Hartanto and Srikanth Venkataraman and
                     W. Kent Fuchs and Elizabeth M. Rudnick and
                     Janak H. Patel and Sreejit Chakravarty},
  title           = {Diagnostic simulation of stuck-at faults in
                     sequential circuits using compact lists},
  journal         = j-TODAES,
  volume          = {6},
  number          = {4},
  pages           = {471--489},
  month           = oct,
  year            = {2001},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Tue Feb 19 14:35:44 MST 2002},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Narasimhan:2001:FAC,
  author          = {M. Narasimhan and J. Ramanujam},
  title           = {A fast approach to computing exact solutions to the
                     resource-constrained scheduling problem},
  journal         = j-TODAES,
  volume          = {6},
  number          = {4},
  pages           = {490--500},
  month           = oct,
  year            = {2001},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Tue Feb 19 14:35:44 MST 2002},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Karri:2001:IRT,
  author =       "Ramesh Karri and Balakrishnan Iyer",
  title =        "Introspection: a register transfer level technique for
                 concurrent error detection and diagnosis in data
                 dominated designs",
  journal =      j-TODAES,
  volume =       "6",
  number =       "4",
  pages =        "501--515",
  month =        oct,
  year =         "2001",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Feb 19 14:35:44 MST 2002",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@article{Boyer:2001:ODS,
  author          = {Fran{\c{c}}ois R. Boyer and El Mostapha Aboulhamid
                     and Yvon Savaria and Michel Boyer},
  title           = {Optimal design of synchronous circuits using
                     software pipelining techniques},
  journal         = j-TODAES,
  volume          = {6},
  number          = {4},
  pages           = {516--532},
  month           = oct,
  year            = {2001},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Tue Feb 19 14:35:44 MST 2002},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Voeten:2001:FLT,
  author          = {Jeroen Voeten},
  title           = {On the fundamental limitations of transformational
                     design},
  journal         = j-TODAES,
  volume          = {6},
  number          = {4},
  pages           = {533--552},
  month           = oct,
  year            = {2001},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Tue Feb 19 14:35:44 MST 2002},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Shiue:2001:DMD,
  author          = {Wen-Tsong Shiue and Sathishkumar Udayanarayanan and
                     Chaitali Chakrabarti},
  title           = {Data memory design and exploration for low-power
                     embedded systems},
  journal         = j-TODAES,
  volume          = {6},
  number          = {4},
  pages           = {553--568},
  month           = oct,
  year            = {2001},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Tue Feb 19 14:35:44 MST 2002},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Ashar:2001:UCD,
  author          = {Pranav Ashar and Aarti Gupta and Sharad Malik},
  title           = {Using complete-$1$-distinguishability for {FSM}
                     equivalence checking},
  journal         = j-TODAES,
  volume          = {6},
  number          = {4},
  pages           = {569--590},
  month           = oct,
  year            = {2001},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Tue Feb 19 14:35:44 MST 2002},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Liu:2001:ODC,
  author          = {Tai-Hung Liu and Adnan Aziz and Vigyan Singhal},
  title           = {Optimizing designs containing black boxes},
  journal         = j-TODAES,
  volume          = {6},
  number          = {4},
  pages           = {591--601},
  month           = oct,
  year            = {2001},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Tue Feb 19 14:35:44 MST 2002},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Roop:2001:FST,
  author          = {Partha S. Roop and A. Sowmya and S. Ramesh},
  title           = {Forced simulation: a technique for automating
                     component reuse in embedded systems},
  journal         = j-TODAES,
  volume          = {6},
  number          = {4},
  pages           = {602--628},
  month           = oct,
  year            = {2001},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Tue Feb 19 14:35:44 MST 2002},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Flores:2001:ESM,
  author          = {Paulo F. Flores and Hor{\'a}cio C. Neto and
                     Jo{\~a}o P. Marques-Silva},
  title           = {An exact solution to the minimum size test pattern
                     problem},
  journal         = j-TODAES,
  volume          = {6},
  number          = {4},
  pages           = {629--644},
  month           = oct,
  year            = {2001},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Tue Feb 19 14:35:44 MST 2002},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Chowdhary:2002:GTM,
  author          = {Amit Chowdhary and John P. Hayes},
  title           = {General technology mapping for field-programmable
                     gate arrays based on lookup tables},
  journal         = j-TODAES,
  volume          = {7},
  number          = {1},
  pages           = {1--32},
  month           = jan,
  year            = {2002},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:03 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Michael:2002:ATD,
  author          = {M. Michael and S. Tragoudas},
  title           = {{ATPG} tools for delay faults at the functional
                     level},
  journal         = j-TODAES,
  volume          = {7},
  number          = {1},
  pages           = {33--57},
  month           = jan,
  year            = {2002},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:03 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Lysecky:2002:PIB,
  author          = {Roman Lysecky and Frank Vahid},
  title           = {Prefetching for improved bus wrapper performance in
                     cores},
  journal         = j-TODAES,
  volume          = {7},
  number          = {1},
  pages           = {58--90},
  month           = jan,
  year            = {2002},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:03 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Dutt:2002:CAI,
  author          = {Shantanu Dutt and Wenyong Deng},
  title           = {Cluster-aware iterative improvement techniques for
                     partitioning large {VLSI} circuits},
  journal         = j-TODAES,
  volume          = {7},
  number          = {1},
  pages           = {91--121},
  month           = jan,
  year            = {2002},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:03 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Goodby:2002:MSP,
  author          = {Laurence Goodby and Alex Orailo{\u{g}}lu and
                     Paul M. Chau},
  title           = {Microarchitectural synthesis of
                     performance-constrained, low-power {VLSI} designs},
  journal         = j-TODAES,
  volume          = {7},
  number          = {1},
  pages           = {122--136},
  month           = jan,
  year            = {2002},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:03 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{GuerraeSilva:2002:SMA,
  author          = {Lu{\'\i}s {Guerra e Silva} and Jo{\~a}o Marques-Silva
                     and L. Miguel Silveira and Karem A. Sakallah},
  title           = {Satisfiability models and algorithms for circuit
                     delay computation},
  journal         = j-TODAES,
  volume          = {7},
  number          = {1},
  pages           = {137--158},
  month           = jan,
  year            = {2002},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Fri Oct 31 06:28:44 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Darte:2002:CEL,
  author          = {Alain Darte and Robert Schreiber and
                     B. Ramakrishna Rau and Fr{\'e}d{\'e}ric Vivien},
  title           = {Constructing and exploiting linear schedules with
                     prescribed parallelism},
  journal         = j-TODAES,
  volume          = {7},
  number          = {1},
  pages           = {159--172},
  month           = jan,
  year            = {2002},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:03 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Jagannathan:2002:FAC,
  author          = {Ashok Jagannathan and Sung-Woo Hur and John Lillis},
  title           = {A fast algorithm for context-aware buffer insertion},
  journal         = j-TODAES,
  volume          = {7},
  number          = {1},
  pages           = {173--188},
  month           = jan,
  year            = {2002},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:03 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Vemuri:2002:ERO,
  author          = {Ranga Vemuri and Srinivas Katkoori and
                     Meenakshi Kaul and Jay Roy},
  title           = {An efficient register optimization algorithm for
                     high-level synthesis from hierarchical behavioral
                     specifications},
  journal         = j-TODAES,
  volume          = {7},
  number          = {1},
  pages           = {189--216},
  month           = jan,
  year            = {2002},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:03 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Lin:2002:OTB,
  author          = {Shi-Zheng Eric Lin and Chieh Changfan and
                     Yu-Chin Hsu and Fur-Shing Tsai},
  title           = {Optimal time borrowing analysis and timing budgeting
                     optimization for latch-based designs},
  journal         = j-TODAES,
  volume          = {7},
  number          = {1},
  pages           = {217--230},
  month           = jan,
  year            = {2002},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:03 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Dasgupta:2002:MBP,
  author          = {Parthasarathi Dasgupta and Peichen Pan and
                     Subhas C. Nandy and Bhargab B. Bhattacharya},
  title           = {Monotone bipartitioning problem in a planar point
                     set with applications to {VLSI}},
  journal         = j-TODAES,
  volume          = {7},
  number          = {2},
  pages           = {231--248},
  month           = apr,
  year            = {2002},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:04 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

%%% The two-word family name "Sonza Reorda" is braced so that BibTeX
%%% does not treat "Sonza" as part of the given names.
@Article{Corno:2002:IAS,
  author =       "F. Corno and P. Prinetto and M. Rebaudengo and M.
                 {Sonza Reorda} and G. Squillero",
  title =        "Initializability analysis of synchronous sequential
                 circuits",
  journal =      j-TODAES,
  volume =       "7",
  number =       "2",
  pages =        "249--264",
  month =        apr,
  year =         "2002",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Aug 7 11:12:04 MDT 2003",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@article{Kim:2002:LTL,
  author          = {Ki-Wook Kim and Taewhan Kim and Ting-Ting Hwang and
                     Sung-Mo Kang and C. L. Liu},
  title           = {Logic transformation for low-power synthesis},
  journal         = j-TODAES,
  volume          = {7},
  number          = {2},
  pages           = {265--283},
  month           = apr,
  year            = {2002},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:04 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Tessier:2002:FPA,
  author          = {Russell Tessier},
  title           = {Fast placement approaches for {FPGAs}},
  journal         = j-TODAES,
  volume          = {7},
  number          = {2},
  pages           = {284--305},
  month           = apr,
  year            = {2002},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:04 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Zhao:2002:TMA,
  author          = {Min Zhao and Sachin S. Sapatnekar},
  title           = {Technology mapping algorithms for domino logic},
  journal         = j-TODAES,
  volume          = {7},
  number          = {2},
  pages           = {306--335},
  month           = apr,
  year            = {2002},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:04 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Araujo:2002:GAR,
  author          = {Guido Araujo and Guilherme Ottoni and Marcelo Cintra},
  title           = {Global array reference allocation},
  journal         = j-TODAES,
  volume          = {7},
  number          = {2},
  pages           = {336--357},
  month           = apr,
  year            = {2002},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:04 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Tsao:2002:UDC,
  author          = {Chung-wen Albert Tsao and Cheng-kok Koh},
  title           = {{UST\slash DME}: a clock tree router for general skew
                     constraints},
  journal         = j-TODAES,
  volume          = {7},
  number          = {3},
  pages           = {359--379},
  month           = jul,
  year            = {2002},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:04 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Kountouris:2002:ESC,
  author          = {Apostolos A. Kountouris and Christophe Wolinski},
  title           = {Efficient scheduling of conditional behaviors for
                     high-level synthesis},
  journal         = j-TODAES,
  volume          = {7},
  number          = {3},
  pages           = {380--412},
  month           = jul,
  year            = {2002},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:04 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Vahid:2002:PSP,
  author          = {Frank Vahid},
  title           = {Partitioning sequential programs for {CAD} using a
                     three-step approach},
  journal         = j-TODAES,
  volume          = {7},
  number          = {3},
  pages           = {413--429},
  month           = jul,
  year            = {2002},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:04 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

%%% The last author's two-word surname is braced so that BibTeX treats
%%% "De Veciana" as a single family name; unbraced, the capitalized "De"
%%% is not recognized as a von part and is parsed as a given name,
%%% leaving only "Veciana" as the surname for sorting and abbreviation.
@Article{Lapinskii:2002:CAH,
  author =       "Viktor S. Lapinskii and Margarida F. Jacome and
                 Gustavo A. {De Veciana}",
  title =        "Cluster assignment for high-performance embedded
                 {VLIW} processors",
  journal =      j-TODAES,
  volume =       "7",
  number =       "3",
  pages =        "430--454",
  month =        jul,
  year =         "2002",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Aug 7 11:12:04 MDT 2003",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Saxena:2002:ESL,
  author          = {Vikram Saxena and Farid N. Najm and Ibrahim N. Hajj},
  title           = {Estimation of state line statistics in sequential
                     circuits},
  journal         = j-TODAES,
  volume          = {7},
  number          = {3},
  pages           = {455--473},
  month           = jul,
  year            = {2002},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:04 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Glebov:2002:FNA,
  author          = {A. Glebov and S. Gavrilov and D. Blaauw and V.
                     Zolotov},
  title           = {False-noise analysis using logic implications},
  journal         = j-TODAES,
  volume          = {7},
  number          = {3},
  pages           = {474--498},
  month           = jul,
  year            = {2002},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:04 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Sarrafzadeh:2002:GE,
  author          = {Majid Sarrafzadeh and Rajeev Jayaraman},
  title           = {Guest editorial},
  journal         = j-TODAES,
  volume          = {7},
  number          = {4},
  pages           = {499--500},
  month           = oct,
  year            = {2002},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:05 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Vemuri:2002:BBL,
  author          = {Navin Vemuri and Priyank Kalla and Russell Tessier},
  title           = {{BDD}-based logic synthesis for {LUT}-based {FPGAs}},
  journal         = j-TODAES,
  volume          = {7},
  number          = {4},
  pages           = {501--525},
  month           = oct,
  year            = {2002},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:05 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Fan:2002:RDG,
  author          = {Hongbing Fan and Jiping Liu and Yu-Liang Wu and C. K.
                     Wong},
  title           = {Reduction design for generic universal switch blocks},
  journal         = j-TODAES,
  volume          = {7},
  number          = {4},
  pages           = {526--546},
  month           = oct,
  year            = {2002},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:05 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Dandalis:2002:RTP,
  author          = {Andreas Dandalis and Viktor K. Prasanna},
  title           = {Run-time performance optimization of an {FPGA}-based
                     deduction engine for {SAT} solvers},
  journal         = j-TODAES,
  volume          = {7},
  number          = {4},
  pages           = {547--562},
  month           = oct,
  year            = {2002},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:05 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Wang:2002:BSF,
  author          = {Haibo Wang and Sarma B. K. Vrudhula},
  title           = {Behavioral synthesis of field programmable analog
                     array circuits},
  journal         = j-TODAES,
  volume          = {7},
  number          = {4},
  pages           = {563--604},
  month           = oct,
  year            = {2002},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:05 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Kastner:2002:IGH,
  author          = {R. Kastner and A. Kaplan and S. Ogrenci Memik and E.
                     Bozorgzadeh},
  title           = {Instruction generation for hybrid reconfigurable
                     systems},
  journal         = j-TODAES,
  volume          = {7},
  number          = {4},
  pages           = {605--627},
  month           = oct,
  year            = {2002},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:05 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Wu:2002:PDP,
  author          = {Guang-Ming Wu and Jai-Ming Lin and Yao-Wen Chang},
  title           = {Performance-driven placement for dynamically
                     reconfigurable {FPGAs}},
  journal         = j-TODAES,
  volume          = {7},
  number          = {4},
  pages           = {628--642},
  month           = oct,
  year            = {2002},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:05 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Singh:2002:ECC,
  author          = {Amit Singh and Ganapathy Parthasarathy and
                     Ma{\l}gorzata Marek-Sadowska},
  title           = {Efficient circuit clustering for area and power
                     reduction in {FPGAs}},
  journal         = j-TODAES,
  volume          = {7},
  number          = {4},
  pages           = {643--663},
  month           = oct,
  year            = {2002},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:05 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Dutt:2002:SBB,
  author          = {Shantanu Dutt and Vinay Verma and Hasan Arslan},
  title           = {A search-based bump-and-refit approach to incremental
                     routing for {ECO} applications in {FPGAs}},
  journal         = j-TODAES,
  volume          = {7},
  number          = {4},
  pages           = {664--693},
  month           = oct,
  year            = {2002},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:05 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Tragoudas:2003:PDF,
  author          = {S. Tragoudas and N. Denny},
  title           = {Path delay fault testing using test points},
  journal         = j-TODAES,
  volume          = {8},
  number          = {1},
  pages           = {1--10},
  month           = jan,
  year            = {2003},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:05 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Chang:2003:AFF,
  author          = {Yao-Wen Chang and Kai Zhu and Guang-Ming Wu and D. F.
                     Wong and C. K. Wong},
  title           = {Analysis of {FPGA\slash FPIC} switch modules},
  journal         = j-TODAES,
  volume          = {8},
  number          = {1},
  pages           = {11--37},
  month           = jan,
  year            = {2003},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:05 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Jone:2003:DTI,
  author          = {W.-B. Jone and J. S. Wang and Hsueh-I Lu and I. P. Hsu
                     and J.-Y. Chen},
  title           = {Design theory and implementation for low-power
                     segmented bus systems},
  journal         = j-TODAES,
  volume          = {8},
  number          = {1},
  pages           = {38--54},
  month           = jan,
  year            = {2003},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:05 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Yao:2003:FRC,
  author          = {Bo Yao and Hongyu Chen and Chung-Kuan Cheng and Ronald
                     Graham},
  title           = {Floorplan representations: {Complexity} and
                     connections},
  journal         = j-TODAES,
  volume          = {8},
  number          = {1},
  pages           = {55--80},
  month           = jan,
  year            = {2003},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:05 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Riepe:2003:TPN,
  author          = {Michael A. Riepe and Karem A. Sakallah},
  title           = {Transistor placement for noncomplementary digital
                     {VLSI} cell synthesis},
  journal         = j-TODAES,
  volume          = {8},
  number          = {1},
  pages           = {81--107},
  month           = jan,
  year            = {2003},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:05 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Blanton:2003:PIP,
  author          = {R. D. (Shawn) Blanton and John P. Hayes},
  title           = {On the properties of the input pattern fault model},
  journal         = j-TODAES,
  volume          = {8},
  number          = {1},
  pages           = {108--124},
  month           = jan,
  year            = {2003},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:05 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{VanAchteren:2003:SSD,
  author          = {Tanja {Van Achteren} and Francky Catthoor and Rudy
                     Lauwereins and Geert Deconinck},
  title           = {Search space definition and exploration for nonuniform
                     data reuse opportunities in data-dominant
                     applications},
  journal         = j-TODAES,
  volume          = {8},
  number          = {1},
  pages           = {125--139},
  month           = jan,
  year            = {2003},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:05 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Edwards:2003:TCC,
  author          = {Stephen A. Edwards},
  title           = {Tutorial: {Compiling} concurrent languages for
                     sequential processors},
  journal         = j-TODAES,
  volume          = {8},
  number          = {2},
  pages           = {141--187},
  month           = apr,
  year            = {2003},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:06 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Wu:2003:RBP,
  author          = {Guang-Ming Wu and Yun-Chih Chang and Yao-Wen Chang},
  title           = {Rectilinear block placement using {B*}-trees},
  journal         = j-TODAES,
  volume          = {8},
  number          = {2},
  pages           = {188--202},
  month           = apr,
  year            = {2003},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:06 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Kim:2003:MDO,
  author          = {Ki-Wook Kim and Seong-Ook Jung and Taewhan Kim and
                     Sung-Mo Kang},
  title           = {Minimum delay optimization for domino logic
                     circuits---a coupling-aware approach},
  journal         = j-TODAES,
  volume          = {8},
  number          = {2},
  pages           = {203--213},
  month           = apr,
  year            = {2003},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:06 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Pinar:2003:CSI,
  author          = {Ali Pinar and C. L. Liu},
  title           = {Compacting sequences with invariant transition
                     frequencies},
  journal         = j-TODAES,
  volume          = {8},
  number          = {2},
  pages           = {214--221},
  month           = apr,
  year            = {2003},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:06 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Singhal:2003:SOA,
  author          = {Vigyan Singhal and Carl Pixley and Adnan Aziz and Shaz
                     Qadeer and Robert Brayton},
  title           = {Sequential optimization in the absence of global
                     reset},
  journal         = j-TODAES,
  volume          = {8},
  number          = {2},
  pages           = {222--251},
  month           = apr,
  year            = {2003},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:06 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Lee:2003:COV,
  author          = {Chingren Lee and Jenq Kuen Lee and Tingting Hwang and
                     Shi-Chun Tsai},
  title           = {Compiler optimization on {VLIW} instruction scheduling
                     for low power},
  journal         = j-TODAES,
  volume          = {8},
  number          = {2},
  pages           = {252--268},
  month           = apr,
  year            = {2003},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:06 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Lopez-Vallejo:2003:HSP,
  author          = {Marisa L{\'o}pez-Vallejo and Juan Carlos L{\'o}pez},
  title           = {On the hardware-software partitioning problem:
                     {System} modeling and partitioning techniques},
  journal         = j-TODAES,
  volume          = {8},
  number          = {3},
  pages           = {269--297},
  month           = jul,
  year            = {2003},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:06 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Obenaus:2003:GFP,
  author          = {Stefan Thomas Obenaus and Ted H. Szymanski},
  title           = {{Gravity}: {Fast} placement for {$3$-D} {VLSI}},
  journal         = j-TODAES,
  volume          = {8},
  number          = {3},
  pages           = {298--315},
  month           = jul,
  year            = {2003},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:06 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Yang:2003:CRD,
  author          = {X. Yang and M. Wang and R. Kastner and S. Ghiasi and
                     M. Sarrafzadeh},
  title           = {Congestion reduction during placement with provably
                     good approximation bound},
  journal         = j-TODAES,
  volume          = {8},
  number          = {3},
  pages           = {316--333},
  month           = jul,
  year            = {2003},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:06 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Constantinides:2003:SSA,
  author          = {G. A. Constantinides and P. Y. K. Cheung and W. Luk},
  title           = {Synthesis of saturation arithmetic architectures},
  journal         = j-TODAES,
  volume          = {8},
  number          = {3},
  pages           = {334--354},
  month           = jul,
  year            = {2003},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:06 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Kuchcinski:2003:CDS,
  author          = {Krzysztof Kuchcinski},
  title           = {Constraints-driven scheduling and resource
                     assignment},
  journal         = j-TODAES,
  volume          = {8},
  number          = {3},
  pages           = {355--383},
  month           = jul,
  year            = {2003},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:06 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Lee:2003:ACG,
  author          = {J.-Y. Lee and I.-C. Park},
  title           = {Address code generation for {DSP} instruction-set
                     architectures},
  journal         = j-TODAES,
  volume          = {8},
  number          = {3},
  pages           = {384--395},
  month           = jul,
  year            = {2003},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:06 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Rawat:2003:I,
  author          = {Shishpal Rawat and Hans-Joachim Wunderlich},
  title           = {Introduction},
  journal         = j-TODAES,
  volume          = {8},
  number          = {4},
  pages           = {397--398},
  month           = oct,
  year            = {2003},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Fri Oct 31 06:04:08 MST 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Goel:2003:STA,
  author          = {Sandeep Kumar Goel and Erik Jan Marinissen},
  title           = {{SOC} test architecture design for efficient
                     utilization of test bandwidth},
  journal         = j-TODAES,
  volume          = {8},
  number          = {4},
  pages           = {399--429},
  month           = oct,
  year            = {2003},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Fri Oct 31 06:04:08 MST 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{El-Maleh:2003:TVD,
  author          = {Aiman H. El-Maleh and Yahya E. Osais},
  title           = {Test vector decomposition-based static compaction
                     algorithms for combinational circuits},
  journal         = j-TODAES,
  volume          = {8},
  number          = {4},
  pages           = {430--459},
  month           = oct,
  year            = {2003},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Fri Oct 31 06:04:08 MST 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Reddy:2003:TDV,
  author          = {Sudhakar M. Reddy and Kohei Miyase and
                     Seiji Kajihara and Irith Pomeranz},
  title           = {On test data volume reduction for multiple scan
                     chain designs},
  journal         = j-TODAES,
  volume          = {8},
  number          = {4},
  pages           = {460--469},
  month           = oct,
  year            = {2003},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Fri Oct 31 06:04:08 MST 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  fjournal        = {ACM Transactions on Design Automation of
                     Electronic Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  acknowledgement = ack-nhfb,
}

@article{Li:2003:TDC,
  author          = {Lei Li and Krishnendu Chakrabarty and Nur A. Touba},
  title           = {Test data compression using dictionaries with selective
                     entries and fixed-length indices},
  journal         = j-TODAES,
  volume          = {8},
  number          = {4},
  pages           = {470--490},
  month           = oct,
  year            = {2003},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Fri Oct 31 06:04:08 MST 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  fjournal        = {ACM Transactions on Design Automation of
                     Electronic Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  acknowledgement = ack-nhfb,
}

@article{Singh:2003:MST,
  author          = {Adit D. Singh and Markus Seuring and
                     Michael G{\"o}ssel and Egor S. Sogomonyan},
  title           = {Multimode scan: {Test} per clock {BIST} for {IP} cores},
  journal         = j-TODAES,
  volume          = {8},
  number          = {4},
  pages           = {491--505},
  month           = oct,
  year            = {2003},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Fri Oct 31 06:04:08 MST 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  fjournal        = {ACM Transactions on Design Automation of
                     Electronic Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  acknowledgement = ack-nhfb,
}

@article{Nummer:2003:THP,
  author          = {Muhammad Nummer and Manoj Sachdev},
  title           = {Testing high-performance pipelined circuits
                     with slow-speed testers},
  journal         = j-TODAES,
  volume          = {8},
  number          = {4},
  pages           = {506--521},
  month           = oct,
  year            = {2003},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Fri Oct 31 06:04:08 MST 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  fjournal        = {ACM Transactions on Design Automation of
                     Electronic Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  acknowledgement = ack-nhfb,
}

@article{Parthasarathy:2003:PTA,
  author          = {Kumar Parthasarathy and Turker Kuyel and
                     Dana Price and Le Jin and Degang Chen and
                     Randall Geiger},
  title           = {{BIST} and production testing of {ADCs} using imprecise
                     stimulus},
  journal         = j-TODAES,
  volume          = {8},
  number          = {4},
  pages           = {522--545},
  month           = oct,
  year            = {2003},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Fri Oct 31 06:04:08 MST 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  fjournal        = {ACM Transactions on Design Automation of
                     Electronic Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  acknowledgement = ack-nhfb,
}

@article{Li:2003:CLF,
  author          = {Zhuo Li and Xiang Lu and Wangqi Qiu and
                     Weiping Shi and D. M. H. Walker},
  title           = {A circuit level fault model for resistive bridges},
  journal         = j-TODAES,
  volume          = {8},
  number          = {4},
  pages           = {546--559},
  month           = oct,
  year            = {2003},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Fri Oct 31 06:04:08 MST 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  fjournal        = {ACM Transactions on Design Automation of
                     Electronic Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  acknowledgement = ack-nhfb,
}

@article{Niggemeyer:2003:DAM,
  author          = {Dirk Niggemeyer and Elizabeth M. Rudnick},
  title           = {A data acquisition methodology for on-chip repair
                     of embedded memories},
  journal         = j-TODAES,
  volume          = {8},
  number          = {4},
  pages           = {560--576},
  month           = oct,
  year            = {2003},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Fri Oct 31 06:04:08 MST 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  fjournal        = {ACM Transactions on Design Automation of
                     Electronic Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  acknowledgement = ack-nhfb,
}

@article{Neuberger:2003:MBU,
  author          = {Gustavo Neuberger and Fernanda de Lima and
                     Luigi Carro and Ricardo Reis},
  title           = {A multiple bit upset tolerant {SRAM} memory},
  journal         = j-TODAES,
  volume          = {8},
  number          = {4},
  pages           = {577--590},
  month           = oct,
  year            = {2003},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Fri Oct 31 06:04:08 MST 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  fjournal        = {ACM Transactions on Design Automation of
                     Electronic Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  acknowledgement = ack-nhfb,
}

@Article{Bunker:2004:FHS,
  author =       "Annette Bunker and Ganesh Gopalakrishnan and Sally A.
                 McKee",
  title =        "Formal hardware specification languages for protocol
                 compliance verification",
  journal =      j-TODAES,
  volume =       "9",
  number =       "1",
  pages =        "1--32",
  month =        jan,
  year =         "2004",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Jan 28 17:18:10 MST 2004",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@article{Li:2004:PMA,
  author          = {Hao Li and Srinivas Katkoori and Wai-Kei Mak},
  title           = {Power minimization algorithms for {LUT}-based
                     {FPGA} technology mapping},
  journal         = j-TODAES,
  volume          = {9},
  number          = {1},
  pages           = {33--51},
  month           = jan,
  year            = {2004},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Wed Jan 28 17:18:10 MST 2004},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  fjournal        = {ACM Transactions on Design Automation of
                     Electronic Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  acknowledgement = ack-nhfb,
}

@article{Cho:2004:FMB,
  author          = {Jeonghun Cho and Yunheung Paek and David Whalley},
  title           = {Fast memory bank assignment for fixed-point
                     digital signal processors},
  journal         = j-TODAES,
  volume          = {9},
  number          = {1},
  pages           = {52--74},
  month           = jan,
  year            = {2004},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Wed Jan 28 17:18:10 MST 2004},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  fjournal        = {ACM Transactions on Design Automation of
                     Electronic Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  acknowledgement = ack-nhfb,
}

@article{Das:2004:MDR,
  author          = {Sandip Das and Susmita Sur-Kolay and
                     Bhargab B. Bhattacharya},
  title           = {{Manhattan}-diagonal routing in channels
                     and switchboxes},
  journal         = j-TODAES,
  volume          = {9},
  number          = {1},
  pages           = {75--104},
  month           = jan,
  year            = {2004},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Wed Jan 28 17:18:10 MST 2004},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  fjournal        = {ACM Transactions on Design Automation of
                     Electronic Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  acknowledgement = ack-nhfb,
}

@article{Wu:2004:BBA,
  author          = {Lieh-Ming Wu and Kuochen Wang and Chuang-Yi Chiu},
  title           = {A {BNF}-based automatic test program generator
                     for compatible microprocessor verification},
  journal         = j-TODAES,
  volume          = {9},
  number          = {1},
  pages           = {105--132},
  month           = jan,
  year            = {2004},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Wed Jan 28 17:18:10 MST 2004},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  fjournal        = {ACM Transactions on Design Automation of
                     Electronic Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  acknowledgement = ack-nhfb,
}

@article{Kjeldsberg:2004:SRE,
  author          = {P. G. Kjeldsberg and F. Catthoor and E. J. Aas},
  title           = {Storage requirement estimation for optimized design
                     of data intensive applications},
  journal         = j-TODAES,
  volume          = {9},
  number          = {2},
  pages           = {133--158},
  month           = apr,
  year            = {2004},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Thu Nov 4 08:12:30 MST 2004},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  fjournal        = {ACM Transactions on Design Automation of
                     Electronic Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  acknowledgement = ack-nhfb,
}

@article{Sabade:2004:BTM,
  author          = {Sagar S. Sabade and Duncan M. Walker},
  title           = {{I$_{\mbox {DDX}}$}-based test methods: a survey},
  journal         = j-TODAES,
  volume          = {9},
  number          = {2},
  pages           = {159--198},
  month           = apr,
  year            = {2004},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Thu Nov 4 08:12:30 MST 2004},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  fjournal        = {ACM Transactions on Design Automation of
                     Electronic Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  acknowledgement = ack-nhfb,
}

@article{Ma:2004:SCU,
  author          = {Yuchun Ma and Xianlong Hong and Sheqin Dong and
                     Yici Cai and Chung-Kuan Cheng and Jun Gu},
  title           = {Stairway compaction using corner block list and
                     its applications with rectilinear blocks},
  journal         = j-TODAES,
  volume          = {9},
  number          = {2},
  pages           = {199--211},
  month           = apr,
  year            = {2004},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Thu Nov 4 08:12:30 MST 2004},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  fjournal        = {ACM Transactions on Design Automation of
                     Electronic Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  acknowledgement = ack-nhfb,
}

@article{Murthy:2004:BMP,
  author          = {Praveen K. Murthy and Shuvra S. Bhattacharyya},
  title           = {Buffer merging---a powerful technique for
                     reducing memory requirements of synchronous
                     dataflow specifications},
  journal         = j-TODAES,
  volume          = {9},
  number          = {2},
  pages           = {212--237},
  month           = apr,
  year            = {2004},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Thu Nov 4 08:12:30 MST 2004},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  fjournal        = {ACM Transactions on Design Automation of
                     Electronic Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  acknowledgement = ack-nhfb,
}

@article{Doboli:2004:TLL,
  author          = {Alex Doboli and Nagu Dhanwada and
                     Adrian Nunez-Aldana and Ranga Vemuri},
  title           = {A two-layer library-based approach to synthesis
                     of analog systems from {VHDL-AMS} specifications},
  journal         = j-TODAES,
  volume          = {9},
  number          = {2},
  pages           = {238--271},
  month           = apr,
  year            = {2004},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Thu Nov 4 08:12:30 MST 2004},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  fjournal        = {ACM Transactions on Design Automation of
                     Electronic Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  acknowledgement = ack-nhfb,
}

@article{Sundararajan:2004:NAI,
  author          = {Vijay Sundararajan and Sachin S. Sapatnekar and
                     Keshab K. Parhi},
  title           = {A new approach for integration of min-area
                     retiming and min-delay padding for simultaneously
                     addressing short-path and long-path constraints},
  journal         = j-TODAES,
  volume          = {9},
  number          = {3},
  pages           = {273--289},
  month           = jul,
  year            = {2004},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Thu Nov 4 08:12:30 MST 2004},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  fjournal        = {ACM Transactions on Design Automation of
                     Electronic Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  acknowledgement = ack-nhfb,
}

@article{Lepak:2004:SSI,
  author          = {Kevin M. Lepak and Min Xu and Jun Chen and Lei He},
  title           = {Simultaneous shield insertion and net ordering for
                     capacitive and inductive coupling minimization},
  journal         = j-TODAES,
  volume          = {9},
  number          = {3},
  pages           = {290--309},
  month           = jul,
  year            = {2004},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Thu Nov 4 08:12:30 MST 2004},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  fjournal        = {ACM Transactions on Design Automation of
                     Electronic Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  acknowledgement = ack-nhfb,
}

@Article{Vicente:2004:APT,
  author =       "Juan de Vicente and Juan Lanchares and Rom{\'a}n
                 Hermida",
  title =        "Annealing placement by thermodynamic combinatorial
                 optimization",
  journal =      j-TODAES,
  volume =       "9",
  number =       "3",
  pages =        "310--332",
  month =        jul,
  year =         "2004",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Nov 4 08:12:30 MST 2004",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@article{Dandalis:2004:ACE,
  author          = {Andreas Dandalis and Viktor K. Prasanna},
  title           = {An adaptive cryptographic engine for {Internet} protocol
                     security architectures},
  journal         = j-TODAES,
  volume          = {9},
  number          = {3},
  pages           = {333--353},
  month           = jul,
  year            = {2004},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Thu Nov 4 08:12:30 MST 2004},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  fjournal        = {ACM Transactions on Design Automation of
                     Electronic Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  acknowledgement = ack-nhfb,
}

@article{Yang:2004:FVE,
  author          = {Jun Yang and Rajiv Gupta and Chuanjun Zhang},
  title           = {Frequent value encoding for low power data buses},
  journal         = j-TODAES,
  volume          = {9},
  number          = {3},
  pages           = {354--384},
  month           = jul,
  year            = {2004},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Thu Nov 4 08:12:30 MST 2004},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  fjournal        = {ACM Transactions on Design Automation of
                     Electronic Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  acknowledgement = ack-nhfb,
}

@article{Dasdan:2004:EAF,
  author          = {Ali Dasdan},
  title           = {Experimental analysis of the fastest optimum
                     cycle ratio and mean algorithms},
  journal         = j-TODAES,
  volume          = {9},
  number          = {4},
  pages           = {385--418},
  month           = oct,
  year            = {2004},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Thu Nov 4 08:12:30 MST 2004},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  fjournal        = {ACM Transactions on Design Automation of
                     Electronic Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  acknowledgement = ack-nhfb,
}

@article{Ghosh:2004:COE,
  author          = {Arijit Ghosh and Tony Givargis},
  title           = {Cache optimization for embedded processor cores:
                     an analytical approach},
  journal         = j-TODAES,
  volume          = {9},
  number          = {4},
  pages           = {419--440},
  month           = oct,
  year            = {2004},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Thu Nov 4 08:12:30 MST 2004},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  fjournal        = {ACM Transactions on Design Automation of
                     Electronic Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  acknowledgement = ack-nhfb,
}

@article{Gupta:2004:CPC,
  author          = {Sumit Gupta and Rajesh Kumar Gupta and
                     Nikil D. Dutt and Alexandru Nicolau},
  title           = {Coordinated parallelizing compiler optimizations
                     and high-level synthesis},
  journal         = j-TODAES,
  volume          = {9},
  number          = {4},
  pages           = {441--470},
  month           = oct,
  year            = {2004},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Thu Nov 4 08:12:30 MST 2004},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  fjournal        = {ACM Transactions on Design Automation of
                     Electronic Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  acknowledgement = ack-nhfb,
}

@article{Cota:2004:RCN,
  author          = {{\'E}rika Cota and Luigi Carro and
                     Marcelo Lubaszewski},
  title           = {Reusing an on-chip network for the test of
                     core-based systems},
  journal         = j-TODAES,
  volume          = {9},
  number          = {4},
  pages           = {471--499},
  month           = oct,
  year            = {2004},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Thu Nov 4 08:12:30 MST 2004},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  fjournal        = {ACM Transactions on Design Automation of
                     Electronic Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  acknowledgement = ack-nhfb,
}

@article{Krishna:2004:AHE,
  author          = {C. V. Krishna and Abhijit Jas and Nur A. Touba},
  title           = {Achieving high encoding efficiency with partial dynamic
                     {LFSR} reseeding},
  journal         = j-TODAES,
  volume          = {9},
  number          = {4},
  pages           = {500--516},
  month           = oct,
  year            = {2004},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Thu Nov 4 08:12:30 MST 2004},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  fjournal        = {ACM Transactions on Design Automation of
                     Electronic Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  acknowledgement = ack-nhfb,
}

@article{Hung:2004:SCR,
  author          = {William N. N. Hung and Xiaoyu Song and
                     El Mostapha Aboulhamid and Andrew Kennings and
                     Alan Coppola},
  title           = {Segmented channel routability via satisfiability},
  journal         = j-TODAES,
  volume          = {9},
  number          = {4},
  pages           = {517--528},
  month           = oct,
  year            = {2004},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Thu Nov 4 08:12:30 MST 2004},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  fjournal        = {ACM Transactions on Design Automation of
                     Electronic Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  acknowledgement = ack-nhfb,
}

@article{Dutt:2005:E,
  author          = {Nikil Dutt},
  title           = {Editorial},
  journal         = j-TODAES,
  volume          = {10},
  number          = {1},
  pages           = {1--2},
  month           = jan,
  year            = {2005},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Thu Apr 14 10:34:36 MDT 2005},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  fjournal        = {ACM Transactions on Design Automation of
                     Electronic Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  acknowledgement = ack-nhfb,
}

@article{Cong:2005:TMA,
  author          = {Jason Cong and Hui Huang and Xin Yuan},
  title           = {Technology mapping and architecture evaluation
                     for $ k / m$-macrocell-based {FPGAs}},
  journal         = j-TODAES,
  volume          = {10},
  number          = {1},
  pages           = {3--23},
  month           = jan,
  year            = {2005},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Thu Apr 14 10:34:36 MDT 2005},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  fjournal        = {ACM Transactions on Design Automation of
                     Electronic Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  acknowledgement = ack-nhfb,
}

@article{Ruan:2005:BEL,
  author          = {Shanq-Jang Ruan and Kun-Lin Tsai and
                     Edwin Naroska and Feipei Lai},
  title           = {Bipartitioning and encoding in low-power
                     pipelined circuits},
  journal         = j-TODAES,
  volume          = {10},
  number          = {1},
  pages           = {24--32},
  month           = jan,
  year            = {2005},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Thu Apr 14 10:34:36 MDT 2005},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  fjournal        = {ACM Transactions on Design Automation of
                     Electronic Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  acknowledgement = ack-nhfb,
}

@article{Memik:2005:SAO,
  author          = {Seda Ogrenci Memik and Ryan Kastner and
                     Elaheh Bozorgzadeh and Majid Sarrafzadeh},
  title           = {A scheduling algorithm for optimization and
                     early planning in high-level synthesis},
  journal         = j-TODAES,
  volume          = {10},
  number          = {1},
  pages           = {33--57},
  month           = jan,
  year            = {2005},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Thu Apr 14 10:34:36 MDT 2005},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  fjournal        = {ACM Transactions on Design Automation of
                     Electronic Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  acknowledgement = ack-nhfb,
}

@article{Adya:2005:CTM,
  author          = {S. N. Adya and I. L. Markov},
  title           = {Combinatorial techniques for mixed-size placement},
  journal         = j-TODAES,
  volume          = {10},
  number          = {1},
  pages           = {58--90},
  month           = jan,
  year            = {2005},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Thu Apr 14 10:34:36 MDT 2005},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  fjournal        = {ACM Transactions on Design Automation of
                     Electronic Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  acknowledgement = ack-nhfb,
}

@article{Nourani:2005:RHE,
  author          = {Mehrdad Nourani and Mohammad H. Tehranipour},
  title           = {{RL-Huffman} encoding for test compression and
                     power reduction in scan applications},
  journal         = j-TODAES,
  volume          = {10},
  number          = {1},
  pages           = {91--115},
  month           = jan,
  year            = {2005},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Thu Apr 14 10:34:36 MDT 2005},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  fjournal        = {ACM Transactions on Design Automation of
                     Electronic Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  acknowledgement = ack-nhfb,
}

@article{Jan:2005:GMR,
  author          = {Gene Eu Jan and Ki-Yin Chang and Su Gao and
                     Ian Parberry},
  title           = {A $4$-geometry maze router and its application
                     on multiterminal nets},
  journal         = j-TODAES,
  volume          = {10},
  number          = {1},
  pages           = {116--135},
  month           = jan,
  year            = {2005},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Thu Apr 14 10:34:36 MDT 2005},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  fjournal        = {ACM Transactions on Design Automation of
                     Electronic Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  acknowledgement = ack-nhfb,
}

@article{Arato:2005:AAH,
  author          = {P{\'e}ter Arat{\'o} and
                     Zolt{\'a}n {\'A}d{\'a}m Mann and
                     Andr{\'a}s Orb{\'a}n},
  title           = {Algorithmic aspects of hardware\slash software
                     partitioning},
  journal         = j-TODAES,
  volume          = {10},
  number          = {1},
  pages           = {136--156},
  month           = jan,
  year            = {2005},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Thu Apr 14 10:34:36 MDT 2005},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  fjournal        = {ACM Transactions on Design Automation of
                     Electronic Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  acknowledgement = ack-nhfb,
}

@article{Kagaris:2005:UMP,
  author          = {Dimitri Kagaris},
  title           = {A unified method for phase shifter computation},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  volume          = {10},
  number          = {1},
  pages           = {157--167},
  month           = jan,
  year            = {2005},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Thu Apr 14 10:34:36 MDT 2005},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@article{Kao:2005:EAF,
  author          = {Chi-Chou Kao and
                     Yen-Tai Lai},
  title           = {An efficient algorithm for finding the minimal-area
                     {FPGA} technology mapping},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  volume          = {10},
  number          = {1},
  pages           = {168--186},
  month           = jan,
  year            = {2005},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Thu Apr 14 10:34:36 MDT 2005},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@article{Chabini:2005:SOR,
  author          = {Noureddine Chabini and
                     El Mostapha Aboulhamid and
                     Isma{\"\i}l Chabini and
                     Yvon Savaria},
  title           = {Scheduling and optimal register placement for
                     synchronous circuits derived using software
                     pipelining techniques},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  volume          = {10},
  number          = {2},
  pages           = {187--204},
  month           = apr,
  year            = {2005},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Tue Apr 26 10:39:39 MDT 2005},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@article{Cao:2005:SSL,
  author          = {Aiqun Cao and
                     Naran Sirisantana and
                     Cheng-Kok Koh and
                     Kaushik Roy},
  title           = {Synthesis of skewed logic circuits},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  volume          = {10},
  number          = {2},
  pages           = {205--228},
  month           = apr,
  year            = {2005},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Tue Apr 26 10:39:39 MDT 2005},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@article{Kadayif:2005:OIT,
  author          = {I. Kadayif and
                     A. Sivasubramaniam and
                     M. Kandemir and
                     G. Kandiraju and
                     G. Chen},
  title           = {Optimizing instruction {TLB} energy using software
                     and hardware techniques},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  volume          = {10},
  number          = {2},
  pages           = {229--257},
  month           = apr,
  year            = {2005},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Tue Apr 26 10:39:39 MDT 2005},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@article{Liu:2005:ETT,
  author          = {Xiao Liu and
                     Michael S. Hsiao and
                     Sreejit Chakravarty and
                     Paul J. Thadikaran},
  title           = {Efficient techniques for transition testing},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  volume          = {10},
  number          = {2},
  pages           = {258--278},
  month           = apr,
  year            = {2005},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Tue Apr 26 10:39:39 MDT 2005},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@article{Poon:2005:DPM,
  author          = {Kara K. W. Poon and
                     Steven J. E. Wilton and
                     Andy Yan},
  title           = {A detailed power model for field-programmable gate
                     arrays},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  volume          = {10},
  number          = {2},
  pages           = {279--302},
  month           = apr,
  year            = {2005},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Tue Apr 26 10:39:39 MDT 2005},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@article{Bhattacharya:2005:OWP,
  author          = {Soumendu Bhattacharya and
                     Abhijit Chatterjee},
  title           = {Optimized wafer-probe and assembled package test
                     design for analog circuits},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  volume          = {10},
  number          = {2},
  pages           = {303--329},
  month           = apr,
  year            = {2005},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Tue Apr 26 10:39:39 MDT 2005},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@article{Mohanty:2005:EED,
  author          = {Saraju P. Mohanty and
                     N. Ranganathan},
  title           = {Energy-efficient datapath scheduling using multiple
                     voltages and dynamic clocking},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  volume          = {10},
  number          = {2},
  pages           = {330--353},
  month           = apr,
  year            = {2005},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Tue Apr 26 10:39:39 MDT 2005},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@article{Davoodi:2005:VSU,
  author          = {Azadeh Davoodi and
                     Ankur Srivastava},
  title           = {Voltage scheduling under unpredictabilities: a risk
                     management paradigm},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  volume          = {10},
  number          = {2},
  pages           = {354--368},
  month           = apr,
  year            = {2005},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Tue Apr 26 10:39:39 MDT 2005},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@article{Wang:2005:EAV,
  author          = {Zhong Wang and
                     Xiaobo Sharon Hu},
  title           = {Energy-aware variable partitioning and instruction
                     scheduling for multibank memory architectures},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  volume          = {10},
  number          = {2},
  pages           = {369--388},
  month           = apr,
  year            = {2005},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Tue Apr 26 10:39:39 MDT 2005},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@article{Cong:2005:LSC,
  author          = {Jason Cong and
                     Joseph R. Shinnerl and
                     Min Xie and
                     Tim Kong and
                     Xin Yuan},
  title           = {Large-scale circuit placement},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  volume          = {10},
  number          = {2},
  pages           = {389--430},
  month           = apr,
  year            = {2005},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Tue Apr 26 10:39:39 MDT 2005},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@article{Paul:2005:HLM,
  author          = {Joann M. Paul and
                     Donald E. Thomas and
                     Andrew S. Cassidy},
  title           = {High-level modeling and simulation of single-chip
                     programmable heterogeneous multiprocessors},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  volume          = {10},
  number          = {3},
  pages           = {431--461},
  month           = jul,
  year            = {2005},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Thu Sep 22 11:16:52 MDT 2005},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@article{Roy:2005:FSV,
  author          = {Arnab Roy and
                     S. K. Panda and
                     Rajeev Kumar and
                     P. P. Chakrabarti},
  title           = {A framework for systematic validation and debugging
                     of pipeline simulators},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  volume          = {10},
  number          = {3},
  pages           = {462--491},
  month           = jul,
  year            = {2005},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Thu Sep 22 11:16:52 MDT 2005},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@article{Banerjee:2005:OFT,
  author          = {Ansuman Banerjee and
                     Pallab Dasgupta},
  title           = {The open family of temporal logics: {Annotating}
                     temporal operators with input constraints},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  volume          = {10},
  number          = {3},
  pages           = {492--522},
  month           = jul,
  year            = {2005},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Thu Sep 22 11:16:52 MDT 2005},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@article{Koushanfar:2005:BST,
  author          = {Farinaz Koushanfar and
                     Inki Hong and
                     Miodrag Potkonjak},
  title           = {Behavioral synthesis techniques for intellectual
                     property protection},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  volume          = {10},
  number          = {3},
  pages           = {523--545},
  month           = jul,
  year            = {2005},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Thu Sep 22 11:16:52 MDT 2005},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@article{Gupta:2005:RAS,
  author          = {Puneet Gupta and
                     Andrew B. Kahng and
                     Stefanus Mantik},
  title           = {Routing-aware scan chain ordering},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  volume          = {10},
  number          = {3},
  pages           = {546--560},
  month           = jul,
  year            = {2005},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Thu Sep 22 11:16:52 MDT 2005},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@article{Xiang:2005:AIP,
  author          = {Hua Xiang and
                     Xiaoping Tang and
                     Martin D. F. Wong},
  title           = {An algorithm for integrated pin assignment and
                     buffer planning},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  volume          = {10},
  number          = {3},
  pages           = {561--572},
  month           = jul,
  year            = {2005},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Thu Sep 22 11:16:52 MDT 2005},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

%%% The complexity bound in the title is big-O; the math is brace-protected
%%% so that styles which downcase titles cannot turn it into little-o.
@Article{Lee:2005:PDD,
  author =       "Jaehwan John Lee and Vincent John {Mooney III}",
  title =        "An {$ O(\min (m, n)) $} parallel deadlock detection
                 algorithm",
  journal =      j-TODAES,
  volume =       "10",
  number =       "3",
  pages =        "573--586",
  month =        jul,
  year =         "2005",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Sep 22 11:16:52 MDT 2005",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@article{Harris:2005:I,
  author          = {Ian G. Harris},
  title           = {Introduction},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  volume          = {10},
  number          = {4},
  pages           = {587--588},
  month           = oct,
  year            = {2005},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Fri Jan 13 07:41:02 MST 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@article{Suhaib:2005:XIM,
  author          = {Syed M. Suhaib and
                     Deepak A. Mathaikutty and
                     Sandeep K. Shukla and
                     David Berner},
  title           = {{XFM}: an incremental methodology for developing
                     formal models},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  volume          = {10},
  number          = {4},
  pages           = {589--609},
  month           = oct,
  year            = {2005},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Fri Jan 13 07:41:02 MST 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@article{Fujita:2005:ECB,
  author          = {Masahiro Fujita},
  title           = {Equivalence checking between behavioral and {RTL}
                     descriptions with virtual controllers and
                     datapaths},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  volume          = {10},
  number          = {4},
  pages           = {610--626},
  month           = oct,
  year            = {2005},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Fri Jan 13 07:41:02 MST 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@Article{Feng:2005:UDP,
  author =       "Tao Feng and Li-C. Wang and Kwang-Ting (Tim) Cheng and
                 Chih-Chang (Andy) Lin",
  title =        "Using $2$-domain partitioned {OBDD} data structure in
                 an enhanced symbolic simulator",
  journal =      j-TODAES,
  volume =       "10",
  number =       "4",
  pages =        "627--650",
  month =        oct,
  year =         "2005",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jan 13 07:41:02 MST 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@article{Higgins:2005:SDA,
  author          = {Jason T. Higgins and
                     Mark D. Aagaard},
  title           = {Simplifying the design and automating the
                     verification of pipelines with structural hazards},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  volume          = {10},
  number          = {4},
  pages           = {651--672},
  month           = oct,
  year            = {2005},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Fri Jan 13 07:41:02 MST 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@Article{Shamshiri:2005:ILT,
  author =       "Saeed Shamshiri and Hadi Esmaeilzadeh and Zainalabedin
                 Navabi",
  title =        "Instruction-level test methodology for {CPU} core
                 self-testing",
  journal =      j-TODAES,
  volume =       "10",
  number =       "4",
  pages =        "673--689",
  month =        oct,
  year =         "2005",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jan 13 07:41:02 MST 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@article{Al-Yamani:2005:TCE,
  author          = {Ahmad A. Al-Yamani and
                     Edward J. McCluskey},
  title           = {Test chip experimental results on high-level
                     structural test},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  volume          = {10},
  number          = {4},
  pages           = {690--701},
  month           = oct,
  year            = {2005},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Fri Jan 13 07:41:02 MST 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@article{Ciordas:2005:EBM,
  author          = {Calin Ciordas and
                     Twan Basten and
                     Andrei R{\u{a}}dulescu and
                     Kees Goossens and
                     Jef {Van Meerbergen}},
  title           = {An event-based monitoring service for networks on
                     chip},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  volume          = {10},
  number          = {4},
  pages           = {702--723},
  month           = oct,
  year            = {2005},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Fri Jan 13 07:41:02 MST 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@article{Dutt:2006:E,
  author          = {Nikil Dutt},
  title           = {Editorial},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  volume          = {11},
  number          = {1},
  pages           = {1--2},
  month           = jan,
  year            = {2006},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Wed Apr 12 07:15:39 MDT 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@article{Givargis:2006:ZCI,
  author          = {Tony Givargis},
  title           = {Zero cost indexing for improved processor cache
                     performance},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  volume          = {11},
  number          = {1},
  pages           = {3--25},
  month           = jan,
  year            = {2006},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Wed Apr 12 07:15:39 MDT 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@article{Constantinides:2006:WLO,
  author          = {George A. Constantinides},
  title           = {Word-length optimization for differentiable
                     nonlinear systems},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  volume          = {11},
  number          = {1},
  pages           = {26--43},
  month           = jan,
  year            = {2006},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Wed Apr 12 07:15:39 MDT 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@article{Su:2006:AMS,
  author          = {Qing Su and
                     Jamil Kawa and
                     Charles Chiang and
                     Yehia Massoud},
  title           = {Accurate modeling of substrate resistive coupling
                     for floating substrates},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  volume          = {11},
  number          = {1},
  pages           = {44--51},
  month           = jan,
  year            = {2006},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Wed Apr 12 07:15:39 MDT 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@article{Davoodi:2006:ETG,
  author          = {Azadeh Davoodi and
                     Ankur Srivastava},
  title           = {Effective techniques for the generalized low-power
                     binding problem},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  volume          = {11},
  number          = {1},
  pages           = {52--69},
  month           = jan,
  year            = {2006},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Wed Apr 12 07:15:39 MDT 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@article{Schaumont:2006:ICE,
  author          = {Patrick Schaumont and
                     Doris Ching and
                     Ingrid Verbauwhede},
  title           = {An interactive codesign environment for
                     domain-specific coprocessors},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  volume          = {11},
  number          = {1},
  pages           = {70--87},
  month           = jan,
  year            = {2006},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Wed Apr 12 07:15:39 MDT 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@article{Jiang:2006:RCD,
  author          = {Iris Hui-Ru Jiang and
                     Song-Ra Pan and
                     Yao-Wen Chang and
                     Jing-Yang Jou},
  title           = {Reliable crosstalk-driven interconnect optimization},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  volume          = {11},
  number          = {1},
  pages           = {88--103},
  month           = jan,
  year            = {2006},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Wed Apr 12 07:15:39 MDT 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@article{Kulkarni:2006:CTA,
  author          = {Dhananjay Kulkarni and
                     Walid A. Najjar and
                     Robert Rinker and
                     Fadi J. Kurdahi},
  title           = {Compile-time area estimation for {LUT}-based
                     {FPGAs}},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  volume          = {11},
  number          = {1},
  pages           = {104--122},
  month           = jan,
  year            = {2006},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Wed Apr 12 07:15:39 MDT 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@article{Shrivastava:2006:CFC,
  author          = {Aviral Shrivastava and
                     Partha Biswas and
                     Ashok Halambi and
                     Nikil Dutt and
                     Alex Nicolau},
  title           = {Compilation framework for code size reduction using
                     reduced bit-width {ISAs (rISAs)}},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  volume          = {11},
  number          = {1},
  pages           = {123--146},
  month           = jan,
  year            = {2006},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Wed Apr 12 07:15:39 MDT 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@article{You:2006:CLP,
  author          = {Yi-Ping You and
                     Chingren Lee and
                     Jenq Kuen Lee},
  title           = {Compilers for leakage power reduction},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  volume          = {11},
  number          = {1},
  pages           = {147--164},
  month           = jan,
  year            = {2006},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Wed Apr 12 07:15:39 MDT 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@article{Shao:2006:LST,
  author          = {Zili Shao and Bin Xiao and Chun Xue and Qingfeng Zhuge
                     and Edwin H.-M. Sha},
  title           = {Loop scheduling with timing and switching-activity
                     minimization for {VLIW DSP}},
  journal         = j-TODAES,
  volume          = 11,
  number          = 1,
  pages           = {165--185},
  month           = jan,
  year            = 2006,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Wed Apr 12 07:15:39 MDT 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Mohanty:2006:IMS,
  author          = {Saraju P. Mohanty and N. Ranganathan and Sunil K.
                     Chappidi},
  title           = {{ILP} models for simultaneous energy and transient
                     power minimization during behavioral synthesis},
  journal         = j-TODAES,
  volume          = 11,
  number          = 1,
  pages           = {186--212},
  month           = jan,
  year            = 2006,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Wed Apr 12 07:15:39 MDT 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Ozdal:2006:TLB,
  author          = {Muhammet Mustafa Ozdal and Martin D. F. Wong},
  title           = {Two-layer bus routing for high-speed printed circuit
                     boards},
  journal         = j-TODAES,
  volume          = 11,
  number          = 1,
  pages           = {213--227},
  month           = jan,
  year            = 2006,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Wed Apr 12 07:15:39 MDT 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Kandemir:2006:IEB,
  author          = {M. Kandemir and J. Ramanujam and U. Sezer},
  title           = {Improving the energy behavior of block buffering using
                     compiler optimizations},
  journal         = j-TODAES,
  volume          = 11,
  number          = 1,
  pages           = {228--250},
  month           = jan,
  year            = 2006,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Wed Apr 12 07:15:39 MDT 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Ayala-Rincon:2006:PTS,
  author          = {M. Ayala-Rinc{\'o}n and C. H. Llanos and R. P. Jacobi
                     and R. W. Hartenstein},
  title           = {Prototyping time- and space-efficient computations of
                     algebraic operations over dynamically reconfigurable
                     systems modeled by rewriting-logic},
  journal         = j-TODAES,
  volume          = 11,
  number          = 2,
  pages           = {251--281},
  month           = apr,
  year            = 2006,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Wed Aug 23 10:13:18 MDT 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Absar:2006:RAI,
  author          = {Javed Absar and Francky Catthoor},
  title           = {Reuse analysis of indirectly indexed arrays},
  journal         = j-TODAES,
  volume          = 11,
  number          = 2,
  pages           = {282--305},
  month           = apr,
  year            = 2006,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Wed Aug 23 10:13:18 MDT 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Dasdan:2006:HIT,
  author          = {Ali Dasdan and Ivan Hom},
  title           = {Handling inverted temperature dependence in static
                     timing analysis},
  journal         = j-TODAES,
  volume          = 11,
  number          = 2,
  pages           = {306--324},
  month           = apr,
  year            = 2006,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Wed Aug 23 10:13:18 MDT 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Li:2006:ETO,
  author          = {Zuoyuan Li and Xianlong Hong and Qiang Zhou and Jinian
                     Bian and Hannah H. Yang and Vijay Pitchumani},
  title           = {Efficient thermal-oriented {$3$D} floorplanning and
                     thermal via planning for two-stacked-die integration},
  journal         = j-TODAES,
  volume          = 11,
  number          = 2,
  pages           = {325--345},
  month           = apr,
  year            = 2006,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Wed Aug 23 10:13:18 MDT 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Padmanaban:2006:IGM,
  author          = {Saravanan Padmanaban and Spyros Tragoudas},
  title           = {Implicit grading of multiple path delay faults},
  journal         = j-TODAES,
  volume          = 11,
  number          = 2,
  pages           = {346--361},
  month           = apr,
  year            = 2006,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Wed Aug 23 10:13:18 MDT 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Chen:2006:OSM,
  author          = {Deming Chen and Jason Cong and Junjuan Xu},
  title           = {Optimal simultaneous module and multivoltage
                     assignment for low power},
  journal         = j-TODAES,
  volume          = 11,
  number          = 2,
  pages           = {362--386},
  month           = apr,
  year            = 2006,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Wed Aug 23 10:13:18 MDT 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Zhu:2006:CZD,
  author          = {Haikun Zhu and Chung-Kuan Cheng and Ronald Graham},
  title           = {On the construction of zero-deficiency parallel prefix
                     circuits with minimum depth},
  journal         = j-TODAES,
  volume          = 11,
  number          = 2,
  pages           = {387--409},
  month           = apr,
  year            = 2006,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Wed Aug 23 10:13:18 MDT 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Kandemir:2006:REC,
  author          = {Mahmut Taylan Kandemir},
  title           = {Reducing energy consumption of multiprocessor {SoC}
                     architectures by exploiting memory bank locality},
  journal         = j-TODAES,
  volume          = 11,
  number          = 2,
  pages           = {410--441},
  month           = apr,
  year            = 2006,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Wed Aug 23 10:13:18 MDT 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Su:2006:CTD,
  author          = {Fei Su and Sule Ozev and Krishnendu Chakrabarty},
  title           = {Concurrent testing of digital microfluidics-based
                     biochips},
  journal         = j-TODAES,
  volume          = 11,
  number          = 2,
  pages           = {442--464},
  month           = apr,
  year            = 2006,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Wed Aug 23 10:13:18 MDT 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Atienza:2006:SDM,
  author          = {David Atienza and Jose M. Mendias and Stylianos
                     Mamagkakis and Dimitrios Soudris and Francky Catthoor},
  title           = {Systematic dynamic memory management design
                     methodology for reduced memory footprint},
  journal         = j-TODAES,
  volume          = 11,
  number          = 2,
  pages           = {465--489},
  month           = apr,
  year            = 2006,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Wed Aug 23 10:13:18 MDT 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Li:2006:LVA,
  author          = {Wei Li and Daniel Blakely and Scott {Van Sooy} and
                     Keven Dunn and David Kidd and Robert Rogenmoser and
                     Dian Zhou},
  title           = {{LVS} verification across multiple power domains for a
                     quad-core microprocessor},
  journal         = j-TODAES,
  volume          = 11,
  number          = 2,
  pages           = {490--500},
  month           = apr,
  year            = 2006,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Wed Aug 23 10:13:18 MDT 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Cheatham:2006:SFT,
  author          = {Jason A. Cheatham and John M. Emmert and Stan
                     Baumgart},
  title           = {A survey of fault tolerant methodologies for {FPGAs}},
  journal         = j-TODAES,
  volume          = 11,
  number          = 2,
  pages           = {501--533},
  month           = apr,
  year            = 2006,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Wed Aug 23 10:13:18 MDT 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Pedram:2006:ISI,
  author          = {Massoud Pedram},
  title           = {Introduction to special issue: {Novel} paradigms in
                     system-level design},
  journal         = j-TODAES,
  volume          = 11,
  number          = 3,
  pages           = {535--536},
  month           = jul,
  year            = 2006,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Wed Aug 23 10:13:19 MDT 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Pinto:2006:SLD,
  author =       "Alessandro Pinto and Alvise Bonivento and Alberto L.
                 Sangiovanni-Vincentelli and Roberto Passerone and Marco
                 Sgroi",
  title =        "System level design paradigms: {Platform-based} design
                 and communication synthesis",
  journal =      j-TODAES,
  volume =       "11",
  number =       "3",
  pages =        "537--563",
  month =        jul,
  year =         "2006",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Aug 23 10:13:19 MDT 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@article{Marculescu:2006:CCR,
  author          = {Radu Marculescu and Umit Y. Ogras and Nicholas H.
                     Zamora},
  title           = {Computation and communication refinement for
                     multiprocessor {SoC} design: a system-level
                     perspective},
  journal         = j-TODAES,
  volume          = 11,
  number          = 3,
  pages           = {564--592},
  month           = jul,
  year            = 2006,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Wed Aug 23 10:13:19 MDT 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Pop:2006:AOD,
  author          = {Paul Pop and Petru Eles and Zebo Peng and Traian Pop},
  title           = {Analysis and optimization of distributed real-time
                     embedded systems},
  journal         = j-TODAES,
  volume          = 11,
  number          = 3,
  pages           = {593--625},
  month           = jul,
  year            = 2006,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Wed Aug 23 10:13:19 MDT 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Mishra:2006:ADL,
  author          = {Prabhat Mishra and Aviral Shrivastava and Nikil Dutt},
  title           = {Architecture description language {(ADL)-driven}
                     software toolkit generation for architectural
                     exploration of programmable {SOCs}},
  journal         = j-TODAES,
  volume          = 11,
  number          = 3,
  pages           = {626--658},
  month           = jul,
  year            = 2006,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Wed Aug 23 10:13:19 MDT 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Lysecky:2006:WP,
  author          = {Roman Lysecky and Greg Stitt and Frank Vahid},
  title           = {{Warp Processors}},
  journal         = j-TODAES,
  volume          = 11,
  number          = 3,
  pages           = {659--681},
  month           = jul,
  year            = 2006,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Wed Aug 23 10:13:19 MDT 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Su:2006:MPF,
  author          = {Fei Su and Krishnendu Chakrabarty},
  title           = {Module placement for fault-tolerant
                     microfluidics-based biochips},
  journal         = j-TODAES,
  volume          = 11,
  number          = 3,
  pages           = {682--710},
  month           = jul,
  year            = 2006,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Wed Aug 23 10:13:19 MDT 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Hanchate:2006:GTF,
  author          = {Narender Hanchate and Nagarajan Ranganathan},
  title           = {A game-theoretic framework for multimetric
                     optimization of interconnect delay, power, and
                     crosstalk noise during wire sizing},
  journal         = j-TODAES,
  volume          = 11,
  number          = 3,
  pages           = {711--739},
  month           = jul,
  year            = 2006,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Wed Aug 23 10:13:19 MDT 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Chen:2006:SPC,
  author          = {Gang Chen and Jason Cong},
  title           = {Simultaneous placement with clustering and
                     duplication},
  journal         = j-TODAES,
  volume          = 11,
  number          = 3,
  pages           = {740--772},
  month           = jul,
  year            = 2006,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Wed Aug 23 10:13:19 MDT 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Bhanja:2006:SFG,
  author          = {Sanjukta Bhanja and Karthikeyan Lingasubramanian and
                     N. Ranganathan},
  title           = {A stimulus-free graphical probabilistic switching
                     model for sequential circuits using dynamic {Bayesian}
                     networks},
  journal         = j-TODAES,
  volume          = 11,
  number          = 3,
  pages           = {773--796},
  month           = jul,
  year            = 2006,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Wed Aug 23 10:13:19 MDT 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Cao:2006:POS,
  author          = {Aiqun Cao and Ruibing Lu and Chen Li and Cheng-Kok
                     Koh},
  title           = {Postlayout optimization for synthesis of {Domino}
                     circuits},
  journal         = j-TODAES,
  volume          = 11,
  number          = 4,
  pages           = {797--821},
  month           = oct,
  year            = 2006,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Wed Nov 15 06:47:05 MST 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Nacul:2006:STC,
  author          = {Andr{\'e} C. N{\'a}cul and Tony Givargis},
  title           = {Synthesis of time-constrained multitasking embedded
                     software},
  journal         = j-TODAES,
  volume          = 11,
  number          = 4,
  pages           = {822--847},
  month           = oct,
  year            = 2006,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Wed Nov 15 06:47:05 MST 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Kang:2006:STA,
  author          = {Kunhyuk Kang and Bipul C. Paul and Kaushik Roy},
  title           = {Statistical timing analysis using levelized covariance
                     propagation considering systematic and random
                     variations of process parameters},
  journal         = j-TODAES,
  volume          = 11,
  number          = 4,
  pages           = {848--879},
  month           = oct,
  year            = 2006,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Wed Nov 15 06:47:05 MST 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Kuo:2006:DID,
  author          = {Wu-An Kuo and Tingting Hwang and Allen C.-H. Wu},
  title           = {Decomposition of instruction decoders for low-power
                     designs},
  journal         = j-TODAES,
  volume          = 11,
  number          = 4,
  pages           = {880--889},
  month           = oct,
  year            = 2006,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Wed Nov 15 06:47:05 MST 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Liu:2006:CML,
  author          = {Yi-Yu Liu and Kuo-Hua Wang and Tingting Hwang},
  title           = {Crosstalk minimization in logic synthesis for {PLAs}},
  journal         = j-TODAES,
  volume          = 11,
  number          = 4,
  pages           = {890--915},
  month           = oct,
  year            = 2006,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Wed Nov 15 06:47:05 MST 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Goren:2006:TSG,
  author          = {Sezer G{\"o}ren and F. Joel Ferguson},
  title           = {Test sequence generation for controller verification
                     and test with high coverage},
  journal         = j-TODAES,
  volume          = 11,
  number          = 4,
  pages           = {916--938},
  month           = oct,
  year            = 2006,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Wed Nov 15 06:47:05 MST 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Wu:2006:MWR,
  author          = {Zhong-Zhen Wu and Shih-Chieh Chang},
  title           = {Multiple wire reconnections based on implication flow
                     graph},
  journal         = j-TODAES,
  volume          = 11,
  number          = 4,
  pages           = {939--952},
  month           = oct,
  year            = 2006,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Wed Nov 15 06:47:05 MST 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Wang:2006:PDT,
  author          = {Chi-Shong Wang and Chingwei Yeh},
  title           = {Performance-driven technology mapping with {MSG}
                     partition and selective gate duplication},
  journal         = j-TODAES,
  volume          = 11,
  number          = 4,
  pages           = {953--973},
  month           = oct,
  year            = 2006,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Wed Nov 15 06:47:05 MST 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Gangwar:2007:IIC,
  author          = {Anup Gangwar and M. Balakrishnan and Anshul Kumar},
  title           = {Impact of intercluster communication mechanisms on
                     {ILP} in clustered {VLIW} architectures},
  journal         = j-TODAES,
  volume          = 12,
  number          = 1,
  pages           = {1:1--1:??},
  month           = jan,
  year            = 2007,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Jun 12 18:08:29 MDT 2008},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {VLIW processors have started gaining acceptance in the
                     embedded systems domain. However, monolithic register
                     file VLIW processors with a large number of functional
                     units are not viable. This is because of the need for a
                     large number of ports to support FU requirements, which
                     makes them expensive and extremely slow. A simple
                     solution is to break the register file into a number of
                     smaller register files with a subset of FUs connected
                     to it. These architectures are termed clustered VLIW
                     processors. In this article, we first build a case for
                     clustered VLIW processors with four or more clusters by
                     showing that the achievable ILP in most of the media
                     applications for a 16 ALU and 8 LD/ST VLIW processor is
                     around 20. We then provide a classification of the
                     intercluster interconnection design space, and show
                     that a large part of this design space is currently
                     unexplored. Next, using our performance evaluation
                     methodology, we evaluate a subset of this design space
                     and show that the most commonly used type of
                     interconnection, RF-to-RF, fails to meet achievable
                     performance by a large factor, while certain other
                     types of interconnections can lower this gap
                     considerably. We also establish that this behavior is
                     heavily application dependent, emphasizing the
                     importance of application-specific architecture
                     exploration. We also present results about the
                     statistical behavior of these different architectures
                     by varying the number of clusters in our framework from
                     4 to 16. These results clearly show the advantages of
                     one specific architecture over others. Finally, based
                     on our results, we propose a new interconnection
                     network, which should lower this performance gap.},
  acknowledgement = ack-nhfb,
  articleno       = {1},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {ASIP; clustered VLIW processors; performance
                     evaluation; VLIW},
}

%%% TODAES volume 12, number 1 (January 2007), article 2.
%%% NOTE(review): this entry has no DOI field, although the volume 12,
%%% number 2 entries later in this file carry one --- TODO: add it after
%%% checking the ACM Digital Library.  The ?? in the pages value is
%%% presumably a placeholder for an unknown final page; confirm before
%%% changing it.
@Article{Zamora:2007:SLP,
  author =       "Nicholas H. Zamora and Xiaoping Hu and Radu
                 Marculescu",
  title =        "System-level performance\slash power analysis for
                 platform-based design of multimedia applications",
  journal =      j-TODAES,
  volume =       "12",
  number =       "1",
  pages =        "2:1--2:??",
  month =        jan,
  year =         "2007",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:08:29 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "The objective of this article is to introduce the use
                 of Stochastic Automata Networks (SANs) as an effective
                 formalism for application-architecture modeling in
                 system-level average-case analysis for platform-based
                 design. By platform, we mean a family of heterogeneous
                 architectures that satisfy a set of architectural
                 constraints imposed to allow re-use of hardware and
                 software components. More precisely, we show how SANs
                 can be used early in the design cycle to identify the
                 best performance/power trade-offs among several
                 application-architecture combinations. Having this
                 information available not only helps avoid lengthy
                 simulations for predicting power and performance
                 figures, but also enables efficient mapping of
                 different applications onto a chosen platform. We
                 illustrate the benefits of our methodology by using the
                 ``Picture-in-Picture'' video decoder as a driver
                 application.",
  acknowledgement = ack-nhfb,
  articleno =    "2",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "average-case analysis; design space exploration;
                 hardware/software codesign; Markov chains; performance
                 models; platform-based design; stochastic automata
                 networks (SANs)",
}

%%% TODAES volume 12, number 1 (January 2007), article 3.
%%% Keywords are lowercase except for acronyms and proper nouns.
%%% NOTE(review): this entry has no DOI field, although the volume 12,
%%% number 2 entries later in this file carry one --- TODO: add it after
%%% checking the ACM Digital Library.
@Article{Sham:2007:ARD,
  author =       "Chiu-Wing Sham and Evangeline F. Y. Young",
  title =        "Area reduction by deadspace utilization on
                 interconnect optimized floorplan",
  journal =      j-TODAES,
  volume =       "12",
  number =       "1",
  pages =        "3:1--3:??",
  month =        jan,
  year =         "2007",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:08:29 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Interconnect optimization has become the major concern
                 in floorplanning. Many approaches would use simulated
                 annealing (SA) with a cost function composed of a
                 weighted sum of area, wirelength, and interconnect
                 cost. These approaches can reduce the interconnect cost
                 efficiently but the area penalty of the interconnect
                 optimized floorplan is usually quite large. In this
                 article, we propose an approach called deadspace
                 utilization (DSU) to reclaim the unused area of an
                 interconnect optimized floorplan by linear programming.
                 Since modules are not necessarily rectangular in shape
                 in floorplanning, some deadspace can be redistributed
                 to the modules to increase the area occupied by each
                 module. If the area of each module can be expanded by
                 the same ratio, the whole floorplan can be compacted by
                 that ratio to give a smaller floorplan. However, we
                 will limit the compaction ratio to prevent
                 overcongestion. Experiments show that we can apply this
                 deadspace utilization technique to reduce the area and
                 total wirelength of an interconnect optimized floorplan
                 further while the routability can be maintained at the
                 same time.",
  acknowledgement = ack-nhfb,
  articleno =    "3",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "area reduction; floorplanning",
}

%%% TODAES volume 12, number 1 (January 2007), article 4.
%%% Keywords are lowercase except for acronyms and proper nouns.
%%% NOTE(review): the keyword item that starts with the word clustering
%%% looks like two keywords run together --- TODO: verify against the
%%% publisher's keyword list.  This entry also has no DOI field, unlike
%%% the volume 12, number 2 entries later in this file.
@Article{Li:2007:SBC,
  author =       "Lei Li and Zhanglei Wang and Krishnendu Chakrabarty",
  title =        "Scan-{BIST} based on cluster analysis and the encoding
                 of repeating sequences",
  journal =      j-TODAES,
  volume =       "12",
  number =       "1",
  pages =        "4:1--4:??",
  month =        jan,
  year =         "2007",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:08:29 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "We present a built-in self-test (BIST) approach for
                 full-scan designs that extracts the most frequently
                 occurring sequences from deterministic test patterns.
                 The extracted sequences are stored on-chip, and are
                 used during test application. Three sets of test
                 patterns are applied to the circuit under test during a
                 BIST test session; these include pseudorandom patterns,
                 semirandom patterns, and deterministic patterns. The
                 semirandom patterns are generated based on the stored
                 sequences and they are more likely to detect
                 hard-to-detect faults than pseudorandom patterns. The
                 deterministic patterns are encoded using either the
                 stored sequences or the LFSR reseeding technique to
                 reduce test data volume. We use the cluster analysis
                 technique for sequence extraction to reduce the amount
                 of data to be stored. Experimental results for the
                 ISCAS-89 benchmark circuits show that the proposed
                 approach often requires less on-chip storage and test
                 data volume than other recent BIST methods.",
  acknowledgement = ack-nhfb,
  articleno =    "4",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "built-in self-test (BIST); clustering test data
                 volume; test compression",
}

%%% TODAES volume 12, number 1 (January 2007), article 5.
%%% The complexity bound in the abstract is set in math mode, and
%%% keywords are lowercase except for acronyms and proper nouns.
%%% NOTE(review): this entry has no DOI field, although the volume 12,
%%% number 2 entries later in this file carry one --- TODO: add it after
%%% checking the ACM Digital Library.
@Article{Cai:2007:WAD,
  author =       "Yuan Cai and Marcus T. Schmitz and Bashir M.
                 Al-Hashimi and Sudhakar M. Reddy",
  title =        "Workload-ahead-driven online energy minimization
                 techniques for battery-powered embedded systems with
                 time-constraints",
  journal =      j-TODAES,
  volume =       "12",
  number =       "1",
  pages =        "5:1--5:??",
  month =        jan,
  year =         "2007",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:08:29 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "This article proposes a new online voltage scaling
                 (VS) technique for battery-powered embedded systems
                 with real-time constraints. The VS technique takes into
                 account the execution times and discharge currents of
                 tasks to further reduce the battery charge consumption
                 when compared to the recently reported slack forwarding
                 technique [Ahmed and Chakrabarti 2004], while
                 maintaining low online complexity of $O(1)$.
                 Furthermore, we investigate the impact of online
                 rescheduling and remapping on the battery charge
                 consumption for tasks with data dependency which has
                 not been explicitly addressed in the literature and
                 propose a novel rescheduling/remapping technique.
                 Finally, we take leakage power into consideration and
                 extend the proposed online techniques to include
                 adaptive body biasing (ABB) which is used to reduce the
                 leakage power. We demonstrate and compare the
                 efficiency of the presented techniques using seven
                 real-life benchmarks and numerous automatically
                 generated examples.",
  acknowledgement = ack-nhfb,
  articleno =    "5",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "adaptive body biasing; battery; dynamic voltage
                 scaling; embedded systems",
}

%%% TODAES volume 12, number 1 (January 2007), article 6.
%%% Keywords are lowercase except for acronyms and proper nouns.
%%% NOTE(review): this entry has no DOI field, although the volume 12,
%%% number 2 entries later in this file carry one --- TODO: add it after
%%% checking the ACM Digital Library.
@Article{Zhu:2007:HMF,
  author =       "Xinping Zhu and Sharad Malik",
  title =        "A hierarchical modeling framework for on-chip
                 communication architectures of multiprocessing {SoCs}",
  journal =      j-TODAES,
  volume =       "12",
  number =       "1",
  pages =        "6:1--6:??",
  month =        jan,
  year =         "2007",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:08:29 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "In multiprocessor-based SoCs, optimizing the
                 communication architecture is often as important, if
                 not more important, than optimizing the computation
                 architecture. While there are mature platforms and
                 techniques for the modeling and evaluation of
                 architectures of processing elements, the same is not
                 true for the communication architectures. This article
                 presents an application-driven retargetable prototyping
                 platform that fills this gap. This environment aims to
                 facilitate the design exploration of the communication
                 subsystem through application-level execution-driven
                 simulations and quantitative analysis. Based on an
                 analysis of a wide range of on-chip communication
                 architectures, we describe how a specific hierarchical
                 class library can be used to develop new on-chip
                 communication architectures, or variants of existing
                 ones with relatively little incremental effort. We
                 demonstrate this through three case studies including
                 two commercial on-chip bus systems and an on-chip
                 packet switching network. Here we show that, through
                 careful analysis and construction, it is possible for
                 the modeling environment to support the common features
                 of these architectures as part of the library and
                 permit instantiation of the individual architectures as
                 variants of the library design. Consequently,
                 system-level design choices regarding the communication
                 architecture can be made with high confidence in the
                 early stages of design. In addition to improving design
                 quality, this methodology also results in significantly
                 shortening design-time.",
  acknowledgement = ack-nhfb,
  articleno =    "6",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "bus; design exploration; multiprocessor system;
                 network-on-chip; object-oriented modeling; on-chip
                 communication architecture; packet-switching network;
                 retargetable simulation",
}

%%% TODAES volume 12, number 1 (January 2007), article 7.
%%% The floorplan symbol F in the abstract is set in math mode, and
%%% keywords are lowercase except for acronyms and proper nouns.
%%% NOTE(review): this entry has no DOI field, although the volume 12,
%%% number 2 entries later in this file carry one --- TODO: add it after
%%% checking the ACM Digital Library.
@Article{Majumder:2007:HPV,
  author =       "Subhashis Majumder and Susmita Sur-Kolay and Bhargab
                 B. Bhattacharya and Swarup Kumar Das",
  title =        "Hierarchical partitioning of {VLSI} floorplans by
                 staircases",
  journal =      j-TODAES,
  volume =       "12",
  number =       "1",
  pages =        "7:1--7:??",
  month =        jan,
  year =         "2007",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:08:29 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "This article addresses the problem of recursively
                 bipartitioning a given floorplan $F$ using monotone
                 staircases. At each level of the hierarchy, a monotone
                 staircase from one corner of $F$ to its opposite corner
                 is identified, such that (i) the two parts of the
                 bipartition are nearly equal in area (or in the number
                 of blocks), and (ii) the number of nets crossing the
                 staircase is minimal. The problem of area-balanced
                 bipartitioning is shown to be NP-hard, and a
                 maxflow-based heuristic is proposed. Such a hierarchy
                 may be useful to repeater placement in deep-submicron
                 physical design, and also to global routing.",
  acknowledgement = ack-nhfb,
  articleno =    "7",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "balanced bipartitioning; floorplanning; global
                 routing; network flow; NP-completeness",
}

%%% TODAES volume 12, number 1 (January 2007), article 8.
%%% Keywords are lowercase except for acronyms and proper nouns.
%%% NOTE(review): this entry has no DOI field, although the volume 12,
%%% number 2 entries later in this file carry one --- TODO: add it after
%%% checking the ACM Digital Library.
@Article{Lee:2007:ISS,
  author =       "Jong-Eun Lee and Kiyoung Choi and Nikil D. Dutt",
  title =        "Instruction set synthesis with efficient instruction
                 encoding for configurable processors",
  journal =      j-TODAES,
  volume =       "12",
  number =       "1",
  pages =        "8:1--8:??",
  month =        jan,
  year =         "2007",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:08:29 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Application-specific instructions can significantly
                 improve the performance, energy-efficiency, and code
                 size of configurable processors. While generating new
                 instructions from application-specific operation
                 patterns has been a common way to improve the
                 instruction set (IS) of a configurable processor,
                 automating the design of ISs for given applications
                 poses new challenges---how to create as well as utilize
                 new instructions in a systematic manner, and how to
                 choose the best set of application-specific
                 instructions considering the various effects the new
                 instructions may have on the data path and the
                 compilation? To address these problems, we present a
                 novel IS synthesis framework that optimizes the IS
                 through an efficient instruction encoding for the given
                 application as well as for the given data path
                 architecture. We first build a library of new
                 instructions created with various encoding alternatives
                 taking into account the data path architecture
                 constraints, and then select the best set of
                 instructions while satisfying the instruction bitwidth
                 constraint. We formulate the problem using integer
                 linear programming and also present an effective
                 heuristic algorithm. Experimental results using our
                 technique generate ISs that show improvements of up to
                 about 40\% over the native IS for several application
                 benchmarks running on typical embedded RISC
                 processors.",
  acknowledgement = ack-nhfb,
  articleno =    "8",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "application-specific instruction set processor (ASIP);
                 bitwidth-economical; configurable processor;
                 instruction encoding; ISA customization and
                 specialization",
}

%%% TODAES volume 12, number 2 (April 2007), article 9: editorial.
%%% This entry records no abstract or keywords field.  The ?? in the
%%% pages value is presumably a placeholder for an unknown final page;
%%% confirm before changing it.
@Article{Dutt:2007:E,
  author =       "Nikil Dutt",
  title =        "Editorial",
  journal =      j-TODAES,
  volume =       "12",
  number =       "2",
  pages =        "9:1--9:??",
  month =        apr,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1230800.1230801",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:08:48 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  articleno =    "9",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

%%% TODAES volume 12, number 2 (April 2007), article 10.
%%% Keywords are lowercase except for acronyms and proper nouns.
@Article{Wang:2007:DIC,
  author =       "Chao Wang and Zijiang Yang and Franjo
                 Ivan{\v{c}}i{\'c} and Aarti Gupta",
  title =        "Disjunctive image computation for software
                 verification",
  journal =      j-TODAES,
  volume =       "12",
  number =       "2",
  pages =        "10:1--10:??",
  month =        apr,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1230800.1230802",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:08:48 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Existing BDD-based symbolic algorithms designed for
                 hardware designs do not perform well on software
                 programs. We propose novel techniques based on unique
                 characteristics of software programs. Our algorithm
                 divides an image computation step into a disjunctive
                 set of easier ones that can be performed in isolation.
                 We use hypergraph partitioning to minimize the number
                 of live variables in each disjunctive component, and
                 variable scopes to simplify transition relations and
                 reachable state subsets. Our experiments on nontrivial
                 C programs show that BDD-based symbolic algorithms can
                 directly handle software models with a much larger
                 number of state variables than for hardware designs.",
  acknowledgement = ack-nhfb,
  articleno =    "10",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "binary decision diagram; formal verification; image
                 computation; model checking; reachability analysis",
}

%%% TODAES volume 12, number 2 (April 2007), article 11.
%%% Keywords are lowercase except for acronyms and proper nouns.
@Article{Mochocki:2007:TOA,
  author =       "Bren Mochocki and Xiaobo Sharon Hu and Gang Quan",
  title =        "Transition-overhead-aware voltage scheduling for
                 fixed-priority real-time systems",
  journal =      j-TODAES,
  volume =       "12",
  number =       "2",
  pages =        "11:1--11:??",
  month =        apr,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1230800.1230803",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:08:48 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Time transition overhead is a critical problem for
                 hard real-time systems that employ dynamic voltage
                 scaling (DVS) for power and energy management. While it
                 is a common practice of much previous work to ignore
                 transition overhead, these algorithms cannot guarantee
                 deadlines and/or are less effective in saving energy
                 when transition overhead is significant and not
                 appropriately dealt with. In this article we introduce
                 two techniques, one offline and one online, to
                 correctly account for transition overhead in preemptive
                 fixed-priority real-time systems. We present several
                 DVS scheduling algorithms that implement these methods
                 that can guarantee task deadlines under arbitrarily
                 large transition time overheads and reduce energy
                 consumption by as much as 40\% when compared to
                 previous methods.",
  acknowledgement = ack-nhfb,
  articleno =    "11",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "dynamic voltage scaling; fixed priority; low power;
                 scheduling; transition overhead",
}

%%% TODAES volume 12, number 2 (April 2007), article 12.
%%% Keywords are lowercase except for acronyms and proper nouns.
@Article{Chang:2007:PLP,
  author =       "Hongliang Chang and Sachin S. Sapatnekar",
  title =        "Prediction of leakage power under process
                 uncertainties",
  journal =      j-TODAES,
  volume =       "12",
  number =       "2",
  pages =        "12:1--12:??",
  month =        apr,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1230800.1230804",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:08:48 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "In this article, we present a method to analyze the
                 total leakage current of a circuit under process
                 variations, considering interdie and intradie
                 variations as well as the effect of the spatial
                 correlations of intradie variations. The approach
                 considers both the subthreshold and gate tunneling
                 leakage power, as well as their interactions. With
                 process variations, each leakage component is
                 approximated by a lognormal distribution, and the total
                 chip leakage is computed as a sum of the correlated
                 lognormals. Since the lognormals to be summed are large
                 in number and have complicated correlation structures
                 due to both spatial correlations and the correlation
                 among different leakage mechanisms, we propose an
                 efficient method to reduce the number of correlated
                 lognormals for summation to a manageable quantity. We
                 do so by identifying dominant states of leakage
                 currents and taking advantage of the spatial
                 correlation model and input states at the gates. An
                 improved approach utilizing the principal components
                 computed from spatially correlated process parameters
                 is also proposed to further improve runtime efficiency.
                 We show that the proposed methods are effective in
                 predicting the probability distribution of total chip
                 leakage, and that ignoring spatial correlations can
                 underestimate the standard deviation of full-chip
                 leakage power.",
  acknowledgement = ack-nhfb,
  articleno =    "12",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "circuit; leakage; process variation; yield",
}

%%% TODAES volume 12, number 2 (April 2007), article 13.
%%% Keywords are lowercase except for acronyms and proper nouns.
@Article{Mohanty:2007:MBE,
  author =       "Sumit Mohanty and Viktor K. Prasanna",
  title =        "A model-based extensible framework for efficient
                 application design using {FPGA}",
  journal =      j-TODAES,
  volume =       "12",
  number =       "2",
  pages =        "13:1--13:??",
  month =        apr,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1230800.1230805",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:08:48 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "For an FPGA designer, several choices are available in
                 terms of target FPGA devices, IP-cores, algorithms,
                 synthesis options, runtime reconfiguration, degrees of
                 parallelism, among others, while implementing a design.
                 Evaluation of design alternatives in the early stages
                 of the design cycle is important because the choices
                 made can have a critical impact on the performance of
                 the final design. However, a large number of
                 alternatives not only results in a large number of
                 designs, but also makes it a hard problem to
                 efficiently manage, simulate, and evaluate them. In
                 this article, we present a framework for FPGA-based
                 application design that addresses the aforementioned
                 issues. This framework supports a hierarchical modeling
                 approach that integrates application and device
                 modeling techniques and allows development of a library
                 of models for design reuse. The framework integrates a
                 high-level performance estimator for rapid estimation
                 of the latency, area, and energy of the designs. In
                 addition, a design space exploration tool allows
                 efficient evaluation of candidate designs against the
                 given performance requirements. The framework also
                 supports extension through integration of widely used
                 tools for FPGA-based design while presenting a unified
                 environment for different target FPGAs. We demonstrate
                 our framework through the modeling and performance
                 estimation of a signal processing kernel and the design
                 of end-to-end applications.",
  acknowledgement = ack-nhfb,
  articleno =    "13",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "design tool; extensible; modeling; reuse",
}

%%% TODAES volume 12, number 2 (April 2007), article 14.
%%% Keywords are lowercase except for acronyms and proper nouns.  The
%%% abstract has two paragraphs, separated by \par and a blank line.
@Article{Tang:2007:PDF,
  author =       "Weiyu Tang and Arun Kejariwal and Alexander V.
                 Veidenbaum and Alexandru Nicolau",
  title =        "A predictive decode filter cache for reducing power
                 consumption in embedded processors",
  journal =      j-TODAES,
  volume =       "12",
  number =       "2",
  pages =        "14:1--14:??",
  month =        apr,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1230800.1230806",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:08:48 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "With advances in semiconductor technology, power
                 management has increasingly become a very important
                 design constraint in processor design. In embedded
                 processors, instruction fetch and decode consume more
                 than 40\% of processor power. This calls for
                 development of power minimization techniques for the
                 fetch and decode stages of the processor pipeline. For
                 this, filter cache has been proposed as an
                 architectural extension for reducing the power
                 consumption. A filter cache is placed between the CPU
                 and the instruction cache (I-cache) to provide the
                 instruction stream. A filter cache has the advantages
                 of shorter access time and lower power consumption.
                 However, the downside of a filter cache is a possible
                 performance loss in case of cache misses. \par

                 In this article, we present a novel technique---decode
                 filter cache (DFC)---for minimizing power consumption
                 with minimal performance impact. The DFC stores decoded
                 instructions. Thus, a hit in the DFC eliminates
                 instruction fetch and its subsequent decoding. The
                 bypassing of both instruction fetch and decode reduces
                 processor power. We present a runtime approach for
                 predicting whether the next fetch source is present in
                 the DFC. In case a miss is predicted, we reduce the
                 miss penalty by accessing the I-cache directly. We
                 propose to classify instructions as cacheable or
                 noncacheable, depending on the decode width. For
                 efficient use of the cache space, a sectored cache
                 design is used for the DFC so that both cacheable and
                 noncacheable instructions can coexist in the DFC
                 sector. Experimental results show that the DFC reduces
                 processor power by 34\% on an average and our next
                 fetch prediction mechanism reduces miss penalty by more
                 than 91\%.",
  acknowledgement = ack-nhfb,
  articleno =    "14",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "cache; embedded processors; power optimization",
}

@Article{Issenin:2007:DDR,
  author =       "Ilya Issenin and Erik Brockmeyer and Miguel Miranda
                 and Nikil Dutt",
  title =        "{DRDU}: a data reuse analysis technique for efficient
                 scratch-pad memory management",
  journal =      j-TODAES,
  volume =       "12",
  number =       "2",
  pages =        "15:1--15:??",
  month =        apr,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1230800.1230807",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:08:48 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "In multimedia and other streaming applications, a
                 significant portion of energy is spent on data
                 transfers. Exploiting data reuse opportunities in the
                 application, we can reduce this energy by making copies
                 of frequently used data in a small local memory and
                 replacing speed- and power-inefficient transfers from
                 main off-chip memory by more efficient local data
                 transfers. In this article we present an automated
                 approach for analyzing these opportunities in a program
                 that allows modification of the program to use custom
                 scratch-pad memory configurations comprising a
                 hierarchical set of buffers for local storage of
                 frequently reused data. Using our approach we are able
                 to both reduce energy consumption of the memory
                 subsystem when using a scratch-pad memory by about a
                 factor of two, on average, and improve memory system
                 performance compared to a cache of the same size.",
  acknowledgement = ack-nhfb,
  articleno =    "15",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "compiler analysis; data reuse analysis; memory
                 hierarchy; Scratch-pad memory management",
}

@Article{Hosseinabady:2007:LTA,
  author =       "Mohammad Hosseinabady and Pejman Lotfi-Kamran and
                 Zainalabedin Navabi",
  title =        "Low test application time resource binding for
                 behavioral synthesis",
  journal =      j-TODAES,
  volume =       "12",
  number =       "2",
  pages =        "16:1--16:??",
  month =        apr,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1230800.1230808",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:08:48 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Recent advances in process technology have led to a
                 rapid increase in the density of integrated circuits
                 (ICs). Increased density and the need to test for new
                 types of defects in nanometer technologies have
                 resulted in a tremendous increase in test application
                 time (TAT). This article presents a test synthesis
                 method to reduce test application time for testing the
                 datapath of a design. The test application time is
                 reduced by applying a test-time-aware resource sharing
                 algorithm on a scheduled control data flow graph (CDFG)
                 of a design.",
  acknowledgement = ack-nhfb,
  articleno =    "16",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "CDFG; high-level synthesis; test synthesis;
                 Testability",
}

@Article{Elshoukry:2007:CPA,
  author =       "Mohammed Elshoukry and Mohammad Tehranipoor and C. P.
                 Ravikumar",
  title =        "A critical-path-aware partial gating approach for test
                 power reduction",
  journal =      j-TODAES,
  volume =       "12",
  number =       "2",
  pages =        "17:1--17:??",
  month =        apr,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1230800.1230809",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:08:48 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Power reduction during test application is important
                 from the viewpoint of chip reliability and for
                 obtaining correct test results. One of the ways to
                 reduce scan test power is to block transitions
                 propagating from the outputs of scan cells through
                 combinational logic. In order to accomplish this, some
                 researchers have proposed setting primary inputs to
                 appropriate values or adding extra gates at the outputs
                 of scan cells. In this article, we point out the
                 limitations of such full gating techniques in terms of
                 area overhead and performance degradation. We propose
                 an alternate solution where a partial set of scan cells
                 is gated. A subset of scan cells is selected to give
                 maximum reduction in test power within a given area
                 constraint. An alternate formulation of the problem is
                 to treat maximum permitted test power as a constraint
                 and achieve a test power that is within this limit
                 using the fewest number of gated scan cells, thereby
                 leading to the least impact in area overhead. Our
                 problem formulation also comprehends performance
                 constraints and prevents the inclusion of gating points
                 on critical paths. The area overhead is predictable and
                 closely corresponds to the average power reduction.",
  acknowledgement = ack-nhfb,
  articleno =    "17",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Low-power testing; partial gating; scan cell gating;
                 scan testing",
}

@Article{Pomeranz:2007:FDT,
  author =       "Irith Pomeranz and Sudhakar M. Reddy",
  title =        "Forming {N}-detection test sets without test
                 generation",
  journal =      j-TODAES,
  volume =       "12",
  number =       "2",
  pages =        "18:1--18:??",
  month =        apr,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1230800.1230810",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:08:48 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "We describe a procedure for forming $n$-detection test
                 sets for $ n > 1$ without applying a test generation
                 procedure to target faults. The proposed procedure
                 accepts a one-detection test set. It extracts test
                 cubes for target faults from the one-detection test
                 set, and merges the test cubes to obtain new test
                 vectors. By extracting and merging different test cubes
                 in different iterations of this process, an
                 $n$-detection test set is obtained. Merging of test
                 cubes does not require test generation or fault
                 simulation. Fault simulation is required for extracting
                 test cubes for target faults. We demonstrate that the
                 resulting test set is as effective in detecting
                 untargeted faults as an $n$-detection test set
                 generated by a deterministic test generation procedure.
                 We also discuss the application of the proposed
                 procedure starting from a random test set (instead of a
                 one-detection test set).",
  acknowledgement = ack-nhfb,
  articleno =    "18",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "$n$-detection test sets; Bridging faults; stuck-at
                 faults; test generation",
}

@Article{Fan:2007:ECD,
  author =       "Hongbing Fan and Jiping Liu and Yu-Liang Wu and
                 Chak-Chung Cheung",
  title =        "The exact channel density and compound design for
                 generic universal switch blocks",
  journal =      j-TODAES,
  volume =       "12",
  number =       "2",
  pages =        "19:1--19:??",
  month =        apr,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1230800.1230811",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:08:48 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "A switch block of $k$ sides $W$ terminals on each side
                 is said to be universal (a $ (k, W)$-USB) if it is
                 routable for every set of 2-pin nets of channel density
                 at most $W$. The generic optimum universal switch block
                 design problem is to design a $ (k, W)$-USB with the
                 minimum number of switches for every pair of $ (k, W)$.
                 This problem was first proposed and solved for $ k = 4$
                 in Chang et al. [1996], and then solved for even $W$ or
                 for $ k \leq 6$ in Shyu et al. [2000] and Fan et al.
                 [2002b]. No optimum $ (k, W)$-USB is known for $ k \geq
                 7$ and odd $ W \geq 3$. But it is already known that
                 when $W$ is a large odd number, a near-optimum $ (k,
                 W)$-USB can be obtained by a disjoint union of $ (W - f_2
                 (k)) / 2$ copies of the optimum $ (k, 2)$-USB and a
                 noncompound $ (k, f_2 (k))$-USB, where the value of $
                 f_2 (k)$ is unknown for $ k \geq 8$. In this article,
                 we show that $ f_2 (k) = k + 3 - i / 3$, where $ 1 \leq
                 i \leq 6$ and $ i \equiv k \pmod 6$, and present an
                 explicit design for the noncompound $ (k, f_2
                 (k))$-USB. Combining these two results we obtain the
                 exact designs of $ (k, W)$-USBs for all $ k \geq 7$ and
                 odd $ W \geq 3$. The new $ (k, W)$-USB designs also
                 yield an efficient detailed routing algorithm.",
  acknowledgement = ack-nhfb,
  articleno =    "19",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "FPGA architecture; routing algorithm; universal switch
                 block",
}

@Article{Lim:2007:ISI,
  author =       "Sung Kyu Lim and Massoud Pedram",
  title =        "Introduction to special issue on demonstrable software
                 systems and hardware platforms",
  journal =      j-TODAES,
  volume =       "12",
  number =       "3",
  pages =        "20:1--20:??",
  month =        aug,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1255456.1255457",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:09:12 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  articleno =    "20",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Hsu:2007:ESC,
  author =       "Chia-Jui Hsu and Ming-Yung Ko and Shuvra S.
                 Bhattacharyya and Suren Ramasubbu and Jos{\'e} Luis
                 Pino",
  title =        "Efficient simulation of critical synchronous dataflow
                 graphs",
  journal =      j-TODAES,
  volume =       "12",
  number =       "3",
  pages =        "21:1--21:??",
  month =        aug,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1255456.1255458",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:09:12 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "System-level modeling, simulation, and synthesis using
                 electronic design automation (EDA) tools are key steps
                 in the design process for communication and signal
                 processing systems, and the synchronous dataflow (SDF)
                 model of computation is widely used in EDA tools for
                 these purposes. Behavioral representations of modern
                 wireless communication systems typically result in
                 critical SDF graphs: These consist of hundreds of
                 components (or more) and involve complex intercomponent
                 connections with highly multirate relationships (i.e.,
                 with large variations in average rates of data transfer
                 or component execution across different subsystems).
                 Simulating such systems using conventional SDF
                 scheduling techniques generally leads to unacceptable
                 simulation time and memory requirements on modern
                 workstations and high-end PCs. In this article, we
                 present a novel simulation-oriented scheduler (SOS)
                 that strategically integrates several techniques for
                 graph decomposition and SDF scheduling to provide
                 effective, joint minimization of time and memory
                 requirements for simulating critical SDF graphs. We
                 have implemented SOS in the advanced design system
                 (ADS) from Agilent Technologies. Our results from this
                 implementation demonstrate large improvements in
                 simulating real-world, large-scale, and highly
                 multirate wireless communication systems (e.g., 3GPP,
                 Bluetooth, 802.16e, CDMA 2000, XM radio, EDGE, and
                 Digital TV).",
  acknowledgement = ack-nhfb,
  articleno =    "21",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Scheduling; simulation; synchronous dataflow",
}

@Article{Herrera:2007:FHS,
  author =       "Fernando Herrera and Eugenio Villar",
  title =        "A framework for heterogeneous specification and design
                 of electronic embedded systems in {SystemC}",
  journal =      j-TODAES,
  volume =       "12",
  number =       "3",
  pages =        "22:1--22:??",
  month =        aug,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1255456.1255459",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:09:12 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "This work proposes a methodology which enables
                 heterogeneous specification of complex, electronic
                 systems in SystemC supporting the integration of
                 components under different models of computation
                 (MoCs). This feature is necessary in order to deal with
                 the growing complexity, concurrency, and heterogeneity
                 of electronic embedded systems. The specification
                 methodology is based on the SystemC standard language.
                 Nevertheless, the use of SystemC for heterogeneous
                 system specification is not straightforward. The first
                 problem to be addressed is the efficient and
                 predictable mapping of untimed events required by
                 abstract MoCs over the discrete-event MoC on which the
                 SystemC simulation kernel is based. This mapping is
                 essential in order to understand the simulation results
                 provided by the SystemC model of those MoCs. The
                 specification methodology proposes the set of rules and
                 guidelines required by each specific MoC. Moreover, the
                 methodology supports a smooth integration of several
                 MoCs in the same system specification. A set of
                 facilities is provided covering the deficiencies of the
                 language. These facilities constitute the
                 methodology-specific library called HetSC. The
                 methodology and associated library have been
                 demonstrated to be useful for the specification of
                 complex, heterogeneous embedded systems supporting
                 essential design tasks such as performance analysis and
                 SW generation.",
  acknowledgement = ack-nhfb,
  articleno =    "22",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Heterogeneous specification; models of computation;
                 SystemC",
}

@Article{Lee:2007:CCA,
  author =       "Hyung Gyu Lee and Naehyuck Chang and Umit Y. Ogras and
                 Radu Marculescu",
  title =        "On-chip communication architecture exploration: a
                 quantitative evaluation of point-to-point, bus, and
                 network-on-chip approaches",
  journal =      j-TODAES,
  volume =       "12",
  number =       "3",
  pages =        "23:1--23:??",
  month =        aug,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1255456.1255460",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:09:12 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Traditionally, design-space exploration for
                 systems-on-chip (SoCs) has focused on the computational
                 aspects of the problem at hand. However, as the number
                 of components on a single chip and their performance
                 continue to increase, a shift from computation-based to
                 communication-based design becomes mandatory. As a
                 result, the communication architecture plays a major
                 role in the area, performance, and energy consumption
                 of the overall system. This article presents a
                 comprehensive evaluation of three on-chip communication
                 architectures targeting multimedia applications.
                 Specifically, we compare and contrast the
                 network-on-chip (NoC) with point-to-point (P2P) and
                 bus-based communication architectures in terms of area,
                 performance, and energy consumption. As the main
                 contribution, we present complete P2P, bus-, and
                 NoC-based implementations of a real multimedia
                 application (i.e., the MPEG-2 encoder), and provide
                 direct measurements using an FPGA prototype and actual
                 video clips, rather than simulation and synthetic
                 workloads. We also support the experimental findings
                 through a theoretical analysis. Both experimental and
                 analysis results show that the NoC architecture scales
                 very well in terms of area, performance, energy, and
                 design effort, while the P2P and bus-based
                 architectures scale poorly on all accounts except for
                 performance and area, respectively.",
  acknowledgement = ack-nhfb,
  articleno =    "23",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "FPGA prototype; MPEG-2 encoder; Networks-on-chip;
                 point-to-point; system-on-chip",
}

@Article{Ha:2007:PHS,
  author =       "Soonhoi Ha and Sungchan Kim and Choonseung Lee and
                 Youngmin Yi and Seongnam Kwon and Young-Pyo Joo",
  title =        "{PeaCE}: a hardware-software codesign environment for
                 multimedia embedded systems",
  journal =      j-TODAES,
  volume =       "12",
  number =       "3",
  pages =        "24:1--24:??",
  month =        aug,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1255456.1255461",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:09:12 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Existent hardware-software (HW-SW) codesign tools
                 mainly focus on HW-SW cosimulation to build a virtual
                 prototyping environment that enables software design
                 and system verification without need of making a
                 hardware prototype. Not only HW-SW cosimulation, but
                 also HW-SW codesign methodology involves system
                 specification, functional simulation, design-space
                 exploration, and hardware-software cosynthesis. The
                 PeaCE codesign environment is the first full-fledged
                 HW-SW codesign environment that provides seamless
                 codesign flow from functional simulation to system
                 synthesis. Targeting for multimedia applications with
                 real-time constraints, PeaCE specifies the system
                 behavior with a heterogeneous composition of three
                 models of computation and utilizes features of the
                 formal models maximally during the whole design
                 process. It is also a reconfigurable framework in the
                 sense that third-party design tools can be integrated
                 to build a customized tool chain. Experiments with
                 industry-strength examples prove the viability of the
                 proposed technique.",
  acknowledgement = ack-nhfb,
  articleno =    "24",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "design-space exploration; embedded systems;
                 Hardware-software codesign; hardware-software
                 cosimulation; model-based design",
}

@Article{Atienza:2007:HSE,
  author =       "David Atienza and Pablo G. {Del Valle} and Giacomo
                 Paci and Francesco Poletti and Luca Benini and Giovanni
                 {De Micheli} and Jose M. Mendias and Roman Hermida",
  title =        "{HW-SW} emulation framework for temperature-aware
                 design in {MPSoCs}",
  journal =      j-TODAES,
  volume =       "12",
  number =       "3",
  pages =        "26:1--26:??",
  month =        aug,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1255456.1255463",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:09:12 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "New tendencies envisage multiprocessor
                 systems-on-chips (MPSoCs) as a promising solution for
                 the consumer electronics market. MPSoCs are complex to
                 design, as they must execute multiple applications
                 (games, video) while meeting additional design
                 constraints (energy consumption, time-to-market).
                 Moreover, the rise of temperature in the die for MPSoCs
                 can seriously affect their final performance and
                 reliability. In this article, we present a new
                 hardware-software emulation framework that allows
                 designers a complete exploration of the thermal
                 behavior of final MPSoC designs early in the design
                 flow. The proposed framework uses FPGA emulation as the
                 key element to model hardware components of the
                 considered MPSoC platform at multimegahertz speeds. It
                 automatically extracts detailed system statistics that
                 are used as input to our software thermal library
                 running in a host computer. This library calculates at
                 runtime the temperature of on-chip components, based on
                 the collected statistics from the emulated system and
                 final floorplan of the MPSoC. This enables fast testing
                 of various thermal management techniques. Our results
                 show speedups of three orders of magnitude compared to
                 cycle-accurate MPSoC simulators.",
  acknowledgement = ack-nhfb,
  articleno =    "26",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "emulation; FPGA; MPSoC; temperature; Thermal-aware
                 design",
}

@Article{Wu:2007:EPM,
  author =       "Wei Wu and Lingling Jin and Jun Yang and Pu Liu and
                 Sheldon X.-D. Tan",
  title =        "Efficient power modeling and software thermal sensing
                 for runtime temperature monitoring",
  journal =      j-TODAES,
  volume =       "12",
  number =       "3",
  pages =        "25:1--25:??",
  month =        aug,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1255456.1255462",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:09:12 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "The evolution of microprocessors has been hindered by
                 increasing power consumption and heat dissipation on
                 die. An excessive amount of heat creates reliability
                 problems, reduces the lifetime of a processor, and
                 elevates the cost of cooling and packaging
                 considerably. It is therefore imperative to be able to
                 monitor the temperature variations across the die in a
                 timely and accurate manner. \par

                 Most current techniques rely on on-chip thermal sensors
                 to report the temperature of the processor.
                 Unfortunately, significant variation in chip
                 temperature both spatially and temporally exposes the
                 limitation of the sensors. We present a compensating
                 approach to tracking chip temperature through an OS
                 resident software module that generates live power and
                 thermal profiles of the processor. We developed such a
                 software thermal sensor (STS) in a Linux system with a
                 Pentium 4 Northwood core. We employed highly efficient
                 numerical methods in our model to minimize the overhead
                 of temperature calculation. We also developed an
                 efficient algorithm for functional unit power modeling.
                 Our power and thermal models are calibrated and
                 validated against on-chip sensor readings, thermal
                 images of the Northwood heat spreader, and the
                 thermometer measurements on the package. The resulting
                 STS offers detailed power and temperature breakdowns of
                 each functional unit at runtime, enabling more
                 efficient online power and thermal monitoring and
                 management at a higher level, such as the operating
                 system.",
  acknowledgement = ack-nhfb,
  articleno =    "25",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Power; thermal",
}

@Article{Huang:2007:ESC,
  author =       "Po-Kuan Huang and Soheil Ghiasi",
  title =        "Efficient and scalable compiler-directed energy
                 optimization for realtime applications",
  journal =      j-TODAES,
  volume =       "12",
  number =       "3",
  pages =        "27:1--27:??",
  month =        aug,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1255456.1255464",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:09:12 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "With continuing shrinkage of technology feature sizes,
                 the share of leakage in total energy consumption of
                 digital systems continues to grow. Coordinated supply
                 voltage and body bias throttling enables the compiler
                 to better optimize the total energy consumption of the
                 system in future technology nodes. We present a
                 compilation technique that targets realtime
                 applications running on embedded processors with
                 combined dynamic voltage scaling (DVS) and adaptive
                 body biasing (ABB) capabilities. Considering the delay
                 and energy penalty of switching between operating modes
                 of the processor, our compiler judiciously inserts
                 mode-switch instructions in selected locations of the
                 code and generates executable binary that is guaranteed
                 to meet the deadline constraint. More importantly, our
                 algorithm runs very fast and comes reasonably close to
                 the theoretical limit of energy optimization using DVS
                 + ABB. At 65nm technology, we improve the energy
                 dissipation of the generated code by an average of
                 33.20\% under deadline constraints. While our technique's
                 improvement in energy dissipation over conventional DVS
                 is marginal (6.91\%) at 130nm, the average improvement
                 continues to grow to 13.19\%, 22.97\%, and 33.21\%
                 for 90nm, 65nm, and 45nm technology nodes,
                 respectively. Compared to a recent ILP-based
                 competitor, we improve the runtime by more than three
                 orders of magnitude, while producing improved
                 results.",
  acknowledgement = ack-nhfb,
  articleno =    "27",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "energy-aware compiler; Leakage; technology scaling",
}

@Article{Shi:2007:CSO,
  author =       "Yiyu Shi and Paul Mesa and Hao Yu and Lei He",
  title =        "Circuit-simulated obstacle-aware {Steiner} routing",
  journal =      j-TODAES,
  volume =       "12",
  number =       "3",
  pages =        "28:1--28:??",
  month =        aug,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1255456.1255465",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:09:12 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "This article develops circuit-simulated routing
                 algorithms. We model the routing graph by an RC network
                 with terminals as inputs, and show that the faster an
                 output reaches its peak, the higher the possibility for
                 the corresponding Hanan or escape node to become a
                 Steiner point. This enables us to select Steiner points
                 and then apply any minimum spanning tree algorithm to
                 obtain obstacle-free or obstacle-aware Steiner routing.
                 Compared with existing algorithms, our algorithms have
                 significant gain on either wirelength or runtime for
                 obstacle-free routing, and on both wirelength and
                 runtime for obstacle-aware routing.",
  acknowledgement = ack-nhfb,
  articleno =    "28",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "OARSMT; Routing; RSMT; simulation",
}

@Article{Chakrapani:2007:PSC,
  author =       "Lakshmi N. Chakrapani and Pinar Korkmaz and Bilge E.
                 S. Akgul and Krishna V. Palem",
  title =        "Probabilistic system-on-a-chip architectures",
  journal =      j-TODAES,
  volume =       "12",
  number =       "3",
  pages =        "29:1--29:??",
  month =        aug,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1255456.1255466",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:09:12 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Parameter variations, noise susceptibility, and
                 increasing energy dissipation of CMOS devices have been
                 recognized as major challenges in circuit and
                 microarchitecture design in the nanometer regime. Among
                 these, parameter variations and noise susceptibility
                 are increasingly causing CMOS devices to behave in an
                 ``unreliable'' or ``probabilistic'' manner. To address
                 these challenges, a shift in design paradigm from
                 current-day deterministic designs to ``statistical'' or
                 ``probabilistic'' designs is deemed inevitable. To
                 respond to this need, in this article, we introduce and
                 study an entirely novel family of probabilistic
                 architectures: the probabilistic system-on-a-chip
                 (PSOC). PSOC architectures are based on CMOS devices
                 rendered probabilistic due to noise, referred to as
                 probabilistic CMOS or PCMOS devices. We demonstrate
                 that in addition to harnessing the probabilistic
                 behavior of PCMOS devices, PSOC architectures yield
                 significant improvements, both in energy consumed as
                 well as performance in the context of probabilistic or
                 randomized applications with broad utility. All of our
                 application and architectural savings are quantified
                 using the product of the energy and performance,
                 denoted (energy $ \times $ performance): The
                 PCMOS-based gains are as high as a substantial
                 multiplicative factor of over 560 when compared to a
                 competing energy-efficient CMOS-based realization. Our
                 architectural design is application specific and
                 involves navigating design space spanning the algorithm
                 (application), its architecture (PSOC), and the
                 probabilistic technology (PCMOS).",
  acknowledgement = ack-nhfb,
  articleno =    "29",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Embedded systems; probabilistic computing",
}

@Article{Hsieh:2007:FDC,
  author =       "Ang-Chih Hsieh and Tzu-Teng Lin and Tsuang-Wei Chang
                 and Tingting Hwang",
  title =        "A functionality-directed clustering technique for
                 low-power {MTCMOS} design---computation of
                 simultaneously discharging current",
  journal =      j-TODAES,
  volume =       "12",
  number =       "3",
  pages =        "30:1--30:??",
  month =        aug,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1255456.1255467",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:09:12 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Multithreshold CMOS (MTCMOS) is a circuit style that
                 can effectively reduce leakage power consumption. Sleep
                 transistor sizing is the key issue when a MTCMOS
                 circuit is designed. If the size of sleep transistor is
                 large enough, the circuit performance can surely be
                 maintained but the area and dynamic power consumption
                 of the sleep transistor may increase. On the other
                 hand, if the sleep transistor size is too small, there
                 will be significant performance degradation because of
                 the increased resistance to ground. Previous approaches
                 [Kao et al. 1998; Anis et al. 2002] to designing sleep
                 transistor size are based mainly on mutually-exclusive
                 discharge patterns. However, these approaches
                 considered only the topology of a circuit (i.e.,
                 interconnections of nodes in the circuit-graph saving
                 the functionality of node). We observed that any two
                 possible simultaneously switching gates may not
                 discharge at the same time in terms of functionality.
                 Thus, we propose an algorithm to determine how to
                 cluster cells to share sleep transistors, while taking
                 both topology and functionality into consideration.
                 Moreover, one placement refinement algorithm that takes
                 clustering information into account will be presented.
                 At the logic level, the results show that the proposed
                 clustering method can achieve an average of 22\%
                 reduction in terms of the number of unit-size sleep
                 transistors as compared to a method that does not
                 consider functionality. At the physical level, two
                 placement results are discussed. The first is produced
                 by a traditional placement tool plus topology check
                 (functionality check) for insertion of sleep
                 transistors. It shows that the functionality check
                 algorithm produces 9\% less chip area as compared with
                 the topology check algorithm. The second result is
                 produced by a placement refinement algorithm where the
                 initial placement is done in the first placement
                 experiment. It shows that the placement refinement
                 algorithm achieves 5\% more reduction in area at the
                 expense of 4\% increase in wire length. Totally, around
                 14\% reduction is achieved by utilizing the clustering
                 information.",
  acknowledgement = ack-nhfb,
  articleno =    "30",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "DSTN; low power; MTCMOS; sleep transistor",
}

@Article{Dastidar:2007:VST,
  author =       "Tathagato Rai Dastidar and P. P. Chakrabarti",
  title =        "A verification system for transient response of analog
                 circuits",
  journal =      j-TODAES,
  volume =       "12",
  number =       "3",
  pages =        "31:1--31:??",
  month =        aug,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1255456.1255468",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:09:12 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "We present a method for application of formal
                 techniques like model checking and equivalence checking
                 for validation of the transient response of nonlinear
                 analog circuits. We propose a temporal logic called Ana
                 CTL (computational tree logic for analog circuit
                 verification) which is suitable for specifying
                 properties specific to analog circuits. The application
                 of Ana CTL for validation of transient behavior of
                 arbitrarily nonlinear analog circuits is presented. The
                 transient response of a circuit under all possible
                 input waveforms is represented as a finite state
                 machine (FSM), by bounding and discretizing the
                 continuous state space of an analog circuit. We have
                 developed algorithms to run Ana CTL queries on this
                 discretized model using search-based methods which
                 reduce the runtime considerably by avoiding creation of
                 the whole FSM. The application of these methods on
                 several real-life analog circuits is presented and we
                 show that this system is a useful aid for detecting and
                 debugging early design errors. \par

                 We also present methods for checking the equivalence of
                 transient response of two analog circuits. The behavior
                 of two different analog circuits can rarely be exactly
                 similar. Hence, we introduce a notion of approximate
                 equivalence. A query language for checking different
                 notions of user-definable approximate equivalence is
                 presented which extends the syntax of the Ana CTL model
                 checking language. In its extended form, Ana CTL can be
                 used combining model checking with equivalence
                 checking.",
  acknowledgement = ack-nhfb,
  articleno =    "31",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Ana CTL; Analog circuits; equivalence checking; model
                 checking; query language; transient response",
}

@Article{Chang:2007:PRE,
  author =       "Kai-Hui Chang and Igor L. Markov and Valeria
                 Bertacco",
  title =        "Postplacement rewiring by exhaustive search for
                 functional symmetries",
  journal =      j-TODAES,
  volume =       "12",
  number =       "3",
  pages =        "32:1--32:??",
  month =        aug,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1255456.1255469",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:09:12 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "We propose two new algorithms for rewiring: a
                 postplacement optimization that reconnects pins of a
                 given netlist without changing the logic function and
                 gate locations. In the first algorithm, we extract
                 small subcircuits consisting of several gates from the
                 design and reconnect pins according to the symmetries
                 of the subcircuits. To enhance the power of symmetry
                 detection, we also propose a graph-based symmetry
                 detector that can identify permutational and
                 phase-shift symmetries on multiple input and output
                 wires, as well as hybrid symmetries, creating abundant
                 opportunities for rewiring. Our second algorithm,
                 called long-range rewiring, is based on reconnecting
                 equivalent pins and can augment the first approach for
                 further optimization. We apply our techniques for
                 wirelength optimization and observe that they provide
                 wirelength reduction comparable to that achieved by
                 detailed placement.",
  acknowledgement = ack-nhfb,
  articleno =    "32",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "placement; rewiring; VLSI",
}

@Article{Mathaikutty:2007:EMD,
  author =       "Deepak Mathaikutty and Hiren Patel and Sandeep Shukla
                 and Axel Jantsch",
  title =        "{EWD}: a metamodeling driven customizable multi-{MoC}
                 system modeling framework",
  journal =      j-TODAES,
  volume =       "12",
  number =       "3",
  pages =        "33:1--33:??",
  month =        aug,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1255456.1255470",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:09:12 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "We present the EWD design environment and methodology,
                 a modeling and simulation framework suited for complex
                 and heterogeneous embedded systems with varying degrees
                 of expressibility and modeling fidelity. This
                 environment promotes the use of multiple models of
                 computation (MoCs) to support heterogeneity and
                 metamodeling for conformance tests of syntactic and
                 static semantics during the process of modeling.
                 Therefore, EWD is a multiple MoC modeling and
                 simulation framework that ensures conformance of the
                 MoC formalisms during model construction using a
                 metamodeling approach. In addition, EWD provides a
                 suite of translation tools that generate executable
                 models for two simulation frameworks to demonstrate its
                 language-independent modeling framework. The EWD
                 methodology uses the Generic Modeling Environment for
                 customization of the MoC-specific modeling syntax into
                 a visual representation. To embed the execution
                 semantics of the MoCs into the models, we have built
                 parsing and translation tools that leverage an
                 XML-based interoperability language. This
                 interoperability language is then translated into
                 executable Standard ML or Haskell models that can also
                 be analyzed by existing simulation frameworks such as
                 SML-Sys or ForSyDe. In summary, EWD is a metamodeling
                 driven multitarget design environment with multi-MoC
                 modeling capability.",
  acknowledgement = ack-nhfb,
  articleno =    "33",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "denotational semantics; ForSyDe; functional language;
                 heterogeneous system design; interoperable modeling
                 language; metamodel; Metamodeling; MoC; Ptolemy II;
                 SystemC",
}

@Article{Stitt:2007:BS,
  author =       "Greg Stitt and Frank Vahid",
  title =        "Binary synthesis",
  journal =      j-TODAES,
  volume =       "12",
  number =       "3",
  pages =        "34:1--34:??",
  month =        aug,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1255456.1255471",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:09:12 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Recent high-level synthesis approaches and C-based
                 hardware description languages attempt to improve the
                 hardware design process by allowing developers to
                 capture desired hardware functionality in a well-known
                 high-level source language. However, these approaches
                 have yet to achieve wide commercial success due in part
                 to the difficulty of incorporating such approaches into
                 software tool flows. The requirement of using a
                 specific language, compiler, or development environment
                 may cause many software developers to resist such
                 approaches due to the difficulty and possible
                 instability of changing well-established robust tool
                 flows. Thus, in the past several years, synthesis from
                 binaries has been introduced, both in research and in
                 commercial tools, as a means of better integrating with
                 tool flows by supporting all high-level languages and
                 software compilers. Binary synthesis can be more easily
                 integrated into a software development tool-flow by
                 only requiring an additional backend tool, and it even
                 enables completely transparent dynamic translation of
                 executing binaries to configurable hardware circuits.
                 In this article, we survey the key technologies
                 underlying the important emerging field of binary
                 synthesis. We compare binary synthesis to several
                 related areas of research, and we then describe the key
                 technologies required for effective binary synthesis:
                 decompilation techniques necessary for binary synthesis
                 to achieve results competitive with source-level
                 synthesis, hardware/software partitioning methods
                 necessary to find critical binary regions suitable for
                 synthesis, synthesis methods for converting regions to
                 custom circuits, and binary update methods that enable
                 replacement of critical binary regions by circuits.",
  acknowledgement = ack-nhfb,
  articleno =    "34",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Binary synthesis; configurable logic; FPGA;
                 hardware/software codesign; hardware/software
                 partitioning; synthesis from software binaries; warp
                 processors",
}

@Article{Galanis:2007:SES,
  author =       "Michalis D. Galanis and Gregory Dimitroulakos and
                 Spyros Tragoudas and Costas E. Goutis",
  title =        "Speedups in embedded systems with a high-performance
                 coprocessor datapath",
  journal =      j-TODAES,
  volume =       "12",
  number =       "3",
  pages =        "35:1--35:??",
  month =        aug,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1255456.1255472",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:09:12 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "This article presents the speedups achieved in a
                 generic single-chip microprocessor system by employing
                 a high-performance datapath. The datapath acts as a
                 coprocessor that accelerates computational-intensive
                 kernel sections thereby increasing the overall
                 performance. We have previously introduced the datapath
                 which is composed of Flexible Computational Components
                 (FCCs). These components can realize any two-level
                 template of primitive operations. The automated
                 coprocessor synthesis method from high-level software
                 description and its integration to a design flow for
                 executing applications on the system is presented. For
                 evaluating the effectiveness of our coprocessor
                 approach, analytical study in respect to the type of
                 the custom datapath and to the microprocessor
                 architecture is performed. The overall application
                 speedups of several real-life applications relative to
                 the software execution on the microprocessor are
                 estimated using the design flow. These speedups range
                 from 1.75 to 5.84, with an average value of 3.04,
                 while the overhead in circuit area is small. The design
                 flow achieved the acceleration of the applications near
                 to theoretical speedup bounds. A comparison with
                 another high-performance datapath showed that the
                 proposed coprocessor achieves smaller area-time
                 products by an average of 23\% for the generated
                 datapaths. Additionally, the FCC coprocessor achieves
                 better performance in accelerating kernels relative to
                 software-programmable DSP cores.",
  acknowledgement = ack-nhfb,
  articleno =    "35",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "chaining; coprocessor datapath; design flow; kernels;
                 Performance improvements; synthesis",
}

@Article{Roy:2007:EPA,
  author =       "Suchismita Roy and P. P. Chakrabarti and Pallab
                 Dasgupta",
  title =        "Event propagation for accurate circuit delay
                 calculation using {SAT}",
  journal =      j-TODAES,
  volume =       "12",
  number =       "3",
  pages =        "36:1--36:??",
  month =        aug,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1255456.1255473",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:09:12 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "A SAT-based modeling for event propagation in
                 gate-level digital circuits, which is used for accurate
                 calculation of critical delay in combinational and
                 sequential circuits, is presented in this article. The
                 accuracy of the critical delay estimation process
                 depends on the accuracy with which the circuit in
                 operation is modeled. A high level of precision in the
                 modeling of the internal events in a circuit for the
                 sake of greater accuracy causes a combinatorial blowup
                 in the size of the problem, resulting in a scalability
                 bottleneck for which most existing techniques effect a
                 trade-off by restricting themselves to less precise
                 models. SAT based techniques have a good track record
                 in efficiency and scalability when the problem sizes
                 become too large for most other methods. This article
                 proposes a SAT-based technique for symbolic event
                 propagation within a circuit which facilitates the
                 estimation of the critical delay of circuits with a
                 greater degree of accuracy, while at the same time
                 scaling efficiently to large circuits. We report very
                 encouraging results on the ISCAS85 and ISCAS89
                 benchmark circuits using the proposed technique.",
  acknowledgement = ack-nhfb,
  articleno =    "36",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Critical delay; event propagation; SAT",
}

@Article{Yuh:2007:TFU,
  author =       "Ping-Hung Yuh and Chia-Lin Yang and Yao-Wen Chang",
  title =        "Temporal floorplanning using the three-dimensional
                 transitive closure {subGraph}",
  journal =      j-TODAES,
  volume =       "12",
  number =       "4",
  pages =        "37:1--37:??",
  month =        sep,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1278349.1278350",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:09:35 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Improving logic capacity by time-sharing, dynamically
                 reconfigurable Field Programmable Gate Arrays (FPGAs)
                 are employed to handle designs of high complexity and
                 functionality. In this paper, we use a novel
                 graph-based topological floorplan representation, named
                 3D-subTCG (3-Dimensional Transitive Closure subGraph),
                 to deal with the 3-dimensional (temporal)
                 floorplanning/placement problem, arising from
                 dynamically reconfigurable FPGAs. The 3D-subTCG uses
                 three transitive closure graphs to model the temporal
                 and spatial relations between modules. We derive the
                 feasibility conditions for the precedence constraints
                 induced by the execution of the dynamically
                 reconfigurable FPGAs. Because the geometric
                 relationship is transparent to the 3D-subTCG and its
                 induced operations (i.e., we can directly detect the
                 relationship between any two tasks from the
                 representation), we can easily detect any violation of
                 the temporal precedence constraints on 3D-subTCG. We
                 also derive important properties of the 3D-subTCG to
                 reduce the solution space and shorten the running time
                 for 3D (temporal) floorplanning/placement. Experimental
                 results show that our 3D-subTCG-based algorithm is very
                 effective and efficient.",
  acknowledgement = ack-nhfb,
  articleno =    "37",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "partially dynamical reconfiguration; Reconfigurable
                 computing; temporal floorplanning",
}

@Article{Liu:2007:IEM,
  author =       "Jinfeng Liu and Pai H. Chou",
  title =        "Idle energy minimization by mode sequence
                 optimization",
  journal =      j-TODAES,
  volume =       "12",
  number =       "4",
  pages =        "38:1--38:??",
  month =        sep,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1278349.1278351",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:09:35 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "This article presents techniques for reducing idle
                 energy by mode-sequence optimization (MSO) under timing
                 constraints. Our component-level CoMSO algorithm
                 computes energy-optimal mode-transition sequences for
                 different lengths of idle intervals. Our system-level
                 SyMSO algorithm shifts tasks within slack intervals
                 while satisfying all timing and resource constraints in
                 the given schedule. Experimental results on a
                 commercial software-defined radio show that these new
                 techniques can reduce idle energy by 50--70\%, or
                 30--50\% of total system energy over previous
                 offline-optimal but unsequenced techniques based on
                 localized break-even-time analysis, thanks to rich
                 options offered by mode sequencing.",
  acknowledgement = ack-nhfb,
  articleno =    "38",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "communication speed selection;
                 communication/computation trade-offs; embedded
                 multi-processor; Functional partitioning; low-power
                 design",
}

@Article{Gorjiara:2007:UFE,
  author =       "Bita Gorjiara and Nader Bagherzadeh and Pai H. Chou",
  title =        "Ultra-fast and efficient algorithm for energy
                 optimization by gradient-based stochastic voltage and
                 task scheduling",
  journal =      j-TODAES,
  volume =       "12",
  number =       "4",
  pages =        "39:1--39:??",
  month =        sep,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1278349.1278352",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:09:35 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "This paper presents a new technique, called Adaptive
                 Stochastic Gradient Voltage-and-Task Scheduling
                 (ASG-VTS), for power optimization of multicore hard
                 realtime systems. ASG-VTS combines stochastic and
                 energy-gradient techniques to simultaneously solve the
                 slack distribution and task reordering problem. It
                 produces very efficient results with few mode
                 transitions. Our experiments show that ASG-VTS reduces
                 number of mode transitions by 4.8 times compared to
                 traditional energy-gradient-based approaches. Also, our
                 heuristic algorithm can quickly find a solution that is
                 as good as the optimal for a real-life GSM
                 encoder/decoder benchmark. The runtime of ASG-VTS is
                 150 times and 1034 times faster than energy-gradient
                 based and optimal ILP algorithms, respectively. Since
                 the runtime of ASG-VTS is very low, it is ideal for
                 design space exploration in system-level design tools.
                 We have also developed a web-based interface for
                 ASG-VTS algorithm.",
  acknowledgement = ack-nhfb,
  articleno =    "39",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Power management; slack distribution; voltage and task
                 scheduling",
}

@Article{Vanbroekhoven:2007:PDS,
  author          = {Peter Vanbroekhoven and Gerda Janssens and Maurice
                     Bruynooghe and Francky Catthoor},
  title           = {A practical dynamic single assignment transformation},
  journal         = j-TODAES,
  volume          = {12},
  number          = {4},
  pages           = {40:1--40:??},
  month           = sep,
  year            = {2007},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/1278349.1278353},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Jun 12 18:09:35 MDT 2008},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {This paper presents a novel method to construct a
                     dynamic single assignment (DSA) form of array
                     intensive, pointer free C programs. A program in DSA
                     form does not perform any destructive update of scalars
                     and array elements; that is, each element is written at
                     most once. As DSA makes the dependencies between
                     variable references explicit, it facilitates complex
                     analyses and optimizations of programs. Existing
                     transformations into DSA perform a complex data flow
                     analysis with exponential analysis time, and they work
                     only for a limited class of input programs. Our method
                     removes irregularities from the data flow by adding
                     copy assignments to the program, so that it can use
                     simple data flow analyses. The presented DSA
                     transformation scales very well with growing program
                     sizes and overcomes a number of important limitations
                     of existing methods. We have implemented the method and
                     it is being used in the context of memory optimization
                     and verification of those optimizations. Experiments
                     show that in practice, the method scales well indeed,
                     and that added copy operations can be removed in case
                     they are unwanted.},
  acknowledgement = ack-nhfb,
  articleno       = {40},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {arrays; Data flow analysis; parallelization; reaching
                     definitions; single assignment},
}

@Article{Kobayashi:2007:MOS,
  author =       "Yuki Kobayashi and Murali Jayapala and Praveen
                 Raghavan and Francky Catthoor and Masaharu Imai",
  title =        "Methodology for operation shuffling and {L0} cluster
                 generation for low energy heterogeneous {VLIW}
                 processors",
  journal =      j-TODAES,
  volume =       "12",
  number =       "4",
  pages =        "41:1--41:??",
  month =        sep,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1278349.1278354",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:09:35 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Clustering L0 buffers is effective for energy
                 reduction in the instruction memory hierarchy of
                 embedded VLIW processors. However, the efficiency of
                 the clustering depends on the schedule of the target
                 application. Especially in heterogeneous or data
                 clustered VLIW processors, determining energy efficient
                 scheduling is more constraining. \par

                 This article proposes a realistic technique supported
                 by a tool flow to explore operation shuffling for
                 improving generation of L0 clusters. The tool flow
                 explores assignment of operations for each cycle and
                 generates various schedules. This approach makes it
                 possible to reduce energy consumption for various
                 processor architectures. However, the computational
                 complexity is large because of the huge exploration
                 space. Therefore, some heuristics are also developed,
                 which reduce the size of the exploration space while
                 the solution quality remains reasonable. Furthermore,
                 we also propose a technique to support VLIW processors
                 with multiple data clusters, which is essential to
                 apply the methodology to real world processors.
                 \par

                 The experimental results indicate potential gains of up
                 to 27.6\% in energy in L0 buffers, through operation
                 shuffling for heterogeneous processor architectures as
                 well as a homogeneous architecture. Furthermore, the
                 proposed heuristics drastically reduce the exploration
                 search space by about 90\%, while the results are
                 comparable to full search, with average differences of
                 less than 1\%. The experimental results indicate that
                 energy efficiency can be improved in most of the media
                 benchmarks by the proposed methodology, where the
                 average gain is around 10\% in comparison with
                 generating clusters without operation shuffling.",
  acknowledgement = ack-nhfb,
  articleno =    "41",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Compilers for low energy; loop buffers; VLIW
                 processors",
}

@Article{Maslov:2007:TSR,
  author          = {D. Maslov and G. W. Dueck and D. M. Miller},
  title           = {Techniques for the synthesis of reversible {Toffoli}
                     networks},
  journal         = j-TODAES,
  volume          = {12},
  number          = {4},
  pages           = {42:1--42:??},
  month           = sep,
  year            = {2007},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/1278349.1278355},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Jun 12 18:09:35 MDT 2008},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {We present certain new techniques for the synthesis of
                     reversible networks of Toffoli gates, as well as
                     improvements to previous methods. Gate count and
                     technology oriented cost metrics are used. Two new
                     synthesis procedures employing Reed--Muller spectra are
                     introduced and shown to complement earlier synthesis
                     approaches. The previously proposed template
                     simplification method is enhanced through the
                     introduction of a faster and more efficient template
                     application algorithm, an updated classification of the
                     templates, and the addition of new templates of sizes 7
                     and 9. A resynthesis approach is introduced wherein a
                     sequence of gates is chosen from a network, and the
                     reversible specification it realizes is resynthesized
                     as an independent problem in hopes of reducing the
                     network cost. Empirical results are presented to show
                     that the methods are efficient in terms of the
                     realization of reversible benchmark specifications.},
  acknowledgement = ack-nhfb,
  articleno       = {42},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {circuit optimization; quantum computing; reversible
                     logic synthesis},
}

@Article{Bouchebaba:2007:MMO,
  author          = {Youcef Bouchebaba and Bruno Girodias and Gabriela
                     Nicolescu and El Mostapha Aboulhamid and Bruno
                     Lavigueur and Pierre Paulin},
  title           = {{MPSoC} memory optimization using program
                     transformation},
  journal         = j-TODAES,
  volume          = {12},
  number          = {4},
  pages           = {43:1--43:??},
  month           = sep,
  year            = {2007},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/1278349.1278356},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Jun 12 18:09:35 MDT 2008},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {Multiprocessor system-on-a-chip (MPSoC) architectures
                     have received a lot of attention in the past years, but
                     few advances in compilation techniques target these
                     architectures. This is particularly true for the
                     exploitation of data locality. Most of the compilation
                     techniques for parallel architectures discussed in the
                     literature are based on a single loop nest. This
                     article presents new techniques that consist in
                     applying loop fusion and tiling to several loop nests
                     and to parallelize the resulting code across different
                     processors. These two techniques reduce the number of
                     memory accesses. However, they increase dependencies
                     and thereby reduce the exploitable parallelism in the
                     code. This article tries to address this contradiction.
                     To optimize the memory space used by temporary arrays,
                     smaller buffers are used as a replacement. Different
                     strategies are studied to optimize the processing time
                     spent accessing these buffers. The experiments show
                     that these techniques yield a significant reduction in
                     the number of data cache misses (30\%) and in
                     processing time (50\%).},
  acknowledgement = ack-nhfb,
  articleno       = {43},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {compiler transformations; data cache; Data locality;
                     embedded systems},
}

@Article{Das:2007:FVT,
  author          = {Dipankar Das and P. P. Chakrabarti and Rajeev Kumar},
  title           = {Functional verification of task partitioning for
                     multiprocessor embedded systems},
  journal         = j-TODAES,
  volume          = {12},
  number          = {4},
  pages           = {44:1--44:??},
  month           = sep,
  year            = {2007},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/1278349.1278357},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Jun 12 18:09:35 MDT 2008},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {With the advent of multiprocessor embedded platforms,
                     application partitioning and mapping have gained
                     primacy as a design step. The output of this design
                     step is a multithreaded partitioned application where
                     each thread is mapped to a processing element
                     (processor or ASIC) in the multiprocessor platform.
                     This partitioned application must be verified to be
                     consistent with the native unpartitioned application.
                     This verification task is called application (or task)
                     partitioning verification. \par

                     This work proposes a code-block-level
                     containment-checking-based methodology for application
                     partitioning verification. We use a UML-based
                     code-block-level modeling language which is rich enough
                     to model most designs. We formulate the application
                     partitioning verification problem as a special case of
                     the containment checking problem, which we call the
                     complete containment checking problem. We propose a
                     state space reduction technique specific to the
                     containment checking, reachability analysis, and
                     deadlock detection problems. We propose novel data
                     structures and token propagation methodologies which
                     enhance the efficiency of containment checking. We
                     present an efficient containment checking algorithm for
                     the application partitioning verification problem. We
                     develop a containment checking tool called TraceMatch
                     and present experimental results. We present a
                     comparison of the state space reduction achieved by
                     TraceMatch with that achieved by formal analysis and
                     verification tools like Spin, PEP, PROD, and LoLA.},
  acknowledgement = ack-nhfb,
  articleno       = {44},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {Containment checking; multiprocessor embedded systems;
                     state space reduction; UML activity diagrams},
}

@Article{Huang:2007:CSS,
  author          = {Shih-Hsu Huang and Yow-Tyng Nieh},
  title           = {Clock skew scheduling with race conditions
                     considered},
  journal         = j-TODAES,
  volume          = {12},
  number          = {4},
  pages           = {45:1--45:??},
  month           = sep,
  year            = {2007},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/1278349.1278358},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Jun 12 18:09:35 MDT 2008},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {In this article, we provide a fresh viewpoint to the
                     interactions between clock skew scheduling and delay
                     insertion. A race-condition-aware (RCA) clock skew
                     scheduling is proposed to determine the clock skew
                     schedule by taking race conditions (i.e., hold
                     violations) into account. Our objective is not only to
                     optimize the clock period, but also to minimize
                     heuristically the required inserted delay. Compared
                     with previous work, our major contribution includes the
                     following two aspects. First, our approach achieves
                     exactly the same results, but has significant
                     improvement in time complexity. Second, our viewpoint
                     can be generalized to other sequential timing
                     optimization techniques.},
  acknowledgement = ack-nhfb,
  articleno       = {45},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {logic synthesis; performance optimization; Sequential
                     circuits; timing optimization},
}

@Article{Wang:2007:ETR,
  author =       "Gang Wang and Wenrui Gong and Brian Derenzi and Ryan
                 Kastner",
  title =        "Exploring time\slash resource trade-offs by solving
                 dual scheduling problems with the ant colony
                 optimization",
  journal =      j-TODAES,
  volume =       "12",
  number =       "4",
  pages =        "46:1--46:??",
  month =        sep,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1278349.1278359",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:09:35 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Design space exploration during high-level synthesis
                 is often conducted through ad hoc probing of the
                 solution space using some scheduling algorithm. This is
                 not only time consuming but also very dependent on
                 designer's experience. We propose a novel design
                 exploration method that exploits the duality of time-
                 and resource-constrained scheduling problems. Our
                 exploration automatically constructs a time/area
                 tradeoff curve in a fast, effective manner. It is a
                 general approach and can be combined with any
                 high-quality scheduling algorithm. In our work, we use
                 the max-min ant colony optimization technique to solve
                 both time- and resource-constrained scheduling
                 problems. Our algorithm provides significant
                 solution-quality savings (average 17.3\% reduction of
                 resource counts) with similar runtime compared to using
                 force-directed scheduling exhaustively at every time
                 step. It also scales well across a comprehensive
                 benchmark suite constructed with classic and real-life
                 samples.",
  acknowledgement = ack-nhfb,
  articleno =    "46",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "ant colony optimization; Design space exploration;
                 instruction scheduling; max-min ant system",
}

@Article{Ghosh:2007:LPT,
  author          = {Swaroop Ghosh and Swarup Bhunia and Kaushik Roy},
  title           = {Low-Power and Testable Circuit Synthesis Using
                     {Shannon} Decomposition},
  journal         = j-TODAES,
  volume          = {12},
  number          = {4},
  pages           = {47:1--47:??},
  month           = sep,
  year            = {2007},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/1278349.1278360},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Fri Jul 21 10:34:15 2017},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/bibnet/authors/s/shannon-claude-elwood.bib;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {Structural transformation of a design to enhance its
                     testability while satisfying design constraints on
                     power and performance can result in improved test cost
                     and test confidence. In this article, we analyze the
                     testability in a new style of logic design based on
                     Shannon's decomposition and supply gating. We observe
                     that the tree structure of a logic circuit due to
                     Shannon's decomposition makes it intrinsically more
                     testable than a conventionally synthesized circuit,
                     while at the same time providing an improvement in
                     active power. We have analyzed four different aspects
                     of the testability of a circuit: (a) IDDQ test
                     sensitivity, (b) test power during scan-based testing,
                     (c) test length (for both ATPG-generated deterministic
                     and random patterns), and (d) noise immunity.
                     Simulation results on a set of MCNC benchmarks show
                     promising results on all these aspects (an average
                     improvement of 94\% in IDDQ sensitivity, 50\% in test
                     power, 19\% (21\%) in test length for deterministic
                     (random) patterns, and 50\% in coupling noise
                     immunity). We have also demonstrated that the new logic
                     structure can improve parametric yield (6\% on average)
                     of a circuit under process variations when considering
                     a bound on circuit leakage.},
  acknowledgement = ack-nhfb,
  acmid           = {1278360},
  articleno       = {47},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {Design-for-test; dynamic supply gating; IDDQ; noise
                     immunity; Shannon expansion; test coverage; test
                     power},
  subject-dates   = {Claude Elwood Shannon (1916--2001)},
}

@Article{Ostler:2007:IHT,
  author          = {Chris Ostler and Karam S. Chatha and Vijay Ramamurthi
                     and Krishnan Srinivasan},
  title           = {{ILP} and heuristic techniques for system-level design
                     on network processor architectures},
  journal         = j-TODAES,
  volume          = {12},
  number          = {4},
  pages           = {48:1--48:??},
  month           = sep,
  year            = {2007},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/1278349.1278361},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Jun 12 18:09:35 MDT 2008},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {Network processors incorporate several architectural
                     features, including symmetric multiprocessing (SMP),
                     block multithreading, and multiple memory elements, to
                     support the high-performance requirements of current
                     day applications. This article presents automated
                     system-level design techniques for application
                     development on such architectures. We propose integer
                     linear programming formulations and heuristic
                     techniques for process allocation and data mapping on
                     SMP and block-multithreading-based network processors.
                     The techniques incorporate process transformations and
                     multithreading-aware data mapping to maximize the
                     throughput of the application. The article presents
                     experimental results that evaluate the techniques by
                     implementing network processing applications on the
                     Intel IXP 2400 architecture.},
  acknowledgement = ack-nhfb,
  articleno       = {48},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {block multithreading; multiprocessor},
}

@Article{Gopalakrishnan:2007:OPD,
  author =       "Sivaram Gopalakrishnan and Priyank Kalla",
  title =        "Optimization of polynomial datapaths using finite ring
                 algebra",
  journal =      j-TODAES,
  volume =       "12",
  number =       "4",
  pages =        "49:1--49:??",
  month =        sep,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1278349.1278362",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:09:35 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "This article presents an approach to area optimization
                 of arithmetic datapaths at register-transfer level
                 (RTL). The focus is on those designs that perform
                 polynomial computations (add, mult) over finite
                 word-length operands (bit-vectors). We model such
                 polynomial computations over $m$-bit vectors as algebra
                 over finite integer rings of residue classes $ Z_{2^m} $.
                 Subsequently, we use the number-theoretic and algebraic
                 properties of such rings to transform a given datapath
                 computation into another, bit-true equivalent
                 computation. We also derive a cost model to estimate,
                 at RTL, the area cost of the computation. Using the
                 transformation procedure along with the cost model, we
                 devise algorithmic procedures to search for a
                 lower-cost implementation. We show how these
                 theoretical concepts can be applied to RTL optimization
                 of arithmetic datapaths within practical CAD settings.
                 Experiments conducted over a variety of benchmarks
                 demonstrate substantial optimizations using our
                 approach.",
  acknowledgement = ack-nhfb,
  articleno =    "49",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "arithmetic datapaths; finite ring algebra; High-level
                 synthesis; modulo arithmetic; polynomial datapaths",
}

@Article{Hu:2007:IHM,
  author          = {Q. Hu and P. G. Kjeldsberg and A. Vandecappelle and M.
                     Palkovic and F. Catthoor},
  title           = {Incremental hierarchical memory size estimation for
                     steering of loop transformations},
  journal         = j-TODAES,
  volume          = {12},
  number          = {4},
  pages           = {50:1--50:??},
  month           = sep,
  year            = {2007},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/1278349.1278363},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Jun 12 18:09:35 MDT 2008},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {Modern embedded multimedia and telecommunications
                     systems need to store and access huge amounts of data.
                     This becomes a critical factor for the overall energy
                     consumption, area, and performance of the systems. Loop
                     transformations are essential to improve the data
                     access locality and regularity in order to optimally
                     design or utilize a memory hierarchy. However, due to
                     abstract high-level cost functions, current loop
                     transformation steering techniques do not take the
                     memory platform sufficiently into account. They usually
                     also result in only one final transformation solution.
                     On the other hand, the loop transformation search space
                     for real-life applications is huge, especially if the
                     memory platform is still not fully fixed. Use of
                     existing loop transformation techniques will therefore
                     typically lead to suboptimal end-products. It is
                     critical to find all interesting loop transformation
                     instances. This can only be achieved by performing an
                     evaluation of the effect of later design stages at the
                     early loop transformation stage. \par

                     This article presents a fast incremental hierarchical
                     memory-size requirement estimation technique. It
                     estimates the influence of any given sequence of loop
                     transformation instances on the mapping of application
                     data onto a hierarchical memory platform. As the exact
                     memory platform instantiation is often not yet defined
                     at this high-level design stage, a platform-independent
                     estimation is introduced with a Pareto curve output for
                     each loop transformation instance. Comparison among the
                     Pareto curves helps the designer, or a steering tool,
                     to find all interesting loop transformation instances
                     that might later lead to low-power data mapping for any
                     of the many possible memory hierarchy instances.
                     Initially, the source code is used as input for
                     estimation. However, performing the estimation
                     repeatedly from the source code is too slow for large
                     search space exploration. An incremental approach,
                     based on local updating of the previous result, is
                     therefore used to handle sequences of different loop
                     transformations. Experiments show that the initial
                     approach takes a few seconds, which is two orders of
                     magnitude faster than state-of-the-art solutions but
                     still too costly to be performed interactively many
                     times. The incremental approach typically takes just a
                     few milliseconds, which is another two orders of
                     magnitude faster than the initial approach. This huge
                     speedup allows us for the first time to handle
                     real-life industrial-size applications and get
                     realistic feedback during loop transformation
                     exploration.},
  acknowledgement = ack-nhfb,
  articleno       = {50},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {code transformation; Data optimization; high-level
                     synthesis; memory architecture exploration; memory size
                     estimation},
}

@Article{You:2007:CCP,
  author =       "Yi-Ping You and Chung-Wen Huang and Jenq Kuen Lee",
  title =        "Compilation for compact power-gating controls",
  journal =      j-TODAES,
  volume =       "12",
  number =       "4",
  pages =        "51:1--51:??",
  month =        sep,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1278349.1278364",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:09:35 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Power leakage constitutes an increasing fraction of
                 the total power consumption in modern semiconductor
                 technologies due to the continuing size reductions and
                 increasing speeds of transistors. Recent studies have
                 attempted to reduce leakage power using integrated
                 architecture and compiler power-gating mechanisms. This
                 approach involves compilers inserting instructions into
                 programs to shut down and wake up components, as
                 appropriate. While early studies showed this approach
                 to be effective, there are concerns about the large
                 amount of power-control instructions being added to
                 programs due to the increasing amount of components
                 equipped with power-gating controls in SoC design
                 platforms. In this article we present a sink-n-hoist
                 framework for a compiler to generate balanced
                 scheduling of power-gating instructions. Our solution
                 attempts to merge several power-gating instructions
                 into a single compound instruction, thereby reducing
                 the amount of power-gating instructions issued. We
                 performed experiments by incorporating our compiler
                 analysis and scheduling policies into SUIF compiler
                 tools and by simulating the energy consumption using
                 Wattch toolkits. The experimental results demonstrate
                 that our mechanisms are effective in reducing the
                 amount of power-gating instructions while further
                 reducing leakage power compared to previous methods.",
  acknowledgement = ack-nhfb,
  articleno =    "51",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "balanced scheduling; Compilers for low power;
                 data-flow analysis; leakage-power reduction;
                 power-gating mechanisms",
}

@Article{Chen:2007:NMA,
  author =       "Gang Chen and Xiaoyu Song and Feng Liu and Qingping
                 Tan and Fei He",
  title =        "A note on {``A mapping algorithm for computer-assisted
                 exploration in the design of embedded systems''}",
  journal =      j-TODAES,
  volume =       "12",
  number =       "4",
  pages =        "52:1--52:??",
  month =        sep,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1278349.1278365",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:09:35 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  note =         "See \cite{Mariatos:2001:MAC}.",
  acknowledgement = ack-nhfb,
  articleno =    "52",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Dutt:2008:Ea,
  author =       "Nikil Dutt",
  title =        "Editorial",
  journal =      j-TODAES,
  volume =       "13",
  number =       "1",
  pages =        "1:1--1:??",
  month =        jan,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1297666.1297667",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:10:00 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  articleno =    "1",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Hsiao:2008:ISS,
  author =       "Michael S. Hsiao and Robert B. Jones",
  title =        "Introduction to special section on high-level design,
                 validation, and test",
  journal =      j-TODAES,
  volume =       "13",
  number =       "1",
  pages =        "2:1--2:??",
  month =        jan,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1297666.1297668",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:10:00 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  articleno =    "2",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Cabodi:2008:BID,
  author =       "Gianpiero Cabodi and Marco Murciano and Sergio Nocco
                 and Stefano Quer",
  title =        "Boosting interpolation with dynamic localized
                 abstraction and redundancy removal",
  journal =      j-TODAES,
  volume =       "13",
  number =       "1",
  pages =        "3:1--3:??",
  month =        jan,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1297666.1297669",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:10:00 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "SAT-based Unbounded Model Checking based on Craig
                 Interpolants is often able to overcome BDDs and other
                 SAT-based techniques on large verification instances.
                 Based on refutation proofs generated by SAT solvers,
                 interpolants provide compact circuit representations of
                 state sets, as they abstract away several nonrelevant
                 details of the proofs. We propose three main
                 contributions, aimed at controlling interpolant size
                 and traversal depth. First of all, we introduce
                 interpolant-based dynamic abstraction to reduce the
                 support of computed interpolants. Subsequently, we
                 propose new advances in interpolant compaction by
                 redundancy removal. Finally, we introduce interpolant
                 computation exploiting circuit quantification, instead
                 of SAT refutation proofs. These techniques heavily rely
                 on an effective application of the incremental SAT
                 paradigm. The experimental results proposed in this
                 paper are specifically oriented to prove properties,
                 rather than disproving them, i.e., they target complete
                 verification instead of simply hunting bugs. They show
                 how this methodology is able to stretch the
                 applicability of interpolant-based Model Checking to
                 larger and deeper verification instances.",
  acknowledgement = ack-nhfb,
  articleno =    "3",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "abstraction; Interpolant; redundancy removal",
}

@Article{Boule:2008:ABA,
  author =       "Marc Boul{\'e} and Zeljko Zilic",
  title =        "Automata-based assertion-checker synthesis of {PSL}
                 properties",
  journal =      j-TODAES,
  volume =       "13",
  number =       "1",
  pages =        "4:1--4:??",
  month =        jan,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1297666.1297670",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:10:00 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Assertion-based verification with languages such as
                 PSL is gaining in importance. From assertions, one can
                 generate hardware assertion checkers for use in
                 emulation, simulation acceleration and silicon debug.
                 We present techniques for checker generation of the
                 complete set of PSL properties, including all variants
                 of operators, both strong and weak. A full
                 automata-based approach allows an entire assertion to
                 be represented by a single automaton, hence allowing
                 optimizations that can not be done in a modular
                 approach where subcircuits are created only for
                 individual operators. For this purpose, automata
                 algorithms are developed for the base cases, and a
                 complete set of rewrite rules is derived for other
                 operators. Automata splitting is introduced for an
                 efficient implementation of the {\tt eventually!} operator.",
  acknowledgement = ack-nhfb,
  articleno =    "4",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "assertion checkers; Assertion-Based Verification;
                 automata; emulation; hardware; PSL",
}

@Article{Rahaman:2008:CTB,
  author =       "H. Rahaman and J. Mathew and D. K. Pradhan and A. M.
                 Jabir",
  title =        "{C}-testable bit parallel multipliers over {$ {\rm
                 GF}(2^m) $}",
  journal =      j-TODAES,
  volume =       "13",
  number =       "1",
  pages =        "5:1--5:??",
  month =        jan,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1297666.1297671",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:10:00 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "We present a C-testable design of polynomial basis
                 (PB) bit-parallel (BP) multipliers over $ {\rm GF}(2^m) $ for
                 100\% coverage of stuck-at faults. Our design method
                 also includes the method for test vector generation,
                 which is simple and efficient. C-testability is
                 achieved with three control inputs and approximately
                 6\% additional hardware. Only 8 constant vectors are
                 required irrespective of the sizes of the fields and
                 primitive polynomial. We also present a Built-In
                 Self-Test (BIST) architecture for generating the test
                 vectors efficiently, which eliminates the need for the
                 extra control inputs. Since these circuits have
                 critical applications as parts of cryptography (e.g.,
                 Elliptic Curve Crypto (ECC) systems) hardware, the BIST
                 architecture may provide with added level of security,
                 as the tests would be done internally and without the
                 requirement of probing by external testing equipment.
                 Finally we present experimental results comprising the
                 area, delay and power of the testable multipliers of
                 various sizes with the help of the Synopsys{\reg} tools
                 using UMC 0.18 micron CMOS technology library.",
  acknowledgement = ack-nhfb,
  articleno =    "5",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "built-in self-test; C-testable; cryptography; digital
                 signal processing; error control code; fault; Galois
                 field; multiplier; polynomials; stuck-at fault;
                 testing; TPG; VLSI design",
}

@Article{Taktak:2008:TAD,
  author =       "Sami Taktak and Jean-Lou Desbarbieux and Emmanuelle
                 Encrenaz",
  title =        "A tool for automatic detection of deadlock in wormhole
                 networks on chip",
  journal =      j-TODAES,
  volume =       "13",
  number =       "1",
  pages =        "6:1--6:??",
  month =        jan,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1297666.1297672",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:10:00 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "We present an extension of Duato's necessary and
                 sufficient condition a routing function must satisfy in
                 order to be deadlock-free, to support environment
                 constraints inducing extra-dependencies between
                 messages. We also present an original algorithm to
                 automatically check the deadlock-freeness of a network
                 with a given routing function. A prototype tool has
                 been developed and automatic deadlock checking of large
                 scale networks with various routing functions have been
                 successfully achieved. We provide comparative results
                 with standard approach, highlighting the benefits of
                 our method.",
  acknowledgement = ack-nhfb,
  articleno =    "6",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Deadlock; interconnection networks; networks on chip;
                 wormhole routing",
}

@Article{Zhou:2008:NER,
  author =       "Hai Zhou",
  title =        "A new efficient retiming algorithm derived by formal
                 manipulation",
  journal =      j-TODAES,
  volume =       "13",
  number =       "1",
  pages =        "7:1--7:??",
  month =        jan,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1297666.1297673",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:10:00 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "A new efficient algorithm is derived for the minimal
                 period retiming by formal manipulation. Contrary to all
                 previous algorithms, which used fixed period
                 feasibility checking to binary-search a candidate
                 range, the derived algorithm checks the optimality of a
                 feasible period directly. It is much simpler and more
                 efficient than previous algorithms. Experimental
                 results showed that it is even faster than ASTRA, an
                 efficient heuristic algorithm. Since the derived
                 algorithm is incremental by nature, it also opens the
                 opportunity to be combined with other optimization
                 techniques.",
  acknowledgement = ack-nhfb,
  articleno =    "7",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "algorithm derivation; Clock period minimization;
                 retiming",
}

@Article{Krishnaswamy:2008:PTM,
  author =       "Smita Krishnaswamy and George F. Viamontes and Igor L.
                 Markov and John P. Hayes",
  title =        "Probabilistic transfer matrices in symbolic
                 reliability analysis of logic circuits",
  journal =      j-TODAES,
  volume =       "13",
  number =       "1",
  pages =        "8:1--8:??",
  month =        jan,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1297666.1297674",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:10:00 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "We propose the probabilistic transfer matrix (PTM)
                 framework to capture nondeterministic behavior in logic
                 circuits. PTMs provide a concise description of both
                 normal and faulty behavior, and are well-suited to
                 reliability and error susceptibility calculations. A
                 few simple composition rules based on connectivity can
                 be used to recursively build larger PTMs (representing
                 entire logic circuits) from smaller gate PTMs. PTMs for
                 gates in series are combined using matrix
                 multiplication, and PTMs for gates in parallel are
                 combined using the tensor product operation. PTMs can
                 accurately calculate joint output probabilities in the
                 presence of reconvergent fanout and inseparable joint
                 input distributions. To improve computational
                 efficiency, we encode PTMs as algebraic decision
                 diagrams (ADDs). We also develop equivalent ADD
                 algorithms for newly defined matrix operations such as
                 {\tt eliminate\_variables} and {\tt
                 eliminate\_redundant\_variables}, which aid in the
                 numerical computation of circuit PTMs. We use PTMs to
                 evaluate circuit reliability and derive polynomial
                 approximations for circuit error probabilities in terms
                 of gate error probabilities. PTMs can also analyze the
                 effects of logic and electrical masking on error
                 mitigation. We show that ignoring logic masking can
                 overestimate errors by an order of magnitude. We
                 incorporate electrical masking by computing error
                 attenuation probabilities, based on analytical models,
                 into an extended PTM framework for reliability
                 computation. We further define a susceptibility measure
                 to identify gates whose errors are not well masked. We
                 show that hardening a few gates can significantly
                 improve circuit reliability.",
  acknowledgement = ack-nhfb,
  articleno =    "8",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "fault tolerance; Symbolic analysis",
}

@Article{Tzeng:2008:VPS,
  author =       "Chao-Wen Tzeng and Jheng-Syun Yang and Shi-Yu Huang",
  title =        "A versatile paradigm for scan chain diagnosis of
                 complex faults using signal processing techniques",
  journal =      j-TODAES,
  volume =       "13",
  number =       "1",
  pages =        "9:1--9:??",
  month =        jan,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1297666.1297675",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:10:00 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Scan chains are popularly used as the channels for
                 silicon testing and debugging. However, they have also
                 been identified as one of the culprits of silicon
                 failure more recently. To cope with this problem,
                 several scan chain diagnosis approaches have been
                 proposed in the past. The existing methods, however,
                 suffer from one common drawback---that is, they rely on
                 fault models and matching heuristics to locate the
                 faults. Such a paradigm may run into difficulty when
                 the fault under diagnosis does not match the fault
                 model exactly, for example, when there is a bridging
                 between a flip-flop and a logic cell, or the fault is
                 temporal and only manifests itself intermittently. In
                 light of this, we propose in this article a more
                 versatile model-free paradigm for locating the faulty
                 flip-flops in a scan chain, incorporating a number of
                 signal processing techniques, such as filtering and
                 edge detection. These techniques performed on the test
                 responses of the failing chip under diagnosis directly
                 can effectively reveal the fault location(s) in a scan
                 chain. As compared to the previous works, our approach
                 is better capable of handling intermittent faults and
                 bridging faults, even under nonideal conditions, for
                 example, when the core logic is also faulty.
                 Experimental results on several real designs indicate
                 that this approach can indeed catch some nasty faults
                 that previous methods could not catch.",
  acknowledgement = ack-nhfb,
  articleno =    "9",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "design for testability; Diagnosis; fault; profiling;
                 scan chain",
}

@Article{Johnson:2008:IME,
  author =       "F. Ryan Johnson and Joann M. Paul",
  title =        "Interrupt modeling for efficient high-level scheduler
                 design space exploration",
  journal =      j-TODAES,
  volume =       "13",
  number =       "1",
  pages =        "10:1--10:??",
  month =        jan,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1297666.1297676",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:10:00 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Single Chip Heterogeneous Multiprocessors executing a
                 wide variety of software are increasingly common in
                 consumer electronics. Because of the mix of real-time
                 and best effort software across the entire chip, a key
                 design element of these systems is the choice of
                 scheduling strategy. Without task migration, the
                 benefits of single chip processing cannot be fully
                 realized. Previously, high-level modeling environments
                 have not been capable of modeling asynchronous events
                 such as interrupts and preemptive scheduling while
                 preserving the performance benefits of high level
                 simulation. This paper shows how extensions to Modeling
                 Environment for Software and Hardware (MESH) enable
                 precise modeling of these asynchronous events while
                 running more than 1000 times faster than cycle-accurate
                 simulation. We discuss how we achieved this and
                 illustrate its use in modeling preemptive scheduling.
                 We evaluate the potential of migrating running tasks
                 between processors to improve performance in a
                 multimedia cell phone example. We show that by allowing
                 schedulers to rebalance processor loads as new tasks
                 arrive significant performance gains can be achieved
                 over statically partitioned and dynamic scheduling
                 approaches. In our example, we show that system
                 response time can be improved by as much as 1.96 times
                 when a preemptive migratory scheduler is used, despite
                 the overhead incurred by scheduling tasks across
                 multiple processors and transferring state during the
                 migration of running tasks. The contribution of this
                 work is to provide a framework for evaluating
                 preemptive scheduling policies and task migration in a
                 high level simulator, by combining the new ability to
                 model interrupts with dramatically increased efficiency
                 in the high-level modeling of scheduling and
                 communication MESH already provides.",
  acknowledgement = ack-nhfb,
  articleno =    "10",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Heterogeneous chip multiprocessors; MESH; scenario
                 oriented design",
}

@Article{Ogras:2008:AOP,
  author =       "Umit Y. Ogras and Radu Marculescu",
  title =        "Analysis and optimization of prediction-based flow
                 control in networks-on-chip",
  journal =      j-TODAES,
  volume =       "13",
  number =       "1",
  pages =        "11:1--11:??",
  month =        jan,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1297666.1297677",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:10:00 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Networks-on-Chip (NoC) communication architectures
                 have emerged recently as a scalable solution to on-chip
                 communication problems. While the NoC architectures may
                 offer higher bandwidth compared to traditional
                 bus-based communication, their performance can degrade
                 significantly in the absence of effective flow control
                 algorithms. Unfortunately, flow control algorithms
                 developed for macronetworks, either rely on local
                 information, or suffer from large communication
                 overhead and unpredictable delays. Hence, using them in
                 the NoC context is problematic at best. For this
                 reason, we propose a predictive closed-loop flow
                 control mechanism and make the following contributions:
                 First, we develop traffic source and router models
                 specifically targeted to NoCs. Then, we utilize these
                 models to predict the possible congestion in the
                 network. Based on this information, the proposed scheme
                 controls the packet injection rate at traffic sources
                 in order to regulate the total number of packets in the
                 network. We also illustrate the proposed traffic source
                 model and the applicability of the proposed flow
                 controller to actual designs using real NoC
                 implementations. Finally, simulations and experimental
                 study using our FPGA prototype show that the proposed
                 controller delivers a better performance compared to
                 the traditional switch-to-switch flow control
                 algorithms under various real and synthetic traffic
                 patterns.",
  acknowledgement = ack-nhfb,
  articleno =    "11",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "congestion control; flow control; Multi-processor
                 systems; networks-on-chip",
}

@Article{Chang:2008:TCS,
  author =       "Kuei-Chung Chang and Jih-Sheng Shen and Tien-Fu Chen",
  title =        "Tailoring circuit-switched network-on-chip to
                 application-specific system-on-chip by two optimization
                 schemes",
  journal =      j-TODAES,
  volume =       "13",
  number =       "1",
  pages =        "12:1--12:??",
  month =        jan,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1297666.1297678",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:10:00 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "As the number of cores on a chip increases, power
                 consumed by the communication structures takes a
                 significant portion of the overall power budget. In
                 this article, we first propose a circuit-switched
                 interconnection architecture which uses crossroad
                 switches to construct dedicated channels dynamically
                 between any pairs of cores for nonhuge
                 application-specific SoCs. The structure of the
                 crossroad switch is simple, which can be regarded as a
                 NoC-lite router, and we can easily construct a
                 low-power on-chip network with these switches by a
                 system-level design methodology. We also present the
                 design methodology to tailor the proposed
                 interconnection architecture to low-power structures by
                 two proposed optimization schemes with profiled
                 communication characteristics. The first scheme is
                 power-aware topology construction, which can build
                 low-power application-specific interconnection
                 topologies. To further reduce the power consumption, we
                 propose the second optimization scheme to predetermine
                 the operating mode of dual-mode switches in the NoC at
                 runtime. We evaluate several interconnection
                 techniques, and the results show that the proposed
                 architecture is more low-power and high-performance
                 than others under some constraints and scale
                 boundaries. We take multimedia applications as case
                 studies, and experimental results show the power
                 savings of power-aware topology approximate to 49\% of
                 the interconnection architecture. The power consumption
                 can be further reduced approximately 25\% by applying
                 partially dedicated path mechanism.",
  acknowledgement = ack-nhfb,
  articleno =    "12",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Application specific; interconnection; low power;
                 networks on chip; systems on chips",
}

@Article{Abbasian:2008:WBD,
  author =       "A. Abbasian and S. Hatami and A. Afzali-Kusha and M.
                 Pedram",
  title =        "Wavelet-based dynamic power management for
                 nonstationary service requests",
  journal =      j-TODAES,
  volume =       "13",
  number =       "1",
  pages =        "13:1--13:??",
  month =        jan,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1297666.1297679",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:10:00 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "In this article, a wavelet-based dynamic power
                 management policy (WBDPM) is proposed. In this
                 approach, the workload source (service requester) is
                 modeled by a nonstationary time series which is, in turn,
                 represented by a nondecimated Haar wavelet as its
                 basis. The proposed approach is robust and has the
                 ability to minimize energy dissipation under different
                 performance constraints. To assess the accuracy of the
                 model, the algorithm was implemented for data extracted
                 from the hard disks of computers. Prediction results of
                 this approach for the case of a nonstationary service
                 requester exhibit accuracies of more than 95\%.",
  acknowledgement = ack-nhfb,
  articleno =    "13",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Dynamic power management; low-power system design;
                 nonstationary service request; wavelet-based
                 prediction",
}

@article{Su:2008:SNT,
  author          = {Yu-Shih Su and Po-Hsien Chang and Shih-Chieh Chang and
                     Tingting Hwang},
  title           = {Synthesis of a novel timing-error detection
                     architecture},
  journal         = j-TODAES,
  volume          = {13},
  number          = {1},
  pages           = {14:1--14:??},
  month           = jan,
  year            = {2008},
  coden           = {ATASFO},
  doi             = {https://doi.org/10.1145/1297666.1297680},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Thu Jun 12 18:10:00 MDT 2008},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {Delay variation can cause a design to fail its timing
                     specification. Ernst et al. [2003] observe that the
                     worst delay of a design is least probable to occur.
                     They propose a mechanism to detect and correct
                     occasional errors while the design can be optimized for
                     the common cases. Their experimental results show
                     significant performance (or power) gain as compared
                     with the worst-case design. However, the architecture
                     in Ernst et al. [2003] suffers the short path problem,
                     which is difficult to resolve. In this article, we
                     propose a novel error-detecting architecture to solve
                     the short path problem. Our experimental results show
                     considerable performance gain can be achieved with
                     reasonable area overhead.},
  acknowledgement = ack-nhfb,
  articleno       = {14},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {fault tolerance; Logic synthesis},
}

@Article{Raabe:2008:RDS,
  author =       "Andreas Raabe and Philipp A. Hartmann and Joachim K.
                 Anlauf",
  title =        "{ReChannel}: {Describing} and simulating
                 reconfigurable hardware in {SystemC}",
  journal =      j-TODAES,
  volume =       "13",
  number =       "1",
  pages =        "15:1--15:??",
  month =        jan,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1297666.1297681",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:10:00 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "With the ongoing integration of (dynamic)
                 reconfiguration into current system models, new
                 methodologies and tools are needed to help the designer
                 during the development process. This article introduces
                 a language extension for SystemC along with a design
                 methodology for describing and simulating dynamically
                 reconfigurable systems at all levels of abstraction.
                 The presented library provides maximum freedom of
                 description of reconfiguration behavior and its
                 control, while featuring simulation of runtime
                 configuration, removal, and exchange of custom modules
                 as well as third-party IP-cores during the complete
                 architecture refinement process. When designing at
                 RT-level, the resulting hardware description can easily
                 be synthesized by standard synthesis tools.",
  acknowledgement = ack-nhfb,
  articleno =    "15",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "dynamic reconfiguration; hardware description;
                 Reconfigurable hardware; refinement; simulation;
                 SystemC",
}

@article{Zhou:2008:AAS,
  author          = {Xiangrong Zhou and Chenjie Yu and Alokika Dash and
                     Peter Petrov},
  title           = {Application-aware snoop filtering for low-power cache
                     coherence in embedded multiprocessors},
  journal         = j-TODAES,
  volume          = {13},
  number          = {1},
  pages           = {16:1--16:??},
  month           = jan,
  year            = {2008},
  coden           = {ATASFO},
  doi             = {https://doi.org/10.1145/1297666.1297682},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Thu Jun 12 18:10:00 MDT 2008},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {Maintaining local caches coherently in shared-memory
                     multiprocessors results in significant power
                     consumption. The customization methodology we propose
                     exploits the fact that in embedded systems, important
                     knowledge is available to the system designers
                     regarding memory sharing between tasks. We demonstrate
                     how the snoop-induced cache probings can be
                     significantly reduced by identifying and exploiting in
                     a deterministic way the shared memory regions between
                     the processors. Snoop activity is enabled only for the
                     accesses referring to known shared regions. The
                     hardware support is not only cost efficient, but also
                     software programmable, which allows for
                     reprogrammability and customization across different
                     tasks and applications.},
  acknowledgement = ack-nhfb,
  articleno       = {16},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {Cache coherence; embedded multiprocessors; low-power
                     embedded systems; snoop filtering},
}

@Article{Ahn:2008:SSC,
  author =       "Yongjin Ahn and Keesung Han and Ganghee Lee and
                 Hyunjik Song and Junhee Yoo and Kiyoung Choi and
                 Xingguang Feng",
  title =        "{SoCDAL}: {System-on-chip design AcceLerator}",
  journal =      j-TODAES,
  volume =       "13",
  number =       "1",
  pages =        "17:1--17:??",
  month =        jan,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1297666.1297683",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:10:00 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Time-to-market pressure and the ever-growing design
                 complexity of multiprocessor system-on-chips have
                 demanded an efficient design environment that enables
                 fast exploration of large design space. In this
                 article, we introduce a new design environment, called
                 SoCDAL, for accelerating multiprocessor system-on-chip
                 design through fast design-space exploration targeting
                 real-time multimedia systems. SoCDAL is a set of mostly
                 automated tools covering system specification,
                 hardware/software estimation,
                 application-to-architecture mapping, simulation model
                 generation, and system verification through simulation.
                 For system specification, the process network model has
                 been widely used for system specification because of
                 its modeling capability. However, it is hard to use for
                 real-time systems design, since its behavior cannot be
                 estimated statically. We introduce a new approach which
                 enables analyzing a process network model statically
                 with some restrictions. For the hardware/software
                 estimation, we analyze codes statically.
                 Application-to-architecture mapping process implements
                 a novel algorithm to support an arbitrary number of
                 processors, with performance evaluation by static
                 scheduling considering communication behavior. Mapping
                 results are used to generate simulation models
                 automatically at several transaction levels to be
                 pipelined to a commercial tool. We show the
                 effectiveness of our approaches by some experimental
                 results with multimedia applications such as JPEG,
                 H.263, and H.264 encoders, as well as an H.264
                 decoder.",
  acknowledgement = ack-nhfb,
  articleno =    "17",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "application-to-architecture mapping; Codesign;
                 design-space exploration; multiprocessor
                 system-on-chip; process networks; scheduling;
                 simulation; specification; static hardware/software
                 estimation; synchronous dataflow; transaction-level
                 model; worst-case execution time",
}

@article{Zamora:2008:EMU,
  author          = {Nicholas H. Zamora and Xiaoping Hu and Umit Y. Ogras
                     and Radu Marculescu},
  title           = {Enabling multimedia using resource-constrained video
                     processing techniques: a node-centric perspective},
  journal         = j-TODAES,
  volume          = {13},
  number          = {1},
  pages           = {18:1--18:??},
  month           = jan,
  year            = {2008},
  coden           = {ATASFO},
  doi             = {https://doi.org/10.1145/1297666.1297684},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Thu Jun 12 18:10:00 MDT 2008},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {Successful proliferation of multimedia-enabled devices
                     and advances in very large-scale integration (VLSI)
                     technology has spawned new research efforts in
                     migrating video processing applications onto ever
                     smaller and more inexpensive devices. This article
                     focuses on the technical challenges associated with
                     that migration. \par

                     Due to limitations in size, battery lifetime, and,
                     ultimately, cost, mapping complex video applications
                     onto resource-constrained systems is a very challenging
                     proposition. To this end, we first consider a
                     technique, region-of-interest (ROI) processing, of
                     defining a window within a video frame and only
                     operating on the data inside that window, ignoring the
                     rest of the frame. By using this lossy technique, the
                     processing requirements can be reduced by roughly 80\%
                     while the error introduced in the quality of the
                     results is roughly 10\%. The other technique is
                     adaptive data partitioning (ADP) combined with a
                     content-based power management algorithm. By
                     distributing video processing among multiple processors
                     and shutting them down when they are not needed, the
                     energy consumed per processor can be reduced by 60\%
                     without sacrificing the performance of the underlying
                     video-based application. \par

                     Taken together, these novel techniques enable ambient
                     multimedia systems and maintain the needed overall
                     efficiency in video processing.},
  acknowledgement = ack-nhfb,
  articleno       = {18},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {data partitioning; lossy and lossless video
                     processing; real-time video processing;
                     Region-of-interest (ROI)},
}

@article{Lee:2008:FCB,
  author          = {Kyungsoo Lee and Naehyuck Chang and Jianli Zhuo and
                     Chaitali Chakrabarti and Sudheendra Kadri and Sarma
                     Vrudhula},
  title           = {A fuel-cell-battery hybrid for portable embedded
                     systems},
  journal         = j-TODAES,
  volume          = {13},
  number          = {1},
  pages           = {19:1--19:??},
  month           = jan,
  year            = {2008},
  coden           = {ATASFO},
  doi             = {https://doi.org/10.1145/1297666.1297685},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Thu Jun 12 18:10:00 MDT 2008},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {This article presents our work on the development of a
                     fuel cell (FC) and battery hybrid (FC-Bh) system for
                     use in portable microelectronic systems. We describe
                     the design and control of the hybrid system, as well as
                     a dynamic power management (DPM)-based energy
                     management policy that extends its operational
                     lifetime. The FC is of the proton exchange membrane
                     (PEM) type, operates at room temperature, and has an
                     energy density which is 4--6 times that of a Li-ion
                     battery. The FC cannot respond to sudden changes in the
                     load, and so a system powered solely by the FC is not
                     economical. An FC-Bh power source, on the other hand,
                     can provide the high energy density of the FC and the
                     high power density of a battery. \par

                     In this work we first describe the prototype FC-Bh
                     system that we have built. Such a prototype helps to
                     characterize the performance of a hybrid power source,
                     and also helps explore new energy management strategies
                     for embedded systems powered by hybrid sources. Next we
                     describe a Matlab/Simulink-based FC-Bh system simulator
                     which serves as an alternate experimental platform and
                     that enables quick evaluation of system-level control
                     policies. Finally, we present an optimization framework
                     that explicitly considers the characteristics of the
                     FC-Bh system and is aimed at minimizing the fuel
                     consumption. This optimization framework is applied on
                     top of a prediction-based DPM policy and is used to
                     derive a new fuel-efficient DPM scheme. The proposed
                     scheme demonstrates up to 32\% system lifetime
                     extension compared to a competing scheme when run on a
                     real trace-based MPEG encoding example.},
  acknowledgement = ack-nhfb,
  articleno       = {19},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {battery; DPM; fuel cell; hybrid systems; Simulation;
                     simulator},
}

@article{Chao:2008:LPG,
  author          = {Wei-Chung Chao and Wai-Kei Mak},
  title           = {Low-power gated and buffered clock network
                     construction},
  journal         = j-TODAES,
  volume          = {13},
  number          = {1},
  pages           = {20:1--20:??},
  month           = jan,
  year            = {2008},
  coden           = {ATASFO},
  doi             = {https://doi.org/10.1145/1297666.1297686},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Thu Jun 12 18:10:00 MDT 2008},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {We propose an efficient algorithm to construct a
                     low-power zero-skew gated clock network, given the
                     module locations and activity information. Unlike
                     previous works, we consider masking logic insertion and
                     buffer insertion simultaneously, and guarantee to yield
                     a zero-skew clock tree. Both the logical and physical
                     information of the modules are carefully taken into
                     consideration when determining where masking logic
                     should be inserted. We also account for the power
                     overhead of the control signals so that the total
                     average power consumption of the constructed zero-skew
                     gated clock network can be minimized. To this end, we
                     present a recursive approach to compute the effective
                     switched capacitance of a general gated and buffered
                     clock network, accounting for both the clock tree's and
                     controller tree's switched capacitance. The power
                     consumptions of the gated clock networks constructed by
                     our algorithm are 20 to 36\% lower than those reported
                     in the best previous work in the literature.},
  acknowledgement = ack-nhfb,
  articleno       = {20},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {buffer; clock gating; Clock tree; low power;
                     zero-skew},
}

@article{Sham:2008:OWR,
  author          = {Chiu-Wing Sham and Evangeline F. Y. Young and Hai
                     Zhou},
  title           = {Optimizing wirelength and routability by searching
                     alternative packings in floorplanning},
  journal         = j-TODAES,
  volume          = {13},
  number          = {1},
  pages           = {21:1--21:??},
  month           = jan,
  year            = {2008},
  coden           = {ATASFO},
  doi             = {https://doi.org/10.1145/1297666.1297687},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Thu Jun 12 18:10:00 MDT 2008},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {Recent advances in VLSI technology have made
                     optimization of the interconnect delay and routability
                     of a circuit more important. We should consider
                     interconnect planning as early as possible. We propose
                     a postfloorplanning step to reduce the interconnect
                     cost of a floorplan by searching alternative packings.
                     If a packing contains a rectangular bounding box of a
                     group of modules, we can rearrange the blocks in the
                     bounding box to obtain a new floorplan with the same
                     area, but possibly with a smaller interconnect cost.
                     Experimental results show that we can reduce the
                     interconnect cost of a packing without any penalty in
                     area.},
  acknowledgement = ack-nhfb,
  articleno       = {21},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {Floorplanning; wirelength reduction},
}

@Article{Wu:2008:CPR,
  author =       "Meng-Chiou Wu and Rung-Bin Lin and Shih-Cheng Tsai",
  title =        "Chip placement in a reticle for multiple-project wafer
                 fabrication",
  journal =      j-TODAES,
  volume =       "13",
  number =       "1",
  pages =        "22:1--22:??",
  month =        jan,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1297666.1297688",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:10:00 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Chip placement in a reticle is crucial to the cost of
                 a multiproject wafer run. In this article we develop
                 several chip placement methods based on the
                 volume-driven compatibility optimization (VOCO)
                 concept, which maximizes dicing compatibility among
                 chips with large-volume requirements while minimizing
                 reticle dimensions. Our mixed-integer linear
                 programming models with VOCO are too complex to render
                 good solutions for large test cases. Our B*-tree with
                 VOCO and HQ with VOCO use $ 16 \% \sim 29 \% $ fewer
                 wafers and $ 8 \% \sim 19 \% $ less reticle area than
                 the hierarchical quadrisection (HQ) method proposed by
                 Kahng et al. [2005].",
  acknowledgement = ack-nhfb,
  articleno =    "22",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "compatibility graph; conflict graph; mixed-integer
                 linear programming (MILP); Multiple-project wafers
                 (MPW); reticle floorplanning; set cover; set partition;
                 shuttle mask; simulated annealing (SA); wafer dicing",
}

@article{Dutt:2008:Eb,
  author          = {Nikil Dutt},
  title           = {Editorial},
  journal         = j-TODAES,
  volume          = {13},
  number          = {2},
  pages           = {23:1--23:??},
  month           = apr,
  year            = {2008},
  coden           = {ATASFO},
  doi             = {https://doi.org/10.1145/1344418.1344419},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Thu Jun 12 18:10:39 MDT 2008},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  articleno       = {23},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Saluja:2008:SBA,
  author =       "Nikhil Saluja and Kanupriya Gulati and Sunil P.
                 Khatri",
  title =        "{SAT}-based {ATPG} using multilevel compatible
                 don't-cares",
  journal =      j-TODAES,
  volume =       "13",
  number =       "2",
  pages =        "24:1--24:??",
  month =        apr,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1344418.1344420",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:10:39 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "In a typical IC design flow, circuits are optimized
                 using multilevel don't cares. The computed don't cares
                 are discarded before Technology Mapping or Automatic
                 Test Pattern Generation (ATPG). In this paper, we
                 present two combinational ATPG algorithms for
                 combinational designs. These algorithms utilize the
                 multilevel don't cares that are computed for the design
                 during technology independent logic optimization. They
                 are based on Boolean Satisfiability (SAT), and utilize
                 the single stuck-at fault model. Both algorithms make
                 use of the Compatible Observability Don't Cares (CODCs)
                 associated with nodes of the circuit, to speed up the
                 ATPG process. For large circuits, both algorithms make
                 use of approximate CODCs (ACODCs), which we can compute
                 efficiently. Our first technique speeds up fault
                 propagation by modifying the active clauses in the
                 transitive fanout (TFO) of the fault site. In our
                 second technique, we define new j-active variables
                 for specific nodes in the transitive fanin (TFI) of the
                 fault site. Using these j-active variables we write
                 additional clauses to speed up fault justification.
                 Experimental results demonstrate that the combination
                 of these techniques (when using CODCs) results in an
                 average reduction of 45\% in ATPG runtimes. When ACODCs
                 are used, a speed-up of about 30\% is obtained in the
                 ATPG run-times for large designs. We compare our method
                 against a commercial structural ATPG tool as well. Our
                 method is slower for small designs, but for large
                 designs, we obtain a 31\% average speedup over the
                 commercial tool.",
  acknowledgement = ack-nhfb,
  articleno =    "24",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Automatic test pattern generation (ATPG); Boolean
                 satisfiability (SAT); don't cares; testing",
}

@Article{Muchherla:2008:NEW,
  author =       "Kishore Kumar Muchherla and Pinhong Chen and Dongsheng
                 Ma and Janet Meiling Wang",
  title =        "A noniterative equivalent waveform model for timing
                 analysis in presence of crosstalk",
  journal =      j-TODAES,
  volume =       "13",
  number =       "2",
  pages =        "25:1--25:??",
  month =        apr,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1344418.1344421",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:10:39 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Due to the nonuniform interconnect scaling in the Deep
                 Sub Micron (DSM) region, the coupling capacitance
                 between wires becomes an increasingly dominant fraction
                 of the total wire capacitance. This coupling capacitance
                 introduces severe crosstalk which causes delay
                 variations on signal lines and raises signal integrity
                 problems. Therefore, including crosstalk in the timing
                 analysis methods has become imperative for current
                 technologies. And to correctly model the crosstalk,
                 output loading effects, waveform shape and gate driving
                 capability have to be considered. However, most
                 existing crosstalk models have not yet included these
                 factors and consequently suffer from the low accuracy
                 problem. In this article, we propose a noniterative
                 equivalent waveform model that addresses the above
                 mentioned issues. Our experimental results have shown
                 that the new model achieves 3 times speed up and 95\%
                 accuracy compared to the existing models.",
  acknowledgement = ack-nhfb,
  articleno =    "25",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Deep sub micron; delay; equivalent waveform; noise;
                 timing analysis",
}

@article{Yan:2008:TDO,
  author          = {Jin-Tai Yan},
  title           = {Timing-driven octilinear {Steiner} tree construction
                     based on {Steiner-point} reassignment and path
                     reconstruction},
  journal         = j-TODAES,
  volume          = {13},
  number          = {2},
  pages           = {26:1--26:??},
  month           = apr,
  year            = {2008},
  coden           = {ATASFO},
  doi             = {https://doi.org/10.1145/1344418.1344422},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Thu Jun 12 18:10:39 MDT 2008},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {It is well known that the problem of constructing a
                     timing-driven rectilinear Steiner tree for any signal
                     net is important in performance-driven designs and has
                     been extensively studied. Until now, many efficient
                     approaches have been proposed for the construction of a
                     timing-driven rectilinear Steiner tree. As technology
                     process advances, $ + 45^\circ $ and $ - 45^\circ $
                     diagonal segments can be permitted in an octilinear
                     routing model. To our knowledge, no approach is
                     proposed to construct a timing-driven octilinear
                     Steiner tree for any signal net. In this paper, given a
                     rectilinear Steiner tree for any signal net, we propose
                     an efficient transformation-based approach to construct
                     a timing-driven octilinear Steiner tree based on the
                     computation of the octilinear distance and the concept
                     of Steiner-point reassignment and path reconstruction
                     in an octilinear routing model. The experimental
                     results show that our proposed transformation-based
                     approach can use reasonable CPU time to construct a
                     TOST, and a 10\%--18\% improvement in timing delay and
                     a 5\%--14\% improvement in total wire length in the
                     original RSTs are obtained in the construction of TOSTs
                     for the tested signal nets.},
  acknowledgement = ack-nhfb,
  articleno       = {26},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {Elmore delay; Global routing; octilinear Steiner tree;
                     Steiner points},
}

@Article{Baldassin:2008:OSB,
  author =       "Alexandro Baldassin and Paulo Centoducatte and Sandro
                 Rigo and Daniel Casarotto and Luiz C. V. Santos and Max
                 Schultz and Olinto Furtado",
  title =        "An open-source binary utility generator",
  journal =      j-TODAES,
  volume =       "13",
  number =       "2",
  pages =        "27:1--27:??",
  month =        apr,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1344418.1344423",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:10:39 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Electronic system level (ESL) modeling allows early
                 hardware-dependent software (HDS) development. Due to
                 broad CPU diversity and shrinking time-to-market, HDS
                 development can neither rely on hand-retargeting binary
                 tools, nor can it rely on pre-existent tools within
                 standard packages. As a consequence, binary utilities
                 which can be easily adapted to new CPU targets are of
                 increasing interest. We present in this article a
                 framework for automatic generation of binary utilities.
                 It relies on two innovative ideas: platform-aware
                 modeling and more inclusive relocation handling.
                 Generated assemblers, linkers, disassemblers and
                 debuggers were validated for MIPS, SPARC, PowerPC,
                 i8051 and PIC16F84. An open-source prototype generator
                 is available for download.",
  acknowledgement = ack-nhfb,
  articleno =    "27",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Platform debugging; retargetable tools; TLM",
}

@Article{Moscola:2008:RCB,
  author =       "James Moscola and John W. Lockwood and Young H. Cho",
  title =        "Reconfigurable content-based router using
                 hardware-accelerated language parser",
  journal =      j-TODAES,
  volume =       "13",
  number =       "2",
  pages =        "28:1--28:??",
  month =        apr,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1344418.1344424",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:10:39 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "This article presents a dense logic design for
                 matching multiple regular expressions with a field
                 programmable gate array (FPGA) at 10+ Gbps. It
                 leverages on the design techniques that enforce the
                 shortest critical path on most FPGA architectures while
                 optimizing the circuit size. The architecture is
                 capable of supporting a maximum throughput of 12.90
                 Gbps on a Xilinx Virtex 4 LX200 and its performance is
                 linearly scalable with size. Additionally, this article
                 presents techniques for parsing data streams to provide
                 semantic information for patterns found within a data
                 stream. We illustrate how a content-based router can be
                 implemented with our parsing techniques using an XML
                 parser as an example. The content-based router
                 presented was designed, implemented, and tested in a
                 Xilinx Virtex XCV2000E FPGA on the FPX platform. It is
                 capable of processing 32-bits of data per clock cycle
                 and runs at 100 MHz. This allows the system to process
                 and route XML messages at 3.2 Gbps.",
  acknowledgement = ack-nhfb,
  articleno =    "28",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "content-based routing; parser hardware; Parsing;
                 pattern matching; regular expressions; XML",
}

@Article{Jones:2008:RFI,
  author =       "Alex K. Jones and Swapna Dontharaju and Shenchih Tung
                 and Leo Mats and Peter J. Hawrylak and Raymond R. Hoare
                 and James T. Cain and Marlin H. Mickle",
  title =        "Radio frequency identification prototyping",
  journal =      j-TODAES,
  volume =       "13",
  number =       "2",
  pages =        "29:1--29:??",
  month =        apr,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1344418.1344425",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:10:39 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "While RFID is starting to become a ubiquitous
                 technology, the variation between different RFID
                 systems still remains high. This paper presents several
                 prototyping environments for different components of
                 radio frequency identification (RFID) tags to
                 demonstrate how many of these components can be
                 standardized for many different purposes. We include
                 two active tag prototypes, one based on a
                 microprocessor and the second based on custom hardware.
                 To program these devices we present a design automation
                 flow that allows RFID transactions to be described in
                 terms of primitives with behavior written in ANSI C
                 code. To save power with active RFID devices we
                 describe a passive transceiver switch called the
                 ``burst switch'' and demonstrate how this can be used
                 in a system with a microprocessor or custom hardware
                 controller. Finally, we present a full RFID system
                 prototyping environment based on real-time spectrum
                 analysis technology currently deployed at the
                 University of Pittsburgh RFID Center of Excellence.
                 Using our prototyping techniques we show how
                 transactions from multiple standards can be combined
                 and targeted to several microprocessors include the
                 Microchip PIC, Intel StrongARM and XScale, and AD Chips
                 EISC as well as several hardware targets including the
                 Altera Apex, Actel Fusion, Xilinx Coolrunner II,
                 Spartan 3 and Virtex 2, and cell-based ASICs.",
  acknowledgement = ack-nhfb,
  articleno =    "29",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Design automation; low-power; prototyping; RFID",
}

@Article{Hu:2008:PSF,
  author =       "Yu Hu and Yan Lin and Lei He and Tim Tuan",
  title =        "Physical synthesis for {FPGA} interconnect power
                 reduction by dual-{Vdd} budgeting and retiming",
  journal =      j-TODAES,
  volume =       "13",
  number =       "2",
  pages =        "30:1--30:??",
  month =        apr,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1344418.1344426",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:10:39 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Field programmable dual-Vdd interconnects are
                 effective in reducing FPGA power. We formulate the
                 dual-Vdd-aware slack budgeting problem as a linear
                 program (LP) and a min-cost network flow problem,
                 respectively. Both algorithms reduce interconnect power
                 by 50\% on average compared to single-Vdd
                 interconnects, but the network-flow-based algorithm
                 runs 11x faster on MCNC benchmarks. Furthermore, we
                 develop simultaneous retiming and slack budgeting
                 (SRSB) with flip-flop layout constraints in dual-Vdd
                 FPGAs based on mixed integer linear programming, and
                 speed-up the algorithm by LP relaxation and local
                 legalization. Compared to retiming followed by slack
                 budgeting, SRSB reduces interconnect power by up to
                 28.8\%.",
  acknowledgement = ack-nhfb,
  articleno =    "30",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "FPGA; Low power; retiming",
}

@Article{AlKhatib:2008:MSC,
  author =       "Iyad {Al Khatib} and Francesco Poletti and Davide
                 Bertozzi and Luca Benini and Mohamed Bechara and Hasan
                 Khalifeh and Axel Jantsch and Rustam Nabiev",
  title =        "A multiprocessor system-on-chip for real-time
                 biomedical monitoring and analysis: {ECG} prototype
                 architectural design space exploration",
  journal =      j-TODAES,
  volume =       "13",
  number =       "2",
  pages =        "31:1--31:??",
  month =        apr,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1344418.1344427",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:10:39 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "In this article we focus on multiprocessor
                 system-on-chip (MPSoC) architectures for human heart
                 electrocardiogram (ECG) real time analysis as a
                 hardware/software (HW/SW) platform offering an advance
                 relative to state-of-the-art solutions. This is a
                 relevant biomedical application with good potential
                 market, since heart diseases are responsible for the
                 largest number of yearly deaths. Hence, it is a good
                 target for an application-specific system-on-chip (SoC)
                 and HW/SW codesign. We investigate a symmetric
                 multiprocessor architecture based on STMicroelectronics
                 VLIW DSPs that process in real time 12-lead ECG
                 signals. This architecture improves upon
                 state-of-the-art SoC designs for ECG analysis in its
                 ability to analyze the full 12 leads in real time, even
                 with high sampling frequencies, and its ability to
                 detect heart malfunction for the whole ECG signal
                 interval. We explore the design space by considering a
                 number of hardware and software architectural options.
                 Comparing our design with present-day solutions from an
                 SoC and application point-of-view shows that our
                 platform can be used in real time and without
                 failures.",
  acknowledgement = ack-nhfb,
  articleno =    "31",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "electrocardiogram algorithms; embedded system design;
                 hardware space exploration; Multiprocessor
                 system-on-chip; real time analysis",
}

@Article{Zhou:2008:HTC,
  author =       "Xiangrong Zhou and Peter Petrov",
  title =        "Heterogeneously tagged caches for low-power embedded
                 systems with virtual memory support",
  journal =      j-TODAES,
  volume =       "13",
  number =       "2",
  pages =        "32:1--32:??",
  month =        apr,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1344418.1344428",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:10:39 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "An energy-efficient data cache organization for
                 embedded processors with virtual memory is proposed.
                 Application knowledge regarding memory references is
                 used to eliminate most tag translations. A novel
                 tagging scheme is introduced, where both virtual and
                 physical tags coexist. Physical tags and special
                 handling of superset index bits are only used for
                 references to shared regions in order to avoid cache
                 inconsistency. By eliminating the need for most address
                 translations on cache access, a significant power
                 reduction is achieved. We outline an efficient hardware
                 architecture, where the application information is
                 captured in a reprogrammable way and the cache is
                 minimally modified.",
  acknowledgement = ack-nhfb,
  articleno =    "32",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Embedded systems",
}

@Article{Liu:2008:PVA,
  author =       "Fang Liu and Sule Ozev and Plamen K. Nikolov",
  title =        "Parametric variability analysis for multistage analog
                 circuits using analytical sensitivity modeling",
  journal =      j-TODAES,
  volume =       "13",
  number =       "2",
  pages =        "33:1--33:??",
  month =        apr,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1344418.1344429",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:10:39 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Process variations play an increasingly important role
                 on the success of analog circuits. State-of-the-art
                 analog circuits are based on complex architectures and
                 contain many hierarchical layers and parameters.
                 Knowledge of the parameter variances and their
                 contribution patterns is crucial for a successful
                 design process. This information is valuable to find
                 solutions for many problems in design, design
                 automation, testing, and fault tolerance. In this
                 article, we present a hierarchical variance analysis
                 methodology for multistage analog circuits. Starting
                 from the process/layout level, we derive implicit
                 hierarchical relations and extract the sensitivity
                 information analytically. We make use of previously
                 computed values whenever possible so as to reduce
                 computational time. The proposed approach is
                 particularly geared for the domain of design and test
                 automation, where multiple runs on slightly different
                 circuits are necessary. Experimental results indicate
                 that the proposed method provides both accuracy and
                 computational efficiency when compared with prior
                 approaches.",
  acknowledgement = ack-nhfb,
  articleno =    "33",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "analog circuits; Hierarchical variance analysis;
                 parameter correlations; performance model; process
                 variations",
}

@Article{Cheng:2008:FSI,
  author =       "Lei Cheng and Deming Chen and Martin D. F. Wong",
  title =        "A fast simultaneous input vector generation and gate
                 replacement algorithm for leakage power reduction",
  journal =      j-TODAES,
  volume =       "13",
  number =       "2",
  pages =        "34:1--34:??",
  month =        apr,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1344418.1344430",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:10:39 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "The Input vector control (IVC) technique is based on
                 the observation that the leakage current in a CMOS
                 logic gate depends on gate input state, and a good
                 input vector is able to minimize leakage when the
                 circuit is in sleep mode. The gate replacement
                 technique is a very effective method to further reduce
                 the leakage current. In this article, we propose a fast
                 heuristic algorithm to find a low-leakage input vector
                 with simultaneous gate replacement. Results on MCNC91
                 benchmark circuits show that our algorithm produces
                 14\% better leakage current reduction with several
                 orders of magnitude speedup in runtime for large
                 circuits compared to the previous state-of-the-art
                 algorithm. In particular, the average runtime for the
                 ten largest combinational circuits has been
                 dramatically reduced from 1879 seconds to 0.34
                 seconds.",
  acknowledgement = ack-nhfb,
  articleno =    "34",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "gate replacement; Input vector control; leakage
                 reduction",
}

@Article{Bernasconi:2008:OKS,
  author =       "Anna Bernasconi and Valentina Ciriani and Roberto
                 Cordone",
  title =        "The optimization of {kEP-SOPs}: {Computational}
                 complexity, approximability and experiments",
  journal =      j-TODAES,
  volume =       "13",
  number =       "2",
  pages =        "35:1--35:??",
  month =        apr,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1344418.1344431",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:10:39 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "We propose a new algebraic four-level expression
                 called k-EXOR-projected sum of products (kEP-SOP). The
                 optimization of a kEP-SOP is NP$^{\rm NP}$-hard, but can be
                 approximated within a fixed performance guarantee in
                 polynomial time. Moreover, fully testable circuits
                 under the stuck-at-fault model can be derived from
                 kEP-SOPs by adding at most a constant number of
                 multiplexer gates. The experiments show that the
                 computational time is very short and the results are
                 most of the time optimal with respect to the number of
                 products involved. kEP-SOPs also prove experimentally a
                 good starting point for general multilevel logic
                 synthesis.",
  acknowledgement = ack-nhfb,
  articleno =    "35",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "approximation algorithm; Automatic synthesis;
                 multilevel logic synthesis; optimization; testing",
}

@Article{Bahar:2008:IJA,
  author =       "R. Iris Bahar and Krishnendu Chakrabarty",
  title =        "Introduction to joint {ACM JETC\slash TODAES} special
                 issue on new, emerging, and specialized technologies",
  journal =      j-TODAES,
  volume =       "13",
  number =       "2",
  pages =        "36:1--36:??",
  month =        apr,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1344418.1344432",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jun 12 18:10:39 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  articleno =    "36",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Dutt:2008:E,
  author =       "Nikil Dutt",
  title =        "Editorial",
  journal =      j-TODAES,
  volume =       "13",
  number =       "3",
  pages =        "37:1--37:??",
  month =        jul,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1367045.1367046",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Aug 5 18:41:27 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  articleno =    "37",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Jones:2008:ISS,
  author =       "Alex K. Jones and Robert Walker",
  title =        "Introduction to the special section on demonstrable
                 software systems and hardware platforms {II}",
  journal =      j-TODAES,
  volume =       "13",
  number =       "3",
  pages =        "38:1--38:??",
  month =        jul,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1367045.1367047",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Aug 5 18:41:27 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  articleno =    "38",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Kwon:2008:RPP,
  author =       "Seongnam Kwon and Yongjoo Kim and Woo-Chul Jeun and
                 Soonhoi Ha and Yunheung Paek",
  title =        "A retargetable parallel-programming framework for
                 {MPSoC}",
  journal =      j-TODAES,
  volume =       "13",
  number =       "3",
  pages =        "39:1--39:??",
  month =        jul,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1367045.1367048",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Aug 5 18:41:27 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "As more processing elements are integrated in a single
                 chip, embedded software design becomes more
                 challenging: It becomes a parallel programming for
                 nontrivial heterogeneous multiprocessors with diverse
                 communication architectures, and design constraints
                 such as hardware cost, power, and timeliness. In the
                 current practice of parallel programming with MPI or
                 OpenMP, the programmer should manually optimize the
                 parallel code for each target architecture and for the
                 design constraints. Thus, the design-space exploration
                 of MPSoC (multiprocessor systems-on-chip) costs become
                 prohibitively large as software development overhead
                 increases drastically. To solve this problem, we
                 develop a parallel-programming framework based on a
                 novel programming model called common intermediate code
                 (CIC). In a CIC, functional parallelism and data
                 parallelism of application tasks are specified
                 independently of the target architecture and design
                 constraints. Then, the CIC translator translates the
                 CIC into the final parallel code, considering the
                 target architecture and design constraints to make the
                 CIC retargetable. Experiments with preliminary
                 examples, including the H.263 decoder, show that the
                 proposed parallel-programming framework increases the
                 design productivity of MPSoC software significantly.",
  acknowledgement = ack-nhfb,
  articleno =    "39",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "design-space exploration; embedded software;
                 multiprocessor system on chip; parallel-programming;
                 software generation",
}

@Article{Kumar:2008:MSS,
  author =       "Akash Kumar and Shakith Fernando and Yajun Ha and Bart
                 Mesman and Henk Corporaal",
  title =        "Multiprocessor systems synthesis for multiple
                 use-cases of multiple applications on {FPGA}",
  journal =      j-TODAES,
  volume =       "13",
  number =       "3",
  pages =        "40:1--40:??",
  month =        jul,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1367045.1367049",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Aug 5 18:41:27 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Future applications for embedded systems demand chip
                 multiprocessor designs to meet real-time deadlines. The
                 large number of applications in these systems generates
                 an exponential number of use-cases. The key design
                 automation challenges are designing systems for these
                 use-cases and fast exploration of software and hardware
                 implementation alternatives with accurate performance
                 evaluation of these use-cases. These challenges cannot
                 be overcome by current design methodologies which are
                 semiautomated, time consuming, and error prone.\par

                 In this article, we present a design methodology to
                 generate multiprocessor systems in a systematic and
                 fully automated way for {\em multiple use-cases}.
                 Techniques are presented to merge multiple use-cases
                 into one hardware design to minimize cost and design
                 time, making it well suited for fast design-space
                 exploration (DSE) in MPSoC systems. Heuristics to
                 partition use-cases are also presented such that each
                 partition can fit in an FPGA, and all use-cases can be
                 catered for.\par

                 The proposed methodology is implemented into a tool for
                 Xilinx FPGAs for evaluation. The tool is also made
                 available online for the benefit of the research
                 community and is used to carry out a DSE case study
                 with multiple use-cases of real-life applications: H263
                 and JPEG decoders. The generation of the entire design
                 takes about 100 ms, and the whole DSE was completed in
                 45 minutes, including FPGA mapping and synthesis. The
                 heuristics used for use-case partitioning reduce the
                 design-exploration time elevenfold in a case study with
                 mobile-phone applications.",
  acknowledgement = ack-nhfb,
  articleno =    "40",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "design exploration; FPGA; multi-application;
                 multimedia systems; multiple use-cases; multiprocessor
                 systems; synchronous data-flow graphs",
}

@Article{Krashinsky:2008:ISV,
  author =       "Ronny Krashinsky and Christopher Batten and Krste
                 Asanovi{\'c}",
  title =        "Implementing the {Scale} vector-thread processor",
  journal =      j-TODAES,
  volume =       "13",
  number =       "3",
  pages =        "41:1--41:??",
  month =        jul,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1367045.1367050",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Aug 5 18:41:27 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "The Scale vector-thread processor is a
                 complexity-effective solution for embedded computing
                 which flexibly supports both vector and highly
                 multithreaded processing. The 7.1-million transistor
                 chip has 16 decoupled execution clusters, vector load
                 and store units, and a nonblocking 32KB cache. An
                 automated and iterative design and verification flow
                 enabled a performance-, power-, and area-efficient
                 implementation with two person-years of development
                 effort. Scale has a core area of 16.6 mm$^2$ in 180 nm
                 technology, and it consumes 400 mW--1.1 W while running
                 at 260 MHz.",
  acknowledgement = ack-nhfb,
  articleno =    "41",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "hybrid C++/Verilog simulation; iterative VLSI design
                 flow; multithreaded processors; procedural datapath
                 pre-placement; vector processors; vector-thread
                 processors",
}

@article{Mishra:2008:SDD,
  author          = {Prabhat Mishra and Nikil Dutt},
  title           = {Specification-driven directed test generation for
                     validation of pipelined processors},
  journal         = j-TODAES,
  year            = {2008},
  month           = jul,
  volume          = {13},
  number          = {3},
  articleno       = {42},
  pages           = {42:1--42:??},
  DOI             = {https://doi.org/10.1145/1367045.1367051},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  CODEN           = {ATASFO},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {functional validation; model checking; test
                     generation},
  abstract        = {Functional validation is a major bottleneck in
                     pipelined processor design due to the combined effects
                     of increasing design complexity and lack of efficient
                     techniques for directed test generation. Directed test
                     vectors can reduce overall validation effort, since
                     shorter tests can obtain the same coverage goal
                     compared to the random tests. This article presents a
                     specification-driven directed test generation
                     methodology. The proposed methodology makes three
                     important contributions. First, a general graph model
                     is developed that can capture the structure and
                     behavior (instruction set) of a wide variety of
                     pipelined processors. The graph model is generated from
                     the processor specification. Next, we propose a
                     functional fault model that is used to define the
                     functional coverage for pipelined architectures.
                     Finally, we propose two complementary test generation
                     techniques: test generation using model checking, and
                     test generation using template-based procedures. These
                     test generation techniques accept the graph model of
                     the architecture as input and generate test programs to
                     detect all the faults in the functional fault model.
                     Our experimental results on two pipelined processor
                     models demonstrate several orders-of-magnitude
                     reduction in overall validation effort by drastically
                     reducing both test-generation time and number of test
                     programs required to achieve a coverage goal.},
  bibdate         = {Tue Aug 5 18:41:27 MDT 2008},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@article{Joo:2008:ECP,
  author          = {Yongsoo Joo and Youngjin Cho and Donghwa Shin and
                     Jaehyun Park and Naehyuck Chang},
  title           = {An energy characterization platform for memory devices
                     and energy-aware data compression for multilevel-cell
                     flash memory},
  journal         = j-TODAES,
  year            = {2008},
  month           = jul,
  volume          = {13},
  number          = {3},
  articleno       = {43},
  pages           = {43:1--43:??},
  DOI             = {https://doi.org/10.1145/1367045.1367052},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  CODEN           = {ATASFO},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {compression; flash memory; MLC},
  abstract        = {Memory devices often consume more energy than
                     microprocessors in current portable embedded systems,
                     but their energy consumption changes significantly with
                     the type of transaction, data values, and access
                     timing, as well as depending on the total number of
                     transactions. These variabilities mean that an
                     innovative tool and framework are required to
                     characterize modern memory devices running in embedded
                     system architectures.\par

                     We introduce an energy measurement and characterization
                     platform for memory devices, and demonstrate an
                     application to multilevel-cell (MLC) flash memories, in
                     which we discover significant value-dependent
                     programming energy variations. We introduce an
                     energy-aware data compression method that minimizes the
                     flash programming energy, rather than the size of the
                     compressed data, which is formulated as an entropy
                     coding with unequal bit-pattern costs. Deploying a
                     probabilistic approach, we derive energy-optimal
                     bit-pattern probabilities and expected values of the
                     bit-pattern costs which are applicable to the large
                     amounts of compressed data typically found in
                     multimedia applications. Then we develop an
                     energy-optimal prefix coding that uses integer linear
                     programming, and construct a prefix-code table. From a
                     consideration of Pareto-optimal energy consumption, we
                     can make tradeoffs between data size and programming
                     energy, such as a 41\% energy savings for a 52\% area
                     overhead.},
  bibdate         = {Tue Aug 5 18:41:27 MDT 2008},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@article{Huffmire:2008:DSS,
  author          = {Ted Huffmire and Brett Brotherton and Nick Callegari
                     and Jonathan Valamehr and Jeff White and Ryan Kastner
                     and Tim Sherwood},
  title           = {Designing secure systems on reconfigurable hardware},
  journal         = j-TODAES,
  year            = {2008},
  month           = jul,
  volume          = {13},
  number          = {3},
  articleno       = {44},
  pages           = {44:1--44:??},
  DOI             = {https://doi.org/10.1145/1367045.1367053},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  CODEN           = {ATASFO},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {Advanced Encryption Standard (AES); controlled
                     sharing; enforcement mechanisms; execution monitors;
                     Field programmable gate arrays (FPGAs); hardware
                     security; isolation; memory protection; reference
                     monitors; security policies; security primitives;
                     separation; static analysis; systems-on-a-chip (SoCs)},
  abstract        = {The extremely high cost of custom ASIC fabrication
                     makes FPGAs an attractive alternative for deployment of
                     custom hardware. Embedded systems based on
                     reconfigurable hardware integrate many functions onto a
                     single device. Since embedded designers often have no
                     choice but to use soft IP cores obtained from third
                     parties, the cores operate at different trust levels,
                     resulting in mixed-trust designs. The goal of this
                     project is to evaluate recently proposed security
                     primitives for reconfigurable hardware by building a
                     real embedded system with several cores on a single
                     FPGA and implementing these primitives on the system.
                     Overcoming the practical problems of integrating
                     multiple cores together with security mechanisms will
                     help us to develop realistic security-policy
                     specifications that drive enforcement mechanisms on
                     embedded systems.},
  bibdate         = {Tue Aug 5 18:41:27 MDT 2008},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@article{Manolios:2008:AVS,
  author          = {Panagiotis Manolios and Sudarshan K. Srinivasan},
  title           = {Automatic verification of safety and liveness for
                     pipelined machines using {WEB} refinement},
  journal         = j-TODAES,
  year            = {2008},
  month           = jul,
  volume          = {13},
  number          = {3},
  articleno       = {45},
  pages           = {45:1--45:??},
  DOI             = {https://doi.org/10.1145/1367045.1367054},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  CODEN           = {ATASFO},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {bisimulation; commitment; flushing; liveness;
                     pipelined machines; refinement; refinement maps; SAT;
                     verification},
  abstract        = {We show how to automatically verify that complex
                     pipelined machine models satisfy the same safety and
                     liveness properties as their instruction-set
                     architecture (ISA) models by using well-founded
                     equivalence bisimulation (WEB) refinement. We show how
                     to reduce WEB-refinement proof obligations to formulas
                     expressible in the decidable logic of counter
                     arithmetic with lambda expressions and uninterpreted
                     functions (CLU). This allows us to automate the
                     verification of the pipelined machine models by using
                     the UCLID decision procedure to transform CLU formulas
                     to Boolean satisfiability problems. To relate pipelined
                     machine states to ISA states, we use the commitment and
                     flushing refinement maps. We evaluate our work using 17
                     pipelined machine models that contain various features,
                     including deep pipelines, precise exceptions, branch
                     prediction, interrupts, and instruction queues. Our
                     experimental results show that the overhead of proving
                     liveness, obtained by comparing the cost of proving
                     both safety and liveness with the cost of only proving
                     safety, is about 17\%, but depends on the refinement
                     map used; for example, the liveness overhead is 23\%
                     when flushing is used and is negligible when commitment
                     is used.},
  bibdate         = {Tue Aug 5 18:41:27 MDT 2008},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@article{Wu:2008:PVA,
  author          = {Huaizhi Wu and Martin D. F. Wong and Wilsin Gosti},
  title           = {Postplacement voltage assignment under performance
                     constraints},
  journal         = j-TODAES,
  year            = {2008},
  month           = jul,
  volume          = {13},
  number          = {3},
  articleno       = {46},
  pages           = {46:1--46:??},
  DOI             = {https://doi.org/10.1145/1367045.1367055},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  CODEN           = {ATASFO},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {low power; timing; voltage assignment; Voronoi
                     diagram},
  abstract        = {Multi-Vdd is an effective method to reduce both
                     leakage and dynamic power. A key challenge in a
                     multi-Vdd design is to control the complexity of the
                     power-supply system and limit the demand for level
                     shifters. This can be tackled by grouping cells of
                     different supply voltages into a small number of
                     voltage islands. Recently, an elegant algorithm was
                     proposed for generating voltage islands that balance
                     the power-versus-design-cost tradeoff under performance
                     requirement, according to the placement proximity of
                     the critical cells. One prerequisite of this algorithm
                     is an initial voltage assignment at the standard-cell
                     level that meets timing. In this article, we present a
                     novel method to produce quality voltage assignment
                     which not only meets timing but also forms good
                     proximity of the critical cells to provide a smooth
                     input to the aforementioned voltage island generation.
                     Our algorithm is based on effective delay budgeting and
                     efficient computation of physical proximity by Voronoi
                     diagram. Our extensive experiments on real industrial
                     designs show that our algorithm leads to 25\%--75\%
                     improvement in the voltage island generation in terms
                     of the number of voltage islands generated, with
                     computation time only linear to design size.},
  bibdate         = {Tue Aug 5 18:41:27 MDT 2008},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@article{Bombieri:2008:ROT,
  author          = {Nicola Bombieri and Franco Fummi and Graziano
                     Pravadelli},
  title           = {Reuse and optimization of testbenches and properties
                     in a {TLM-to-RTL} design flow},
  journal         = j-TODAES,
  year            = {2008},
  month           = jul,
  volume          = {13},
  number          = {3},
  articleno       = {47},
  pages           = {47:1--47:??},
  DOI             = {https://doi.org/10.1145/1367045.1367056},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  CODEN           = {ATASFO},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {fault models; functional verification; model checking;
                     TBV; TLM},
  abstract        = {In transaction-level modeling (TLM), verification
                     methodologies based on transactions allow testbenches,
                     properties, and IP cores in mixed TL-RTL designs to be
                     reused. However, no papers in the literature analyze
                     the effectiveness of transaction-based verification
                     (TBV) in comparison to the more traditional RTL
                     approach. The first contribution of this article is the
                     introduction of a functional-fault-model-based
                     methodology for demonstrating the effectiveness of
                     reuse through TBV. A second contribution is the
                     introduction of a similar methodology for efficient
                     property checking which identifies and removes
                     redundant properties prior to assertion-based
                     verification or model checking.},
  bibdate         = {Tue Aug 5 18:41:27 MDT 2008},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@article{Inoue:2008:PVS,
  author          = {Hiroaki Inoue and Junji Sakai and Masato Edahiro},
  title           = {Processor virtualization for secure mobile terminals},
  journal         = j-TODAES,
  year            = {2008},
  month           = jul,
  volume          = {13},
  number          = {3},
  articleno       = {48},
  pages           = {48:1--48:??},
  DOI             = {https://doi.org/10.1145/1367045.1367057},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  CODEN           = {ATASFO},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {multiprocessor; processor virtualization},
  abstract        = {We propose a processor virtualization architecture,
                     VIRTUS, to provide a dedicated domain for preinstalled
                     applications and virtualized domains for downloaded
                     native applications. With it, security-oriented
                     next-generation mobile terminals can provide any number
                     of domains for native applications. VIRTUS features
                     three new technologies, namely, VMM asymmetrization,
                     dynamic interdomain communication (IDC), and
                     virtualization-assist logic, and it is first in the
                     world to virtualize an ARM-based multiprocessor.
                     Evaluations have shown that VMM asymmetrization results
                     in significantly less performance degradation and LOC
                     increase than do other VMMs. Further, dynamic IDC
                     overhead is low enough, and virtualization-assist logic
                     can be implemented in a sufficiently small area.},
  bibdate         = {Tue Aug 5 18:41:27 MDT 2008},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@article{Sanz:2008:CSS,
  author          = {Concepci{\'o}n Sanz and Manuel Prieto and Jos{\'e}
                     Ignacio G{\'o}mez and Antonis Papanikolaou and Miguel
                     Miranda and Francky Catthoor},
  title           = {Combining system scenarios and configurable memories
                     to tolerate unpredictability},
  journal         = j-TODAES,
  year            = {2008},
  month           = jul,
  volume          = {13},
  number          = {3},
  articleno       = {49},
  pages           = {49:1--49:??},
  DOI             = {https://doi.org/10.1145/1367045.1367058},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  CODEN           = {ATASFO},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {parametric yield; process variation; variability
                     compensation},
  abstract        = {Process variability and the dynamism of new
                     applications increase the uncertainty of embedded
                     systems and force designers to use pessimistic
                     assumptions, which have a tremendous impact on both the
                     performance and energy consumption of their memory
                     organizations. In this article we introduce an
                     experimental framework which tries to mitigate the
                     effects of both sources of unpredictability. At compile
                     time, an extensive profiling helps us to detect system
                     scenarios and bounds application dynamism. At the
                     organization level, we incorporate a heterogeneous
                     memory architecture composed by several configurable
                     memories. A calibration process and a runtime control
                     system adapt the platform to the current application
                     needs. Our approach manages to reduce significantly the
                     energy overhead associated to both variability and
                     application dynamism (up to 60\%, according to our
                     simulations) without compromising the timing
                     constraints existing in our target domain of dynamic
                     periodic multimedia applications.},
  bibdate         = {Tue Aug 5 18:41:27 MDT 2008},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@article{Ozturk:2008:IBE,
  author          = {Ozcan Ozturk and Mahmut Kandemir},
  title           = {{ILP}-based energy minimization techniques for banked
                     memories},
  journal         = j-TODAES,
  year            = {2008},
  month           = jul,
  volume          = {13},
  number          = {3},
  articleno       = {50},
  pages           = {50:1--50:??},
  DOI             = {https://doi.org/10.1145/1367045.1367059},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  CODEN           = {ATASFO},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {compilers; data compression; DRAM; low-power operating
                     modes; memory banking; migration; replication},
  abstract        = {Main memories can consume a significant portion of
                     overall energy in many data-intensive embedded
                     applications. One way of reducing this energy
                     consumption is banking, that is, dividing available
                     memory space into multiple banks and placing unused
                     (idle) memory banks into low-power operating modes.
                     Prior work investigated code-restructuring- and
                     data-layout-reorganization-based approaches for
                     increasing the energy benefits that could be obtained
                     from a banked memory architecture. This article
                     explores different techniques that can potentially
                     coexist within the same optimization framework for
                     maximizing benefits of low-power operating modes. These
                     techniques include employing nonuniform bank sizes,
                     data migration, data compression, and data replication.
                     By using these techniques, we try to increase the
                     chances for utilizing low-power operating modes in a
                     more effective manner, and achieve further energy
                     savings over what could be achieved by exploiting
                     low-power modes alone. Specifically, nonuniform banking
                     tries to match bank sizes with application-data access
                     patterns. The goal of data migration is to cluster data
                     with similar access patterns in the same set of banks.
                     Data compression reduces the size of the data used by
                     an application, and thus helps reduce the number of
                     memory banks occupied by data. Finally, data
                     replication increases bank idleness by duplicating
                     select read-only data blocks across banks. We formulate
                     each of these techniques as an ILP (integer linear
                     programming) problem, and solve them using a commercial
                     solver. Our experimental analysis using several
                     benchmarks indicates that all the techniques presented
                     in this framework are successful in reducing memory
                     energy consumption. Based on our experience with these
                     techniques, we recommend to compiler writers for banked
                     memories to consider data compression, replication, and
                     migration.},
  bibdate         = {Tue Aug 5 18:41:27 MDT 2008},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@article{Das:2008:RSA,
  author          = {Sabyasachi Das and Sunil P. Khatri},
  title           = {Resource sharing among mutually exclusive
                     sum-of-product blocks for area reduction},
  journal         = j-TODAES,
  year            = {2008},
  month           = jul,
  volume          = {13},
  number          = {3},
  articleno       = {51},
  pages           = {51:1--51:??},
  DOI             = {https://doi.org/10.1145/1367045.1367060},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  CODEN           = {ATASFO},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  abstract        = {In state-of-the-art digital designs, arithmetic blocks
                     consume a major portion of the total area of the IC.
                     The arithmetic sum-of-product (SOP) is the most widely
                     used arithmetic block. Some of the examples of SOP are
                     adder, subtractor, multiplier, multiply-accumulator
                     (MAC), squarer, chain-of-adders, incrementor,
                     decrementor, etc. In this article, we introduce a
                     novel, area-efficient architecture to share different
                     SOP blocks which are used in a mutually exclusive
                     manner. We implement the core functions of the largest
                     SOP only once and reuse different parts of the core
                     subblocks for all other SOP operations with the help of
                     multiplexers. This architecture can be used in the
                     nontiming-critical paths of the design, to save
                     significant amounts of area. Our experimental data
                     shows that the proposed sharing-based architecture
                     results in about 37\% area savings compared to the
                     results obtained from a commercially available
                     best-in-class datapath synthesis tool. In addition, our
                     proposed shared implementation consumes about 18\% less
                     power. These improvements were verified on
                     placed-and-routed designs as well.},
  bibdate         = {Tue Aug 5 18:41:27 MDT 2008},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@article{Tseng:2008:PPD,
  author          = {I-Lun Tseng and Adam Postula},
  title           = {Partitioning parameterized 45-degree polygons with
                     constraint programming},
  journal         = j-TODAES,
  year            = {2008},
  month           = jul,
  volume          = {13},
  number          = {3},
  articleno       = {52},
  pages           = {52:1--52:??},
  DOI             = {https://doi.org/10.1145/1367045.1367061},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  CODEN           = {ATASFO},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {analog and mixed-signal design; parameterized layouts;
                     parameterized polygons; polygon decomposition;
                     trapezoidal corner stitching},
  abstract        = {An algorithm for partitioning parameterized 45-degree
                     polygons into parameterized trapezoids is proposed in
                     this article. The algorithm is based on the plane-sweep
                     technique and can handle polygons with complicated
                     constraints. The input to the algorithm consists of the
                     contour of a parameterized polygon to be partitioned
                     and a set of constraints for parameters of the contour.
                     The algorithm uses horizontal cuts only and generates a
                     number of nonoverlapping trapezoids whose union is the
                     original parameterized polygon. Processing of
                     constraints and coordinates that contain first-order
                     multiple-variable polynomials has been made possible by
                     incorporating the JaCoP constraint programming library.
                     The proposed algorithm has been implemented in Java
                     programming language and can be used as the basis to
                     build the trapezoidal corner stitching data structure
                     for parameterized VLSI layout masks.},
  bibdate         = {Tue Aug 5 18:41:27 MDT 2008},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@article{Sehgal:2008:PAS,
  author          = {Anuja Sehgal and Sudarshan Bahukudumbi and Krishnendu
                     Chakrabarty},
  title           = {Power-aware {SoC} test planning for effective
                     utilization of port-scalable testers},
  journal         = j-TODAES,
  year            = {2008},
  month           = jul,
  volume          = {13},
  number          = {3},
  articleno       = {53},
  pages           = {53:1--53:??},
  DOI             = {https://doi.org/10.1145/1367045.1367062},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  CODEN           = {ATASFO},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {integer linear programming; port-scalable testers; SoC
                     test; test access architecture},
  abstract        = {Many system-on-chip (SoC) integrated circuits contain
                     embedded cores with different scan frequencies. To
                     better meet the test requirements for such
                     heterogeneous SoCs, leading tester companies have
                     recently introduced port-scalable testers, which can
                     simultaneously drive groups of channels at different
                     data rates. However, the number of tester channels
                     available for scan testing is limited; therefore, a
                     higher shift frequency can increase the test time for a
                     core if the resulting test access architecture reduces
                     the bit-width used to access it. We present a scalable
                     test planning technique that exploits port scalability
                     of testers to reduce SoC test time. We compare the
                     proposed heuristic optimization method to two baseline
                     methods based on prior works that use a single scan
                     data rate for all embedded cores. We also propose a
                     power-aware test planning technique to effectively
                     utilize port-scalable testers under constraints of test
                     power consumption. Experimental results are presented
                     for power-aware test scheduling to illustrate the
                     impact of power constraints on overall test time.},
  bibdate         = {Tue Aug 5 18:41:27 MDT 2008},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@Article{Pecenka:2008:ESR,
  author = {Tomas Pecenka and Lukas Sekanina and Zdenek Kotasek},
  title = {Evolution of synthetic {RTL} benchmark circuits with
    predefined testability},
  journal = j-TODAES,
  volume = {13},
  number = {3},
  pages = {54:1--54:??},
  month = jul,
  year = {2008},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/1367045.1367063},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Tue Aug 5 18:41:27 MDT 2008},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {This article presents a new real-world application of
    evolutionary computing in the area of digital-circuits
    testing. A method is described which enables to evolve
    large synthetic RTL benchmark circuits with a
    predefined structure and testability. Using the
    proposed method, a new collection of synthetic
    benchmark circuits was developed. These benchmark
    circuits will be useful in a validation process of
    novel algorithms and tools in the area of
    digital-circuits testing. Evolved benchmark circuits
    currently represent the most complex benchmark circuits
    with a known level of testability. Furthermore, these
    circuits are the largest that have ever been designed
    by means of evolutionary algorithms. This work also
    investigates suitable parameters of the evolutionary
    algorithm for this problem and explores the limits in
    the complexity of evolved circuits.},
  acknowledgement = ack-nhfb,
  articleno = {54},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {benchmark circuit; evolvable hardware; testability
    analysis},
}

@Article{Pedram:2008:E,
  author = {Massoud Pedram},
  title = {Editorial},
  journal = j-TODAES,
  volume = {13},
  number = {4},
  pages = {55:1--55:??},
  month = sep,
  year = {2008},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/1391962.1391963},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Wed Oct 1 16:09:32 MDT 2008},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  articleno = {55},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Guan:2008:SAP,
  author =       "Nan Guan and Qingxu Deng and Zonghua Gu and Wenyao Xu
                 and Ge Yu",
  title =        "Schedulability analysis of preemptive and
                 nonpreemptive {EDF} on partial runtime-reconfigurable
                 {FPGAs}",
  journal =      j-TODAES,
  volume =       "13",
  number =       "4",
  pages =        "56:1--56:??",
  month =        sep,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1391962.1391964",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Oct 1 16:09:32 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Field Programmable Gate Arrays (FPGAs) are very
                 popular in today's embedded systems design, and Partial
                 Runtime-Reconfigurable (PRTR) FPGAs allow HW tasks to
                 be placed and removed dynamically at runtime. Hardware
                 task scheduling on PRTR FPGAs brings many challenging
                 issues to traditional real-time scheduling theory,
                 which have not been adequately addressed by the
                 research community compared to software task scheduling
                 on CPUs. In this article, we consider the
                 schedulability analysis problem of HW task scheduling
                 on PRTR FPGAs. We derive utilization bounds for several
                 variants of global preemptive/nonpreemptive EDF
                 scheduling, and compare the performance of different
                 utilization bound tests.",
  acknowledgement = ack-nhfb,
  articleno =    "56",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "FPGA; Real-time scheduling; reconfigurable devices",
}

@Article{Mukherjee:2008:HLC,
  author = {Rajarshi Mukherjee and Song Liu and Seda Ogrenci Memik
    and Somsubhra Mondal},
  title = {A high-level clustering algorithm targeting dual
    {V$_{dd}$ FPGAs}},
  journal = j-TODAES,
  volume = {13},
  number = {4},
  pages = {57:1--57:??},
  month = sep,
  year = {2008},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/1391962.1391965},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Wed Oct 1 16:09:32 MDT 2008},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {Recent advanced power optimizations deployed in
    commercial FPGAs, laid out a roadmap towards FPGA
    devices that can be integrated into ultra low power
    systems. In this article, we present a high-level
    design tool to support the process of mapping an
    application onto a FPGA device with dual supply
    voltages. Our main contribution in this paper is an
    algorithm, which creates voltage scaling ready clusters
    by utilizing the timing slack available in the designs.
    We propose to first create clusters of CLBs within a
    given CLB-level netlist. This clustering algorithm
    intends to group chains of CLBs possessing similar
    amounts of timing slack along their critical path
    together. Once these clusters are identified, they are
    placed onto respective V$_{dd}$ partitions on the
    device. We have evaluated different dual V$_{dd}$
    fabrics and the potential gain in power consumption is
    explored. When a subset of the logic blocks on the
    device can be driven by low V$_{dd}$ levels (either
    with a dedicated low V$_{dd}$ supply or with a
    programmable selection between low and high V$_{dd}$
    levels for these blocks) this affects placement and
    routing. As a result the maximum frequency of the
    designs may be affected. In order to evaluate the
    overall impact of creating voltage islands, we measured
    the Energy-Delay Product for our benchmark designs. We
    observed that the Energy-Delay product can be decreased
    by 26.9\% when the placement of the designs into
    different voltage levels is guided by our clustering
    algorithm.},
  acknowledgement = ack-nhfb,
  articleno = {57},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {clustering; Dynamic power; field programmable gate
    arrays; partitioning; placement; voltage scaling},
}

@Article{Resano:2008:ESR,
  author = {Javier Resano and Juan Antonio Clemente and Carlos
    Gonzalez and Daniel Mozos and Francky Catthoor},
  title = {Efficiently scheduling runtime reconfigurations},
  journal = j-TODAES,
  volume = {13},
  number = {4},
  pages = {58:1--58:??},
  month = sep,
  year = {2008},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/1391962.1391966},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Wed Oct 1 16:09:32 MDT 2008},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {Due to the emergence of portable devices that must run
    complex dynamic applications there is a need for
    flexible platforms for embedded systems. Runtime
    reconfigurable hardware can provide this flexibility
    but the reconfiguration latency can significantly
    decrease the performance. When dealing with task
    graphs, runtime support that schedules the
    reconfigurations in advance can drastically reduce this
    overhead. However, executing complex scheduling
    heuristics at runtime may generate an excessive
    penalty. Hence, we have developed a hybrid
    design-time/runtime reconfiguration scheduling
    heuristic that generates its final schedule at runtime
    but carries out most computations at design-time. We
    have tested our approach in a PowerPC 405 processor
    embedded on a FPGA demonstrating that it generates a
    very small runtime penalty while providing almost as
    good schedules as a full runtime approach.},
  acknowledgement = ack-nhfb,
  articleno = {58},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {FPGAs; hardware multitasking; Reconfigurable
    architectures; runtime/design-time scheduling},
}

@Article{Garg:2008:SLT,
  author = {Siddharth Garg and Diana Marculescu},
  title = {System-level throughput analysis for process variation
    aware multiple voltage-frequency island designs},
  journal = j-TODAES,
  volume = {13},
  number = {4},
  pages = {59:1--59:??},
  month = sep,
  year = {2008},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/1391962.1391967},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Wed Oct 1 16:09:32 MDT 2008},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {The increasing variability in manufacturing process
    parameters is expected to lead to significant
    performance degradation in deep submicron technologies.
    Multiple Voltage-Frequency Island (VFI) design styles
    with fine-grained, process-variation aware clocking
    have recently been shown to possess increased immunity
    to manufacturing process variations. In this article,
    we propose a theoretical framework that allows
    designers to quantify the performance improvement that
    is to be expected if they were to migrate from a fully
    synchronous design to the proposed multiple VFI design
    style. Specifically, we provide techniques to
    efficiently and accurately estimate the probability
    distribution of the execution rate (or throughput) of
    both single and multiple VFI systems under the
    influence of manufacturing process variations. Finally,
    using an MPEG-2 encoder benchmark, we demonstrate how
    the proposed analysis framework can be used by
    designers to make architectural decisions such as the
    granularity of VFI domain partitioning based on the
    throughput constraints their systems are required to
    satisfy.},
  acknowledgement = ack-nhfb,
  articleno = {59},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {Globally asynchronous locally synchronous;
    manufacturing process variations; maximum cycle mean;
    performance analysis; system-level design;
    voltage-frequency islands},
}

@Article{Ozturk:2008:APB,
  author = {Ozcan Ozturk and Mahmut Kandemir and Guangyu Chen},
  title = {Access pattern-based code compression for
    memory-constrained systems},
  journal = j-TODAES,
  volume = {13},
  number = {4},
  pages = {60:1--60:??},
  month = sep,
  year = {2008},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/1391962.1391968},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Wed Oct 1 16:09:32 MDT 2008},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {As compared to a large spectrum of performance
    optimizations, relatively less effort has been
    dedicated to optimize other aspects of embedded
    applications such as memory space requirements, power,
    real-time predictability, and reliability. In
    particular, many modern embedded systems operate under
    tight memory space constraints. One way of addressing
    this constraint is to compress executable code and data
    as much as possible. While researchers on code
    compression have studied efficient hardware and
    software based code compression strategies, many of
    these techniques do not take application behavior into
    account; that is, the same compression/decompression
    strategy is used irrespective of the application being
    optimized. This article presents an
    application-sensitive code compression strategy based
    on control flow graph (CFG) representation of the
    embedded program. The idea is to start with a memory
    image wherein all basic blocks of the application are
    compressed, and decompress only the blocks that are
    predicted to be needed in the near future. When the
    current access to a basic block is over, our approach
    also decides the point at which the block could be
    compressed. We propose and evaluate several compression
    and decompression strategies that try to reduce memory
    requirements without excessively increasing the
    original instruction cycle counts. Some of our
    strategies make use of profile data, whereas others are
    fully automatic. Our experimental evaluation using
    seven applications from the MediaBench suite and three
    large embedded applications reveals that the proposed
    code compression strategy is very successful in
    practice. Our results also indicate that working at a
    basic block granularity, as opposed to a procedure
    granularity, is important for maximizing memory space
    savings.},
  acknowledgement = ack-nhfb,
  articleno = {60},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {CFG; code access pattern; code compression; Embedded
    systems; memory optimization},
}

@Article{Baradaran:2008:CAM,
  author = {Nastaran Baradaran and Pedro C. Diniz},
  title = {A compiler approach to managing storage and memory
    bandwidth in configurable architectures},
  journal = j-TODAES,
  volume = {13},
  number = {4},
  pages = {61:1--61:??},
  month = sep,
  year = {2008},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/1391962.1391969},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Wed Oct 1 16:09:32 MDT 2008},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {Configurable architectures offer the unique
    opportunity of realizing hardware designs tailored to
    the specific data and computational patterns of an
    application code. Customizing the storage structures is
    becoming increasingly important in mitigating the
    continuing gap between memory latencies and internal
    computing speeds. In this article we describe and
    evaluate a compiler algorithm that maps the arrays of a
    loop-based computation to internal storage structures,
    either RAM blocks or discrete registers. Our objective
    is to minimize the overall execution time while
    considering the capacity and bandwidth constraints of
    the storage resources. The novelty of our approach lies
    in creating a single framework that combines high-level
    compiler techniques with lower-level scheduling
    information for mapping the data. We illustrate the
    benefits of our approach for a set of image/signal
    processing kernels using a Xilinx Virtex\TM{}
    Field-Programmable Gate Array (FPGA). Our algorithm
    leads to faster designs compared to the
    state-of-the-art {\em custom data layout\/} mapping
    technique, in some instances using less storage. When
    compared to hand-coded designs, our results are
    comparable in terms of execution time and resources,
    but are derived in a minute fraction of the design
    time.},
  acknowledgement = ack-nhfb,
  articleno = {61},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {Compiler analysis; configurable architectures;
    high-level hardware synthesis; storage allocation and
    management},
}

@Article{Banerjee:2008:ASM,
  author = {Ansuman Banerjee and Pallab Dasgupta and P. P.
    Chakrabarti},
  title = {Auxiliary state machines + context-triggered
    properties in verification},
  journal = j-TODAES,
  volume = {13},
  number = {4},
  pages = {62:1--62:??},
  month = sep,
  year = {2008},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/1391962.1391970},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Wed Oct 1 16:09:32 MDT 2008},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {Formal specifications of interface protocols between a
    design-under-test and its environment mostly consist of
    two types of correctness requirements, namely (a) a set
    of invariants that applies throughout the protocol
    execution and (b) a set of {\em context-triggered\/}
    properties that applies only when the protocol state
    belongs to a specific set of contexts. To model such
    requirements, an increasingly popular design choice in
    the assertion IP design community has been the use of
    abstract {\em context state machines\/} and
    state-oriented properties. In this paper, we formalize
    this modeling style and present algorithms for
    verifying such specifications. Specifically, we present
    a purely formal approach and a semi-formal approach for
    verifying such specifications. We demonstrate the use
    of this design style in modeling some of the industry
    standard protocol descriptions and present encouraging
    results.},
  acknowledgement = ack-nhfb,
  articleno = {62},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Panda:2008:SBV,
  author = {S. K. Panda and Arnab Roy and P. P. Chakrabarti and
    Rajeev Kumar},
  title = {Simulation-based verification using {Temporally
    Attributed Boolean Logic}},
  journal = j-TODAES,
  volume = {13},
  number = {4},
  pages = {63:1--63:??},
  month = sep,
  year = {2008},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/1391962.1391971},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Wed Oct 1 16:09:32 MDT 2008},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {We propose a specification logic called Temporally
    Attributed Boolean (TAB) Logic for Assertion Based
    Verification, which allows us to: (i) represent
    assertions succinctly, (ii) incorporate
    data-orientation and (iii) associate timing to design
    intentions. TAB Logic allows us to write specifications
    functionally linking system variables from different
    temporal contexts. We present examples to show the
    motivation for this logic especially in the context of
    high level modeling of complex real time systems. We
    formally define TAB Logic, formulate the problem of
    verification on a simulation trace and present
    efficient algorithms to check TAB assertions, both
    offline and online. We present results of application
    of TAB Logic for Instruction Semantics and Bus
    Transaction Verification of a bus integrated pipelined
    processor core implementation. We also employ TAB Logic
    to validate the Interrupt mode behavior of the
    processor core implementation. Further, we show the
    utility of TAB Logic in fault detection. Finally, we
    demonstrate the applicability of TAB Logic in the
    domain of simulation based verification of analog
    circuits like Operational Amplifiers and DC-DC
    Converters. We finally discuss the limitations of TAB
    logic and conclude.},
  acknowledgement = ack-nhfb,
  articleno = {63},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {Bus verification; instruction semantics verification;
    interrupt testing; offline-online verification
    algorithm; simulation based verification; temporal
    logic; timing verification},
}

@Article{Wang:2008:LAS,
  author = {Sying-Jyan Wang and Kuo-Lin Peng and Kuang-Cyun Hsiao
    and Katherine Shu-Min Li},
  title = {Layout-aware scan chain reorder for launch-off-shift
    transition test coverage},
  journal = j-TODAES,
  volume = {13},
  number = {4},
  pages = {64:1--64:??},
  month = sep,
  year = {2008},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/1391962.1391972},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Wed Oct 1 16:09:32 MDT 2008},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {Launch-off-shift (LOS) is a popular delay test
    technique for scan-based designs. However, it is
    usually not possible to achieve good delay fault
    coverage in LOS test due to conflicts in test vectors.
    In this article, we propose a layout-based scan chain
    ordering method to improve fault coverage for LOS test
    with limited routing overhead. A fast and effective
    algorithm is used to eliminate conflicts in test
    vectors while at the same time restrict the extra scan
    chain routing. This approach provides many advantages.
    (1) The proposed method can improve delay fault
    coverage for LOS test. (2) With layout information
    taken into account, the routing penalty is limited, and
    thus the impact on circuit performance will not be
    significant. Experimental results show that the
    proposed LOS test method achieves about the same level
    of delay fault coverage as enhanced scan does, while
    the average scan chain wire length is about 2.2 times
    of the shortest scan chain.},
  acknowledgement = ack-nhfb,
  articleno = {64},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {scan chain ordering; Scan test; test generation;
    transition faults},
}

@Article{Moiseev:2008:TAP,
  author = {Konstantin Moiseev and Avinoam Kolodny and Shmuel
    Wimer},
  title = {Timing-aware power-optimal ordering of signals},
  journal = j-TODAES,
  volume = {13},
  number = {4},
  pages = {65:1--65:??},
  month = sep,
  year = {2008},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/1391962.1391973},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Wed Oct 1 16:09:32 MDT 2008},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {A computationally efficient technique for reducing
    interconnect active power in VLSI systems is presented.
    Power reduction is accomplished by simultaneous wire
    spacing and net ordering, such that cross-capacitances
    between wires are optimally shared. The existence of a
    unique power-optimal wire order within a bundle is
    proven, and a method to construct this order is
    derived. The optimal order of wires depends only on the
    activity factors of the underlying signals; hence, it
    can be performed prior to spacing optimization. By
    using this order of wires, optimality of the combined
    solution is guaranteed (as compared with any other
    ordering and spacing of the wires). Timing-aware power
    optimization is enabled by simultaneously considering
    timing criticality weights and activity factors for the
    signals. The proposed algorithm has been applied to
    various interconnect layouts, including wire bundles
    from high-end microprocessor circuits in 65 nm
    technology. Interconnect power reduction of 17\% on
    average has been observed in such bundles.},
  acknowledgement = ack-nhfb,
  articleno = {65},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {interconnect optimization; power optimization; Wire
    ordering; wire spacing},
}

@Article{Lu:2008:EDI,
  author = {Chao-Hung Lu and Hung-Ming Chen and Chien-Nan Jimmy
    Liu},
  title = {Effective decap insertion in area-array {SoC}
    floorplan design},
  journal = j-TODAES,
  volume = {13},
  number = {4},
  pages = {66:1--66:??},
  month = sep,
  year = {2008},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/1391962.1391974},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Wed Oct 1 16:09:32 MDT 2008},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {As VLSI technology enters the nanometer era, supply
    voltages continue to drop due to the reduction of power
    dissipation, but it makes power integrity problems even
    worse. Employing decoupling capacitances (decaps) in
    floorplan stage is a common approach to alleviating
    supply noise problems. Previous researches overestimate
    the decap budget and do not fully utilize the empty
    space of the floorplan. A floorplan usually has a lot
    of available space that can be used to insert the decap
    without increasing the floorplan area. Therefore, the
    goal of this work is to develop a better model to
    calculate the required decap to solve the power supply
    noise problem in area-array based designs, and increase
    the usage of available space in the floorplan to reduce
    the area overhead caused by decap insertion. The
    experimental results of this work are encouraging.
    Compared with previous approaches, our methodology
    reduces 38\% of the decap budget in average for MCNC
    benchmarks but can still meet the power supply noise
    requirements. The final floorplan areas with decap are
    also smaller than the numbers reported in previous
    works.},
  acknowledgement = ack-nhfb,
  articleno = {66},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {decap insertion; floorplan; Power supply noise},
}

@Article{Moffitt:2008:CDF,
  author =       "Michael D. Moffitt and Jarrod A. Roy and Igor L.
                 Markov and Martha E. Pollack",
  title =        "Constraint-driven floorplan repair",
  journal =      j-TODAES,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  volume =       "13",
  number =       "4",
  articleno =    "67",
  pages =        "67:1--67:??",
  month =        sep,
  year =         "2008",
  DOI =          "https://doi.org/10.1145/1391962.1391975",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "constraints; Floorplanning; legalization",
  abstract =     "In this work, we propose a new and efficient approach
                 to the {\em floorplan repair\/} problem, where violated
                 design constraints are satisfied by applying small
                 changes to an existing rough floorplan. Such a
                 floorplan can be produced by a human designer, a
                 scalable placement algorithm, or result from
                 engineering adjustments to an existing floorplan. In
                 such cases, overlapping modules must be separated, and
                 others may need to be repositioned to satisfy
                 additional requirements. Our algorithmic framework uses
                 an expressive graph-based encoding of constraints which
                 can reflect fixed-outline, region, proximity and
                 alignment constraints. By tracking the implications of
                 existing constraints, we resolve violations by imposing
                 gradual modifications to the floorplan, in an attempt
                 to preserve the characteristics of its initial design.
                 Empirically, our approach is effective at removing
                 overlaps and repairing violations that may occur when
                 design constraints are acquired and imposed
                 dynamically.",
  acknowledgement = ack-nhfb,
  bibdate =      "Wed Oct 1 16:09:32 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
}

@Article{Ozdal:2008:ORA,
  author =       "Muhammet Mustafa Ozdal and Martin D. F. Wong and
                 Philip S. Honsinger",
  title =        "Optimal routing algorithms for rectilinear pin
                 clusters in high-density multichip modules",
  journal =      j-TODAES,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  volume =       "13",
  number =       "4",
  articleno =    "68",
  pages =        "68:1--68:??",
  month =        sep,
  year =         "2008",
  DOI =          "https://doi.org/10.1145/1391962.1391976",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Escape routing; multi-chip modules; network flow",
  abstract =     "As the circuit densities and transistor counts are
                 increasing, the package routing problem is becoming
                 more and more challenging. In this article, we study an
                 important routing problem encountered in typical
                 high-end MCM designs: routing within dense pin
                 clusters. Pin clusters are often formed by pins that
                 belong to the same functional unit or the same data
                 bus, and can become bottlenecks in terms of overall
                 routability. Typically, these clusters have irregular
                 shapes, which can be approximated with rectilinear
                 convex boundaries. Since such boundaries have often
                 irregular shapes, a traditional escape routing
                 algorithm may give unroutable solutions. In this
                 article, we study how the positions of escape terminals
                 on a convex boundary affect the overall routability.
                 For this purpose, we propose a set of necessary and
                 sufficient conditions to model routability outside a
                 rectilinear convex boundary. Given an escape routing
                 solution, we propose an optimal algorithm to select the
                 maximal subset of nets that are routable outside the
                 boundary. After that, we focus on an integrated
                 approach to consider routability constraints (outside
                 the boundary) during the actual escape routing
                 algorithm. Here, we propose an optimal algorithm to
                 find the best escape routing solution that satisfies
                 all routability constraints. Our experiments
                 demonstrate that we can reduce the number of layers by
                 17\% on the average, by using this integrated
                 methodology.",
  acknowledgement = ack-nhfb,
  bibdate =      "Wed Oct 1 16:09:32 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
}

@Article{Keinert:2009:SAE,
  author =       "Joachim Keinert and Martin Streub{\"u}hr and Thomas
                 Schlichter and Joachim Falk and Jens Gladigau and
                 Christian Haubelt and J{\"u}rgen Teich and Michael
                 Meredith",
  title =        "{SystemCoDesigner} --- an automatic {ESL} synthesis
                 approach by design space exploration and behavioral
                 synthesis for streaming applications",
  journal =      j-TODAES,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  volume =       "14",
  number =       "1",
  articleno =    "1",
  pages =        "1:1--1:??",
  month =        jan,
  year =         "2009",
  DOI =          "https://doi.org/10.1145/1455229.1455230",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "hardware/software codesign; System design",
  abstract =     "With increasing design complexity, the gap from ESL
                 (Electronic System Level) design to RTL synthesis
                 becomes more and more crucial to many industrial
                 projects. Although several behavioral synthesis tools
                 exist to automatically generate synthesizable RTL code
                 from C/C++/SystemC-based input descriptions and
                 software generation for embedded processors is
                 automated as well, an efficient ESL synthesis
                 methodology combining both is still missing. This
                 article presents SystemCoDesigner, a novel
                 SystemC-based ESL tool to automatically optimize a
                 hardware/software SoC (System on Chip) implementation
                 with respect to several objectives. Starting from a
                 SystemC behavioral model, SystemCoDesigner
                 automatically extracts the mathematical model, performs
                 a behavioral synthesis step, and explores the
                 multiobjective design space using state-of-the-art
                 multiobjective optimization algorithms. During design
                 space exploration, a single design point is evaluated
                 by simulating highly accurate performance models, which
                 are automatically generated from the SystemC behavioral
                 model and the behavioral synthesis results. Moreover,
                 SystemCoDesigner permits the automatic generation of
                 bit streams for FPGA targets from any previously
                 optimized SoC implementation. Thus SystemCoDesigner is
                 the first fully automated ESL synthesis tool providing
                 a correct-by-construction generation of
                 hardware/software SoC implementations. As a case study,
                 a model of a Motion-JPEG decoder was automatically
                 optimized and implemented using SystemCoDesigner.
                 Several synthesized SoC variants based on this model
                 show different tradeoffs between required hardware
                 costs and achieved system throughput, ranging from
                 software-only solutions to pure hardware
                 implementations that reach real-time performance for
                 QCIF streams on a 50MHz FPGA.",
  acknowledgement = ack-nhfb,
  bibdate =      "Mon Jan 26 18:12:50 MST 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
}

@Article{Hansson:2009:CTC,
  author =       "Andreas Hansson and Kees Goossens and Marco Bekooij
                 and Jos Huisken",
  title =        "{CoMPSoC}: a template for composable and predictable
                 multi-processor system on chips",
  journal =      j-TODAES,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  volume =       "14",
  number =       "1",
  articleno =    "2",
  pages =        "2:1--2:??",
  month =        jan,
  year =         "2009",
  DOI =          "https://doi.org/10.1145/1455229.1455231",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Composable; model of computation; network on chip;
                 predictable; system on chip",
  abstract =     "A growing number of applications, often with firm or
                 soft real-time requirements, are integrated on the same
                 System on Chip, in the form of either hardware or
                 software intellectual property. The applications are
                 started and stopped at run time, creating different
                 use-cases. Resources, such as interconnects and
                 memories, are shared between different applications,
                 both within and between use-cases, to reduce silicon
                 cost and power consumption.\par

                 The functional and temporal behaviour of the
                 applications is verified by simulation and formal
                 methods. Traditionally, designers resort to monolithic
                 verification of the system as whole, since the
                 applications interfere in shared resources, and thus
                 affect each other's behaviour. Due to interference
                 between applications, the integration and verification
                 complexity grows exponentially in the number of
                 applications, and the task to verify correct behaviour
                 of concurrent applications is on the system designer
                 rather than the application designers.\par

                 In this work, we propose a Composable and Predictable
                 Multi-Processor System on Chip (CoMPSoC) platform
                 template. This scalable hardware and software template
                 removes all interference between applications through
                 resource reservations. We demonstrate how this enables
                 a divide-and-conquer design strategy, where all
                 applications, potentially using different programming
                 models and communication paradigms, are developed and
                 verified independently of one another. Performance is
                 analyzed per application, using state-of-the-art
                 dataflow techniques or simulation, depending on the
                 requirements of the application. These results still
                 apply when the applications are integrated onto the
                 platform, thus separating system-level design and
                 application design.",
  acknowledgement = ack-nhfb,
  bibdate =      "Mon Jan 26 18:12:50 MST 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
}

@Article{Gheorghita:2009:SSB,
  author =       "Stefan Valentin Gheorghita and Martin Palkovic and
                 Juan Hamers and Arnout Vandecappelle and Stelios
                 Mamagkakis and Twan Basten and Lieven Eeckhout and Henk
                 Corporaal and Francky Catthoor and Frederik Vandeputte
                 and Koen {De Bosschere}",
  title =        "System-scenario-based design of dynamic embedded
                 systems",
  journal =      j-TODAES,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  volume =       "14",
  number =       "1",
  articleno =    "3",
  pages =        "3:1--3:??",
  month =        jan,
  year =         "2009",
  DOI =          "https://doi.org/10.1145/1455229.1455232",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Design methodology; dynamic nature; embedded systems;
                 energy reduction; real-time systems; system scenarios",
  abstract =     "In the past decade, real-time embedded systems have
                 become much more complex due to the introduction of a
                 lot of new functionality in one application, and due to
                 running multiple applications concurrently. This
                 increases the dynamic nature of today's applications
                 and systems, and tightens the requirements for their
                 constraints in terms of deadlines and energy
                 consumption. State-of-the-art design methodologies try
                 to cope with these novel issues by identifying several
                 most used cases and dealing with them separately,
                 reducing the newly introduced complexity. This article
                 presents a generic and systematic design-time/run-time
                 methodology for handling the dynamic nature of modern
                 embedded systems, which can be utilized by existing
                 design methodologies to increase their efficiency. It
                 is based on the concept of {\em system scenarios},
                 which group system behaviors that are similar from a
                 multidimensional cost perspective --- such as resource
                 requirements, delay, and energy consumption --- in such
                 a way that the system can be configured to exploit this
                 cost similarity. At design-time, these scenarios are
                 individually optimized. Mechanisms for predicting the
                 current scenario at run-time, and for switching between
                 scenarios, are also derived. This design trajectory is
                 augmented with a run-time calibration mechanism, which
                 allows the system to learn on-the-fly during its
                 execution, and to adapt itself to the current input
                 stimuli, by extending the scenario set, changing the
                 scenario definitions, and both the prediction and
                 switching mechanisms. To show the generality of our
                 methodology, we show how it has been applied on four
                 very different real-life design problems. In all
                 presented case studies, substantial energy reductions
                 were obtained by exploiting scenarios.",
  acknowledgement = ack-nhfb,
  bibdate =      "Mon Jan 26 18:12:50 MST 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
}

@Article{Xu:2009:STA,
  author =       "Qiang Xu and Yubin Zhang and Krishnendu Chakrabarty",
  title =        "{SOC} test-architecture optimization for the testing
                 of embedded cores and signal-integrity faults on
                 core-external interconnects",
  journal =      j-TODAES,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  volume =       "14",
  number =       "1",
  articleno =    "4",
  pages =        "4:1--4:??",
  month =        jan,
  year =         "2009",
  DOI =          "https://doi.org/10.1145/1455229.1455233",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Core-based system-on-chip; interconnect testing; test
                 scheduling; test-access mechanism (TAM)",
  abstract =     "The test time for core-external interconnect shorts
                 and opens is typically much less than that for
                 core-internal logic. Therefore, prior work on
                 test-infrastructure design for core-based
                 system-on-a-chip (SOC) has mainly focused on minimizing
                 the test time for core-internal logic. However, as
                 feature sizes shrink for newer process technologies,
                 the test time for signal integrity (SI) faults on
                 interconnects cannot be neglected. The test time for SI
                 faults can be comparable to, or even larger than, the
                 test time for the embedded cores. We investigate the
                 impact of interconnect SI tests on SOC
                 test-architecture design and optimization. A compaction
                 method for SI faults and algorithms for
                 test-architecture optimization are also presented.
                 Experimental results for the ITC'02 benchmarks show
                 that the proposed approach can significantly reduce the
                 overall testing time for core-internal logic and
                 core-external interconnects.",
  acknowledgement = ack-nhfb,
  bibdate =      "Mon Jan 26 18:12:50 MST 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
}

@Article{Jin:2009:GND,
  author =       "Zhong-Yi Jin and Curt Schurgers and Rajesh K. Gupta",
  title =        "A gateway node with duty-cycled radio and processing
                 subsystems for wireless sensor networks",
  journal =      j-TODAES,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  volume =       "14",
  number =       "1",
  articleno =    "5",
  pages =        "5:1--5:??",
  month =        jan,
  year =         "2009",
  DOI =          "https://doi.org/10.1145/1455229.1455234",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Embedded systems; gateway; power savings; sensor
                 nodes",
  abstract =     "Wireless sensor nodes are increasingly being tasked
                 with computation and communication intensive functions
                 while still subject to constraints related to energy
                 availability. On these embedded platforms, once all low
                 power design techniques have been explored,
                 duty-cycling the various subsystems remains the primary
                 option to meet the energy and power constraints. This
                 requires the ability to provide spurts of high MIPS and
                 high bandwidth connections. However, due to the large
                 overheads associated with duty-cycling the computation
                 and communication subsystems, existing high performance
                 sensor platforms are not efficient in supporting such
                 an option. In this article, we present the design and
                 optimizations taken in a wireless gateway node (WGN)
                 that bridges data from wireless sensor networks to
                 Wi-Fi networks in an on-demand basis. We discuss our
                 strategies to reduce duty-cycling related costs by
                 partitioning the system and by reducing the amount of
                 time required to activate or deactivate the
                 high-powered components. We compare the design choices
                 and performance parameters with those made in the Intel
                 {\em Stargate\/} platform to show the effectiveness of
                 duty-cycling on our platform. We have built a working
                 prototype, and the experimental results with two
                 different power management schemes show significant
                 reductions in latency and average power consumption
                 compared to the {\em Stargate}. The WGN running our
                 power-gating scheme performs about six times better in
                 terms of average system power consumption than the {\em
                 Stargate\/} running the suspend-system scheme for large
                 working-periods where the active power dominates. For
                 short working-periods where the transition
                 (enable/disable) power becomes dominant, we perform up
                 to seven times better. The comparative performance of
                 our system is even greater when the sleep power
                 dominates.",
  acknowledgement = ack-nhfb,
  bibdate =      "Mon Jan 26 18:12:50 MST 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
}

@Article{Wu:2009:EER,
  author =       "Chin-Hsien Wu",
  title =        "An energy-efficient {I/O} request mechanism for
                 multi-bank flash-memory storage systems",
  journal =      j-TODAES,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  volume =       "14",
  number =       "1",
  articleno =    "6",
  pages =        "6:1--6:??",
  month =        jan,
  year =         "2009",
  DOI =          "https://doi.org/10.1145/1455229.1455235",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "embedded systems; energy-efficient; Flash Memory;
                 programmed I/O; storage systems",
  abstract =     "Emerging critical issues for flash-memory storage
                 systems, especially with regard to implementation
                 within many embedded systems, are the programmed I/O
                 nature of data transfers and their energy-efficient
                 nature. We propose an I/O request mechanism in the
                 Memory-Technology-Device (MTD) layer to exploit the
                 programmed I/O-based data transfers for flash-memory
                 storage systems. We propose to revise the waiting
                 function in the Memory-Technology-Device (MTD) layer to
                 relieve the microprocessor from busy-waiting, in order
                 to make more CPU cycles available for other tasks. An
                 energy-efficient mechanism based on the I/O request
                 mechanism is also presented for multi-bank flash-memory
                 storage systems, which particularly focuses on
                 switching the power state of each flash-memory bank. We
                 demonstrate that the energy-efficient I/O request
                 mechanism not only saves more CPU cycles to execute
                 other tasks, but also reduces the energy consumption of
                 flash-memory, based on experiments incorporating
                 realistic system workloads.",
  acknowledgement = ack-nhfb,
  bibdate =      "Mon Jan 26 18:12:50 MST 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
}

@Article{Dontharaju:2009:DAP,
  author =       "Swapna Dontharaju and Shenchih Tung and James T. Cain
                 and Leonid Mats and Marlin H. Mickle and Alex K.
                 Jones",
  title =        "A design automation and power estimation flow for
                 {RFID} systems",
  journal =      j-TODAES,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  volume =       "14",
  number =       "1",
  articleno =    "7",
  pages =        "7:1--7:??",
  month =        jan,
  year =         "2009",
  DOI =          "https://doi.org/10.1145/1455229.1455236",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "design automation; low-power; prototyping; RFID",
  abstract =     "While RFID has become a ubiquitous technology, there
                 is still a need for RFID systems with different
                 capabilities, protocols, and features depending on the
                 application. This article describes a design automation
                 flow and power estimation technique for fast
                 implementation and design feedback of new RFID systems.
                 Physical layer features are described using {\em
                 waveform features}, which are used to automatically
                 generate physical layer encoding and decoding hardware
                 blocks. {\em RFID primitives\/} to be supported by the
                 tag are enumerated with {\em RFID macros\/} and the
                 behavior of each primitive is specified using ANSI-C
                 within the template to automatically generate the tag
                 controller. Case studies implementing widely used
                 standards such as ISO 18000 Part 7 and ISO 18000 Part
                 6C using this automation technique are presented. The
                 power macromodeling flow demonstrated here is shown to
                 be within 5\% to 10\% accuracy, while providing results
                 100 times faster than traditional methods. When
                 eliminating the need for certain features of ISO 18000
                 Part 6C, the design flow shows that the power required
                 by the implementation is reduced by nearly 50\%.",
  acknowledgement = ack-nhfb,
  bibdate =      "Mon Jan 26 18:12:50 MST 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
}

@Article{Dasdan:2009:PEA,
  author =       "Ali Dasdan",
  title =        "Provably efficient algorithms for resolving temporal
                 and spatial difference constraint violations",
  journal =      j-TODAES,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  volume =       "14",
  number =       "1",
  articleno =    "8",
  pages =        "8:1--8:??",
  month =        jan,
  year =         "2009",
  DOI =          "https://doi.org/10.1145/1455229.1455237",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Behavioral synthesis; constraint satisfaction;
                 interface timing; layout compaction; multimedia
                 synchronization; rate analysis; real-time systems;
                 scheduling; timing constraints",
  abstract =     "A system of difference constraints is a formal model
                 of temporal and spatial constraints in many areas such
                 as scheduling, constraint satisfaction, and layout
                 compaction. During construction of such a system,
                 constraint violations often arise, and they need to be
                 resolved. Previous algorithms for this task fall into
                 two groups: those algorithms that are fast but cannot
                 resolve all violations, and those algorithms that can
                 resolve all violations but are exponentially slow. We
                 propose the first algorithms that are fast as well as
                 able to resolve all violations. Moreover, unlike the
                 previous algorithms, our algorithms support the
                 ordering of violations using their inherent criticality
                 or user-defined priority. We provably and
                 experimentally justify the efficiency and efficacy of
                 our algorithms.",
  acknowledgement = ack-nhfb,
  bibdate =      "Mon Jan 26 18:12:50 MST 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
}

@Article{Sinha:2009:DIC,
  author =       "Arnab Sinha and Pallab Dasgupta and Bhaskar Pal and
                 Sayantan Das and Prasenjit Basu and P. P. Chakrabarti",
  title =        "Design intent coverage revisited",
  journal =      j-TODAES,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  volume =       "14",
  number =       "1",
  articleno =    "9",
  pages =        "9:1--9:??",
  month =        jan,
  year =         "2009",
  DOI =          "https://doi.org/10.1145/1455229.1455238",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Design Intent Coverage",
  abstract =     "{\em Design intent coverage\/} is a formal methodology
                 for analyzing the gap between a formal architectural
                 specification of a design and the formal functional
                 specifications of the component RTL blocks of the
                 design. In this article we extend the design intent
                 coverage methodology to hybrid specifications
                 containing both state-machines and formal properties.
                 We demonstrate the benefits of this extension in two
                 domains of considerable recent interest, namely (a) the
                 use of auxiliary state-machines in formal
                 specifications, and (b) the use of modest sized RTL
                 blocks in the design intent coverage analysis.",
  acknowledgement = ack-nhfb,
  bibdate =      "Mon Jan 26 18:12:50 MST 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
}

@Article{Yang:2009:MCS,
  author =       "Zijiang Yang and Chao Wang and Aarti Gupta and Franjo
                 Ivan{\v{c}}i{\'c}",
  title =        "Model checking sequential software programs via mixed
                 symbolic analysis",
  journal =      j-TODAES,
  volume =       "14",
  number =       "1",
  pages =        "10:1--10:??",
  month =        jan,
  year =         "2009",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1455229.1455239",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Jan 26 18:12:50 MST 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "We present an efficient symbolic search algorithm for
                 software model checking. Our algorithms perform
                 word-level reasoning by using a combination of decision
                 procedures in Boolean and integer and real domains, and
                 use novel symbolic search strategies optimized
                 specifically for sequential programs to improve
                 scalability. Experiments on real-world C programs show
                 that the new symbolic search algorithms can achieve
                 several orders-of-magnitude improvements over existing
                 methods based on bit-level (Boolean) reasoning.",
  acknowledgement = ack-nhfb,
  articleno =    "10",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "binary decision diagram; composite symbolic formula;
                 image computation; Model checking; Presburger
                 arithmetic; reachability analysis",
}

@Article{Mehta:2009:ICH,
  author =       "Gayatri Mehta and Justin Stander and Mustafa Baz and
                 Brady Hunsaker and Alex K. Jones",
  title =        "Interconnect customization for a hardware fabric",
  journal =      j-TODAES,
  volume =       "14",
  number =       "1",
  pages =        "11:1--11:??",
  month =        jan,
  year =         "2009",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1455229.1455240",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Jan 26 18:12:50 MST 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "This article describes several multiplexer-based
                 interconnection strategies designed to improve energy
                 consumption of stripe-based coarse-grain reconfigurable
                 fabrics. Application requirements for the architecture
                 as well as two dense subgraphs are extracted from a
                 suite of signal and image processing benchmarks. These
                 statistics are used to drive the strategy of the
                 composition of multiplexer-based interconnect. The
                 article compares interconnects that are fully connected
                 between stripes, those with a cardinality of 8:1 to
                 4:1, and extensions that provide a 5:1 cardinality,
                 limited 6:1 cardinality, and hybrids between 5:1 and
                 3:1 cardinalities. Additionally, dedicated vertical
                 routes are considered replacing some computational
                 units with dedicated pass-gates. Using a fabric
                 interconnect model (FIM) written in XML, we demonstrate
                 that fabric instances and mappers can be automatically
                 generated using a Web-based design flow. Upon testing
                 these instances, we found that using an 8:1 cardinality
                 interconnect with 33\% of the computational units
                 replaced with dedicated pass-gates provided the best
                 energy versus mappability tradeoff, resulting in a 50\%
                 energy improvement over fully connected rows and 20\%
                 energy improvement over an 8:1 cardinality interconnect
                 without dedicated vertical routes.",
  acknowledgement = ack-nhfb,
  articleno =    "11",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "architecture; computer-aided design; demonstrable;
                 hardware fabric; low-energy; Reconfigurable",
}

@Article{Sham:2009:CPE,
  author =       "Chiu-Wing Sham and Evangeline F. Y. Young and Jingwei
                 Lu",
  title =        "Congestion prediction in early stages of physical
                 design",
  journal =      j-TODAES,
  volume =       "14",
  number =       "1",
  pages =        "12:1--12:??",
  month =        jan,
  year =         "2009",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1455229.1455241",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Jan 26 18:12:50 MST 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Routability optimization has become a major concern in
                 physical design of VLSI circuits. Due to the recent
                 advances in VLSI technology, interconnect has become a
                 dominant factor of the overall performance of a
                 circuit. In order to optimize interconnect cost, we
                 need a good congestion estimation method to predict
                 routability in the early designing stages. Many
                 congestion models have been proposed but there's still
                 a lot of room for improvement. Besides, routers will
                 perform rip-up and reroute operations to prevent
                 overflow, but most models do not consider this case.
                 The outcome is that the existing models will usually
                 underestimate the routability. In this paper, we have a
                 comprehensive study on our proposed congestion models.
                 Results show that the estimation results of our
                 approaches are always more accurate than the previous
                 congestion models.",
  acknowledgement = ack-nhfb,
  articleno =    "12",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Estimation; floorplanning; placement",
}

@Article{Zhu:2009:ESA,
  author =       "Yi Zhu and Yuanfang Hu and Michael B. Taylor and
                 Chung-Kuan Cheng",
  title =        "Energy and switch area optimizations for {FPGA} global
                 routing architectures",
  journal =      j-TODAES,
  volume =       "14",
  number =       "1",
  pages =        "13:1--13:??",
  month =        jan,
  year =         "2009",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1455229.1455242",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Jan 26 18:12:50 MST 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Low energy and small switch area usage are two
                 important design objectives in FPGA global routing
                 architecture design. This article presents an improved
                 MCF model based CAD flow that performs aggressive
                 optimizations, such as topology and wire style
                 optimization, to reduce the energy and switch area of
                 FPGA global routing architectures. The experiments show
                 that when compared to traditional mesh architecture,
                 the optimized FPGA routing architectures achieve up to
                 10\% to 15\% energy savings and up to 20\% switch area
                 savings in average for a set of seven benchmark
                 circuits.",
  acknowledgement = ack-nhfb,
  articleno =    "13",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "FPGA; global routing; low power",
}

@Article{Huang:2009:OPR,
  author =       "Shih-Hsu Huang and Chia-Ming Chang and Yow-Tyng Nieh",
  title =        "Opposite-phase register switching for peak current
                 minimization",
  journal =      j-TODAES,
  volume =       "14",
  number =       "1",
  pages =        "14:1--14:??",
  month =        jan,
  year =         "2009",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1455229.1455243",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Jan 26 18:12:50 MST 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "In a synchronous sequential circuit, huge current
                 peaks are often observed at the moment of clock
                 transition (since all registers are clocked). Previous
                 works focus on reducing the number of switching
                 registers. However, even though the switching registers
                 are the same, different combinations of switching
                 directions still result in different peak currents.
                 Based on that observation, in this article, we propose
                 an ECO (engineering change order) approach to minimize
                 the peak current by considering the switching
                 directions of registers. Our approach is well suitable
                 for reducing the peak current in IC testing.
                 Experimental data consistently show that our approach
                 works well in practice.",
  acknowledgement = ack-nhfb,
  articleno =    "14",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "IC testing; Logic synthesis; peak current; sequential
                 circuit synthesis",
}

@Article{Lin:2009:SCD,
  author =       "Yen-Chun Lin and Li-Ling Hung",
  title =        "Straightforward construction of depth-size optimal,
                 parallel prefix circuits with fan-out 2",
  journal =      j-TODAES,
  volume =       "14",
  number =       "1",
  pages =        "15:1--15:??",
  month =        jan,
  year =         "2009",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1455229.1455244",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Jan 26 18:12:50 MST 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Prefix computation is used in various areas and is
                 considered as a primitive operation. Parallel prefix
                 circuits are parallel prefix algorithms on the
                 combinational circuit model. The depth of a prefix
                 circuit is a measure of its processing time; smaller
                 depth implies faster computation. The size of a prefix
                 circuit is the number of operation nodes in it. Smaller
                 size implies less power consumption, less VLSI area,
                 and less cost. A prefix circuit with $n$ inputs is
                 depth-size optimal if its depth plus size equals $ 2 n
                 - 2$. A circuit with a smaller fan-out is in general
                 faster and occupies less VLSI area. To be of practical
                 use, the depth and fan-out of a prefix circuit should
                 be small. In this paper, a family of depth-size
                 optimal, parallel prefix circuits with fan-out 2 is
                 presented. This family of prefix circuits is easier to
                 construct and more amenable to automatic synthesis than
                 two other families of the same type, although the three
                 families have the same minimum depth among all
                 depth-size optimal prefix circuits with fan-out 2. The
                 balanced structure of the new family is also a merit.",
  acknowledgement = ack-nhfb,
  articleno =    "15",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Depth-size optimal; fan-out; parallel prefix
                 circuits",
}

@Article{Kahng:2009:LAA,
  author =       "Andrew B. Kahng and Chul-Hong Park and Puneet Sharma
                 and Qinke Wang",
  title =        "Lens aberration aware placement for timing yield",
  journal =      j-TODAES,
  volume =       "14",
  number =       "1",
  pages =        "16:1--16:??",
  month =        jan,
  year =         "2009",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1455229.1455245",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Jan 26 18:12:50 MST 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Process variations due to lens aberrations are to a
                 large extent systematic, and can be modeled for
                 purposes of analyses and optimizations in the design
                 phase. Traditionally, variations induced by lens
                 aberrations have been considered random due to their
                 small extent. However, as process margins reduce, and
                 as improvements in reticle enhancement techniques
                 control variations due to other sources with increased
                 efficacy, lens aberration-induced variations gain
                 importance. For example, our experiments indicate that
                 delays of most cells in the Artisan TSMC 90nm library
                 are affected by 2--8\% due to lens aberration.
                 Aberration-induced variations are systematic and depend
                 on the location in the lens field. In this article, we
                 first propose an aberration-aware timing analysis flow
                 that accounts for aberration-induced cell delay
                 variations. We then propose an aberration-aware
                 timing-driven analytical placement approach that
                 utilizes the predictable slow and fast regions created
                 on the chip due to aberration to improve cycle time. We
                 study the dependence of our improvement on chip size,
                 as well as use of the technique along with field
                 blading which allows partial reticle exposure. We
                 evaluate our technique on two testcases, {\em AES\/}
                 and {\em JPEG\/} implemented in 90nm technology. The
                 proposed technique reduces cycle time by 4.322\% (80ps)
                 at the cost of 1.587\% increase in trial-routed
                 wirelength for AES. On JPEG, we observe a cycle time
                 reduction of 5.182\% (132ps) at the cost of 1.095\%
                 increase in trial-routed wirelength.",
  acknowledgement = ack-nhfb,
  articleno =    "16",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "design for manufacturing; Layout; lithography; timing
                 yield",
}

@Article{Chien:2009:SMV,
  author =       "Chih-Da Chien and Cheng-An Chien and Jui-Chin Chu and
                 Jiun-In Guo and Ching-Hwa Cheng",
  title =        "A {252Kgates\slash 4.9Kbytes SRAM\slash 71mW}
                 multistandard video decoder for high definition video
                 applications",
  journal =      j-TODAES,
  volume =       "14",
  number =       "1",
  pages =        "17:1--17:??",
  month =        jan,
  year =         "2009",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1455229.1455246",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Jan 26 18:12:50 MST 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "This article proposes a low-cost, low-power
                 multistandard video decoder for high definition (HD)
                 video applications. The proposed design supports
                 multiple-standard (JPEG baseline, MPEG-1/2/4 Simple
                 Profile (SP), and H.264 Baseline Profile (BP)) video
                 decoding through interactive parsing control and common
                 parameter bus interface. In order to reduce hardware
                 cost, the shared adder-based structure and reusable
                 data management are proposed to achieve hardware
                 sharing and reduce internal memory size, respectively.
                 In addition, the proposed design is optimized through
                 reducing memory bandwidth by increasing both data reuse
                 amount and burst length of memory access as well as
                 eliminating cycle overhead in data access for
                 supporting HD video decoding with single AHB-based SDR
                 memory. The proposed 252Kgates/4.9kB/71mW/0.13 $ \mu $m
                 multi-standard video decoder reduces 72\% in gate count
                 and 87\% in power consumption as compared to the
                 state-of-the-art design, when operating at 120MHz for
                 real-time HD1080 video decoding with single AHB-based
                 SDR memory.",
  acknowledgement = ack-nhfb,
  articleno =    "17",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "H.264; MPEG; Video decoder",
}

@Article{Reviriego:2009:EED,
  author =       "Pedro Reviriego and Juan Antonio Maestro",
  title =        "Efficient error detection codes for multiple-bit upset
                 correction in {SRAMs} with {BICS}",
  journal =      j-TODAES,
  volume =       "14",
  number =       "1",
  pages =        "18:1--18:??",
  month =        jan,
  year =         "2009",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1455229.1455247",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Jan 26 18:12:50 MST 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Memories are one of the most widely used elements in
                 electronic systems, and their reliability when exposed
                 to Single Events Upsets (SEUs) has been studied
                 extensively. As transistor sizes shrink, Multiple Bits
                 Upsets (MBUs) are becoming an increasingly important
                 factor in the reliability of memories exposed to
                 radiation effects. To address this issue, Built-in
                 Current Sensors (BICS) have recently been applied in
                 conjunction with Single Error Correction/Double Error
                 Detection (SEC-DED) codes to protect memories from
                 MBUs. In this article, this approach is taken one step
                 further, proposing specific codes optimized to be
                 combined with BICS to provide protection against MBUs
                 in memories. By exploiting the locality of errors
                 within an MBU and the error detection and location
                 capabilities of BICS, the proposed codes result in both
                 a better protection level and a reduced cost compared
                 with the existing SEC-DED approach.",
  acknowledgement = ack-nhfb,
  articleno =    "18",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "error correcting codes; Fault tolerant memory;
                 high-level protection technique; protection against
                 radiation",
}

@Article{Avnit:2009:PCC,
  author =       "K. Avnit and V. D'Silva and A. Sowmya and S. Ramesh
                 and S. Parameswaran",
  title =        "Provably correct on-chip communication: a formal
                 approach to automatic protocol converter synthesis",
  journal =      j-TODAES,
  volume =       "14",
  number =       "2",
  pages =        "19:1--19:??",
  month =        mar,
  year =         "2009",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1497561.1497562",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Apr 2 15:06:01 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Hardware module reuse is a standard solution to the
                 problems of increasing complexity of chip architectures
                 and pressure to reduce time to market. In the absence
                 of a single module interface standard, predesigned
                 modules for ``plug-and-play'' usually require a
                 converter between incompatible interface protocols.
                 Current approaches to automatic synthesis of protocol
                 converters mostly lack formal foundations and either
                 employ abstractions far removed from the HDL
                 implementation level or grossly simplify the structure
                 of the protocols considered. This work presents a
                 state-machine-based formalism for modeling bus-based
                 communication protocols and a notion of protocol
                 compatibility and of correct conversion between
                 incompatible protocols. This formalism is used to
                 derive algorithms for checking protocol compatibility
                 and for provably correct, automatic converter
                 synthesis. Experiments with automatic converter
                 synthesis between different configurations of widely
                 used commercial bus protocols, such as AMBA AHB, ASB
                 APB, and the Open Core Protocol (OCP) are discussed.
                 The work here is unique in its combination of a
                 completely formal approach and the use of a low
                 abstraction level that enables precise modeling of
                 protocol characteristics that is also close to HDL.",
  acknowledgement = ack-nhfb,
  articleno =    "19",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "automatic design; converter synthesis; protocol
                 compatibility; System-on-chip",
}

@Article{Pasricha:2009:SLP,
  author =       "Sudeep Pasricha and Young-Hwan Park and Nikil Dutt and
                 Fadi J. Kurdahi",
  title =        "System-level {PVT} variation-aware power exploration
                 of on-chip communication architectures",
  journal =      j-TODAES,
  volume =       "14",
  number =       "2",
  pages =        "20:1--20:??",
  month =        mar,
  year =         "2009",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1497561.1497563",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Apr 2 15:06:01 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "With the shift towards deep submicron (DSM)
                 technologies, the increase in leakage power and the
                 adoption of power-aware design methodologies have
                 resulted in potentially significant variations in power
                 consumption under different process, voltage, and
                 temperature (PVT) corners. In this article, we first
                 investigate the impact of PVT corners on power
                 consumption at the system-on-chip (SoC) level,
                 especially for the on-chip communication
                 infrastructure. Given a target technology library, we
                 then show how it is possible to ``scale up'' and
                 abstract the PVT variability at the system level,
                 allowing characterization of the PVT-aware design space
                 early in the design flow. We conducted several
                 experiments to estimate power for PVT corner cases, at
                 the gate level, as well as at the higher system level.
                 Our preliminary results are very interesting, and
                 indicate that (i) there are significant variations in
                 power consumption across PVT corners; and (ii) the
                 PVT-aware power estimation problem may be amenable to a
                 reasonably simple abstraction at the system level.",
  acknowledgement = ack-nhfb,
  articleno =    "20",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "digital systems; high-level synthesis; on-chip
                 communication architectures; performance exploration;
                 power estimation; PVT variation",
}

@Article{Mukhopadhyay:2009:IAA,
  author =       "Rajdeep Mukhopadhyay and S. K. Panda and Pallab
                 Dasgupta and John Gough",
  title =        "Instrumenting {AMS} assertion verification on
                 commercial platforms",
  journal =      j-TODAES,
  volume =       "14",
  number =       "2",
  pages =        "21:1--21:??",
  month =        mar,
  year =         "2009",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1497561.1497564",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Apr 2 15:06:01 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "The industry trend appears to be moving towards
                 designs that integrate large digital circuits with
                 multiple analog/RF (radio frequency) interfaces. In the
                 verification of these large integrated circuits, the
                 number of nets that need to be monitored has been
                 growing rapidly. Consequently, the mixed-signal design
                 community has been feeling the need for AMS (Analog and
                 Mixed Signal) assertions that can automatically monitor
                 conformance with expected time-domain behavior and help
                 in debugging deviations from the design intent. The
                 main challenges in providing this support are (a)
                 developing AMS assertion languages or AMS verification
                 libraries, and (b) instrumenting existing commercial
                 simulators to support assertion verification during
                 simulation. In this article, we report two approaches:
                 the first extends the {\em Open Verification Library\/}
                 (OVL) to the AMS domain by integrating a new collection
                 of AMS verification libraries; while the second extends
                 {\em SystemVerilog Assertions\/} (SVA) by augmenting
                 analog predicates into SVA. We demonstrate the use of
                 AMS-OVL on the Cadence Virtuoso environment while
                 emphasizing that our libraries can work in any
                 environment that supports Verilog and Verilog-A. We
                 also report the development of tool support for AMS-SVA
                 using a combination of Cadence NCSIM and Synopsys VCS.
                 We demonstrate the utility of both approaches on the
                 verification of LP3918, an integrated power management
                 unit (PMU) from National Semiconductors. We believe
                 that in the absence of existing EDA (Electronic Design
                 Automation) tools for AMS assertion verification, the
                 proposed approaches of integrating our libraries and
                 our tool sets with existing commercial simulators will
                 be of considerable and immediate practical value.",
  acknowledgement = ack-nhfb,
  articleno =    "21",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Assertion; integrated mixed signal design; OVL;
                 simulation; SVA; verification library",
}

@Article{Palkovic:2009:TOL,
  author =       "Martin Palkovi{\v{c}} and Francky Catthoor and Henk
                 Corporaal",
  title =        "Trade-offs in loop transformations",
  journal =      j-TODAES,
  volume =       "14",
  number =       "2",
  pages =        "22:1--22:??",
  month =        mar,
  year =         "2009",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1497561.1497565",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Apr 2 15:06:01 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Nowadays, multimedia systems deal with huge amounts of
                 memory accesses and large memory footprints. To
                 alleviate the impact of these accesses and reduce the
                 memory footprint, high-level memory exploration and
                 optimization techniques have been proposed. These
                 techniques try to more efficiently utilize the memory
                 hierarchy. An important step in these optimization
                 techniques are loop transformations (LT). They have a
                 crucial effect on later data memory footprint
                 optimization steps and code generation. However, the
                 state-of-the-art work has focused only on individual
                 objectives. The main one in literature involves
                 improving the locality of data accesses, and thus
                 reducing the data memory footprint. It does not
                 consider the trade-offs in the LT step in relation to
                 successive optimization steps. Therefore, it is not
                 globally efficient in mapping the application on the
                 target platform.\par

                 In this article we will discuss several trade-offs
                 during the loop transformations. To our knowledge, we
                 are the first ones considering these global trade-offs.
                 Previous work always gave mostly one solution, having
                 the best locality and thus the optimized memory
                 footprint, even though some research in two-dimensional
                 trade-offs in this area exists as well. We start from
                 this state-of-the-art solution with minimal footprint.
                 We show that by sacrificing the footprint, we can
                 obtain gains in data reuse (crucial for energy
                 reduction) and reduce the control-flow complexity. We
                 demonstrate our approach on a real-life application,
                 namely the QSDPCM video coder. At the end, we show that
                 considering trade-offs for this application leads to
                 16\% energy reduction in a two-layer memory subsystem
                 and 10\% cycle reduction on the ARM platform.",
  acknowledgement = ack-nhfb,
  articleno =    "22",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "cost components; Data transfer and storage
                 exploration; loop transformations; optimization;
                 trade-offs",
}

%%% TODAES volume 14, number 2 (March 2009), article 23: Fummi, Loghi,
%%% Poncino, and Pravadelli on a HW/SW cosimulation methodology.
%%% NOTE(review): pages ends in "??"; presumably the final page number was
%%% not known when the entry was recorded --- confirm against the publisher.
@Article{Fummi:2009:CMH,
  author =       "Franco Fummi and Mirko Loghi and Massimo Poncino and
                 Graziano Pravadelli",
  title =        "A cosimulation methodology for {HW\slash SW}
                 validation and performance estimation",
  journal =      j-TODAES,
  volume =       "14",
  number =       "2",
  pages =        "23:1--23:??",
  month =        mar,
  year =         "2009",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1497561.1497566",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Apr 2 15:06:01 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Cosimulation strategies allow us to simulate and
                 verify HW/SW embedded systems before the real platform
                 is available. In this field, there is a large variety
                 of approaches that rely on different communication
                 mechanisms to implement an efficient interface between
                 the SW and the HW simulators. However, the literature
                 lacks a comprehensive methodology which addresses the
                 need for integrating and synchronizing heterogeneous
                 simulators, like, for example, the SystemC simulation
                 kernel for HW modules and an instruction set simulator
                 for SW applications, without being intrusive for the HW
                 and SW descriptions involved in the simulation. In this
                 context, this article presents, compares, and
                 integrates in a system-level framework two different
                 co-simulation strategies for modeling, analyzing, and
                 validating the performance of a HW/SW embedded system.
                 Moreover, for both of them, a mechanism is proposed to
                 provide an accurate time synchronization of the HW/SW
                 communication. The first strategy is intended to
                 provide an early cosimulation environment where HW/SW
                 interaction can be validated without involving the
                 operating system. The communication is implemented
                 between a single SW task and a SystemC description of
                 an HW module by exploiting the features of the remote
                 debugging interface of a debugger (the GNU GDB), and by
                 modifying the SystemC simulation kernel. On the other
                 hand, the second strategy is intended to be used in
                 further development steps, when the operating system is
                 introduced to validate the cosimulation between HW
                 modules and multitasking SW applications. In this
                 approach, the communication is implemented via
                 interrupts by using the features offered by the
                 operating system.\par

                 Experimental results are reported on two different case
                 studies to analyze and compare the effectiveness of
                 both the approaches.",
  acknowledgement = ack-nhfb,
  articleno =    "23",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Embedded Systems; HW/SW co-simulation; HW/SW
                 validation",
}

%%% TODAES volume 14, number 2 (March 2009), article 24: Inoue, Abe,
%%% Ishizaka, Sakai, and Edahiro on dynamic security-domain scaling for
%%% embedded symmetric multiprocessors.
%%% NOTE(review): pages ends in "??"; presumably the final page number was
%%% not known when the entry was recorded --- confirm against the publisher.
@Article{Inoue:2009:DSD,
  author =       "Hiroaki Inoue and Tsuyoshi Abe and Kazuhisa Ishizaka
                 and Junji Sakai and Masato Edahiro",
  title =        "Dynamic security domain scaling on embedded symmetric
                 multiprocessors",
  journal =      j-TODAES,
  volume =       "14",
  number =       "2",
  pages =        "24:1--24:??",
  month =        mar,
  year =         "2009",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1497561.1497567",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Apr 2 15:06:01 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "We propose a method for dynamic security-domain
                 scaling on SMPs that offers both highly scalable
                 performance and high security for future high-end
                 embedded systems. Its most important feature is its
                 highly efficient use of processor resources,
                 accomplished by dynamically changing the number of
                 processors within a security-domain (i.e., dynamically
                 yielding processors to other security-domains) in
                 response to application load requirements. Two new
                 technologies make this scaling possible without any
                 virtualization software: (1) self-transition management
                 and (2) unified virtual address mapping. Evaluations
                 show that this domain control provides highly scalable
                 performance and incurs almost no performance overhead
                 in security-domains. The increase in OSs in binary code
                 size is less than 1.5\%, and the time required for
                 individual state transitions is on the order of a
                 single millisecond. This scaling is the first in the
                 world to make possible the dynamic changing of the
                 number of processors within a security-domain on an ARM
                 SMP.",
  acknowledgement = ack-nhfb,
  articleno =    "24",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "AMP; dynamic security-domain scaling; SMP",
}

%%% TODAES volume 14, number 2 (March 2009), article 25: Qiu and Sha on
%%% cost minimization under hard/soft timing constraints (the HAP problem)
%%% for heterogeneous embedded systems.
%%% NOTE(review): pages ends in "??"; presumably the final page number was
%%% not known when the entry was recorded --- confirm against the publisher.
@Article{Qiu:2009:CMW,
  author =       "Meikang Qiu and Edwin H.-M. Sha",
  title =        "Cost minimization while satisfying hard\slash soft
                 timing constraints for heterogeneous embedded systems",
  journal =      j-TODAES,
  volume =       "14",
  number =       "2",
  pages =        "25:1--25:??",
  month =        mar,
  year =         "2009",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1497561.1497568",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Apr 2 15:06:01 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "In high-level synthesis for real-time embedded systems
                 using heterogeneous functional units (FUs), it is
                 critical to select the best FU type for each task.
                 However, some tasks may not have fixed execution times.
                 This article models each varied execution time as a
                 probabilistic random variable and solves {\em
                 heterogeneous assignment with probability\/} (HAP)
                 problem. The solution of the HAP problem assigns a
                 proper FU type to each task such that the total cost is
                 minimized while the timing constraint is satisfied with
                 a guaranteed confidence probability. The solutions to
                 the HAP problem are useful for both hard real-time and
                 soft real-time systems. Optimal algorithms are proposed
                 to find the optimal solutions for the HAP problem when
                 the input is a tree or a simple path. Two other
                 algorithms, one is optimal and the other is
                 near-optimal heuristic, are proposed to solve the
                 general problem. The experiments show that our
                 algorithms can effectively reduce the total cost while
                 satisfying timing constraints with guaranteed
                 confidence probabilities. For example, our algorithms
                 achieve an average reduction of 33.0\% on total cost
                 with 0.90 confidence probability satisfying timing
                 constraints compared with the previous work using
                 worst-case scenario.",
  acknowledgement = ack-nhfb,
  articleno =    "25",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Embedded Systems; heterogeneous; high-level synthesis;
                 real-time",
}

%%% TODAES volume 14, number 2 (March 2009), article 26: Zhou, Yu, and
%%% Petrov on temperature-aware register reallocation.
%%% This entry carries no keywords field, unlike the neighbouring entries
%%% of the same issue.
%%% NOTE(review): pages ends in "??"; presumably the final page number was
%%% not known when the entry was recorded --- confirm against the publisher.
@Article{Zhou:2009:TAR,
  author =       "Xiangrong Zhou and Chenjie Yu and Peter Petrov",
  title =        "Temperature-aware register reallocation for register
                 file power-density minimization",
  journal =      j-TODAES,
  volume =       "14",
  number =       "2",
  pages =        "26:1--26:??",
  month =        mar,
  year =         "2009",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1497561.1497569",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Apr 2 15:06:01 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Increased chip temperature has been known to cause
                 severe reliability problems and to significantly
                 increase leakage power. The register file has been
                 previously shown to exhibit the highest temperature
                 compared to all other hardware components in a modern
                 high-end embedded processor, which makes it
                 particularly susceptible to faults and elevated leakage
                 power. We show that this is mostly due to the highly
                 clustered register file accesses where a set of few
                 registers physically placed close to each other are
                 accessed with very high frequency. We propose
                 compile-time temperature-aware register reallocation
                 methodologies for breaking such groups of registers and
                 to uniformly distribute the accesses to the register
                 file. This is achieved with {\em no performance\/} and
                 {\em no hardware overheads}. We show that the
                 underlying problem is NP-hard, and subsequently
                 introduce and evaluate two efficient algorithmic
                 heuristics. Our extensive experimental study
                 demonstrates the efficiency of the proposed
                 methodology.",
  acknowledgement = ack-nhfb,
  articleno =    "26",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

%%% TODAES volume 14, number 2 (March 2009), article 27: Hong and Huang on
%%% reducing pass-fail fault dictionary size for million-gate circuits.
%%% NOTE(review): pages ends in "??"; presumably the final page number was
%%% not known when the entry was recorded --- confirm against the publisher.
@Article{Hong:2009:RFD,
  author =       "Yu-Ru Hong and Juinn-Dar Huang",
  title =        "Reducing fault dictionary size for million-gate large
                 circuits",
  journal =      j-TODAES,
  volume =       "14",
  number =       "2",
  pages =        "27:1--27:??",
  month =        mar,
  year =         "2009",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1497561.1497570",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Apr 2 15:06:01 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "In general, fault dictionary is prevented from
                 practical applications in fault diagnosis due to its
                 extremely large size. Several previous works are
                 proposed for the fault dictionary size reduction.
                 However, some of them fail to bring down the size to an
                 acceptable level, and others might not be able to
                 handle today's million-gate circuits due to their high
                 time and space complexity. In this article, an
                 algorithm is presented to reduce the size of pass-fail
                 dictionary while still preserving high diagnostic
                 resolution. The proposed algorithm possesses low time
                 and space complexity by avoiding constructing the huge
                 distinguishability table, which inevitably boosts up
                 the required computation complexity. Experimental
                 results demonstrate that the proposed algorithm is
                 capable of handling industrial million-gate large
                 circuits in a reasonable amount of runtime and
                 memory.",
  acknowledgement = ack-nhfb,
  articleno =    "27",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "diagnostic resolution; fault diagnosis; Fault
                 dictionary",
}

%%% TODAES volume 14, number 2 (March 2009), article 28: Kavousianos,
%%% Bakalis, and Nikolos on partial scan cell gating for low-power
%%% scan-based testing.
%%% NOTE(review): pages ends in "??"; presumably the final page number was
%%% not known when the entry was recorded --- confirm against the publisher.
@Article{Kavousianos:2009:EPS,
  author =       "Xrysovalantis Kavousianos and Dimitris Bakalis and
                 Dimitris Nikolos",
  title =        "Efficient partial scan cell gating for low-power
                 scan-based testing",
  journal =      j-TODAES,
  volume =       "14",
  number =       "2",
  pages =        "28:1--28:??",
  month =        mar,
  year =         "2009",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1497561.1497571",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Apr 2 15:06:01 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Gating of the outputs of a portion of the scan cells
                 (partial gating) has been recently proposed as a method
                 for reducing the dynamic power dissipation during
                 scan-based testing. We present a new systematic method
                 for selecting, under area and performance design
                 constraints, the most suitable for gating subset of
                 scan cells as well as the proper gating value for each
                 one of them, aiming at the reduction of the average
                 switching activity during testing. We show that the
                 proposed method outperforms the corresponding already
                 known methods, with respect to average dynamic power
                 dissipation reduction.",
  acknowledgement = ack-nhfb,
  articleno =    "28",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Low-power testing; partial gating; scan cell gating;
                 scan-based testing",
}

%%% TODAES volume 14, number 2 (March 2009), article 29: Rakhmatov on an
%%% analytical battery voltage model for portable systems.
%%% NOTE(review): pages ends in "??"; presumably the final page number was
%%% not known when the entry was recorded --- confirm against the publisher.
@Article{Rakhmatov:2009:BVM,
  author =       "Daler Rakhmatov",
  title =        "Battery voltage modeling for portable systems",
  journal =      j-TODAES,
  volume =       "14",
  number =       "2",
  pages =        "29:1--29:??",
  month =        mar,
  year =         "2009",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1497561.1497572",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Apr 2 15:06:01 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Limited battery life imposes stringent constraints on
                 the operation of battery-powered portable systems.
                 During battery discharge, the battery voltage
                 decreases, until a certain cutoff value is reached,
                 marking the end of battery life. The amount of
                 discharge capacity and energy delivered by the battery
                 during its life depends not only on the battery
                 characteristics, but also on the load conditions. A
                 different system design may result in a different
                 battery current (load) profile over time, leading to a
                 different battery voltage profile over time. This
                 article presents an analytical model that relates the
                 battery voltage to the battery current, thus
                 facilitating system design optimizations with respect
                 to the battery performance. It captures well-known
                 nonlinear phenomena of capacity loss at high discharge
                 rates, charge recovery, and capacity fading. The
                 proposed model has been validated against measurements
                 taken on Li-ion batteries. We also describe techniques
                 for efficient calculations of model's estimates, which
                 lets a user exploit accuracy-complexity tradeoffs.",
  acknowledgement = ack-nhfb,
  articleno =    "29",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "accuracy-complexity tradeoff; analytical modeling;
                 battery performance; battery-powered systems; Low-power
                 design",
}

%%% TODAES volume 14, number 2 (March 2009), article 30: Kumar and Gupta
%%% on I/O-efficient (external memory) layout-versus-schematic checking.
%%% The title writes "vs.\ " with a control space because the period ends
%%% an abbreviation, not a sentence; a bare "vs. " would be typeset with
%%% end-of-sentence spacing.
%%% NOTE(review): pages ends in "??"; presumably the final page number was
%%% not known when the entry was recorded --- confirm against the publisher.
@Article{Kumar:2009:EML,
  author =       "Yokesh Kumar and Prosenjit Gupta",
  title =        "External memory layout vs.\ schematic",
  journal =      j-TODAES,
  volume =       "14",
  number =       "2",
  pages =        "30:1--30:??",
  month =        mar,
  year =         "2009",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1497561.1497573",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Apr 2 15:06:01 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "The circuit represented by a VLSI layout must be
                 verified by checking it against the schematic circuit
                 as an important part of the functional verification
                 step. This involves two central problems of matching
                 the circuit graphs with each other (graph isomorphism)
                 and extracting a higher level of circuit from a given
                 level by finding subcircuits in the circuit graph
                 (subgraph isomorphism). Modern day VLSI layouts contain
                 millions of devices. Hence the memory requirements of
                 the data structures required by tools for verifying
                 them become huge and can easily exceed the amount of
                 internal memory available on a computer. In such a
                 scenario, a program not aware of the memory hierarchy
                 performs badly because of its unorganized input/output
                 operations (I/Os) as the speed of a disk access is
                 about a million times slower than accessing a main
                 memory location. In this article, we present
                 I/O-efficient algorithms for the graph isomorphism and
                 subgraph isomorphism problems in the context of
                 verification of VLSI layouts. Experimental results show
                 the need and utility of I/O-efficient algorithms for
                 handling problems with large memory requirements.",
  acknowledgement = ack-nhfb,
  articleno =    "30",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "design automation; external memory algorithms; Graph;
                 subgraph isomorphism; verification of layouts",
}

%%% TODAES volume 14, number 2 (March 2009), article 31: Chen, Ho, and
%%% Hwang on skew-aware signal polarity assignment for clock trees.
%%% NOTE(review): pages ends in "??"; presumably the final page number was
%%% not known when the entry was recorded --- confirm against the publisher.
@Article{Chen:2009:SAP,
  author =       "Po-Yuan Chen and Kuan-Hsien Ho and Tingting Hwang",
  title =        "Skew-aware polarity assignment in clock tree",
  journal =      j-TODAES,
  volume =       "14",
  number =       "2",
  pages =        "31:1--31:??",
  month =        mar,
  year =         "2009",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1497561.1497574",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Apr 2 15:06:01 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "In modern sequential VLSI designs, clock tree plays an
                 important role in synchronizing different components in
                 a chip. To reduce peak current and power/ground noises
                 caused by clock network, assigning different signal
                 polarities to clock buffers is proposed in previous
                 work. Although peak current and power/ground noises are
                 minimized by signal polarities assignment, an
                 assignment without timing information may increase the
                 clock skew significantly. As a result, a timing-aware
                 signal polarities assigning technique is necessary. In
                 this article, we propose a novel signal polarities
                 assigning technique which can not only reduce peak
                 current and power/ground noises simultaneously but also
                 render the clock skew in control. The experimental
                 result shows that the clock skew produced by our
                 algorithm is 94\% of original clock skew in average
                 while the clock skews produced by three algorithms
                 (Partition, MST, Matching) in the absence of post clock
                 tuning steps in the previous work are 235\%, 272\%, and
                 283\%, respectively. Moreover, our algorithm is as
                 efficient as the three algorithms of the previous work
                 in reducing peak current and power/ground noises.",
  acknowledgement = ack-nhfb,
  articleno =    "31",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Clock skew; clock tree; peak current; polarity
                 assignment; power/ground noise",
}

%%% TODAES volume 14, number 2 (March 2009), article 32: Cho, Lu, Yuan,
%%% and Pan on BoxRouter 2.0, a global router with layer assignment.
%%% The product name in the title is brace-protected so that bibliography
%%% styles which recase titles leave its capitalization intact.
%%% NOTE(review): pages ends in "??"; presumably the final page number was
%%% not known when the entry was recorded --- confirm against the publisher.
@Article{Cho:2009:BHR,
  author =       "Minsik Cho and Katrina Lu and Kun Yuan and David Z.
                 Pan",
  title =        "{BoxRouter 2.0}: a hybrid and robust global router
                 with layer assignment for routability",
  journal =      j-TODAES,
  volume =       "14",
  number =       "2",
  pages =        "32:1--32:??",
  month =        mar,
  year =         "2009",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1497561.1497575",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Apr 2 15:06:01 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "In this article, we present BoxRouter 2.0, and discuss
                 its architecture and implementation. As
                 high-performance VLSI design becomes more
                 interconnect-dominant, efficient congestion elimination
                 in global routing is in greater demand. Hence, we
                 propose a global router which has a strong ability to
                 improve routability and minimize the number of vias
                 with blockages, while minimizing wirelength. BoxRouter
                 2.0 is extended from BoxRouter 1.0, but can perform
                 multi-layer routing with 2D global routing and layer
                 assignment. Our 2D global routing is equipped with two
                 ideas: node shifting for congestion-aware Steiner tree
                 and robust negotiation-based A* search for routing
                 stability. After 2D global routing, 2D-to-3D mapping is
                 done by the layer assignment which is powered by
                 progressive via/blockage-aware integer linear
                 programming. Experimental results show that BoxRouter
                 2.0 has better routability with comparable wirelength
                 than other routers on ISPD07 benchmark, and it can
                 complete (no overflow) the widely used ISPD98 benchmark
                 for the first time in the literature with the shortest
                 wirelength. We further generate a set of harder ISPD98
                 benchmarks to push the limit of BoxRouter 2.0, and
                 propose the hardened ISPD98 benchmarks to map
                 state-of-the-art solutions for future routing
                 research.",
  acknowledgement = ack-nhfb,
  articleno =    "32",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "congestion; global routing; integer linear
                 programming; layer assignment; physical design;
                 routability; VLSI",
}

%%% TODAES volume 14, number 2 (March 2009), article 33: Gulati, Paul,
%%% Khatri, Patil, and Jas on FPGA-based hardware acceleration of Boolean
%%% satisfiability (SAT) solving.
%%% "FPGA" and "Boolean" are brace-protected in the title so that styles
%%% which recase titles keep their capitalization.
%%% NOTE(review): pages ends in "??"; presumably the final page number was
%%% not known when the entry was recorded --- confirm against the publisher.
@Article{Gulati:2009:FBH,
  author =       "Kanupriya Gulati and Suganth Paul and Sunil P. Khatri
                 and Srinivas Patil and Abhijit Jas",
  title =        "{FPGA}-based hardware acceleration for {Boolean}
                 satisfiability",
  journal =      j-TODAES,
  volume =       "14",
  number =       "2",
  pages =        "33:1--33:??",
  month =        mar,
  year =         "2009",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1497561.1497576",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Apr 2 15:06:01 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "We present an FPGA-based hardware solution to the
                 Boolean satisfiability (SAT) problem, with the main
                 goals of scalability and speedup. In our approach the
                 traversal of the implication graph as well as conflict
                 clause generation are performed in hardware, in
                 parallel. The experimental results and their analysis,
                 along with the performance models are discussed. We
                 show that an order of magnitude improvement in runtime
                 can be obtained over MiniSAT (the best-in-class
                 software based approach) by using a Virtex-4
                 (XC4VFX140) FPGA device. The resulting system can
                 handle instances with as many as 10K variables and 280K
                 clauses.",
  acknowledgement = ack-nhfb,
  articleno =    "33",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Boolean Constant Propagation (BCP); Boolean
                 satisfiability (SAT); conflict induced clauses; FPGA;
                 non-chronological backtrack",
}

%%% TODAES volume 14, number 3 (May 2009), article 34: Malik, Salcic, and
%%% Roop on compiling SystemJ with the Tandem Virtual Machine approach.
%%% "SystemJ" and "Tandem Virtual Machine" are brace-protected in the
%%% title so that styles which recase titles keep their capitalization.
%%% NOTE(review): pages ends in "??"; presumably the final page number was
%%% not known when the entry was recorded --- confirm against the publisher.
@Article{Malik:2009:SCU,
  author =       "Avinash Malik and Zoran Salcic and Partha S. Roop",
  title =        "{SystemJ} compilation using the {Tandem Virtual
                 Machine} approach",
  journal =      j-TODAES,
  volume =       "14",
  number =       "3",
  pages =        "34:1--34:??",
  month =        may,
  year =         "2009",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1529255.1529256",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Jun 3 16:12:53 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "SystemJ is a language based on the Globally
                 Asynchronous Locally Synchronous (GALS) paradigm. A
                 SystemJ program is a collection of GALS nodes, also
                 called clock domains, and each clock domain is a
                 synchronous program that extends the Java language.
                 Initial compilation of SystemJ has been to standard
                 Java executing on a Java Virtual Machine (JVM), which
                 is both inefficient and bulky for small embedded
                 systems. This article proposes a new approach for
                 compiling and executing SystemJ using a new type of
                 virtual machine, called a Tandem Virtual Machine (TVM).
                 The TVM approach provides an efficient implementation
                 of SystemJ on both standard processors and
                 resource-constrained embedded processors. The new
                 approach is based on separating the control-driven and
                 data-driven operations for execution on two virtual
                 machines. While the JVM executes the data-driven
                 operations, a Control Virtual Machine (CVM) is
                 introduced to execute the control-driven parts of a
                 SystemJ program. The TVM approach is capable of
                 handling all data-driven and control-driven operations
                 required by the GALS model. The benchmark results show
                 that the TVM has code size improvements of over 60\% on
                 average and also a substantial improvement in execution
                 speed over standard Java-based compilation.",
  acknowledgement = ack-nhfb,
  articleno =    "34",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "compilation; Esterel; System-level design; SystemJ;
                 virtual machines",
}

@article{Cong:2009:SRB,
  author = {Jason Cong and Yiping Fan and Junjuan Xu},
  title = {Simultaneous resource binding and interconnection
    optimization based on a distributed register-file
    microarchitecture},
  journal = j-TODAES,
  volume = 14,
  number = 3,
  pages = {35:1--35:??},
  month = may,
  year = 2009,
  coden = {ATASFO},
  doi = {https://doi.org/10.1145/1529255.1529257},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  bibdate = {Wed Jun 3 16:12:53 MDT 2009},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {Behavior synthesis and optimization beyond the
    register-transfer level require an efficient
    utilization of the underlying platform features. This
    article presents a platform-based resource binding
    approach based on a {\em Distributed Register-File
    Microarchitecture (DRFM)}, which makes efficient use of
    distributed embedded memory blocks as register files in
    modern FPGAs. DRFM contains multiple islands, each
    having a local register file, a functional unit pool,
    and data-routing logic. Compared to the traditional
    discrete-register counterpart, a DRFM allows use of the
    platform-featured on-chip memory or register-file IP
    blocks to implement its local register files, and this
    results in a substantial saving of multiplexing logic
    and global interconnects. DRFM provides a useful
    architectural template and a direct optimization
    objective for minimizing interisland connections for
    synthesis algorithms. Given the scheduling solution and
    resource (functional units) constraints, two novel
    algorithms in the resource binding stage are developed
    based on DRFM: (i) a simultaneous DRFM clustering and
    binding algorithm, which decides the configuration of
    DRFM and the assignment of operations into islands with
    the focus on optimizing global connections; (ii) a
    data-forwarding scheduling algorithm, which takes
    advantage of the operation slacks to handle the
    read-port restriction of register files. On the Xilinx
    Virtex4 FPGA platform, experimental results with a set
    of real-life test cases show a 50\% logic area
    reduction achieved by applying our approach, with a
    14.6\% performance improvement, compared to the
    traditional discrete-register-based approach. Also,
    experiments on small-size designs show that our
    algorithm produces the same number of total connections
    and at most one more maximum feeding-in connection
    compared to optimal solutions generated by ILP.},
  acknowledgement = ack-nhfb,
  articleno = 35,
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {Behavioral synthesis; distributed register file;
    resource binding},
}

@article{Raghavan:2009:PTG,
  author = {Praveen Raghavan and Murali Jayapala and Andy
    Lambrechts and Javed Absar and Francky Catthoor},
  title = {Playing the trade-off game: {Architecture} exploration
    using {Coffeee}},
  journal = j-TODAES,
  volume = 14,
  number = 3,
  pages = {36:1--36:??},
  month = may,
  year = 2009,
  coden = {ATASFO},
  doi = {https://doi.org/10.1145/1529255.1529258},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  bibdate = {Wed Jun 3 16:12:53 MDT 2009},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {Modern mobile devices need to be extremely energy
    efficient. Due to the growing complexity of these
    devices, energy-aware design exploration has become
    increasingly important. Current exploration tools often
    do not support energy estimation, or require the design
    to be very detailed before estimation is possible. It
    is important to get early feedback on both performance
    and energy consumption during all phases of the design
    and at higher abstraction levels. This article presents
    a unified optimization and exploration framework to
    explore source-level transformation to processor
    architecture design space. The proposed retargetable
    compiler and simulator framework can map applications
    to a range of processors and memory configurations,
    simulate, and report detailed performance and energy
    estimates. An accurate and consistent energy modeling
    approach is introduced which can estimate the energy
    consumption of processor and memories at a component
    level, which can help to guide the design process. Fast
    energy-aware architecture exploration is illustrated by
    modeling both state-of-the-art processors as well as
    other architectures. Various design trade-offs are also
    illustrated on different academic as well as industrial
    benchmarks from both the wireless communication and
    multimedia domain. We also illustrate a design space
    exploration on different applications and show that
    there is large trade-off space between application
    performance, energy consumption, and area. We show that
    the proposed framework is consistent, accurate, and
    covers a large design space including various novel
    low-power extensions in a unified framework.},
  acknowledgement = ack-nhfb,
  articleno = 36,
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {architecture exploration; area; compiler-architecture
    interaction; design; embedded systems; Energy; loop
    transformations; power estimation; power-performance
    trade-off; processors; VLIW},
}

@article{Das:2009:SBT,
  author = {Dipankar Das and P. P. Chakrabarti and Rajeev Kumar},
  title = {Scenario-based timing verification of multiprocessor
    embedded applications},
  journal = j-TODAES,
  volume = 14,
  number = 3,
  pages = {37:1--37:??},
  month = may,
  year = 2009,
  coden = {ATASFO},
  doi = {https://doi.org/10.1145/1529255.1529259},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  bibdate = {Wed Jun 3 16:12:53 MDT 2009},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {This work presents a static timing-analysis method for
    verification of scenario-based real-time properties, on
    graphical task-level models of embedded applications.
    Scenario-based properties specify timing constraints
    which must be honored for specific control-flow
    behaviors and task execution orderings. Static checking
    of scenario-based properties currently requires
    computationally expensive model checking methods. Hence
    the proposed graph-based static timing-analysis
    algorithm improves upon the state-of-the-art. This is
    manifested in a significant performance advantage over
    timed model checking (up to 1000X in several cases),
    which suffers from state space explosion. The proposed
    algorithm also employs compositional reasoning and
    abstraction refinement for handling large problems. We
    also illustrate methods for using scenario-based timing
    analysis, which can act as alternatives to traditional
    timed model checking for verification of timed systems
    like FDDI and Fischer protocols. We implement this
    timing verification algorithm as a tool called {\em
    SymTime\/} and present experimental results for SymTime
    comparing it with SPIN, UPPAAL, and a TCTL model
    checker for Time Petri Nets, called Romeo.},
  acknowledgement = ack-nhfb,
  articleno = 37,
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {execution scenarios; real time systems; static timing
    analysis; Timing verification},
}

@article{Grosse:2009:MPO,
  author = {Philippe Grosse and Yves Durand and Paul Feautrier},
  title = {Methods for power optimization in {SOC}-based data
    flow systems},
  journal = j-TODAES,
  volume = 14,
  number = 3,
  pages = {38:1--38:??},
  month = may,
  year = 2009,
  coden = {ATASFO},
  doi = {https://doi.org/10.1145/1529255.1529260},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  bibdate = {Wed Jun 3 16:12:53 MDT 2009},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {Whereas the computing power of DSP or general-purpose
    processors was sufficient for 3G baseband
    telecommunication algorithms, stringent timing
    constraints of 4G wireless telecommunication systems
    require computing-intensive data-driven architectures.
    Managing the complexity of these systems within the
    energy constraints of a mobile terminal is becoming a
    major challenge for designers. System-level low-power
    policies have been widely explored for generic
    software-based systems, but data-flow architectures
    used for high data-rate telecommunication systems
    feature heterogeneous components that require specific
    configurations for power management. In this study, we
    propose an innovative power optimization scheme
    tailored to self-synchronized data-flow systems. Our
    technique, based on the synchronous data-flow modeling
    approach, takes advantage of the latest low-power
    techniques available for digital architectures. We
    illustrate our optimization method on a complete 4G
    telecommunication baseband modem and show the energy
    savings expected by this technique considering present
    and future silicon technologies.},
  acknowledgement = ack-nhfb,
  articleno = 38,
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {4G base-band modem; data-driven SOC; Power
    optimization; synchronous data-flow graph},
}

@Article{Clarke:2009:WLS,
  author =       "Jonathan A. Clarke and George A. Constantinides and
                 Peter Y. K. Cheung",
  title =        "Word-length selection for power minimization via
                 nonlinear optimization",
  journal =      j-TODAES,
  volume =       "14",
  number =       "3",
  pages =        "39:1--39:??",
  month =        may,
  year =         "2009",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1529255.1529261",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Jun 3 16:12:53 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "This article describes the first method for minimizing
                 the dynamic power consumption of a Digital Signal
                 Processing (DSP) algorithm implemented on
                 reconfigurable hardware via word-length optimization.
                 Fast models for estimating the power consumption of the
                 arithmetic components and the routing power of these
                 algorithm implementations are used within a constrained
                 nonlinear optimization formulation that solves a
                 relaxed version of word-length optimization. Tight
                 lower and upper bounds on the cost of the integer
                 word-length problem can be obtained using the proposed
                 solution, with typical upper bounds being 2.9\% and
                 5.1\% larger than the lower bounds for area and power
                 consumption, respectively. Heuristics can then use the
                 upper bound as a starting point from which to get even
                 closer to the known lower bound. Results show that
                 power consumption can be improved by up to 40\%
                 compared to that achieved when using simple word-length
                 selection techniques, and further comparisons are made
                 between the minimization of different cost functions
                 that give insight into the advantages offered by
                 multiple word-length optimization.",
  acknowledgement = ack-nhfb,
  articleno =    "39",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "bitwidth; Power consumption; signal processing;
                 synthesis; word length",
}

@Article{Morgado:2009:GRS,
  author =       "P. Marques Morgado and Paulo F. Flores and L. Miguel
                 Silveira",
  title =        "Generating realistic stimuli for accurate power grid
                 analysis",
  journal =      j-TODAES,
  volume =       "14",
  number =       "3",
  pages =        "40:1--40:??",
  month =        may,
  year =         "2009",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1529255.1529262",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Jun 3 16:12:53 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Power analysis tools are an integral component of any
                 current power sign-off methodology. The performance of
                 a design's power grid affects the timing and
                 functionality of a circuit, directly impacting the
                 overall performance. Ensuring power grid robustness
                 implies taking into account, among others, static and
                 dynamic effects of voltage drop, ground bounce, and
                 electromigration. This type of verification is usually
                 done by simulation, targeting a worst-case scenario
                 where devices, switching almost simultaneously, could
                 impose stern current demands on the power grid. While
                 determination of the exact worst-case switching
                 conditions from the grid perspective is usually not
                 practical, the choice of simulation stimuli has a
                 critical effect on the results of the analysis.
                 Targeting safe but unrealistic settings could lead to
                 pessimistic results and costly overdesigns in terms of
                 die area. In this article we describe a software tool
                 that generates a reasonable, realistic, set of stimuli
                 for simulation. The approach proposed accounts for
                 timing and spatial restrictions that arise from the
                 circuit's netlist and placement and generates an
                 approximation to the worst-case condition. The
                 resulting stimuli indicate that only a fraction of the
                 gates change in any given timing window, leading to a
                 more robust verification methodology, especially in the
                 dynamic case. Generating such stimuli is akin to
                 performing a standard static timing analysis, so the
                 tool fits well within conventional design frameworks.
                 Furthermore, the tool can be used for hotspot detection
                 in early design stages.",
  acknowledgement = ack-nhfb,
  articleno =    "40",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "ground bounce; Power grid; simulation; stimuli
                 generation; verification; voltage drop",
}

@article{Yu:2009:APG,
  author = {Hao Yu and Joanna Ho and Lei He},
  title = {Allocating power ground vias in {$3$D} {ICs} for
    simultaneous power and thermal integrity},
  journal = j-TODAES,
  volume = 14,
  number = 3,
  pages = {41:1--41:??},
  month = may,
  year = 2009,
  coden = {ATASFO},
  doi = {https://doi.org/10.1145/1529255.1529263},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  bibdate = {Wed Jun 3 16:12:53 MDT 2009},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {The existing work on via allocation in 3D ICs ignores
    power/ground vias' ability to simultaneously reduce
    voltage bounce and remove heat. This article develops
    the first in-depth study on the allocation of
    power/ground vias in 3D ICs with simultaneous
    consideration of power and thermal integrity. By
    identifying principal ports and parameters, effective
    electrical and thermal macromodels are employed to
    provide dynamic power and thermal integrity as well as
    sensitivity with respect to via density. With the use
    of sensitivity, an efficient via allocation
    simultaneously driven by power and thermal integrity is
    developed. Experiments show that, compared to
    sequential power and thermal optimization using static
    integrity, sequential optimization using the dynamic
    integrity reduces nonsignal vias by up to 18\%, and
    simultaneous optimization using dynamic integrity
    further reduces nonsignal vias by up to 45.5\%.},
  acknowledgement = ack-nhfb,
  articleno = 41,
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {macromodeling; parametric 3D-IC design; Thermal and
    power integrity},
}

@article{Liu:2009:MAA,
  author = {Bo Liu and Francisco V. Fern{\'a}ndez and Georges
    Gielen and R. Castro-L{\'o}pez and E. Roca},
  title = {A memetic approach to the automatic design of
    high-performance analog integrated circuits},
  journal = j-TODAES,
  volume = 14,
  number = 3,
  pages = {42:1--42:??},
  month = may,
  year = 2009,
  coden = {ATASFO},
  doi = {https://doi.org/10.1145/1529255.1529264},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  bibdate = {Wed Jun 3 16:12:53 MDT 2009},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {This article introduces an evolution-based
    methodology, named memetic single-objective
    evolutionary algorithm (MSOEA), for automated sizing of
    high-performance analog integrated circuits. Memetic
    algorithms may achieve higher global and local search
    ability by properly combining operators from different
    standard evolutionary algorithms. By integrating
    operators from the differential evolution algorithm,
    from the real-coded genetic algorithm, operators
    inspired by the simulated annealing algorithm, and a
    set of constraint handling techniques, MSOEA
    specializes in handling analog circuit design problems
    with numerous and tight design constraints. The method
    has been tested through the sizing of several analog
    circuits. The results show that design specifications
    are met and objective functions are highly optimized.
    Comparisons with available methods like genetic
    algorithm and differential evolution in conjunction
    with static penalty functions, as well as with
    intelligent selection-based differential evolution, are
    also carried out, showing that the proposed algorithm
    has important advantages in terms of constraint
    handling ability and optimization quality.},
  acknowledgement = ack-nhfb,
  articleno = 42,
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {Analog circuit sizing; analog design automation;
    constrained optimization; memetic algorithm},
}

@article{Mutyam:2009:SST,
  author = {Madhu Mutyam},
  title = {Selective shielding technique to eliminate crosstalk
    transitions},
  journal = j-TODAES,
  volume = 14,
  number = 3,
  pages = {43:1--43:??},
  month = may,
  year = 2009,
  coden = {ATASFO},
  doi = {https://doi.org/10.1145/1529255.1529265},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  bibdate = {Wed Jun 3 16:12:53 MDT 2009},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {With CMOS process technology scaling to deep submicron
    level, propagation delay across long on-chip buses is
    becoming one of the main performance limiting factors
    in high-performance designs. Propagation delay is very
    significant when adjacent wires are transitioning in
    opposite direction as compared to transitioning in the
    same direction. As opposite transitions on adjacent
    wires (called as {\em crosstalk transitions\/}) have
    significant impact on propagation delay, several bus
    encoding techniques have been proposed in literature to
    eliminate such transitions.\par

    We propose {\em selective shielding\/} technique to
    eliminate crosstalk transitions. We show that the
    selective shielding technique requires $ \lceil 3 n / 2
    \rceil $ wires to encode a $n$-bit bus. SPICE
    simulations by considering 90nm technology nodes reveal
    that, for uniformly distributed random data, our
    technique achieves nearly 39\% (21\%) delay savings
    over 10 {\em mm\/}-length uncoded 32-bit bus for
    pipelined (nonpipelined) data transmission at the cost
    of nearly 7\% energy overhead.},
  acknowledgement = ack-nhfb,
  articleno = 43,
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {bus encoding; Crosstalk; power consumption; switching
    activity},
}

@article{Taskin:2009:CTR,
  author = {Baris Taskin and Joseph Demaio and Owen Farell and
    Michael Hazeltine and Ryan Ketner},
  title = {Custom topology rotary clock router with tree
    subnetworks},
  journal = j-TODAES,
  volume = 14,
  number = 3,
  pages = {44:1--44:??},
  month = may,
  year = 2009,
  coden = {ATASFO},
  doi = {https://doi.org/10.1145/1529255.1529266},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  bibdate = {Wed Jun 3 16:12:53 MDT 2009},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {Increasing demands on computing power have spurred the
    development of faster, higher-density Integrated
    Circuits (ICs), compounding power and complexity
    concerns in design budgets. The clock distribution
    network is a significant contributor to such power and
    complexity concerns. Resonant rotary clocking is a
    relatively new technology that realizes several
    benefits over current clocking methods, including
    power, frequency, and variation tolerance, yet lacks
    the automation tools to promote increased use. Towards
    this end, an automated rotary clock routing methodology
    is presented that generates custom topology rotary ring
    routes with tree subnetworks. In addition to the
    benefits of adiabatic clocking, the presented custom
    topology router permits 38.6\% shorter wirelengths on
    average for register tapping, compared to traditional
    prescribed skew, binary tree routing.},
  acknowledgement = ack-nhfb,
  articleno = 44,
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {clock network design; clock skew; multiphase
    synchronization; Resonant rotary clocking},
}

@article{Liu:2009:HPO,
  author = {Chih-Hung Liu and Shih-Yi Yuan and Sy-Yen Kuo and
    Szu-Chi Wang},
  title = {High-performance obstacle-avoiding rectilinear
    {Steiner} tree construction},
  journal = j-TODAES,
  volume = 14,
  number = 3,
  pages = {45:1--45:??},
  month = may,
  year = 2009,
  coden = {ATASFO},
  doi = {https://doi.org/10.1145/1529255.1529267},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  bibdate = {Wed Jun 3 16:12:53 MDT 2009},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {Rectilinear Steiner trees are used to route signal
    nets by global and detail routers in VLSI design for a
    long time. However, in current IC industry, there are
    significantly increasing obstacles to be considered,
    such as large-scale power networks, pre-routed nets, IP
    blocks, and antenna jumpers. Accordingly, the {\em
    obstacle-avoiding rectilinear Steiner minimal tree\/}
    (OARSMT) problem has become more important. In this
    article, we propose a new routing graph, {\em
    obstacle-avoiding routing graph\/} (OARG), for the
    OARSMT problem. Due to the important properties of
    OARG, we construct a 3-step algorithm and a local
    refinement scheme, which both can take advantage of
    these properties, to find a suboptimal solution
    efficiently. Furthermore, each step of our 3-step
    algorithm as well as the local refinement scheme has
    theoretical or practical benefits. Therefore, each of
    them can be applicable to other existing works for
    general or specific considerations such as efficiency
    or effectiveness. Extensive experimental results show
    that our method outperforms all existing works in terms
    of wirelength and achieves the best speed
    performance.},
  acknowledgement = ack-nhfb,
  articleno = 45,
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {obstacle-avoiding; rectilinear; Routing; Steiner
    tree},
}

@article{Yan:2009:TAS,
  author = {Tan Yan and Martin D. F. Wong},
  title = {Theories and algorithms on single-detour routing for
    untangling twisted bus},
  journal = j-TODAES,
  volume = 14,
  number = 3,
  pages = {46:1--46:??},
  month = may,
  year = 2009,
  coden = {ATASFO},
  doi = {https://doi.org/10.1145/1529255.1529268},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  bibdate = {Wed Jun 3 16:12:53 MDT 2009},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {Previous works on PCB bus routing assume matched pin
    ordering on both sides. But in practice, the pin
    ordering might be mismatched and the nets become
    twisted. In this article, we propose a preprocessing
    step to untangle such twisted nets. We also introduce a
    practical routing style, which we call {\em
    single-detour routing}, to simplify the untangling
    problem. We then present a necessary and sufficient
    condition for the existence of single-detour routing
    solutions. Furthermore, we present a
    dynamic-programming-based algorithm to solve the
    single-detour untangling problem with consideration of
    wire capacity between adjacent pins. Our algorithm
    produces an optimal single-detour routing solution that
    rematches the pin ordering. By integrating our
    algorithm into the bus router in a previous
    length-matching router, we show that many routing
    problems that cannot be solved previously can now be
    solved with insignificant increase in runtime.},
  acknowledgement = ack-nhfb,
  articleno = 46,
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {Bus routing; dynamic programming; printed circuit
    board (PCB); single-detour routing; twisted bus},
}

@article{Gopalakrishnan:2009:ATB,
  author = {Sivaram Gopalakrishnan and Priyank Kalla},
  title = {{2009 ACM TODAES} best paper award: {Optimization} of
    polynomial datapaths using finite ring algebra},
  journal = j-TODAES,
  volume = {14},
  number = {4},
  pages = {47:1--47:??},
  month = aug,
  year = {2009},
  coden = {ATASFO},
  doi = {https://doi.org/10.1145/1562514.1562515},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  bibdate = {Thu Aug 27 14:38:55 MDT 2009},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  articleno = {47},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Bertels:2009:EMM,
  author = {Peter Bertels and Wim Heirman and Erik D'Hollander and
    Dirk Stroobandt},
  title = {Efficient memory management for hardware accelerated
    {Java Virtual Machines}},
  journal = j-TODAES,
  volume = {14},
  number = {4},
  pages = {48:1--48:??},
  month = aug,
  year = {2009},
  coden = {ATASFO},
  doi = {https://doi.org/10.1145/1562514.1562516},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  bibdate = {Thu Aug 27 14:38:55 MDT 2009},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {Application-specific hardware accelerators can
    significantly improve a system's performance. In a
    Java-based system, we then have to consider a hybrid
    architecture that consists of a Java Virtual Machine
    running on a general-purpose processor connected to the
    hardware accelerator. In such a hybrid architecture,
    data communication between the accelerator and the
    general-purpose processor can incur a significant cost,
    which may even annihilate the original performance
    improvement of adding the accelerator. A careful layout
    of the data in the memory structure is therefore of
    major importance to maintain the acceleration
    performance benefits.\par

    This article addresses the reduction of the
    communication cost in a distributed shared memory
    consisting of the main memory of the processor and the
    accelerator's local memory, which are unified in the
    Java heap. Since memory access times are highly
    nonuniform, a suitable allocation of objects in either
    main memory or the accelerator's local memory can
    significantly reduce the communication cost. We propose
    several techniques for finding the optimal location for
    each Java object's data, either statically through
    profiling or dynamically at runtime. We show how we can
    reduce communication cost by up to 86\% for the SPECjvm
    and DaCapo benchmarks. We also show that the best
    strategy is application dependent and also depends on
    the relative cost of remote versus local accesses. For
    a relative cost higher than 10, a self-learning dynamic
    approach often results in the best performance.},
  acknowledgement = ack-nhfb,
  articleno = {48},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {Dynamic memory management; hardware acceleration; Java
    Virtual Machine},
}

@article{Faezipour:2009:HPE,
  author = {Miad Faezipour and Mehrdad Nourani and Rina
    Panigrahy},
  title = {A hardware platform for efficient worm outbreak
    detection},
  journal = j-TODAES,
  volume = {14},
  number = {4},
  pages = {49:1--49:??},
  month = aug,
  year = {2009},
  coden = {ATASFO},
  doi = {https://doi.org/10.1145/1562514.1562517},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  bibdate = {Thu Aug 27 14:38:55 MDT 2009},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {Network Intrusion Detection Systems (NIDS) monitor
    network traffic to detect attacks or unauthorized
    activities. Traditional NIDSes search for patterns that
    match typical network compromise or remote hacking
    attempts. However, newer networking applications
    require finding the frequently repeated strings in a
    packet stream for further investigation of potential
    attack attempts. Finding frequently repeated strings
    within a given time frame of the packet stream has been
    quite efficient to detect polymorphic worm outbreaks. A
    novel real-time worm outbreak detection system using
    two-phase hashing and monitoring repeated common
    substrings is proposed in this article. We use the
    concept of shared counters to minimize the memory cost
    while efficiently sifting through suspicious strings.
    The worm outbreak system has been prototyped on Altera
    Stratix FPGA. We have tested the system for various
    settings and packet stream sizes. Experimental results
    verify that our system can support line speed of
    gigabit-rates with negligible false positive and
    negative rates.},
  acknowledgement = ack-nhfb,
  articleno = {49},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {false negative; false positive; hashing; Network
    Intrusion Detection System; polymorphic worm; shared
    counters; worm outbreak},
}

@article{Lee:2009:TSA,
  author = {Byunghyun Lee and Ki-Seok Chung and Bontae Koo and
    Nak-Woong Eum and Taewhan Kim},
  title = {Thermal sensor allocation and placement for
    reconfigurable systems},
  journal = j-TODAES,
  volume = {14},
  number = {4},
  pages = {50:1--50:??},
  month = aug,
  year = {2009},
  coden = {ATASFO},
  doi = {https://doi.org/10.1145/1562514.1562518},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  bibdate = {Thu Aug 27 14:38:55 MDT 2009},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {A dynamic monitoring of thermal behavior of hardware
    resources using thermal sensors is very important to
    maintain the operation of systems safe and reliable.
    This article addresses the problem of thermal sensor
    allocation and placement for reconfigurable systems.
    For programmable logic arrays, the degree of the use of
    hardware resources in the systems highly depends on the
    target application to be implemented, making the
    allocation of thermal sensors at the manufacturing
    stage inadequate (or too costly if implemented) due to
    the unpredictable thermal profile. This means that the
    thermal sensor allocation could be processed at the
    time when the reconfigurable logic is implemented
    (i.e., at the post manufacturing stage). This work
    proposes an effective solution to the problem of
    thermal sensor allocation and placement at the
    post-manufacturing stage. Specifically, we define the
    Sensor Allocation and Placement Problem (SAPP), and
    propose a solution which formulates SAPP into the
    Unate-Covering Problem (UCP) and solves it optimally.
    Also we combine SAPP with temperature correlation to
    reduce required sensors more aggressively and propose a
    solution by applying UCP again. We then provide an
    extended solution to handle a practical design issue
    where the hardware resources for the sensor
    implementation on specific array locations have already
    been used up by the application logic. Experimental
    results using MCNC benchmarks show that our proposed
    technique uses 62.4\% and 19.7\% less number of sensors
    to monitor hotspots on the average than that used by
    the grid-based and the bisection-based approaches while
    the overhead of auxiliary circuitry is minimized,
    respectively.},
  acknowledgement = ack-nhfb,
  articleno = {50},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {optimal placement; reconfigurable system; Thermal
    sensor; unate-covering problem},
}

@article{Yuh:2009:TTB,
  author = {Ping-Hung Yuh and Chia-Lin Yang and Yao-Wen Chang},
  title = {{T}-trees: a tree-based representation for temporal
    and three-dimensional floorplanning},
  journal = j-TODAES,
  volume = {14},
  number = {4},
  pages = {51:1--51:??},
  month = aug,
  year = {2009},
  coden = {ATASFO},
  doi = {https://doi.org/10.1145/1562514.1562519},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  bibdate = {Thu Aug 27 14:38:55 MDT 2009},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {Improving logic capacity by time-sharing, dynamically
    reconfigurable FPGAs are employed to handle designs of
    high complexity and functionality. In this article, we
    model each task as a 3D-box and deal with the temporal
    floorplanning/placement problem for dynamically
    reconfigurable FPGA architectures. We present a
    tree-based data structure, called {\em T-trees}, to
    represent the spatial and temporal relations among
    tasks. Each node in a T-tree has at most three children
    which represent the dimensional relationship among
    tasks. For the T-tree, we develop an efficient packing
    method and derive the condition to ensure the
    satisfaction of precedence constraints which model the
    temporal ordering among tasks induced by the execution
    of dynamically reconfigurable FPGAs. Experimental
    results show that our tree-based formulation can obtain
    significantly better solution quality with less
    execution time than the most recent state-of-the-art
    work.},
  acknowledgement = ack-nhfb,
  articleno = {51},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {partially dynamical reconfiguration; Reconfigurable
    computing; temporal floorplanning},
}

%%% NOTE(review): the final abstract sentence read "show that the
%%% efficiency and effectiveness of ... techniques." (no predicate); the
%%% stray "that" was dropped.  Confirm against the publisher's abstract.
@Article{Yuh:2009:LAT,
  author =       "Ping-Hung Yuh and Chia-Lin Yang and Chi-Feng Li and
                 Chung-Hsiang Lin",
  title =        "Leakage-aware task scheduling for partially
                 dynamically reconfigurable {FPGAs}",
  journal =      j-TODAES,
  volume =       "14",
  number =       "4",
  pages =        "52:1--52:??",
  month =        aug,
  year =         "2009",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1562514.1562520",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Aug 27 14:38:55 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "As technology continues to shrink, reducing leakage
                 power of Field-Programmable Gate Arrays (FPGAs) becomes
                 a critical issue for the practical use of FPGAs. In
                 this article, we address the leakage issue of partially
                 dynamically reconfigurable FPGA architectures with
                 sleep transistors embedded into FPGA fabrics. In
                 particular, we focus on eliminating leakage waste due
                 to the delay between reconfiguration and execution time
                 of a task. For partially dynamically reconfigurable
                 FPGAs, the configuration prefetching technique is
                 commonly used to hide runtime reconfiguration overhead.
                 With prefetching, the configuration of a task is loaded
                 into FPGAs as early as possible. Therefore, there is
                 often a delay between reconfiguration and execution
                 time of a task. In this period of time, the SRAM cells
                 allocated to a task cannot be turned off even though
                 they are not utilized.\par

                 In this article, we propose a two-stage task scheduling
                 methodology to reduce leakage waste due to the delay
                 between reconfiguration and execution time of a task
                 without sacrificing performance. In the first stage, a
                 performance-driven task scheduler that targets at
                 minimizing the schedule length is invoked to generate
                 an initial placement. In the second stage, a
                 postplacement leakage-aware task scheduling is applied
                 to refine the initial placement such that leakage waste
                 is minimized provided that the schedule length is not
                 increased. To solve the postplacement leakage
                 optimization problem, we propose two algorithms. The
                 first one is an optimal algorithm based on Integer
                 Linear Programming (ILP). The second algorithm is a
                 heuristic approach that iteratively refines the
                 placement to reduce leakage waste. Experimental results
                 on real and synthetic designs show the efficiency
                 and effectiveness of the proposed postplacement leakage
                 reduction techniques.",
  acknowledgement = ack-nhfb,
  articleno =    "52",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "leakage; partially dynamical reconfiguration;
                 placement; Reconfigurable computing; scheduling",
}

@article{Chen:2009:LRD,
  author = {Po-Yuan Chen and Chiao-Chen Fang and Tingting Hwang
    and Hsi-Pin Ma},
  title = {Leakage reduction, delay compensation using
    partition-based tunable body-biasing techniques},
  journal = j-TODAES,
  volume = {14},
  number = {4},
  pages = {53:1--53:??},
  month = aug,
  year = {2009},
  coden = {ATASFO},
  doi = {https://doi.org/10.1145/1562514.1562521},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  bibdate = {Thu Aug 27 14:38:55 MDT 2009},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {In recent years, fabrication technology of CMOS has
    scaled to nanometer dimensions. As scaling progresses,
    several new challenges follow. Among them, the most
    noticeable two are process variations and leakage
    current of the circuit. To tackle the problems of
    process variations and leakage current, an effective
    way is to use a body-biasing technique. In substance,
    using the RBB technique can minimize leakage current
    but increase the delay of a gate. Contrary to RBB, the
    FBB technique decreases the delay but increases leakage
    current of a gate. In the previous work, a single
    body-biasing is applied to the whole circuit. In a slow
    circuit, since the FBB is applied to the whole circuit,
    the leakage current of all gates in the circuit
    increases dramatically. On the other hand, in a fast
    circuit, RBB is applied to decrease the leakage
    current. However, without violating the timing
    specification, the value of body-biasing is restricted
    by the critical paths, and the saving of leakage
    current is limited. In this article, we propose a
    design flow to partition the circuit into subcircuits
    so that each subcircuit can be applied its individual
    RBB or FBB. Experiments show that our method is able to
    save leakage current from 42\% to 47\% as compared to
    designs not using a body-biasing technique. Under
    process variations, our method can save 42\% to 49\%
    leakage on fast circuits and 20\% to 35\% on slow
    circuits.},
  acknowledgement = ack-nhfb,
  articleno = {53},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {Body biasing; leakage current; low-power design;
    process variations},
}

@article{Ranganathan:2009:VAM,
  author = {Nagarajan Ranganathan and Upavan Gupta and
    Venkataraman Mahalingam},
  title = {Variation-aware multimetric optimization during gate
    sizing},
  journal = j-TODAES,
  volume = {14},
  number = {4},
  pages = {54:1--54:??},
  month = aug,
  year = {2009},
  coden = {ATASFO},
  doi = {https://doi.org/10.1145/1562514.1562522},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  bibdate = {Thu Aug 27 14:38:55 MDT 2009},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {The aggressive scaling of technology has not only
    accentuated the effects of intradie parametric
    variations in devices, but it has also impacted the
    effects of optimizing a certain performance metric on
    the optimality of other metrics. Thus, there is a need
    for optimization methods that can perform the
    simultaneous optimization of multiple metrics
    considering the effects of process variations. In this
    article, a novel variation-aware gate sizing framework
    has been developed that can perform simultaneous
    optimization of multiple performance metrics. In this
    framework, the relationships between the optimization
    metrics (like dynamic power, leakage power, and
    crosstalk noise) are modeled as a function of the gate
    sizes in the objective function. The delay values
    obtained from unconstrained delay optimization and the
    noise margins derived from coupling capacitance
    information form the constraints for the multimetric
    optimization problem. As an abstract framework, it is
    independent of the type of mathematical programming
    approach as well as the metrics chosen to be optimized.
    The framework has been implemented using a mathematical
    programming approach and has been tested on ITC'99
    benchmarks for different combinations of multimetric
    and single-metric optimizations of delay, dynamic
    power, leakage power, and crosstalk noise. The results
    indicate that the framework identifies good solution
    points, and is efficient for postlayout optimization
    via gate sizing.},
  acknowledgement = ack-nhfb,
  articleno = {54},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {crosstalk noise; delay; Gate sizing; mathematical
    programming; optimization; power},
}

@article{Moiseev:2009:PDO,
  author = {Konstantin Moiseev and Avinoam Kolodny and Shmuel
    Wimer},
  title = {Power-delay optimization in {VLSI} microprocessors by
    wire spacing},
  journal = j-TODAES,
  volume = {14},
  number = {4},
  pages = {55:1--55:??},
  month = aug,
  year = {2009},
  coden = {ATASFO},
  doi = {https://doi.org/10.1145/1562514.1562523},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  bibdate = {Thu Aug 27 14:38:55 MDT 2009},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {The problem of optimal space allocation among
    interconnect wires in a VLSI layout, in order to
    minimize the switching power consumption and the
    average signal delay, is addressed in this article. We
    define a Weighted Power-Delay Sum (WPDS) objective
    function and derive necessary and sufficient conditions
    for the existence of optimal interwire space
    allocation, based on the notion of capacitance density.
    At the optimum, every wire must be in equilibrium of
    its line-to-line weighted capacitance density on its
    two opposite sides, and the WPDS of the whole circuit
    is minimal if and only if capacitance density is
    uniformly distributed across the entire layout. This
    condition is shown to be equivalent to all paths of the
    layout cross-capacitance graph having the same length
    and all cuts having the same flow. An implementation
    which has been used in the design of a recent
    commercial high-end microprocessor and yielded 17\%
    power reduction and 9\% delay reduction in top-level
    interconnects is presented.},
  acknowledgement = ack-nhfb,
  articleno = {55},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {delay-optimization; interconnect optimization; power
    optimization; Wire spacing},
}

@article{Engelke:2009:SSU,
  author = {Piet Engelke and Bernd Becker and Michel Renovell and
    Juergen Schloeffel and Bettina Braitling and Ilia
    Polian},
  title = {{SUPERB}: {Simulator Utilizing Parallel Evaluation of
    Resistive Bridges}},
  journal = j-TODAES,
  volume = {14},
  number = {4},
  pages = {56:1--56:??},
  month = aug,
  year = {2009},
  coden = {ATASFO},
  doi = {https://doi.org/10.1145/1562514.1596831},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  bibdate = {Thu Aug 27 14:38:55 MDT 2009},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {A high-performance resistive bridging fault simulator
    SUPERB (Simulator Utilizing Parallel Evaluation of
    Resistive Bridges) is proposed. It is based on fault
    sectioning in combination with parallel-pattern or
    parallel-fault multiple-stuck-at simulation. It
    outperforms a conventional interval-based resistive
    bridging fault simulator by three orders of magnitude
    while delivering identical results. Further competing
    tools are outperformed by several orders of magnitude.
    Industrial-size circuits, including a
    multi-million-gates design, could be simulated with
    runtimes within an order of magnitude of the runtimes
    for pattern-parallel stuck-at fault simulation.},
  acknowledgement = ack-nhfb,
  articleno = {56},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {bridging fault simulation; fault mapping; PPSFP;
    Resistive bridging faults; SPPFP},
}

%%% NOTE(review): abstract wording "algorithm based the observation" was
%%% corrected to "based on the observation" (dropped word).  Confirm
%%% against the publisher's abstract.
@Article{Chang:2009:DIE,
  author =       "Li-Pin Chang and Chun-Da Du",
  title =        "Design and implementation of an efficient
                 wear-leveling algorithm for solid-state-disk
                 microcontrollers",
  journal =      j-TODAES,
  volume =       "15",
  number =       "1",
  pages =        "6:1--6:??",
  month =        dec,
  year =         "2009",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1640457.1640463",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Mar 15 11:18:31 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Solid-state disks (SSDs) are storage devices that
                 emulate hard drives with flash memory. They have been
                 widely deployed in mobile computers as disk drive
                 replacements. Flash memory is organized in terms of
                 erase blocks. With the current technology, a block can
                 reach the end of its lifetime after thousands of
                 erasure operations. Wear leveling is a technique to
                 evenly erase the entire flash memory so that all blocks
                 remain alive as long as possible. This study introduces
                 a new wear-leveling algorithm based on the observation
                 that, under a real-life mobile PC's workload, most
                 erasure operations are contributed by a small fraction
                 of blocks. Our key ideas are (1) moving rarely updated
                 data to a block that is extraordinarily worn and (2)
                 avoiding repeatedly involving a block in wear-leveling
                 activities. This study presents a successful
                 implementation of the proposed wear-leveling algorithm
                 using about 200 bytes of RAM in an SSD controller rated
                 at 33 MHz. Evaluation results show that this algorithm
                 achieves even wear of the entire flash memory while
                 reducing the overheads of extra flash-memory
                 operations.",
  acknowledgement = ack-nhfb,
  articleno =    "6",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "embedded systems; flash memory; solid-state disks;
                 Wear leveling",
}

@article{Geelen:2009:SLE,
  author = {Bert Geelen and Vissarion Ferentinos and Francky
    Catthoor and Gauthier Lafruit and Diederik Verkest and
    Rudy Lauwereins and Thanos Stouraitis},
  title = {Spatial locality exploitation for runtime reordering
    of {JPEG2000} wavelet data layouts},
  journal = j-TODAES,
  volume = {15},
  number = {1},
  pages = {8:1--8:??},
  month = dec,
  year = {2009},
  coden = {ATASFO},
  doi = {https://doi.org/10.1145/1640457.1640465},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  bibdate = {Mon Mar 15 11:18:31 MDT 2010},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {Exploitation of spatial locality is essential for
    memories to increase the access bandwidth and to reduce
    the access-related latency and energy per word. Spatial
    locality exploitation of a kernel can be improved by
    modifying placement of data in memory, but this may be
    felt not only by the kernel itself, but also in other
    application components accessing the same data. Thus
    care is needed to avoid global miss-rate improvements
    are thwarted by miss-rate increases in other
    application components. This article examines
    application-level miss-rate increases due to handling
    modified Wavelet Transform data layouts by explicitly
    reordering at runtime, exploiting the execution order
    freedom within a reordering buffer when the layout of
    surrounding components is known. For the JPEG2000
    application, taking into account the reordering costs
    still results in 80\% net WT miss-rate gains.},
  acknowledgement = ack-nhfb,
  articleno = {8},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {Layout transformations; spatial locality; wavelet
    transform},
}

@article{Keutzer:2009:ATD,
  author = {Kurt Keutzer and Peng Li and Li Shang and Hai Zhou},
  title = {{ACM Transactions on Design Automation of Electronic
    Systems (TODAES)} special section call for papers:
    {Parallel CAD}: Algorithm design and programming},
  journal = j-TODAES,
  volume = {15},
  number = {1},
  pages = {9:1--9:??},
  month = dec,
  year = {2009},
  coden = {ATASFO},
  doi = {https://doi.org/10.1145/1640457.1640466},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  bibdate = {Mon Mar 15 11:18:31 MDT 2010},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  articleno = {9},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Kim:2009:MLP,
  author =       "Jaehyun Kim and Chungki Oh and Youngsoo Shin",
  title =        "Minimizing leakage power of sequential circuits
                 through mixed-{$ V_t $} flip-flops and multi-{$ V_t $}
                 combinational gates",
  journal =      j-TODAES,
  volume =       "15",
  number =       "1",
  pages =        "4:1--4:??",
  month =        dec,
  year =         "2009",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1640457.1640461",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Mar 15 11:18:31 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "The current use of multi-$ V_t $ to control leakage
                 power targets combinational gates, even though
                 sequential elements such as flip-flops and latches also
                 contribute appreciable leakage. We can, nevertheless,
                 apply multi-$ V_t $ to flip-flops, but few can take
                 advantage of high-$ V_t $, which causes abrupt changes
                 in timing. We combine low- and high-$ V_t $ at the
                 transistor level to design mixed-$ V_t $ flip-flops
                 with reduced leakage, an unchanged footprint, and a
                 small increase in either setup time or clock-to-Q
                 delay, but not both. An allocation algorithm for two $
                 V_t $'s determines the $ V_t $ (mixed, high, or low) of
                 each flip-flop and the $ V_t $ of each combinational
                 gate (high or low) in a sequential circuit. Experiments
                 with 65-nm technology show an average leakage saving of
                 42\% compared to conventional multi-$ V_t $ approaches;
                 the leakage of flip-flops alone is cut by 78\%. This
                 saving is largely unaffected by die-to-die or
                 within-die process variations, which we show through
                 simulations. Standard deviation of leakage caused by
                 process variation is also reduced due to less use of
                 low-$ V_t $ devices. We also extend our approach to
                 three $ V_t $'s, and obtain a further 14\% reduction in
                 leakage.",
  acknowledgement = ack-nhfb,
  articleno =    "4",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Flip-flop; leakage current; low power; mixed-$ V_t $;
                 sequential circuit",
}

@Article{Mu:2009:AHS,
  author =       "Jingqing Mu and Roman Lysecky",
  title =        "Autonomous hardware\slash software partitioning and
                 voltage\slash frequency scaling for low-power embedded
                 systems",
  journal =      j-TODAES,
  volume =       "15",
  number =       "1",
  pages =        "2:1--2:??",
  month =        dec,
  year =         "2009",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1640457.1640459",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Mar 15 11:18:31 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Warp processing is a recent computing technology
                 capable of autonomously partitioning the critical
                 kernels within an executing software application to
                 hardware circuits implemented within an on-chip FPGA.
                 While previous performance-driven warp processing has
                 been shown to provide significant performance
                 improvements over software only execution, the dynamic
                 performance improvement of warp processors may be lost
                 for certain application domains, such as real-time
                 systems. Alternatively, as power consumption continues
                 to become a dominant design constraint, we present and
                 thoroughly analyze a low-power warp processing
                 methodology that leverages voltage and/or frequency
                 scaling to substantially reduce power consumption
                 without any performance degradation --- all without
                 requiring designer effort beyond the initial software
                 development.",
  acknowledgement = ack-nhfb,
  articleno =    "2",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "dynamically adaptable systems; hardware/software
                 partitioning; low-power; low-power FPGAs;
                 reconfigurable computing; Warp processing",
}

@Article{Pomeranz:2009:UST,
  author =       "Irith Pomeranz and Sudhakar M. Reddy",
  title =        "Using stuck-at tests to form scan-based tests for
                 transition faults in standard-scan circuits",
  journal =      j-TODAES,
  volume =       "15",
  number =       "1",
  pages =        "7:1--7:??",
  month =        dec,
  year =         "2009",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1640457.1640464",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Mar 15 11:18:31 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "In enhanced-scan circuits, a two-pattern test for a
                 transition fault can be obtained by using a test {\em
                 t$_j$ \/} that detects a stuck-at fault, and preceding
                 it by a test {\em t$_i$ \/} that activates another
                 stuck-at fault. Thus, test generation for transition
                 faults can be done by combining pairs of stuck-at
                 tests. This provides an alternative to deterministic
                 test generation, as well as reduces the test storage
                 requirements for transition fault tests. We study the
                 possibility of generating scan-based tests for
                 transition faults in standard-scan circuits in a
                 similar way, by combining pairs of stuck-at tests.
                 Since it is not always possible to obtain a
                 standard-scan test that is equivalent to a two-pattern
                 test based on stuck-at tests {\em t$_i$ \/} and {\em
                 t$_j$}, it is not always possible to guarantee that the
                 combination of {\em t$_i$ \/} and {\em t$_j$ \/} will
                 detect a transition fault. To compensate for this, it
                 is necessary to try combinations of different stuck-at
                 test pairs, resulting in an increased simulation effort
                 to compute effective standard-scan tests. Our focus in
                 this work is on reducing this simulation effort by
                 reducing the number of stuck-at test pairs that need to
                 be considered.",
  acknowledgement = ack-nhfb,
  articleno =    "7",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Broadside tests; scan circuits; skewed-load tests;
                 stuck-at faults; transition faults",
}

@Article{Rao:2009:COT,
  author =       "Rajeev R. Rao and Vivek Joshi and David Blaauw and
                 Dennis Sylvester",
  title =        "Circuit optimization techniques to mitigate the
                 effects of soft errors in combinational logic",
  journal =      j-TODAES,
  volume =       "15",
  number =       "1",
  pages =        "5:1--5:??",
  month =        dec,
  year =         "2009",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1640457.1640462",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Mar 15 11:18:31 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Soft errors in combinational logic circuits are
                 emerging as a significant reliability problem for VLSI
                 designs. Technology scaling trends indicate that the
                 soft error rates (SER) of logic circuits will be a
                 dominant factor for future technology generations. SER
                 mitigation in logic can be accomplished by optimizing
                 either the gates inside a logic block or the flipflops
                 present on the block boundaries. We present novel
                 circuit optimization techniques that target these
                 elements separately as well as in unison to reduce the
                 SER of combinational logic circuits.\par

                 First, we describe the construction of a new class of
                 flip-flop variants that leverage the effect of temporal
                 masking by selectively increasing the length of the
                 latching window thereby preventing faulty transients
                 from being registered. In contrast to previous
                 flip-flop designs that rely on logic duplication and
                 complicated circuit design styles, the new variants are
                 redesigned from the library flip-flop using efficient
                 transistor sizing. We then propose a flip-flop
                 selection method that uses slack information at each
                 primary output node to determine the flip-flop
                 configuration that produces maximum SER savings. Next,
                 we propose a gate sizing algorithm that trades off SER
                 reduction and area overhead. This approach first
                 computes bounds on the maximum achievable SER reduction
                 by resizing a gate. This bound is then used to prune
                 the circuit graph, arriving at a smaller set of
                 candidate gates on which we perform incremental
                 sensitivity computations to determine the gates that
                 are the largest contributors to circuit SER. Third, we
                 propose a unified, co-optimization approach combining
                 flip-flop selection with the gate sizing algorithm. The
                 joint optimization algorithm produces larger SER
                 reductions while incurring smaller circuit overhead
                 than either technique taken in isolation. Experimental
                 results on a variety of benchmarks show average SER
                 reductions of 10.7X with gate sizing, 5.7X with
                 flip-flop assignment, and 30.1X for the combined
                 optimization approach, with no delay penalties and area
                 overheads within 5--6\%. The runtimes for the
                 optimization algorithms are on the order of 1--3
                 minutes.",
  acknowledgement = ack-nhfb,
  articleno =    "5",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "circuit optimization; combinational logic; sequential
                 circuits; Soft errors",
}

@Article{Wolinski:2009:ADA,
  author =       "Christophe Wolinski and Krzysztof Kuchcinski and Erwan
                 Raffin",
  title =        "Automatic design of application-specific
                 reconfigurable processor extensions with {UPaK}
                 synthesis kernel",
  journal =      j-TODAES,
  volume =       "15",
  number =       "1",
  pages =        "1:1--1:??",
  month =        dec,
  year =         "2009",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1640457.1640458",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Mar 15 11:18:31 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "This article presents a new tool for automatic design
                 of application-specific reconfigurable processor
                 extensions based on UPaK (Abstract Unified Patterns
                 Based Synthesis Kernel for Hardware and Software
                 Systems). We introduce a complete design flow that
                 identifies new instructions, selects specific
                 instructions and schedules a considered application on
                 the newly created reconfigurable architecture. The
                 identified extensions are implemented as specialized
                 sequential or parallel instructions. These instructions
                 are executed on a reconfigurable unit implementing all
                 merged patterns. Our method uses specially developed
                 algorithms for subgraph isomorphism that are
                 implemented as graph matching constraints. These
                 constraints together with separate algorithms are able
                 to efficiently identify computational patterns and
                 carry out application mapping and scheduling. Our
                 methods can handle both time-constrained and
                 resource-constrained scheduling. Experimental results
                 show that the presented method provides high coverage
                 of application graphs with a small number of patterns and
                 ensures high application execution speedup both for
                 sequential and parallel application execution with
                 reconfigurable processor extensions implementing
                 selected patterns.",
  acknowledgement = ack-nhfb,
  articleno =    "1",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "constraint programming; Reconfigurable architectures;
                 resource assignment; scheduling; system-level
                 synthesis",
}

@Article{Wu:2009:PCV,
  author =       "Meng-Chen Wu and Ming-Ching Lu and Hung-Ming Chen and
                 Jing-Yang Jou",
  title =        "Performance-constrained voltage assignment in multiple
                 supply voltage {SoC} floorplanning",
  journal =      j-TODAES,
  volume =       "15",
  number =       "1",
  pages =        "3:1--3:??",
  month =        dec,
  year =         "2009",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1640457.1640460",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Mar 15 11:18:31 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Using voltage island methodology to reduce power
                 consumption for System-on-a-Chip (SoC) designs has
                 become more and more popular recently. Currently this
                 approach has been considered either in system-level
                 architecture or postplacement stage. Since hierarchical
                 design and reusable intellectual property (IP) are
                 widely used, it is necessary to optimize
                 floorplanning/placement methodology considering voltage
                 islands generation to solve power and critical path
                 delay problems. In this article, we propose a
                 floorplanning methodology considering voltage islands
                 generation and performance constraints. Our method is
                 flexible and can be extended to hierarchical design.
                 The experimental results on some MCNC benchmarks show
                 that our method is effective in meeting performance
                 constraints and can simultaneously consider the
                 tradeoff between power routing cost and total power
                 dissipation.",
  acknowledgement = ack-nhfb,
  articleno =    "3",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Cabodi:2010:SHA,
  author =       "Gianpiero Cabodi and Luciano Lavagno and Marco
                 Murciano and Alex Kondratyev and Yosinori Watanabe",
  title =        "Speeding-up heuristic allocation, scheduling and
                 binding with {SAT}-based abstraction\slash refinement
                 techniques",
  journal =      j-TODAES,
  volume =       "15",
  number =       "2",
  pages =        "12:1--12:??",
  month =        feb,
  year =         "2010",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1698759.1698762",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Mar 15 11:19:08 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Hardware synthesis is the process by which
                 system-level, Register Transfer (RT)-level, or
                 behavioral descriptions can be turned into real
                 implementations, in terms of logic gates. Scheduling is
                 one of the most time-consuming steps in the overall
                 design flow, and may become much more complex when
                 performing hardware synthesis from high-level
                 specifications. Exploiting a single scheduling strategy
                 on very large designs is often reductive and
                 potentially inadequate. Furthermore, finding the
                 ``best'' single candidate among all possible scheduling
                 algorithms is practically infeasible. In this article
                 we introduce a hybrid scheduling approach that is a
                 preliminary step towards a comprehensive solution not
                 yet provided by industrial or by academic solutions.
                 Our method relies on an abstract symbolic
                 representation of data flow nodes (operations) bound to
                 control flow paths: it produces a more realistic lower
                 bound during the prescheduling resource estimation step
                 and speeds up slower but accurate heuristic scheduling
                 techniques, thus achieving a globally improved
                 result.",
  acknowledgement = ack-nhfb,
  articleno =    "12",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "allocation; binding; High level synthesis; resource
                 estimation; satisfiability; scheduling",
}

@Article{Chang:2010:CPA,
  author =       "Naehyuck Chang and J{\"o}rg Henkel",
  title =        "Call for papers: {ACM Transactions on Design
                 Automation of Electronic Systems (TODAES)} special
                 section on low-power electronics and design",
  journal =      j-TODAES,
  volume =       "15",
  number =       "2",
  pages =        "20:1--20:??",
  month =        feb,
  year =         "2010",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1698759.1698770",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Mar 15 11:19:08 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  articleno =    "20",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Das:2010:TAM,
  author =       "Dipankar Das and P. P. Chakrabarti and Rajeev Kumar",
  title =        "Thermal analysis of multiprocessor {SoC} applications
                 by simulation and verification",
  journal =      j-TODAES,
  volume =       "15",
  number =       "2",
  pages =        "15:1--15:??",
  month =        feb,
  year =         "2010",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1698759.1698765",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Mar 15 11:19:08 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Overheating of computer chips leads to degradation of
                 performance and reliability. Therefore, preventing
                 chips from overheating in spite of increased
                 performance requirements has emerged as a major
                 challenge. Since the cost of cooling has been rising
                 steadily, various architecture and application design
                 techniques are used to prevent chip overheating.
                 Temperature-aware task scheduling has emerged as an
                 important application design methodology for addressing
                 this problem in multiprocessor SoC systems.\par

                 In this work we present the formulation and
                 implementation of a method for analyzing the thermal
                 (chip heating) behavior of a MPSoC task schedule,
                 during the early stages of the design. We highlight the
                 challenges in developing such a framework and propose
                 solutions for tackling them. Due to nondeterminism in
                 task execution times and decision branches,
                 multiprocessor applications cannot be evaluated
                 accurately by the current state-of-the-art {\em
                 thermal\/} {\em simulation\/} and {\em steady-state\/}
                 analysis methods. Hence an analysis covering
                 nondeterministic execution behaviors is required for
                 thermal analysis of MPSoC task schedules. To address
                 this issue we propose a model checking-based approach
                 for solving the thermal analysis problem and formulate
                 it as a hybrid automata reachability verification
                 problem. We present an algorithm for constructing this
                 hybrid automata given the task schedule, a set of power
                 profiles of tasks, and the Compact Thermal Model (CTM)
                 of the chip. Information about task power consumption
                 is inferred from Markov chains which are learned from
                 power profiles of tasks, obtained from simulation or
                 emulation runs. A numerical analysis-based algorithm
                 which uses CounterExample-Guided Abstraction Refinement
                 (CEGAR) is developed for reachability analysis of this
                 hybrid automata. We propose a directed simulation
                 methodology which uses results of a time-bounded
                 analysis of the hybrid automata modeling thermal
                 behavior of the application, to simulate the expected
                 worst-case execution runs of the same. The algorithms
                 presented in this work have been implemented in a
                 prototype tool called {\em HeatCheck}. We present
                 experimental results and analysis of thermal behavior
                 of a set of task schedules executing on a MPSoC
                 system.",
  acknowledgement = ack-nhfb,
  articleno =    "15",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "chip temperature; hybrid automata; Markov chain;
                 multiprocessor system-on-chip; Thermal analysis",
}

@Article{Jamieson:2010:BER,
  author =       "Peter Jamieson and Tobias Becker and Peter Y. K.
                 Cheung and Wayne Luk and Tero Rissa and Teemu
                 Pitk{\"a}nen",
  title =        "Benchmarking and evaluating reconfigurable
                 architectures targeting the mobile domain",
  journal =      j-TODAES,
  volume =       "15",
  number =       "2",
  pages =        "14:1--14:??",
  month =        feb,
  year =         "2010",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1698759.1698764",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Mar 15 11:19:08 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "We present the GroundHog 2009 benchmarking suite that
                 evaluates the power consumption of reconfigurable
                 technology for applications targeting the mobile
                 computing domain. This benchmark suite includes seven
                 designs; one design targets fine-grained FPGA fabrics
                 allowing for quick state-of-the-art evaluation, and six
                 designs are specified at a high level allowing them to
                 target a range of existing and future reconfigurable
                 technologies. Each of the six designs can be stimulated
                 with the help of synthetically generated input stimuli
                 created by an open-source tool included in the
                 downloadable suite. Another tool is included to help
                 verify the correctness of each implemented design. To
                 demonstrate the potential of this benchmark suite, we
                 evaluate the power consumption of two modern industrial
                 FPGAs targeting the mobile domain. Also, we show how an
                 academic FPGA framework, VPR 5.0, that has been updated
                 for power estimates can be used to estimate the power
                 consumption of different FPGA architectures and an
                 open-source CAD flow mapping to these architectures.",
  acknowledgement = ack-nhfb,
  articleno =    "14",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "benchmark; Benchmarking; FPGAs; mobile; power",
}

@Article{Kurimoto:2010:PAE,
  author =       "Masanori Kurimoto and Hiroaki Suzuki and Rei Akiyama
                 and Tadao Yamanaka and Haruyuki Ohkuma and Hidehiro
                 Takata and Hirofumi Shinohara",
  title =        "Phase-adjustable error detection flip-flops with
                 2-stage hold-driven optimization, slack-based grouping
                 scheme and slack distribution control for dynamic
                 voltage scaling",
  journal =      j-TODAES,
  volume =       "15",
  number =       "2",
  pages =        "17:1--17:??",
  month =        feb,
  year =         "2010",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1698759.1698767",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Mar 15 11:19:08 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "For Dynamic Voltage Scaling (DVS), we propose a novel
                 design methodology. This methodology is composed of an
                 error detection circuit and three technologies to
                 reduce the area and power penalties which are the large
                 issues for the conventional DVS with error detection.
                 The proposed circuit, Phase-Adjustable Error Detection
                 Flip-Flop (PEDFF), adjusts the clock phase of an
                 additional FF for the timing error detection, based on
                 the timing slack. 2-Stage Hold-Driven Optimization
                 (2-SHDO) technology splits the hold-driven optimization
                 in two stages. Slack-Based Grouping Scheme (SBGS)
                 technology divides each timing path into appropriate
                 groups based on the timing slack. Slack Distribution
                 Control (SDC) technology improves the sharp
                 distribution of the path delay at which the logic
                 synthesis tool has relaxed the delay. We evaluate the
                 methodology by simulating a 32-bit microprocessor in 90
                 nm CMOS technology. The proposed methodology reduces
                 the energy consumption by 19.8\% compared to non-DVS.
                 The OR-tree's latency is shortened to 16.3\% compared
                 to the conventional DVS. The area and power penalties
                 for delay buffers on short paths are reduced to 35.0\%
                 and 40.6\% compared to the conventional DVS,
                 respectively. The proposed methodology with SDC reduces
                 the energy consumption by 17.0\% on another example
                 with the sharp slack distribution by the logic
                 synthesis compared to non-DVS.",
  acknowledgement = ack-nhfb,
  articleno =    "17",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "CTS; DVS; Error detection flip-flop; P{\&}R; STA",
}

@Article{Kwon:2010:SPC,
  author =       "Seongnam Kwon and Soonhoi Ha",
  title =        "Serialized parallel code generation framework for
                 {MPSoC}",
  journal =      j-TODAES,
  volume =       "15",
  number =       "2",
  pages =        "11:1--11:??",
  month =        feb,
  year =         "2010",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1698759.1698761",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Mar 15 11:19:08 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "The models of computations that express concurrency
                 naturally are preferred for initial specification of
                 MPSoC system, since popular programming languages such
                 as C and C++ are designed for sequential execution. In
                 our previous work, we proposed a design framework where
                 two models are used for the initial specification of
                 the system behavior; task model at the top level and
                 dataflow model inside each task. After the partition
                 and mapping process is performed with each architecture
                 candidate, the target code is automatically generated
                 for both Design-Space Exploration (DSE) and final
                 implementation. In this article, we focus on parallel
                 code generation for MPSoC, proposing two main
                 techniques. The first is to express functional and data
                 parallelism differently following the partition and
                 mapping decision. In the proposed technique, the
                 generated code consists of multiple tasks running
                 concurrently, which achieves functional parallelism. On
                 the other hand, we use OpenMP directives to express
                 data parallelism inside a task. Second is to adopt the
                 code serialization technique to execute a multitasking
                 application without OS scheduler, aiming to generate
                 the highly portable code on various platforms for an
                 efficient DSE process. We extend the previous code
                 serialization techniques to multiprocessor systems and
                 utilize the formal properties of the dataflow model for
                 efficient code generation. The experiments including
                 H.263 codec example show the viability of the proposed
                 technique and the efficiency of the generated code.",
  acknowledgement = ack-nhfb,
  articleno =    "11",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "design-space exploration; Embedded software;
                 multiprocessor system on chip; parallel programming;
                 software generation",
}

@Article{Li:2010:PAL,
  author = {Duo Li and Sheldon X.-D. Tan and Eduardo H. Pacheco and Murli
    Tirumala},
  title = {Parameterized architecture-level dynamic thermal models for
    multicore microprocessors},
  journal = j-TODAES,
  volume = {15},
  number = {2},
  pages = {16:1--16:??},
  month = feb,
  year = {2010},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/1698759.1698766},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Mon Mar 15 11:19:08 MDT 2010},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {In this article, we propose a new architecture-level
    parameterized dynamic thermal behavioral modeling algorithm for
    emerging thermal-related design and optimization problems for
    high-performance multicore microprocessor design. We propose a new
    approach, called {\em ParThermPOF}, to build the parameterized thermal
    performance models from the given accurate architecture thermal and
    power information. The new method can include a number of variable
    parameters such as the locations of thermal sensors in a heat sink,
    different components (heat sink, heat spreader, core, cache, etc.),
    thermal conductivity of heat sink materials, etc. The method consists
    of two steps: first, a response surface method based on low-order
    polynomials is applied to build the parameterized models at each time
    point for all the given sampling nodes in the parameter space. Second,
    an improved Generalized Pencil-Of-Function (GPOF) method is employed to
    build the transfer-function-based behavioral models for each
    time-varying coefficient of the polynomials generated in the first
    step. Experimental results on a practical quad-core microprocessor show
    that the generated parameterized thermal model matches the given data
    very well. The compact models by ParThermPOF offer two order of
    magnitudes speedup over the commercial thermal analysis tool {\em
    FloTHERM\/} on the given examples. ParThermPOF is very suitable for
    design space exploration and optimization where both time and system
    parameters need to be considered.},
  acknowledgement = ack-nhfb,
  articleno = {16},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems
    (TODAES)},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {architecture; behavioral modeling; chip-multiprocessor;
    Multicore; thermal modeling},
}

@Article{Paul:2010:LOC,
  author =       "Somnath Paul and Hamid Mahmoodi and Swarup Bhunia",
  title =        "Low-overhead {$ F_{\hbox {max}} $} calibration at
                 multiple operating points using delay-sensitivity-based
                 path selection",
  journal =      j-TODAES,
  volume =       "15",
  number =       "2",
  pages =        "19:1--19:??",
  month =        feb,
  year =         "2010",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1698759.1698769",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Mar 15 11:19:08 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Maximum operating frequency ($ F_{\hbox {max}} $) of a
                 system often needs to be determined at multiple
                 operating points, defined by voltage and temperatures.
                 Such calibration is important for the speed binning
                 process, where the voltage-frequency
                 (V-$ F_{\hbox {max}} $) relation needs to be accurately
                 determined to sort chips into different bins that can
                 be used for different applications. Moreover, adaptive
                 systems typically require $ F_{\hbox {max}} $
                 calibration at multiple operating points in order to
                 dynamically change operating condition such as supply
                 voltage or body bias for power, temperature, or
                 throughput management. For example, a Dynamic Voltage
                 and Frequency Scaling (DVFS) system requires accurate
                 delay calibration at multiple operating voltages in
                 order to apply the correct operating frequency
                 corresponding to a scaled supply. In this article, we
                 propose a low-overhead design technique that allows
                 efficient characterization of $ F_{\hbox {max}} $ at
                 different operating voltages and temperatures. The
                 proposed method selects a set of representative timing
                 paths in a circuit based on their temperature and
                 voltage sensitivities and dynamically configures them
                 into a ring oscillator to compute the critical path
                 delay. Compared to existing $ F_{\hbox {max}} $
                 calibration approaches, the proposed approach provides
                 the following two main advantages: (1) it introduces a
                 delay sensitivity metric to isolate few representative
                 timing paths; (2) it considers actual timing paths
                 instead of critical path replicas, thereby accounting
                 for local within-die delay variations. The all-digital
                 calibration method is robust under process variations
                 and achieves high delay estimation accuracy ($<$ 4\%
                 error) at the cost of negligible design overhead (1.7\%
                 in delay, 0.3\% in power, and 3.5\% in die-area).",
  acknowledgement = ack-nhfb,
  articleno =    "19",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "F max calibration; frequency binning; temperature
                 adaptation",
}

@Article{Reviriego:2010:RAM,
  author = {Pedro Reviriego and Juan Antonio Maestro and Chris J.
    Bleakley},
  title = {Reliability analysis of memories protected with {BICS} and a
    per-word parity bit},
  journal = j-TODAES,
  volume = {15},
  number = {2},
  pages = {18:1--18:??},
  month = feb,
  year = {2010},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/1698759.1698768},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Mon Mar 15 11:19:08 MDT 2010},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {This article presents an analysis of the reliability of
    memories protected with Built-in Current Sensors (BICS) and a per-word
    parity bit when exposed to Single Event Upsets (SEUs). Reliability is
    characterized by Mean Time to Failure (MTTF) for which two analytic
    models are proposed. A simple model, similar to the one traditionally
    used for memories protected with scrubbing, is proposed for the low
    error rate case. A more complex Markov model is proposed for the high
    error rate case. The accuracy of the models is checked using a wide set
    of simulations. The results presented in this article allow fast
    estimation of MTTF enabling design of optimal memory configurations to
    meet specified MTTF goals at minimum cost. Additionally the power
    consumption of memories protected with BICS is compared to that of
    memories using scrubbing in terms of the number of read cycles needed
    in both configurations.},
  acknowledgement = ack-nhfb,
  articleno = {18},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems
    (TODAES)},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {built-in current sensors; Error correcting codes;
    Fault-tolerant memory; high-level protection technique},
}

@Article{Schirner:2010:FAP,
  author = {Gunar Schirner and Andreas Gerstlauer and Rainer D{\"o}mer},
  title = {Fast and accurate processor models for efficient {MPSoC}
    design},
  journal = j-TODAES,
  volume = {15},
  number = {2},
  pages = {10:1--10:??},
  month = feb,
  year = {2010},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/1698759.1698760},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Mon Mar 15 11:19:08 MDT 2010},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {With growing system complexity and ever-increasing software
    content, the development of embedded software for upcoming MPSoC
    architectures is a tremendous challenge. Traditional ISS-based
    validation becomes infeasible due to the large complexity.\par

    Addressing the need for flexible and fast simulating models, we
    introduce in this article our approach of abstract processor modeling
    in the context of multiprocessor architectures. We combine modeling of
    computation on processors with an abstract RTOS and accurate interrupt
    handling into a versatile, multifaceted processor model with several
    levels of features.\par

    Our processor models are utilized in a framework allowing designers to
    develop a system in a top-down manner using automatic model generation
    and compilation down to a given MPSoC architecture. During generation,
    instances of our processor models are integrated into a system model
    combining software, hardware, and bus communication. The generated
    system model serves for rapid design space exploration and a fast and
    accurate system validation.\par

    Our experimental results show the benefits of our processor modeling
    using an actual multiprocessor mobile phone baseband platform. Our
    abstract models of this complex system reach a simulation speed of
    300MCycles/s within a high accuracy of less than 3\% error. In
    addition, our results quantify the speed/accuracy trade-off at varying
    abstraction levels of our models to guide future processor model
    designers.},
  acknowledgement = ack-nhfb,
  articleno = {10},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems
    (TODAES)},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {MPSoC; multi-processor system-on-chip; performance
    prediction/estimation; Processor modeling; system-level design; TLM;
    transaction-level model},
}

@Article{Yuan:2010:HSP,
  author = {Mingxuan Yuan and Zonghua Gu and Xiuqiang He and Xue Liu and
    Lei Jiang},
  title = {Hardware\slash software partitioning and pipelined scheduling
    on runtime reconfigurable {FPGAs}},
  journal = j-TODAES,
  volume = {15},
  number = {2},
  pages = {13:1--13:??},
  month = feb,
  year = {2010},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/1698759.1698763},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Mon Mar 15 11:19:08 MDT 2010},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {FPGAs are widely used in today's embedded systems design due
    to their low cost, high performance, and reconfigurability. Partially
    RunTime-Reconfigurable (PRTR) FPGAs, such as Virtex-2 Pro and Virtex-4
    from Xilinx, allow part of the FPGA area to be reconfigured while the
    remainder continues to operate without interruption, so that HW tasks
    can be placed and removed dynamically at runtime. We address two
    problems related to HW task scheduling on PRTR FPGAs: (1) HW/SW
    partitioning. Given an application in the form of a task graph with
    known execution times on the HW (FPGA) and SW (CPU), and known area
    sizes on the FPGA, find an valid allocation of tasks to either HW or SW
    and a static schedule with the optimization objective of minimizing the
    total schedule length (makespan). (2) Pipelined scheduling. Given an
    input task graph, construct a pipelined schedule on a PRTR FPGA with
    the goal of maximizing system throughput while meeting a given
    end-to-end deadline. Both problems are NP-hard. Satisfiability Modulo
    Theories (SMT) is an extension to SAT by adding the ability to handle
    arithmetic and other decidable theories. We use the SMT solver Yices
    with Linear Integer Arithmetic (LIA) theory as the optimization engine
    for solving the two scheduling problems. In addition, we present an
    efficient heuristic algorithm based on kernel recognition for the
    pipelined scheduling problem, a technique borrowed from SW pipelining,
    to overcome the scalability problem of the SMT-based optimal solution
    technique.},
  acknowledgement = ack-nhfb,
  articleno = {13},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems
    (TODAES)},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {HW/SW partitioning; runtime reconfigurable FPGA; scheduling},
}

@Article{Blanc:2010:RAS,
  author = {Nicolas Blanc and Daniel Kroening},
  title = {Race analysis for {SystemC} using model checking},
  journal = j-TODAES,
  volume = {15},
  number = {3},
  pages = {21:1--21:??},
  month = may,
  year = {2010},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/1754405.1754406},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Mon Jun 21 17:21:11 MDT 2010},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {SystemC is a system-level modeling language that offers a
    wide range of features to describe concurrent systems at different
    levels of abstraction. The SystemC standard permits simulators to
    implement a deterministic scheduling policy, which often hides
    concurrency-related design flaws. We present a novel compiler for
    SystemC that integrates a very precise formal race analysis by means of
    model checking. Our compiler produces a simulator that uses the outcome
    of the analysis to perform partial order reduction. The key insight to
    make the model checking engine scale is to apply it only to tiny
    fractions of the SystemC model. We show that the outcome of the
    analysis is not only valuable to eliminate redundant context switches
    at runtime, but can also be used to diagnose race conditions
    statically. In particular, our analysis is able to reveal races that
    can remain undetected during simulation and is able to formally prove
    the absence of races.},
  acknowledgement = ack-nhfb,
  articleno = {21},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems
    (TODAES)},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {formal analysis; model checking; partial-order reduction;
    simulation; SystemC},
}

@Article{Ahmed:2010:CBP,
  author = {Waseem Ahmed and Douglas Myers},
  title = {Concept-based partitioning for large multidomain
    multifunctional embedded systems},
  journal = j-TODAES,
  volume = {15},
  number = {3},
  pages = {22:1--22:??},
  month = may,
  year = {2010},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/1754405.1754407},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Mon Jun 21 17:21:11 MDT 2010},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {Hardware-software partitioning is an important phase in
    embedded systems. Decisions made during this phase impact the quality,
    cost, performance, and the delivery date of the final product. Over the
    past decade or more, various partitioning approaches have been
    proposed. A majority operate at a relatively fine granularity and use a
    low-level executable specification as the starting point. This presents
    problems if the context is families of industrial products with
    frequent release of upgraded or new members. Managing complexity using
    a low-level specification is extremely challenging and impacts
    developer productivity. Designing using a high-level specification and
    component-based development, although a better option, imposes
    component integration and replacement problems during system evolution
    and new product release. A new approach termed Concept-Based
    Partitioning is presented that focuses on system evolution, product
    lines, and large-scale reuse when partitioning. Beginning with
    information from UML 2.0 sequence diagrams and a concept repository
    concepts are identified and used as the unit of partitioning within a
    specification. A methodology for the refinement of interpart
    communication in the system specification using sequence diagrams is
    also presented. Change localization during system evolution,
    composability during large-scale reuse, and provision for configurable
    feature variations for a product line are facilitated by a Generic
    Adaptive Layer (GAL) around selected concepts. The methodology was
    applied on a subsystem of an Unmanned Aerial Vehicle (UAV) using
    various concepts which improved the composability of concepts while
    keeping performance and size overhead within the 2\% range.},
  acknowledgement = ack-nhfb,
  articleno = {22},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems
    (TODAES)},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {Codesign; embedded system design; product families; system
    evolution; system partitioning; UML},
}

@Article{Raval:2010:LPT,
  author = {R. K. Raval and C. H. Fernandez and C. J. Bleakley},
  title = {Low-power {TinyOS} tuned processor platform for wireless sensor
    network motes},
  journal = j-TODAES,
  volume = {15},
  number = {3},
  pages = {23:1--23:??},
  month = may,
  year = {2010},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/1754405.1754408},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Mon Jun 21 17:21:11 MDT 2010},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {In this article we describe a low-power processor platform
    for use in Wireless Sensor Network (WSN) nodes (motes). WSN motes are
    small, battery-powered devices comprised of a processor, sensors, and a
    radio frequency transceiver. It is expected that WSNs consisting of
    large numbers of motes will offer long-term, distributed monitoring,
    and control of real-world equipment and phenomena. A key requirement
    for these applications is long battery life. We investigate a processor
    platform architecture based on an application-specific programmable
    processor core, System-On-Chip bus, and a hardware accelerator. The
    architecture improves on the energy consumption of a conventional
    microprocessor design by tuning the architecture for a suite of
    TinyOS-based WSN applications. The tuning method used minimizes changes
    to the instruction set architecture facilitating rapid software
    migration to the new platform. The processor platform was implemented
    and validated in an FPGA-based WSN mote. The benefits of the approach
    in terms of energy consumption are estimated to be a reduction of 48\%
    for ASIC implementation relative to a conventional programmable
    processor for a typical TinyOS application suite without use of voltage
    scaling.},
  acknowledgement = ack-nhfb,
  articleno = {23},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems
    (TODAES)},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {Embedded system design; hardware-software codesign; low
    power processor; Wireless Sensor Network},
}

@Article{Guan:2010:RFP,
  author = {Xuan Guan and Yunsi Fei},
  title = {Register file partitioning and recompilation for register file
    power reduction},
  journal = j-TODAES,
  volume = {15},
  number = {3},
  pages = {24:1--24:??},
  month = may,
  year = {2010},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/1754405.1754409},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Mon Jun 21 17:21:11 MDT 2010},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {Register files in modern embedded processors contribute a
    substantial budget in the energy consumption due to their large
    switching capacitance and long working time. For some embedded
    processors, on average 25\% of registers account for 83\% of register
    file accessing time. This motivates us to partition the register file
    into hot and cold regions, with the most frequently used registers
    placed in the hot region, and the rarely accessed ones in the cold
    region. We employ the bit-line splitting and drowsy register cell
    techniques to reduce the overall register file accessing power. We
    propose a novel approach to partition the register in a way that can
    achieve the largest power saving. We formulate the register file
    partitioning process into a graph partitioning problem, and apply an
    effective algorithm to obtain the optimal result. We evaluate our
    algorithm for MiBench and SPEC2000 applications on the SimpleScalar
    PISA system, and an average saving of 58.3\% and 54.4\% over the
    nonpartitioned register file accessing power is achieved. The area
    overhead is negligible, and the execution time overhead is acceptable
    (5.5\% for MiBench 2.4\% for SPEC2000). Further evaluation for MiBench
    applications is performed on Alpha and X86 system.},
  acknowledgement = ack-nhfb,
  articleno = {24},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems
    (TODAES)},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {compilers; Low-power design; processor architectures;
    register file partitioning},
}

@Article{Zhang:2010:CSD,
  author = {Yufu Zhang and Ankur Srivastava and Mohamed Zahran},
  title = {On-chip sensor-driven efficient thermal profile estimation
    algorithms},
  journal = j-TODAES,
  volume = {15},
  number = {3},
  pages = {25:1--25:??},
  month = may,
  year = {2010},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/1754405.1754410},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Mon Jun 21 17:21:11 MDT 2010},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {This article addresses the problem of chip-level thermal
    profile estimation using runtime temperature sensor readings. We
    address the challenges of: (a) availability of only a few thermal
    sensors with constrained locations (sensors cannot be placed just
    anywhere); (b) random chip power density characteristics due to
    unpredictable workloads and fabrication variability. Firstly we model
    the random power density as a probability density function. Given such
    statistical characteristics and the runtime thermal sensor readings, we
    exploit the correlation in power dissipation among different chip
    modules to estimate the expected value of temperature at each chip
    location. Our methods are optimal if the underlying power density has
    Gaussian nature. We give a heuristic method to estimate the chip-level
    thermal profile when the underlying randomness is non-Gaussian. An
    extension of our method has also been proposed to address the dynamic
    case. Several speedup strategies are carefully investigated to improve
    the efficiency of the estimation algorithm. Experimental results
    indicated that, given only a few thermal sensors, our method can
    generate highly accurate chip-level thermal profile estimates within a
    few milliseconds.},
  acknowledgement = ack-nhfb,
  articleno = {25},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems
    (TODAES)},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {estimation; on-chip sensor; statistical; Thermal profile},
}

@Article{Chang:2010:LSC,
  author = {Kai-Hui Chang and Valeria Bertacco and Igor L. Markov and Alan
    Mishchenko},
  title = {Logic synthesis and circuit customization using extensive
    external don't-cares},
  journal = j-TODAES,
  volume = {15},
  number = {3},
  pages = {26:1--26:??},
  month = may,
  year = {2010},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/1754405.1754411},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Mon Jun 21 17:21:11 MDT 2010},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {Traditional digital circuit synthesis flows start from an
    HDL behavioral definition and assume that circuit functions are almost
    completely defined, making don't-care conditions rare. However, recent
    design methodologies do not always satisfy these assumptions. For
    instance, third-party IP blocks used in a system-on-chip are often
    overdesigned for the requirements at hand. By focusing only on the
    input combinations occurring in a specific application, one could
    resynthesize the system to greatly reduce its area and power
    consumption. Therefore we extend modern digital synthesis with a novel
    technique, called SWEDE, that makes use of extensive external
    don't-cares. In addition, we utilize such don't-cares present
    implicitly in existing simulation-based verification environments for
    circuit customization. Experiments indicate that SWEDE scales to large
    ICs with half-million input vectors and handles practical cases well.},
  acknowledgement = ack-nhfb,
  articleno = {26},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems
    (TODAES)},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {Circuit customization; don't-care optimization; logic
    synthesis},
}

@article{Liu:2010:ECR,
  author = {Shenghua Liu and Guoqiang Chen and Tom Tong Jing and
    Lei He and Robi Dutta and Xian-Long Hong},
  title = {Effective congestion reduction for {IC} package
    substrate routing},
  journal = j-TODAES,
  year = 2010,
  month = may,
  volume = 15,
  number = 3,
  pages = {27:1--27:??},
  articleno = {27},
  doi = {https://doi.org/10.1145/1754405.1754412},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  coden = {ATASFO},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {congestion reduction; IC package; routability;
    routing; substrate},
  abstract = {Off-chip substrate routing for high-density packages
    is challenging due to requirements such as high
    density, lack of vertical detour, non-Manhattan
    routing, and primarily planar routing. The existing
    substrate routing algorithms often result in a large
    number of unrouted nets that have to be routed
    manually. This article develops an effective yet
    efficient diffusion-driven method D-Router to reduce
    congestion. Starting with an initial routing, we
    develop an effective diffusion-based congestion
    reduction. We iteratively find a congested window and
    spread out connections to reduce congestion inside the
    window by a simulated diffusion process based on the
    duality between congestion and concentration. The
    window is released after the congestion is eliminated.
    Compared with the state-of-the-art substrate routing
    method that leads to 480 nets unrouted for ten
    industrial designs with a total of 6415 nets, the
    D-Router reduces the amount of unrouted nets to 104, a
    reduction to the 4.6 multiple. In addition, the
    D-Router obtains a similar reduction on unrouted nets
    but runs up to 94 times faster when compared with a
    negotiation-based substrate routing.},
  acknowledgement = ack-nhfb,
  bibdate = {Mon Jun 21 17:21:11 MDT 2010},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

@article{Shin:2010:PGC,
  author = {Youngsoo Shin and Jun Seomun and Kyu-Myung Choi and
    Takayasu Sakurai},
  title = {Power gating: {Circuits}, design methodologies, and
    best practice for standard-cell {VLSI} designs},
  journal = j-TODAES,
  year = 2010,
  month = sep,
  volume = 15,
  number = 4,
  pages = {28:1--28:??},
  articleno = {28},
  doi = {https://doi.org/10.1145/1835420.1835421},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  coden = {ATASFO},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {design methodology; leakage current; low power; Power
    gating; standard-cell; VLSI},
  abstract = {Power Gating has become one of the most widely used
    circuit design techniques for reducing leakage current.
    Its concept is very simple, but its application to
    standard-cell VLSI designs involves many careful
    considerations. The great complexity of designing a
    power-gated circuit originates from the side effects of
    inserting current switches, which have to be resolved
    by a combination of extra circuitry and customized
    tools and methodologies. In this tutorial we survey
    these design considerations and look at the best
    practice within industry and academia. Topics include
    output isolation and data retention, current switch
    design and sizing, and physical design issues such as
    power networks, increases in area and wirelength, and
    power grid analysis. Designers can benefit from this
    tutorial by obtaining a better understanding of
    implications of power gating during an early stage of
    VLSI designs. We also review the ways in which power
    gating has been improved. These include reducing the
    sizes of switches, cutting transition delays, applying
    power gating to smaller blocks of circuitry, and
    reducing the energy dissipated in mode transitions.
    Power Gating has also been combined with other circuit
    techniques, and these hybrids are also reviewed.
    Important open problems are identified as a stimulus to
    research.},
  acknowledgement = ack-nhfb,
  bibdate = {Wed Oct 6 09:42:42 MDT 2010},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

@article{Yu:2010:PSA,
  author = {Cheng-Juei Yu and Yi-Hsin Wu and Sheng-De Wang},
  title = {An in-place search algorithm for the resource
    constrained scheduling problem during high-level
    synthesis},
  journal = j-TODAES,
  year = 2010,
  month = sep,
  volume = 15,
  number = 4,
  pages = {29:1--29:??},
  articleno = {29},
  doi = {https://doi.org/10.1145/1835420.1835422},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  coden = {ATASFO},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {Design automation; exact scheduling; high-level
    synthesis; optimal scheduling; resource-constrained
    scheduling},
  abstract = {We propose an in-place search algorithm for computing
    the exact solutions to the resource constrained
    scheduling problem. This algorithm supports operation
    chaining, pipelining and multicycling in the underlying
    scheduling problem. Based on two lower-bound estimation
    mechanisms that are capable of predicting the criterion
    values of search nodes represented by partially
    scheduled data flow graphs, the proposed algorithm can
    effectively prune the nonpromising search space and
    finds the optimum usually several times faster than
    existing techniques. As opposed to existing
    search-based scheduling techniques whose space
    complexity is squared or exponential in the search
    depth, our approach requires only a constant storage
    space during the traversal of the search tree. The low
    space complexity is accomplished by using a
    combination-generating algorithm, which leads our
    approach to visit search nodes in such a way that each
    one is obtained by making only a small change to its
    sibling without keeping any parent nodes in memory.
    Experimental results on several well known benchmarks
    with varying resource constraints show the
    effectiveness of the proposed algorithm.},
  acknowledgement = ack-nhfb,
  bibdate = {Wed Oct 6 09:42:42 MDT 2010},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

@article{Lee:2010:PTP,
  author = {Kyoungwoo Lee and Aviral Shrivastava and Nikil Dutt
    and Nalini Venkatasubramanian},
  title = {Partitioning techniques for partially protected caches
    in resource-constrained embedded systems},
  journal = j-TODAES,
  year = 2010,
  month = sep,
  volume = 15,
  number = 4,
  pages = {30:1--30:??},
  articleno = {30},
  doi = {https://doi.org/10.1145/1835420.1835423},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  coden = {ATASFO},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {embedded systems; Page partitioning technique;
    partially protected cache; soft error; vulnerability},
  abstract = {Increasing exponentially with technology scaling, the
    soft error rate even in earth-bound embedded systems
    manufactured in deep subnanometer technology is
    projected to become a serious design consideration.
    Partially protected cache (PPC) is a promising
    microarchitectural feature to mitigate failures due to
    soft errors in power, performance, and cost sensitive
    embedded processors. A processor with PPC maintains two
    caches, one protected and the other unprotected, both
    at the same level of memory hierarchy. The intuition
    behind PPCs is that not all data in the application is
    equally prone to soft errors. By finding and mapping
    the data that is more prone to soft errors to the
    protected cache, and error-resilient data to the
    unprotected cache, failures induced by soft errors can
    be significantly reduced at a minimal power and
    performance penalty. Consequently, the effectiveness of
    PPCs critically hinges on the compiler's ability to
    partition application data into error-prone and
    error-resilient data. The effectiveness of PPCs has
    previously been demonstrated on multimedia applications
    --- where an obvious partitioning of data exists, the
    multimedia data is inherently resilient to soft errors,
    and the rest of the data and the entire code is assumed
    to be error-prone. Since the amount of multimedia data
    is a quite significant component of the entire
    application data, this obvious partitioning is quite
    effective. However, no such obvious data and code
    partitioning exists for general applications. This
    severely restricts the applicability of PPCs to data
    caches and instruction caches in general. This article
    investigates vulnerability-based partitioning schemes
    that are applicable to applications in general and
    effectively reduce failures due to soft errors at
    minimal power and performance overheads.\par

    Our experimental results on an HP iPAQ-like processor
    enhanced with PPC architecture, running benchmarks from
    the MiBench suite demonstrate that our partitioning
    heuristic efficiently finds page partitions for data
    PPCs that can reduce the failure rate by 48\% at only
    2\% performance and 7\% energy overhead, and finds page
    partitions for instruction PPCs that reduce the failure
    rate by 50\% at only 2\% performance and 8\% energy
    overhead, on average.},
  acknowledgement = ack-nhfb,
  bibdate = {Wed Oct 6 09:42:42 MDT 2010},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

@article{Bonny:2010:HBC,
  author = {Talal Bonny and J{\"o}rg Henkel},
  title = {{Huffman}-based code compression techniques for
    embedded processors},
  journal = j-TODAES,
  year = 2010,
  month = sep,
  volume = 15,
  number = 4,
  pages = {31:1--31:??},
  articleno = {31},
  doi = {https://doi.org/10.1145/1835420.1835424},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  coden = {ATASFO},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {code compression; code density; Embedded systems;
    Huffman coding},
  abstract = {The size of embedded software is increasing at a rapid
    pace. It is often challenging and time consuming to fit
    an amount of required software functionality within a
    given hardware resource budget. Code compression is a
    means to alleviate the problem by providing substantial
    savings in terms of code size. In this article we
    introduce a novel and efficient hardware-supported
    compression technique that is based on Huffman Coding.
    Our technique reduces the size of the generated
    decoding table, which takes a large portion of the
    memory. It combines our previous techniques,
    Instruction Splitting Technique and Instruction
    Re-encoding Technique into new one called Combined
    Compression Technique to improve the final compression
    ratio by taking advantage of both previous techniques.
    The instruction Splitting Technique is instruction set
    architecture (ISA)-independent. It splits the
    instructions into portions of varying size (called
    patterns) before Huffman coding is applied. This
    technique improves the final compression ratio by more
    than 20\% compared to other known schemes based on
    Huffman Coding. The average compression ratios achieved
    using this technique are 48\% and 50\% for ARM and
    MIPS, respectively. The Instruction Re-encoding
    Technique is ISA-dependent. It investigates the
    benefits of reencoding unused bits (we call them
    reencodable bits) in the instruction format for a
    specific application to improve the compression ratio.
    Reencoding those bits can reduce the size of decoding
    tables by up to 40\%. Using this technique, we improve
    the final compression ratios in comparison to the first
    technique to 46\% and 45\% for ARM and MIPS,
    respectively (including all overhead that incurs). The
    Combined Compression Technique improves the compression
    ratio to 45\% and 42\% for ARM and MIPS, respectively.
    In our compression technique, we have conducted
    evaluations using a representative set of applications
    and we have applied each technique to two major
    embedded processor architectures, namely ARM and
    MIPS.},
  acknowledgement = ack-nhfb,
  bibdate = {Wed Oct 6 09:42:42 MDT 2010},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

@article{Li:2010:CPG,
  author = {Zhifang Li and Wenjian Luo and Lihua Yue and Xufa
    Wang},
  title = {On the completeness of the polymorphic gate set},
  journal = j-TODAES,
  year = 2010,
  month = sep,
  volume = 15,
  number = 4,
  pages = {32:1--32:??},
  articleno = {32},
  doi = {https://doi.org/10.1145/1835420.1835425},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  coden = {ATASFO},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {completeness theory; polymorphic circuit; Polymorphic
    electronics; polymorphic gate},
  abstract = {Polymorphic gates are special kinds of logic gates
    that can exhibit different functions under the control
    of environmental parameters, such as light,
    temperature, and VDD. These polymorphic gates can be
    used to build polymorphic circuits that perform
    different functions under different environments.
    Because polymorphic gates are different from
    traditional logic gates, the existent completeness
    theory for the traditional logic gate set is not
    suitable for the polymorphic gate set. So far, only the
    definition of the complete polymorphic gate set is
    given. There is no approach to judging whether a given
    polymorphic gate set is complete. The contributions of
    this article include three aspects. First, the impact
    of logic-1 and logic-0 on the completeness of the
    polymorphic gate set is discussed. Second, the theory
    and two related algorithms for judging the completeness
    of polymorphic gate sets with two modes are given.
    Finally, the theory and related algorithms for complete
    polymorphic gate sets with more than two modes are
    proposed.},
  acknowledgement = ack-nhfb,
  bibdate = {Wed Oct 6 09:42:42 MDT 2010},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

@article{Wang:2010:CDF,
  author = {Renshen Wang and Evangeline Young and Chung-Kuan
    Cheng},
  title = {Complexity of {$3$-D} floorplans by analysis of graph
    cuboidal dual hardness},
  journal = j-TODAES,
  year = 2010,
  month = sep,
  volume = 15,
  number = 4,
  pages = {33:1--33:??},
  articleno = {33},
  doi = {https://doi.org/10.1145/1835420.1835426},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  coden = {ATASFO},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {3-D integration; cuboidal dual; floorplanning;
    hardness},
  abstract = {Interconnect dominated electronic design stimulates a
    demand for developing circuits on the third dimension,
    leading to 3-D integration. Recent advances in chip
    fabrication technology enable 3-D circuit
    manufacturing. However, there is still a possible
    barrier of design complexity in exploiting 3-D
    technologies. This article discusses the impact of
    migrating from 2-D to 3-D on the difficulty of
    floorplanning and placement. By looking at a basic
    formulation of the graph cuboidal dual problem, we show
    that the 3-D cases and the 3-layer 2.5-D cases are
    fundamentally more difficult than the 2-D cases in
    terms of computational complexity. By comparison among
    these cases, the intrinsic complexity in 3-D floorplan
    structures is revealed in the hard-to-decide relations
    between topological connections and geometrical
    contacts. The results show possible challenges in the
    future for physical design and CAD of 3-D integrated
    circuits.},
  acknowledgement = ack-nhfb,
  bibdate = {Wed Oct 6 09:42:42 MDT 2010},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

@article{Chang:2010:GEC,
  author = {Naehyuck Chang and J{\"o}rg Henkel},
  title = {Guest Editorial: Current Trends in Low-Power Design},
  journal = j-TODAES,
  year = 2010,
  month = nov,
  volume = 16,
  number = 1,
  pages = {1:1--1:??},
  articleno = {1},
  doi = {https://doi.org/10.1145/1870109.1870110},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  coden = {ATASFO},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  acknowledgement = ack-nhfb,
  bibdate = {Thu Dec 9 11:12:21 MST 2010},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

@article{Bol:2010:NME,
  author = {David Bol and Denis Flandre and Jean-Didier Legat},
  title = {Nanometer {MOSFET} Effects on the Minimum-Energy Point
    of Sub-45nm Subthreshold Logic---Mitigation at
    Technology and Circuit Levels},
  journal = j-TODAES,
  year = 2010,
  month = nov,
  volume = 16,
  number = 1,
  pages = {2:1--2:??},
  articleno = {2},
  doi = {https://doi.org/10.1145/1870109.1870111},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  coden = {ATASFO},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  abstract = {Subthreshold operation of digital circuits enables
    minimum energy consumption. In this article, we observe
    that minimum energy Emin of subthreshold logic
    dramatically increases when reaching 45nm CMOS node. We
    demonstrate by circuit simulation and analytical
    modeling that this increase comes from the combined
    effects of variability, gate leakage, and Drain-Induced
    Barrier Lowering (DIBL) effect. We then investigate the
    new impact of individual MOSFET parameters Lg, Vt, and
    Tox on Emin in sub-45nm technologies. We further
    propose an optimum MOSFET selection, which favors
    low-Vt mid-Lg devices in 45nm CMOS technology. The use
    of such optimum MOSFETs yields 35\% Emin reduction for
    a benchmark multiplier with good speed performances and
    negligible area overhead.},
  acknowledgement = ack-nhfb,
  bibdate = {Thu Dec 9 11:12:21 MST 2010},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

@article{Calimera:2010:NAC,
  author = {Andrea Calimera and Enrico Macii and Massimo Poncino},
  title = {{NBTI}-Aware Clustered Power Gating},
  journal = j-TODAES,
  year = 2010,
  month = nov,
  volume = 16,
  number = 1,
  pages = {3:1--3:??},
  articleno = {3},
  doi = {https://doi.org/10.1145/1870109.1870112},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  coden = {ATASFO},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  abstract = {The emergence of Negative Bias Temperature Instability
    (NBTI) as the most relevant source of reliability in
    sub-90nm technologies has led to a new facet of the
    traditional trade-off between power and reliability.
    NBTI effects in fact manifest themselves as an increase
    of the propagation delay of the devices over time,
    which adds up to the delay penalty incurred by most
    low-power design solutions. This implies that, given a
    desired lifetime of a circuit (i.e., a given
    performance target at some point in time), a
    power-managed component will fail earlier than a
    nonpower-managed one. In this work, we show how it is
    possible to partially overcome this conflict, by
    leveraging the benefits in terms of aging provided by
    power-gating (i.e., by using switches that disconnect a
    logic block from the ground).},
  acknowledgement = ack-nhfb,
  bibdate = {Thu Dec 9 11:12:21 MST 2010},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

@article{Cong:2010:BLO,
  author = {Jason Cong and Bin Liu and Rupak Majumdar and Zhiru
    Zhang},
  title = {Behavior-Level Observability Analysis for Operation
    Gating in Low-Power Behavioral Synthesis},
  journal = j-TODAES,
  year = 2010,
  month = nov,
  volume = 16,
  number = 1,
  pages = {4:1--4:??},
  articleno = {4},
  doi = {https://doi.org/10.1145/1870109.1870113},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  coden = {ATASFO},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  abstract = {Many techniques for power reduction in advanced RTL
    synthesis tools rely explicitly or implicitly on
    observability don't-care conditions. In this article we
    propose a systematic approach to maximize the
    effectiveness of these techniques by generating
    power-friendly RTL descriptions in behavioral
    synthesis. This is done using operation gating, that
    is, explicitly adding a predicate to an operation based
    on its observability condition, so that the operation,
    once identified as unobservable at runtime, can be
    avoided using RTL power optimization techniques such as
    clock gating. We first introduce the concept of
    behavior-level observability and its approximations in
    the context of behavioral synthesis. We then propose an
    efficient procedure to compute an approximated
    behavior-level observability of every operation in a
    dataflow graph.},
  acknowledgement = ack-nhfb,
  bibdate = {Thu Dec 9 11:12:21 MST 2010},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

@Article{Thorolfsson:2010:LPH,
  author =       "Thorlindur Thorolfsson and Samson Melamed and W. Rhett
                 Davis and Paul D. Franzon",
  title =        "Low-Power Hypercube Divided Memory {FFT} Engine Using
                 {$3$D} Integration",
  journal =      j-TODAES,
  volume =       "16",
  number =       "1",
  pages =        "5:1--5:??",
  month =        nov,
  year =         "2010",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1870109.1870114",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Dec 9 11:12:21 MST 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "In this article we demonstrate a floating point FFT
                 processor that leverages both 3D integration and a
                 unique hypercube memory division scheme to reduce the
                 power consumption of a 1024 point FFT down to
                 4.227~$\mu$J. The hypercube memory division scheme lowers the
                 energy per memory access by 59.2\% and increases the
                 total required area by 16.8\%. The use of 3D
                 integration reduces the logic power by 5.2\%. We
                 describe the tool flow required to realize the 3D
                 implementation and perform a thermal analysis of it.",
  acknowledgement = ack-nhfb,
  articleno =    "5",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@article{Dhiman:2010:VSE,
  author = {Gaurav Dhiman and Giacomo Marchetti and Tajana
    Rosing},
  title = {{vGreen}: a System for Energy-Efficient Management of
    Virtual Machines},
  journal = j-TODAES,
  year = 2010,
  month = nov,
  volume = 16,
  number = 1,
  pages = {6:1--6:??},
  articleno = {6},
  doi = {https://doi.org/10.1145/1870109.1870115},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  coden = {ATASFO},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  abstract = {In this article, we present vGreen, a multitiered
    software system for energy-efficient virtual machine
    management in a clustered virtualized environment. The
    system leverages the use of novel hierarchical metrics
    that work across the different abstractions in a
    virtualized environment to capture power and
    performance characteristics of both the virtual and
    physical machines. These characteristics are then used
    to implement policies for scheduling and power
    management of virtual machines across the cluster.},
  acknowledgement = ack-nhfb,
  bibdate = {Thu Dec 9 11:12:21 MST 2010},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

@article{Kim:2010:EEP,
  author = {Jinsik Kim and Pai H. Chou},
  title = {Energy-Efficient Progressive Remote Update for
    Flash-Based Firmware of Networked Embedded Systems},
  journal = j-TODAES,
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  volume = {16},
  number = {1},
  pages = {7:1--7:??},
  articleno = {7},
  month = nov,
  year = {2010},
  doi = {https://doi.org/10.1145/1870109.1870116},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  coden = {ATASFO},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate = {Thu Dec 9 11:12:21 MST 2010},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  abstract = {Firmware update over a network connection is an
    essential but expensive feature for many embedded
    systems due to not only the relatively high power
    consumption and limited bandwidth, but also
    page-granular erasure before rewriting to flash memory.
    This work proposes a page-level, link-time technique
    that minimizes not only the size of patching scripts
    but also perturbation to the firmware memory, over the
    entire sequence of updates in the system's lifetime. We
    propose a tool that first clusters functions to
    minimize caller-callee dependency across pages, and
    then orders the functions within each page to minimize
    intrapage perturbation. Experimental results show our
    technique to reduce the energy consumption of firmware
    update by 30--42\% over the state-of-the-art.},
}

@article{Yu:2010:EPE,
  author = {Chenjie Yu and Peter Petrov},
  title = {Energy- and Performance-Efficient Communication
    Framework for Embedded {MPSoCs} through
    Application-Driven Release Consistency},
  journal = j-TODAES,
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  volume = {16},
  number = {1},
  pages = {8:1--8:??},
  articleno = {8},
  month = nov,
  year = {2010},
  doi = {https://doi.org/10.1145/1870109.1870117},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  coden = {ATASFO},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate = {Thu Dec 9 11:12:21 MST 2010},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  abstract = {We present a framework for performance-, bandwidth-,
    and energy-efficient intercore communication in
    embedded MultiProcessor Systems-on-a-Chip (MPSoC). The
    methodology seamlessly integrates compiler, operating
    system, and hardware support to achieve a low-cost
    communication between synchronized producers and
    consumers. The technique is especially beneficial for
    data-streaming applications exploiting pipeline
    parallelism with computational phases mapped to
    separate cores. Code transformations utilizing a simple
    ISA support ensure that producer writes are propagated
    to consumers with a single interconnect transaction per
    cache block just prior to the producer exiting its
    synchronization region. Furthermore, in order to
    completely eliminate misses to shared data caused by
    interference with private data and also to minimize the
    cache energy, we integrate to the proposed framework a
    cache way partitioning policy based on a simple cache
    configurability support, which isolates the shared
    buffers from other cache traffic.},
}

@article{Jayakumar:2010:SIV,
  author = {Nikhil Jayakumar and Sunil P. Khatri},
  title = {A Simultaneous Input Vector Control and Circuit
    Modification Technique to Reduce Leakage with Zero
    Delay Penalty},
  journal = j-TODAES,
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  volume = {16},
  number = {1},
  pages = {9:1--9:??},
  articleno = {9},
  month = nov,
  year = {2010},
  doi = {https://doi.org/10.1145/1870109.1870118},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  coden = {ATASFO},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate = {Thu Dec 9 11:12:21 MST 2010},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  abstract = {Leakage power currently comprises a large fraction of
    the total power consumption of an IC. Techniques to
    minimize leakage have been researched widely. However,
    most approaches to reducing leakage have an associated
    performance penalty. In this article, we present an
    approach which minimizes leakage by simultaneously
    modifying the circuit while deriving the input vector
    that minimizes leakage. In our approach, we selectively
    modify a gate so that its output (in sleep mode) is in
    a state which helps minimize the leakage of other gates
    in its transitive fanout. Gate replacement is performed
    in a slack-aware manner, to minimize the resulting
    delay penalty.},
}

@article{Wu:2010:SCR,
  author = {Yu-Ze Wu and Mango C.-T. Chao},
  title = {Scan-Cell Reordering for Minimizing Scan-Shift Power
    Based on Nonspecified Test Cubes},
  journal = j-TODAES,
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  volume = {16},
  number = {1},
  pages = {10:1--10:??},
  articleno = {10},
  month = nov,
  year = {2010},
  doi = {https://doi.org/10.1145/1870109.1870119},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  coden = {ATASFO},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate = {Thu Dec 9 11:12:21 MST 2010},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  abstract = {This article presents several scan-cell reordering
    techniques to reduce the signal transitions during the
    test mode while preserving the don't-care bits in the
    test patterns for a later optimization. Combined with a
    pattern-filling technique, the proposed scan-cell
    reordering techniques can utilize both high response
    correlations and pattern correlations to simultaneously
    minimize scan-out and scan-in transitions. Those
    scan-shift transitions can be further reduced by
    selectively using the inverse connections between scan
    cells. In addition, the trade-off between routing
    overhead and power consumption can also be controlled
    by the proposed scan-cell reordering techniques.},
}

@article{Singh:2010:AJE,
  author = {Montek Singh and Steven M. Nowick},
  title = {{ACM Journal on Emerging Technologies in Computing
    Systems}},
  journal = j-TODAES,
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  volume = {16},
  number = {1},
  pages = {11:1--11:??},
  articleno = {11},
  month = nov,
  year = {2010},
  doi = {https://doi.org/10.1145/1870109.1870120},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  coden = {ATASFO},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate = {Thu Dec 9 11:12:21 MST 2010},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@Article{Pedram:2011:CPV,
  author =       "Massoud Pedram",
  title =        "Call for Papers: Verification Issue and Challenges
                 with Multicore Systems",
  journal =      j-TODAES,
  volume =       "16",
  number =       "2",
  pages =        "12:1--12:??",
  month =        mar,
  year =         "2011",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1929943.1929944",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Apr 1 16:07:45 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  articleno =    "12",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Bernasconi:2011:DRB,
  author =       "Anna Bernasconi and Valentina Ciriani",
  title =        "Dimension-Reducible {Boolean} Functions Based on
                 Affine Spaces",
  journal =      j-TODAES,
  volume =       "16",
  number =       "2",
  pages =        "13:1--13:??",
  month =        mar,
  year =         "2011",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1929943.1929945",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Apr 1 16:07:45 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "We define and study a new class of regular Boolean
                 functions called D-reducible. A D-reducible function,
                 depending on all its $n$ input variables, can be
                 studied and synthesized in a space of dimension
                 strictly smaller than $n$. We show that the
                 D-reducibility property can be efficiently tested, in
                 time polynomial in the representation of $f$, that is,
                 an initial SOP form of $f$. A D-reducible function can
                 be efficiently decomposed, giving rise to a new logic
                 form, that we have called DredSOP. This form is shown
                 here to be generally smaller than the corresponding
                 minimum SOP form.",
  acknowledgement = ack-nhfb,
  articleno =    "13",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Wang:2011:OAE,
  author =       "Yi Wang and Hui Liu and Duo Liu and Zhiwei Qin and
                 Zili Shao and Edwin H.-M. Sha",
  title =        "Overhead-Aware Energy Optimization for Real-Time
                 Streaming Applications on Multiprocessor
                 {System-on-Chip}",
  journal =      j-TODAES,
  volume =       "16",
  number =       "2",
  pages =        "14:1--14:??",
  month =        mar,
  year =         "2011",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1929943.1929946",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Apr 1 16:07:45 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "In this article, we focus on solving the energy
                 optimization problem for real-time streaming
                 applications on multiprocessor System-on-Chip by
                 combining task-level coarse-grained software pipelining
                 with DVS (Dynamic Voltage Scaling) and DPM (Dynamic
                 Power Management) considering transition overhead,
                 inter-core communication and discrete voltage levels.
                 We propose a two-phase approach to solve the problem.
                 In the first phase, we propose a coarse-grained task
                 parallelization algorithm called RDAG to transform a
                 periodic dependent task graph into a set of independent
                 tasks by exploiting the periodic feature of streaming
                 applications. In the second phase, we propose a
                 scheduling algorithm, GeneS, to optimize energy
                 consumption. GeneS is a genetic algorithm that can
                 search and find the best schedule within the solution
                 space generated by gene evolution.",
  acknowledgement = ack-nhfb,
  articleno =    "14",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Cong:2011:AMP,
  author =       "Jason Cong and Wei Jiang and Bin Liu and Yi Zou",
  title =        "Automatic Memory Partitioning and Scheduling for
                 Throughput and Power Optimization",
  journal =      j-TODAES,
  volume =       "16",
  number =       "2",
  pages =        "15:1--15:??",
  month =        mar,
  year =         "2011",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1929943.1929947",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Apr 1 16:07:45 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Memory bottleneck has become a limiting factor in
                 satisfying the explosive demands on performance and
                 cost in modern embedded system design. Selected
                 computation kernels for acceleration are usually
                 captured by nest loops, which are optimized by
                 state-of-the-art techniques like loop tiling and loop
                 pipelining. However, memory bandwidth bottlenecks
                 prevent designs from reaching optimal throughput with
                 respect to available parallelism. In this paper we
                 present an automatic memory partitioning technique
                 which can efficiently improve throughput and reduce
                 energy consumption of pipelined loop kernels for given
                 throughput constraints and platform requirements. Also,
                 our proposed algorithm can handle general array access
                 beyond affine array references.",
  acknowledgement = ack-nhfb,
  articleno =    "15",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Yan:2011:MUT,
  author =       "Guihai Yan and Yinhe Han and Hui Liu and Xiaoyao Liang
                 and Xiaowei Li",
  title =        "{MicroFix}: Using Timing Interpolation and Delay
                 Sensors for Power Reduction",
  journal =      j-TODAES,
  volume =       "16",
  number =       "2",
  pages =        "16:1--16:??",
  month =        mar,
  year =         "2011",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1929943.1929948",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Apr 1 16:07:45 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Traditional DVFS schemes are oblivious to fine-grained
                 adaptability resulting from path-grained timing
                 imbalance. With the awareness of such fine-grained
                 adaptability, better power-performance efficiency can
                 be obtained. We propose a new scheme, MicroFix, to
                 exploit such fine-grained adaptability. We first show
                 the potential resulted from the path-grained timing
                 imbalance and then present a new technique, Timing
                 Interpolation, to reap the fine-grained adaptability
                 for power reduction. Moreover, to eliminate the
                 conservative margins of traditional DVFS, unlike the
                 previous approaches such as Razor that reactively
                 handle the delay errors (induced by aggressively scaled
                 voltage/frequency) by enabling error detection and
                 recovery, we propose a proactive approach by error
                 prediction, thereby obviate the high-cost recovery
                 routines.",
  acknowledgement = ack-nhfb,
  articleno =    "16",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Pomeranz:2011:RSA,
  author =       "Irith Pomeranz and Sudhakar M. Reddy",
  title =        "Reducing the Switching Activity of Test Sequences
                 under Transparent-Scan",
  journal =      j-TODAES,
  volume =       "16",
  number =       "2",
  pages =        "17:1--17:??",
  month =        mar,
  year =         "2011",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1929943.1929949",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Apr 1 16:07:45 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Transparent-scan is a test application scheme for scan
                 circuits. It provides unique opportunities for test
                 compaction that do not exist with the standard test
                 application scheme. We show that it also provides
                 unique opportunities for reducing the power dissipation
                 of a scan-based test set. After translating a standard
                 scan-based test set into a transparent-scan sequence,
                 we apply two operations for reducing the power
                 dissipation of the sequence. The first operation
                 attempts to remove a test vector that causes high power
                 dissipation. The second operation attempts to replace a
                 scan clock cycle with a functional clock cycle, or a
                 functional clock cycle with a scan clock cycle, in
                 order to reduce the power dissipation.",
  acknowledgement = ack-nhfb,
  articleno =    "17",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Cauley:2011:PBC,
  author =       "Stephen Cauley and Venkataramanan Balakrishnan and Y.
                 Charlie Hu and Cheng-Kok Koh",
  title =        "A Parallel Branch-and-Cut Approach for Detailed
                 Placement",
  journal =      j-TODAES,
  volume =       "16",
  number =       "2",
  pages =        "18:1--18:??",
  month =        mar,
  year =         "2011",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1929943.1929950",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Apr 1 16:07:45 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "We introduce a technique that utilizes distributing
                 computing resources for the efficient optimization of a
                 traditional physical design problem. Specifically, we
                 present a detailed placement strategy designed to
                 exploit distributed computing environments, where the
                 additional computing resources are employed in parallel
                 to improve the optimization time. A Mixed Integer
                 Programming (MIP) model and branch-and-cut optimization
                 strategy are employed to solve the standard cell
                 placement problem. By exploiting the problem structure,
                 our algorithm improves upon the solutions afforded by
                 existing optimization algorithms. First, an efficient
                 batch-branching technique can eliminate several integer
                 decision variables during each step of the optimization
                 procedure. This batch-branching scheme can be performed
                 serially or in parallel.",
  acknowledgement = ack-nhfb,
  articleno =    "18",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Li:2011:GRS,
  author =       "Yih-Lang Li and Yu-Ning Chang and Wen-Nai Cheng",
  title =        "A Gridless Routing System with Nonslicing
                 Floorplanning-Based Crosstalk Reduction on Gridless
                 Track Assignment",
  journal =      j-TODAES,
  volume =       "16",
  number =       "2",
  pages =        "19:1--19:??",
  month =        mar,
  year =         "2011",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1929943.1929951",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Apr 1 16:07:45 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Track assignment, which is an intermediate stage
                 between global routing and detailed routing, provides a
                 good platform for promoting performance, and for
                 imposing additional constraints during routing, such as
                 crosstalk. Gridless track assignment (GTA) has not been
                 addressed in public literature. This work develops a
                 gridless routing system integrating a congestion-driven
                 global router, crosstalk-driven GTA and an enhanced
                 implicit connection-graph-based router. Initial
                 assignment is produced rapidly with a left-edge like
                 algorithm. Crosstalk reduction on the assignment is
                 then transformed to a restricted nonslicing
                 floorplanning problem, and a deterministic O-Tree based
                 algorithm is employed to reassign each net segment.
                 Finally, each panel is partitioned into several
                 subpanels, and the subpanels are reordered using branch
                 and bound algorithm to decrease the crosstalk
                 further.",
  acknowledgement = ack-nhfb,
  articleno =    "19",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Liu:2011:SBA,
  author =       "Yu Liu and Kaijie Wu and Ramesh Karri",
  title =        "Scan-Based Attacks on Linear Feedback Shift Register
                 Based Stream Ciphers",
  journal =      j-TODAES,
  volume =       "16",
  number =       "2",
  pages =        "20:1--20:??",
  month =        mar,
  year =         "2011",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1929943.1929952",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Apr 1 16:07:45 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Stream cipher is an important class of encryption
                 algorithm that encrypts plaintext messages one bit at a
                 time. Various stream ciphers are deployed in wireless
                 telecommunication applications because they have simple
                 hardware circuitry, are generally fast and consume very
                 low power. On the other hand, scan-based
                 Design-for-Test (DFT) is one of the most popular
                 methods to test IC devices. All flip-flops in the
                 Design Under Test are connected to one or more scan
                 chains and the states of the flip-flops can be scanned
                 out through these chains. In this paper, we present an
                 attack on stream cipher implementations by determining
                 the scan chain structure of the Linear Feedback Shift
                 Registers in their implementations.",
  acknowledgement = ack-nhfb,
  articleno =    "20",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@article{Keutzer:2011:SSM,
  author = {Kurt Keutzer and Peng Li and Li Shang and Hai Zhou},
  title = {A Special Section on Multicore Parallel {CAD}:
    Algorithm Design and Programming},
  journal = j-TODAES,
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  volume = {16},
  number = {3},
  pages = {21:1--21:??},
  articleno = {21},
  month = jun,
  year = {2011},
  doi = {https://doi.org/10.1145/1970353.1970354},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  coden = {ATASFO},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate = {Tue Jun 14 11:55:50 MDT 2011},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@article{Ludwin:2011:EDP,
  author = {Adrian Ludwin and Vaughn Betz},
  title = {Efficient and Deterministic Parallel Placement for
    {FPGAs}},
  journal = j-TODAES,
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  volume = {16},
  number = {3},
  pages = {22:1--22:??},
  articleno = {22},
  month = jun,
  year = {2011},
  doi = {https://doi.org/10.1145/1970353.1970355},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  coden = {ATASFO},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate = {Tue Jun 14 11:55:50 MDT 2011},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  abstract = {We describe a parallel simulated annealing algorithm
    for FPGA placement. The algorithm proposes and
    evaluates multiple moves in parallel, and has been
    incorporated into Altera's Quartus II CAD system.
    Across a set of 18 industrial benchmark circuits, we
    achieve geometric average speedups during the quench of
    2.7x and 4.0x on four and eight processors,
    respectively, with individual circuits achieving
    speedups of up to 3.6x and 5.9x. Over the course of the
    entire anneal, we achieve speedups of up to 2.8x and
    3.7x, with geometric average speedups of 2.1x and 2.4x.
    Our algorithm is the first parallel placer to optimize
    for criteria other than wirelength, such as critical
    path length, and is one of the few deterministic
    parallel placement algorithms.},
}

@article{Han:2011:DIT,
  author = {Yiding Han and Koushik Chakraborty and Sanghamitra Roy
    and Vilasita Kuntamukkala},
  title = {Design and Implementation of a Throughput-Optimized
    {GPU} Floorplanning Algorithm},
  journal = j-TODAES,
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  volume = {16},
  number = {3},
  pages = {23:1--23:??},
  articleno = {23},
  month = jun,
  year = {2011},
  doi = {https://doi.org/10.1145/1970353.1970356},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  coden = {ATASFO},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate = {Tue Jun 14 11:55:50 MDT 2011},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  abstract = {In this article, we propose a novel floorplanning
    algorithm for GPUs. Floorplanning is an inherently
    sequential algorithm, far from the typical programs
    suitable for Single-Instruction Multiple-Thread
    (SIMT)-style concurrency in a GPU. We propose a
    fundamentally different approach of exploring the
    floorplan solution space, where we evaluate concurrent
    moves on a given floorplan. We illustrate several
    performance optimization techniques for this algorithm
    in GPUs. To improve the solution quality, we present a
    comprehensive exploration of the design space,
    including various techniques to adapt the annealing
    approach in a GPU.},
}

@article{Liu:2011:GBP,
  author = {Yifang Liu and Jiang Hu},
  title = {{GPU}-Based Parallelization for Fast Circuit
    Optimization},
  journal = j-TODAES,
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  volume = {16},
  number = {3},
  pages = {24:1--24:??},
  articleno = {24},
  month = jun,
  year = {2011},
  doi = {https://doi.org/10.1145/1970353.1970357},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  coden = {ATASFO},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate = {Tue Jun 14 11:55:50 MDT 2011},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  abstract = {The progress of GPU (Graphics Processing Unit)
    technology opens a new avenue for boosting computing
    power. This work is an attempt to exploit the GPU for
    accelerating VLSI circuit optimization. We propose
    GPU-based parallel computing techniques and apply them
    on simultaneous gate sizing and threshold voltage
    assignment, which is a popular method for VLSI
    performance and power optimization. These techniques
    include efficient task scheduling and memory
    organization, all of which are aimed to fully utilize
    the advantages of GPUs.},
}

@article{Hsu:2011:MSS,
  author = {Chia-Jui Hsu and Jos{\'e} Luis Pino and Shuvra S. Bhattacharyya},
  title = {Multithreaded Simulation for Synchronous Dataflow Graphs},
  journal = j-TODAES,
  fjournal = {ACM Transactions on Design Automation of Electronic Systems (TODAES)},
  year = {2011},
  month = jun,
  volume = {16},
  number = {3},
  articleno = {25},
  pages = {25:1--25:??},
  doi = {https://doi.org/10.1145/1970353.1970358},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  coden = {ATASFO},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  abstract = {For system simulation, Synchronous DataFlow (SDF) has
    been widely used as a core model of computation in
    design tools for digital communication and signal
    processing systems. The traditional approach for
    simulating SDF graphs is to compute and execute static
    schedules in single-processor desktop environments.
    Nowadays, however, multicore processors are
    increasingly popular desktop platforms for their
    potential performance improvements through thread-level
    parallelism. Without novel scheduling and simulation
    techniques that explicitly explore thread-level
    parallelism for executing SDF graphs, current design
    tools gain only minimal performance improvements on
    multicore platforms. In this article, we present a new
    multithreaded simulation scheduler, called MSS, to
    provide simulation runtime speedup for executing SDF
    graphs on multicore processors.},
  bibdate = {Tue Jun 14 11:55:50 MDT 2011},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@article{Liao:2011:AUB,
  author = {Xiongfei Liao and Thambipillai Srikanthan},
  title = {Accelerating {UNISIM}-Based Cycle-Level
    Microarchitectural Simulations on Multicore Platforms},
  journal = j-TODAES,
  fjournal = {ACM Transactions on Design Automation of Electronic Systems (TODAES)},
  year = {2011},
  month = jun,
  volume = {16},
  number = {3},
  articleno = {26},
  pages = {26:1--26:??},
  doi = {https://doi.org/10.1145/1970353.1970359},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  coden = {ATASFO},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  abstract = {UNISIM has been shown to ease the development of
    simulators for multi-/many-core systems. However,
    UNISIM cycle-level simulations of large-scale
    multiprocessor systems could be very time consuming. In
    this article, we propose a systematic framework for
    accelerating UNISIM cycle-level simulations on
    multicore platforms. The proposed framework relies on
    exploiting the fine-grained parallelism within the
    simulated cycles using POSIX threads. A multithreaded
    simulation engine has been devised from the
    single-threaded UNISIM SystemC engine to facilitate the
    exploitation of inherent parallelism. An adaptive
    technique that manages the overall computation workload
    by adjusting the number of threads employed at any
    given time is proposed. In addition, we have introduced
    a technique to balance the workloads of multithreaded
    executions.},
  bibdate = {Tue Jun 14 11:55:50 MDT 2011},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@article{Garcia-Dopico:2011:NAV,
  author = {Antonio Garc{\'\i}a-Dopico and Antonio P{\'e}rez and
    Santiago Rodr{\'\i}guez and Mar{\'\i}a Isabel Garc{\'\i}a},
  title = {A New Algorithm for {VHDL} Parallel Simulation},
  journal = j-TODAES,
  fjournal = {ACM Transactions on Design Automation of Electronic Systems (TODAES)},
  year = {2011},
  month = jun,
  volume = {16},
  number = {3},
  articleno = {27},
  pages = {27:1--27:??},
  doi = {https://doi.org/10.1145/1970353.1970360},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  coden = {ATASFO},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  abstract = {This article proposes a new algorithm for parallel
    synchronous simulation of VHDL designs to be executed
    on desktop computers. Besides executing VHDL processes
    in parallel, the algorithm focuses on parallelizing the
    simulation kernel with special emphasis on signal
    grouping while maintaining language semantics.
    Synchronous approaches are the most suitable for shared
    memory multiprocessor (SMP) desktop computers but may
    be difficult to parallelize because of the low activity
    detected in most of the designs. The degree of
    parallelism is increased in this approach by performing
    an exhaustive VHDL signal dependencies analysis and
    avoiding any sequential phase in the simulator. VHDL
    semantics impose a synchronization barrier after each
    phase, that is, the process and the kernel simulation
    phase, as the language definition does not allow
    simultaneous execution of kernel and processes.},
  bibdate = {Tue Jun 14 11:55:50 MDT 2011},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@article{Zeng:2011:LDP,
  author = {Zhiyu Zeng and Zhuo Feng and Peng Li and Vivek Sarin},
  title = {Locality-Driven Parallel Static Analysis for Power Delivery Networks},
  journal = j-TODAES,
  fjournal = {ACM Transactions on Design Automation of Electronic Systems (TODAES)},
  year = {2011},
  month = jun,
  volume = {16},
  number = {3},
  articleno = {28},
  pages = {28:1--28:??},
  doi = {https://doi.org/10.1145/1970353.1970361},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  coden = {ATASFO},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  abstract = {Large VLSI on-chip Power Delivery Networks (PDNs) are
    challenging to analyze due to the sheer network
    complexity. In this article, a novel parallel
    partitioning-based PDN analysis approach is presented.
    We use the boundary circuit responses of each partition
    to divide the full grid simulation problem into a set
    of independent subgrid simulation problems. Instead of
    solving exact boundary circuit responses, a more
    efficient scheme is proposed to provide near-exact
    approximation to the boundary circuit responses by
    exploiting the spatial locality of the flip-chip-type
    power grids. This scheme is also used in a block-based
    iterative error reduction process to achieve fast
    convergence.},
  bibdate = {Tue Jun 14 11:55:50 MDT 2011},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@article{Zhu:2011:MPL,
  author = {Yuhao Zhu and Bo Wang and Yangdong Deng},
  title = {Massively Parallel Logic Simulation with {GPUs}},
  journal = j-TODAES,
  fjournal = {ACM Transactions on Design Automation of Electronic Systems (TODAES)},
  year = {2011},
  month = jun,
  volume = {16},
  number = {3},
  articleno = {29},
  pages = {29:1--29:??},
  doi = {https://doi.org/10.1145/1970353.1970362},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  coden = {ATASFO},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  abstract = {In this article, we developed a massively parallel
    gate-level logical simulator to address the
    ever-increasing computing demand for VLSI verification.
    To the best of the authors' knowledge, this work is the
    first one to leverage the power of modern GPUs to
    successfully unleash the massive parallelism of a
    conservative discrete event-driven algorithm, CMB
    algorithm. A novel data-parallel strategy is proposed
    to manipulate the fine-grain message passing mechanism
    required by the CMB protocol. To support robust and
    complete simulation for real VLSI designs, we establish
    both a memory paging mechanism and an adaptive issuing
    strategy to efficiently utilize the GPU memory with a
    limited capacity.},
  bibdate = {Tue Jun 14 11:55:50 MDT 2011},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

%%% NOTE(review): the second author's surname is spelled with mixed case
%%% (DeOrio); the earlier `Deorio' looks like a case-folding artifact of
%%% converting an all-capitals byline.  Confirm against the title page.
@Article{Chatterjee:2011:GLS,
  author =       "Debapriya Chatterjee and Andrew DeOrio and Valeria
                 Bertacco",
  title =        "Gate-Level Simulation with {GPU} Computing",
  journal =      j-TODAES,
  volume =       "16",
  number =       "3",
  pages =        "30:1--30:??",
  month =        jun,
  year =         "2011",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1970353.1970363",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Jun 14 11:55:50 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Functional verification of modern digital designs is a
                 crucial, time-consuming task impacting not only the
                 correctness of the final product, but also its time to
                 market. At the heart of most of today's verification
                 efforts is logic simulation, used heavily to verify the
                 functional correctness of a design for a broad range of
                 abstraction levels. In mainstream industry verification
                 methodologies, typical setups coordinate the validation
                 effort of a complex digital system by distributing
                 logic simulation tasks among vast server farms for
                 months at a time. Yet, the performance of logic
                 simulation is not sufficient to satisfy the demand,
                 leading to incomplete validation processes, escaped
                 functional bugs, and continuous pressure on the EDA
                 industry to develop faster simulation solutions.",
  acknowledgement = ack-nhfb,
  articleno =    "30",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@article{Bondade:2011:HSC,
  author = {Rajdeep Bondade and Dongsheng Ma},
  title = {Hardware-Software Codesign of an Embedded
    Multiple-Supply Power Management Unit for Multicore {SoCs}
    Using an Adaptive Global\slash Local Power
    Allocation and Processing Scheme},
  journal = j-TODAES,
  fjournal = {ACM Transactions on Design Automation of Electronic Systems (TODAES)},
  year = {2011},
  month = jun,
  volume = {16},
  number = {3},
  articleno = {31},
  pages = {31:1--31:??},
  doi = {https://doi.org/10.1145/1970353.1970364},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  coden = {ATASFO},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  abstract = {Power dissipation has become a critical design
    constraint for the growth of modern multicore systems
    due to increasing clock frequencies, leakage currents,
    and system parasitics. To overcome this urgent crisis,
    this article presents an embedded platform for on-chip
    power management of a multicore System-on-Chip (SoC).
    The design involves the development of two key
    components, from the hardware to the software level.
    From the hardware perspective, a multiple-supply power
    management unit is proposed and is implemented using a
    Single-Inductor Multiple-Output (SIMO) DC-DC converter.
    To dynamically respond to the sensed instantaneous
    power demands and to accurately control the power
    delivery to the processor cores, the power management
    unit employs a software-defined adaptive global/local
    power allocation feedback controller.},
  bibdate = {Tue Jun 14 11:55:50 MDT 2011},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@article{Stitt:2011:TWD,
  author = {Greg Stitt and Frank Vahid},
  title = {Thread Warping: Dynamic and Transparent Synthesis of
    Thread Accelerators},
  journal = j-TODAES,
  fjournal = {ACM Transactions on Design Automation of Electronic Systems (TODAES)},
  year = {2011},
  month = jun,
  volume = {16},
  number = {3},
  articleno = {32},
  pages = {32:1--32:??},
  doi = {https://doi.org/10.1145/1970353.1970365},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  coden = {ATASFO},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  abstract = {We introduce thread warping, a dynamic optimization
    technique that customizes multicore architectures to a
    given application by dynamically synthesizing threads
    into custom accelerator circuits on FPGAs
    (Field-Programmable Gate Arrays). Thread warping builds
    upon previous dynamic synthesis techniques for
    single-threaded applications, enabling dynamic
    architectural adaptation to different amounts of
    thread-level parallelism, while also exploiting
    parallelism within each thread to further improve
    performance. Furthermore, thread warping maintains the
    important separation of function from architecture,
    enabling portability of applications to architectures
    with different quantities of microprocessors and FPGAs,
    an advantage not shared by static compilation/synthesis
    approaches. We introduce an approach consisting of CAD
    tools and operating system support that enables thread
    warping on potentially any microprocessor/FPGA
    architecture.},
  bibdate = {Tue Jun 14 11:55:50 MDT 2011},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@article{Ain:2011:CPV,
  author = {Antara Ain and Debjit Pal and Pallab Dasgupta and
    Siddhartha Mukhopadhyay and Rajdeep Mukhopadhyay and
    John Gough},
  title = {{Chassis}: a Platform for Verifying {PMU} Integration
    Using Autogenerated Behavioral Models},
  journal = j-TODAES,
  fjournal = {ACM Transactions on Design Automation of Electronic Systems (TODAES)},
  year = {2011},
  month = jun,
  volume = {16},
  number = {3},
  articleno = {33},
  pages = {33:1--33:??},
  doi = {https://doi.org/10.1145/1970353.1970367},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  coden = {ATASFO},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  abstract = {Power Management Units (PMUs) are large integrated
    circuits consisting of many predesigned mixed-signal
    components. PMU integration poses a serious
    verification problem considering the size of the
    integrated circuit and the complexity of analog
    simulation. In this article we present an approach for
    automatic generation of behavioral models for PMU
    components from top-down skeleton models, fitted with
    parameter values estimated by bottom-up parameter
    extraction algorithms. It is shown that replacing PMU
    components with these autogenerated hybrid
    automata-based abstract behavioral models enables
    significant simulation speedup ({$>$} 20X on our
    industrial test cases) and helps in early detection of
    integration errors. The article also justifies the
    level of accuracy in our models with respect to the
    goal of verifying integrated PMUs.},
  bibdate = {Tue Jun 14 11:55:50 MDT 2011},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

%%% The complexity-class name in the abstract is written \#P-hard: a bare
%%% `#' is LaTeX's macro-parameter character and makes the abstract
%%% untypesettable in styles that print it.
@Article{Yu:2011:MQS,
  author =       "Yue Yu and Shangping Ren and Xiaobo Sharon Hu",
  title =        "A Metric for Quantifying Similarity between Timing
                 Constraint Sets in Real-Time Systems",
  journal =      j-TODAES,
  volume =       "16",
  number =       "3",
  pages =        "34:1--34:??",
  month =        jun,
  year =         "2011",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1970353.1970368",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Jun 14 11:55:50 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Real-time systems are systems in which their timing
                 behaviors must satisfy a specified set of timing
                 constraints and they often operate in a real-world
                 environment with scarce resources. As a result, the
                 actual runtime performance of these systems may deviate
                 from the design, either inevitably due to unpredictable
                 factors or by intention in order to improve system's
                 other Quality-of-Service (QoS) properties. In this
                 article, we first introduce a new metric, timing
                 constraint set similarity, to quantify the resemblance
                 between two different timing constraint sets. Because
                 directly calculating the exact value of the metric
                 involves calculating the size of a polytope which is a
                 \#P-hard problem, we instead introduce an efficient
                 method for estimating its bound.",
  acknowledgement = ack-nhfb,
  articleno =    "34",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

%%% NOTE(review): the publisher metadata listed `Tiempo Sas' as a sixth
%%% author.  That appears to be the company Tiempo SAS (an author
%%% affiliation) rather than a person, so it is omitted from the author
%%% list below.  Confirm against the article's title page.
@Article{Abouzeid:2011:COS,
  author =       "Fady Abouzeid and Sylvain Clerc and Fabian Firmin and
                 Marc Renaudin and Gilles Sicard",
  title =        "{40nm CMOS} {0.35V}-Optimized Standard Cell Libraries
                 for Ultra-Low Power Applications",
  journal =      j-TODAES,
  volume =       "16",
  number =       "3",
  pages =        "35:1--35:??",
  month =        jun,
  year =         "2011",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1970353.1970369",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Jun 14 11:55:50 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Ultra-low voltage is now a well-known solution for
                 energy constrained applications designed using
                 nanometric process technologies. This work is focused
                 on setting up an automated methodology to enable the
                 design of ultra-low voltage digital circuits
                 exclusively using standard EDA tools. To achieve this
                 goal, a 0.35V energy-delay optimized library was
                 developed. This library, fully compliant with standard
                 library design flow and characterization, was verified
                 through the design and fabrication of a BCH decoder
                 circuit, following a standard front-end to back-end
                 flow. At 0.33V, it performs at 600 kHz with a dynamic
                 energy consumption reduced by a factor 14x from nominal
                 1.1V.",
  acknowledgement = ack-nhfb,
  articleno =    "35",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@article{Qiu:2011:ATB,
  author = {Meikang Qiu and Edwin H.-M. Sha},
  title = {2011 {ACM} {TODAES} best paper award},
  journal = j-TODAES,
  fjournal = {ACM Transactions on Design Automation of Electronic Systems (TODAES)},
  year = {2011},
  month = oct,
  volume = {16},
  number = {4},
  articleno = {36},
  pages = {36:1--36:??},
  doi = {https://doi.org/10.1145/2003695.2003696},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  coden = {ATASFO},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  abstract = {In high-level synthesis for real-time embedded systems
    using heterogeneous functional units (FUs), it is
    critical to select the best FU type for each task.
    However, some tasks may not have fixed execution times.
    This article models each varied execution time as a
    probabilistic random variable and solves the
    heterogeneous assignment with probability (HAP)
    problem. The solution of the HAP problem assigns a
    proper FU type to each task such that the total cost is
    minimized while the timing constraint is satisfied with
    a guaranteed confidence probability. The solutions to
    the HAP problem are useful for both hard real-time and
    soft real-time systems.},
  bibdate = {Sat Oct 22 09:25:48 MDT 2011},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@article{Sen:2011:COV,
  author = {Alper Sen},
  title = {Concurrency-oriented verification and coverage of
    system-level designs},
  journal = j-TODAES,
  fjournal = {ACM Transactions on Design Automation of Electronic Systems (TODAES)},
  year = {2011},
  month = oct,
  volume = {16},
  number = {4},
  articleno = {37},
  pages = {37:1--37:??},
  doi = {https://doi.org/10.1145/2003695.2003697},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  coden = {ATASFO},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  abstract = {Correct concurrent System-on-Chips (SoCs) are very
    hard to design and reason about. In this work, we
    develop an automated framework complete with
    concurrency-oriented verification and coverage
    techniques for system-level designs. Our techniques are
    different from traditional simulation-based reliability
    techniques, since concurrency information is often lost
    in traditional techniques. We preserve concurrency
    information to obtain unique verification techniques
    that allow us to predict potential errors (formulated
    as transaction-level assertions) from error-free
    simulations. In order to do this, we exploit the
    inherent concurrency in the designs to generate and
    analyze novel partial-order simulation traces.
    Additionally, to evaluate the confidence on
    verification results and the gauge progress of
    verification, we develop novel mutation testing based
    on concurrent coverage metrics.},
  bibdate = {Sat Oct 22 09:25:48 MDT 2011},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@article{Fournier:2011:PAC,
  author = {Laurent Fournier and Avi Ziv and Ekaterina Kutsy and
    Ofer Strichman},
  title = {A probabilistic analysis of coverage methods},
  journal = j-TODAES,
  fjournal = {ACM Transactions on Design Automation of Electronic Systems (TODAES)},
  year = {2011},
  month = oct,
  volume = {16},
  number = {4},
  articleno = {38},
  pages = {38:1--38:??},
  doi = {https://doi.org/10.1145/2003695.2003698},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  coden = {ATASFO},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  abstract = {Coverage is an important measure for the quality and
    completeness of the functional verification of hardware
    logic designs. Verification teams spend a significant
    amount of time looking for bugs in the design and in
    providing high-quality coverage. This process is
    performed through the use of various sampling
    strategies for selecting test inputs. The selection of
    sampling strategies to achieve the verification goals
    is typically carried out in an intuitive manner. We
    studied several commonly used sampling strategies and
    provide a probabilistic framework for assessing and
    comparing their relative values. For this analysis, we
    derived results for two measures of interest: first,
    the probability of finding a bug within a given number
    of samplings; and second, the expected number of
    samplings until a bug is detected.},
  bibdate = {Sat Oct 22 09:25:48 MDT 2011},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@article{Sun:2011:GDD,
  author = {Wei-Tsun Sun and Zoran Salcic},
  title = {{GALS-Designer}: a design framework for {GALS}
    software systems},
  journal = j-TODAES,
  fjournal = {ACM Transactions on Design Automation of Electronic Systems (TODAES)},
  year = {2011},
  month = oct,
  volume = {16},
  number = {4},
  articleno = {39},
  pages = {39:1--39:??},
  doi = {https://doi.org/10.1145/2003695.2003699},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  coden = {ATASFO},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  abstract = {GALS-Designer is a framework for the design of
    software systems which comply with the formal Globally
    Asynchronous Locally Synchronous model of computation
    (GALS). Those systems consist of single or multiple
    GALS programs and their immediate environment, which
    can be other programs and any other modules described
    in SystemC. The framework integrates our libGALS
    library for writing GALS programs and SystemC. It
    enables modeling and simulation of single and multiple
    GALS programs within the single SystemC executable
    model on the host (simulation) operating system. The
    same GALS programs can then be run without SystemC on a
    target operating system for which the libGALS runtime
    library is available.},
  bibdate = {Sat Oct 22 09:25:48 MDT 2011},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@article{Mittal:2011:TVA,
  author = {Kartikey Mittal and Arpit Joshi and Madhu Mutyam},
  title = {Timing variation-aware scheduling and resource binding
    in high-level synthesis},
  journal = j-TODAES,
  fjournal = {ACM Transactions on Design Automation of Electronic Systems (TODAES)},
  year = {2011},
  month = oct,
  volume = {16},
  number = {4},
  articleno = {40},
  pages = {40:1--40:??},
  doi = {https://doi.org/10.1145/2003695.2003700},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  coden = {ATASFO},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  abstract = {Due to technological scaling, process variations have
    increased significantly, resulting in large variations
    in the delay of the functional units. Hence, the
    worst-case approach is becoming increasingly
    pessimistic in meeting a certain performance yield. The
    problem therefore is to increase the performance as
    much as possible while maintaining the desired yield.
    In this work, we introduce an integer linear
    programming (ILP) formulation for scheduling and
    resource binding in high-level synthesis (HLS) which
    tries to mitigate the effect of timing variations. In
    the presence of delay variations of resources, as
    chained resources can give a better latency and
    performance yield trade-off, instead of considering
    them independently, we consider external chaining of
    resources, that is, two or more resources are connected
    by external wiring, and exploit operation chaining.},
  bibdate = {Sat Oct 22 09:25:48 MDT 2011},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@Article{Wang:2011:RCM,
  author          = {Xiaofang Wang and Pallav Gupta},
  title           = {Resource-constrained multiprocessor synthesis for
                     floating-point applications on {FPGAs}},
  journal         = j-TODAES,
  volume          = {16},
  number          = {4},
  pages           = {41:1--41:??},
  month           = oct,
  year            = {2011},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/2003695.2003701},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Sat Oct 22 09:25:48 MDT 2011},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {Although state-of-the-art field-programmable gate
                     arrays offer exciting new opportunities in exploring
                     low-cost high-performance architectures for
                     data-intensive scientific applications, they also
                     present serious challenges.
                     Multiprocessor-on-programmable-chip, which integrates
                     software programmability and hardware reconfiguration,
                     provides substantial flexibility that results in
                     shorter design cycles, higher performance, and lower
                     cost. In this article, we present an
                     application-specific design methodology for
                     multiprocessor-on-programmable-chip architectures that
                     target applications involving large matrices and
                     floating-point operations. Given an application with
                     specific energy-performance and resource constraints,
                     our methodology aims to customize the architecture to
                     match the diverse computation and communication
                     requirements of the application tasks. Graph-based
                     analysis of the application drives system synthesis
                     that employs a precharacterized, parameterized hardware
                     component library of functional units.},
  acknowledgement = ack-nhfb,
  articleno       = {41},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Kim:2011:MAO,
  author          = {Yongjoo Kim and Jongeun Lee and Aviral Shrivastava and
                     Yunheung Paek},
  title           = {Memory access optimization in compilation for
                     coarse-grained reconfigurable architectures},
  journal         = j-TODAES,
  volume          = {16},
  number          = {4},
  pages           = {42:1--42:??},
  month           = oct,
  year            = {2011},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/2003695.2003702},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Sat Oct 22 09:25:48 MDT 2011},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {Coarse-grained reconfigurable architectures (CGRAs)
                     promise high performance at high power efficiency. They
                     fulfil this promise by keeping the hardware extremely
                     simple, and moving the complexity to application
                     mapping. One major challenge comes in the form of data
                     mapping. For reasons of power-efficiency and
                     complexity, CGRAs use multibank local memory, and a row
                     of PEs share memory access. In order for each row of
                     the PEs to access any memory bank, there is a hardware
                     arbiter between the memory requests generated by the
                     PEs and the banks of the local memory. However, a
                     fundamental restriction remains in that a bank cannot
                     be accessed by two different PEs at the same time.},
  acknowledgement = ack-nhfb,
  articleno       = {42},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Bruneel:2011:DDF,
  author          = {Karel Bruneel and Wim Heirman and Dirk Stroobandt},
  title           = {Dynamic data folding with parameterizable {FPGA}
                     configurations},
  journal         = j-TODAES,
  volume          = {16},
  number          = {4},
  pages           = {43:1--43:??},
  month           = oct,
  year            = {2011},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/2003695.2003703},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Sat Oct 22 09:25:48 MDT 2011},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {In many applications, subsequent data manipulations
                     differ only in a small set of parameter values. Because
                     of their reconfigurability, FPGAs (field programmable
                     gate arrays) can be configured with a specialized
                     circuit each time the parameter values change. This
                     technique is called dynamic data folding. The
                     specialized circuits are smaller and faster than their
                     generic counterparts. However, the overhead involved in
                     generating the configurations for the specialized
                     circuits at runtime is very large when conventional
                     tools are used, and this overhead will in many cases
                     negate the benefit of using optimized configurations.
                     This article introduces an automatic method for
                     generating runtime parameterizable configurations from
                     arbitrary Boolean circuits.},
  acknowledgement = ack-nhfb,
  articleno       = {43},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Dong:2011:PCS,
  author          = {Wei Dong and Peng Li},
  title           = {Parallel circuit simulation with adaptively controlled
                     projective integration},
  journal         = j-TODAES,
  volume          = {16},
  number          = {4},
  pages           = {44:1--44:??},
  month           = oct,
  year            = {2011},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/2003695.2003704},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Sat Oct 22 09:25:48 MDT 2011},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {In this article, a parallel transient circuit
                     simulation approach based on an adaptively-controlled
                     time-stepping scheme is proposed. Different from the
                     widely-used implicit numerical integration techniques
                     in most transient simulators, this work exploits the
                     recently-developed explicit telescopic projective
                     numerical integration method for efficient parallel
                     circuit simulation. Because telescopic projective
                     integration addresses the well-known stability issue of
                     explicit numerical integrations by adopting
                     combinations of inner integrators and outer integrators
                     in a multilevel fashion, the simulation time-step is no
                     longer limited by the smallest time constant in the
                     circuit. With dynamic control of telescopic projective
                     integration, the proposed projective integration
                     framework not only leads to noticeable efficiency
                     improvement in circuit simulation, it also lends itself
                     to straightforward parallelization due to its explicit
                     nature.},
  acknowledgement = ack-nhfb,
  articleno       = {44},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Maestro:2011:MEL,
  author          = {Juan Antonio Maestro and Pedro Reviriego and Sanghyeon
                     Baeg and Shijie Wen and Richard Wong},
  title           = {Mitigating the effects of large multiple cell upsets
                     {(MCUs)} in memories},
  journal         = j-TODAES,
  volume          = {16},
  number          = {4},
  pages           = {45:1--45:??},
  month           = oct,
  year            = {2011},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/2003695.2003705},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Sat Oct 22 09:25:48 MDT 2011},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {Reliability is a critical issue for memories.
                     Radiation particles that hit the device can cause
                     errors in some cells, which can lead to data
                     corruption. To avoid this problem, memories are
                     protected with per-word error correction codes (ECCs).
                     Typically, single-error correction and double-error
                     detection (SEC-DED) codes are used. As technology
                     scales, errors caused by radiation particles on
                     memories tend to affect more than one cell---what is
                     known as a multiple cell upset (MCU). To ensure that
                     only a single cell is affected in each word,
                     interleaving is used. With interleaving, cells that
                     belong to the same word are placed at a sufficient
                     distance such that an MCU will only affect a single
                     cell on each word.},
  acknowledgement = ack-nhfb,
  articleno       = {45},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Healy:2011:IMF,
  author          = {Michael B. Healy and Fayez Mohamood and Hsien-Hsin S.
                     Lee and Sung Kyu Lim},
  title           = {Integrated microarchitectural floorplanning and
                     run-time controller for inductive noise mitigation},
  journal         = j-TODAES,
  volume          = {16},
  number          = {4},
  pages           = {46:1--46:??},
  month           = oct,
  year            = {2011},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/2003695.2003706},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Sat Oct 22 09:25:48 MDT 2011},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {In this article, we propose a design methodology using
                     two complementary techniques to address high-frequency
                     inductive noise in the early design phase of a
                     microprocessor. First, we propose a noise-aware
                     floorplanning technique that uses microarchitectural
                     profile information to create noise-aware floorplans.
                     Second, we present the design of a dynamic
                     inductive-noise controlling mechanism at the
                     microarchitectural level, which limits the on-die
                     current demand within predefined bounds, regardless of
                     the native power and current characteristics of running
                     applications. By dynamically monitoring the access
                     patterns of microarchitectural modules, our mechanism
                     can effectively limit simultaneous switching activity
                     of close-by modules, thereby leveling voltage ringing
                     at local power-pins.},
  acknowledgement = ack-nhfb,
  articleno       = {46},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Yan:2011:ICA,
  author =       "Jin-Tai Yan",
  title =        "{IO} connection assignment and {RDL} routing for
                 flip-chip designs",
  journal =      j-TODAES,
  volume =       "16",
  number =       "4",
  pages =        "47:1--47:??",
  month =        oct,
  year =         "2011",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2003695.2003707",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Oct 22 09:25:48 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Given a set of IO buffers and a set of bump balls with
                 the capacity constraints between two adjacent bump
                 balls, based on the construction of the Delaunay
                 triangulation and a Manhattan Voronoi diagram, an
                 $ O(n^2) $ assignment algorithm is proposed to assign
                 all the IO connections in a single redistribution layer
                 for IO connection assignment, where $n$ is the number
                 of bump balls in a flip-chip design. Furthermore, based
                 on the computation of the probabilistic congestion for
                 the assigned IO connections, an $ O(n^2) $ routing
                 algorithm is proposed to minimize the total wirelength
                 to route all the assigned IO connections while
                 satisfying the capacity constraints for single-layer
                 RDL routing.",
  acknowledgement = ack-nhfb,
  articleno =    "47",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Kim:2011:CTS,
  author =       "Tak-Yung Kim and Taewhan Kim",
  title =        "Clock tree synthesis for {TSV}-based {$3$D} {IC}
                 designs",
  journal =      j-TODAES,
  volume =       "16",
  number =       "4",
  pages =        "48:1--48:??",
  month =        oct,
  year =         "2011",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2003695.2003708",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Oct 22 09:25:48 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "For the cost-effective implementation of clock trees
                 in through-silicon via (TSV)-based 3D IC designs, we
                 propose core algorithms for 3D clock tree synthesis.
                 For a given abstract tree topology, we propose DLE-3D
                 (\underline{d}eferred \underline{l}ayer
                 \underline{e}mbedding for \underline{3D} ICs), which
                 optimally finds the embedding layers of tree nodes, so
                 that the TSV cost required for a tree topology is
                 minimized, and DME-3D (\underline{d}eferred
                 \underline{m}erge \underline{e}mbedding for
                 \underline{3D} ICs), which is an extended algorithm of
                 the 2D merging segment, to minimize the total
                 wirelength in 3D design space, with the consideration
                 of the TSV effect on delay. In addition, when an
                 abstract tree topology is not given, we propose NN-3D
                 (\underline{n}earest \underline{n}eighbor selection for
                 \underline{3D} ICs), which constructs a (TSV and
                 wirelength) cost-effective abstract tree topology for
                 3D ICs.",
  acknowledgement = ack-nhfb,
  articleno =    "48",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Lu:2011:CBP,
  author          = {Jianchao Lu and Baris Taskin},
  title           = {Clock buffer polarity assignment with skew tuning},
  journal         = j-TODAES,
  volume          = {16},
  number          = {4},
  pages           = {49:1--49:??},
  month           = oct,
  year            = {2011},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/2003695.2003709},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Sat Oct 22 09:25:48 MDT 2011},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {A clock polarity assignment method is proposed that
                     reduces the peak current on the vdd/gnd rails of an
                     integrated circuit. The impacts of (i) the output
                     capacitive load on the peak current drawn by the
                     sink-level clock buffers, and (ii) the buffer/inverter
                     replacement scheme of polarity assignment on timing
                     accuracy are considered in the formulation. The
                     proposed sink-level-only polarity assignment is
                     performed by a lexi-search algorithm in order to
                     balance the peak current on the clock tree. Most of the
                     previous polarity assignment methods that do not
                     include clock tree resynthesis lead to an undesirable
                     increase in the worst corner clock skew.},
  acknowledgement = ack-nhfb,
  articleno       = {49},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Wang:2011:ALR,
  author          = {Shaoxi Wang and Xinzhang Jia and Arthur B. Yeh and
                     Lihong Zhang},
  title           = {Analog layout retargeting using geometric
                     programming},
  journal         = j-TODAES,
  volume          = {16},
  number          = {4},
  pages           = {50:1--50:??},
  month           = oct,
  year            = {2011},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/2003695.2003710},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Sat Oct 22 09:25:48 MDT 2011},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {To satisfy the requirements of complex and special
                     analog layout constraints, a new analog layout
                     retargeting method is presented in this article. Our
                     approach uses geometric programming (GP) to achieve new
                     technology design rules, implement device symmetry and
                     matching constraints, and manage parasitics
                     optimization. The GP, a class of nonlinear optimization
                     problem, can be transferred or fitted into a convex
                     optimization problem. Therefore, a global optimum
                     solution can be achieved. Moreover, the GP can address
                     problems with large-scale variables and constraints
                     without setting an initialization variable range. To
                     meet the prerequisites of the GP methodology for analog
                     layout automation, we propose three kinds of
                     mathematical transformations, including negative
                     coefficient transformation, fraction transformation,
                     and maximum of posynomial transformation.},
  acknowledgement = ack-nhfb,
  articleno       = {50},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Duarte:2011:HDP,
  author          = {Filipa Duarte and Jos Hulzink and Jun Zhou and Jan
                     Stuijt and Jos Huisken and Harmke {De Groot}},
  title           = {A {36$ \mu $W} heartbeat-detection processor for a
                     wireless sensor node},
  journal         = j-TODAES,
  volume          = {16},
  number          = {4},
  pages           = {51:1--51:??},
  month           = oct,
  year            = {2011},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/2003695.2003711},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Sat Oct 22 09:25:48 MDT 2011},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {In order to provide better services to elderly people,
                     home healthcare monitoring systems have been
                     increasingly deployed. Typically, these systems are
                     based on wireless sensor nodes, and should utilize very
                     low energy during their lifetimes, as they are powered
                     by scavengers. In this article, we present an ultra-low
                     power processing system for a wireless sensor node for
                     very low duty cycle applications. In the CoolBio
                     system-on-chip, we utilized several power reduction
                     techniques at both the architecture level and the
                     circuit level. These techniques include feature
                     extraction, voltage and frequency scaling, clock and
                     power gating and a redesign of key standard cells.},
  acknowledgement = ack-nhfb,
  articleno       = {51},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Verbeek:2012:EFS,
  author =       "Freek Verbeek and Julien Schmaltz",
  title =        "Easy Formal Specification and Validation of Unbounded
                 {Networks-on-Chips} Architectures",
  journal =      j-TODAES,
  volume =       "17",
  number =       "1",
  pages =        "1:1--1:??",
  month =        jan,
  year =         "2012",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2071356.2071357",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jan 26 16:38:42 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "This article presents a formal specification and
                 validation environment to prove safety and liveness
                 properties of parametric---unbounded---NoCs
                 architectures described at a high-level of abstraction.
                 The environment improves the GeNoC approach with two
                 new theorems, proving evacuation and starvation
                 freedom. The application of the validation methodology
                 is illustrated on a HERMES NoC with adaptive west-first
                 routing and wormhole switching. This case study
                 illustrates the strong compositional aspect of the
                 GeNoC environment. The complete specification of this
                 HERMES instance, together with the proof that the
                 specification is deadlock-free, starvation free, and
                 all messages eventually leave the network at their
                 correct destination, could be achieved in about a
                 week.",
  acknowledgement = ack-nhfb,
  articleno =    "1",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Pasha:2012:SLS,
  author          = {Muhammad Adeel Pasha and Steven Derrien and Olivier
                     Sentieys},
  title           = {System-Level Synthesis for Wireless Sensor Node
                     Controllers: a Complete Design Flow},
  journal         = j-TODAES,
  volume          = {17},
  number          = {1},
  pages           = {2:1--2:??},
  month           = jan,
  year            = {2012},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/2071356.2071358},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Jan 26 16:38:42 MST 2012},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {Wireless sensor networks (WSN) is a new and very
                     challenging research field for embedded system design
                     automation. Engineering a WSN node hardware platform is
                     known to be a tough challenge, as the design must
                     enforce many severe constraints, among which energy
                     dissipation is by far the most important one. WSN node
                     devices have until now been designed using
                     off-the-shelf low-power microcontroller units (MCUs),
                     even if their power dissipation is still an issue and
                     hinders the widespread use of this new technology. In
                     this work, we propose a complete system-level flow for
                     an alternative approach based on the concept of
                     hardware microtasks, which relies on hardware
                     specialization and power gating to drastically improve
                     the energy efficiency of the computational/control part
                     of the node.},
  acknowledgement = ack-nhfb,
  articleno       = {2},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Aksoy:2012:OAM,
  author          = {Levent Aksoy and Eduardo Costa and Paulo Flores and
                     Jose Monteiro},
  title           = {Optimization Algorithms for the Multiplierless
                     Realization of Linear Transforms},
  journal         = j-TODAES,
  volume          = {17},
  number          = {1},
  pages           = {3:1--3:??},
  month           = jan,
  year            = {2012},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/2071356.2071359},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Jan 26 16:38:42 MST 2012},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {This article addresses the problem of finding the
                     fewest numbers of addition and subtraction operations
                     in the multiplication of a constant matrix with an
                     input vector---a fundamental operation in many linear
                     digital signal processing transforms. We first
                     introduce an exact common subexpression elimination
                     (CSE) algorithm that formalizes the minimization of the
                     number of operations as a 0-1 integer linear
                     programming problem. Since there are still instances
                     that the proposed exact algorithm cannot handle due to
                     the NP-completeness of the problem, we also introduce a
                     CSE heuristic algorithm that iteratively finds the most
                     common 2-term subexpressions with the minimum conflicts
                     among the expressions.},
  acknowledgement = ack-nhfb,
  articleno       = {3},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Leung:2012:PVI,
  author          = {Mario K. Y. Leung and Eric K. I. Chio and Evangeline
                     F. Y. Young},
  title           = {Postplacement Voltage Island Generation},
  journal         = j-TODAES,
  volume          = {17},
  number          = {1},
  pages           = {4:1--4:??},
  month           = jan,
  year            = {2012},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/2071356.2071360},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Jan 26 16:38:42 MST 2012},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {High power consumption will not only shorten the
                     battery life of handheld devices, but also cause
                     thermal and reliability problems. To lower power
                     consumption, one way is to reduce the supply voltage as
                     in multisupply voltage (MSV) designs. In region-based
                     MSV, a circuit will be partitioned into ``voltage
                     islands'' where each island occupies a contiguous
                     physical space and operates at one supply voltage. In
                     the work of Wu et al. [2005], this voltage supply
                     problem is addressed, and the input placement is
                     partitioned into a set of rectangular voltage islands
                     by a slicing structure. However, the constraint of
                     using a slicing structure prohibits better solutions in
                     their approach.},
  acknowledgement = ack-nhfb,
  articleno       = {4},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Wang:2012:CMI,
  author          = {Hai Wang and Sheldon X.-D. Tan and Ryan Rakib},
  title           = {Compact Modeling of Interconnect Circuits over Wide
                     Frequency Band by Adaptive Complex-Valued Sampling
                     Method},
  journal         = j-TODAES,
  volume          = {17},
  number          = {1},
  pages           = {5:1--5:??},
  month           = jan,
  year            = {2012},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/2071356.2071361},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Jan 26 16:38:42 MST 2012},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {In this article, we propose a new model
                     order-reduction method for compact modeling of
                     interconnect circuits over wide frequency band using a
                     novel complex-valued adaptive sampling and error
                     estimation scheme. We address the outstanding error
                     control problems in the existing sampling-based
                     reduction framework over a frequency band. Our new
                     method, WBMOR, explicitly and efficiently computes the
                     exact residual errors to guide the sampling process. We
                     show by sampling along the imaginary axis and
                     performing a new complex-valued reduction that the
                     reduced model will match exactly with the original
                     model at the sample points. Additionally, we show in
                     theory that the proposed method can achieve the error
                     bound over a given frequency range.},
  acknowledgement = ack-nhfb,
  articleno       = {5},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Lin:2012:RDP,
  author =       "Jing-Wei Lin and Tsung-Yi Ho and Iris Hui-Ru Jiang",
  title =        "Reliability-Driven Power\slash Ground Routing for
                 Analog {ICs}",
  journal =      j-TODAES,
  volume =       "17",
  number =       "1",
  pages =        "6:1--6:??",
  month =        jan,
  year =         "2012",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2071356.2071362",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jan 26 16:38:42 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Electromigration and voltage drop (IR-drop) are two
                 major reliability issues in modern IC design.
                 Electromigration gradually creates permanently open or
                 short circuits due to excessive current densities;
                 IR-drop causes insufficient power supply, thus
                 degrading performance or even inducing functional
                 errors because of nonzero wire resistance. Both types
                 of failure can be triggered by insufficient wire
                 widths. Although expanding the wire width alleviates
                 electromigration and IR-drop, unlimited expansion not
                 only increases the routing cost, but may also be
                 infeasible due to the limited routing resource. In
                 addition, electromigration and IR-drop manifest mainly
                 in the power/ground (P/G) network. Therefore, taking
                 wire widths into consideration is desirable to prevent
                 electromigration and IR-drop at P/G routing.",
  acknowledgement = ack-nhfb,
  articleno =    "6",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Ioannides:2012:CDT,
  author =       "Charalambos Ioannides and Kerstin I. Eder",
  title =        "Coverage-Directed Test Generation Automated by Machine
                 Learning --- a Review",
  journal =      j-TODAES,
  volume =       "17",
  number =       "1",
  pages =        "7:1--7:??",
  month =        jan,
  year =         "2012",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2071356.2071363",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jan 26 16:38:42 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "The increasing complexity and size of digital designs,
                 in conjunction with the lack of a potent verification
                 methodology that can effectively cope with this trend,
                 continue to inspire engineers and academics in seeking
                 ways to further automate design verification. In an
                 effort to increase performance and to decrease
                 engineering effort, research has turned to artificial
                 intelligence (AI) techniques for effective solutions.
                 The generation of tests for simulation-based
                 verification can be guided by machine-learning
                 techniques. In fact, recent advances demonstrate that
                 embedding machine-learning (ML) techniques into a
                 coverage-directed test generation (CDG) framework can
                 effectively automate the test generation process,
                 making it more effective and less error-prone.",
  acknowledgement = ack-nhfb,
  articleno =    "7",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Pan:2012:ERE,
  author =       "Zhaoliang Pan and Melvin A. Breuer",
  title =        "Error Rate Estimation for Defective Circuits via Ones
                 Counting",
  journal =      j-TODAES,
  volume =       "17",
  number =       "1",
  pages =        "8:1--8:??",
  month =        jan,
  year =         "2012",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2071356.2071364",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jan 26 16:38:42 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "With VLSI circuit feature size scaling down, it is
                 becoming more difficult and expensive to achieve a
                 desired level of yield. Error-tolerance employs
                 defective chips that occasionally produce erroneous yet
                 acceptable results in targeted applications, and has
                 been proposed as one way to increase effective yield.
                 These chips are characterized by criteria set by
                 specific applications. Error rate, an upper-bound on
                 how frequent errors occur at an output, is one such
                 criterion. In this article we focus on the following
                 problem: given a combinational logic circuit that is
                 defective, and hence occasionally produces an erroneous
                 output, how can we determine the error rate of each
                 output line by using ones counting?",
  acknowledgement = ack-nhfb,
  articleno =    "8",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Peng:2012:SSE,
  author =       "Huan-Kai Peng and Hsuan-Ming Huang and Yu-Hsin Kuo and
                 Charles H.-P. Wen",
  title =        "Statistical Soft Error Rate {(SSER)} Analysis for
                 Scaled {CMOS} Designs",
  journal =      j-TODAES,
  volume =       "17",
  number =       "1",
  pages =        "9:1--9:??",
  month =        jan,
  year =         "2012",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2071356.2071365",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jan 26 16:38:42 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "This article re-examines the soft error effect caused
                 by radiation-induced particles beyond the deep
                 submicron regime. Considering the impact of process
                 variations, voltage pulse widths of transient faults
                 are found no longer monotonically diminishing after
                 propagation, as they were formerly. As a result, the
                 soft error rates in scaled electronic designs escape
                 traditional static analysis and are seriously
                 underestimated. In this article we formulate the
                 statistical soft error rate (SSER) problem and present
                 two frameworks to cope with the aforementioned
                 sophisticated issues. The table-lookup framework
                 captures the change of transient-fault distributions
                 implicitly by using a Monte-Carlo approach, whereas the
                 SVR-learning framework does the task explicitly by
                 using statistical learning theory.",
  acknowledgement = ack-nhfb,
  articleno =    "9",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Gong:2012:FNM,
  author =       "Fang Gong and Xuexin Liu and Hao Yu and Sheldon X.-D.
                 Tan and Junyan Ren and Lei He",
  title =        "A Fast Non-{Monte-Carlo} Yield Analysis and
                 Optimization by Stochastic Orthogonal Polynomials",
  journal =      j-TODAES,
  volume =       "17",
  number =       "1",
  pages =        "10:1--10:??",
  month =        jan,
  year =         "2012",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2071356.2071366",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jan 26 16:38:42 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Performance failure has become a significant threat to
                 the reliability and robustness of analog circuits. In
                 this article, we first develop an efficient
                 non-Monte-Carlo (NMC) transient mismatch analysis,
                 where transient response is represented by stochastic
                 orthogonal polynomial (SOP) expansion under PVT
                 variations and probabilistic distribution of transient
                 response is solved. We further define performance yield
                 and derive stochastic sensitivity for yield within the
                 framework of SOP, and finally develop a gradient-based
                 multiobjective optimization to improve yield while
                 satisfying other performance constraints. Extensive
                 experiments show that compared to Monte Carlo-based
                 yield estimation, our NMC method achieves up to 700X
                 speedup and maintains 98\% accuracy.",
  acknowledgement = ack-nhfb,
  articleno =    "10",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Wu:2012:ESF,
  author =       "Meng-Huan Wu and Peng-Chih Wang and Cheng-Yang Fu and
                 Ren-Song Tsay",
  title =        "An Extended {SystemC} Framework for Efficient
                 {HW\slash SW} Co-Simulation",
  journal =      j-TODAES,
  volume =       "17",
  number =       "2",
  pages =        "11:1--11:??",
  month =        apr,
  year =         "2012",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2159542.2159543",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Apr 20 17:41:41 MDT 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "In this article, we propose an extended SystemC
                 framework that directly enables software simulation in
                 SystemC. Although SystemC has been widely adopted for
                 system-level simulation of hardware designs nowadays,
                 to complete HW/SW co-simulation, it still requires an
                 additional instruction set simulator (ISS) for software
                 execution. However, the heavy intercommunication
                 overheads between the two heterogeneous simulators
                 would significantly slow down simulation performance.
                 To deal with this issue, our proposed approach
                 automatically generates high-speed and equivalent
                 SystemC models for target software applications that
                 can be directly integrated with hardware models for
                 complete HW/SW co-simulation. In addition, to properly
                 handle multitasking, an efficient OS model is devised
                 to support accurate preemptive scheduling. Since both
                 the generated application model and the OS model are
                 constructed in SystemC modules, our approach avoids
                 heavy intercommunication overheads and achieves over
                 1,000 times faster simulation than that of the
                 conventional ISS-SystemC approach. Experimental results
                 demonstrate that our extended SystemC approach can
                 perform at 50 to 220 MIPS while offering accurate
                 simulation results.",
  acknowledgement = ack-nhfb,
  articleno =    "11",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Zhou:2012:ONC,
  author =       "Pingqiang Zhou and Ping-Hung Yuh and Sachin S.
                 Sapatnekar",
  title =        "Optimized {$3$D} Network-on-Chip Design Using
                 Simulated Allocation",
  journal =      j-TODAES,
  volume =       "17",
  number =       "2",
  pages =        "12:1--12:??",
  month =        apr,
  year =         "2012",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2159542.2159544",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Apr 20 17:41:41 MDT 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Three-dimensional (3D) silicon integration
                 technologies have provided new opportunities for
                 Network-on-Chip (NoC) architecture design in
                 Systems-on-Chip (SoCs). In this article, we consider
                 the application-specific NoC architecture design
                 problem in a 3D environment. We present an efficient
                 floorplan-aware 3D NoC synthesis algorithm based on
                 simulated allocation (SAL), a stochastic method for
                 traffic flow routing, and accurate power and delay
                 models for NoC components. We demonstrate that this
                 method finds greatly improved solutions compared to a
                 baseline algorithm reflecting prior work. To evaluate
                 the SAL method, we compare its performance with the
                 widely used simulated annealing (SA) method and show
                 that SAL is much faster than SA for this application,
                 while providing solutions of very similar quality. We
                 then extend the approach from a single-path routing to
                 a multipath routing scheme and explore the trade-off
                 between power consumption and runtime for these two
                 schemes. Finally, we study the impact of various
                 factors on the network performance in 3D NoCs,
                 including the TSV count and the number of 3D tiers. Our
                 studies show that link power and delay can be
                 significantly improved when moving from a 2D to a 3D
                 implementation, but the improvement flattens out as the
                 number of 3D tiers goes beyond a certain point.",
  acknowledgement = ack-nhfb,
  articleno =    "12",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Sun:2012:PTA,
  author =       "Guangyu Sun and Huazhong Yang and Yuan Xie",
  title =        "Performance\slash Thermal-Aware Design of
                 {$3$D}-Stacked {L2} Caches for {CMPs}",
  journal =      j-TODAES,
  volume =       "17",
  number =       "2",
  pages =        "13:1--13:??",
  month =        apr,
  year =         "2012",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2159542.2159545",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Apr 20 17:41:41 MDT 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Three-dimensional (3D) stacking technology enables
                 integration of more memory on top of chip
                 multiprocessors (CMPs). As the number of cores and the
                 capacity of on-chip memory increase, the Non-Uniform
                 Cache Architecture (NUCA) becomes more attractive.
                 Compared to 2D cases, 3D stacking provides more options
                 for the design of on-chip memory due to numerous
                 advantages, such as the extra layout dimension, low
                 latency across layers, etc. On the other hand, 3D
                 stacking aggravates the thermal problem due to the
                 increase of power density. In this work, we first study
                 the design of 3D-stacked set-associative L2 caches
                 through managing the placement of cache ways. The
                 evaluation results show that the placement and
                 corresponding management of 3D cache ways have an
                 impact on the performance of CMPs. Then, we show that
                 the efficiency of thermal control is also related to
                 the placement of cache ways. For caches implemented
                 with different memory technologies, the placement and
                 management of cache ways have different effects on
                 power consumption and power distribution. Consequently,
                 we propose techniques to improve the efficiency of
                 thermal control for different memory technologies. The
                 evaluation results show the trade-off between
                 performance and thermal control efficiency.",
  acknowledgement = ack-nhfb,
  articleno =    "13",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Wu:2012:TAS,
  author =       "Chin-Hsien Wu and Hsin-Hung Lin",
  title =        "Timing Analysis of System Initialization and Crash
                 Recovery for a Segment-Based Flash Translation Layer",
  journal =      j-TODAES,
  volume =       "17",
  number =       "2",
  pages =        "14:1--14:??",
  month =        apr,
  year =         "2012",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2159542.2159546",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Apr 20 17:41:41 MDT 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Recently, the capacity of flash-memory storage systems
                 has grown rapidly, and flash-memory technology has
                 advanced along with the wave of consumer electronics
                 and embedded systems. In order to properly manage
                 product cost and initialization performance, vendors
                 face serious challenges in system design and analysis.
                 Thus, the timing analysis of system initialization and
                 crash recovery for a segment-based flash translation
                 layer has become an important research topic. This
                 article focuses on system initialization, crash
                 recovery, and timing analysis. The timing analysis of
                 system initialization involves the relationship between
                 the size of the main memory and the system
                 initialization time. The timing analysis of crash
                 recovery explains the worst case recovery time. The
                 experiments in this study show that the timing analysis
                 of system initialization and crash recovery can be
                 applied to the segment-based flash translation layer.",
  acknowledgement = ack-nhfb,
  articleno =    "14",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Milder:2012:CGH,
  author =       "Peter Milder and Franz Franchetti and James C. Hoe and
                 Markus P{\"u}schel",
  title =        "Computer Generation of Hardware for Linear Digital
                 Signal Processing Transforms",
  journal =      j-TODAES,
  volume =       "17",
  number =       "2",
  pages =        "15:1--15:??",
  month =        apr,
  year =         "2012",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2159542.2159547",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Apr 20 17:41:41 MDT 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Linear signal transforms such as the discrete Fourier
                 transform (DFT) are very widely used in digital signal
                 processing and other domains. Due to high performance
                 or efficiency requirements, these transforms are often
                 implemented in hardware. This implementation is
                 challenging due to the large number of algorithmic
                 options (e.g., fast Fourier transform algorithms or
                 FFTs), the variety of ways that a fixed algorithm can
                 be mapped to a sequential datapath, and the design of
                 the components of this datapath. The best choices
                 depend heavily on the resource budget and the
                 performance goals of the target application. Thus, it
                 is difficult for a designer to determine which set of
                 options will best meet a given set of requirements. In
                 this article we introduce the Spiral hardware
                 generation framework and system for linear transforms.
                 The system takes a problem specification as input as
                 well as directives that define characteristics of the
                 desired datapath. Using a mathematical language to
                 represent and explore transform algorithms and datapath
                 characteristics, the system automatically generates an
                 algorithm, maps it to a datapath, and outputs a
                 synthesizable register transfer level Verilog
                 description suitable for FPGA or ASIC implementation.
                 The quality of the generated designs rivals the best
                 available handwritten IP cores.",
  acknowledgement = ack-nhfb,
  articleno =    "15",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Weng:2012:TOS,
  author =       "Shih-Hung Weng and Yu-Min Kuo and Shih-Chieh Chang",
  title =        "Timing Optimization in Sequential Circuit by
                 Exploiting Clock-Gating Logic",
  journal =      j-TODAES,
  volume =       "17",
  number =       "2",
  pages =        "16:1--16:??",
  month =        apr,
  year =         "2012",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2159542.2159548",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Apr 20 17:41:41 MDT 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Clock gating is a popular technique for reducing power
                 dissipation. In a circuit with clock gating, the clock
                 signal can be shut off without changing the
                 functionality under certain clock-gating conditions. In
                 this article, we observe that the clock-gating
                 conditions and the next-state function of a Flip-Flop
                 (FF) are correlated and can be used for sequential
                 circuit optimization. We also show that the
                 implementation of the next-state function of any FF can
                 be just an inverter if the clock signal is
                 appropriately gated. By exploiting the flexibility
                 between the clock-gating conditions and the next-state
                 function, we propose an iterative optimization
                 algorithm to improve the timing of sequential circuits.
                 We present experimental results of a set of benchmark
                 circuits with a timing improvement of 10.20\% on
                 average.",
  acknowledgement = ack-nhfb,
  articleno =    "16",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Kurimoto:2012:YRI,
  author =       "Masanori Kurimoto and Jun Matsushima and Shigeki
                 Ohbayashi and Yoshiaki Fukui and Michio Komoda and
                 Nobuhiro Tsuda",
  title =        "A Yield and Reliability Improvement Methodology Based
                 on Logic Redundant Repair with a Repairable Scan
                 Flip-Flop Designed by Push Rule",
  journal =      j-TODAES,
  volume =       "17",
  number =       "2",
  pages =        "17:1--17:??",
  month =        apr,
  year =         "2012",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2159542.2159549",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Apr 20 17:41:41 MDT 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "We propose a yield improvement methodology which
                 repairs a faulty chip due to logic defect by using a
                 repairable scan flip-flop (R-SFF). Our methodology
                 improves area penalty, which is a large issue for logic
                 repair technology in actual products, by using repair
                 grouping and a redundant cell insertion algorithm and
                 by pushing the design rule for the repairable area of
                 R-SFF. Additionally, compared with the conventional
                 method, we reduce the number of wire connections around
                 redundant cells by improving the replacement method of
                 the faulty cell by the redundant cell. The proposed
                 methodology reduces the total area penalty caused by
                 the logic redundant repair to 3.6\% and improves the
                 yield, that is the number of good chips on a wafer, by
                 4.7\% when the defect density is 1.0[1/cm$^2$].
                 Furthermore, we propose the strategy to repair the
                 in-field failures due to latent defect for the chip
                 whose repair function had not been used in the shipment
                 test.",
  acknowledgement = ack-nhfb,
  articleno =    "17",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Xiang:2012:SFF,
  author =       "Dong Xiang and Zhen Chen and Laung-Terng Wang",
  title =        "Scan Flip-Flop Grouping to Compress Test Data and
                 Compact Test Responses for Launch-on-Capture Delay
                 Testing",
  journal =      j-TODAES,
  volume =       "17",
  number =       "2",
  pages =        "18:1--18:??",
  month =        apr,
  year =         "2012",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2159542.2159550",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Apr 20 17:41:41 MDT 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Test data compression is a much more difficult problem
                 for launch-on-capture (LOC) delay testing, because test
                 data for LOC delay testing is much more than that of
                 stuck-at fault testing, and LOC delay fault test
                 generation in the two-frame circuit model can specify
                 many more inputs. A new scan architecture is proposed
                 to compress test stimulus data, compact test responses,
                 and reduce test application time for LOC delay fault
                 testing. The new scan architecture merges a number of
                 scan flip-flops into the same group, where all scan
                 flip-flops in the same group are assigned the same
                 values for all test pairs. Sufficient conditions are
                 presented for including any pair of scan flip-flops
                 into the same group for LOC transition, non-robust path
                 delay, and robust path delay fault testing. Test data
                 for LOC delay testing based on the new scan
                 architecture can be compressed significantly. Test
                 application time can also be reduced greatly.
                 Sufficient conditions are presented to construct a test
                 response compactor for LOC transition, non-robust, and
                 robust path delay fault testing. Folded scan forest and
                 test response compactor are constructed for further
                 test data compression. Sufficient experimental results
                 are presented to show the effectiveness of the
                 method.",
  acknowledgement = ack-nhfb,
  articleno =    "18",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Ray:2012:ISS,
  author =       "Sandip Ray and Jayanta Bhadra and Magdy S. Abadir and
                 Li-C. Wang and Aarti Gupta",
  title =        "Introduction to Special Section on Verification
                 Challenges in the Concurrent World",
  journal =      j-TODAES,
  volume =       "17",
  number =       "3",
  pages =        "19:1--19:??",
  month =        jun,
  year =         "2012",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2209291.2209292",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Jul 31 16:58:51 MDT 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  articleno =    "19",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Verbeek:2012:TFV,
  author =       "Freek Verbeek and Julien Schmaltz",
  title =        "Towards the formal verification of cache coherency at
                 the architectural level",
  journal =      j-TODAES,
  volume =       "17",
  number =       "3",
  pages =        "20:1--20:??",
  month =        jun,
  year =         "2012",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2209291.2209293",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Jul 31 16:58:51 MDT 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Cache coherency is one of the major issues in
                 multicore systems. Formal methods, in particular
                 model-checking, have been successful at verifying
                 high-level protocols, but, to the best of our
                 knowledge, the verification of cache coherency at the
                 architectural level is still an open issue. All
                 existing verification efforts assume a reliable
                 interconnect, that is, messages eventually reach their
                 destination. We discuss the challenge of discharging
                 this assumption at the architectural level where
                 implementation details of the interconnect are mixed
                 with a cache coherency protocol. Our automatic approach
                 is based on a well-defined set of primitives to express
                 architectural models, a generic model of communication
                 fabrics expressed in an automated theorem proving
                 system, and a dedicated algorithm for deadlock and
                 livelock detection. We argue that reliability depends
                 on the interaction between the interconnect and the
                 cache coherency protocol. They must be verified
                 together as their combination creates intricate
                 message dependencies. We sketch our verification
                 approach and apply it to a simple write-invalidate
                 protocol on the Spidergon network-on-chip from
                 STMicroelectronics. Our approach is promising. For this
                 simple protocol, networks with tens of agents and
                 hundreds of components can be analyzed within
                 seconds.",
  acknowledgement = ack-nhfb,
  articleno =    "20",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Holt:2012:FLP,
  author =       "Jim Holt and Jaideep Dastidar and David Lindberg and
                 John Pape and Peng Yang",
  title =        "A full lifecycle performance verification methodology
                 for multicore systems-on-chip",
  journal =      j-TODAES,
  volume =       "17",
  number =       "3",
  pages =        "21:1--21:??",
  month =        jun,
  year =         "2012",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2209291.2209294",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Jul 31 16:58:51 MDT 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Multicore Systems-on-Chip (MCSoC) are comprised of a
                 rich set of processor cores, specialized hardware
                 accelerators, and I/O interfaces. Functional
                 verification of these complex designs is a critical and
                 demanding task; however, focusing only on functional
                 verification is very risky because the motivation for
                 building such systems in the first place is to achieve
                 high levels of system throughput. Therefore a
                 functionally correct MCSoC that does not exhibit
                 sufficient performance will fail in the market. In
                 addition, limiting performance verification efforts to
                 analyzing individual system components in isolation is
                 insufficient due to: (1) the degree of system-level
                 resource contention that an application domain imposes
                 on the MCSoC, and (2) the degree of configuration
                 flexibility that is typically afforded by an MCSoC.
                 These factors motivate system-level performance
                 verification of MCSoC. This article presents an
                 important industrial case study of MCSoC performance
                 verification involving both pre- and postsilicon
                 analysis, highlighting the methodology used, the
                 lessons learned, and recommendations for improvement.",
  acknowledgement = ack-nhfb,
  articleno =    "21",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Elwakil:2012:DRM,
  author =       "Mohamed Elwakil and Zijiang Yang",
  title =        "Deterministic replay for message-passing-based
                 concurrent programs",
  journal =      j-TODAES,
  volume =       "17",
  number =       "3",
  pages =        "22:1--22:??",
  month =        jun,
  year =         "2012",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2209291.2209295",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Jul 31 16:58:51 MDT 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "The Multicore Communications API (MCAPI) is a new
                 message-passing API that was released by the Multicore
                 Association. MCAPI provides an interface designed for
                 closely distributed embedded systems with multiple
                 cores on a chip and/or chips on a board. Similar to
                 parallel programs in other domains, debugging MCAPI
                 programs is a challenging task due to their
                 nondeterministic behavior. In this article we present a
                 tool that is capable of deterministically replaying
                 MCAPI program executions, which provides valuable
                 insight for MCAPI developers in case of failure.",
  acknowledgement = ack-nhfb,
  articleno =    "22",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Deniz:2012:VCM,
  author =       "Etem Deniz and Alper Sen and Jim Holt",
  title =        "Verification and coverage of message passing multicore
                 applications",
  journal =      j-TODAES,
  volume =       "17",
  number =       "3",
  pages =        "23:1--23:??",
  month =        jun,
  year =         "2012",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2209291.2209296",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Jul 31 16:58:51 MDT 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "We describe verification and coverage methods for
                 multicore software that uses message passing libraries
                 for communication. Specifically, we provide techniques
                 to improve reliability of software using the new
                 industry standard MCAPI by the Multicore Association.
                 We develop dynamic predictive verification techniques
                 that allow us to find actual and potential errors in
                 multicore software. Some of these error types are
                 deadlocks, race conditions, and violation of temporal
                 assertions. We complement our verification techniques
                 with a mutation-testing-based coverage metric. Coverage
                 metrics enable measuring the quality of verification
                 tests. We implemented our techniques in tools and
                 validated them on several multicore programs that use
                 the MCAPI standard. We implement our techniques in
                 tools and experimentally show the effectiveness of our
                 approach. We find errors that are not found using
                 traditional dynamic verification techniques and we can
                 potentially explore execution schedules different than
                 the original program with our coverage tool. This is
                 the first time such predictive verification and
                 coverage metrics have been developed for MCAPI.",
  acknowledgement = ack-nhfb,
  articleno =    "23",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Qin:2012:DTG,
  author =       "Xiaoke Qin and Prabhat Mishra",
  title =        "Directed test generation for validation of multicore
                 architectures",
  journal =      j-TODAES,
  volume =       "17",
  number =       "3",
  pages =        "24:1--24:??",
  month =        jun,
  year =         "2012",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2209291.2209297",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Jul 31 16:58:51 MDT 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Functional validation is widely acknowledged as a
                 major challenge for multicore architectures. Directed
                 tests are promising since a significantly smaller
                 number of directed tests can achieve the same coverage
                 goal compared to constrained-random tests. SAT-based
                 bounded model checking is effective for automated
                 generation of directed tests (counterexamples). While
                 existing approaches focus on clause forwarding between
                 different bounds to reduce the test generation time,
                 this article proposes a novel technique that exploits
                 temporal, structural, and spatial symmetry in multicore
                 designs at the same time. Our proposed technique
                 enables the reuse of the knowledge learned from one
                 core to the remaining cores in multicore architectures
                 (structural symmetry), from one bound to the next for a
                 given property (temporal symmetry), as well as from one
                 property to other properties (spatial symmetry). The
                 experimental results demonstrate that our approach can
                 significantly (3--10 times) reduce overall test
                 generation time compared to existing approaches.",
  acknowledgement = ack-nhfb,
  articleno =    "24",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Singh:2012:TRT,
  author =       "Padmaraj Singh and Vijaykrishnan Narayanan and David
                 L. Landis",
  title =        "Targeted random test generation for power-aware
                 multicore designs",
  journal =      j-TODAES,
  volume =       "17",
  number =       "3",
  pages =        "25:1--25:??",
  month =        jun,
  year =         "2012",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2209291.2209298",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Jul 31 16:58:51 MDT 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Multicore Register Transfer Level (RTL) model
                 simulations are indispensable in exposing subtle memory
                 subsystem bugs. Validating memory consistency,
                 coherency, and atomicity is a crucial design
                 verification task. Random MultiProcessor (MP) test
                 generators play critical roles in pre- and post-silicon
                 validation. The Advanced Configuration and Power
                 Interface (ACPI) standard supports dynamic frequency
                 and voltage scaling by controlling performance states
                 (P-States), yet multicore verification is generally
                 conducted with cores operating at the P0-State.
                 Independently varying core frequencies introduces new
                 sets of intracore and intercore traffic latencies. The
                 article introduces targeted random MP test generation
                 techniques for multicore P-State functional
                 verification. It develops a simple coverage metric to
                 evaluate MP test effectiveness. The metric is
                 demonstrated on MIPS instruction-set-based random MP
                 tests. A novel technique is introduced to modulate the
                 test address space by the spherical Bessel function.
                 The technique delivers an order of magnitude coverage
                 improvement over completely random tests. The article
                 then outlines minimal P-State combinations to be
                 exercised by MP tests. It also formulates two new
                 methodologies to set up and apply MP tests for
                 effective multicore P-State coverage. The methodologies
                 are termed SimInit and SimTransition. First-level
                 analyses indicate that these methods can deliver 97\%
                 to 100\% improvement over random MP test coverage.",
  acknowledgement = ack-nhfb,
  articleno =    "25",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Jang:2012:AAA,
  author =       "Wooyoung Jang and David Z. Pan",
  title =        "{A3MAP}: Architecture-aware analytic mapping for
                 networks-on-chip",
  journal =      j-TODAES,
  volume =       "17",
  number =       "3",
  pages =        "26:1--26:??",
  month =        jun,
  year =         "2012",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2209291.2209299",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Jul 31 16:58:51 MDT 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "In this article, we propose novel and global
                 Architecture-Aware Analytic MAPping (A3MAP) algorithms
                 applied to Networks-on-Chip (NoCs) not only with
                 homogeneous Processing Elements (PEs) on a regular mesh
                 network as done by most previous application mapping
                 algorithms but also with heterogeneous PEs on an
                 irregular mesh or custom network. As the main
                 contributions, we develop a simple yet efficient
                 interconnection matrix that can easily model any core
                 graph and network. Then, an application mapping problem
                 is exactly formulated to Mixed Integer Quadratic
                 Programming (MIQP). Since MIQP is NP-hard, we propose
                 two effective heuristics, a successive relaxation
                 algorithm achieving short runtime, called A3MAP-SR and
                 a genetic algorithm achieving high mapping quality,
                 called A3MAP-GA. We also propose a partition-based
                 application mapping approach for large-scale NoCs,
                 which provides a better trade-off between performance
                 and runtime. Experimental results show that A3MAP
                 algorithms reduce total hop count, compared to the
                 previous application mapping algorithms optimized for a
                 regular mesh network, called NMAP [Murali and Micheli
                 2004] and for an irregular mesh and custom network,
                 called CMAP [Tornero et al. 2008]. Furthermore, A3MAP
                 algorithms make packets travel a shorter distance than
                 CMAP, which is related to energy consumption.",
  acknowledgement = ack-nhfb,
  articleno =    "26",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Foroozannejad:2012:PBM,
  author =       "Mohammad H. Foroozannejad and Trevor Hodges and Matin
                 Hashemi and Soheil Ghiasi",
  title =        "Postscheduling buffer management trade-offs in
                 streaming software synthesis",
  journal =      j-TODAES,
  volume =       "17",
  number =       "3",
  pages =        "27:1--27:??",
  month =        jun,
  year =         "2012",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2209291.2209300",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Jul 31 16:58:51 MDT 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Streaming applications, which are abundant in many
                 disciplines such as multimedia, networking, and signal
                 processing, require efficient processing of a seemingly
                 infinite sequence of input data. In the context of
                 streaming software synthesis from data flow graphs, we
                 study the inherent trade-off between memory requirement
                 and compilation runtime, under a given task firing
                 schedule. We utilize postscheduling analysis
                 granularity to control the amount of detail in
                 characterization of buffers' spatio-temporal
                 footprints. Subsequently, we transform the buffer
                 allocation problem to two-dimensional packing of
                 polygons, where complexity of the packing problem
                 (e.g., polygon shapes) is determined by the analysis
                 granularity. We develop an evolutionary packing
                 optimization algorithm which readily yields buffer
                 allocations. Experimental results highlight the
                 trade-off between complexity of the analysis and the
                 total buffer size of generated implementations. In
                 addition, they show dramatic improvements in total
                 buffer size, if one is willing to pay the additional
                 cost in optimization runtime.",
  acknowledgement = ack-nhfb,
  articleno =    "27",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Salamy:2012:ISA,
  author =       "Hassan Salamy and J. Ramanujam",
  title =        "An {ILP} solution to address code generation for
                 embedded applications on digital signal processors",
  journal =      j-TODAES,
  volume =       "17",
  number =       "3",
  pages =        "28:1--28:??",
  month =        jun,
  year =         "2012",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2209291.2209301",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Jul 31 16:58:51 MDT 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Digital Signal Processors (DSPs) are a family of
                 embedded processors designed under tight memory, area,
                 and cost constraints. Many DSPs use irregular
                 addressing modes where base-plus-offset mode is not
                 supported. However, they often have Address Generation
                 Units (AGUs) that can perform auto-increment/decrement
                 address arithmetic instructions in parallel with
                 Load/Store instructions. This feature can be utilized
                 to reduce the number of explicit address arithmetic
                 instructions and thus reduce the embedded application
                 code size. This code size reduction is essential for
                 this family of DSPs as the code usually resides in the
                 ROM and hence the code size directly translates into
                 silicon area. An effective technique for optimized code
                 generation is offset assignment. This is a well-used
                 technique in the literature to decrease the code size
                 by finding an offset assignment that can effectively
                 utilize auto-increment/decrement. This problem is known
                 as simple offset assignment when there is only one
                 address register and as General Offset Assignment (GOA)
                 for multiple available address registers. In this
                 article, we present an optimal Integer Linear
                 Programming (ILP) solution to the offset assignment
                 problem with variable coalescing where more than one
                 variable can share the same memory location. Variable
                 permutation is also formulated to find the best access
                 sequence to achieve the best offset assignment that
                 decreases the code size the most. Experimental results
                 on several benchmarks show the effectiveness of our
                 variable permutation technique as well as the large
                 improvement from the ILP-based solutions compared to
                 heuristics.",
  acknowledgement = ack-nhfb,
  articleno =    "28",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Schafer:2012:DCH,
  author =       "Benjamin Carrion Schafer and Kazutoshi Wakabayashi",
  title =        "Divide and conquer high-level synthesis design space
                 exploration",
  journal =      j-TODAES,
  volume =       "17",
  number =       "3",
  pages =        "29:1--29:??",
  month =        jun,
  year =         "2012",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2209291.2209302",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Jul 31 16:58:51 MDT 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "A method to accelerate the Design Space Exploration
                 (DSE) of behavioral descriptions for high-level
                 synthesis based on a divide and conquer method called
                 Divide and Conquer Exploration Algorithm (DC-ExpA) is
                 presented. DC-ExpA parses an untimed behavioral
                 description given in C or SystemC and clusters
                 interdependent operations which are in turn explored
                 independently by inserting synthesis directives
                 automatically in the source code. The method then
                 continues by combining the exploration results to
                 obtain only Pareto-optimal designs. This method
                 accelerates the design space exploration considerably
                 and is compared against two previous methods: an
                 Adaptive Simulated Annealer Exploration Algorithm
                 (ASA-ExpA) that shows good optimality at high runtimes,
                 and a pattern matching method called Clustering Design
                 Space Exploration Acceleration (CDS-ExpA) that is fast
                 but suboptimal. Our proposed method is orthogonal to
                 previous exploration methods that focus on the
                 exploration of resource constraints, allocation,
                 binding, and/or scheduling. Our proposed method, on the
                 contrary, sets local synthesis directives that decide
                 upon the overall architectural structure of the design
                 (e.g., mapping certain arrays to memories or
                 registers). Results show that DC-ExpA explores the
                 design space on average 61\% faster than ASA-ExpA,
                 obtaining comparable results indicated by several
                 quality indicators, for example, distance to reference
                 Pareto-front, hypervolume, and Pareto dominance.
                 Compared to CDS-ExpA it is 69\% slower, but obtains
                 much better results compared to the same quality
                 indicators.",
  acknowledgement = ack-nhfb,
  articleno =    "29",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Karfa:2012:FVC,
  author =       "Chandan Karfa and Chittaranjan Mandal and Dipankar
                 Sarkar",
  title =        "Formal verification of code motion techniques using
                 data-flow-driven equivalence checking",
  journal =      j-TODAES,
  volume =       "17",
  number =       "3",
  pages =        "30:1--30:??",
  month =        jun,
  year =         "2012",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2209291.2209303",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Jul 31 16:58:51 MDT 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "A formal verification method for checking correctness
                 of code motion techniques is presented in this article.
                 Finite State Machine with Datapath (FSMD) models have
                 been used to represent the input and the output
                 behaviors of each synthesis step. The method introduces
                 cutpoints in one FSMD, visualizes its computations as
                 concatenation of paths from cutpoints to cutpoints, and
                 then identifies equivalent finite path segments in the
                 other FSMD; the process is then repeated with the FSMDs
                 interchanged. Unlike many other reported techniques,
                 the method is capable of verifying both uniform and
                 nonuniform code motion techniques. It has been
                 underlined in this work that for nonuniform code
                 motions, identifying equivalent path segments involves
                 model checking of some data-flow properties. Our method
                 automatically identifies the situations where such
                 properties need to be checked during equivalence
                 checking, generates the appropriate properties, and
                 invokes the model checking tool NuSMV to verify them.
                 The correctness and the complexity of the method have
                 been dealt with. Experimental results demonstrate the
                 effectiveness of the method.",
  acknowledgement = ack-nhfb,
  articleno =    "30",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Linehan:2012:MDA,
  author =       "{\'E}amonn Linehan and Eamonn O'Toole and Siobh{\'a}n
                 Clarke",
  title =        "Model-driven automation for simulation-based
                 functional verification",
  journal =      j-TODAES,
  volume =       "17",
  number =       "3",
  pages =        "31:1--31:??",
  month =        jun,
  year =         "2012",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2209291.2209304",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Jul 31 16:58:51 MDT 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Developing testbenches for dynamic functional
                 verification of hardware designs is a
                 software-intensive process that lies on the critical
                 path of electronic system design. The increasing
                 capabilities of electronic components are contributing
                 to the construction of complex verification
                 environments that are increasingly difficult to
                 understand, maintain, extend, and reuse across
                 projects. Model-driven software engineering addresses
                 issues of complexity, productivity, and code quality
                 through the use of high-level system models and
                 subsequent automatic transformations. Reasoning about
                 verification testbench decomposition becomes simpler at
                 higher levels of abstraction. In particular, the
                 aspect-oriented paradigm, when applied at the model
                 level, can minimize the overlap in functionality
                 between modules, improving maintainability and
                 reusability. This article presents an aspect-oriented
                 model-driven engineering process and toolset for the
                 development of hardware verification testbenches. We
                 illustrate how this process and toolset support
                 modularized design and automatic transformation to
                 verification environment-specific models and source
                 code through an industry case study.",
  acknowledgement = ack-nhfb,
  articleno =    "31",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Qian:2012:FPS,
  author = {Haifeng Qian and Sachin S. Sapatnekar and Eren Kursun},
  title = {{Fast Poisson Solvers} for thermal analysis},
  journal = j-TODAES,
  volume = {17},
  number = {3},
  pages = {32:1--32:??},
  month = jun,
  year = {2012},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/2209291.2209305},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Tue Jul 31 16:58:51 MDT 2012},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {Accurate and efficient thermal analysis for a VLSI chip is
    crucial, both for sign-off reliability verification and for
    design-time circuit optimization. To determine an accurate
    temperature profile, it is important to simulate a die together with
    its thermal mounts: this requires solving Poisson's equation on a
    nonrectangular 3D domain. This article presents a class of
    eigendecomposition-based Fast Poisson Solvers (FPS) for chip-level
    thermal analysis. We start with a solver that solves a rectangular 3D
    domain with mixed boundary conditions in $ O(N \cdot \log N) $ time,
    where $N$ is the dimension of the finite difference matrix. Then we
    reveal, for the first time in the literature, a strong relation
    between fast Poisson solvers and Green-function-based methods.
    Finally, we propose an FPS method that leverages the preconditioned
    conjugate gradient method to solve nonrectangular 3D domains
    efficiently. We demonstrate this approach on thermal analysis of an
    industrial microprocessor, showing accurate results verified by a
    commercial tool, and that it solves a system of dimension 4.54e6 in
    only 13 conjugate gradient iterations, with a runtime of 65 seconds,
    a 15X speedup over the popular ICCG solver.},
  acknowledgement = ack-nhfb,
  articleno = {32},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems
    (TODAES)},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Guthaus:2012:HPC,
  author = {Matthew R. Guthaus and Xuchu Hu and Gustavo Wilke and
    Guilherme Flach and Ricardo Reis},
  title = {High-performance clock mesh optimization},
  journal = j-TODAES,
  volume = {17},
  number = {3},
  pages = {33:1--33:??},
  month = jun,
  year = {2012},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/2209291.2209306},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Tue Jul 31 16:58:51 MDT 2012},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {Clock meshes are extremely effective at producing low-skew
    regional clock networks that are tolerant of environmental and
    process variations. For this reason, clock meshes are used in most
    high-performance designs, but this robustness consumes significant
    power. In this work, we present two techniques to optimize
    high-performance clock meshes. The first technique is a mesh
    perturbation methodology for nonuniform mesh routing. The second
    technique is a skew-aware buffer placement through iterative buffer
    deletion. We demonstrate how these optimizations can achieve
    significant power reductions and a near elimination of short-circuit
    power. In addition, the total wire length is decreased, the number of
    required buffers is decreased, and both skew and robustness are
    improved on average when variation is considered.},
  acknowledgement = ack-nhfb,
  articleno = {33},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems
    (TODAES)},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Lin:2012:LBC,
  author = {Kuan-Yu Lin and Hong-Ting Lin and Tsung-Yi Ho and Chia-Chun
    Tsai},
  title = {Load-balanced clock tree synthesis with adjustable delay buffer
    insertion for clock skew reduction in multiple dynamic supply voltage
    designs},
  journal = j-TODAES,
  volume = {17},
  number = {3},
  pages = {34:1--34:??},
  month = jun,
  year = {2012},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/2209291.2209307},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Tue Jul 31 16:58:51 MDT 2012},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {Power consumption is known to be a crucial issue in current
    IC designs. To tackle this problem, Multiple Dynamic Supply Voltage
    (MDSV) designs are proposed as an efficient solution for power
    savings. However, the increasing variability of clock skew during the
    switching of power modes leads to an increase in the complication of
    clock skew reduction in MDSV designs. In this article, we propose a
    load-balanced clock tree synthesizer with Adjustable Delay Buffer
    (ADB) insertion for clock skew reduction in MDSV designs. The clock
    tree synthesizer adopts the Minimum Spanning Tree (MST) metric to
    estimate the interconnect capacitance and execute the graph-theoretic
    clustering. The power-mode-guided optimization is also embedded into
    the clock tree synthesizer for improving additional area overhead in
    the step of ADB insertion. After constructing the initial buffered
    clock tree, we insert the ADBs with delay value assignments to reduce
    clock skew in MDSV designs. The ADBs can be used to produce
    additional delays, hence the clock latencies and skew become tunable
    in a clock tree. An efficient algorithm of ADB insertion for the
    minimization of clock skew, area, and runtime in MDSV designs has
    been presented. Comparing with the state-of-the-art algorithm of ADB
    insertion, experimental results show maximum 42.40\% area overhead
    improvement. With the power-mode-guided optimization, the maximum
    improvement of area overhead can increase to 47.87\%.},
  acknowledgement = ack-nhfb,
  articleno = {34},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems
    (TODAES)},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Liu:2012:FHA,
  author = {Chien-Nan Jimmy Liu and Yen-Lung Chen and Chin-Cheng Kuo and
    I-Ching Tsai},
  title = {A fast heuristic approach for parametric yield enhancement of
    analog designs},
  journal = j-TODAES,
  volume = {17},
  number = {3},
  pages = {35:1--35:??},
  month = jun,
  year = {2012},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/2209291.2209308},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Tue Jul 31 16:58:51 MDT 2012},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {In traditional yield enhancement approaches, a lot of
    computation efforts have to be paid first to find the feasible
    regions and the Pareto fronts, which will become a heavy cost for
    large analog circuits. In order to reduce the computation efforts,
    this article proposes a fast heuristic approach that tries to finish
    all iteration steps of the yield enhancement flow at behavior level.
    First, a novel force-directed Nominal Point Moving (NPM) algorithm is
    proposed to find a better nominal point without building the feasible
    regions. Then, an equation-based behavior-level sizing approach is
    proposed to map the NPM results at performance level to
    behavior-level parameters. A fast behavior-level Monte Carlo
    simulation is also proposed to shorten the iterative yield
    enhancement flow. Finally, using the obtained behavioral parameters
    as the sizing targets of each subblock, the device sizing time is
    significantly reduced instead of sizing from the system-level
    specifications directly. As demonstrated on several analog circuits,
    this heuristic approach could be another efficient methodology to
    help designers improve their analog circuits toward better yield.},
  acknowledgement = ack-nhfb,
  articleno = {35},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems
    (TODAES)},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Tu:2012:MFS,
  author = {Chia-Heng Tu and Shih-Hao Hung and Tung-Chieh Tsai},
  title = {{MCEmu}: a Framework for Software Development and Performance
    Analysis of Multicore Systems},
  journal = j-TODAES,
  volume = {17},
  number = {4},
  pages = {36:1--36:??},
  month = oct,
  year = {2012},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/2348839.2348840},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Mon Oct 22 10:59:18 MDT 2012},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {Developing software for heterogeneous multicore systems is
    particularly challenging even for experienced developers. While
    emulators have proven useful to application development, very few
    heterogeneous multicore emulators have been made available by vendors
    so far, as building an emulator for a heterogeneous multicore system
    has been a time-consuming and difficult task. Thus, we proposed a
    framework, called MCEmu, to speed up the process of building a
    heterogeneous multicore emulator by integrating existing and/or new
    processor emulators. MCEmu is designed to help system and application
    development, with a basic multicore board support package, an
    interprocessor communication library, and tools for debugging,
    tracing, and performance monitoring. In addition, MCEmu can run on a
    multicore host system to accelerate the emulation of data parallel
    applications. We show that MCEmu can be very useful for developing
    system software before the system becomes available, as it has helped
    us catch numerous functional and performance bugs which could have
    been hard to find. In this article, we present the design of MCEmu
    and demonstrate its capabilities with our case studies.},
  acknowledgement = ack-nhfb,
  articleno = {36},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems
    (TODAES)},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Alizadeh:2012:FVD,
  author = {Bijan Alizadeh},
  title = {Formal Verification and Debugging of Precise Interrupts on High
    Performance Microprocessors},
  journal = j-TODAES,
  volume = {17},
  number = {4},
  pages = {37:1--37:??},
  month = oct,
  year = {2012},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/2348839.2348841},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Mon Oct 22 10:59:18 MDT 2012},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {The increased parallelism provided by Out-Of-Order (OOO) and
    superscalar mechanisms have made the control portion of advanced
    processors more complicated so that the state-of-the-art formal
    verification techniques for Register-Transfer-Level (RTL) and
    gate-level designs cannot scale to the complexity of such complicated
    processors. Moreover, verification and debugging of exceptions and
    external interrupts on such processors are nontrivial tasks. Because
    the exceptions arrival time, the external interrupt arrival time, as
    well as the microprocessor response time must be precise,
    verification and debugging require sophisticated hardware and
    software capabilities. This article proposes techniques for effective
    verification and debugging of cycle-accurate OOO processors in the
    event of exceptions and external interrupts. The results show that
    our techniques reduce the complexity of the verification and
    debugging processes by reducing the number of simulation cycles (3.3
    $ \times $ average reduction) and the number of state variables (8.7
    $ \times $ average reduction) to be traced for localizing bugs.},
  acknowledgement = ack-nhfb,
  articleno = {37},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems
    (TODAES)},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Mukherjee:2012:SAA,
  author = {Subhankar Mukherjee and Pallab Dasgupta and Siddhartha
    Mukhopadhyay and Scott Little and John Havlicek and Srikanth
    Chandrasekaran},
  title = {Synchronizing {AMS} Assertions with {AMS} Simulation: From
    Theory to Practice},
  journal = j-TODAES,
  volume = {17},
  number = {4},
  pages = {38:1--38:??},
  month = oct,
  year = {2012},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/2348839.2348842},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Mon Oct 22 10:59:18 MDT 2012},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {The verification community anticipates the adoption of
    assertions in the Analog and Mixed-Signal (AMS) domain in the near
    future. Several questions need to be answered before AMS assertions
    are brought into practice, such as: (a) How will the languages for
    AMS assertions be different from the ones in the digital domain? (b)
    Does the analog simulator have to be assertion aware? (c) If so, then
    how and where on the time line will the AMS assertion checker
    synchronize with the analog simulator? and (d) What will be the
    performance penalty for monitoring AMS assertions accurately over
    analog simulation? This article attempts to answer these questions
    through theoretical analysis and empirical results obtained from
    industrial test cases. We study logics which extend Linear Temporal
    Logic (LTL) with predicates over real variables, and show that
    further extensions allowing the binding of real-valued variables
    across time makes the logic undecidable. We present a toolkit which
    can integrate with existing AMS simulators for checking AMS
    assertions on practical designs. We study the problem of
    synchronizing the AMS simulator with the AMS assertion checker and
    demonstrate the performance penalty of different synchronization
    options.},
  acknowledgement = ack-nhfb,
  articleno = {38},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems
    (TODAES)},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Lin:2012:RSP,
  author = {Hai Lin and Yunsi Fei},
  title = {Resource Sharing of Pipelined Custom Hardware Extension for
    Energy-Efficient Application-Specific Instruction Set Processor
    Design},
  journal = j-TODAES,
  volume = {17},
  number = {4},
  pages = {39:1--39:??},
  month = oct,
  year = {2012},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/2348839.2348843},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Mon Oct 22 10:59:18 MDT 2012},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {Application-Specific Instruction set Processor (ASIP) has
    become an increasingly popular platform for embedded systems because
    of its high performance, flexibility, and short turn-around time. The
    hardware extension in ASIPs can speed-up program execution. However,
    it also incurs area overhead and extra static energy consumption.
    Traditional datapath merging techniques reduce the circuit overhead
    by reusing hardware modules for executing multiple operations.
    However, they introduce structural hazard for multiple custom
    instructions in sequence, and hence reduce the performance
    improvement. In this article, we introduce a pipelined configurable
    structure for the hardware extension in ASIPs, so that structural
    hazards can be remedied. With multiple subgraphs of operations
    selected, we design a novel operation-to-hardware mapping algorithm
    based on Integer Linear Programming (ILP) to automatically construct
    a resource-efficient pipelined configurable functional unit.
    Different resource sharing schemes would affect both the hardware
    overhead and the overall performance improvement. We analyze the
    design trade-offs between resource efficiency and performance
    improvement. At the end, we present our design space exploration
    results by setting the optimization objective to area, area and
    delay, and delay respectively.},
  acknowledgement = ack-nhfb,
  articleno = {39},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems
    (TODAES)},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Lin:2012:HSC,
  author = {Hai Lin and Tiansi Hu and Yunsi Fei},
  title = {A Hardware\slash Software Cooperative Custom Register Binding
    Approach for Register Spill Elimination in Application-Specific
    Instruction Set Processors},
  journal = j-TODAES,
  volume = {17},
  number = {4},
  pages = {40:1--40:??},
  month = oct,
  year = {2012},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/2348839.2348844},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Mon Oct 22 10:59:18 MDT 2012},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {Application-Specific Instruction set Processor (ASIP) has
    become an important design choice for embedded systems. It can
    achieve both high flexibility offered by the base processor core and
    high performance and energy efficiency offered by the dedicated
    hardware extensions. Although a lot of efforts have been devoted to
    computation acceleration, for example, automatic custom instruction
    identification and synthesis, limited on-chip data storage elements
    including the register file and data cache have become a potential
    performance bottleneck. For custom instructions that have more inputs
    and/or outputs than the generic register file I/O ports, custom
    registers are added in ASIPs to satisfy the need of additional inputs
    and outputs, and traditionally they are used only by custom
    instructions. In this article, we propose a hardware/software
    cooperative approach with a linear scan register allocation
    algorithm, which allows base instructions to utilize the existing
    custom registers in ASIPs for eliminating register spills of the
    program. The data traffic between the base processor and off-chip
    memory can be replaced with energy-efficient on-chip communications
    between the processor core and custom hardware extensions. Our
    experimental results demonstrate that a significant performance gain
    can be achieved, orthogonal to improvements by other techniques in
    ASIP design.},
  acknowledgement = ack-nhfb,
  articleno = {40},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems
    (TODAES)},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Wang:2012:BOD,
  author =       "An-Ping Wang and Jiwon Hahn and Mahshid Roumi and Pai
                 H. Chou",
  title =        "Buffer Optimization and Dispatching Scheme for
                 Embedded Systems with Behavioral Transparency",
  journal =      j-TODAES,
  volume =       "17",
  number =       "4",
  pages =        "41:1--41:??",
  month =        oct,
  year =         "2012",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2348839.2348845",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Oct 22 10:59:18 MDT 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "This article presents a buffer minimization scheme
                 with low dispatching overhead for embedded software
                 processes. To accomplish this, we exploit behavioral
                 transparency in the model of computation. In such a
                 model (e.g., synchronous dataflow), the state of buffer
                 requirements is determined completely by the firing
                 sequence of the actors without requiring functional
                 simulation of the actors. Fine-grained buffer
                 allocation incurs high code and pointer overhead while
                 coarse-grained allocation suffers from memory
                 fragmentation. Instead, we propose a medium-grained,
                 ``access-contiguous'' buffer allocation scheme that
                 minimizes the total buffer space and pointer overhead.
                 We formulate the buffer allocation problem as 2D tiles
                 that represent the lifetime of the buffers to minimize
                 their memory occupation spatially and temporally.
                 Experimental results show that our scheme uses less
                 data memory than existing techniques by 26\% on
                 average, or up to 57\% in the best case. Our technique
                 retains code modularity for dynamic configuration and,
                 more importantly, enables many more applications that
                 otherwise would not fit if implemented using previous
                 state-of-the-art techniques.",
  acknowledgement = ack-nhfb,
  articleno =    "41",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Gately:2012:AJO,
  author = {Matthew B. Gately and Mark B. Yeary and Choon Yik Tang},
  title = {An Algorithm for Jointly Optimizing Quantization and Multiple
    Constant Multiplication},
  journal = j-TODAES,
  volume = {17},
  number = {4},
  pages = {42:1--42:??},
  month = oct,
  year = {2012},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/2348839.2348846},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Mon Oct 22 10:59:18 MDT 2012},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {This article presents a joint framework for quantization and
    Multiple Constant Multiplication (MCM) optimization, which yields a
    computationally efficient implementation of multiplierless
    multiplication in hardware and software. Frameworks of this nature
    have been developed in the context of Finite Impulse Response (FIR)
    filters, where frequency response specifications are used to drive
    the design. In this work, we look at a general case, considering as
    given a vector of ideal, real constants, which may come from any
    application and do not necessarily represent FIR filter coefficients.
    We first formulate a joint optimization problem for finding a
    fixed-point vector and a shift-add network that are optimal in terms
    of quantization error and MCM complexity. We then describe ways to
    finitize and prune the search space, leading to an efficient
    algorithm called JOINT\_SOLVE that solves the problem. Finally, via
    extensive randomized experiments, we show that our joint framework is
    notably more computationally efficient than a disjointed one,
    reducing the MCM cost by 15\%--60\% on moderate size problems.},
  acknowledgement = ack-nhfb,
  articleno = {42},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems
    (TODAES)},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Kim:2012:SAH,
  author = {Yonghwan Kim and Sanghoon Kwak and Taewhan Kim},
  title = {Synthesis of Adaptable Hybrid Adders for Area Optimization
    under Timing Constraint},
  journal = j-TODAES,
  volume = {17},
  number = {4},
  pages = {43:1--43:??},
  month = oct,
  year = {2012},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/2348839.2348847},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Mon Oct 22 10:59:18 MDT 2012},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {Satisfying the timing constraint is the utmost concern in the
    integrated circuit design and it is true that most critical timing
    paths in a circuit cover one or more arithmetic components such as
    adder, subtractor, and multiplier of which addition logic is commonly
    involved. This work addresses the problem of redesigning the addition
    logic (in a form of hybrid adder) on a critical timing path to meet
    the timing constraint while minimally allocating the required
    addition logic. Unlike the conventional hybrid adder design schemes
    in which they assume uniform or specific patterns of input signal
    arrival times and minimize the latest timing of the output signals,
    our work extracts the required timing of each output signal as well
    as the input arrival times directly from the circuit and
    resynthesizes the addition logic by creating a customized hybrid
    adder that is best suited, in terms of logic area, for meeting the
    timing constraint of the circuit. Specifically, we propose a
    systematic approach of hybrid adder design exploration, basically
    following the principle of dynamic programming with well-controlled
    pruning techniques. This work is realistic and practically very
    useful in that it can be used as a timing optimizer to the
    computation-intensive circuits with a tight timing budget. We provide
    a set of diverse experimental data to show how much the proposed
    hybrid adder scheme is effective in meeting or reducing timing while
    maintaining the circuit area as minimal as possible.},
  acknowledgement = ack-nhfb,
  articleno = {43},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems
    (TODAES)},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Backes:2012:SCD,
  author =       "John D. Backes and Marc D. Riedel",
  title =        "The Synthesis of Cyclic Dependencies with {Boolean}
                 Satisfiability",
  journal =      j-TODAES,
  volume =       "17",
  number =       "4",
  pages =        "44:1--44:??",
  month =        oct,
  year =         "2012",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2348839.2348848",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Oct 22 10:59:18 MDT 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "The accepted wisdom is that combinational circuits
                 must have acyclic (i.e., feed-forward) topologies. Yet
                 simple examples suggest that this is incorrect. In
                 fact, introducing cycles (i.e., feedback) into
                 combinational designs can lead to significant savings
                 in area and in delay. Prior work described
                 methodologies for synthesizing cyclic circuits with
                 Sum-Of-Product (SOP) and Binary-Decision Diagram
                 (BDD)-based formulations. Recently, techniques for
                 analyzing and mapping cyclic circuits based on Boolean
                 satisfiability (SAT) were proposed. This article
                 presents a SAT-based methodology for synthesizing
                 cyclic dependencies. The strategy is to generate cyclic
                 functional dependencies through a technique called
                 Craig interpolation. Given a choice of different
                 functional dependencies, a branch-and-bound search is
                 performed to pick the best one. Experiments on
                 benchmark circuits demonstrate the effectiveness of the
                 approach.",
  acknowledgement = ack-nhfb,
  articleno =    "44",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Bild:2012:SNR,
  author =       "David R. Bild and Robert P. Dick and Gregory E. Bok",
  title =        "Static {NBTI} Reduction Using Internal Node Control",
  journal =      j-TODAES,
  volume =       "17",
  number =       "4",
  pages =        "45:1--45:??",
  month =        oct,
  year =         "2012",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2348839.2348849",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Oct 22 10:59:18 MDT 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Negative Bias Temperature Instability (NBTI) is a
                 significant reliability concern for nanoscale CMOS
                 circuits. Its effects on circuit timing can be
                 especially pronounced for circuits with standby-mode
                 equipped functional units, because these units can be
                 subjected to static NBTI stress for extended periods of
                 time. This article describes Internal Node Control
                 (INC), in which the inputs to some individual gates are
                 directly manipulated to prevent this static NBTI
                 fatigue. We prove that the INC selection problem is
                 NP-complete and present a linear-time heuristic that
                 can quickly determine near-optimal placements. This
                 near-optimality is confirmed by comparing results for
                 small benchmarks against optimal solutions from a mixed
                 integer linear programming formulation of our problem.
                 We evaluate the heuristic on the ISCAS85 benchmarks and
                 the Synopsys DesignWare Library. Our heuristic reduces
                 static NBTI-induced delay over a ten year period by
                 30--60\% and can reduce total path delay by an average
                 9.4\% when NBTI degradation is severe. The INC
                 placements and sleep signal routing require only a
                 1.6\% increase in area.",
  acknowledgement = ack-nhfb,
  articleno =    "45",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Chang:2012:CDA,
  author =       "Nai-Wen Chang and Tzu-Yin Lin and Sun-Yuan Hsieh",
  title =        "Conditional Diagnosability of $k$-Ary $n$-Cubes under
                 the {PMC} Model",
  journal =      j-TODAES,
  volume =       "17",
  number =       "4",
  pages =        "46:1--46:??",
  month =        oct,
  year =         "2012",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2348839.2348850",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Oct 22 10:59:18 MDT 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Processor fault diagnosis plays an important role in
                 measuring the reliability of multiprocessor systems and
                 the diagnosis of many well-known interconnection
                 networks. The conditional diagnosability, which is more
                 general than the classical diagnosability, is to
                 measure the diagnosability of a multiprocessor system
                 under the assumption that all of the neighbors of any
                 node in the system cannot fail at the same time. This
                 study shows that the conditional diagnosability for
                 $k$-ary $n$-cubes under the PMC model is $ 8 n - 7$ for
                 $ k \geq 4$ and $ n \geq 4$.",
  acknowledgement = ack-nhfb,
  articleno =    "46",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Mondal:2012:SEP,
  author =       "Arijit Mondal and P. P. Chakrabarti and Pallab
                 Dasgupta",
  title =        "Symbolic-Event-Propagation-Based Minimal Test Set
                 Generation for Robust Path Delay Faults",
  journal =      j-TODAES,
  volume =       "17",
  number =       "4",
  pages =        "47:1--47:??",
  month =        oct,
  year =         "2012",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2348839.2348851",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Oct 22 10:59:18 MDT 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "We present a symbolic-event-propagation-based scheme
                 to generate hazard-free tests for robust path delay
                 faults. This approach identifies all robustly testable
                 paths in a circuit and the corresponding complete set
                 of test vectors. We address the problem of finding a
                 minimal set of test vectors that covers all robustly
                 testable paths. We propose greedy and
                 simulated-annealing-based algorithms to find the same.
                 Results on ISCAS89 benchmark circuits show a
                 considerable reduction in test vectors for covering all
                 robustly testable paths.",
  acknowledgement = ack-nhfb,
  articleno =    "47",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Wu:2012:LST,
  author =       "Shianling Wu and Laung-Terng Wang and Xiaoqing Wen and
                 Wen-Ben Jone and Michael S. Hsiao and Fangfang Li and
                 James Chien-Mo Li and Jiun-Lang Huang",
  title =        "Launch-on-Shift Test Generation for Testing Scan
                 Designs Containing Synchronous and Asynchronous Clock
                 Domains",
  journal =      j-TODAES,
  volume =       "17",
  number =       "4",
  pages =        "48:1--48:??",
  month =        oct,
  year =         "2012",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2348839.2348852",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Oct 22 10:59:18 MDT 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "This article presents a hybrid Automatic Test Pattern
                 Generation (ATPG) technique using the staggered
                 Launch-On-Shift (LOS) scheme followed by the one-hot
                 launch-on-shift scheme for testing delay faults in a
                 scan design containing asynchronous clock domains.
                 Typically, the staggered scheme produces small test
                 sets but needs long ATPG runtime, whereas the one-hot
                 scheme takes short ATPG runtime but yields large test
                 sets. The proposed hybrid technique is intended to
                 reduce test pattern count with acceptable ATPG runtime
                 for multimillion-gate scan designs. In case the scan
                 design contains multiple synchronous clock domains, and
                 each group of synchronous clock domains is treated as a
                 clock group and tested using a launch-aligned or a
                 capture-aligned LOS scheme. By combining these schemes
                 together, we found the pattern counts for two large
                 industrial designs were reduced by approximately 1.6X
                 to 1.8X, while the ATPG runtime was increased by 40\%
                 to 50\%, when compared to the one-hot clocking scheme
                 alone.",
  acknowledgement = ack-nhfb,
  articleno =    "48",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Khatib:2012:MRP,
  author =       "Mohammed G. Khatib",
  title =        "Migration-Resistant Policies for Probe-Wear Leveling
                 in {MEMS} Storage Devices",
  journal =      j-TODAES,
  volume =       "17",
  number =       "4",
  pages =        "49:1--49:??",
  month =        oct,
  year =         "2012",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2348839.2348853",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Oct 22 10:59:18 MDT 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Probes (read/write heads) in a MEMS storage device are
                 susceptible to wear. We study probe wear, and analyze
                 the causes of uneven wear. We show that under
                 real-world workloads some probes can wear one order of
                 magnitude faster than others. This premature expiry has
                 severe consequences for reliability, timing
                 performance, energy efficiency, and lifetime. Wear
                 leveling precludes premature expiry and is thus
                 necessary. We discuss the fundamental differences
                 between probe wear in MEMS storage and medium wear in
                 Flash, calling for a different treatment. We devise
                 three policies to level probe wear. The policies
                 provide a spectrum between best lifetime and least
                 influence on the response time and energy efficiency of
                 a MEMS storage device. We make the case that data
                 migration can be prevented by augmenting the policies
                 with a simple rule. We study the influence of the data
                 layout configuration on the leveling performance of the
                 policies.",
  acknowledgement = ack-nhfb,
  articleno =    "49",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Lam:2012:EPL,
  author =       "Tak-Kei Lam and Wai-Chung Tang and Xiaoqing Yang and
                 Yu-Liang Wu",
  title =        "{ECR}: a Powerful and Low-Complexity Error
                 Cancellation Rewiring Scheme",
  journal =      j-TODAES,
  volume =       "17",
  number =       "4",
  pages =        "50:1--50:??",
  month =        oct,
  year =         "2012",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2348839.2348854",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Oct 22 10:59:18 MDT 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Rewiring is known to be a class of logic restructuring
                 technique that is at least equally powerful in
                 flexibility compared to other logic transformation
                 techniques. Especially it is wiring sensitive and is
                 particularly useful for interconnect-based circuit
                 synthesis processes. One of the most well-studied
                 rewiring techniques is the ATPG-based Redundancy
                 Addition and Removal (RAR) technique which adds a
                 redundant alternative wire to make an originally
                 irredundant target wire become redundant and thus
                 removable. In this article, we propose a new
                 Error-Cancellation-based Rewiring scheme (ECR) which
                 can also identify non-RAR-based rewiring operations
                 with high efficiency. In ECR scheme, it is not
                 necessary for alternative wires to be redundant. Based
                 on the notion of error cancellation, we analyze and
                 reformulate the rewiring problem, and a more
                 generalized rewiring scheme is developed to detect more
                 rewiring cases which are not obtainable by existing
                 schemes while it still maintains a low runtime
                 complexity. Comparing with the most recent non-RAR
                 rewiring tool IRRA, the total number of alternative
                 wires found by our approach is about doubled (202\%)
                 while the CPU time used is just slightly more (8\%)
                 upon benchmarks preoptimized by ABC's rewriting. Our
                 experimental results also suggest that the ECR engine
                 is more powerful than IRRA in FPGA technology
                 mapping.",
  acknowledgement = ack-nhfb,
  articleno =    "50",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Shen:2012:FSF,
  author =       "Ruijing Shen and Sheldon X.-D. Tan and Hai Wang and
                 Jinjun Xiong",
  title =        "Fast Statistical Full-Chip Leakage Analysis for
                 Nanometer {VLSI} Systems",
  journal =      j-TODAES,
  volume =       "17",
  number =       "4",
  pages =        "51:1--51:??",
  month =        oct,
  year =         "2012",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2348839.2348855",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Oct 22 10:59:18 MDT 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "In this article, we present a new full-chip
                 statistical leakage estimation considering the spatial
                 correlation condition (strong or weak). The new
                 algorithm can deliver linear time, $ O(N) $, time
                 complexity, where $N$ is the number of grids on chip. The
                 proposed algorithm adopts a set of uncorrelated virtual
                 variables over grid cells to represent the original
                 physical random variables and the cell size is
                 determined by the spatial correlation length. In this
                 way, each physical variable is always represented by
                 virtual variables locally. We prove the number of
                 neighbor cells for each grid cell is not related to the
                 condition of spatial correlation (from no correlation
                 to 100\% correlated), which leads to linear time
                 complexity in terms of number of gates. We compute the
                 gate leakage by the orthogonal polynomials-based
                 collocation method. The total leakage of a whole chip
                 can be computed by simply summing up the coefficients
                 of corresponding orthogonal polynomials in each grid
                 cell. Furthermore, we develop a look-up table to cache
                 statistical information for each type of gate instead
                 of calculating leakage for every single instance of
                 gate on a chip. As a result, a new statistical leakage
                 characterization in Standard Cell Library (SCL) is put
                 forward. Furthermore, an incremental analysis algorithm
                 is proposed to update the chip-level statistical
                 leakage information efficiently after a few changes are
                 made. The proposed method has no restrictions on static
                 leakage models, or types of leakage distributions. The
                 large circuit examples in 45nm CMOS process demonstrate
                 the proposed algorithm is 1000X faster than a recently
                 proposed grid-based method with similar accuracy and
                 many orders of magnitude times speedup over the Monte
                 Carlo method. Experimental results also show the
                 incremental analysis provides about 10X further
                 speedup. We expect the incremental analysis could
                 achieve more speedup over the full leakage analysis for
                 larger problem sizes.",
  acknowledgement = ack-nhfb,
  articleno =    "51",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Coskun:2012:ISS,
  author =       "Ayse Kivilcim Coskun and Yung-Hsiang Lu and Qinru
                 Qiu",
  title =        "Introduction to the special section on adaptive power
                 management for energy and temperature-aware computing
                 systems",
  journal =      j-TODAES,
  volume =       "18",
  number =       "1",
  pages =        "1:1--1:??",
  month =        dec,
  year =         "2012",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2390191.2390192",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Jan 12 08:32:04 MST 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  articleno =    "1",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Lari:2012:HPM,
  author =       "Vahid Lari and Shravan Muddasani and Srinivas Boppu
                 and Frank Hannig and Moritz Schmid and J{\"u}rgen
                 Teich",
  title =        "Hierarchical power management for adaptive
                 tightly-coupled processor arrays",
  journal =      j-TODAES,
  volume =       "18",
  number =       "1",
  pages =        "2:1--2:??",
  month =        dec,
  year =         "2012",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2390191.2390193",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Jan 12 08:32:04 MST 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "We present a self-adaptive hierarchical power
                 management technique for massively parallel processor
                 architectures, supporting a new resource-aware parallel
                 computing paradigm called invasive computing. Here, an
                 application can dynamically claim, execute, and release
                 the resources in three phases: resource acquisition
                 (invade), program loading/configuration and execution
                 (infect), and release (retreat). Resource invasion is
                 governed by dedicated decentralized hardware
                 controllers, called invasion controllers (i ctrls),
                 which are integrated into each processing element (PE).
                 Several invasion strategies for claiming linearly
                 connected or rectangular regions of processing
                 resources are implemented. The key idea is to exploit
                 the decentralized resource management inherent to
                 invasive computing for power savings by enabling
                 applications themselves to control the power for
                 processing resources and invasion controllers using a
                 hierarchical power-gating approach. We propose
                 analytical models for estimating various components of
                 energy consumption for faster design space exploration
                 and compare them with the results obtained from a
                 cycle-accurate C++ simulator of the processor array. In
                 order to find optimal design trade-offs, various
                 parameters like (a) energy consumption, (b) hardware
                 cost, and (c) timing overheads are compared for
                 different sizes of power domains. Experimental results
                 show significant energy savings (up to 73\%) for
                 selected characteristical algorithms and different
                 resource utilizations. In addition, we demonstrate the
                 accuracy of our proposed analytical model. Here,
                 estimation errors less than 3.6\% can be reported.",
  acknowledgement = ack-nhfb,
  articleno =    "2",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Srivastav:2012:DEE,
  author =       "Meeta Srivastav and M. B. Henry and Leyla Nazhandali",
  title =        "Design of energy-efficient, adaptable throughput
                 systems at near\slash sub-threshold voltage",
  journal =      j-TODAES,
  volume =       "18",
  number =       "1",
  pages =        "3:1--3:??",
  month =        dec,
  year =         "2012",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2390191.2390194",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Jan 12 08:32:04 MST 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Voltage scaling has been a prevalent method of saving
                 energy for energy-constrained applications. However,
                 current technology trends which shrink transistors
                 sizes exacerbate process variation effects in
                 voltage-scaled systems. Large variations in transistor
                 parameters result in high variation in performance and
                 power across the chip. These effects, if ignored at the
                 design stage, will result in unpredictable behavior
                 when deployed in the field. In this article, we
                 leverage the benefits of voltage scaling methodology
                 for obtaining energy efficiency and compensate for the
                 loss in throughput by exploiting parallelism present in
                 the various DSP designs. We show that such a hybrid
                 method consumes 8\%--77\% less power, compared to
                 simple dynamic voltage scaling over different
                 throughputs. We study this system architecture in two
                 different workload environments: static and dynamic. We
                 show that to achieve the highest level of energy
                 efficiency, the number of cores and the operating
                 voltages vary widely between a BASE design versus a
                 process variation-aware (PVA) design. We further
                 demonstrate that the PVA design enjoys an average of
                 26.9\% and 51.1\% reduction in energy consumption for
                 the static and dynamic designs, respectively. Since
                 different cores will have a wide range of speeds at
                 operating voltages close to near/sub-thresholds due to
                 process variation, we gather characteristic behavior of
                 each core. With knowledge of the core speeds, we can
                 further increase the energy efficiency. Furthermore, in
                 this article, we show that use of this methodology will be
                 49.3\% more energy efficient, compared to building
                 the system with no knowledge about the characteristics
                 of each core.",
  acknowledgement = ack-nhfb,
  articleno =    "3",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Sun:2012:STD,
  author =       "Jin Sun and Rui Zheng and Jyothi Velamala and Yu Cao
                 and Roman Lysecky and Karthik Shankar and Janet
                 Roveda",
  title =        "A self-tuning design methodology for power-efficient
                 multi-core systems",
  journal =      j-TODAES,
  volume =       "18",
  number =       "1",
  pages =        "4:1--4:??",
  month =        dec,
  year =         "2012",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2390191.2390195",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Jan 12 08:32:04 MST 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "This article aims to achieve computational reliability
                 and energy efficiency through codevelopment of
                 algorithms, device, and circuit designs for
                 application-specific, reconfigurable architectures. The
                 new methodology characterizes aging-switching activity
                 and aging-supply voltage relationships that are
                 applicable for minimizing power consumption and task
                 execution efficiency in order to achieve low bit energy
                 ratio (BER). In addition, a new dynamic management
                 algorithm (DMA) is proposed to alleviate device
                 degradation and to extend system lifespan. In contrast
                 to traditional workload balancing schemes in which
                 cores are regarded as homogeneous, the new algorithm
                 ranks cores as ``highly competitive,'' ``less
                 competitive,'' and ``not competitive'' according to
                 their various competitiveness. Core competitiveness is
                 evaluated based upon their reliability, temperature,
                 and timing requirements. Consequently, ``competitive''
                 cores will take charge of the majority of the tasks at
                 relatively high voltage/frequency without violating
                 power and timing budgets, while ``not competitive''
                 cores will have light workloads to ensure their
                 reliability. The new approach combines intrinsic device
                 characteristics (aging-switching activity and
                 aging-supply voltage curves) into an integrated
                 framework to achieve high reliability and low energy
                 level with graceful degradation of system performance.
                 Experimental results show that the proposed method has
                 achieved up to 20\% power reduction, with about 4\%
                 performance degradation (in terms of accomplished
                 workload and system throughput), compared with
                 traditional workload balancing methods. The new method
                 also improves system mean-time-to-failure (MTTF) by up
                 to 25\%.",
  acknowledgement = ack-nhfb,
  articleno =    "4",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Rodrigues:2012:IPP,
  author =       "Rance Rodrigues and Arunachalam Annamalai and Israel
                 Koren and Sandip Kundu",
  title =        "Improving performance per watt of asymmetric
                 multi-core processors via online program phase
                 classification and adaptive core morphing",
  journal =      j-TODAES,
  volume =       "18",
  number =       "1",
  pages =        "5:1--5:??",
  month =        dec,
  year =         "2012",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2390191.2390196",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Jan 12 08:32:04 MST 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Asymmetric multi-core processors (AMPs) have been
                 shown to outperform symmetric ones in terms of
                 performance and performance/watt. Improved performance
                 and power efficiency are achieved when the program
                 threads are matched to their most suitable cores. Since
                 the computational needs of a program may change during
                 its execution, the best thread to core assignment will
                 likely change with time. We have, therefore, developed
                 an online program phase classification scheme that
                 allows the swapping of threads when the current needs
                 of the threads justify a change in the assignment. The
                 architectural differences among the cores in an AMP can
                 never match the diversity that exists among different
                 programs and even between different phases of the same
                 program. Consider, for example, a program (or a program
                 phase) that has a high instruction-level parallelism
                 (ILP) and will exhibit high power efficiency if
                 executed on a powerful core. We can not, however,
                 include such powerful cores in the designed AMP, since
                 they will remain underutilized most of the time, and
                 they are not power efficient when the programs do not
                 exhibit a high degree of ILP. Thus, we must expect to
                 see program phases where the designed cores will be
                 unable to support the ILP that the program can exhibit.
                 We, therefore, propose in this article a dynamic
                 morphing scheme. This scheme will allow a core to gain
                 control of a functional unit that is ordinarily under
                 the control of a neighboring core during periods of
                 intense computation with high ILP. This way, we
                 dynamically adjust the hardware resources to the
                 current needs of the application. Our results show that
                 combining online phase classification and dynamic core
                 morphing can significantly improve the performance/watt
                 of most multithreaded workloads.",
  acknowledgement = ack-nhfb,
  articleno =    "5",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

%%% TODAES 18(1), article 6, December 2012.
@article{Zanini:2012:OTC,
  author =       {Francesco Zanini and David Atienza and Colin N. Jones
                 and Luca Benini and Giovanni {De Micheli}},
  title =        {Online thermal control methods for multiprocessor
                 systems},
  journal =      j-TODAES,
  volume =       {18},
  number =       {1},
  pages =        {6:1--6:??},
  month =        dec,
  year =         {2012},
  CODEN =        {ATASFO},
  DOI =          {https://doi.org/10.1145/2390191.2390197},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  bibdate =      {Sat Jan 12 08:32:04 MST 2013},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract =     {With technological advances, the number of cores
                 integrated on a chip is increasing. This in turn is
                 leading to thermal constraints and thermal design
                 challenges. Temperature gradients and hotspots not only
                 affect the performance of the system but also lead to
                 unreliable circuit operation and affect the lifetime of
                 the chip. Meeting temperature constraints and reducing
                 hotspots are critical for achieving reliable and
                 efficient operation of complex multi-core systems. In
                 this article, we analyze the use of four of the most
                 promising families of online control techniques for
                 thermal management of multiprocessors system-on-chip
                 (MPSoC). In particular, in our exploration, we aim at
                 achieving an online smooth thermal control action that
                 minimizes the performance loss as well as the
                 computational and hardware overhead of embedding a
                 thermal management system inside the MPSoC. The
                 definition of the optimization problem to tackle in
                 this work considers the thermal profile of the system,
                 its evolution over time, and current time-varying
                 workload requirements. Thus, this problem is formulated
                 as a finite-horizon optimal control problem, and we
                 analyze the control features of different online
                 thermal control approaches. In addition, we implemented
                 the policies on an MPSoC hardware simulation platform
                 and performed experiments on a cycle-accurate model of
                 the eight-core Niagara multi-core architecture using
                 benchmarks ranging from Web-accessing to playing
                 multimedia. Results show different trade-offs among the
                 analyzed techniques regarding the thermal profile, the
                 frequency setting, the power consumption, and the
                 implementation complexity.},
  acknowledgement = ack-nhfb,
  articleno =    {6},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

%%% TODAES 18(1), article 7, December 2012.
@article{Cochran:2012:TPA,
  author =       {Ryan Cochran and Sherief Reda},
  title =        {Thermal prediction and adaptive control through
                 workload phase detection},
  journal =      j-TODAES,
  volume =       {18},
  number =       {1},
  pages =        {7:1--7:??},
  month =        dec,
  year =         {2012},
  CODEN =        {ATASFO},
  DOI =          {https://doi.org/10.1145/2390191.2390198},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  bibdate =      {Sat Jan 12 08:32:04 MST 2013},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract =     {Elevated die temperature is a true limiter to the
                 scalability of modern processors. With continued
                 technology scaling in order to meet ever-increasing
                 performance demands, it is no longer cost effective to
                 design cooling systems that handle the worst-case
                 thermal behaviors. Instead, cooling systems are
                 designed to handle typical chip operation, while
                 processors must detect and handle rare thermal
                 emergencies. Most processors rely on measurements from
                 integrated thermal sensors and dynamic thermal
                 management (DTM) techniques in order to manage the
                 trade-off between performance and thermal risk. Optimal
                 management requires advanced knowledge of the thermal
                 trajectory based on the current workload behaviors and
                 operating conditions. In this work, we devise novel
                 workload phase classification strategies that
                 automatically discriminate among workload behaviors
                 with respect to the thermal control response. We
                 incorporate workload phase-detection and thermal models
                 into a dynamic voltage and frequency scaling (DVFS)
                 technique that can optimally control temperature during
                 runtime based on thermal predictions. We demonstrate
                 the effectiveness of our proposed techniques in
                 predicting and adaptively controlling the thermal
                 behavior of a real quad-core processor in response to a
                 wide range of workloads. In comparison with
                 state-of-the-art model predictive control (MPC)
                 techniques in previous works on thermal prediction, we
                 demonstrate a 5.8\% improvement in instruction
                 throughput with the same number of thermal violations.
                 In comparison with simple proportional-integral (PI)
                 feedback control techniques, we improve instruction
                 throughput by 3.9\%, while significantly reducing the
                 number of thermal violations.},
  acknowledgement = ack-nhfb,
  articleno =    {7},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

%%% TODAES 18(1), article 8, December 2012.
@article{Shi:2012:HND,
  author =       {Liang Shi and Jianhua Li and Chun Jason Xue and Xuehai
                 Zhou},
  title =        {Hybrid nonvolatile disk cache for energy-efficient and
                 high-performance systems},
  journal =      j-TODAES,
  volume =       {18},
  number =       {1},
  pages =        {8:1--8:??},
  month =        dec,
  year =         {2012},
  CODEN =        {ATASFO},
  DOI =          {https://doi.org/10.1145/2390191.2390199},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  bibdate =      {Sat Jan 12 08:32:04 MST 2013},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract =     {NAND flash memory has been employed as disk cache in
                 recent years. It has the advantages of high
                 performance, low leakage power, and cost efficiency.
                 However, flash memory's performance is limited by the
                 inability of in-place updates, coarse access
                 granularity, and a limited number of write/erase times.
                 In this article, we propose a hybrid nonvolatile disk
                 cache architecture for high-performance and
                 energy-efficient systems, where the disk cache is
                 implemented with a small-size phase change memory (PCM)
                 and a large-size NAND flash memory. Compared with
                 current flash memory-based disk cache, it has the
                 following advantages. (1) System performance is
                 improved as requests are carefully directed between PCM
                 and flash memory; (2) the energy consumption of disk
                 cache is substantially reduced with significant
                 reduction of additional operations, such as garbage
                 collections; (3) the efficiency of flash memory is
                 improved with the reduction of write activities on
                 flash memory; and (4) lifetime of NAND flash memory is
                 increased with most of the write operations assigned to
                 PCM, where PCM's lifetime is guaranteed to be longer
                 than the lifetime of flash memory. Simulation results
                 show that the proposed methods can substantially
                 improve the system performance, energy consumption, and
                 lifetime of the hybrid disk cache.},
  acknowledgement = ack-nhfb,
  articleno =    {8},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

%%% TODAES 18(1), article 9, December 2012.
@article{Singh:2012:ATA,
  author =       {Amit Kumar Singh and Akash Kumar and Thambipillai
                 Srikanthan},
  title =        {Accelerating throughput-aware runtime mapping for
                 heterogeneous {MPSoCs}},
  journal =      j-TODAES,
  volume =       {18},
  number =       {1},
  pages =        {9:1--9:??},
  month =        dec,
  year =         {2012},
  CODEN =        {ATASFO},
  DOI =          {https://doi.org/10.1145/2390191.2390200},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  bibdate =      {Sat Jan 12 08:32:04 MST 2013},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract =     {Modern embedded systems need to support multiple
                 time-constrained multimedia applications that often
                 employ multiprocessor-systems-on-chip (MPSoCs). Such
                 systems need to be optimized for resource usage and
                 energy consumption. It is well understood that a
                 design-time approach cannot provide timing guarantees
                 for all the applications due to its inability to cater
                 for dynamism in applications. However, a runtime
                 approach consumes large computation requirements at
                 runtime and hence may not lend well to
                 constrained-aware mapping. In this article, we present
                 a hybrid approach for efficient mapping of applications
                 in such systems. For each application to be supported
                 in the system, the approach performs extensive
                 design-space exploration (DSE) at design time to derive
                 multiple design points representing throughput and
                 energy consumption at different resource combinations.
                 One of these points is selected at runtime efficiently,
                 depending upon the desired throughput while optimizing
                 for energy consumption and resource usage. While most
                 of the existing DSE strategies consider a fixed
                 multiprocessor platform architecture, our DSE considers
                 a generic architecture, making DSE results applicable
                 to any target platform. All the compute-intensive
                 analysis is performed during DSE, which leaves for
                 minimum computation at runtime. The approach is capable
                 of handling dynamism in applications by considering
                 their runtime aspects and providing timing guarantees.
                 The presented approach is used to carry out a DSE case
                 study for models of real-life multimedia applications:
                 H.263 decoder, H.263 encoder, MPEG-4 decoder, JPEG
                 decoder, sample rate converter, and MP3 decoder. At
                 runtime, the design points are used to map the
                 applications on a heterogeneous MPSoC. Experimental
                 results reveal that the proposed approach provides
                 faster DSE, better design points, and efficient runtime
                 mapping when compared to other approaches. In
                 particular, we show that DSE is faster by 83\% and
                 runtime mapping is accelerated by 93\% for some cases.
                 Further, we study the scalability of the approach by
                 considering applications with large numbers of tasks.},
  acknowledgement = ack-nhfb,
  articleno =    {9},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

%%% TODAES 18(1), article 10, December 2012.
@article{Saladi:2012:CAC,
  author =       {Kalyan Saladi and Harikumar Somakumar and Mahadevan
                 Ganapathi},
  title =        {Concurrency-aware compiler optimizations for hardware
                 description languages},
  journal =      j-TODAES,
  volume =       {18},
  number =       {1},
  pages =        {10:1--10:??},
  month =        dec,
  year =         {2012},
  CODEN =        {ATASFO},
  DOI =          {https://doi.org/10.1145/2390191.2390201},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  bibdate =      {Sat Jan 12 08:32:04 MST 2013},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract =     {In this article, we discuss the application of
                 compiler technology for eliminating redundant
                 computation in hardware simulation. We discuss how
                 concurrency in hardware description languages (HDLs)
                 presents opportunities for expression reuse across
                 different threads. While accounting for discrete event
                 simulation semantics, we extend the data flow analysis
                 framework to concurrent threads. In this process, we
                 introduce a rewriting scheme named $ \partial $VF and a
                 graph representation to model sensitivity relationships
                 among threads. An algorithm for identifying common
                 subexpressions as applied to HDLs is presented. Related
                 issues, such as scheduling correctness, are also
                 considered.},
  acknowledgement = ack-nhfb,
  articleno =    {10},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

%%% TODAES 18(1), article 11, December 2012.
%%% Review fix: removed a stray comma that split "design space exploration
%%% methodology" in the abstract.
@Article{Xydis:2012:CLE,
  author =       "Sotirios Xydis and Kiamal Pekmestzi and Dimitrios
                 Soudris and George Economakos",
  title =        "Compiler-in-the-loop exploration during datapath
                 synthesis for higher quality delay-area trade-offs",
  journal =      j-TODAES,
  volume =       "18",
  number =       "1",
  pages =        "11:1--11:??",
  month =        dec,
  year =         "2012",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2390191.2390202",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Jan 12 08:32:04 MST 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Design space exploration during high-level synthesis
                 targets the computation of those design solutions which
                 form optimal trade-off points. This quest for optimal
                 trade-offs has been focused on studying the impact of
                 various architectural-level parameters during
                 high-level synthesis algorithms, silently neglecting
                 the trade-offs produced from the combined impact of
                 behavioral-level together with architectural-level
                 parameters. We propose a novel design space
                 exploration methodology that studies an extended
                 instance of the solution space considering the effects
                 of combining compiler- and architectural-level
                 transformations. It is shown that exploring the design
                 space in a global manner reveals new trade-off points,
                 thus shifting towards higher quality design solutions.
                 We use a combination of upper-bounding conditions
                 together with gradient-based heuristic pruning to
                 efficiently traverse the extended search space. Our
                 exploration framework delivers significant quality
                 improvements without compromising the optimality
                 (Pareto accuracy) of the discovered solutions, together
                 with significant runtime reductions compared to
                 exploring exhaustively the solution space at every
                 allocation scenario.",
  acknowledgement = ack-nhfb,
  articleno =    "11",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

%%% TODAES 18(1), article 12, December 2012.
%%% Review fix: "three step" corrected to "three steps" in the abstract.
@Article{Kurimoto:2012:VWR,
  author =       "Masanori Kurimoto and Takeshi Yamamoto and Satoshi
                 Nakano and Atsuto Hanami and Hiroyuki Kondo",
  title =        "Verification work reduction methodology in low-power
                 chip implementation",
  journal =      j-TODAES,
  volume =       "18",
  number =       "1",
  pages =        "12:1--12:??",
  month =        dec,
  year =         "2012",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2390191.2390203",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Jan 12 08:32:04 MST 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "In order to achieve satisfactory verification for
                 complicated low-power demands in green products, we
                 propose a verification work reduction methodology. It
                 consists of three steps, namely virtual, direct actual,
                 and actual model simulations. Virtual low-power
                 simulation inserts low-power cells, such as isolators
                 or level shifters, virtually and simulates logical
                 behavior for design under test (DUT) based on
                 user-defined power mode. Direct actual low-power
                 simulation replaces behavior models without non-logical
                 pins for some of modules with actual models with
                 non-logical pins, which are Vdd and Gnd, and simulates
                 DUT in mixed level. Actual low-power simulation
                 simulates DUT by using actual models with non-logical
                 pins for all cells and hard macros. We introduce
                 techniques which classify the type of the bugs on which
                 we focus at each verification step and prevent the
                 concerned bugs from leaking to the latter verification
                 step as much as possible. We applied our methodology to
                 an actual chip and could reduce the total simulation
                 period until tape-out by 38.8\% and the total chip
                 development period by 10\%, compared with the
                 conventional methodology.",
  acknowledgement = ack-nhfb,
  articleno =    "12",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

%%% TODAES 18(1), article 13, December 2012.
@article{Jing:2012:SFE,
  author =       {Naifeng Jing and Ju-Yueh Lee and Zhe Feng and Weifeng
                 He and Zhigang Mao and Lei He},
  title =        {{SEU} fault evaluation and characteristics for
                 {SRAM}-based {FPGA} architectures and synthesis
                 algorithms},
  journal =      j-TODAES,
  volume =       {18},
  number =       {1},
  pages =        {13:1--13:??},
  month =        dec,
  year =         {2012},
  CODEN =        {ATASFO},
  DOI =          {https://doi.org/10.1145/2390191.2390204},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  bibdate =      {Sat Jan 12 08:32:04 MST 2013},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract =     {Reliability has become an increasingly important
                 concern for SRAM-based field programmable gate arrays
                 (FPGAs). Targeting SEU (single event upset) in
                 SRAM-based FPGAs, this article first develops an SEU
                 evaluation framework that can quantify the failure
                 sensitivity for each configuration bit during design
                 time. This framework considers detailed fault behavior
                 and logic masking on a post-layout FPGA application and
                 performs logic simulation on various circuit elements
                 for fault evaluation. Applying this framework on MCNC
                 benchmark circuits, we first characterize SEUs with
                 respect to different FPGA circuits and architectures,
                 for example, bidirectional routing and unidirectional
                 routing. We show that in both routing architectures,
                 interconnects not only contribute to the lion's share
                 of the SEU-induced functional failures, but also
                 present higher failure rates per configuration bits
                 than LUTs. Particularly, local interconnect
                 multiplexers in logic blocks have the highest failure
                 rate per configuration bit. Then, we evaluate three
                 recently proposed SEU mitigation algorithms, IPD, IPF,
                 and IPV, which are all logic resynthesis-based with
                 little or no overhead on placement and routing.
                 Different fault mitigating capabilities at the chip
                 level are revealed, and it demonstrates that algorithms
                 with explicit consideration for interconnect
                 significantly mitigate the SEU at the chip level, for
                 example, IPV achieves 61\% failure rate reduction on
                 average against IPF with about 15\%. In addition, the
                 combination of the three algorithms delivers over 70\%
                 failure rate reduction on average at the chip level.
                 The experiments also reveal that in order to improve
                 fault tolerance at the chip level, it is necessary for
                 future fault mitigation algorithms to concern not only
                 LUT or interconnect faults, but also their
                 interactions. We envision that our framework can be
                 used to cast more useful insights for more robust FPGA
                 circuits, architectures, and better synthesis
                 algorithms.},
  acknowledgement = ack-nhfb,
  articleno =    {13},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

%%% TODAES 18(1), article 14, December 2012.
@article{Dworak:2012:UIC,
  author =       {Jennifer Dworak and Kundan Nepal and Nuno Alves and
                 Yiwen Shi and Nicholas Imbriglia and R. Iris Bahar},
  title =        {Using implications to choose tests through suspect
                 fault identification},
  journal =      j-TODAES,
  volume =       {18},
  number =       {1},
  pages =        {14:1--14:??},
  month =        dec,
  year =         {2012},
  CODEN =        {ATASFO},
  DOI =          {https://doi.org/10.1145/2390191.2390205},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  bibdate =      {Sat Jan 12 08:32:04 MST 2013},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract =     {As circuits continue to scale to smaller feature
                 sizes, wearout and latent defects are expected to cause
                 an increasing number of errors in the field. Online
                 error detection techniques, including logic
                 implication-based checker hardware, are capable of
                 detecting at least some of these errors as they occur.
                 However, recovery may be expensive, and the underlying
                 problem may lead to multiple failures of a core over
                 time. In this article, we will investigate the
                 diagnostic capability of logic implications to identify
                 possible failure locations when an error is detected
                 online. We will then utilize this information to select
                 a highly efficient test set that can be used to
                 effectively test the identified suspect locations in
                 both the failing core and in other identical cores in
                 the system.},
  acknowledgement = ack-nhfb,
  articleno =    {14},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

%%% TODAES 18(1), article 15, December 2012.
@article{Mok:2012:DSL,
  author =       {Santiago Mok and John Lee and Puneet Gupta},
  title =        {Discrete sizing for leakage power optimization in
                 physical design: a comparative study},
  journal =      j-TODAES,
  volume =       {18},
  number =       {1},
  pages =        {15:1--15:??},
  month =        dec,
  year =         {2012},
  CODEN =        {ATASFO},
  DOI =          {https://doi.org/10.1145/2390191.2390206},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  bibdate =      {Sat Jan 12 08:32:04 MST 2013},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract =     {While sizing has been studied for over three decades,
                 the absence of a common framework with which to compare
                 methods has made progress difficult to measure. In this
                 article, we compare popular sizing techniques in which
                 gates are chosen from a discrete standard cell library
                 and slew and interconnect effects are accounted for.
                 The difference between sizing methods reduces from
                 roughly 53\% to 8\% between best and worst case after
                 slew propagation is taken into account. In our
                 benchmarks, no one sizing technique consistently
                 outperforms the others.},
  acknowledgement = ack-nhfb,
  articleno =    {15},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

%%% TODAES 18(1), article 16, December 2012.
@article{Lee:2012:ECM,
  author =       {John Lee and Puneet Gupta},
  title =        {{ECO} cost measurement and incremental gate sizing for
                 late process changes},
  journal =      j-TODAES,
  volume =       {18},
  number =       {1},
  pages =        {16:1--16:??},
  month =        dec,
  year =         {2012},
  CODEN =        {ATASFO},
  DOI =          {https://doi.org/10.1145/2390191.2390207},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  bibdate =      {Sat Jan 12 08:32:04 MST 2013},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract =     {Changes in the manufacturing process parameters may
                 create timing violations in a design, making it
                 necessary to perform an engineering change order (ECO)
                 to correct these problems. We present a framework for
                 performing incremental gate sizing for process changes
                 late in the design cycle, and a method for creating
                 initial designs that are robust to late process
                 changes. This includes a method for measuring and
                 estimating ECO cost and for transforming these costs
                 into linear programming optimization problems. In the
                 case of ECOs, the method reduces ECO costs on average,
                 by 89\% in changed area compared to a leading
                 commercial tool. Furthermore, the robust initial
                 designs are, on average, 55\% less likely to need
                 redesign in the future.},
  acknowledgement = ack-nhfb,
  articleno =    {16},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

%%% TODAES 18(2), article 17, March 2013.
%%% NOTE(review): the DOI field was missing and has been added during
%%% review; confirm it against the ACM Digital Library record.
@Article{Kornaros:2013:STC,
  author =       "Georgios Kornaros and Dionisios Pnevmatikatos",
  title =        "A survey and taxonomy of on-chip monitoring of
                 multicore systems-on-chip",
  journal =      j-TODAES,
  volume =       "18",
  number =       "2",
  pages =        "17:1--17:??",
  month =        mar,
  year =         "2013",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2442087.2442088",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Apr 5 18:40:42 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Billion transistor systems-on-chip increasingly
                 require dynamic management of their hardware components
                 and careful coordination of the tasks that they carry
                 out. Diverse real-time monitoring functions assist
                 towards this objective through the collection of
                 important system metrics, such as throughput of
                 processing elements, communication latency, or resource
                 utilization for each application. The online evaluation
                 of these metrics can result in localized or global
                 decisions that attempt to improve aspects of system
                 behavior, system performance, quality-of-service, power
                 and thermal effects under nominal conditions. This work
                 provides a comprehensive categorization of monitoring
                 approaches used in multiprocessor SoCs. As adaptive
                 systems are encountered in many disciplines, it is
                 imperative to present the prominent research efforts in
                 developing online monitoring methods. To this end we
                 offer a taxonomy that groups strongly related
                 techniques that designers increasingly use to produce
                 more efficient and adaptive chips. The provided
                 classification helps to understand and compare
                 architectural mechanisms that can be used in systems,
                 while one can envisage the innovations required to
                 build real adaptive and intelligent systems-on-chip.",
  acknowledgement = ack-nhfb,
  articleno =    "17",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Backasch:2013:RVM,
  author          = {Backasch, Rico and Hochberger, Christian and
                     Weiss, Alexander and Leucker, Martin and
                     Lasslop, Richard},
  title           = {Runtime verification for multicore {SoC} with
                     high-quality trace data},
  journal         = j-TODAES,
  volume          = {18},
  number          = {2},
  pages           = {18:1--18:??},
  month           = mar,
  year            = {2013},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Fri Apr 5 18:40:42 MDT 2013},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {Multicore System-on-Chip (SoC) implementations of
                     embedded systems are becoming very popular. In these
                     systems it is possible to spread out computations over
                     many cores. On one hand this leads to better energy
                     efficiency if clock frequencies and core voltages are
                     reduced. On the other hand this delivers very high
                     performance to the software developer and thus enables
                     complex software systems to be implemented.
                     Unfortunately, debugging and validation of these
                     systems becomes extremely difficult. Various
                     technological approaches try to solve this dilemma. In
                     this contribution we will show a new approach to
                     observe multi-core SoCs and make their internal
                     operations visible to external analysis tools. Also, we
                     show that runtime verification can be employed to
                     analyze and validate these internal operations while
                     the system operates in its normal environment. The
                     combination of these two approaches delivers
                     unprecedented options to the developer to understand
                     and verify system behavior even in complex multicore
                     SoCs.},
  acknowledgement = ack-nhfb,
  articleno       = {18},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Costa:2013:CDO,
  author          = {Costa, Jos{\'e} C. and Monteiro, Jos{\'e} C.},
  title           = {Coverage-directed observability-based validation for
                     embedded software},
  journal         = j-TODAES,
  volume          = {18},
  number          = {2},
  pages           = {19:1--19:??},
  month           = mar,
  year            = {2013},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Fri Apr 5 18:40:42 MDT 2013},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {Motivated by the need for validation methodologies for
                     embedded systems we propose a method for embedded
                     software testing that can be integrated with existing
                     hardware methods. Existing coverage-directed validation
                     methods guarantee the execution of a certain percentage
                     of the program code under test. Yet they do not
                     generally verify whether the statements executed have
                     any influence on the program's output. In the proposed
                     method, a program statement is considered covered not
                     simply for belonging to the executed path, but only if
                     its execution has influence in some observable output.
                     The paths are generated by searching the longest path
                     in terms of the number of statements in the path. Given
                     that not all paths are valid, we check their
                     feasibility using a method based on Mixed Integer
                     Linear Programming (MILP). Variable aliasing is
                     accounted for by representing variables by their memory
                     addresses when building this MILP problem. In this
                     manner, for feasible paths, we obtain immediately the
                     input values that allow the execution of the path.
                     Using these inputs, we determine the statements
                     actually observed. We repeat this process until a
                     user-specified level of coverage has been achieved. In
                     the generation of each new path, the statement coverage
                     obtained so far and the feasibility of previous paths
                     is taken into account. We present results that
                     demonstrate the effectiveness of this methodology.},
  acknowledgement = ack-nhfb,
  articleno       = {19},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Chen:2013:DRG,
  author =       "Chun-An Chen and Sun-Yuan Hsieh",
  title =        "{$ t / t $}-Diagnosability of regular graphs under the
                 {PMC} model",
  journal =      j-TODAES,
  volume =       "18",
  number =       "2",
  pages =        "20:1--20:??",
  month =        mar,
  year =         "2013",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Apr 5 18:40:42 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "A system is $ t / t $-diagnosable if, given any
                 collection of test results, the faulty nodes can be
                 isolated to within a set of at most $t$ nodes provided
                 that the number of faulty nodes does not exceed $t$.
                 Given an {$N$}-vertex graph {$G$} that is regular with
                 the common degree $d$ and has no cycle of three or four
                 vertices, this study shows that {$G$} is $ (2 d - 2) /
                 (2 d - 2) $ diagnosable if {$ N \geq 4 d - 30 > 0 $}.
                 Based on this result, the $ t / t $-diagnosabilities of
                 several classes of graphs can be computed
                 efficiently.",
  acknowledgement = ack-nhfb,
  articleno =    "20",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Huang:2013:SNC,
  author =       "Chen Huang and Bailey Miller and Frank Vahid and Tony
                 Givargis",
  title =        "Synthesis of networks of custom processing elements
                 for real-time physical system emulation",
  journal =      j-TODAES,
  volume =       "18",
  number =       "2",
  pages =        "21:1--21:??",
  month =        mar,
  year =         "2013",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Apr 5 18:40:42 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Emulating a physical system in real-time or faster has
                 numerous applications in cyber-physical system design
                 and deployment. For example, testing of a
                 cyber-device's software (e.g., a medical ventilator)
                 can be done via interaction with a real-time digital
                 emulation of the target physical system (e.g., a
                 human's respiratory system). Physical system emulation
                 typically involves iteratively solving thousands of
                 ordinary differential equations (ODEs) that model the
                 physical system. We describe an approach that creates
                 custom processing elements (PEs) specialized to the
                 ODEs of a particular model while maintaining some
                 programmability, targeting implementation on
                 field-programmable gate arrays (FPGAs). We detail the
                 PE micro-architecture and accompanying automated
                 compilation and synthesis techniques. Furthermore, we
                 describe our efforts to use a high-level synthesis
                 approach that incorporates regularity extraction
                 techniques as an alternative FPGA-based solution, and
                 also describe an approach using graphics processing
                 units (GPUs). We perform experiments with five models:
                 a Weibel lung model, a Lutchen lung model, an atrial
                 heart model, a neuron model, and a wave model; each
                 model consists of several thousand ODEs and targets a
                 Xilinx Virtex 6 FPGA. Results of the experiments show
                 that the custom PE approach achieves 4X--9X speedups
                 (average 6.7X) versus our previous general ODE-solver
                 PE approach, and 7X--10X speedups (average 8.7X) versus
                 high-level synthesis, while using approximately the
                 same or fewer FPGA resources. Furthermore, the approach
                 achieves speedups of 18X--32X (average 26X) versus an
                 Nvidia GTX 460 GPU, and average speedups of more than
                 100X compared to a six-core TI DSP processor or a
                 four-core ARM processor, and 24X versus an Intel I7
                 quad core processor running at 3.06 GHz. While an FPGA
                 implementation costs about 3X--5X more than the
                 non-FPGA approaches, a speedup/dollar analysis shows
                 10X improvement versus the next best approach, with the
                 trend of decreasing FPGA costs improving speedup/dollar
                 in the future.",
  acknowledgement = ack-nhfb,
  articleno =    "21",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Forte:2013:RAA,
  author          = {Forte, Domenic and Srivastava, Ankur},
  title           = {Resource-aware architectures for adaptive particle
                     filter based visual target tracking},
  journal         = j-TODAES,
  volume          = {18},
  number          = {2},
  pages           = {22:1--22:??},
  month           = mar,
  year            = {2013},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Fri Apr 5 18:40:42 MDT 2013},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {There are a growing number of visual tracking
                     applications now being envisioned for mobile devices.
                     However, since computer vision algorithms such as
                     particle filtering have large computational demands,
                     they can result in high energy consumption and
                     temperatures in mobile devices. Conventional approaches
                     for distributed target tracking with a camera node and
                     a receiver node are either sender-based (SB) or
                     receiver-based (RB). The SB approach uses little energy
                     and bandwidth, but requires a sender with large
                     computational resources. The RB approach fits
                     applications where computational resources are
                     completely unavailable to the sender, but requires very
                     large energy and bandwidth. In this article, we propose
                     three architectures for distributed particle filtering
                     that (i) reduce particle filtering workload and (ii)
                     allow for dynamic migration of workload between nodes
                     participating in tracking. We also discuss an adaptive
                     particle filtering extension that adapts particle
                     filter computational complexity and can be applied to
                     both the conventional and proposed architectures for
                     improved energy efficiency. Results show that the
                     proposed solutions require low additional overhead,
                     improve on tracking system lifetime, balance node
                     temperatures, maintain track of the desired target, and
                     are more effective than conventional approaches in many
                     scenarios.},
  acknowledgement = ack-nhfb,
  articleno       = {22},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Zhao:2013:SRE,
  author          = {Zhao, Baoxian and Aydin, Hakan and Zhu, Dakai},
  title           = {Shared recovery for energy efficiency and reliability
                     enhancements in real-time applications with precedence
                     constraints},
  journal         = j-TODAES,
  volume          = {18},
  number          = {2},
  pages           = {23:1--23:??},
  month           = mar,
  year            = {2013},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Fri Apr 5 18:40:42 MDT 2013},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {While Dynamic Voltage Scaling (DVS) remains as a
                     popular energy management technique for modern
                     computing systems, recent research has identified
                     significant and negative impacts of voltage scaling on
                     system reliability. To preserve system reliability
                     under DVS settings, a number of reliability-aware power
                     management (RA-PM) schemes have been recently studied.
                     However, the existing RA-PM schemes normally schedule a
                     separate recovery for each task whose execution is
                     scaled down and are rather conservative. To overcome
                     such conservativeness, we study in this article novel
                     RA-PM schemes based on the shared recovery (SHR)
                     technique. Specifically, we consider a set of
                     frame-based real-time tasks with individual deadlines
                     and a common period where the precedence constraints
                     are represented by a directed acyclic graph (DAG). We
                     first show that the earliest deadline first (EDF)
                     algorithm can always yield a schedule where all timing
                     and precedence constraints are met by considering the
                     effective deadlines of tasks derived from as late as
                     possible (ALAP) policy, provided that the task set is
                     feasible. Then, we propose a shared recovery based
                     frequency assignment technique (namely SHR-DAG) and
                     prove its optimality to minimize energy consumption
                     while preserving the system reliability. To exploit
                     additional slack that arises from early completion of
                     tasks, we also study a dynamic extension for SHR-DAG to
                     improve energy efficiency and system reliability at
                     runtime. The results from our extensive simulations
                     show that, compared to the existing RA-PM schemes,
                     SHR-DAG can achieve up to 35\% energy savings, which is
                     very close to the maximum achievable energy savings.
                     More interestingly, our extensive evaluation also
                     indicates that the new schemes offer non-trivial
                     improvements on system reliability over the existing
                     RA-PM schemes as well.},
  acknowledgement = ack-nhfb,
  articleno       = {23},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Shen:2013:AAP,
  author          = {Shen, Hao and Tan, Ying and Lu, Jun and Wu, Qing and
                     Qiu, Qinru},
  title           = {Achieving autonomous power management using
                     reinforcement learning},
  journal         = j-TODAES,
  volume          = {18},
  number          = {2},
  pages           = {24:1--24:??},
  month           = mar,
  year            = {2013},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Fri Apr 5 18:40:42 MDT 2013},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {System level power management must consider the
                     uncertainty and variability that come from the
                     environment, the application and the hardware. A robust
                     power management technique must be able to learn the
                     optimal decision from past events and improve itself as
                     the environment changes. This article presents a novel
                     on-line power management technique based on model-free
                     constrained reinforcement learning (Q-learning). The
                     proposed learning algorithm requires no prior
                     information of the workload and dynamically adapts to
                     the environment to achieve autonomous power management.
                     We focus on the power management of the peripheral
                     device and the microprocessor, two of the basic
                     components of a computer. Due to their different
                     operating behaviors and performance considerations,
                     these two types of devices require different designs of
                     Q-learning agent. The article discusses system modeling
                     and cost function construction for both types of
                     Q-learning agent. Enhancement techniques are also
                     proposed to speed up the convergence and better
                     maintain the required performance (or power) constraint
                     in a dynamic system with large variations. Compared
                     with the existing machine learning based power
                     management techniques, the Q-learning based power
                     management is more flexible in adapting to different
                     workload and hardware and provides a wider range of
                     power-performance tradeoff.},
  acknowledgement = ack-nhfb,
  articleno       = {24},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Lee:2013:RIB,
  author          = {Lee, Jongwon and Youn, Jonghee M. and Cho, Doosan and
                     Paek, Yunheung},
  title           = {Reducing instruction bit-width for low-power {VLIW}
                     architectures},
  journal         = j-TODAES,
  volume          = {18},
  number          = {2},
  pages           = {25:1--25:??},
  month           = mar,
  year            = {2013},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Fri Apr 5 18:40:42 MDT 2013},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {VLIW (very long instruction word) architectures have
                     proven to be useful for embedded applications with
                     abundant instruction level parallelism. But due to the
                     long instruction bus width it often consumes more power
                     and memory space than necessary. One way to lessen this
                     problem is to adopt a reduced bit-width instruction set
                     architecture (ISA) that has a narrower instruction word
                     length. This facilitates a more efficient hardware
                     implementation in terms of area and power by decreasing
                     bus-bandwidth requirements and the power dissipation
                     associated with instruction fetches. In practice,
                     however, it is impossible to convert a given ISA fully
                     into an equivalent reduced bit-width one because the
                     narrow instruction word, due to bit-width restrictions,
                     can encode only a small subset of normal instructions
                     in the original ISA. Consequently, existing processors
                     provide narrow instructions in very limited cases along
                     with severe restrictions on register accessibility. The
                     objective of this work is to explore the possibility of
                     complete conversion, as a case study, of an existing
                     32-bit VLIW ISA into a 16-bit one that supports
                     effectively all 32-bit instructions. To this objective,
                     we attempt to circumvent the bit-width restrictions by
                     dynamically extending the effective instruction word
                     length of the converted 16-bit operations. Further, we
                     will show that our proposed ISA conversion can create a
                     synergy effect with a VLES (variable length execution
                     set) architecture that is adopted in most recent VLIW
                     processors. According to our experiment, the code size
                     becomes significantly smaller after the conversion to
                     16-bit VLIW code. Also at a slight run time cost, the
                     machine with the 16-bit ISA consumes much less energy
                     than the original machine.},
  acknowledgement = ack-nhfb,
  articleno       = {25},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Majzoobi:2013:LPR,
  author          = {Majzoobi, Mehrdad and Kong, Joonho and
                     Koushanfar, Farinaz},
  title           = {Low-power resource binding by postsilicon
                     customization},
  journal         = j-TODAES,
  volume          = {18},
  number          = {2},
  pages           = {26:1--26:??},
  month           = mar,
  year            = {2013},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Fri Apr 5 18:40:42 MDT 2013},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {This article proposes the first postsilicon
                     customization method for resource binding to achieve
                     power reduction application specific integrated
                     circuits (ASICs) design. Instead of committing to one
                     configuration of resource binding during synthesis, our
                     new synthesis method produces a diverse set of
                     candidate bindings for the design. To ensure diversity
                     of the resource usage patterns, we introduce a binding
                     candidate formation method based on the orthogonal
                     arrays. Additional control components are added to
                     enable post manufacturing selection of one of the
                     binding candidates. The resource binding candidate that
                     minimizes the power consumption is selected by
                     considering the specific power characteristics of each
                     chip. An efficient methodology for embedding several
                     binding candidates in one design is developed.
                     Evaluations on benchmark designs show the low overhead
                     and the effectiveness of the proposed methods. As an
                     example, applying our method results in an average of
                     14.2\% (up to 24.0\%) power savings on benchmark
                     circuits for a variation model in 45nm CMOS technology.
                     The power efficiency of our customized postsilicon
                     binding is expected to improve with scaling of the
                     technology and the likely resulting higher process
                     variations.},
  acknowledgement = ack-nhfb,
  articleno       = {26},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Huang:2013:LPA,
  author          = {Huang, Shih-Hsu and Tu, Wen-Pin and Chang, Chia-Ming and
                     Pan, Song-Bin},
  title           = {Low-power anti-aging zero skew clock gating},
  journal         = j-TODAES,
  volume          = {18},
  number          = {2},
  pages           = {27:1--27:??},
  month           = mar,
  year            = {2013},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Fri Apr 5 18:40:42 MDT 2013},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {In advanced CMOS technology, the NBTI (negative bias
                     temperature instability) effect results in delay
                     degradations of PMOS transistors. Further, because of
                     clock gating, PMOS transistors in a clock tree often
                     have different active probabilities, leading to
                     different delay degradations. If the degradation
                     difference is not properly controlled, this clock skew
                     may cause the circuit fails to function at some point
                     later in time. Intuitively, the degradation difference
                     can be eliminated, if we increase the active
                     probability of the low-probability clock gates to
                     ensure the clock gates at the same level always having
                     the same active probability. However, this intuitive
                     method may suffer from large power consumption
                     overhead. In this article, we point out, by carefully
                     planning the transistor-level clock signal propagation
                     path, we can have many clock gates whose active
                     probabilities do not affect the degradation difference.
                     Based on that observation, we propose a
                     critical-PMOS-aware clock tree design methodology to
                     eliminate the degradation difference with minimum power
                     consumption overhead. Benchmark data consistently show
                     our approach achieves very good results in terms of
                     both the NBTI-induced clock skew (i.e., the degradation
                     difference) and the power consumption overhead.},
  acknowledgement = ack-nhfb,
  articleno       = {27},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Wang:2013:CTM,
  author          = {Wang, Hai and Tan, Sheldon X.-D. and Li, Duo and
                     Gupta, Ashish and Yuan, Yuan},
  title           = {Composable thermal modeling and simulation for
                     architecture-level thermal designs of multicore
                     microprocessors},
  journal         = j-TODAES,
  volume          = {18},
  number          = {2},
  pages           = {28:1--28:??},
  month           = mar,
  year            = {2013},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Fri Apr 5 18:40:42 MDT 2013},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {Efficient temperature estimation is vital for
                     designing thermally efficient, lower power and robust
                     integrated circuits in nanometer regime. Thermal
                     simulation based on the detailed thermal structures no
                     longer meets the demanding tasks for efficient design
                     space exploration. The compact and composable
                     model-based simulation provides a viable solution to
                     this difficult problem. However, building such thermal
                     models from detailed thermal structures was not well
                     addressed in the past. In this article, we propose a
                     new compact thermal modeling technique, called
                     ThermComp, standing for thermal modeling with
                     composable modules. ThermComp can be used for fast
                     thermal design space exploration for multicore
                     microprocessors. The new approach builds the composable
                     model from detailed structures for each basic module
                     using the finite difference method and reduces the
                     model complexity by the sampling-based model order
                     reduction technique. These composable models are then
                     used to assemble different multicore architecture
                     thermal models and realized into SPICE-like netlists.
                     The resulting thermal models can be simulated by the
                     general circuit simulator SPICE. ThermComp tries to
                     preserve the accuracy of fine-grained models with the
                     speed of coarse-grained models. Experimental results on
                     a number of multicore microprocessor architectures show
                     the new approach can easily build accurate thermal
                     systems from compact composable models for fast
                     architecture thermal analysis and optimization and is
                     much faster than the existing HotSpot method with
                     similar accuracy.},
  acknowledgement = ack-nhfb,
  articleno       = {28},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Zeng:2013:IPD,
  author          = {Zeng, Zhiyu and Lai, Suming and Li, Peng},
  title           = {{IC} power delivery: Voltage regulation and
                     conversion, system-level cooptimization and technology
                     implications},
  journal         = j-TODAES,
  volume          = {18},
  number          = {2},
  pages           = {29:1--29:??},
  month           = mar,
  year            = {2013},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Fri Apr 5 18:40:42 MDT 2013},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {Modern IC power delivery systems encompass large
                     on-chip passive power grids and active on-chip or
                     off-chip voltage converters and regulators. While there
                     exists little work targeting on holistic design of such
                     complex IC subsystems, the optimal system-level design
                     of power delivery is critical for achieving power
                     integrity and power efficiency. In this article, we
                     conduct a systematic design analysis on power delivery
                     networks that incorporate Buck Converters (BCs) and
                     on-chip Low-Dropout voltage regulators (LDOs) for the
                     entire chip power supply. The electrical interactions
                     between active voltage converters, regulators as well
                     as passive power grids and their influence on key
                     system design specifications are analyzed
                     comprehensively. With the derived design insights, the
                     system-level codesign of a complete power delivery
                     network is facilitated by a proposed automatic
                     optimization flow in which key design parameters of
                     buck converters and on-chip LDOs as well as on-chip
                     decoupling capacitance are jointly optimized. The
                     experimental results demonstrate significant
                     performance improvements resulted from the proposed
                     system cooptimization in terms of achievable area
                     overhead, supply noise and power efficiency. Impacts of
                     different decoupling capacitance technologies are also
                     investigated.},
  acknowledgement = ack-nhfb,
  articleno       = {29},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Lee:2013:SRB,
  author = {Ren-Jie Lee and Hung-Ming Chen},
  title = {A study of row-based area-array {I/O} design planning
    in concurrent chip-package design flow},
  journal = j-TODAES,
  volume = {18},
  number = {2},
  pages = {30:1--30:??},
  month = mar,
  year = {2013},
  coden = {ATASFO},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  bibdate = {Fri Apr 5 18:40:42 MDT 2013},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {IC-centric design flow has been a common paradigm when
    designing and optimizing a system. Package and
    board/system designs are usually followed by
    almost-ready chip designs, which causes long
    turn-around time communicating with package and system
    houses. In this article, the realizations of area-array
    I/O design methodologies are studied. Different from
    IC-centric flow, we propose a chip-package concurrent
    design flow to speed up the design time. Along with the
    flow, we design the I/O-bump (and P/G-bump) tile that
    combines I/O (and P/G) and bump into a hard macro with
    the considerations of I/O power connection and
    electrostatic discharge (ESD) protection. We then
    employ an I/O-row based scheme to place I/O-bump tiles
    with existed metal layers. By such a scheme, it reduces
    efforts in I/O placement legalization and the
    redistribution layer (RDL) routing. With the emphasis
    on package design awareness, the proposed methods map
    package balls onto chip I/Os, thus providing an
    opportunity to design chip and package in parallel. Due
    to this early study of I/O and bump planning, faster
    convergence can be expected with concurrent design
    flow. The results are encouraging and the merits of
    this flow are reassuring.},
  acknowledgement = ack-nhfb,
  articleno = {30},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Guthaus:2013:RAP,
  author = {Matthew R. Guthaus and Gustavo Wilke and Ricardo
    Reis},
  title = {Revisiting automated physical synthesis of
    high-performance clock networks},
  journal = j-TODAES,
  volume = {18},
  number = {2},
  pages = {31:1--31:??},
  month = mar,
  year = {2013},
  coden = {ATASFO},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  bibdate = {Fri Apr 5 18:40:42 MDT 2013},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {High-performance clock distribution has been a
    challenge for nearly three decades. During this time,
    clock synthesis tools and algorithms have strove to
    address a myriad of important issues helping designers
    to create faster, more reliable, and more power
    efficient chips. This work provides a complete
    discussion of the high-performance ASIC clock
    distribution using information gathered from both
    leading industrial clock designers and previous
    research publications. While many techniques are only
    briefly explained, the references summarize the most
    influential papers on a variety of topics for more
    in-depth investigation. This article also provides a
    thorough discussion of current issues in clock
    synthesis and concludes with insight into future
    research and design challenges for the community at
    large.},
  acknowledgement = ack-nhfb,
  articleno = {31},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Gester:2013:BAD,
  author = {Michael Gester and Dirk M{\"u}ller and Tim Nieberg and
    Christian Panten and Christian Schulte and Jens Vygen},
  title = {{BonnRoute}: Algorithms and data structures for fast
    and good {VLSI} routing},
  journal = j-TODAES,
  volume = {18},
  number = {2},
  pages = {32:1--32:??},
  month = mar,
  year = {2013},
  coden = {ATASFO},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  bibdate = {Fri Apr 5 18:40:42 MDT 2013},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {We present the core elements of BonnRoute: advanced
    data structures and algorithms for fast and
    high-quality routing in modern technologies. Global
    routing is based on a combinatorial approximation
    scheme for min-max resource sharing. Detailed routing
    uses exact shortest path algorithms, based on a
    shape-based data structure for pin access and a
    two-level track-based data structure for long-distance
    connections. All algorithms are very fast. Compared to
    an industrial router (on 32 nm and 22 nm chips),
    BonnRoute is over two times faster, has 5 \% less
    netlength, 20 \% less vias, and reduces detours by more
    than 90 \%.},
  acknowledgement = ack-nhfb,
  articleno = {32},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Agarwal:2013:SDS,
  author = {Amit Agarwal and Jason Cong and Brian Tagiku},
  title = {The survivability of design-specific spare placement
    in {FPGA} architectures with high defect rates},
  journal = j-TODAES,
  volume = {18},
  number = {2},
  pages = {33:1--33:??},
  month = mar,
  year = {2013},
  coden = {ATASFO},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  bibdate = {Fri Apr 5 18:40:42 MDT 2013},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {We address the problem of optimizing fault tolerance
    in FPGA architectures with high defect rates (such as
    nano-FPGAs) without significantly degrading
    performance. Our methods address fault tolerance during
    the placement and reconfiguration stages of FPGA
    programming. First, we provide several complexity
    results for both the fault reconfiguration and
    fault-tolerance placement problems. Then, we propose a
    placement algorithm which, in the presence of randomly
    generated faults, optimizes spare placement to maximize
    the probability that the FPGA can be reconfigured to
    meet a specified timing constraint. We also give
    heuristics for reconfiguration after faults have been
    detected. Despite the hardness results for both the
    placement and reconfiguration problems, we show our
    heuristics perform well in simulation (in one scenario,
    increasing the probability of successful
    reconfiguration by as much as 55\% compared to a
    uniform spare placement).},
  acknowledgement = ack-nhfb,
  articleno = {33},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Nadakuditi:2013:BAS,
  author = {Raj Rao Nadakuditi and Igor L. Markov},
  title = {On bottleneck analysis in stochastic stream
    processing},
  journal = j-TODAES,
  volume = {18},
  number = {3},
  pages = {34:1--34:??},
  month = jul,
  year = {2013},
  coden = {ATASFO},
  doi = {https://doi.org/10.1145/2491477.2491478},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  bibdate = {Sat Jul 27 08:09:07 MDT 2013},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {Past improvements in clock frequencies have
    traditionally been obtained through technology scaling,
    but most recent technology nodes do not offer such
    benefits. Instead, parallelism has emerged as the key
    driver of chip-performance growth. Unfortunately,
    efficient simultaneous use of on-chip resources is
    hampered by sequential dependencies, as illustrated by
    Amdahl's law. Quantifying achievable parallelism in
    terms of provable mathematical results can help prevent
    futile programming efforts and guide innovation in
    computer architecture toward the most significant
    challenges. To complement Amdahl's law, we focus on
    stream processing and quantify performance losses due
    to stochastic runtimes. Using spectral theory of random
    matrices, we derive new analytical results and validate
    them by numerical simulations. These results allow us
    to explore unique benefits of stochasticity and show
    how and when they outweigh the costs for software
    streams.},
  acknowledgement = ack-nhfb,
  articleno = {34},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Abouelella:2013:HEI,
  author = {Fatma Abouelella and Tom Davidson and Wim Meeus and
    Karel Bruneel and Dirk Stroobandt},
  title = {How to efficiently implement dynamic circuit
    specialization systems},
  journal = j-TODAES,
  volume = {18},
  number = {3},
  pages = {35:1--35:??},
  month = jul,
  year = {2013},
  coden = {ATASFO},
  doi = {https://doi.org/10.1145/2491477.2491479},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  bibdate = {Sat Jul 27 08:09:07 MDT 2013},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {Dynamic circuit specialization (DCS) is a technique
    used to implement FPGA applications where some of the
    input data, called parameters, change slowly compared
    to other inputs. Each time the parameter values change,
    the FPGA is reconfigured by a configuration that is
    specialized for those new parameter values. This
    specialized configuration is much smaller and faster
    than a regular configuration. However, the overhead
    associated with the specialization process should be
    minimized to achieve the desired benefits of using the
    DCS technique. This overhead is represented by both the
    FPGA resources needed to specialize the FPGA at runtime
    and by the specialization time. The introduction of
    parameterized configurations [Bruneel and Stroobandt
    2008] has improved the efficiency of DCS
    implementations. However, the specialization overhead
    still takes a considerable amount of resources and
    time. In this article, we explore how to efficiently
    build DCS systems by presenting a variety of possible
    solutions for the specialization process and the
    overhead associated with each of them. We split the
    specialization process into two main phases: the
    evaluation and the configuration phase. The PowerPC
    embedded processor, the MicroBlaze, and a customized
    processor (CP) are used as alternatives in the
    evaluation phase. In the configuration phase, the ICAP
    and a custom configuration interface (SRL
    configuration) are used as alternatives. Each solution
    is used to implement a DCS system for three
    applications: an adaptive finite impulse response (FIR)
    filter, a ternary content-addressable memory (TCAM),
    and a regular expression matcher (RegEx). The
    experiments show that the use of our CP along with the
    SRL configuration achieves minimum overhead in terms of
    resources and time. Our CP is 1.8 and 3.5 times smaller
    than the PowerPC and the area-optimized implementation
    of the MicroBlaze, respectively. Moreover, the use of
    the CP enables a more compact representation for the
    parameterized configuration in comparison to both the
    PowerPC and the MicroBlaze processors. For instance, in
    the FIR, the parameterized configuration compiled for
    our CP is 6--7 times smaller than that for the embedded
    processors.},
  acknowledgement = ack-nhfb,
  articleno = {35},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Cabodi:2013:TBM,
  author = {Gianpiero Cabodi and Sergio Nocco and Stefano Quer},
  title = {Thread-based multi-engine model checking for multicore
    platforms},
  journal = j-TODAES,
  volume = {18},
  number = {3},
  pages = {36:1--36:??},
  month = jul,
  year = {2013},
  coden = {ATASFO},
  doi = {https://doi.org/10.1145/2491477.2491480},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  bibdate = {Sat Jul 27 08:09:07 MDT 2013},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {This article describes a multithreaded,
    portfolio-based approach to model checking, where
    multiple cores are exploited as the underlying
    computing framework to support concurrent execution of
    cooperative engines. We introduce a portfolio-based
    approach to model checking. Our portfolio is first
    driven by an approximate runtime predictor that
    provides a heuristic approximation to a perfect oracle
    and suggests which engines are more suitable for each
    verification instance. Scalability and robustness of
    the overall model-checking effort highly rely on a
    concurrent, multithreaded model of execution. Following
    similar approaches in related application fields, we
    dovetail data partitioning, focused on proving several
    properties in parallel, and engine partitioning, based
    on concurrent runs of different model-checking engines
    competing for completion of the same problem. We
    investigate concurrency not only to effectively exploit
    several available engines, which operate independently,
    but also to show that a cooperative effort is possible.
    In this case, we adopt a straightforward, light-weight,
    model of inter-engine communication and data sharing.
    We provide a detailed description of the ideas,
    algorithms, and experimental results obtained on the
    benchmarks from the Hardware Model Checking Competition
    suites (HWMCC'10 and HWMCC'11).},
  acknowledgement = ack-nhfb,
  articleno = {36},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Kim:2013:AMP,
  author = {Sehwan Kim and Pai H. Chou},
  title = {Analysis and minimization of power-transmission loss
    in locally daisy-chained systems by local energy
    buffering},
  journal = j-TODAES,
  volume = {18},
  number = {3},
  pages = {37:1--37:??},
  month = jul,
  year = {2013},
  coden = {ATASFO},
  doi = {https://doi.org/10.1145/2491477.2491481},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  bibdate = {Sat Jul 27 08:09:07 MDT 2013},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {Power-transmission loss can be a severe problem for
    low-power embedded systems organized in a daisy-chain
    topology. The loss can be so high that it can result in
    failure to power the load in the first place. The first
    contribution of this article is a recursive algorithm
    for solving the transmission current on each segment of
    the daisy chain at a given supply voltage. It enables
    solving not only the transmission loss but also reports
    infeasible configurations if the voltage is too low.
    Using this core algorithm, our second contribution is
    to find energy-efficient configurations that use local
    energy buffers (LEBs) to eliminate peak load on the bus
    without relying on high voltage. Experimental results
    confirm that our proposed techniques significantly
    reduce the total energy consumption and enable the
    deployed system to operate for significantly longer.},
  acknowledgement = ack-nhfb,
  articleno = {37},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Gupta:2013:ECR,
  author = {Saket Gupta and Sachin S. Sapatnekar},
  title = {Employing circadian rhythms to enhance power and
    reliability},
  journal = j-TODAES,
  volume = {18},
  number = {3},
  pages = {38:1--38:??},
  month = jul,
  year = {2013},
  coden = {ATASFO},
  doi = {https://doi.org/10.1145/2491477.2491482},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  bibdate = {Sat Jul 27 08:09:07 MDT 2013},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {This article presents a novel scheme for saving
    architectural power by mitigating delay degradations in
    digital circuits due to bias temperature instability
    (BTI), inspired by the notion of human circadian
    rhythms. The method works in two alternating phases. In
    the first, the compute phase, the circuit is awake and
    active, operating briskly at a greater-than-nominal
    supply voltage which causes tasks to complete more
    quickly. In the second, the idle phase, the circuit is
    power-gated and put to sleep, enabling BTI recovery.
    Since the wakeful stage works at an elevated supply
    voltage, it results in greater aging than operation at
    the nominal supply voltage, but the sleep state
    involves a recovery that more than compensates for this
    differential. We demonstrate, both at the circuit and
    the architectural levels, that at about the same
    performance, this approach can result in appreciable
    BTI mitigation, thus reducing the guardbands necessary
    to protect against aging, which results in power
    savings over the conventional design.},
  acknowledgement = ack-nhfb,
  articleno = {38},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Tsai:2013:ROC,
  author = {Mei-Hsiang Tsai and Po-Yang Hsu and Hung-Yi Li and
    Yi-Huang Hung and Yi-Yu Liu},
  title = {Routability optimization for crossbar-switch
    structured {ASIC} design},
  journal = j-TODAES,
  volume = {18},
  number = {3},
  pages = {39:1--39:??},
  month = jul,
  year = {2013},
  coden = {ATASFO},
  doi = {https://doi.org/10.1145/2491477.2491483},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  bibdate = {Sat Jul 27 08:09:07 MDT 2013},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {In the routing architecture of a structured
    application-specific integrated circuit (ASIC), the
    crossbar is one of the most area-efficient switch
    blocks. Nevertheless, a dangling wire occurs when there
    is a routing bend in a crossbar switch. Dangling wires
    incur longer wire lengths as well as a higher
    interconnection capacitance. In this article, we tackle
    dangling wire issues for structured ASIC routability
    optimization. We first propose a compact graph model
    for crossbar-switch routing. With our graph model,
    switch connectivity relations can be removed to keep
    the 2D structured ASIC routing graph efficient and to
    speed up the runtime of our routing algorithm.
    Furthermore, we propose a heuristic
    dangling-wire-avoidance routing framework containing
    deferred pin assignment, Steiner point reassignment,
    and anchor pair insertion in order to minimize dangling
    wires and channel width. Finally, in order to take
    routing bends and channel width into account
    simultaneously, we propose concurrent and sequential
    integer linear programming (ILP) formulations and ILP
    variable/constraint degeneration techniques. The
    experimental results demonstrate that our proposed
    heuristic routing framework reduces dangling wires by
    19\%, channel width by 38\%, and wire length by 13\% to
    VPR using the crossbar switch (VPR-C). In addition, our
    sequential ILP router reduces dangling wires by 38\%,
    channel width by 40\%, and wire length by 15\% compared
    to VPR-C. Thus, the runtime efficiency of our
    sequential ILP router is attractive for crossbar-switch
    structured ASIC routing.},
  acknowledgement = ack-nhfb,
  articleno = {39},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Liu:2013:ABF,
  author = {Sean Shih-Ying Liu and Wan-Ting Lo and Chieh-Jui Lee
    and Hung-Ming Chen},
  title = {Agglomerative-based flip-flop merging and relocation
    for signal wirelength and clock tree optimization},
  journal = j-TODAES,
  volume = {18},
  number = {3},
  pages = {40:1--40:??},
  month = jul,
  year = {2013},
  coden = {ATASFO},
  doi = {https://doi.org/10.1145/2491477.2491484},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  bibdate = {Sat Jul 27 08:09:07 MDT 2013},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {In this article, we propose a flip-flop merging
    algorithm based on agglomerative clustering. Compared
    to previous state-of-the-art on flip-flop merging, our
    proposed algorithm outperforms that of Chang et al.
    [2010] and Wang et al. [2011] in all aspects, including
    number of flip-flop reductions, increase in signal
    wirelength, displacement of flip-flops, and execution
    time. Our proposed algorithm also has minimal
    disruption to original placement. In comparison with
    Jiang et al. [2011], Wang et al. [2011], and Chang et
    al. [2010], our proposed algorithm has the least
    displacement when relocating merged flip-flops. While
    previous works on flip-flop merging focus on the number
    of flip-flop reduction, we further evaluate the power
    consumption of clock tree after flip-flop merging. To
    further minimize clock tree wirelength, we propose a
    framework that determines a preferable location for
    relocated merged flip-flops for clock tree synthesis
    (CTS). Experimental results show that our CTS-driven
    flip-flop merging can reduce clock tree wirelength by
    an average of 7.82\% with minimum clock network power
    consumption compared to all of the previous works.},
  acknowledgement = ack-nhfb,
  articleno = {40},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Lee:2013:EMA,
  author = {Yu-Min Lee and Pei-Yu Huang},
  title = {An efficient method for analyzing on-chip thermal
    reliability considering process variations},
  journal = j-TODAES,
  volume = {18},
  number = {3},
  pages = {41:1--41:??},
  month = jul,
  year = {2013},
  coden = {ATASFO},
  doi = {https://doi.org/10.1145/2491477.2491485},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  bibdate = {Sat Jul 27 08:09:07 MDT 2013},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {This work provides an efficient statistical
    electrothermal simulator for analyzing on-chip thermal
    reliability under process variations. Using the
    collocation-based statistical modeling technique,
    first, the statistical interpolation polynomial for
    on-chip temperature distribution can be obtained by
    performing deterministic electrothermal simulation very
    few times and by utilizing polynomial interpolation.
    After that, the proposed simulator not only provides
    the mean and standard deviation profiles of on-chip
    temperature distribution, but also innovates the
    concept of thermal yield profile to statistically
    characterize the on-chip temperature distribution more
    precisely, and builds an efficient technique for
    estimating this figure of merit. Moreover, a mixed-mesh
    strategy is presented to further enhance the efficiency
    of the developed statistical electrothermal simulator.
    Experimental results demonstrate that (1) the developed
    statistical electrothermal simulator can obtain
    accurate approximations with orders of magnitude
    speedup over the Monte Carlo method; (2) comparing with
    a well-known cumulative distribution function
    estimation method, APEX [Li et al. 2004], the developed
    statistical electrothermal simulator can achieve 215$
    \times $ speedup with better accuracy; (3) the
    developed mixed-mesh strategy can achieve an order of
    magnitude faster over our baseline algorithm and still
    maintain an acceptable accuracy level.},
  acknowledgement = ack-nhfb,
  articleno = {41},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Shi:2013:OSC,
  author =       "Yiyu Shi and Jinjun Xiong and Vladimir Zolotov and
                 Chandu Visweswariah",
  title =        "Order statistics for correlated random variables and
                 its application to at-speed testing",
  journal =      j-TODAES,
  volume =       "18",
  number =       "3",
  pages =        "42:1--42:??",
  month =        jul,
  year =         "2013",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2491477.2491486",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Jul 27 08:09:07 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Although order statistics have been studied for
                 several decades, most of the results are based on the
                 assumption of independent and identically distributed
                 (i.i.d.) random variables. In the literature, how to
                 compute the $m$th order statistics of $n$ correlated
                 random variables is still a problem. This article
                 proposes a recursive algorithm based on statistical
                 min/max operations to compute order statistics for
                 general correlated and not necessarily identically
                 distributed random variables. The algorithm has an {$
                 O(m n) $} time complexity and {$ O(m + n) $} space
                 complexity. A binary tree-based data structure is
                 further developed to allow selective update of the
                 order statistics with {$ O(n m^2) $} time. As a vehicle
                 to demonstrate the algorithm, we apply it to the path
                 selection algorithm in at-speed testing. A novel metric
                 multilayer process space coverage metric is proposed to
                 quantitatively gauge the quality of path selection. We
                 then show that such a metric is directly linked to the
                 order statistics, and our recursive algorithm can thus
                 be applied. By employing a branch-and-bound path
                 selection algorithm with these techniques, this article
                 shows that selecting an optimal set of paths for a
                 multimillion-gate design can be performed efficiently.
                 Compared to the state of the art, experimental results
                 show both the efficiency of our algorithms and better
                 quality of our path selection.",
  acknowledgement = ack-nhfb,
  articleno =    "42",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

%%% TODAES 18(3), July 2013, article 43: making test pattern sets
%%% power-safe for wafer test of flip-chip designs.  The acronym TDF is
%%% brace-protected in the title so that bibliography styles which
%%% downcase titles keep it in capitals.
@Article{Zhao:2013:PSA,
  author =       "Wei Zhao and Junxia Ma and Mohammad Tehranipoor and
                 Sreejit Chakravarty",
  title =        "Power-safe application of {TDF} patterns to flip-chip
                 designs during wafer test",
  journal =      j-TODAES,
  volume =       "18",
  number =       "3",
  pages =        "43:1--43:??",
  month =        jul,
  year =         "2013",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2491477.2491487",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Jul 27 08:09:07 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Due to high switching activities in test mode, circuit
                 power consumption is higher than its functional
                 operation. Large switching in the circuit during
                 launch-to-capture cycles not only negatively impacts
                 circuit performance causing overkill, but could also
                 burn tester probes during wafer test due to the
                 excessive current they must drive. It is necessary to
                 develop a quick and effective method for evaluating
                 each pattern, identifying high-power patterns
                 considering functional and tester probes' current
                 limits and making the final pattern set power-safe.
                 Compared with previous low-power methods that deal with
                 scan structure modification or pattern filling
                 techniques, the new proposed method takes into account
                 layout information and resistance in the power
                 distribution network and can identify peak current
                 among C4 power bumps. Post-processing steps replace
                 power-unsafe patterns with low-power ones. The final
                 pattern set provides considerable peak current
                 reduction while fault coverage is maintained.",
  acknowledgement = ack-nhfb,
  articleno =    "43",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

%%% TODAES 18(3), July 2013, article 44: longest-testable-path
%%% identification and dynamic test compaction for small-delay defects
%%% (SDDs).  The "??" in pages presumably marks an unrecorded final page
%%% number -- TODO confirm against the file's conventions.
@Article{Xiang:2013:TCS,
  author =       "Dong Xiang and Jianbo Li and Krishnendu Chakrabarty
                 and Xijiang Lin",
  title =        "Test compaction for small-delay defects using an
                 effective path selection scheme",
  journal =      j-TODAES,
  volume =       "18",
  number =       "3",
  pages =        "44:1--44:??",
  month =        jul,
  year =         "2013",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2491477.2491488",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Jul 27 08:09:07 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Testing for small-delay defects (SDDs) requires
                 fault-effect propagation along the longest testable
                 paths. However, identification of the longest testable
                 paths requires high CPU time, and the sensitization of
                 all such paths leads to large pattern counts. Dynamic
                 test compaction for small-delay defects is therefore
                 necessary to reduce test-data volume. We present a new
                 technique for identifying the longest testable paths
                 through each gate in order to accelerate test
                 generation for SDDs. The resulting test patterns
                 sensitize the longest testable paths that pass through
                 each SDD site. An efficient dynamic test compaction
                 method based on structural analysis is presented to
                 reduce the pattern count substantially, while ensuring
                 that all the longest paths for each SDD are sensitized.
                 Simulation results for a set of ISCAS 89 and IWLS 05
                 benchmark circuits demonstrate the effectiveness of
                 this method.",
  acknowledgement = ack-nhfb,
  articleno =    "44",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

%%% TODAES 18(4), October 2013: unsigned call for Editor-in-Chief
%%% nominations; the entry carries no abstract.
%%% NOTE(review): articleno 44 and pages 44:1--44:?? repeat the numbers
%%% used by Xiang:2013:TCS in issue 3, while the next entry of this issue
%%% is article 45 -- verify the article number against the ACM Digital
%%% Library before relying on it.
@Article{Anonymous:2013:CNE,
  author =       "Anonymous",
  title =        "Call for nominations for {Editor-in-Chief}",
  journal =      j-TODAES,
  volume =       "18",
  number =       "4",
  pages =        "44:1--44:??",
  month =        oct,
  year =         "2013",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2541012.2541672",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Nov 8 11:45:54 MST 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  articleno =    "44",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

%%% TODAES 18(4), October 2013, article 45: editorial introducing the
%%% special section on networks on chip; the entry has no abstract.
@Article{Marculescu:2013:ESS,
  author =       "Diana Marculescu and Chita Das",
  title =        "Editorial to special section on networks on chip:
                 Architecture, tools, and methodologies",
  journal =      j-TODAES,
  volume =       "18",
  number =       "4",
  pages =        "45:1--45:??",
  month =        oct,
  year =         "2013",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2541012.2541013",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Nov 8 11:45:54 MST 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  articleno =    "45",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

%%% TODAES 18(4), October 2013, article 46: fractal-state optimal
%%% control for power management of multi-VFI NoC-based MPSoCs.  The
%%% first word of the subtitle is capitalized after the colon, as in
%%% Marculescu:2013:ESS; BibTeX styles leave the case of that letter
%%% unchanged, so it has to be entered in its final form.
@Article{Bogdan:2013:DPM,
  author =       "Paul Bogdan and Radu Marculescu and Siddharth Jain",
  title =        "Dynamic power management for multidomain
                 system-on-chip platforms: An optimal control approach",
  journal =      j-TODAES,
  volume =       "18",
  number =       "4",
  pages =        "46:1--46:??",
  month =        oct,
  year =         "2013",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2504904",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Nov 8 11:45:54 MST 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Reducing energy consumption in multiprocessor
                 systems-on-chip (MPSoCs) where communication happens
                 via the network-on-chip (NoC) approach calls for
                 multiple voltage/frequency island (VFI)-based designs.
                 In turn, such multi-VFI architectures need efficient,
                 robust, and accurate runtime control mechanisms that
                 can exploit the workload characteristics in order to
                 save power. Despite being tractable, the linear control
                 models for power management cannot capture some
                 important workload characteristics (e.g., fractality,
                 nonstationarity) observed in heterogeneous NoCs; if
                 ignored, such characteristics lead to inefficient
                 communication and resources allocation, as well as high
                 power dissipation in MPSoCs. To mitigate such
                 limitations, we propose a new paradigm shift from power
                 optimization based on linear models to control
                 approaches based on fractal-state equations. As such,
                 our approach is the first to propose a controller for
                 fractal workloads with precise constraints on state and
                 control variables and specific time bounds. Our results
                 show that significant power savings can be achieved at
                 runtime while running a variety of benchmark
                 applications.",
  acknowledgement = ack-nhfb,
  articleno =    "46",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

%%% TODAES 18(4), October 2013, article 47: AMAT-based monitoring
%%% combined with PID-controlled DVFS for a voltage/frequency domain
%%% shared by the NoC and the distributed last-level cache.  DVFS and
%%% CMP are brace-protected in the title to keep their capitalization.
@Article{Chen:2013:NMC,
  author =       "Xi Chen and Zheng Xu and Hyungjun Kim and Paul Gratz
                 and Jiang Hu and Michael Kishinevsky and Umit Ogras",
  title =        "In-network monitoring and control policy for {DVFS} of
                 {CMP} networks-on-chip and last level caches",
  journal =      j-TODAES,
  volume =       "18",
  number =       "4",
  pages =        "47:1--47:??",
  month =        oct,
  year =         "2013",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2504905",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Nov 8 11:45:54 MST 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "In chip design today and for a foreseeable future, the
                 last-level cache and on-chip interconnect is not only
                 performance critical but also a substantial power
                 consumer. This work focuses on employing dynamic
                 voltage and frequency scaling (DVFS) policies for
                 networks-on-chip (NoC) and shared, distributed
                 last-level caches (LLC). In particular, we consider a
                 practical system architecture where the distributed LLC
                 and the NoC share a voltage/frequency domain that is
                 separate from the core domain. This architecture
                 enables the control of the relative speed between the
                 cores and memory hierarchy without introducing
                 synchronization delays within the NoC. DVFS for this
                 architecture is more complex than individual
                 link/core-based DVFS since it involves spatially
                 distributed monitoring and control. We propose an
                 average memory access time (AMAT)-based monitoring
                 technique and integrate it with DVFS based on PID
                 control theory. Simulations on PARSEC benchmarks yield
                 a 27\% energy savings with a negligible impact on
                 system performance.",
  acknowledgement = ack-nhfb,
  articleno =    "47",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

%%% TODAES 18(4), October 2013, article 48: feedback-directed virtual
%%% channel partitioning (VCP) that shares on-chip network bandwidth
%%% between CPU and GPU cores in a heterogeneous architecture.
@Article{Lee:2013:AVC,
  author =       "Jaekyu Lee and Si Li and Hyesoon Kim and Sudhakar
                 Yalamanchili",
  title =        "Adaptive virtual channel partitioning for
                 network-on-chip in heterogeneous architectures",
  journal =      j-TODAES,
  volume =       "18",
  number =       "4",
  pages =        "48:1--48:??",
  month =        oct,
  year =         "2013",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2504906",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Nov 8 11:45:54 MST 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Current heterogeneous chip-multiprocessors (CMPs)
                 integrate a GPU architecture on a die. However, the
                 heterogeneity of this architecture inevitably exerts
                 different pressures on shared resource management due
                 to differing characteristics of CPU and GPU cores. We
                 consider how to efficiently share on-chip resources
                 between cores within the heterogeneous system, in
                 particular the on-chip network. Heterogeneous
                 architectures use an on-chip interconnection network to
                 access shared resources such as last-level cache tiles
                 and memory controllers, and this type of on-chip
                 network will have a significant impact on performance.
                 In this article, we propose a feedback-directed virtual
                 channel partitioning (VCP) mechanism for on-chip
                 routers to effectively share network bandwidth between
                 CPU and GPU cores in a heterogeneous architecture. VCP
                 dedicates a few virtual channels to CPU and GPU
                 applications with separate injection queues. The
                 proposed mechanism balances on-chip network bandwidth
                 for applications running on CPU and GPU cores by
                 adaptively choosing the best partitioning
                 configuration. As a result, our mechanism improves
                 system throughput by 15\% over the baseline across 39
                 heterogeneous workloads.",
  acknowledgement = ack-nhfb,
  articleno =    "48",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

%%% TODAES 18(4), October 2013, article 49: order-based circuit
%%% preallocation in hybrid packet/circuit switched multiplane NoCs,
%%% presented through Deja Vu switching and Red Carpet Routing.  The
%%% accented letters in the abstract are written as braced TeX accent
%%% commands rather than as non-ASCII characters.
@Article{Abousamra:2013:OCE,
  author =       "Ahmed Abousamra and Alex K. Jones and Rami Melhem",
  title =        "Ordering circuit establishment in multiplane {NoCs}",
  journal =      j-TODAES,
  volume =       "18",
  number =       "4",
  pages =        "49:1--49:??",
  month =        oct,
  year =         "2013",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2500752",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Nov 8 11:45:54 MST 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Segregating networks-on-chips (NoCs) into data and
                 control planes yields several opportunities for
                 improving power and performance in chip-multiprocessor
                 systems (CMPs). This article describes a hybrid
                 packet/circuit switched multiplane network optimized to
                 reduce latency in order to improve system performance
                 and/or reduce system energy. Unlike traditional circuit
                 preallocation techniques which require timestamps to
                 reserve circuit resources, this article proposes an
                 order-based preallocation scheme. By enforcing the
                 order in which resources are scheduled and utilized
                 rather than a fixed time, the NoC can take advantage of
                 messages that arrive early while naturally tolerating
                 message delays due to contention. Ordered circuit
                 establishment is presented using two techniques. First,
                 D{\'e}j{\`a} Vu switching preestablishes circuits for
                 data messages once a cache hit is detected and prior to
                 the requested data becoming available. Second, using
                 Red Carpet Routing, circuits are proactively reserved
                 for a return data message as a request message
                 traverses the NoC. The reduced communication latency
                 over configured circuits enable system performance
                 improvement or saving NoC energy by reducing voltage
                 and frequency without sacrificing performance. In
                 simulations of 16 and 64 core CMPs, D{\'e}j{\`a} Vu
                 switching enabled average NoC energy savings of 43\%
                 and 53\% respectively. On the other hand, simulations
                 of communication sensitive benchmarks using Red Carpet
                 Routing show speedup in execution time of up to 16\%,
                 with an average of 10\% over a purely packet switched
                 NoC and an average of 8\% over preconfiguring circuits
                 using D{\'e}j{\`a} Vu switching.",
  acknowledgement = ack-nhfb,
  articleno =    "49",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

%%% TODAES 18(4), October 2013, article 50: deflection routing for 3D
%%% NoCs whose vertical links are serialized TSVs, including handling
%%% of the resulting deadlock and livelock problems.  The title sets
%%% the digit of 3D in math mode inside a protective brace group.
@Article{Lee:2013:DRN,
  author =       "Jinho Lee and Dongwoo Lee and Sunwook Kim and Kiyoung
                 Choi",
  title =        "Deflection routing in {$3$D} network-on-chip with
                 limited vertical bandwidth",
  journal =      j-TODAES,
  volume =       "18",
  number =       "4",
  pages =        "50:1--50:??",
  month =        oct,
  year =         "2013",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2505011",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Nov 8 11:45:54 MST 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "This article proposes a deflection routing for 3D NoC
                 with serialized TSVs for vertical links. Compared to
                 buffered routing, deflection routing provides area- and
                 power-efficient communication and little loss of
                 performance under low to medium traffic load. Under 3D
                 environments, the deflection routing can yield even
                 better performance than buffered routing when key
                 aspects are properly taken into account. However, the
                 existing deflection routing technique cannot be
                 directly applied because the serialized TSV links will
                 take longer time to send data than ordinary planar
                 links and cause many problems. A naive deflection
                 through a TSV link can cause significantly longer
                 latency and more energy consumption even for
                 communications through planar links. This article
                 proposes a method to mitigate the effect and also solve
                 arising deadlock and livelock problems. Evaluation of
                 the proposed scheme shows its effectiveness in
                 throughput, latency, and energy consumption.",
  acknowledgement = ack-nhfb,
  articleno =    "50",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

%%% TODAES 18(4), October 2013, article 51: CPH, a parameterized
%%% compositional Pareto-algebraic heuristic for the multidimensional
%%% multiple-choice knapsack problem (MMKP), applied to chip
%%% multiprocessor resource management and global routing.
@Article{Shojaei:2013:FSM,
  author =       "Hamid Shojaei and Twan Basten and Marc Geilen and
                 Azadeh Davoodi",
  title =        "A fast and scalable multidimensional multiple-choice
                 knapsack heuristic",
  journal =      j-TODAES,
  volume =       "18",
  number =       "4",
  pages =        "51:1--51:??",
  month =        oct,
  year =         "2013",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2541012.2541014",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Nov 8 11:45:54 MST 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Many combinatorial optimization problems in the
                 embedded systems and design automation domains involve
                 decision making in multidimensional spaces. The
                 multidimensional multiple-choice knapsack problem
                 (MMKP) is among the most challenging of the encountered
                 optimization problems. MMKP problem instances appear
                 for example in chip multiprocessor runtime resource
                 management and in global routing of wiring in circuits.
                 Chip multiprocessor resource management requires
                 solving MMKP under real-time constraints, whereas
                 global routing requires scalability of the solution
                 approach to extremely large MMKP instances. This
                 article presents a novel MMKP heuristic, CPH (for
                 Compositional Pareto-algebraic Heuristic), which is a
                 parameterized compositional heuristic based on the
                 principles of Pareto algebra. Compositionality allows
                 incremental computation of solutions. The
                 parameterization allows tuning of the heuristic to the
                 problem at hand. These aspects make CPH a very
                 versatile heuristic. When tuning CPH for computation
                 time, MMKP instances can be solved in real time with
                 better results than the fastest MMKP heuristic so far.
                 When tuning CPH for solution quality, it finds several
                 new solutions for standard benchmarks that are not
                 found by any existing heuristic. CPH furthermore scales
                 to extremely large problem instances. We illustrate and
                 evaluate the use of CPH in both chip multiprocessor
                 resource management and in global routing.",
  acknowledgement = ack-nhfb,
  articleno =    "51",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

%%% TODAES 18(4), October 2013, article 52: incremental interconnect
%%% customization of coarse-grained reconfigurable architectures
%%% (CGRAs), cast as inexact graph matching solved with an A* search
%%% heuristic.
@Article{Yoon:2013:ACC,
  author =       "Jonghee W. Yoon and Jongeun Lee and Sanghyun Park and
                 Yongjoo Kim and Jinyong Lee and Yunheung Paek and
                 Doosan Cho",
  title =        "Architecture customization of on-chip reconfigurable
                 accelerators",
  journal =      j-TODAES,
  volume =       "18",
  number =       "4",
  pages =        "52:1--52:??",
  month =        oct,
  year =         "2013",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2493384",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Nov 8 11:45:54 MST 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Integrating coarse-grained reconfigurable
                 architectures (CGRAs) into a System-on-a-Chip (SoC)
                 presents many benefits as well as important challenges.
                 One of the challenges is how to customize the
                 architecture for the target applications efficiently
                 and effectively without performing explicit design
                 space exploration. In this article we present a novel
                 methodology for incremental interconnect customization
                 of CGRAs that can suggest a new interconnection
                 architecture which is able to maximize the performance
                 for a given set of application kernels while minimizing
                 the hardware cost. In our methodology, we translate the
                 problem of interconnect customization into that of
                 inexact graph matching, and we devised a heuristic for
                 A* search algorithm to efficiently solve the inexact
                 graph matching problem. Our experimental results
                 demonstrate that our customization method can quickly
                 find application-optimized interconnections that
                 exhibit 80\% higher performance on average compared to
                 the base architecture which has mesh interconnections,
                 with little energy and hardware increase in
                 interconnections and muxes.",
  acknowledgement = ack-nhfb,
  articleno =    "52",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

%%% TODAES 18(4), October 2013, article 53: Smart Cache Cleaning (SCC),
%%% a hybrid and a software-only scheme for energy-efficient soft-error
%%% protection of the L1 data cache.
@Article{Jeyapaul:2013:EEE,
  author =       "Reiley Jeyapaul and Aviral Shrivastava",
  title =        "Enabling energy efficient reliability in embedded
                 systems through smart cache cleaning",
  journal =      j-TODAES,
  volume =       "18",
  number =       "4",
  pages =        "53:1--53:??",
  month =        oct,
  year =         "2013",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2505012",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Nov 8 11:45:54 MST 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Incessant and rapid technology scaling has brought us
                 to a point where today's, and future transistors are
                 susceptible to transient errors induced by energy
                 carrying particles, called soft errors. Within a
                 processor, the sheer size and nature of data in the
                 caches render it most vulnerable to electrical
                 interference on data stored in the cache. Data in the
                 cache is vulnerable to corruption by soft errors, for
                 the time it remains actively unused in the cache.
                 Write-through and early-write-back [Li et al. 2004]
                 cache configurations reduce the time for vulnerable
                 data in the cache, at the cost of increased memory
                 writes and thereby energy. We propose a smart cache
                 cleaning methodology, that enables copying of only
                 specific vulnerable cache blocks into the memory at
                 chosen times, thereby ensuring data cache protection
                 with minimal memory writes. In this work, we first
                 propose a hybrid (software-hardware) methodology. We
                 then propose an improved software solution that
                 utilizes cache write-back functionality available in
                 commodity processors; thereby reducing the hardware
                 overhead required to implement smart cache cleaning for
                 such systems. The parameters involved in the
                 implementation of our Smart Cache Cleaning (SCC)
                 technique enable a means to provide for customizable
                 energy-efficient soft error reduction in the L1 data
                 cache. Given the system requirements of reliability,
                 power-budget and runtime priority of the application,
                 appropriate parameters of the SCC can be customized to
                 trade-off power consumption and L1 data cache
                 reliability. Our experiments over LINPACK and Livermore
                 benchmarks demonstrate 26\% reduced
                 energy-vulnerability product (energy-efficient
                 vulnerability reduction) compared to that of hardware
                 based cache reliability techniques. Our software-only
                 solution achieves same levels of reliability with an
                 additional 28\% performance improvement.",
  acknowledgement = ack-nhfb,
  articleno =    "53",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

%%% TODAES 18(4), October 2013, article 54: hardware and software
%%% techniques that mitigate process-variation effects on instruction
%%% fetch, covering address translation and instruction cache access.
%%% The title writes the slash with the TeX macro \slash.
@Article{Kadayif:2013:HSA,
  author =       "Ismail Kadayif and Mahir Turkcan and Seher Kiziltepe
                 and Ozcan Ozturk",
  title =        "Hardware\slash software approaches for reducing the
                 process variation impact on instruction fetches",
  journal =      j-TODAES,
  volume =       "18",
  number =       "4",
  pages =        "54:1--54:??",
  month =        oct,
  year =         "2013",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2489778",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Nov 8 11:45:54 MST 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "As technology moves towards finer process geometries,
                 it is becoming extremely difficult to control critical
                 physical parameters such as channel length, gate oxide
                 thickness, and dopant ion concentration. Variations in
                 these parameters lead to dramatic variations in access
                 latencies in Static Random Access Memory (SRAM)
                 devices. This means that different lines of the same
                 cache may have different access latencies. A simple
                 solution to this problem is to adopt the worst-case
                 latency paradigm. While this egalitarian cache
                 management is simple, it may introduce significant
                 performance overhead during instruction fetches when
                 both address translation (instruction Translation
                 Lookaside Buffer (TLB) access) and instruction cache
                 access take place, making this solution infeasible for
                 future high-performance processors. In this study, we
                 first propose some hardware and software enhancements
                 and then, based on those, investigate several
                 techniques to mitigate the effect of process variation
                 on the instruction fetch pipeline stage in modern
                 processors. For address translation, we study an
                 approach that performs the virtual-to-physical page
                 translation once, then stores it in a special register,
                 reusing it as long as the execution remains on the same
                 instruction page. To handle varying access latencies
                 across different instruction cache lines, we annotate
                 the cache access latency of instructions within
                 themselves to give the circuitry a hint about how long
                 to wait for the next instruction to become available.",
  acknowledgement = ack-nhfb,
  articleno =    "54",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Wu:2013:EWD,
  author =       "Guanying Wu and Xubin He and Ningde Xie and Tong
                 Zhang",
  title =        "Exploiting workload dynamics to improve {SSD} read
                 latency via differentiated error correction codes",
  journal =      j-TODAES,
  volume =       "18",
  number =       "4",
  pages =        "55:1--55:??",
  month =        oct,
  year =         "2013",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2489792",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Nov 8 11:45:54 MST 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "This article presents a cross-layer codesign approach
                 to reduce SSD read response latency. The key is to
                 cohesively exploit the NAND flash memory device write
                 speed vs. raw storage reliability trade-off at the
                 physical layer and runtime data access workload
                 dynamics at the system level. Leveraging runtime data
                 access workload variation, we can opportunistically
                 slow down NAND flash memory write speed and hence
                 improve NAND flash memory raw storage reliability. This
                 naturally enables an opportunistic use of weaker error
                 correction schemes that can directly reduce SSD read
                 access latency. We develop a disk-level scheduling
                 scheme to effectively smooth the write workload in
                 order to maximize the occurrence of runtime
                 opportunistic NAND flash memory write slowdown. Using 2
                 bits/cell NAND flash memory with BCH-based error
                 correction as a test vehicle, we carry out
                 extensive simulations over various workloads and
                 demonstrate that this developed cross-layer co-design
                 solution can reduce the average SSD read latency by up
                 to 59.4\% without sacrificing the write throughput
                 performance.",
  acknowledgement = ack-nhfb,
  articleno =    "55",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Huang:2013:IBM,
  author =       "Po-Chun Huang and Yuan-Hao Chang and Tei-Wei Kuo",
  title =        "An index-based management scheme with adaptive caching
                 for huge-scale low-cost embedded flash storages",
  journal =      j-TODAES,
  volume =       "18",
  number =       "4",
  pages =        "56:1--56:??",
  month =        oct,
  year =         "2013",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2505013",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Nov 8 11:45:54 MST 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Due to its remarkable access performance, shock
                 resistance, and costs, NAND flash memory is now widely
                 adopted in a variety of computing environments,
                 especially in mobile devices such as smart phones,
                 media players and electronic book readers. For the
                 consideration of costs, low-cost embedded flash
                 storages such as flash memory cards are often employed
                 on such devices. Different from solid-state disks, the
                 RAM buffer equipped on low-cost embedded flash storages
                 are very small, for example, limited under several
                 dozens of kilobytes, despite of the rapidly growing
                 capacity of the storages. The significance of
                 effectively utilizing the very limited on-device RAM
                 buffers of embedded flash storages is therefore
                 highlighted, and a novel design of scalable flash
                 management schemes is needed to tackle the new access
                 constraints of MLC NAND flash memory. In this work, a
                 highly scalable design of the flash translation layer
                 is presented with the considerations of the on-device
                 RAM size, user access patterns,
                 address-mapping-information caching and MLC access
                 constraints. Through a series of experiments, it is
                 verified that, with appropriate settings of cache
                 sizes, the proposed management scheme provides
                 comparable performance results to prior arts with much
                 lower requirements on the on-device RAM. In other
                 words, the proposed scheme suggests a strategy to make
                 better use of the on-device RAM, and is suitable for
                 embedded flash storages.",
  acknowledgement = ack-nhfb,
  articleno =    "56",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Zhao:2013:CSL,
  author =       "Bo Zhao and Jun Yang and Youtao Zhang and Yiran Chen
                 and Hai Li",
  title =        "Common-source-line array: an area efficient memory
                 architecture for bipolar nonvolatile devices",
  journal =      j-TODAES,
  volume =       "18",
  number =       "4",
  pages =        "57:1--57:??",
  month =        oct,
  year =         "2013",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2500459",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Nov 8 11:45:54 MST 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Traditional array organization of bipolar nonvolatile
                 memories such as STT-MRAM and memristor utilizes two
                 bitlines for cell manipulations. With technology
                 scaling, such bitline pair will soon become the
                 bottleneck for further density improvement. In this
                 article we propose a novel common-source-line array
                 architecture, which uses a shared source-line along the
                 row, leaving only one bitline per column. We elaborate
                 the array design to ensure reliability, and demonstrate
                 its effectiveness on STT-MRAM and memristor memory
                 arrays. Our study results show that with comparable
                 latency and energy, the proposed common-source-line
                 array can save 34\% and 33\% area for Memristor-RAM and
                 STT-MRAM respectively, compared with corresponding
                 dual-bitline arrays.",
  acknowledgement = ack-nhfb,
  articleno =    "57",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{DaRolt:2013:NDS,
  author =       "Jean {Da Rolt} and Giorgio {Di Natale} and Marie-Lise
                 Flottes and Bruno Rouzeyre",
  title =        "A novel differential scan attack on advanced {DFT}
                 structures",
  journal =      j-TODAES,
  volume =       "18",
  number =       "4",
  pages =        "58:1--58:??",
  month =        oct,
  year =         "2013",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2505014",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Nov 8 11:45:54 MST 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Scan chains insertion is the most common technique to
                 ensure the testability of digital cores, providing high
                 fault coverage. However, for ICs dealing with secret
                 information, scan chains can be used as back doors for
                 accessing secret data thus becoming a threat to system
                 security. So far, advanced test structures used to
                 reduce test costs (e.g., response compaction) and
                 achieve high fault coverage (e.g., X's masking decoder)
                 have been considered as intrinsic countermeasures
                 against these threats. This work proposes a new generic
                 scan-based attack demonstrating that these test
                 structures are not sufficiently effective to prevent
                 leakage through the test infrastructure. This generic
                 attack can be easily adapted to several cryptographic
                 implementations for both symmetric and public key
                 algorithms. The proposed attack is demonstrated on
                 several ciphers.",
  acknowledgement = ack-nhfb,
  articleno =    "58",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Chang:2013:PDS,
  author =       "Yao-Lin Chang and I-Lun Tseng",
  title =        "A parallel dual-scanline algorithm for partitioning
                 parameterized 45-degree polygons",
  journal =      j-TODAES,
  volume =       "18",
  number =       "4",
  pages =        "59:1--59:??",
  month =        oct,
  year =         "2013",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2505015",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Nov 8 11:45:54 MST 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "In order to use rectangular corner stitching data
                 structures in storing parameterized orthogonal layouts,
                 parameterized polygons in the layouts must be
                 partitioned into rectangles. Likewise, in order to use
                 trapezoidal corner stitching data structures in storing
                 parameterized 45-degree layouts, parameterized polygons
                 in the layouts have to be partitioned into trapezoids.
                 In this article, a parallel polygon partitioning
                 algorithm is proposed; the algorithm is capable of
                 partitioning parameterized orthogonal polygons into
                 parameterized rectangles as well as partitioning
                 parameterized 45-degree polygons into parameterized
                 trapezoids. Additionally, the algorithm can be used to
                 partition fixed-coordinate polygons. By adopting the
                 dual-scanline technique, which involves using two
                 scanlines to concurrently sweep an input polygon, the
                 parallel partitioning algorithm can process vertices
                 and edges of the input polygon efficiently. The
                 parallel polygon partitioning algorithm has been
                 implemented in C++ with the use of OpenMP. Compared
                 with a sequential partitioning program which uses a
                 single scanline, our parallel partitioning program can
                 achieve 20\% to 30\% speedup while partitioning large
                 parameterized polygons or partitioning parameterized
                 polygons with complex constraints.",
  acknowledgement = ack-nhfb,
  articleno =    "59",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Ramanujam:2013:DBC,
  author =       "Rohit Sunkam Ramanujam and Bill Lin",
  title =        "Destination-based congestion awareness for adaptive
                 routing in {$2$D} mesh networks",
  journal =      j-TODAES,
  volume =       "18",
  number =       "4",
  pages =        "60:1--60:??",
  month =        oct,
  year =         "2013",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2505055",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Nov 8 11:45:54 MST 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "The choice of routing algorithm plays a vital role in
                 the performance of on-chip interconnection networks.
                 Adaptive routing is appealing because it offers better
                 latency and throughput than oblivious routing,
                 especially under nonuniform and bursty traffic. The
                 performance of an adaptive routing algorithm is
                 determined by its ability to accurately estimate
                 congestion in the network. In this regard, maintaining
                 global congestion state using a separate monitoring
                 network offers better congestion visibility into
                 distant parts of the network compared to solutions
                 relying only on local congestion. However, the main
                 challenge in designing such routing schemes is to keep
                 the logic and bandwidth overhead as low as possible to
                 fit into the tight power, area, and delay budgets of
                 on-chip routers. In this article, we propose a minimal
                 destination-based adaptive routing strategy (DAR),
                 where every node estimates the delay to every other
                 node in the network, and routing decisions are based on
                 these per-destination delay estimates. DAR outperforms
                 Regional Congestion Awareness (RCA), the best
                 previously known adaptive routing algorithm that uses
                 nonlocal congestion state. The performance improvement
                 is brought about by maintaining fine-grained
                 per-destination delay estimates in DAR that are more
                 accurate than regional congestion metrics measured in
                 RCA. The increased accuracy is a consequence of the
                 fact that the per-destination delay estimates are not
                 corrupted by congestion on links outside the admissible
                 routing paths to the destination. A scalable version of
                 DAR, referred to as SDAR, is also proposed for
                 minimizing the overheads associated with DAR in large
                 network topologies. We show that DAR outperforms local
                 adaptive routing by up to 79\% and RCA by up to 58\% in
                 terms of latency on SPLASH-2 benchmarks. DAR and SDAR
                 also outperform existing adaptive and oblivious routing
                 algorithms in latency and throughput under synthetic
                 traffic patterns on 8$ \times $8 and 16$ \times $16 mesh
                 topologies, respectively.",
  acknowledgement = ack-nhfb,
  articleno =    "60",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Yan:2013:RAG,
  author =       "Tan Yan and Qiang Ma and Scott Chilstedt and Martin D.
                 F. Wong and Deming Chen",
  title =        "A routing algorithm for graphene nanoribbon circuit",
  journal =      j-TODAES,
  volume =       "18",
  number =       "4",
  pages =        "61:1--61:??",
  month =        oct,
  year =         "2013",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2505056",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Nov 8 11:45:54 MST 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Conventional CMOS devices are facing an increasing
                 number of challenges as their feature sizes scale down.
                 Graphene nanoribbon (GNR) based devices are shown to be
                 a promising replacement of traditional CMOS at future
                 technology nodes. However, all previous works on GNRs
                 focus at the device level. In order to integrate these
                 devices into electronic systems, routing becomes a key
                 issue. In this article, the GNR routing problem is
                 studied for the first time. We formulate the GNR
                 routing problem as a minimum hybrid-cost shortest path
                 problem on triangular mesh (``hybrid'' means that we
                 need to consider both the length and the bending of the
                 routing path). We show that by graph expansion, this
                 minimum hybrid-cost shortest path problem can be solved
                 by applying the conventional shortest path algorithm on
                 the expanded graph. Experimental results show that our
                 GNR routing algorithm effectively handles the hybrid
                 cost.",
  acknowledgement = ack-nhfb,
  articleno =    "61",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Ayoub:2013:CCM,
  author =       "Raid Ayoub and Rajib Nath and Tajana Simunic Rosing",
  title =        "{CoMETC}: Coordinated management of energy\slash
                 thermal\slash cooling in servers",
  journal =      j-TODAES,
  volume =       "19",
  number =       "1",
  pages =        "1:1--1:??",
  month =        dec,
  year =         "2013",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2534381",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Dec 17 17:21:29 MST 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "We introduce a Coordinated Management of Energy,
                 Thermal, and Cooling (CoMETC) technique to minimize
                 cooling and memory energy of server machines.
                 State-of-the-art solutions decouple the optimization of
                 cooling energy costs and energy consumption of CPU and
                 memory subsystems. This results in suboptimal solutions
                 due to thermal dependencies between CPU and memory and
                 the nonlinearity in energy costs of cooling. In
                 contrast, we develop a unified solution that integrates
                 energy, thermal, and cooling management for CPU and
                 memory subsystems to maximize energy savings. CoMETC
                 reduces the operational energy of the memory by
                 clustering active memory pages to a subset of memory
                 modules while accounting for thermal and cooling
                 aspects. At the same time, CoMETC removes hotspots
                 between and within the CPU sockets and reduces the
                 effects of thermal coupling with memory in order to
                 minimize cooling energy costs. We design CoMETC using a
                 control-theoretic approach to guarantee meeting these
                 objectives. We introduce a formal thermal and cooling
                 model to be used for online decisions inside CoMETC.
                 Our experimental results show that CoMETC achieves
                 average cooling and memory energy savings of 58\%
                 compared to state-of-the-art techniques at a
                 performance overhead of less than 0.3\%.",
  acknowledgement = ack-nhfb,
  articleno =    "1",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Al-Dujaily:2013:DPB,
  author =       "Ra'ed Al-Dujaily and Nizar Dahir and Terrence Mak and
                 Fei Xia and Alex Yakovlev",
  title =        "Dynamic programming-based runtime thermal management
                 {(DPRTM)}: an online thermal control strategy for
                 {$3$D}-{NoC} systems",
  journal =      j-TODAES,
  volume =       "19",
  number =       "1",
  pages =        "2:1--2:??",
  month =        dec,
  year =         "2013",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2534382",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Dec 17 17:21:29 MST 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Complex thermal behavior inhibits the advancement of
                 three-dimensional (3D) very-large-scale-integration
                 (VLSI) system designs, as it could lead to ultra-high
                 temperature hotspots and permanent silicon device
                 damage. This article introduces a new runtime thermal
                 management strategy to effectively diffuse and manage
                 heat throughout 3D chip geometry for a better
                 throughput performance in networks on chip (NoC). This
                 strategy employs a dynamic programming-based runtime
                 thermal management (DPRTM) policy to provide online
                 thermal regulation. Reactive and proactive adaptive
                 schemes are integrated to optimize the routing pathways
                 depending on the critical temperature thresholds and
                 traffic developments. Also, when the critical system
                 thermal limit is violated, an urgent throttling will
                 take place. The proposed DPRTM is rigorously evaluated
                 through cycle-accurate simulations, and results show
                 that the proposed approach outperforms conventional
                 approaches in terms of computational efficiency and
                 thermal stability. For example, the system throughput
                 using the DPRTM approach can be improved by 33\% when
                 compared to other adaptive routing strategies for a
                 given thermal constraint. Moreover, the DPRTM
                 implementation presented in this article demonstrates
                 that the hardware overhead is insignificant. This work
                 opens a new avenue for exploring the on-chip
                 adaptability and thermal regulation for future
                 large-scale and 3D many-core integrations.",
  acknowledgement = ack-nhfb,
  articleno =    "2",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Chang:2013:IPP,
  author =       "Yen-Jen Chang and Hsiang-Yu Lu",
  title =        "Improving the performance of port range check for
                 network packet filtering",
  journal =      j-TODAES,
  volume =       "19",
  number =       "1",
  pages =        "3:1--3:??",
  month =        dec,
  year =         "2013",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2523069",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Dec 17 17:21:29 MST 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "This article introduces a high-performance packet
                 filter design in which we propose the partial parallel
                 range check (PPRC) technique for speeding up port range
                 check. Unlike the conventional serial design that uses
                 cascading cells to perform the serial check, PPRC
                 divides the single path into several segments. All PPRC
                 segments perform the range compare simultaneously, that
                 is, parallel check, and then the results of each
                 segment are serialized to generate the final check
                 result. Besides theoretical analyses, we also use UMC
                 90nm CMOS process to implement the PPRC design and
                 verify its effect on the check performance. Compared to
                 state-of-the-art range check techniques, the results
                 show that the PPRC design with the best configuration
                 can improve check performance by 28\%, at least. In
                 addition, the PPRC design is more stable and energy
                 efficient than related designs, even though it requires
                 more transistors to implement the peripheral circuitry.
                 The range of energy improvement achieved by the PPRC
                 design is about 35\%--70\%.",
  acknowledgement = ack-nhfb,
  articleno =    "3",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Kritikakou:2013:NOS,
  author =       "Angeliki Kritikakou and Francky Catthoor and Vasilios
                 Kelefouras and Costas Goutis",
  title =        "Near-optimal and scalable intrasignal in-place
                 optimization for non-overlapping and irregular access
                 schemes",
  journal =      j-TODAES,
  volume =       "19",
  number =       "1",
  pages =        "4:1--4:??",
  month =        dec,
  year =         "2013",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2534383",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Dec 17 17:21:29 MST 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Storage-size management techniques aim to reduce the
                 resources required to store elements and to
                 concurrently provide efficient addressing during
                 element accessing. Existing techniques are less
                 appropriate for large iteration spaces with increased
                 numbers of irregularly spread holes. They either have
                 to approximate the accessed regions, leading to
                 overestimation of the final resources, or they require
                 prohibited exploration time to find the storage size.
                 In this work, we present a near-optimal and scalable
                 methodology for storage-size, intrasignal, in-place
                 optimization, that is, to compute the minimum amount of
                 resources required to store the elements of a group
                 (array), for irregular complex access schemes in the
                 target domain of non-overlapping store and load
                 accesses.",
  acknowledgement = ack-nhfb,
  articleno =    "4",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Li:2013:LEV,
  author =       "Jianhua Li and Liang Shi and Qingan Li and Chun Jason
                 Xue and Yiran Chen and Yinlong Xu and Wei Wang",
  title =        "Low-energy volatile {STT--RAM} cache design using
                 cache-coherence-enabled adaptive refresh",
  journal =      j-TODAES,
  volume =       "19",
  number =       "1",
  pages =        "5:1--5:??",
  month =        dec,
  year =         "2013",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2534393",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Dec 17 17:21:29 MST 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Spin-Torque Transfer RAM (STT-RAM) is a promising
                 candidate for SRAM replacement because of its excellent
                 features, such as fast read access, high density, low
                 leakage power, and CMOS technology compatibility.
                 However, wide adoption of STT-RAM as cache memories is
                 impeded by its long write latency and high write power.
                 Recent work proposed improving the write performance
                 through relaxing the retention time of STT-RAM cells.
                 The resultant volatile STT-RAM needs to be periodically
                 refreshed to prevent data loss. When volatile STT-RAM
                 is applied as the last-level cache (LLC) in chip
                 multiprocessor (CMP) systems, frequent refresh
                 operations could dissipate significant extra energy. In
                 addition, refresh operations could severely conflict
                 with normal read/write operations to degrade overall
                 system performance. Therefore, minimizing the
                 performance impact caused by refresh operations is
                 crucial for the adoption of volatile STT-RAM. In this
                 article, we propose Cache-Coherence-Enabled Adaptive
                 Refresh (CCear) to minimize the number of refresh
                 operations for volatile STT-RAM, adopted as the LLC for
                 CMP systems. Specifically, CCear interacts with cache
                 coherence protocol and cache management policy to
                 minimize the number of refresh operations on volatile
                 STT-RAM caches. Full-system simulation results show
                 that CCear performs close to an ideal refresh policy
                 with low overhead. Compared with state-of-the-art
                 refresh policies, CCear simultaneously improves the
                 system performance and reduces the energy consumption.
                 Moreover, the performance of CCear could be further
                 enhanced using small filter caches to accommodate the
                 not-refreshed private STT-RAM blocks.",
  acknowledgement = ack-nhfb,
  articleno =    "5",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Liu:2013:PBA,
  author =       "Xue-Xin Liu and Sheldon X.-D. Tan and Adolfo Adair
                 Palma-Rodriguez and Esteban Tlelo-Cuautle and Guoyong
                 Shi",
  title =        "Performance bound analysis of analog circuits in
                 frequency- and time-domain considering process
                 variations",
  journal =      j-TODAES,
  volume =       "19",
  number =       "1",
  pages =        "6:1--6:??",
  month =        dec,
  year =         "2013",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2534395",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Dec 17 17:21:29 MST 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "In this article, we propose a new performance bound
                 analysis of analog circuits considering process
                 variations. We model the variations of component values
                 as intervals measured from tested chips and manufacture
                 processes. The new method first applies a graph-based
                 analysis approach to generate the symbolic transfer
                 function of a linear(ized) analog circuit. Then the
                 frequency response bounds (maximum and minimum) are
                 obtained by performing nonlinear constrained
                 optimization in which magnitude or phase of the
                 transfer function is the objective function to be
                 optimized subject to the ranges of process variational
                 parameters. The response bounds given by the
                 optimization-based method are very accurate and do not
                 have the over-conservativeness issues of existing
                 methods. Based on the frequency-domain bounds, we
                 further develop a method to calculate the time-domain
                 response bounds for any arbitrary input stimulus.
                 Experimental results from several analog benchmark
                 circuits show that the proposed method gives the
                 correct bounds verified by Monte Carlo analysis while
                 it delivers one order of magnitude speedup over Monte
                 Carlo for both frequency-domain and time-domain bound
                 analyses. We also show analog circuit yield analysis as
                 an application of the frequency-domain variational
                 bound analysis.",
  acknowledgement = ack-nhfb,
  articleno =    "6",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Huang:2013:OCC,
  author =       "Chien-Chih Huang and Chin-Long Wey and Jwu-E Chen and
                 Pei-Wen Luo",
  title =        "Optimal common-centroid-based unit capacitor
                 placements for yield enhancement of switched-capacitor
                 circuits",
  journal =      j-TODAES,
  volume =       "19",
  number =       "1",
  pages =        "7:1--7:??",
  month =        dec,
  year =         "2013",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2534394",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Dec 17 17:21:29 MST 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Yield is defined as the probability that the circuit
                 under consideration meets with the design specification
                 within the tolerance. Placement with higher correlation
                 coefficients has fewer mismatches and lower variation
                 of capacitor ratio, thus achieving higher yield
                 performance. This study presents a new optimization
                 criterion that quickly determines if the placement is
                 optimal. The optimization criterion leads to the
                 development of the concepts of C-entries and
                 partitioned subarrays which can significantly reduce
                 the searching space for finding the
                 optimal/near-optimal placements on a sufficiently large
                 array size.",
  acknowledgement = ack-nhfb,
  articleno =    "7",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Pomeranz:2013:BGM,
  author =       "Irith Pomeranz",
  title =        "Built-in generation of multicycle functional broadside
                 tests with observation points",
  journal =      j-TODAES,
  volume =       "19",
  number =       "1",
  pages =        "8:1--8:??",
  month =        dec,
  year =         "2013",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2534396",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Dec 17 17:21:29 MST 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Functional broadside tests allow overtesting to be
                 avoided as part of a scheme that considers both test
                 generation and the analysis of output responses, by
                 ensuring that delay faults are detected under
                 functional operation conditions. Compared with
                 two-cycle tests, multicycle tests allow more faults to
                 be detected with each test, thus reducing the number of
                 tests that need to be applied. They also provide an
                 opportunity for nonfunctional electrical effects, which
                 are caused by switching between modes of operation, to
                 subside before the clock cycles where delay faults are
                 detected. Built-in test generation facilitates at-speed
                 testing and reduces the test data volume. Motivated by
                 these observations, this article describes the
                 modification of a built-in test generation method for
                 two-cycle functional broadside tests so as to generate
                 multicycle functional broadside tests. The size of the
                 hardware is not increased by the modification. The
                 article investigates the following issues related to
                 this method: (1) the effect of using multicycle tests
                 on the number of tests that need to be applied; (2)
                 fault simulation for tailoring the test generation
                 hardware to a circuit that takes into account, to
                 different extents, the need to allow nonfunctional
                 electrical effects to subside; (3) the insertion of
                 observation points in order to increase the transition
                 fault coverage.",
  acknowledgement = ack-nhfb,
  articleno =    "8",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Tong:2013:TCT,
  author =       "Jason G. Tong and Marc Boul{\'e} and Zeljko Zilic",
  title =        "Test compaction techniques for assertion-based test
                 generation",
  journal =      j-TODAES,
  volume =       "19",
  number =       "1",
  pages =        "9:1--9:??",
  month =        dec,
  year =         "2013",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2534397",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Dec 17 17:21:29 MST 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Assertions are now widely used in verification as a
                 means to help convey designer intent and also to
                 simplify the detection of erroneous conditions by the
                 firing of assertions. With this expressive modeling
                 power, assertions could also be used for tasks such as
                 helping to assess test coverage and even as a source
                 for test generation. Our work deals with this last
                 aspect, namely, assertion-based test generation. In
                 this article, we present our compacted test generation
                 scheme based on assertions. Novel compaction techniques
                 are presented based on assertion clustering, test-path
                 overlap detection and parallel-path removal. Our
                 compaction approach is experimentally evaluated using
                 nearly 300 assertions to show the amount of reduction
                 that can be obtained in the size of the test sets. This
                 ultimately has a positive impact on verification time
                 in the quest for bugfree designs.",
  acknowledgement = ack-nhfb,
  articleno =    "9",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Tu:2014:PPP,
  author =       "Chia-Heng Tu and Hui-Hsin Hsu and Jen-Hao Chen and
                 Chun-Han Chen and Shih-Hao Hung",
  title =        "Performance and power profiling for emulated {Android}
                 systems",
  journal =      j-TODAES,
  volume =       "19",
  number =       "2",
  pages =        "10:1--10:??",
  month =        mar,
  year =         "2014",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2566660",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 21 18:21:14 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/java2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib;
                 https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
  abstract =     "Simulation is a common approach for assisting system
                 design and optimization. For system-wide optimization,
                 energy and computational resources are often the two
                 most critical issues. Monitoring the energy state of
                 each hardware component and measuring the time spent in
                 each state is needed for accurate energy and
                 performance prediction. For software optimization, it
                 is important to profile the energy and the time
                 consumed by each software construct in a realistic
                 operating environment with a proper workload. However,
                 the conventional approaches of simulation often fail to
                 produce satisfying data. First, building a
                 cycle-accurate simulation environment for a complex
                 system, such as an Android smartphone, is difficult and
                 can take a long time. Second, a slow simulation can
                 significantly alter the behavior of multithreaded,
                 I/O-intensive applications and can affect the accuracy
                 of profiles. Third, existing software-based profilers
                 generally do not work on simulators, which makes it
                 difficult for performance analysis of complicated
                 software, for example, Java applications executed by
                 the Dalvik VM in an Android system. To address these
                 aforementioned problems, we proposed and prototyped a
                 framework, called virtual performance analyzer (VPA).
                 VPA takes advantage of an existing emulator or virtual
                 machine monitor to reduce the complexity of building a
                 simulator. VPA allows the user to selectively and
                 incrementally integrate timing models and power models
                 into the emulator with our carefully designed
                 performance/power monitors, tracing facility, and
                 profiling tools to evaluate and analyze the emulated
                 system. The emulated system can perform at different
                 levels of speed to help verify if the profile data are
                 impacted by the emulation speed. Finally, VPA supports
                 existing software-based profiles and enables
                 non-intrusive tracing/profiling by minimizing the probe
                 effect. Our experimental results show that the VPA
                 framework allows users to quickly establish a
                 performance/power evaluation environment and gather
                 useful information to support system design and
                 software optimization for Android smartphones.",
  acknowledgement = ack-nhfb,
  articleno =    "10",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Ganeshpure:2014:PDD,
  author =       "Kunal Ganeshpure and Sandip Kundu",
  title =        "Performance-driven dynamic thermal management of
                 {MPSoC} based on task rescheduling",
  journal =      j-TODAES,
  volume =       "19",
  number =       "2",
  pages =        "11:1--11:??",
  month =        mar,
  year =         "2014",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2566661",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 21 18:21:14 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "High level of integration has led to the advent of
                 Multiprocessor System-on-Chip (MPSoC) which consists of
                 multiple processor cores and accelerators on the same
                 die. A MPSoC programming model is based on a task graph
                 where tasks are assigned to cores to maximize
                 performance. To address thermal hotspots in MPSoCs,
                 coarse-grain power management techniques based on
                 Dynamic Frequency Scaling (DFS) are widely used. DFS is
                 reactive in nature and has detrimental effects on
                 performance. We propose an alternative solution based
                 on dynamic task rescheduling where a temperature
                 prediction scheme is built into the scheduler. The
                 temperature look-ahead scheme is used for task
                 reassignment or delay insertion in scheduling. Since
                 temperature prediction and task assignment are done at
                 runtime, both must be simple and extremely fast. To
                 that end, we propose a heuristic solution based on a
                 limited branch-and-bound search and compare results
                 against an optimal Integer Linear Programming
                 (ILP)-based solution. The proposed approach is shown to
                 be superior to frequency scaling, and the resulting
                 schedule length is within 5\% to 10\% of the optimal
                 solution as obtained from ILP formulation.",
  acknowledgement = ack-nhfb,
  articleno =    "11",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Meyer:2014:CEL,
  author =       "Brett H. Meyer and Adam S. Hartman and Donald E.
                 Thomas",
  title =        "Cost-effective lifetime and yield optimization for
                 {NoC-based} {MPSoCs}",
  journal =      j-TODAES,
  volume =       "19",
  number =       "2",
  pages =        "12:1--12:??",
  month =        mar,
  year =         "2014",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2535575",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 21 18:21:14 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "As manufacturing processes scale, designers are
                 increasingly dependent on techniques to mitigate
                 manufacturing defect and permanent failure. In embedded
                 systems-on-chip, system lifetime and yield can be
                 increased using slack---under-utilization in execution
                 and storage resources---so that when components are
                 defective, data and tasks can be remapped and
                 rescheduled. For any given system, the design space of
                 possible slack allocations is both large and complex,
                 consisting of every possible way to replace each
                 component in the initial system with another from the
                 component library. Based on the observation that useful
                 slack is often quantized, we have developed Critical
                 Quantity Slack Allocation (CQSA), an approach that
                 effectively and efficiently allocates execution and
                 storage slack to jointly optimize system yield and
                 cost. While exploring less than 1.4\% of the slack
                 allocation design space, our approach consistently
                 outperforms alternative slack allocation techniques to
                 find sets of designs within 1.4\% of the lifetime-cost
                 Pareto-optimal front. When applied to yield-cost
                 optimization, our approach again outperforms
                 alternative techniques, exploring less than 1.62\% of
                 the design space to find sets of designs within 4.27\%
                 of the yield-cost Pareto-optimal front. One advantage
                 of managing failure at the system level is that the
                 same techniques that improve lifetime often also
                 improve yield. As a result, with little modification,
                 CQSA is further able to perform effective joint
                 optimization of lifetime and yield, finding designs
                 within 1.6\% of the Pareto-optimal front.",
  acknowledgement = ack-nhfb,
  articleno =    "12",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Lee:2014:CRM,
  author =       "Jongeun Lee and Seongseok Seo and Jongkyung Paek and
                 Kiyoung Choi",
  title =        "Configurable range memory for effective data reuse on
                 programmable accelerators",
  journal =      j-TODAES,
  volume =       "19",
  number =       "2",
  pages =        "13:1--13:??",
  month =        mar,
  year =         "2014",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2566662",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 21 18:21:14 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "While programmable accelerators such as
                 application-specific processors and reconfigurable
                 architectures can dramatically speed up
                 compute-intensive kernels of an application,
                 application performance can still be severely limited
                 by the communication between processors. To minimize
                 the communication overhead, a shared memory such as a
                 scratchpad memory may be employed between the main
                 processor and the accelerator coprocessor. However,
                 this setup poses a significant challenge to the main
                 processor, which now must manage data on the scratchpad
                 explicitly, resulting in superfluous data copying due
                 to the inflexibility of a scratchpad. In this article,
                 we present an enhancement of a scratchpad, Configurable
                 Range Memory (CRM), whose address range can be
                 reprogrammed to minimize unnecessary data copying
                 between processors and therefore promote data reuse on
                 the accelerator, and also present a software management
                 algorithm for the CRM. Our experimental results
                 involving detailed simulation of full multimedia
                 applications demonstrate that our CRM architecture can
                 reduce the communication overhead quite effectively,
                 reducing the kernel execution time by up to 28\% and
                 the application runtime by up to 12.8\%, in addition to
                 considerable system energy reduction, compared to the
                 conventional architecture based on a scratchpad.",
  acknowledgement = ack-nhfb,
  articleno =    "13",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Hung:2014:AFD,
  author =       "Eddie Hung and Steven J. E. Wilton",
  title =        "Accelerating {FPGA} debug: Increasing visibility using
                 a runtime reconfigurable observation and triggering
                 network",
  journal =      j-TODAES,
  volume =       "19",
  number =       "2",
  pages =        "14:1--14:??",
  month =        mar,
  year =         "2014",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2566668",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 21 18:21:14 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "FPGA technology is commonly used to prototype new
                 digital designs before entering fabrication. Whilst
                 these physical prototypes can operate many orders of
                 magnitude faster than through a logic simulator, a
                 fundamental limitation is their lack of on-chip
                 visibility when debugging. To counter this,
                 trace-buffer-based instrumentation can be installed
                 into the prototype, allowing designers to capture a
                 predetermined window of signal data during live
                 operation for offline analysis. However, instead of
                 requiring the designer to recompile their entire
                 circuit every time the window is modified, this article
                 proposes that an overlay network is constructed using
                 only spare FPGA routing multiplexers to connect all
                 circuit signals through to the trace instruments. Thus,
                 during debugging, designers would only need to
                 reconfigure this network instead of finding a new
                 place-and-route solution. Furthermore, we describe how
                 this network can deliver signals to both the trigger
                 and trace units of these instruments, which are
                 implemented simultaneously using dual-port RAMs. Our
                 results show that new network configurations connecting
                 any subset of signals to 80--90\% of the available RAM
                 capacity can be computed in less than 70 seconds, for a
                 100,000 LUT circuit, as many times as necessary. Our
                 tool---QuickTrace---is available for download.",
  acknowledgement = ack-nhfb,
  articleno =    "14",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Panerati:2014:CEM,
  author =       "Jacopo Panerati and Giovanni Beltrame",
  title =        "A comparative evaluation of multi-objective
                 exploration algorithms for high-level design",
  journal =      j-TODAES,
  volume =       "19",
  number =       "2",
  pages =        "15:1--15:??",
  month =        mar,
  year =         "2014",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2566669",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 21 18:21:14 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "This article presents a detailed overview and the
                 experimental comparison of 15 multi-objective
                 design-space exploration (DSE) algorithms for
                 high-level design. These algorithms are collected from
                 recent literature and include heuristic, evolutionary,
                 and statistical methods. To provide a fair comparison,
                 the algorithms are classified according to the approach
                 used and examined against a large set of metrics. In
                 particular, the effectiveness of each algorithm was
                 evaluated for the optimization of a multiprocessor
                 platform, considering initial setup effort, rate of
                 convergence, scalability, and quality of the resulting
                 optimization. Our experiments are performed with
                 statistical rigor, using a set of very diverse
                 benchmark applications (a video converter, a parallel
                 compression algorithm, and a fast Fourier
                 transformation algorithm) to take a large spectrum of
                 realistic workloads into account. Our results provide
                 insights on the effort required to apply each algorithm
                 to a target design space, the number of simulations it
                 requires, its accuracy, and its precision. These
                 insights are used to draw guidelines for the choice of
                 DSE algorithms according to the type and size of design
                 space to be optimized.",
  acknowledgement = ack-nhfb,
  articleno =    "15",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Lee:2014:CPA,
  author =       "Seokhyun Lee and Kiyoung Choi",
  title =        "Critical-path-aware high-level synthesis with
                 distributed controller for fast timing closure",
  journal =      j-TODAES,
  volume =       "19",
  number =       "2",
  pages =        "16:1--16:??",
  month =        mar,
  year =         "2014",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2566670",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 21 18:21:14 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Centralized controllers commonly used in high-level
                 synthesis often require long wires and cause high load
                 capacitance, and that is why critical paths typically
                 occur on paths from controllers to data registers
                 instead of paths from data registers to data registers.
                 However, conventional high-level synthesis has focused
                 on delays within a datapath, making it difficult to
                 solve the timing closure problem during physical
                 synthesis. This article presents hardware architecture
                 with a distributed controller, which makes the timing
                 closure problem much easier. A novel
                 critical-path-aware high-level synthesis flow is also
                 presented for synthesizing such hardware through
                 datapath partitioning, register binding, and controller
                 optimization. We explore the design space related to
                 the number of partitions, which is an important design
                 parameter for target architecture. According to our
                 experiments, the proposed approach reduces the critical
                 path delay excluding FUs by 29.3\% and that including
                 FUs by 10.0\%, with 2.2\% area overhead on average
                 compared to centralized controller architecture.",
  acknowledgement = ack-nhfb,
  articleno =    "16",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Wei:2014:TSE,
  author =       "Yaoguang Wei and Cliff Sze and Natarajan Viswanathan
                 and Zhuo Li and Charles J. Alpert and Lakshmi Reddy and
                 Andrew D. Huber and Gustavo E. Tellez and Douglas
                 Keller and Sachin S. Sapatnekar",
  title =        "Techniques for scalable and effective routability
                 evaluation",
  journal =      j-TODAES,
  volume =       "19",
  number =       "2",
  pages =        "17:1--17:??",
  month =        mar,
  year =         "2014",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2566663",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 21 18:21:14 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Routing congestion has become a critical layout
                 challenge in nanoscale circuits since it is a critical
                 factor in determining the routability of a design. An
                 unroutable design is not useful even though it closes
                 on all other design metrics. Fast design closure can
                 only be achieved by accurately evaluating whether a
                 design is routable or not early in the design cycle.
                 Lately, it has become common to use a ``light mode''
                 version of a global router to quickly evaluate the
                 routability of a given placement. This approach suffers
                 from three weaknesses: (i) it does not adequately model
                 local routing resources, which can cause incorrect
                 routability predictions that are only detected late,
                 during detailed routing; (ii) the congestion maps
                 obtained by it tend to have isolated hotspots
                 surrounded by noncongested spots, called ``noisy
                 hotspots'', which further affects the accuracy in
                 routability evaluation; and (iii) the metrics used to
                 represent congestion may yield numbers that do not
                 provide sufficient intuition to the designer, and
                 moreover, they may often fail to predict the
                 routability accurately. This article presents solutions
                 to these issues. First, we propose three approaches to
                 model local routing resources. Second, we propose a
                 smoothing technique to reduce the number of noisy
                 hotspots and obtain a more accurate routability
                 evaluation result. Finally, we develop a new metric
                 which represents congestion maps with higher fidelity.
                 We apply the proposed techniques to several industrial
                 circuits and demonstrate that one can better predict
                 and evaluate design routability and that congestion
                 mitigation tools can perform much better to improve the
                 design routability.",
  acknowledgement = ack-nhfb,
  articleno =    "17",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Pomeranz:2014:LPS,
  author =       "Irith Pomeranz",
  title =        "Low-power skewed-load tests based on functional
                 broadside tests",
  journal =      j-TODAES,
  volume =       "19",
  number =       "2",
  pages =        "18:1--18:??",
  month =        mar,
  year =         "2014",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2566664",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 21 18:21:14 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "This article studies the generation of low-power
                 skewed-load tests such that the signal transitions (and
                 line values) they create during their fast functional
                 clock cycles match those of functional broadside tests.
                 Functional broadside tests create functional operation
                 conditions during their fast functional clock cycles.
                 As a result, the signal transitions that occur during
                 these clock cycles can also occur during functional
                 operation. The procedure described in this article
                 matches these signal-transitions on a line-by-line
                 basis when generating low-power skewed-load tests. The
                 procedure accepts a functional broadside test set for
                 transition faults. In one of its basic steps, the
                 procedure modifies a functional broadside test into a
                 skewed-load test. This allows it to retain many of the
                 signal transitions (and line values) of the functional
                 broadside test in the skewed-load test. Experimental
                 results for benchmark circuits demonstrate the extent
                 to which it is possible to match the signal-transitions
                 of skewed-load tests with those of functional broadside
                 tests while achieving the high transition fault
                 coverage that is typical of skewed-load tests.",
  acknowledgement = ack-nhfb,
  articleno =    "18",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Pomeranz:2014:DTM,
  author =       "Irith Pomeranz",
  title =        "Design-for-testability for multi-cycle broadside tests
                 by holding of state variables",
  journal =      j-TODAES,
  volume =       "19",
  number =       "2",
  pages =        "19:1--19:??",
  month =        mar,
  year =         "2014",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2566665",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 21 18:21:14 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "This article describes a design-for-testability
                 approach for increasing the transition fault coverage
                 of multi-cycle broadside tests. Earlier methods
                 addressed two-cycle tests. The importance of
                 multi-cycle tests results from the ability to produce
                 more compact test sets than possible with two-cycle
                 tests, from the fact that when multi-cycle tests are
                 applied at-speed, they can detect defects that are not
                 detected by two-cycle tests and from their ability to
                 avoid overtesting of delay faults. The approach
                 described in this article is based on holding the
                 values of selected state variables constant during the
                 functional clock cycles of a multi-cycle broadside
                 test. This allows new tests to be produced, which are
                 different from broadside tests, without relying on
                 nonfunctional toggling of state variables as in earlier
                 methods for two-cycle tests. Experimental results show
                 significant improvements in transition fault coverage
                 using a fixed set of hold configurations for two types
                 of multi-cycle broadside test sets: (1) test sets that
                 are stored and applied from an external tester, and (2)
                 functional broadside test sets that are generated using
                 on-chip hardware.",
  acknowledgement = ack-nhfb,
  articleno =    "19",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Biswas:2014:RTC,
  author =       "Sounil Biswas and Hongfei Wang and R. D. (Shawn)
                 Blanton",
  title =        "Reducing test cost of integrated, heterogeneous
                 systems using pass-fail test data analysis",
  journal =      j-TODAES,
  volume =       "19",
  number =       "2",
  pages =        "20:1--20:??",
  month =        mar,
  year =         "2014",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2566666",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 21 18:21:14 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Stringent quality requirements for integrated,
                 heterogeneous systems have led designers and test
                 engineers to mandate large sets of tests to be applied
                 to these systems, which, in turn, have resulted in
                 increased test cost. However, many of these tests are
                 unnecessary (i.e., redundant), since their outcomes can
                 be reliably predicted using results from other applied
                 tests. A methodology for identifying the redundant
                 tests of an integrated, heterogeneous system that has
                 only binary pass-fail test data is described. This
                 methodology uses decision trees, Boolean minimization,
                 and satisfiability as core components. Feasibility is
                 empirically demonstrated using test data from two
                 commercially fabricated systems, namely, a high-speed
                 serializer/deserializer (HSS) and a phase-locked loop
                 (PLL). Our analysis of test data from {$>$} 38,000 HSS
                 and {$>$} 22,000 PLL circuits show that 14 out of 40
                 HSS tests and 11 out of 36 PLL tests are redundant.",
  acknowledgement = ack-nhfb,
  articleno =    "20",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Chang:2014:BBL,
  author =       "Da-Wei Chang and Hsin-Hung Chen and Dau-Jieu Yang and
                 Hsung-Pin Chang",
  title =        "{BLAS}: Block-level adaptive striping for solid-state
                 drives",
  journal =      j-TODAES,
  volume =       "19",
  number =       "2",
  pages =        "21:1--21:??",
  month =        mar,
  year =         "2014",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2555616",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 21 18:21:14 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Increasing the degree of parallelism and reducing the
                 overhead of garbage collection (GC overhead) are the
                 two keys to enhancing the performance of solid-state
                 drives (SSDs). SSDs employ multichannel architectures,
                 and a data placement scheme in an SSD determines how
                 the data are striped to the channels. Without
                 considering the data access pattern, existing fixed and
                 device-level data placement schemes may have either
                 high GC overhead or poor I/O parallelism, resulting in
                 degraded performance. In this article, an adaptive
                 block-level data placement scheme called BLAS is
                 proposed to maximize the I/O parallelism while
                 simultaneously minimizing the GC overhead. In contrast
                 to existing device-level schemes, BLAS allows different
                 data placement policies for blocks with different
                 access patterns. Pages in read-intensive blocks are
                 scattered over various channels to maximize the degree
                 of read parallelism, while pages in each of the
                 remaining blocks are attempted to be gathered in the
                 same physical block to minimize the GC overhead.
                 Moreover, BLAS allows the placement policy for a
                 logical block to be changed dynamically according to
                 the access pattern changes of that block. Finally, a
                 parallelism-aware write buffer management approach is
                 adopted in BLAS to maximize the degree of write
                 parallelism. Performance results show that BLAS yields
                 a significant improvement in the SSD response time when
                 compared to existing device-level schemes. In
                 particular, BLAS outperforms device-level page striping
                 and device-level block striping by factors of up to
                 8.75 and 7.41, respectively. Moreover, BLAS achieves
                 low GC overhead and is effective in adapting to
                 workload changes.",
  acknowledgement = ack-nhfb,
  articleno =    "21",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Bathen:2014:STS,
  author =       "Luis Angel D. Bathen and Nikil D. Dutt",
  title =        "{SPMCloud}: Towards the Single-Chip Embedded
                 {ScratchPad} Memory-Based Storage Cloud",
  journal =      j-TODAES,
  volume =       "19",
  number =       "3",
  pages =        "22:1--22:??",
  month =        jun,
  year =         "2014",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2611755",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Jun 21 07:58:42 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "The era of cloud computing on-a-chip is enabled by the
                 aggressive move towards many-core platforms and the
                 rapid adoption of Network-on-Chips. As a result, there
                 is a need for large-scale distributed on-chip shared
                 memories that are reliable, low power, and seamlessly
                 manageable. In this work, we propose SPMCloud, a novel
                 scratchpad-memory-based cloud-inspired volatile storage
                 subsystem designed to meet the needs of
                 future-generation many-core platforms. SPMCloud is
                 composed of several concepts, including: (1) a highly
                 scalable data-center-like memory subsystem that
                 exploits two enterprise-network-inspired memory
                 configurations, namely, embedded Network Attached
                 Storage (eNAS) and embedded Storage Area Network
                 (eSAN), and (2) on-demand allocation of reliable memory
                 space through memory virtualization and the use of
                 embedded RAIDs. Our experimental results on
                 Mediabench/CHStone benchmarks show that the SPMCloud's
                 fully distributed reliable memory subsystems can
                 achieve 48\% energy savings and 70\% latency reduction
                 on average over state-of-the-art NoC memory reliability
                 techniques. We then evaluate the scalability of the
                 SPMCloud and compare it with traditional SPM allocation
                 policies. The SPMCloud's dynamic allocator outperforms
                 the best competition by an average 60\% (eNAS) and 46\%
                 (eSAN) when the platform runs at 250 MHz and by an
                 average 80\% (eNAS) and 40\% when running at 1 GHz.
                 Moreover, the SPMCloud achieves an average 83\% energy
                 savings across all configurations (number of cores)
                 with respect to the best competitors when running at
                 250 MHz and 1 GHz. We then studied the SPM hit ratio
                 across the various allocation policies discussed in
                 this article and showed that on average the SPMCloud's
                 priority-driven dynamic allocation policy achieves
                 93.5\% SPM hit ratio, 0.6\% higher hit ratio than the
                 closest allocation policy. We then showed that the eNAS
                 and eSAN achieve an average of 67.9\% and 29\%
                 reduction in execution time, respectively, over the
                 best competitor. Similarly, the eNAS and eSAN achieve
                 an average of 82.7\% and 82.3\% energy savings,
                 respectively, over the best competitor. Furthermore, we
                 evaluated the scalability of the SPMCloud and its
                 performance/energy efficiency when providing support
                 for some of the heavier E-RAID levels, and showed that
                 the eNAS/eSAN configurations with SECDED achieve an
                 average of 51.5\% and 34.9\% reduction in execution
                 time, respectively, over the best competitor with
                 SECDED. Similarly, the eNAS/eSAN configurations with
                 E-RAID Level 1 + SECDED achieve an average of 82.3\%
                 and 75.6\% energy savings, respectively, over the best
                 competitor.",
  acknowledgement = ack-nhfb,
  articleno =    "22",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Rosales:2014:MHA,
  author =       "Rafael Rosales and Michael Glass and J{\"u}rgen Teich
                 and Bo Wang and Yang Xu and Ralph Hasholzner",
  title =        "{MAESTRO} --- Holistic Actor-Oriented Modeling of
                 Nonfunctional Properties and Firmware Behavior for
                 {MPSoCs}",
  journal =      j-TODAES,
  volume =       "19",
  number =       "3",
  pages =        "23:1--23:??",
  month =        jun,
  year =         "2014",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2594481",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Jun 21 07:58:42 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Modeling and evaluating nonfunctional properties such
                 as performance, power, and reliability of embedded
                 systems are tasks of utmost importance. In this
                 article, we introduce MAESTRO, a methodology for the
                 modeling and evaluation of nonfunctional properties and
                 embedded firmware of MPSoC architecture components at
                 the Electronic System Level (ESL). In contrast to
                 existing design flows that provide predefined
                 performance models, MAESTRO defines a flexible approach
                 that allows to define virtual prototypes that can be
                 easily customized and extended to evaluate multiple
                 nonfunctional properties of interest at different
                 levels of abstraction. In MAESTRO, a design is composed
                 purely from actor-oriented models. This enables typical
                 ESL features such as automatic design space exploration
                 and synthesizability of HW and SW components, typically
                 missing in very general design flows. Unique to MAESTRO
                 is the separation and coordination of the interaction
                 between application functionality, firmware, and
                 performance models for the evaluation of nonfunctional
                 properties, and their complex interactions within a
                 single Model-of-Computation (MoC). The main advantages
                 of MAESTRO are: (I) Extensible modeling of
                 interdependent nonfunctional properties of
                 heterogeneous MPSoC components; (II) high flexibility
                 to investigate the appropriate trade-off between
                 modeling effort and accuracy of nonfunctional property
                 evaluators; (III) a holistic approach for modeling
                 application functionality as well as firmware affecting
                 the evaluation of nonfunctional properties. Regarding
                 (II), we present a mobile baseband processor platform
                 use-case, executing a GSM paging application. To
                 demonstrate (I) and (III), we present the modeling of a
                 complex ESL processor virtual prototype, running a soft
                 real-time application and equipped with both a power
                 and reliability manager.",
  acknowledgement = ack-nhfb,
  articleno =    "23",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Huang:2014:ICP,
  author =       "Libo Huang and Zhiying Wang and Nong Xiao and Yongwen
                 Wang and Qiang Dou",
  title =        "Integrated Coherence Prediction: Towards Efficient
                 Cache Coherence on {NoC}-Based Multicore
                 Architectures",
  journal =      j-TODAES,
  volume =       "19",
  number =       "3",
  pages =        "24:1--24:??",
  month =        jun,
  year =         "2014",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2611756",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Jun 21 07:58:42 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Multicore architectures with Network-on-Chips (NoCs)
                 have been widely recognized as the de facto design for
                 the efficient utilization of the continuously
                 increasing density of transistors on a chip. A key
                 challenge in designing such an NoC-based multicore
                 processor is maintaining cache coherence in an
                 efficient manner. Directory-based protocols avoid the
                 bandwidth overhead of snoop-based protocols, therefore
                 scaling to a large number of cores. However,
                 conventional directory structures add significant
                 indirection delay to cache-to-cache accesses in larger
                 multicore processor. In this article we propose a novel
                 hardware coherence technique, called integrated
                 coherence prediction (ICP). This approach adopts a
                 prediction technique for managing shared data to reduce
                 or eliminate the cache-to-cache delay in coherence
                 accesses. ICP has two unique features that differ from
                 previous coherence prediction techniques. First, ICP
                 introduces a new integrated prediction scheme that
                 combines two kinds of predictors: owner predictor,
                 which predicts the data writers and avoids the
                 indirection through directory, and data predictor,
                 which predicts the access address and prefetches data
                 from remote nodes directly. Second, ICP uses a request
                 replication method to reduce the negative effect of
                 wrong owner prediction operations, thus facilitating
                 overall performance improvement. We present the design
                 and implementation details of the ICP approach. Using
                 detailed full-system simulations, we conclude that the
                 ICP provides a cost-effective solution for designing
                 high-performance multicore processors.",
  acknowledgement = ack-nhfb,
  articleno =    "24",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Huang:2014:GCM,
  author =       "Po-Chun Huang and Yuan-Hao Chang and Kam-Yiu Lam and
                 Jian-Tao Wang and Chien-Chin Huang",
  title =        "Garbage Collection for Multiversion Index in
                 Flash-Based Embedded Databases",
  journal =      j-TODAES,
  volume =       "19",
  number =       "3",
  pages =        "25:1--25:??",
  month =        jun,
  year =         "2014",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2611757",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Jun 21 07:58:42 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Recently, flash-based embedded databases have gained
                 their momentum in various control and monitoring
                 systems, such as cyber-physical systems (CPSes). To
                 support the functionality to access the historical
                 data, a multiversion index is adopted to simultaneously
                 maintain multiple versions of data items, as well as
                 their index information. However, maintaining a
                 multiversion index on flash memory incurs considerable
                 performance overheads on garbage collection, which is
                 to reclaim the spaces occupied by the outdated/invalid
                 data items and their index information on flash memory.
                 In this work, we propose an efficient garbage
                 collection strategy to solve the garbage collection
                 issues of flash-based multiversion databases. In
                 particular, a version-tracking method is proposed to
                 accelerate the performance on the process on
                 identifying/reclaiming the space of invalid data and
                 their indexes, and a pre-summary method is also
                 designed to solve the cascading update problem that is
                 caused by the write-once nature of flash memory and is
                 worsened when more versions refer to the same data
                 item. The capability of the proposed strategy is then
                 verified by analytical and experimental studies.",
  acknowledgement = ack-nhfb,
  articleno =    "25",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Lim:2014:PMG,
  author =       "Jieun Lim and Nagesh B. Lakshminarayana and Hyesoon
                 Kim and William Song and Sudhakar Yalamanchili and
                 Wonyong Sung",
  title =        "Power Modeling for {GPU} Architectures Using {McPAT}",
  journal =      j-TODAES,
  volume =       "19",
  number =       "3",
  pages =        "26:1--26:??",
  month =        jun,
  year =         "2014",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2611758",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Jun 21 07:58:42 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Graphics Processing Units (GPUs) are very popular for
                 both graphics and general-purpose applications. Since
                 GPUs operate many processing units and manage multiple
                 levels of memory hierarchy, they consume a significant
                 amount of power. Although several power models for CPUs
                 are available, the power consumption of GPUs has not
                 been studied much yet. In this article we develop a new
                 power model for GPUs by utilizing McPAT, a CPU power
                 tool. We generate initial power model data from McPAT
                 with a detailed GPU configuration, and then adjust the
                 models by comparing them with empirical data. We use
                 the NVIDIA's Fermi architecture for building the power
                 model, and our model estimates the GPU power
                 consumption with an average error of 7.7\% and 12.8\%
                 for the microbenchmarks and Merge benchmarks,
                 respectively.",
  acknowledgement = ack-nhfb,
  articleno =    "26",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Lee:2014:DCC,
  author =       "Chia-Wei Lee and Sun-Yuan Hsieh",
  title =        "Diagnosability of Component-Composition Graphs in the
                 {MM*} Model",
  journal =      j-TODAES,
  volume =       "19",
  number =       "3",
  pages =        "27:1--27:??",
  month =        jun,
  year =         "2014",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2611759",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Jun 21 07:58:42 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Diagnosability is an important metric for measuring
                 the reliability of multiprocessor systems. This article
                 adopts the MM* model and outlines the common properties
                 of a wide class of interconnection networks, called
                 component-composition graphs (CCGs), to determine their
                 diagnosability by using their obtained properties. By
                 applying the results to multiprocessor systems, the
                 diagnosability of hypercube-like networks (including
                 hypercubes, crossed cubes, M{\"o}bius cubes, twisted
                 cubes, locally twisted cubes, generalized twisted
                 cubes, and recursive circulants), star graphs, pancake
                 graphs, bubble-sort graphs, and burnt pancake graphs,
                 all of which belong to the class of CCGs, can also be
                 computed.",
  acknowledgement = ack-nhfb,
  articleno =    "27",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Erb:2014:ELF,
  author =       "Dominik Erb and Michael A. Kochte and Matthias Sauer
                 and Stefan Hillebrecht and Tobias Schubert and
                 Hans-Joachim Wunderlich and Bernd Becker",
  title =        "Exact Logic and Fault Simulation in Presence of
                 Unknowns",
  journal =      j-TODAES,
  volume =       "19",
  number =       "3",
  pages =        "28:1--28:??",
  month =        jun,
  year =         "2014",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2611760",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Jun 21 07:58:42 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Logic and fault simulation are essential techniques in
                 electronic design automation. The accuracy of standard
                 simulation algorithms is compromised by unknown or
                 X-values. This results in a pessimistic overestimation
                 of X-valued signals in the circuit and a pessimistic
                 underestimation of fault coverage. This work proposes
                 efficient algorithms for combinational and sequential
                 logic as well as for stuck-at and transition-delay
                 fault simulation that are free of any simulation
                 pessimism in presence of unknowns. The SAT-based
                 algorithms exactly classify all signal states. During
                 fault simulation, each fault is accurately classified
                 as either undetected, definitely detected, or possibly
                 detected. The pessimism with respect to unknowns
                 present in classic algorithms is thoroughly
                 investigated in the experimental results on benchmark
                 circuits. The applicability of the proposed algorithms
                 is demonstrated on larger industrial circuits. The
                 results show that, by accurate analysis, the number of
                 detected faults can be significantly increased without
                 increasing the test-set size.",
  acknowledgement = ack-nhfb,
  articleno =    "28",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Yan:2014:EFG,
  author =       "Jackey Z. Yan and Natarajan Viswanathan and Chris
                 Chu",
  title =        "An Effective Floorplan-Guided Placement Algorithm for
                 Large-Scale Mixed-Size Designs",
  journal =      j-TODAES,
  volume =       "19",
  number =       "3",
  pages =        "29:1--29:??",
  month =        jun,
  year =         "2014",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2611761",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Jun 21 07:58:42 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "In this article we propose an effective algorithm flow
                 to handle modern large-scale mixed-size placement, both
                 with and without geometry constraints. The basic idea
                 is to use floorplanning to guide the placement of
                 objects at the global level. The flow consists of four
                 steps: (1) The objects in the original netlist are
                 clustered into blocks; (2) floorplanning is performed
                 on the blocks; (3) the blocks are shifted within the
                 chip region to further optimize the wirelength; (4)
                 with large macro-locations fixed, incremental placement
                 is applied to place the remaining objects. There are
                 several advantages to handling placement at the global
                 level with a floorplanning technique. First, the
                 problem size can be significantly reduced. Second,
                 exact Half-Perimeter WireLength (HPWL) can be
                 minimized. Third, better object distribution can be
                 achieved so that legalization only needs to handle
                 minor overlaps among small objects in a block. Fourth,
                 macro-rotation and various geometry constraints can be
                 handled. To demonstrate the effectiveness of this new
                 flow, we implement a high-quality and efficient
                 floorplan-guided placer called FLOP. We also construct
                 the Modern Mixed-Size (MMS) placement benchmarks that
                 can effectively represent the complexities of modern
                 mixed-size designs and the challenges faced by modern
                 mixed-size placers. Compared with most state-of-the-art
                 mixed-size placers and leading macroplacers,
                 experimental results show that FLOP achieves the best
                 HPWL and easily obtains legal solutions on all circuits
                 with all geometry constraints satisfied.",
  acknowledgement = ack-nhfb,
  articleno =    "29",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Kang:2014:IRA,
  author =       "Minseok Kang and Taewhan Kim",
  title =        "Integrated Resource Allocation and Binding in Clock
                 Mesh Synthesis",
  journal =      j-TODAES,
  volume =       "19",
  number =       "3",
  pages =        "30:1--30:??",
  month =        jun,
  year =         "2014",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2611762",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Jun 21 07:58:42 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "The clock distribution network in a synchronous
                 digital circuit delivers a clock signal to every
                 storage element, that is, clock sink in the circuit.
                 However, since the continued technology scaling
                 increases PVT (process-voltage-temperature) variation,
                 the increase of clock-skew variation is highly likely
                 to cause performance degradation or system failure at
                 runtime. Recently, to mitigate the clock-skew
                 variation, many researchers have taken a profound
                 interest in the clock mesh network. However, though the
                 structure of the clock mesh network is excellent in
                 tolerating timing variations, it demands significantly
                 high power consumption due to the use of excessive mesh
                 wire and buffer resources. Thus, optimizing the
                 resources required in the mesh clock synthesis while
                 maintaining the variation tolerance is crucially
                 important. The three major tasks that greatly affect
                 the cost of the resulting clock mesh are: (1) mesh
                 segment allocation, (2) mesh buffer allocation and
                 sizing, and (3) clock sink binding to mesh segments.
                 Previous clock mesh optimization approaches solve the
                 three tasks sequentially, one by one at a time, to
                 manage the runtime complexity of the tasks at the
                 expense of losing the quality of results. However,
                 since the three tasks are tightly interrelated,
                 simultaneously optimizing all three tasks is essential,
                 if the runtime is ever permitted, to synthesize an
                 economical clock mesh network. In this work, we propose
                 an approach that is able to tackle the problem in an
                 integrated fashion by combining the three tasks into an
                 iterative framework of incremental updates and solving
                 them simultaneously to find a globally optimal
                 allocation of mesh resources while taking into account
                 the clock-skew tolerance constraints. The core parts of
                 this work are a precise analysis on the relation among
                 the resource optimization tasks and an establishment of
                 a mechanism for effective and efficient integration of
                 the tasks. In particular, to handle the runtime
                 problem, we propose a set of speedup techniques, that
                 is, modeling the RC circuit for eliminating redundant
                 matrix multiplications, exploiting a sliding-window
                 scheme, and quickly estimating the buffer sizing
                 effect, which are fitted into our context of fast
                 clock-skew estimation in mesh resource optimization as
                 well as an invention of early decision policies.
                 Through extensive experiments with benchmark circuits,
                 it is shown that our proposed clock mesh synthesizer is
                 able to reduce the worst-case clock skew, total mesh
                 wirelength, total size of mesh driving buffers, and
                 total clock mesh power consumption including
                 short-circuit power by 25.0\%, 13.2\%, 10.9\%, and
                 11.0\% on average compared to that produced by the
                 best-known clock mesh synthesis method (MeshWorks),
                 respectively.",
  acknowledgement = ack-nhfb,
  articleno =    "30",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Boghrati:2014:IAP,
  author =       "Baktash Boghrati and Sachin S. Sapatnekar",
  title =        "Incremental Analysis of Power Grids Using Backward
                 Random Walks",
  journal =      j-TODAES,
  volume =       "19",
  number =       "3",
  pages =        "31:1--31:??",
  month =        jun,
  year =         "2014",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2611763",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Jun 21 07:58:42 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Power grid design and analysis is a critical part of
                 modern VLSI chip design and demands tools for accurate
                 modeling and efficient analysis. The process of power
                 grid design is inherently iterative, during which
                 numerous small changes are made to an initial design,
                 either to enhance the design or to fix design
                 constraint violations. Due to the large sizes of power
                 grids in modern chips, updating the solution for these
                 perturbations can be a computationally intensive task.
                 In this work, we first introduce an accurate modeling
                 methodology for power grids that, contrary to
                 conventional models, can result in asymmetrical
                 equations. Next, we propose an efficient and accurate
                 incremental solver that utilizes the backward random
                 walks to identify the region of influence of the
                 perturbation. The solution of the network is then
                 updated for this significantly smaller region only. The
                 proposed algorithm is capable of handling both
                 symmetrical and asymmetrical power grid equations.
                 Moreover, it can handle consecutive perturbations
                 without any degradation in the quality of the solution.
                 Experimental results show speedups of up to 13$ \times
                 $ for our incremental solver, as compared to a full
                 resolve of the power grid.",
  acknowledgement = ack-nhfb,
  articleno =    "31",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Schneider:2014:QNE,
  author =       "Reinhard Schneider and Dip Goswami and Samarjit
                 Chakraborty and Unmesh Bordoloi and Petru Eles and Zebo
                 Peng",
  title =        "Quantifying Notions of Extensibility in {FlexRay}
                 Schedule Synthesis",
  journal =      j-TODAES,
  volume =       "19",
  number =       "4",
  pages =        "32:1--32:??",
  month =        aug,
  year =         "2014",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2647954",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Aug 25 19:03:51 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "FlexRay has now become a well-established in-vehicle
                 communication bus at most original equipment
                 manufacturers (OEMs) such as BMW, Audi, and GM. Given
                 the increasing cost of verification and the high degree
                 of crosslinking between components in automotive
                 architectures, an incremental design process is
                 commonly followed. In order to incorporate
                 FlexRay-based designs in such a process, the resulting
                 schedules must be extensible, that is: (i) when
                 messages are added in later iterations, they must
                 preserve deadline guarantees of already scheduled
                 messages, and (ii) they must accommodate as many new
                 messages as possible without changes to existing
                 schedules. Apart from extensible scheduling having not
                 received much attention so far, traditional metrics
                 used for quantifying them cannot be trivially adapted
                 to FlexRay schedules. This is because they do not
                 exploit specific properties of the FlexRay protocol. In
                 this article we, for the first time, introduce new
                 notions of extensibility for FlexRay that capture all
                 the protocol-specific properties. In particular, we
                 focus on the dynamic segment of FlexRay and we present
                 a number of metrics to quantify extensible schedules.
                 Based on the introduced metrics, we propose strategies
                 to synthesize extensible schedules and compare the
                 results of different scheduling algorithms. We
                 demonstrate the applicability of the results with
                 industrial-size case studies and also show that the
                 proposed metrics may also be visually represented,
                 thereby allowing for easy interpretation.",
  acknowledgement = ack-nhfb,
  articleno =    "32",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Pan:2014:SPM,
  author =       "Gung-Yu Pan and Jing-Yang Jou and Bo-Cheng Lai",
  title =        "Scalable Power Management Using Multilevel
                 Reinforcement Learning for Multiprocessors",
  journal =      j-TODAES,
  volume =       "19",
  number =       "4",
  pages =        "33:1--33:??",
  month =        aug,
  year =         "2014",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2629486",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Aug 25 19:03:51 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Dynamic power management has become an imperative
                 design factor to attain the energy efficiency in modern
                 systems. Among various power management schemes,
                 learning-based policies that are adaptive to different
                 environments and applications have demonstrated
                 superior performance to other approaches. However, they
                 suffer the scalability problem for multiprocessors due
                 to the increasing number of cores in a system. In this
                 article, we propose a scalable and effective online
                 policy called MultiLevel Reinforcement Learning (MLRL).
                 By exploiting the hierarchical paradigm, the time
                 complexity of MLRL is $ O(n \lg n) $ for $n$ cores and
                 the convergence rate is greatly raised by compressing
                 redundant searching space. Some advanced techniques,
                 such as the function approximation and the action
                 selection scheme, are included to enhance the
                 generality and stability of the proposed policy. By
                 simulating on the SPLASH-2 benchmarks, MLRL runs 53\%
                 faster and outperforms the state-of-the-art work with
                 13.6\% energy saving and 2.7\% latency penalty on
                 average. The generality and the scalability of MLRL are
                 also validated through extensive simulations.",
  acknowledgement = ack-nhfb,
  articleno =    "33",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Yang:2014:WLL,
  author =       "Yoon Seok Yang and Reeshav Kumar and Gwan Choi and
                 Paul V. Gratz",
  title =        "{WaveSync}: Low-Latency Source-Synchronous Bypass
                 Network-on-Chip Architecture",
  journal =      j-TODAES,
  volume =       "19",
  number =       "4",
  pages =        "34:1--34:??",
  month =        aug,
  year =         "2014",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2647950",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Aug 25 19:03:51 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "WaveSync is a network-on-chip architecture for a
                 globally asynchronous locally-synchronous (GALS)
                 design. The WaveSync design facilitates low-latency
                 communication leveraging the source-synchronous clock
                 sent along with the data to time components in the
                 datapath of a downstream router, reducing the number of
                 synchronizations needed. WaveSync accomplishes this by
                 partitioning the router components at each node into
                 different clock domains, each synchronized with one of
                 the orthogonal incoming source-synchronous clocks in a
                 GALS 2D mesh network. The data and clock subsequently
                 propagate through each node/router synchronously until
                 the destination is reached, regardless of the number of
                 hops this may take. As long as the data travels in the
                 path of clock propagation and no congestion is
                 encountered, it will be propagated without latching as
                 if in a long combinatorial path, with both the clock
                 and the data accruing delay at the same rate. The
                 result is that the need for synchronization between the
                 mesochronous nodes and/or the asynchronous control
                 associated with the typical GALS network is completely
                 eliminated. To further reduce the latency overhead of
                 synchronization, for those occasions when
                 synchronization is still required (when a flit takes a
                 turn or arrives at the destination), we propose a novel
                 less-than-one-cycle synchronizer. The proposed WaveSync
                 network outperforms conventional GALS networks by
                 87--90\% in average latency, synthesized using a 45nm
                 CMOS library.",
  acknowledgement = ack-nhfb,
  articleno =    "34",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Jose:2014:IAH,
  author =       "John Jose and Madhu Mutyam",
  title =        "Implementation and Analysis of History-Based Output
                 Channel Selection Strategies for Adaptive Routers in
                 Mesh {NoCs}",
  journal =      j-TODAES,
  volume =       "19",
  number =       "4",
  pages =        "35:1--35:??",
  month =        aug,
  year =         "2014",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2647952",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Aug 25 19:03:51 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "The efficiency and effectiveness of an adaptive router
                 in an NoC-based multicore system is evaluated by the
                 performance it achieves under varying inter-core
                 communication traffic. A well-designed selection
                 strategy plays an important role in an adaptive router
                 to act upon dynamic traffic variations. The
                 effectiveness of a selection strategy depends on what
                 metric is used to represent congestion, how precisely
                 this metric captures the actual congestion, and how
                 much cost is involved in capturing the congestion on a
                 real-time scale. Congestion is formed over a period of
                 time due to cumulative and chain reaction effects. We
                 propose novel history-based selection strategies that
                 could be used with any adaptive, deadlock-free, minimal
                 routing in mesh NoCs. Buffer occupancy time and rate of
                 flit flow across reachable ports of neighboring routers
                 in the recent past are captured, propagated, and
                 maintained in a cost-effective way to compute the
                 selection metric. Experimental results on real and
                 synthetic workloads show that our proposed selection
                 strategies significantly outperform state-of-the-art
                 techniques.",
  acknowledgement = ack-nhfb,
  articleno =    "35",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Tsai:2014:PAE,
  author =       "Kun-Lin Tsai and Hao-Tse Chen and Yo-An Lin",
  title =        "Power and Area Efficiency {NoC} Router Design for
                 Application-Specific {SoC} by Using Buffer Merging and
                 Resource Sharing",
  journal =      j-TODAES,
  volume =       "19",
  number =       "4",
  pages =        "36:1--36:??",
  month =        aug,
  year =         "2014",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2633604",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Aug 25 19:03:51 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Network-on-Chip (NoC) is an efficient on-chip
                 communication architecture specifically for
                 System-on-a-Chip (SoC) design. However, the input
                 buffers of a NoC router often take a significant
                 portion of the silicon area and power consumption.
                 Besides, the performance of a NoC is also greatly
                 affected by the buffer size. In this article, a static
                 buffer merging and resource sharing method is proposed
                 for the application-specific SoC minimizing the NoC
                 buffer. When given an application-specific task graph
                 and the dataflow distribution, the proposed method
                 statically merges rarely used buffers and generates the
                 suitable number of input buffers for each router at
                 design timely. The merged buffer is shared by several
                 input directions. The experimental result shows that
                 the buffer can be utilized more effectively after the
                 resource sharing. Based on the synthesized design with
                 TSMC 90nm technology, the proposed method reduces an
                 average of 42.23\% area and 35.13\% power while
                 providing similar performance.",
  acknowledgement = ack-nhfb,
  articleno =    "36",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Hatami:2014:MSN,
  author =       "Nadereh Hatami and Rafal Baranowski and Paolo Prinetto
                 and Hans-Joachim Wunderlich",
  title =        "Multilevel Simulation of Nonfunctional Properties by
                 Piecewise Evaluation",
  journal =      j-TODAES,
  volume =       "19",
  number =       "4",
  pages =        "37:1--37:??",
  month =        aug,
  year =         "2014",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2647955",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Aug 25 19:03:51 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "As the technology shrinks, nonfunctional properties
                 (NFPs) such as reliability, vulnerability, power
                 consumption, or heat dissipation become as important as
                 system functionality. As NFPs often influence each
                 other, depend on the application and workload of a
                 system, and exhibit nonlinear behavior, NFP simulation
                 over long periods of system operation is
                 computationally expensive, if feasible at all. This
                 article presents a piecewise evaluation method for
                 efficient NFP simulation. Simulation time is divided
                 into intervals called evaluation windows, within which
                 the NFP models are partially linearized. High-speed
                 functional system simulation is achieved by parallel
                 execution of models at different levels of abstraction.
                 A trade-off between simulation speed and accuracy is
                 met by adjusting the size of the evaluation window. As
                 an example, the piecewise evaluation technique is
                 applied to analyze aging caused by two mechanisms,
                 namely Negative Bias Temperature Instability (NBTI) and
                 Hot Carrier Injection (HCI), in order to identify
                 reliability hotspots. Experiments show that the
                 proposed technique yields considerable simulation
                 speedup at a marginal loss of accuracy.",
  acknowledgement = ack-nhfb,
  articleno =    "37",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Ravi:2014:HLT,
  author =       "Srivaths Ravi and Michael Joseph",
  title =        "High-Level Test Synthesis: a Survey from Synthesis
                 Process Flow Perspective",
  journal =      j-TODAES,
  volume =       "19",
  number =       "4",
  pages =        "38:1--38:??",
  month =        aug,
  year =         "2014",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2627754",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Aug 25 19:03:51 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "High-level test synthesis is a special class of
                 high-level synthesis having testability as one of the
                 important components. This article presents a detailed
                 survey on recent developments in high-level test
                 synthesis from a synthesis process flow perspective. It
                 also presents a survey on controller synthesis
                 techniques for testability.",
  acknowledgement = ack-nhfb,
  articleno =    "38",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Juan:2014:SPT,
  author =       "Da-Cheng Juan and Siddharth Garg and Diana
                 Marculescu",
  title =        "Statistical Peak Temperature Prediction and Thermal
                 Yield Improvement for {$3$D} Chip Multiprocessors",
  journal =      j-TODAES,
  volume =       "19",
  number =       "4",
  pages =        "39:1--39:??",
  month =        aug,
  year =         "2014",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2633606",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Aug 25 19:03:51 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Thermal issues have become critical roadblocks for
                 achieving highly reliable three-dimensional (3D)
                 integrated circuits (ICs). The presence of process
                 variations further exacerbates these problems. In this
                 article, we propose techniques for the efficient
                 evaluation and mitigation of the impact of leakage
                 power variations on the temperature profile of 3D Chip
                 Multiprocessors (CMPs). Experimental results
                 demonstrate that, due to the impact of process
                 variations, a 4-tier 3D implementation can be more than
                 40$^\circ$C hotter and 23\% leakier than its 2D counterpart.
                 To determine the maximum temperature of each fabricated
                 3D IC, we propose an accurate learning-based model for
                 peak temperature prediction. Based on the learning
                 model, we then propose two post-fabrication techniques
                 to increase the thermal yield of 3D CMPs: (1) tier
                 restacking and (2) thermally-aware die matching.
                 Experimental results show that: (1) the proposed
                 prediction model achieves more than 98\% accuracy, and
                 (2) the proposed thermally-aware, post-fabrication
                 optimization techniques significantly improve the
                 thermal yield from only 51\% to 99\% for 3D CMPs.",
  acknowledgement = ack-nhfb,
  articleno =    "39",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Livramento:2014:HTD,
  author =       "Vinicius S. Livramento and Chrystian Guth and Jos{\'e}
                 Lu{\'\i}s G{\"u}ntzel and Marcelo O. Johann",
  title =        "A Hybrid Technique for Discrete Gate Sizing Based on
                 {Lagrangian} Relaxation",
  journal =      j-TODAES,
  volume =       "19",
  number =       "4",
  pages =        "40:1--40:??",
  month =        aug,
  year =         "2014",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2647956",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Aug 25 19:03:51 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Discrete gate sizing has attracted a lot of attention
                 recently as the EDA industry faces the challenge of
                 optimizing large standard cell-based circuits. The
                 discrete nature of the problem, along with complex
                 timing models, stringent design constraints, and
                 ever-increasing circuit sizes, make the problem very
                 difficult to tackle. Lagrangian Relaxation (LR) is an
                 effective technique to handle complex constrained
                 optimization problems and therefore has been
                 successfully applied to solve the gate sizing problem.
                 This article proposes an improved Lagrangian relaxation
                 formulation for discrete gate sizing that relaxes
                 timing, maximum gate input slew, and maximum gate
                 output capacitance constraints. Based on such
                 formulation, we propose a hybrid technique composed of
                 three steps. First, a topological greedy heuristic
                 solves the LR formulation. Such a heuristic is applied
                 assuming a slightly increased target clock period
                 (backoff factor) to better explore the solution space.
                 Second, a delay recovery heuristic reestablishes the
                 original target clock with small power overhead. Third,
                 a power recovery heuristic explores the remaining
                 slacks to further reduce power. Experiments on the ISPD
                 2012 Contest benchmarks show that our hybrid technique
                 provides less leakage power than the state-of-the-art
                 work for every circuit from the ISPD 2012 Contest
                 infrastructure, achieving up to 24\% less leakage. In
                 addition, our technique achieves a much better
                 compromise between leakage reduction and runtime,
                 obtaining, on average, 9\% less leakage power while
                 running 8.8 times faster.",
  acknowledgement = ack-nhfb,
  articleno =    "40",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Ho:2014:USS,
  author =       "Yenpo Ho and Garng M. Huang and Peng Li",
  title =        "Understanding {SRAM} Stability via Bifurcation
                 Analysis: Analytical Models and Scaling Trends",
  journal =      j-TODAES,
  volume =       "19",
  number =       "4",
  pages =        "41:1--41:??",
  month =        aug,
  year =         "2014",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2647957",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Aug 25 19:03:51 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "In the past decades, aggressive scaling of transistor
                 feature size has been a primary force driving higher
                 Static Random Access Memory (SRAM) integration density.
                 Due to technology scaling, nanometer SRAM designs
                 become increasingly vulnerable to stability challenges.
                 The traditional way of analyzing stability is through
                 the use of Static Noise Margins (SNMs). SNMs are not
                 capable of capturing the key nonlinear dynamics
                 associated with memory operations, leading to imprecise
                 characterization of stability. This work rigorously
                 develops dynamic stability concepts and, more
                 importantly, captures them in physically based
                 analytical models. By leveraging nonlinear stability
                 theory, we develop analytical models that characterize
                 the minimum required amplitude and duration of injected
                 current noises that can flip the SRAM state. These
                 models, which are parameterized in key design,
                 technology, and operating condition parameters, provide
                 important design insights and offer a basis for
                 predicting scaling trends of SRAM dynamic stability.",
  acknowledgement = ack-nhfb,
  articleno =    "41",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@article{Chang:2014:EBT,
  author =       {Naehyuck Chang and David Z. Pan and Yuan Xie},
  title =        {Editorial: {{\booktitle{ACM Transactions on Design
                 Automation of Electronics Systems}}} and Beyond},
  journal =      j-TODAES,
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)},
  volume =       20,
  number =       1,
  pages =        {1:1--1:??},
  articleno =    1,
  month =        nov,
  year =         2014,
  doi =          {https://doi.org/10.1145/2676865},
  coden =        {ATASFO},
  issn =         {1084-4309 (print), 1557-7309 (electronic)},
  issn-l =       {1084-4309},
  journal-url =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate =      {Wed Nov 19 11:18:40 MST 2014},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@article{Hu:2014:GLI,
  author =       {Wei Hu and Dejun Mu and Jason Oberg and Baolei Mao and
                 Mohit Tiwari and Timothy Sherwood and Ryan Kastner},
  title =        {Gate-Level Information Flow Tracking for Security
                 Lattices},
  journal =      j-TODAES,
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)},
  volume =       20,
  number =       1,
  pages =        {2:1--2:??},
  articleno =    2,
  month =        nov,
  year =         2014,
  doi =          {https://doi.org/10.1145/2676548},
  coden =        {ATASFO},
  issn =         {1084-4309 (print), 1557-7309 (electronic)},
  issn-l =       {1084-4309},
  journal-url =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  abstract =     {High-assurance systems found in safety-critical
                 infrastructures are facing steadily increasing cyber
                 threats. These critical systems require rigorous
                 guarantees in information flow security to prevent
                 confidential information from leaking to an
                 unclassified domain and the root of trust from being
                 violated by an untrusted party. To enforce bit-tight
                 information flow control, gate-level information flow
                 tracking (GLIFT) has recently been proposed to
                 precisely measure and manage all digital information
                 flows in the underlying hardware, including implicit
                 flows through hardware-specific timing channels.
                 However, existing work in this realm either restricts
                 to two-level security labels or essentially targets
                 two-input primitive gates and several simple multilevel
                 security lattices. This article provides a general way
                 to expand the GLIFT method for multilevel security.
                 Specifically, it formalizes tracking logic for an
                 arbitrary Boolean gate under finite security lattices,
                 presents a precise tracking logic generation method for
                 eliminating false positives in GLIFT logic created in a
                 constructive manner, and illustrates application
                 scenarios of GLIFT for enforcing multilevel information
                 flow security. Experimental results show various
                 trade-offs in precision and performance of GLIFT logic
                 created using different methods. It also reveals the
                 area and performance overheads that should be expected
                 when expanding GLIFT for multilevel security.},
  bibdate =      {Wed Nov 19 11:18:40 MST 2014},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@article{Wang:2014:CTS,
  author =       {Chun-Kai Wang and Yeh-Chi Chang and Hung-Ming Chen and
                 Ching-Yu Chin},
  title =        {Clock Tree Synthesis Considering Slew Effect on Supply
                 Voltage Variation},
  journal =      j-TODAES,
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)},
  volume =       20,
  number =       1,
  pages =        {3:1--3:??},
  articleno =    3,
  month =        nov,
  year =         2014,
  doi =          {https://doi.org/10.1145/2651401},
  coden =        {ATASFO},
  issn =         {1084-4309 (print), 1557-7309 (electronic)},
  issn-l =       {1084-4309},
  journal-url =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  abstract =     {This work tackles a problem of clock power
                 minimization within a skew constraint under supply
                 voltage variation. This problem is defined in the ISPD
                 2010 benchmark. Unlike mesh and cross link that reduce
                 clock skew uncertainty by multiple driving paths, our
                 focus is on controlling skew uncertainty in the
                 structure of the tree. We observe that slow slew
                 amplifies supply voltage variation, which induces
                 larger path delay variation and skew uncertainty. To
                 obtain the optimality, we formulate a symmetric clock
                 tree synthesis as a mathematical programming problem in
                 which the slew effect is considered by an NLDM-like
                 cell delay variation model. A symmetry-to-asymmetry
                 tree transformation is proposed to further reduce wire
                 loading. Experimental results show that the proposed
                 four methods save up to 20\% of clock tree capacitance
                 loading. Beyond controlling slew to suppress
                 supply-voltage-variation-induced skew, we also discuss
                 the strategies of clock tree synthesis under variant
                 variation scenarios and the limitations of the ISPD
                 2010 benchmark.},
  bibdate =      {Wed Nov 19 11:18:40 MST 2014},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@article{Liu:2014:SIS,
  author =       {Lingyi Liu and Shobha Vasudevan},
  title =        {Scaling Input Stimulus Generation through Hybrid
                 Static and Dynamic Analysis of {RTL}},
  journal =      j-TODAES,
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)},
  volume =       20,
  number =       1,
  pages =        {4:1--4:??},
  articleno =    4,
  month =        nov,
  year =         2014,
  doi =          {https://doi.org/10.1145/2676549},
  coden =        {ATASFO},
  issn =         {1084-4309 (print), 1557-7309 (electronic)},
  issn-l =       {1084-4309},
  journal-url =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  abstract =     {We enhance STAR, an automatic technique for functional
                 input vector generation for design validation. STAR
                 statically analyzes the source code of the
                 Register-Transfer Level (RTL) design. The STAR approach
                 is a hybrid between RTL symbolic execution and concrete
                 simulation that offsets the disadvantages of both. The
                 symbolic execution, which follows the concrete
                 simulation path, extracts constraints for that path.
                 The guard in the path constraints is then mutated and
                 passed to an SMT solver. A satisfiable assignment
                 generates a valid input vector. However, STAR suffers
                 the problem of path explosion during symbolic
                 execution. In this article, we present an explored
                 symbolic state caching method to attack path explosion.
                 Explored symbolic states are states starting from which
                 all subpaths have been explored. Each explored symbolic
                 state is stored in the form of bitmap encoding of
                 branches to ease comparison. When the explored symbolic
                 state is reached again in the following symbolic
                 execution, all subpaths can be pruned. In addition, we
                 use two types of optimizations: (a) dynamic UD chain
                 slicing; and (b) local conflict resolution to improve
                 the running efficiency of STAR. We demonstrate that the
                 results of the enhanced STAR are promising in showing
                 high coverage on benchmark RTL designs, and the runtime
                 of the test generation process is reduced from several
                 hours to less than 20 minutes.},
  bibdate =      {Wed Nov 19 11:18:40 MST 2014},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@article{Sinha:2014:DGP,
  author =       {Sharad Sinha and Thambipillai Srikanthan},
  title =        {Dataflow Graph Partitioning for Area-Efficient
                 High-Level Synthesis with Systems Perspective},
  journal =      j-TODAES,
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)},
  volume =       20,
  number =       1,
  pages =        {5:1--5:??},
  articleno =    5,
  month =        nov,
  year =         2014,
  doi =          {https://doi.org/10.1145/2660769},
  coden =        {ATASFO},
  issn =         {1084-4309 (print), 1557-7309 (electronic)},
  issn-l =       {1084-4309},
  journal-url =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  abstract =     {Area efficiency in datapath synthesis is a widely
                 accepted goal of high-level synthesis. Applications
                 represented by their dataflow graphs are synthesized
                 using resource sharing principles to reduce the area.
                 However, existing resource sharing algorithms focus on
                 absolute area reduction and maximal resource sharing.
                 This kind of a design approach leads to constraints on
                 how often, in terms of number of clock cycles, a new
                 set of input data can be fed to an application. It also
                 leads to very large multiplexers in case of very big
                 dataflow graphs with hundreds of nodes. An adaptive
                 dataflow graph partitioning algorithm is proposed that
                 partitions a graph taking into account a user-defined
                 constraint on how often a new set of input data
                 (generally referred to as data initiation interval) is
                 available. At the same time, a resource sharing
                 algorithm is applied to such partitions in order to
                 reduce area. Multiple design points are generated for a
                 given dataflow graph with different area and time
                 measures to enable a designer to make decisions. We
                 demonstrate our graph partitioning algorithm using
                 synthetically generated large dataflow graphs and on
                 some benchmark applications.},
  bibdate =      {Wed Nov 19 11:18:40 MST 2014},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@article{Gange:2014:SOS,
  author =       {Graeme Gange and Harald S{\o}ndergaard and Peter J.
                 Stuckey},
  title =        {Synthesizing Optimal Switching Lattices},
  journal =      j-TODAES,
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)},
  volume =       20,
  number =       1,
  pages =        {6:1--6:??},
  articleno =    6,
  month =        nov,
  year =         2014,
  doi =          {https://doi.org/10.1145/2661632},
  coden =        {ATASFO},
  issn =         {1084-4309 (print), 1557-7309 (electronic)},
  issn-l =       {1084-4309},
  journal-url =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  abstract =     {The use of nanoscale technologies to create electronic
                 devices has revived interest in the use of regular
                 structures for defining complex logic functions. One
                 such structure is the switching lattice, a
                 two-dimensional lattice of four-terminal switches. We
                 show how to directly construct switching lattices of
                 polynomial size from arbitrary logic functions; we also
                 show how to synthesize minimal-sized lattices by
                 translating the problem to the satisfiability problem
                 for a restricted class of quantified Boolean formulas.
                 The synthesis method is an anytime algorithm that uses
                 modern SAT solving technology and dichotomic search. It
                 improves considerably on an earlier proposal for
                 creating switching lattices for arbitrary logic
                 functions.},
  bibdate =      {Wed Nov 19 11:18:40 MST 2014},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@article{Cheng:2014:ECD,
  author =       {An-Che Cheng and Chia-Chih (Jack) Yen and Celina G.
                 Val and Sam Bayless and Alan J. Hu and Iris Hui-Ru
                 Jiang and Jing-Yang Jou},
  title =        {Efficient Coverage-Driven Stimulus Generation Using
                 Simultaneous {SAT} Solving, with Application to
                 {SystemVerilog}},
  journal =      j-TODAES,
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)},
  volume =       20,
  number =       1,
  pages =        {7:1--7:??},
  articleno =    7,
  month =        nov,
  year =         2014,
  doi =          {https://doi.org/10.1145/2651400},
  coden =        {ATASFO},
  issn =         {1084-4309 (print), 1557-7309 (electronic)},
  issn-l =       {1084-4309},
  journal-url =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  abstract =     {SystemVerilog provides powerful language constructs
                 for verification, and one of them is the covergroup
                 functional coverage model. This model is designed as a
                 complement to assertion verification, that is, it has
                 the advantage of defining cross-coverage over multiple
                 coverage points. In this article, a coverage-driven
                 verification (CDV) approach is formulated as a
                 simultaneous Boolean satisfiability (SAT) problem that
                 is based on covergroups. The coverage bins defined by
                 the functional model are converted into Conjunction
                 Normal Form (CNF) and then solved together by our
                 proposed simultaneous SAT algorithm PLNSAT to generate
                 stimuli for improving coverage. The basic PLNSAT
                 algorithm is then extended in our second proposed
                 algorithm GPLNSAT, which exploits additional
                 information gleaned from the structure of SystemVerilog
                 covergroups. Compared to generating stimuli separately,
                 the simultaneous SAT approaches can share learned
                 knowledge across each coverage target, thus reducing
                 the overall solving time drastically. Experimental
                 results on a UART circuit and the largest ITC benchmark
                 circuits show that the proposed algorithms can achieve
                 10.8x speedup on average and outperform
                 state-of-the-art techniques in most of the
                 benchmarks.},
  bibdate =      {Wed Nov 19 11:18:40 MST 2014},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@Article{Li:2014:SUM,
  author =       "Xueliang Li and Guihai Yan and Yinhe Han and Xiaowei
                 Li",
  title =        "{SmartCap}: Using Machine Learning for Power
                 Adaptation of Smartphone's Application Processor",
  journal =      j-TODAES,
  volume =       "20",
  number =       "1",
  pages =        "8:1--8:??",
  month =        nov,
  year =         "2014",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2651402",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Nov 19 11:18:40 MST 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Power efficiency is increasingly critical to
                 battery-powered smartphones. Given that the using
                 experience is most valued by the user, we propose that
                 the power optimization should directly respect the user
                 experience. We conduct a statistical sample survey and
                 study the correlation among the user experience, system
                 runtime activities, and computational performance of an
                 application processor. We find that there exists a
                 minimal frequency requirement, called ``saturated
                 frequency''. Above this frequency, the device consumes
                 more power but provides little improvements in user
                 experience. This study motivates an intelligent
                 self-adaptive scheme, SmartCap, that automatically
                 identifies the most power-efficient state of the
                 application processor. Compared to prior Linux power
                 adaptation schemes, SmartCap can help save power from
                 11\% to 84\%, depending on applications, with little
                 decline in user experience.",
  acknowledgement = ack-nhfb,
  articleno =    "8",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@article{Shih:2014:COR,
  author =       {Wen-Li Shih and Yi-Ping You and Chung-Wen Huang and
                 Jenq Kuen Lee},
  title =        {Compiler Optimization for Reducing Leakage Power in
                 Multithread {BSP} Programs},
  journal =      j-TODAES,
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)},
  volume =       20,
  number =       1,
  pages =        {9:1--9:??},
  articleno =    9,
  month =        nov,
  year =         2014,
  doi =          {https://doi.org/10.1145/2668119},
  coden =        {ATASFO},
  issn =         {1084-4309 (print), 1557-7309 (electronic)},
  issn-l =       {1084-4309},
  journal-url =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  abstract =     {Multithread programming is widely adopted in novel
                 embedded system applications due to its high
                 performance and flexibility. This article addresses
                 compiler optimization for reducing the power
                 consumption of multithread programs. A traditional
                 compiler employs energy management techniques that
                 analyze component usage in control-flow graphs with a
                 focus on single-thread programs. In this environment
                 the leakage power can be controlled by inserting on and
                 off instructions based on component usage information
                 generated by flow equations. However, these methods
                 cannot be directly extended to a multithread
                 environment due to concurrent execution issues. This
                 article presents a multithread power-gating framework
                 composed of multithread power-gating analysis (MTPGA)
                 and predicated power-gating (PPG) energy management
                 mechanisms for reducing the leakage power when
                 executing multithread programs on simultaneous
                 multithreading (SMT) machines. Our multithread
                 programming model is based on hierarchical
                 bulk-synchronous parallel (BSP) models. Based on a
                 multithread component analysis with dataflow equations,
                 our MTPGA framework estimates the energy usage of
                 multithread programs and inserts PPG operations as
                 power controls for energy management. We performed
                 experiments by incorporating our power optimization
                 framework into SUIF compiler tools and by simulating
                 the energy consumption with a post-estimated SMT
                 simulator based on Wattch toolkits. The experimental
                 results show that the total energy consumption of a
                 system with PPG support and our power optimization
                 method is reduced by an average of 10.09\% for BSP
                 programs relative to a system without a power-gating
                 mechanism on leakage contribution set to 30\%; and the
                 total energy consumption is reduced by an average of
                 4.27\% on leakage contribution set to 10\%. The results
                 demonstrate our mechanisms are effective in reducing
                 the leakage energy of BSP multithread programs.},
  bibdate =      {Wed Nov 19 11:18:40 MST 2014},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@article{Maric:2014:HCD,
  author =       {Bojan Maric and Jaume Abella and Francisco J. Cazorla
                 and Mateo Valero},
  title =        {Hybrid Cache Designs for Reliable Hybrid High and
                 Ultra-Low Voltage Operation},
  journal =      j-TODAES,
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)},
  volume =       20,
  number =       1,
  pages =        {10:1--10:??},
  articleno =    10,
  month =        nov,
  year =         2014,
  doi =          {https://doi.org/10.1145/2658988},
  coden =        {ATASFO},
  issn =         {1084-4309 (print), 1557-7309 (electronic)},
  issn-l =       {1084-4309},
  journal-url =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  abstract =     {Geometry scaling of semiconductor devices enables the
                 design of ultra-low-cost (e.g., below 1 USD)
                 battery-powered resource-constrained ubiquitous devices
                 for environment, urban life, and body monitoring. These
                 sensor-based devices require high performance to react
                 in front of infrequent particular events as well as
                 extreme energy efficiency in order to extend battery
                 lifetime during most of the time when low performance
                 is required. In addition, they require real-time
                 guarantees. The most suitable technological solution
                 for these devices consists of using hybrid processors
                 able to operate at: (i) high voltage to provide high
                 performance and (ii) near-/subthreshold voltage to
                 provide ultra-low energy consumption. However, the most
                 efficient SRAM memories for each voltage level differ
                 and trading off different SRAM designs is mandatory.
                 This is particularly true for cache memories, which
                 occupy most of the processor's area. In this article,
                 we propose new, simple, single-Vcc-domain hybrid L1
                 cache architectures suitable for reliable hybrid high
                 and ultra-low voltage operation. In particular, the
                 cache is designed by combining heterogeneous SRAM cell
                 types: some of the cache ways are optimized to satisfy
                 high-performance requirements during high voltage
                 operation, whereas the rest of the ways provide
                 ultra-low energy consumption and reliability during
                 near-/subthreshold voltage operation. We analyze the
                 performance, energy, and power impact of the proposed
                 cache designs when using them to implement L1 caches in
                 a processor. Experimental results show that our hybrid
                 caches can efficiently and reliably operate across a
                 wide range of voltages, consuming little energy at
                 near-/subthreshold voltage as well as providing high
                 performance at high voltage without decreasing
                 reliability levels to provide strong performance
                 guarantees, as required for our target market.},
  bibdate =      {Wed Nov 19 11:18:40 MST 2014},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@article{Baek:2014:DHD,
  author =       {Seungcheol Baek and Hyung Gyu Lee and Chrysostomos
                 Nicopoulos and Jongman Kim},
  title =        {Designing Hybrid {DRAM\slash PCM} Main Memory Systems
                 Utilizing Dual-Phase Compression},
  journal =      j-TODAES,
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)},
  volume =       20,
  number =       1,
  pages =        {11:1--11:??},
  articleno =    11,
  month =        nov,
  year =         2014,
  doi =          {https://doi.org/10.1145/2658989},
  coden =        {ATASFO},
  issn =         {1084-4309 (print), 1557-7309 (electronic)},
  issn-l =       {1084-4309},
  journal-url =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  abstract =     {The last few years have witnessed the emergence of a
                 promising new memory technology, namely Phase-Change
                 Memory (PCM). Due to its inherent ability to scale
                 deeply into the nanoscale regime and its low power
                 consumption, PCM is increasingly viewed as an
                 attractive alternative for the memory subsystem of
                 future microprocessor architectures. However, PCM is
                 marred by a duo of potentially show-stopping
                 deficiencies, that is, poor write performance
                 (especially when compared to the prevalent and
                 ubiquitous DRAM technology) and limited durability.
                 These weaknesses have urged designers to develop
                 various supporting architectural techniques to aid and
                 complement the operation of the PCM while mitigating
                 its innate flaws. One promising such solution is the
                 deployment of hybridized memory architectures that fuse
                 DRAM and PCM, in order to combine the best attributes
                 of each technology. In this article, we introduce a
                 novel Dual-Phase Compression (DPC) scheme and its
                 architectural design aimed at DRAM/PCM hybrids, which
                 caters to the limitations of PCM technology while
                 optimizing memory performance. The DPC technique is
                 specifically optimized for PCM-based environments and
                 is transparent to the operation of the remaining
                 components of the memory subsystem. Furthermore, the
                 proposed architecture is imbued with a multifaceted
                 wear-leveling technique to enhance the durability and
                 prolong the lifetime of the PCM. Extensive simulations
                 with traces from real applications running on a
                 full-system simulator demonstrate 20.4\% performance
                 improvement and 46.9\% energy reduction, on average, as
                 compared to a baseline DRAM/PCM hybrid implementation.
                 Additionally, the multifaceted wear-leveling technique
                 is shown to significantly prolong the lifetime of the
                 PCM.},
  bibdate =      {Wed Nov 19 11:18:40 MST 2014},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@article{Kuo:2014:RCS,
  author =       {Hsien-Kai Kuo and Bo-Cheng Charles Lai and Jing-Yang
                 Jou},
  title =        {Reducing Contention in Shared Last-Level Cache for
                 Throughput Processors},
  journal =      j-TODAES,
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)},
  volume =       20,
  number =       1,
  pages =        {12:1--12:??},
  articleno =    12,
  month =        nov,
  year =         2014,
  doi =          {https://doi.org/10.1145/2676550},
  coden =        {ATASFO},
  issn =         {1084-4309 (print), 1557-7309 (electronic)},
  issn-l =       {1084-4309},
  journal-url =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  abstract =     {Deploying the Shared Last-Level Cache (SLLC) is an
                 effective way to alleviate the memory bottleneck in
                 modern throughput processors, such as GPGPUs. A
                 commonly used scheduling policy of throughput
                 processors is to render the maximum possible
                 thread-level parallelism. However, this greedy policy
                 usually causes serious cache contention on the SLLC and
                 significantly degrades the system performance. It is
                 therefore a critical performance factor that the thread
                 scheduling of a throughput processor performs a careful
                 trade-off between the thread-level parallelism and
                 cache contention. This article characterizes and
                 analyzes the performance impact of cache contention in
                 the SLLC of throughput processors. Based on the
                 analyses and findings of cache contention and its
                 performance pitfalls, this article formally formulates
                 the aggregate working-set-size-constrained thread
                 scheduling problem that constrains the aggregate
                 working-set size on concurrent threads. With a proof to
                 be NP-hard, this article has integrated a series of
                 algorithms to minimize the cache contention and enhance
                 the overall system performance on GPGPUs. The
                 simulation results on NVIDIA's Fermi architecture have
                 shown that the proposed thread scheduling scheme
                 achieves up to 61.6\% execution time enhancement over a
                 widely used thread clustering scheme. When compared to
                 the state-of-the-art technique that exploits the data
                 reuse of applications, the improvement on execution
                 time can reach 47.4\%. Notably, the execution time
                 improvement of the proposed thread scheduling scheme is
                 only 2.6\% from an exhaustive searching scheme.},
  bibdate =      {Wed Nov 19 11:18:40 MST 2014},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@article{Sinha:2014:FAI,
  author = {Roopak Sinha and Alain Girault and Gregor Goessler and
    Partha S. Roop},
  title = {A Formal Approach to Incremental Converter Synthesis
    for System-on-Chip Design},
  journal = j-TODAES,
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  year = {2014},
  month = nov,
  volume = {20},
  number = {1},
  articleno = {13},
  pages = {13:1--13:??},
  doi = {https://doi.org/10.1145/2663344},
  coden = {ATASFO},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  abstract = {A system-on-chip (SoC) contains numerous intellectual
    property blocks, or IPs. Protocol mismatches between
    IPs may affect the system-level functionality of the
    SoC. Mismatches are addressed by introducing converters
    to control inter-IP interactions. Current approaches
    towards converter generation find limited practical
    application as they use restrictive models, lack formal
    rigour, handle a small subset of commonly encountered
    mismatches, and/or are not scalable. We propose a
    formal technique for SoC design using incremental
    converter synthesis. The proposed formulation provides
    precise models for protocols and requirements, and
    provides a scalable algorithm that allows adding
    multiple components and requirements to an SoC
    incrementally. We prove that the technique is sound and
    complete. Experimental results obtained using real-life
    AMBA benchmarks show the scalability and wide range of
    mismatches handled by our approach.},
  bibdate = {Wed Nov 19 11:18:40 MST 2014},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@article{Aksoy:2014:MDF,
  author = {Levent Aksoy and Paulo Flores and Jose Monteiro},
  title = {Multiplierless Design of Folded {DSP} Blocks},
  journal = j-TODAES,
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  year = {2014},
  month = nov,
  volume = {20},
  number = {1},
  articleno = {14},
  pages = {14:1--14:??},
  doi = {https://doi.org/10.1145/2663343},
  coden = {ATASFO},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  abstract = {This article addresses the problem of minimizing the
    implementation cost of the time-multiplexed constant
    multiplication (TMCM) operation that realizes the
    multiplication of an input variable by a single
    constant selected from a set of multiple constants at a
    time. It presents an efficient algorithm, called
    orpheus, that finds a multiplierless TMCM design by
    sharing logic operators, namely adders, subtractors,
    adders/subtractors, and multiplexors (MUXes). Moreover,
    this article introduces folded design architectures for
    the digital signal processing (DSP) blocks, such as
    finite impulse response (FIR) filters and linear DSP
    transforms, and describes how these folded DSP blocks
    can be efficiently realized using TMCM operations
    optimized by orpheus. Experimental results indicate
    that orpheus can find better solutions than existing
    TMCM algorithms, yielding TMCM designs requiring less
    area. They also show that the folded architectures lead
    to alternative designs with significantly less area,
    but incurring an increase in latency and energy
    consumption, compared to the parallel architecture.},
  bibdate = {Wed Nov 19 11:18:40 MST 2014},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@article{BasiriM:2014:EHB,
  author = {Mohamed Asan {Basiri M.} and Noor Mahammad Sk},
  title = {An Efficient Hardware-Based Higher Radix Floating
    Point {MAC} Design},
  journal = j-TODAES,
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  year = {2014},
  month = nov,
  volume = {20},
  number = {1},
  articleno = {15},
  pages = {15:1--15:??},
  doi = {https://doi.org/10.1145/2667224},
  coden = {ATASFO},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  abstract = {This article proposes an effective way of implementing
    a multiply accumulate circuit (MAC) for high-speed
    floating point arithmetic operations. The real-world
    applications related to digital signal processing and
    the like demand high-performance computation with
    greater accuracy. In general, digital signals are
    represented as a sequence of signed/unsigned
    fixed/floating point numbers. The final result of a MAC
    operation can be computed by feeding the mantissa of
    the previous MAC result as one of the partial products
    to a Wallace tree multiplier or Braun multiplier. Thus,
    the separate accumulation circuit can be avoided by
    keeping the circuit depth still within the bounds of
    the Wallace tree multiplier, namely $ O (\log_2 n) $,
    or Braun multiplier, namely $ O (n) $. In this article,
    three kinds of floating point MACs are proposed. The
    experimental results show 48.54\% of improvement in
    worst path delay achieved by the proposed floating
    point MAC using a radix-2 Wallace structure compared
    with a conventional floating point MAC without a
    pipeline using a 45nm technology library. The same
    proposed design gives 39.92\% of improvement in worst
    path delay without a pipeline using a radix-4 Braun
    structure as compared with a conventional design. In
    this article, a radix-32 $ Q_{32.32}$-format-based
    floating point MAC is proposed using a Wallace
    tree/Braun multiplier. Also this article discusses the
    msb prediction problem and its solution in floating
    point arithmetic that is not available in modern fused
    multiply-add designs. The performance results show
    comparisons between the proposed floating point MAC
    with various floating point MAC designs for radix-2,
    -4, -8, and -16. The proposed design has lesser depth
    than a conventional floating point MAC as well as a
    lower area requirement than other ways of floating
    point MAC implementation, both with/without a
    pipeline.},
  bibdate = {Wed Nov 19 11:18:40 MST 2014},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/fparith.bib;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

%%% NOTE: in the abstract, a displaced ``namely'' in the enumeration of
%%% the two fault categories has been moved ahead of item (i); confirm
%%% against the publisher's abstract if exact wording matters.
@Article{Bolchini:2014:DHE,
  author =       "Cristiana Bolchini and Chiara Sandionigi",
  title =        "Design of Hardened Embedded Systems on Multi-{FPGA}
                 Platforms",
  journal =      j-TODAES,
  volume =       "20",
  number =       "1",
  pages =        "16:1--16:??",
  month =        nov,
  year =         "2014",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2676551",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Nov 19 11:18:40 MST 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "The aim of this article is the definition of a
                 reliability-aware methodology for the design of
                 embedded systems on multi-FPGA platforms. The designed
                 system must be able to detect the occurrence of faults
                 globally and autonomously, in order to recover or to
                 mitigate their effects. Two categories of faults are
                 identified, based on their impact on the device
                 elements, namely (i) recoverable faults, transient
                 problems that can be fixed without causing a lasting
                 effect, and (ii) nonrecoverable faults, those that cause
                 a permanent problem, making the portion of the fabric
                 unusable. While some aspects can be taken from previous
                 solutions available in literature, several open issues
                 exist. In fact, no complete design methodology handling
                 all the peculiar issues of the considered scenario has
                 been proposed yet, a gap we aim at filling with our
                 work. The final system exposes reliability properties
                 and increases its overall lifetime and availability.",
  acknowledgement = ack-nhfb,
  articleno =    "16",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@article{Lu:2015:EEB,
  author = {Jingwei Lu and Pengwen Chen and Chin-Chih Chang and Lu
    Sha and Dennis Jen-Hsin Huang and Chin-Chi Teng and
    Chung-Kuan Cheng},
  title = {{ePlace}: Electrostatics-Based Placement Using {Fast
    Fourier Transform} and {Nesterov}'s Method},
  journal = j-TODAES,
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  year = {2015},
  month = feb,
  volume = {20},
  number = {2},
  articleno = {17},
  pages = {17:1--17:??},
  doi = {https://doi.org/10.1145/2699873},
  coden = {ATASFO},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  abstract = {We develop a flat, analytic, and nonlinear placement
    algorithm, ePlace, which is more effective,
    generalized, simpler, and faster than previous works.
    Based on the analogy between placement instance and
    electrostatic system, we develop a novel placement
    density function eDensity, which models every object as
    positive charge and the density cost as the potential
    energy of the electrostatic system. The electric
    potential and field distribution are coupled with
    density using a well-defined Poisson's equation, which
    is numerically solved by spectral methods based on fast
    Fourier transform (FFT). Instead of using the conjugate
    gradient (CG) nonlinear solver in previous placers, we
    propose to use Nesterov's method which achieves faster
    convergence. The efficiency bottleneck on line search
    is resolved by predicting the steplength using a
    closed-form equation of Lipschitz constant. The
    placement performance is validated through experiments
    on the ISPD 2005 and ISPD 2006 benchmark suites, where
    ePlace outperforms all state-of-the-art placers
    (Capo10.5, FastPlace3.0, RQL, MAPLE, ComPLx, BonnPlace,
    POLAR, APlace3, NTUPlace3, mPL6) with much shorter
    wirelength and shorter or comparable runtime. On
    average, of all the ISPD 2005 benchmarks, ePlace
    outperforms the leading placer BonnPlace with 2.83\%
    shorter wirelength and runs 3.05$ \times $ faster; and
    on average, of all the ISPD 2006 benchmarks, ePlace
    outperforms the leading placer MAPLE with 4.59\%
    shorter wirelength and runs 2.84$ \times $ faster.},
  bibdate = {Tue Mar 3 14:46:37 MST 2015},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@article{Guo:2015:RDS,
  author = {Qi Guo and Tianshi Chen and Zhi-Hua Zhou and Olivier
    Temam and Ling Li and Depei Qian and Yunji Chen},
  title = {Robust Design Space Modeling},
  journal = j-TODAES,
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  year = {2015},
  month = feb,
  volume = {20},
  number = {2},
  articleno = {18},
  pages = {18:1--18:??},
  doi = {https://doi.org/10.1145/2668118},
  coden = {ATASFO},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  abstract = {Architectural design spaces of microprocessors are
    often exponentially large with respect to the pending
    processor parameters. To avoid simulating all
    configurations in the design space, machine learning
    and statistical techniques have been utilized to build
    regression models for characterizing the relationship
    between architectural configurations and responses
    (e.g., performance or power consumption). However, this
    article shows that the accuracy variability of many
    learning techniques over different design spaces and
    benchmarks can be significant enough to mislead the
    decision-making. This clearly indicates a high risk of
    applying techniques that work well on previous modeling
    tasks (each involving a design space, benchmark, and
    design objective) to a new task, due to which the
    powerful tools might be impractical. Inspired by
    ensemble learning in the machine learning domain, we
    propose a robust framework called ELSE to reduce the
    accuracy variability of design space modeling. Rather
    than employing a single learning technique as in
    previous investigations, ELSE employs distinct learning
    techniques to build multiple base regression models for
    each modeling task. This is not a trivial combination
    of different techniques (e.g., always trusting the
    regression model with the smallest error). Instead,
    ELSE carefully maintains the diversity of base
    regression models and constructs a metamodel from the
    base models that can provide accurate predictions even
    when the base models are far from accurate.
    Consequently, we are able to reduce the number of cases
    in which the final prediction errors are unacceptably
    large. Experimental results validate the robustness of
    ELSE: compared with the widely used artificial neural
    network over 52 distinct modeling tasks, ELSE reduces
    the accuracy variability by about 62\%. Moreover, ELSE
    reduces the average prediction error by 27\% and 85\%
    for the investigated MIPS and POWER design spaces,
    respectively.},
  bibdate = {Tue Mar 3 14:46:37 MST 2015},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

%%% NOTE: abstract spelling corrected: ``Through-Silicion'' now reads
%%% ``Through-Silicon''.
@Article{Taouil:2015:YIW,
  author =       "Mottaqiallah Taouil and Said Hamdioui and Erik Jan
                 Marinissen",
  title =        "Yield Improvement for {$3$D} Wafer-to-Wafer Stacked
                 {ICs} Using Wafer Matching",
  journal =      j-TODAES,
  volume =       "20",
  number =       "2",
  pages =        "19:1--19:??",
  month =        feb,
  year =         "2015",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2699832",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Mar 3 14:46:37 MST 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Three-Dimensional Stacked IC (3D-SIC) using
                 Through-Silicon Vias (TSVs) is an emerging technology
                 that provides heterogeneous integration, higher
                 performance, and lower power consumption compared to
                 traditional ICs. Stacking 3D-SICs using Wafer-to-Wafer
                 (W2W) has several advantages such as high stacking
                 throughput, high TSV density, and the ability to handle
                 thin wafers and small dies. However, it suffers from
                 low-compound yield as the stacking of good dies on bad
                 dies and vice versa cannot be prevented. This article
                 investigates wafer matching as a means for yield
                 improvement. It first defines a complete wafer matching
                 framework consisting of different scenarios, each a
                 combination of a matching process (defines the order of
                 wafer selection), a matching criterion (defines whether
                 good or bad dies are matched), wafer rotation (defines
                 either wafers are rotated or not), and a repository
                 type. The repository type specifies whether either the
                 repository is filled immediately after each wafer
                 selection (i.e., running repository) or after all
                 wafers are matched (i.e., static repository). A mapping
                 of prior work on the framework shows that existing
                 research has mainly explored scenarios based on static
                 repositories. Therefore, the article analyzes scenarios
                 based on running repositories. Simulation results show
                 that scenarios based on running repositories improve
                 the compound yield with up to 13.4\% relative to random
                 W2W stacking; the improvement strongly depends on the
                 number of stacked dies, die yield, repository size, as
                 well as on the used matching process. Moreover, the
                 results reveal that scenarios based on running
                 repositories outperform those of static repositories in
                 terms of yield improvement at significant runtime
                 reduction (three orders of magnitude) and lower memory
                 complexity (from exponential to linear in terms of
                 stack size).",
  acknowledgement = ack-nhfb,
  articleno =    "19",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

%%% NOTE: the inline mathematics in the abstract (the two diagnosability
%%% bounds and their ranges of validity) was garbled plain text and has
%%% been rewritten as LaTeX math.
@Article{Chang:2015:CDC,
  author =       "Naiwen Chang and Eddie Cheng and Sunyuan Hsieh",
  title =        "Conditional Diagnosability of {Cayley} Graphs
                 Generated by Transposition Trees under the {PMC}
                 Model",
  journal =      j-TODAES,
  volume =       "20",
  number =       "2",
  pages =        "20:1--20:??",
  month =        feb,
  year =         "2015",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2699854",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Mar 3 14:46:37 MST 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Processor fault diagnosis has played an essential role
                 in measuring the reliability of a multiprocessor
                 system. The diagnosability of many well-known
                 multiprocessor systems has been widely investigated.
                 Conditional diagnosability is a novel measure of
                 diagnosability by adding a further condition that any
                 fault set cannot contain all the neighbors of every
                 node in the system. Several known structural properties
                 of Cayley graphs are exhibited. Based on these
                 properties, we investigate the conditional
                 diagnosability of Cayley graphs generated by
                 transposition trees under the PMC model and show that
                 it is $ 4 n - 11 $ for $ n \geq 4 $ except for the
                 $n$-dimensional star graph for which it has been shown
                 to be $ 8 n - 21 $ for $ n \geq 5 $ (refer to Chang and
                 Hsieh [2014]).",
  acknowledgement = ack-nhfb,
  articleno =    "20",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@article{Duan:2015:DDO,
  author = {Qing Duan and Jun Zeng and Krishnendu Chakrabarty and
    Gary Dispoto},
  title = {Data-Driven Optimization of Order Admission Policies
    in a Digital Print Factory},
  journal = j-TODAES,
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  year = {2015},
  month = feb,
  volume = {20},
  number = {2},
  articleno = {21},
  pages = {21:1--21:??},
  doi = {https://doi.org/10.1145/2699836},
  coden = {ATASFO},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  abstract = {On-demand digital print service is an example of a
    real-time embedded enterprise system. It offers mass
    customization and exemplifies personalized
    manufacturing services. Once a print order is submitted
    to the print factory by a client, the print service
    provider (PSP) needs to make a real-time decision on
    whether to accept or refuse this order. Based on the
    print factory's current capacity and the order's
    properties and requirements, an order is refused if its
    acceptance is not profitable for the PSP. The order is
    accepted with the most appropriate due date in order to
    maximize the profit that can result from this order. We
    have developed an automated learning-based order
    admission framework that can be embedded into an
    enterprise environment to provide real-time admission
    decisions for new orders. The framework consists of
    three classifiers: Support Vector Machine (SVM),
    Decision Tree (DT), and Bayesian Probabilistic Model
    (BPM). The classifiers are trained by history orders
    and used to predict completion status for new orders. A
    decision integration technique is implemented to
    combine the results of the classifiers and predict due
    dates. Experimental results derived using real factory
    data from a leading print service provider and Weka
    open-source software show that the order completion
    status prediction accuracy is significantly improved by
    the decision integration strategy. The proposed
    multiclassifier model also outperforms a standalone
    regression model.},
  bibdate = {Tue Mar 3 14:46:37 MST 2015},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@article{Lin:2015:DES,
  author = {Cheng-Yen Lin and Chung-Wen Huang and Chi-Bang Kuan
    and Shi-Yu Huang and Jenq-Kuen Lee},
  title = {The Design and Experiments of a {SID}-Based
    Power-Aware Simulator for Embedded Multicore Systems},
  journal = j-TODAES,
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  year = {2015},
  month = feb,
  volume = {20},
  number = {2},
  articleno = {22},
  pages = {22:1--22:??},
  doi = {https://doi.org/10.1145/2699834},
  coden = {ATASFO},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  abstract = {Embedded multicore systems are playing increasingly
    important roles in the design of consumer electronics.
    The objective of such systems is to optimize both
    performance and power characteristics of mobile
    devices. However, currently there are no power metrics
    supporting popular application design platforms (such
    as SID) that application developers use to develop
    their applications. This hinders the ability of
    application developers to optimize power consumption.
    In this article we present the design and experiments
    of a SID-based power-aware simulation framework for
    embedded multicore systems. The proposed power
    estimation flow includes two phases: IP-level power
    modeling and power-aware system simulation. The first
    phase employs PowerMixer$^{IP}$ to construct the power
    model for the processor IP and other major IPs, while
    the second phase involves a power abstract
    interpretation method for summarizing the simulation
    trace, then, with a CPE module, estimating the power
    consumption based on the summarized trace information
    and the input of IP power models. In addition, a
    Manager component is devised to map each digital signal
    processor (DSP) component to a host thread and maintain
    the access to shared resources. The aim is to maintain
    the simulation performance as the number of simulated
    DSP components increases. A power-profiling API is also
    supported that developers of embedded software can use
    to tune the granularity of power-profiling for a
    specific code section of the target application. We
    demonstrate via case studies and experiments how
    application developers can use our SID-based power
    simulator for optimizing the power consumption of their
    applications. We characterize the power consumption of
    DSP applications with the DSPstone benchmark and
    discuss how compiler optimization levels with SIMD
    intrinsics influence the performance and power
    consumption. A histogram application and an
    augmented-reality application based on human-face-based
    RMS (recognition, mining, and synthesis) application
    are deployed as running examples on multicore systems
    to demonstrate how our power simulator can be used by
    developers in the optimization process to illustrate
    different views of power dissipations of
    applications.},
  bibdate = {Tue Mar 3 14:46:37 MST 2015},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

%%% NOTE: the third author's compound surname is written hyphenated
%%% (Sarbazi-Azad) so that BibTeX does not treat ``Sarbazi'' as a given
%%% name; TODO confirm the spelling against the published article.
@Article{Asadinia:2015:PLP,
  author =       "Marjan Asadinia and Mohammad Arjomand and Hamid
                 Sarbazi-Azad",
  title =        "Prolonging Lifetime of {PCM}-Based Main Memories
                 through On-Demand Page Pairing",
  journal =      j-TODAES,
  volume =       "20",
  number =       "2",
  pages =        "23:1--23:??",
  month =        feb,
  year =         "2015",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2699867",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Mar 3 14:46:37 MST 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "With current memory scalability challenges,
                 Phase-Change Memory (PCM) is viewed as an attractive
                 replacement to DRAM. The preliminary concern for PCM
                 applicability is its limited write endurance that
                 results in fast wear-out of memory cells. Worse,
                 process variation in the deep-nanometer regime
                 increases the variation in cell lifetime, resulting in
                 an early and sudden reduction in main memory capacity
                 due to the wear-out of a few cells. Recent studies have
                 proposed redirection or correction schemes to alleviate
                 this problem, but all suffer poor throughput or
                 latency. In this article, we show that one of the
                 inefficiency sources in current schemes, even when
                 wear-leveling algorithms are used, is the nonuniform
                 write endurance limit incurred by process variation,
                 that is, when some memory pages have reached their
                 endurance limit, other pages may be far from their
                 limit. In this line, we present a technique that aims
                 to displace a faulty page to a healthy page. This
                 technique, called On-Demand Page Paired PCM (OD3P, for
                 short), when applied at page level, can improve PCM
                 time-to-failure by 20\% on average for different
                 multithreaded and multiprogrammed workloads while also
                 improving IPC by 14\% on average compared to previous
                 page-level techniques. The comparison between
                 line-level OD3P and previous line-level techniques
                 reveals about 2$ \times $ improvement of lifetime and
                 performance.",
  acknowledgement = ack-nhfb,
  articleno =    "23",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Huang:2015:OAA,
  author =       "Xing Huang and Genggeng Liu and Wenzhong Guo and
                 Yuzhen Niu and Guolong Chen",
  title =        "Obstacle-Avoiding Algorithm in {X}-Architecture Based
                 on Discrete Particle Swarm Optimization for {VLSI}
                 Design",
  journal =      j-TODAES,
  volume =       "20",
  number =       "2",
  pages =        "24:1--24:??",
  month =        feb,
  year =         "2015",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2699862",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Mar 3 14:46:37 MST 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Obstacle-avoiding Steiner minimal tree (OASMT)
                 construction has become a focus problem in the physical
                 design of modern very large-scale integration (VLSI)
                 chips. In this article, an effective algorithm is
                 presented to construct an OASMT based on
                 X-architecture for a given set of pins and obstacles.
                 First, a kind of special particle swarm optimization
                 (PSO) algorithm is proposed that successfully combines
                 the classic genetic algorithm (GA), and greatly
                 improves its own search capability. Second, a
                 pretreatment strategy is put forward to deal with
                 obstacles and pins, which can provide a fast
                 information inquiry for the whole algorithm by
                 generating a precomputed lookup table. Third, we
                 present an efficient adjustment method, which enables
                 particles to avoid all the obstacles by introducing
                 some corner points of obstacles. Finally, an excellent
                 refinement method is discussed to further enhance the
                 quality of the final routing tree, which can improve
                 the quality of the solution by 7.93\% on average. To
                 our best knowledge, this is the first time to specially
                 solve the single-layer obstacle-avoiding problem in
                 X-architecture. Experimental results show that the
                 proposed algorithm can further shorten wirelength in
                 the presence of obstacles. And it achieves the best
                 solution quality in a reasonable runtime among the
                 existing algorithms.",
  acknowledgement = ack-nhfb,
  articleno =    "24",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Chang:2015:MBW,
  author =       "Hung-Sheng Chang and Yuan-Hao Chang and Pi-Cheng Hsiu
                 and Tei-Wei Kuo and Hsiang-Pang Li",
  title =        "Marching-Based Wear-Leveling for {PCM}-Based Storage
                 Systems",
  journal =      j-TODAES,
  volume =       "20",
  number =       "2",
  pages =        "25:1--25:??",
  month =        feb,
  year =         "2015",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2699831",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Mar 3 14:46:37 MST 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Improving the performance of storage systems without
                 losing the reliability and sanity/integrity of file
                 systems is a major issue in storage system designs. In
                 contrast to existing storage architectures, we consider
                 a PCM-based storage architecture to enhance the
                 reliability of storage systems. In PCM-based storage
                 systems, the major challenge falls on how to prevent
                 the frequently updated (meta)data from wearing out
                 their residing PCM cells without excessively searching
                 and moving metadata around the PCM space and without
                 extensively updating the index structures of file
                 systems. In this work, we propose an adaptive
                 wear-leveling mechanism to prevent any PCM cell from
                 being worn out prematurely by selecting appropriate
                 data for swapping with constant search/sort cost.
                 Meanwhile, the concept of indirect pointers is designed
                 in the proposed mechanism to swap data without any
                 modification to the file system's indexes. Experiments
                 were conducted based on well-known benchmarks and
                 realistic workloads to evaluate the effectiveness of
                 the proposed design, for which the results are
                 encouraging.",
  acknowledgement = ack-nhfb,
  articleno =    "25",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Chen:2015:APB,
  author =       "Gang Chen and Kai Huang and Christian Buckl and Alois
                 Knoll",
  title =        "Applying Pay-Burst-Only-Once Principle for Periodic
                 Power Management in Hard Real-Time Pipelined
                 Multiprocessor Systems",
  journal =      j-TODAES,
  volume =       "20",
  number =       "2",
  pages =        "26:1--26:??",
  month =        feb,
  year =         "2015",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2699865",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Mar 3 14:46:37 MST 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Pipelined computing is a promising paradigm for
                 embedded system design. Designing a power management
                 policy to reduce the power consumption of a pipelined
                 system with nondeterministic workload is, however,
                 nontrivial. In this article, we study the problem of
                 energy minimization for coarse-grained pipelined
                 systems under hard real-time constraints and propose
                 new approaches based on an inverse use of the
                 pay-burst-only-once principle. We formulate the problem
                 by means of the resource demands of individual pipeline
                 stages and propose two new approaches, a quadratic
                 programming-based approach and fast heuristic, to solve
                 the problem. In the quadratic programming approach, the
                 problem is transformed into a standard quadratic
                 programming with box constraint and then solved by a
                 standard quadratic programming solver. Observing the
                 problem is NP-hard, the fast heuristic is designed to
                 solve the problem more efficiently. Our approach is
                 scalable with respect to the numbers of pipeline
                 stages. Simulation results using real-life applications
                 are presented to demonstrate the effectiveness of our
                 methods.",
  acknowledgement = ack-nhfb,
  articleno =    "26",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Yonga:2015:ABE,
  author =       "Franck Yonga and Michael Mefenza and Christophe
                 Bobda",
  title =        "{ASP}-Based Encoding Model of Architecture Synthesis
                 for Smart Cameras in Distributed Networks",
  journal =      j-TODAES,
  volume =       "20",
  number =       "2",
  pages =        "27:1--27:??",
  month =        feb,
  year =         "2015",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2701419",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Mar 3 14:46:37 MST 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "A synthesis approach based on Answer Set Programming
                 (ASP) for heterogeneous system-on-chips to be used in
                 distributed camera networks is presented. In such
                 networks, the tight resource limitations represent a
                 major challenge for application development. Starting
                 with a high-level description of applications, the
                 physical constraints of the target devices, and the
                 specification of network configuration, our goal is to
                 produce optimal computing infrastructures made of a
                 combination of hardware and software components for
                 each node of the network. Optimization aims at
                 maximizing speed while minimizing chip area and power
                 consumption. Additionally, by performing the
                 architecture synthesis simultaneously for all cameras
                 in the network, we are able to minimize the overall
                 utilization of communication resources and consequently
                 reduce power consumption. Because of its
                 reconfiguration capabilities, a Field Programmable Gate
                 Array (FPGA) has been chosen as the target device,
                 which enhances the exploration of several design
                 alternatives. We present several realistic network
                 scenarios to evaluate and validate the proposed
                 synthesis approach.",
  acknowledgement = ack-nhfb,
  articleno =    "27",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Kim:2015:AIP,
  author =       "Lok-Won Kim and Dong-U Lee and John Villasenor",
  title =        "Automated Iterative Pipelining for {ASIC} Design",
  journal =      j-TODAES,
  volume =       "20",
  number =       "2",
  pages =        "28:1--28:??",
  month =        feb,
  year =         "2015",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2660768",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Mar 3 14:46:37 MST 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "We describe an automated pipelining approach for
                 optimally balanced pipeline implementation that
                 achieves low area cost as well as meeting timing
                 requirements. Most previous automatic pipelining
                 methods have focused on Instruction Set Architecture
                 (ISA)-based designs and the main goal of such methods
                 generally has been maximizing performance as measured
                 in terms of instructions per clock (IPC). By contrast,
                 we focus on datapath-oriented designs (e.g., DSP
                 filters for image or communication processing
                 applications) in ASIC design flows. The goal of the
                 proposed pipelining approach is to find the optimally
                 pipelined design that not only meets the user-specified
                 target clock frequency, but also seeks to minimize area
                 cost of a given design. Unlike most previous
                 approaches, the proposed methods incorporate the use of
                 accurate area and timing information (iteratively
                 achieved by synthesizing every interim pipelined
                 design) to achieve higher accuracy during design
                 exploration. When compared with exhaustive design
                 exploration that considers all possible pipeline
                 patterns, the two heuristic pipelining methods
                 presented here involve only a small area penalty
                 (typically under 5\%) while offering dramatically
                 reduced computational complexity. Experimental
                 validation is performed with commercial ASIC design
                 tools and described for applications including
                 polynomial function evaluation, FIR filters, matrix
                 multiplication, and discrete wavelet transform filter
                 designs with a 90nm standard cell library.",
  acknowledgement = ack-nhfb,
  articleno =    "28",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Pomeranz:2015:GDU,
  author =       "Irith Pomeranz",
  title =        "A Generalized Definition of Unnecessary Test Vectors
                 in Functional Test Sequences",
  journal =      j-TODAES,
  volume =       "20",
  number =       "2",
  pages =        "29:1--29:??",
  month =        feb,
  year =         "2015",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2699853",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Mar 3 14:46:37 MST 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "A class of static test compaction procedures for
                 functional test sequences is based on the omission of
                 unnecessary test vectors. According to the definition
                 used by these procedures, a test vector is unnecessary
                 if all the target faults continue to be detected after
                 it is omitted. This article introduces a more general
                 definition of unnecessary test vectors that allows
                 additional ones to be omitted. According to this
                 definition, a test vector is unnecessary if every
                 target fault can be detected by a sequence that is
                 obtained after omitting the vector, and possibly other
                 vectors. The article develops a procedure for omitting
                 test vectors based on this definition and discusses its
                 effects on the storage requirements and test
                 application time.",
  acknowledgement = ack-nhfb,
  articleno =    "29",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Baranowski:2015:RSN,
  author =       "Rafal Baranowski and Michael A. Kochte and
                 Hans-Joachim Wunderlich",
  title =        "Reconfigurable Scan Networks: Modeling, Verification,
                 and Optimal Pattern Generation",
  journal =      j-TODAES,
  volume =       "20",
  number =       "2",
  pages =        "30:1--30:??",
  month =        feb,
  year =         "2015",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2699863",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Mar 3 14:46:37 MST 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Efficient access to on-chip instrumentation is a key
                 requirement for post-silicon validation, test, debug,
                 bringup, and diagnosis. Reconfigurable scan networks,
                 as proposed by, for example, IEEE Std 1687-2014 and
                 IEEE Std 1149.1-2013, emerge as an effective and
                 affordable means to cope with the increasing complexity
                 of on-chip infrastructure. Reconfigurable scan networks
                 are often hierarchical and may have complex structural
                 and functional dependencies. Common approaches for scan
                 verification based on static structural analysis and
                 functional simulation are not sufficient to ensure
                 correct operation of these types of architectures. To
                 access an instrument in a reconfigurable scan network,
                 a scan-in bit sequence must be generated according to
                 the current state and structure of the network. Due to
                 sequential and combinational dependencies, the access
                 pattern generation process (pattern retargeting)
                 poses a complex decision and optimization problem. This
                 article presents the first generalized formal model
                 that considers structural and functional dependencies
                 of reconfigurable scan networks and is directly
                 applicable to 1687-2014-based and 1149.1-2013-based
                 scan architectures. This model enables efficient formal
                 verification of complex scan networks, as well as
                 automatic generation of access patterns. The proposed
                 pattern generation method supports concurrent access to
                 multiple target scan registers (access merging) and
                 generates short scan-in sequences.",
  acknowledgement = ack-nhfb,
  articleno =    "30",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Beznia:2015:TAR,
  author =       "Kamel Beznia and Ahcene Bounceur and Reinhardt Euler
                 and Salvador Mir",
  title =        "A Tool for Analog\slash {RF BIST} Evaluation Using
                 Statistical Models of Circuit Parameters",
  journal =      j-TODAES,
  volume =       "20",
  number =       "2",
  pages =        "31:1--31:??",
  month =        feb,
  year =         "2015",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2699837",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Mar 3 14:46:37 MST 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Testing analog integrated circuits is expensive in
                 terms of both test equipment and time. To reduce the
                 cost, Design-For-Test techniques (DFT) such as Built-In
                 Self-Test (BIST) have been developed. For a given
                 Circuit Under Test (CUT), the choice of a suitable
                 technique should be made at the design stage as a
                 result of the analysis of test metrics such as test
                 escapes and yield loss. However, it is very hard to
                 carry out this estimation for analog/RF circuits by
                 using fault simulation techniques. Instead, the
                 estimation of parametric test metrics is made possible
                 by Monte Carlo circuit-level simulations and the
                 construction of statistical models. These models
                 represent the output parameter space of the CUT in
                 which the test metrics are defined. In addition, models
                 of the input parameter space may be required to
                 accelerate the simulations and obtain higher confidence
                 in the DFT choices. In this work, we describe a
                 methodological flow for the selection of most adequate
                 statistical models and several techniques that can be
                 used for obtaining these models. Some of these
                 techniques have been integrated into a Computer-Aided
                 Test (CAT) tool for the automation of the process of
                 test metrics estimation. This estimation is illustrated
                 for the case of a BIST solution for CMOS imager pixels
                 that requires the use of advanced statistical modeling
                 techniques.",
  acknowledgement = ack-nhfb,
  articleno =    "31",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Gupte:2015:FAT,
  author =       "Adwait Gupte and Sudhanshu Vyas and Phillip H. Jones",
  title =        "A Fault-Aware Toolchain Approach for {FPGA} Fault
                 Tolerance",
  journal =      j-TODAES,
  volume =       "20",
  number =       "2",
  pages =        "32:1--32:??",
  month =        feb,
  year =         "2015",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2699838",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Mar 3 14:46:37 MST 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "As the size and density of silicon chips continue to
                 increase, maintaining acceptable manufacturing yields
                 has become increasingly difficult. Recent works suggest
                 that lithography techniques are reaching their limits
                 with respect to enabling high yield fabrication of
                 small-scale devices, thus there is an increasing need
                 for techniques that can tolerate fabrication time
                 defects. One candidate technology to help combat these
                 defects is reconfigurable hardware. The flexible nature
                 of reconfigurable devices, such as Field Programmable
                 Gate Arrays (FPGAs), makes it possible for them to
                 route around defective areas of a chip after the device
                 has been packaged and deployed into the field. This
                 work presents a technique that aims to increase the
                 effective yield of FPGA manufacturing by re-claiming a
                 portion of chips that would be ordinarily classified as
                 unusable. In brief, we propose a modification to
                 existing commercial toolchain flows to make them fault
                 aware. A phase is added to identify faults within the
                 chip. The locations of these faults are then used by
                 the toolchain to avoid faults during the placement and
                 routing phase. Specifically, we have applied our
                 approach to the Xilinx commercial toolchain flow and
                 evaluated its tolerance to both logic and routing
                 resource faults. Our findings show that, at a cost of
                 5--10\% in device frequency performance, the modified
                 toolchain flow can tolerate up to 30\% of logic
                 resources being faulty and, depending on the nature of
                 the target application, can tolerate 1--30\% of the
                 device's routing resources being faulty. These results
                 provide strong evidence that commercial toolchains not
                 designed for the purpose of tolerating faults can still
                 be greatly leveraged in the presence of faults to place
                 and route circuits in an efficient manner.",
  acknowledgement = ack-nhfb,
  articleno =    "32",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Zhang:2015:RBA,
  author =       "Jiliang Zhang and Yaping Lin and Gang Qu",
  title =        "Reconfigurable Binding against {FPGA} Replay Attacks",
  journal =      j-TODAES,
  volume =       "20",
  number =       "2",
  pages =        "33:1--33:??",
  month =        feb,
  year =         "2015",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2699833",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Mar 3 14:46:37 MST 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "The FPGA replay attack, where an attacker downgrades
                 an FPGA-based system to the previous version with known
                 vulnerabilities, has become a serious security and
                 privacy concern for FPGA design. Current FPGA
                 intellectual property (IP) protection mechanisms target
                 the protection of FPGA configuration bitstreams by
                 watermarking or encryption or binding. However, these
                 mechanisms fail to prevent replay attacks. In this
                 article, based on a recently reported PUF-FSM binding
                 method that protects the usage of configuration
                 bitstreams, we propose to reconfigure both the physical
                 unclonable functions (PUFs) and the locking scheme of
                 the finite state machine (FSM) in order to defeat the
                 replay attack. We analyze the proposed scheme and
                 demonstrate how replay attack would fail in attacking
                 systems protected by the reconfigurable binding method.
                 We implement two ways to build reconfigurable PUFs and
                 propose two practical methods to reconfigure the
                 locking scheme. Experimental results show that the two
                 reconfigurable PUFs can generate significantly distinct
                 responses with average reconfigurability of more than
                 40\%. The reconfigurable locking schemes only incur a
                 timing overhead less than 1\%.",
  acknowledgement = ack-nhfb,
  articleno =    "33",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Srivastav:2015:DUL,
  author =       "Meeta Srivastav and Mohammed Ehteshamuddin and Kyle
                 Stegner and Leyla Nazhandali",
  title =        "Design of Ultra-Low Power Scalable-Throughput
                 Many-Core {DSP} Applications",
  journal =      j-TODAES,
  volume =       "20",
  number =       "3",
  pages =        "34:1--34:??",
  month =        jun,
  year =         "2015",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2720018",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Aug 7 08:47:44 MDT 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "We propose a system-level solution in designing
                 process variation aware (PVA) scalable-throughput
                 many-core systems for energy constrained applications.
                 In our proposed methodology, we leverage the benefits
                 of voltage scaling for obtaining energy efficiency
                 while compensating for the loss in throughput by
                 exploiting parallelism present in various DSP designs.
                 We demonstrate that such a hybrid method consumes
                 6.27\%--28.15\% less power as compared to simple
                 dynamic voltage scaling over different workload
                 environments. Design details of a prototype chip
                 fabricated on 90 nm technology node and its findings
                 are presented. Chip consists of 8 homogeneous FIR
                 cores, which are capable of running from near-threshold
                 to nominal voltages. In our 20 chip population, we
                 observe 7\% variation in speed among the cores at
                 nominal voltage (0.9V) and 26\% at near threshold
                 voltage (0.55V). We also observe 54\% variation in
                 power consumption of the cores. For any desired
                 throughput, the optimum number of cores and their
                 optimum operating voltage is chosen based on the speed
                 and power characteristics of the cores present inside
                 the chip. We will also present analysis on
                 energy-efficiency of such systems based on changes in
                 ambient temperature.",
  acknowledgement = ack-nhfb,
  articleno =    "34",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Jafari:2015:LUD,
  author =       "Fahimeh Jafari and Zhonghai Lu and Axel Jantsch",
  title =        "Least Upper Delay Bound for {VBR} Flows in
                 Networks-on-Chip with Virtual Channels",
  journal =      j-TODAES,
  volume =       "20",
  number =       "3",
  pages =        "35:1--35:??",
  month =        jun,
  year =         "2015",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2733374",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Aug 7 08:47:44 MDT 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Real-time applications such as multimedia and gaming
                 require stringent performance guarantees, usually
                 enforced by a tight upper bound on the maximum
                 end-to-end delay. For FIFO multiplexed on-chip packet
                 switched networks we consider worst-case delay bounds
                 for Variable Bit-Rate (VBR) flows with aggregate
                 scheduling, which schedules multiple flows as an
                 aggregate flow. VBR Flows are characterized by a
                 maximum transfer size ($L$), peak rate ($p$),
                 burstiness ($ \sigma $), and average sustainable rate
                 ($ \rho $). Based on network calculus, we present and
                 prove theorems to derive per-flow end-to-end Equivalent
                 Service Curves (ESC), which are in turn used for
                 computing Least Upper Delay Bounds (LUDBs) of
                 individual flows. In a realistic case study we find
                 that the end-to-end delay bound is up to 46.9\% more
                 accurate than the case without considering the traffic
                 peak behavior. Likewise, results also show similar
                 improvements for synthetic traffic patterns. The
                 proposed methodology is implemented in C++ and has low
                 run-time complexity, enabling quick evaluation for
                 large and complex SoCs.",
  acknowledgement = ack-nhfb,
  articleno =    "35",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Bombieri:2015:MRR,
  author =       "Nicola Bombieri and Franco Fummi and Sara Vinco",
  title =        "A Methodology to Recover {RTL IP} Functionality for
                 Automatic Generation of {SW} Applications",
  journal =      j-TODAES,
  volume =       "20",
  number =       "3",
  pages =        "36:1--36:??",
  month =        jun,
  year =         "2015",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2720019",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Aug 7 08:47:44 MDT 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "With the advent of heterogeneous multiprocessor
                 system-on-chips (MPSoCs), hardware/software
                 partitioning is again on the rise both in research and
                 in product development. In this new scenario,
                 implementing intellectual-property (IP) blocks as SW
                 applications rather than dedicated HW is an increasing
                 trend to fully exploit the computation power provided
                 by the MPSoC CPUs. On the other hand, whole libraries
                 of IP blocks are available as RTL descriptions, most of
                 them without a corresponding high-level SW
                 implementation. In this context, this article presents
                 a methodology to automatically generate SW applications
                 in C++, by starting from existing RTL IPs implemented
                 in hardware description language (HDL). The methodology
                 exploits an abstraction algorithm to eliminate
                 implementation details typical of HW descriptions (such
                 as cycle-accurate functionality and data types) to
                 guarantee relevant performance of the generated code.
                 The experimental results show that, in many cases, the
                 C++ code automatically generated in a few seconds with
                 the proposed methodology is as efficient as the
                 corresponding code manually implemented from scratch.",
  acknowledgement = ack-nhfb,
  articleno =    "36",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Holst:2015:HTL,
  author =       "Stefan Holst and Michael E. Imhof and Hans-Joachim
                 Wunderlich",
  title =        "High-Throughput Logic Timing Simulation on {GPGPUs}",
  journal =      j-TODAES,
  volume =       "20",
  number =       "3",
  pages =        "37:1--37:??",
  month =        jun,
  year =         "2015",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2714564",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Aug 7 08:47:44 MDT 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Many EDA tasks such as test set characterization or
                 the precise estimation of power consumption, power
                 droop and temperature development, require a very large
                 number of time-aware gate-level logic simulations.
                 Until now, such characterizations have been feasible
                 only for rather small designs or with reduced precision
                 due to the high computational demands. The new
                 simulation system presented here is able to accelerate
                 such tasks by more than two orders of magnitude and
                 provides for the first time fast and comprehensive
                 timing simulations for industrial-sized designs.
                 Hazards, pulse-filtering, and pin-to-pin delay are
                 supported for the first time in a GPGPU accelerated
                 simulator, and the system can easily be extended to
                 even more realistic delay models and further
                 applications. A sophisticated mapping with efficient
                 memory utilization and access patterns as well as
                 minimal synchronizations and control flow divergence is
                 able to use the full potential of GPGPU architectures.
                 To provide such a mapping, we combine for the first
                 time the versatility of event-based timing simulation
                 and multi-dimensional parallelism used in GPU-based
                 gate-level simulators. The result is a
                 throughput-optimized timing simulation algorithm, which
                 runs many simulation instances in parallel and at the
                 same time fully exploits gate-parallelism within the
                 circuit.",
  acknowledgement = ack-nhfb,
  articleno =    "37",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Xu:2015:DCD,
  author =       "Tong Xu and Peng Li and Savithri Sundareswaran",
  title =        "Decoupling Capacitance Design Strategies for Power
                 Delivery Networks with Power Gating",
  journal =      j-TODAES,
  volume =       "20",
  number =       "3",
  pages =        "38:1--38:??",
  month =        jun,
  year =         "2015",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2700825",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Aug 7 08:47:44 MDT 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Power gating is a widely used leakage power saving
                 strategy in modern chip designs. However, power gating
                 introduces unique power integrity issues and trade-offs
                 between switching and rush current (wake-up) supply
                 noises. At the same time, the amount of power saving
                 intrinsically trades off with power integrity. In
                 addition, these trade-offs significantly vary with
                 supply voltage. In this article, we propose systemic
                 decoupling capacitors (decaps) optimization strategies
                 that optimally trade-off between power integrity and
                 leakage saving. Specially, new global decap and
                 reroutable decap design concepts are proposed to relax
                 the tight interaction between power integrity and
                 leakage saving of power gated PDNs with a single supply
                 voltage level. Furthermore, we propose a flexible decap
                 allocation technique to deal with the design trade-offs
                 under multiple supply voltage levels. The proposed
                 strategies are implemented in an automatic design flow
                 for choosing the optimal amount of local decaps, global
                 decaps and reroutable decaps. The conducted experiments
                 demonstrate that leakage saving can be increased
                 significantly compared with the conventional PDN design
                 approach with a single supply voltage level using the
                 proposed techniques without jeopardizing power
                 integrity. For PDN designs operating at two supply
                 voltage levels, the optimal performance is achieved at
                 each voltage level.",
  acknowledgement = ack-nhfb,
  articleno =    "38",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Firouzi:2015:AVA,
  author =       "Farshad Firouzi and Fangming Ye and Krishnendu
                 Chakrabarty and Mehdi B. Tahoori",
  title =        "Aging- and Variation-Aware Delay Monitoring Using
                 Representative Critical Path Selection",
  journal =      j-TODAES,
  volume =       "20",
  number =       "3",
  pages =        "39:1--39:??",
  month =        jun,
  year =         "2015",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2746237",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Aug 7 08:47:44 MDT 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Process together with runtime variations in
                 temperature and voltage, as well as transistor aging,
                 degrade path delay and may eventually induce circuit
                 failure due to timing variations. Therefore, in-field
                 tracking of path delays is essential, and to respond to
                 this need, several delay sensor designs have been
                 proposed in the literature. However, due to the
                 significant overhead of these sensors and the large
                 number of critical paths in today's IC, it is
                 infeasible to monitor the delay of every critical path
                 in silicon. We present an aging- and variation-aware
                 representative path selection technique based on
                 machine learning that allows to measure the delay of a
                 small set of paths and infer the delay of a larger pool
                 of paths that are likely to fail due to delay
                 variations. Simulation results for benchmark circuits
                 highlight the accuracy of the proposed approach for
                 predicting critical-path delay based on the selected
                 representative paths.",
  acknowledgement = ack-nhfb,
  articleno =    "39",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Park:2015:SGA,
  author =       "Heejong Park and Avinash Malik and Zoran Salcic",
  title =        "Scheduling Globally Asynchronous Locally Synchronous
                 Programs for Guaranteed Response Times",
  journal =      j-TODAES,
  volume =       "20",
  number =       "3",
  pages =        "40:1--40:??",
  month =        jun,
  year =         "2015",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2740961",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Aug 7 08:47:44 MDT 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Safety-critical software systems need to guarantee
                 functional correctness and bounded response times to
                 external input events. Programs designed using reactive
                 programming languages, based on formal mathematical
                 semantics, can be automatically verified for functional
                 correctness guarantees. Real-time guarantees on the
                 other hand are much harder to achieve. In this article
                 we provide a static analysis framework for guaranteeing
                 response times for reactive programs developed using
                 the Globally Asynchronous Locally Synchronous (GALS)
                 model of computation. The proposed approach is
                 applicable to scheduling of GALS programs for different
                 target architectures with single or multiple processors
                 or cores. A Satisfiability Modulo Theory (SMT)
                 formulation in the quantifier free linear real
                 arithmetic (QF\_LRA) logic is used for scheduling. A
                 novel technique to encode rendezvous used in
                 synchronization of globally asynchronous processes in
                 the presence of locally synchronous parallelism and
                 arbitrary preemption into QF\_LRA logic is presented.
                 Finally, our SMT formulation is shown to produce
                 schedules in reasonable time.",
  acknowledgement = ack-nhfb,
  articleno =    "40",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Yi:2015:ESF,
  author =       "Qiuping Yi and Zijiang Yang and Jian Liu and Chen Zhao
                 and Chao Wang",
  title =        "Explaining Software Failures by Cascade Fault
                 Localization",
  journal =      j-TODAES,
  volume =       "20",
  number =       "3",
  pages =        "41:1--41:??",
  month =        jun,
  year =         "2015",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2738038",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Aug 7 08:47:44 MDT 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/gnu.bib;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib;
                 https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
  abstract =     "During software debugging, a significant amount of
                 effort is required for programmers to identify the root
                 cause of a manifested failure. In this article, we
                 propose a cascade fault localization method to help
                 speed up this labor-intensive process via a combination
                 of weakest precondition computation and constraint
                 solving. Our approach produces a cause tree, where each
                 node is a potential cause of the failure and each edge
                 represents a causal relationship between two causes.
                 There are two main contributions of this article that
                 differentiate our approach from existing methods.
                 First, our method systematically computes all potential
                 causes of a failure and augments each cause with a
                 proper context for ease of comprehension by the user.
                 Second, our method organizes the potential causes in a
                 tree structure to enable on-the-fly pruning based on
                 domain knowledge and feedback from the user. We have
                 implemented our new method in a software tool called
                 CaFL, which builds upon the LLVM compiler and KLEE
                 symbolic virtual machine. We have conducted experiments
                 on a large set of public benchmarks, including real
                 applications from GNU Coreutils and Busybox. Our
                 results show that in most cases the user has to examine
                 only a small fraction of the execution trace before
                 identifying the root cause of the failure.",
  acknowledgement = ack-nhfb,
  articleno =    "41",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Lee:2015:SLO,
  author =       "Jong Chul Lee and Roman Lysecky",
  title =        "System-Level Observation Framework for Non-Intrusive
                 Runtime Monitoring of Embedded Systems",
  journal =      j-TODAES,
  volume =       "20",
  number =       "3",
  pages =        "42:1--42:??",
  month =        jun,
  year =         "2015",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2717310",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Aug 7 08:47:44 MDT 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "As the complexity of embedded systems rapidly
                 increases, the use of traditional analysis and debug
                 methods encounters significant challenges in
                 monitoring, analyzing, and debugging the complex
                 interactions of various software and hardware
                 components. This situation is further exacerbated for
                 in-situ debugging and verification in which traditional
                 debug and trace interfaces that require physical access
                 are unavailable, infeasible, or cost prohibitive. In
                 this article, we present a system-level observation
                 framework that provides minimally intrusive methods for
                 dynamically monitoring and analyzing deeply integrated
                 hardware and software components within embedded
                 systems. The system-level observation framework
                 monitors hardware and software events by inserting
                 additional logic for detecting designer-specified
                 events within hardware cores to observe complex
                 interaction across hardware and software boundaries at
                 runtime, and provides visibility for monitoring complex
                 execution behavior of software applications without
                 affecting the system execution.",
  acknowledgement = ack-nhfb,
  articleno =    "42",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Zhang:2015:LRR,
  author =       "Qi Zhang and Xuandong Li and Linzhang Wang and Tian
                 Zhang and Yi Wang and Zili Shao",
  title =        "{Lazy-RTGC}: a Real-Time Lazy Garbage Collection
                 Mechanism with Jointly Optimizing Average and Worst
                 Performance for {NAND} Flash Memory Storage Systems",
  journal =      j-TODAES,
  volume =       "20",
  number =       "3",
  pages =        "43:1--43:??",
  month =        jun,
  year =         "2015",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2746236",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Aug 7 08:47:44 MDT 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Due to many attractive and unique properties, NAND
                 flash memory has been widely adopted in
                 mission-critical hard real-time systems and some soft
                 real-time systems. However, the nondeterministic
                 garbage collection operation in NAND flash memory makes
                 it difficult to predict the system response time of
                 each data request. This article presents Lazy-RTGC, a
                 real-time lazy garbage collection mechanism for NAND
                 flash memory storage systems. Lazy-RTGC adopts two
                 design optimization techniques: on-demand page-level
                 address mappings, and partial garbage collection.
                 On-demand page-level address mappings can achieve high
                 performance of address translation and can effectively
                 manage the flash space with the minimum RAM cost. On
                 the other hand, partial garbage collection can provide
                 the guaranteed system response time. By adopting these
                 techniques, Lazy-RTGC jointly optimizes both the
                 average and the worst system response time, and
                 provides a lower bound of reclaimed free space.
                 Lazy-RTGC is implemented in FlashSim and compared with
                 representative real-time NAND flash memory management
                 schemes. Experimental results show that our technique
                 can significantly improve both the average and worst
                 system performance with very low extra flash-space
                 requirements.",
  acknowledgement = ack-nhfb,
  articleno =    "43",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Sharma:2015:AIE,
  author =       "Namita Sharma and Preeti Ranjan Panda and Francky
                 Catthoor and Praveen Raghavan and Tom {Vander Aa}",
  title =        "Array Interleaving --- An Energy-Efficient Data Layout
                 Transformation",
  journal =      j-TODAES,
  volume =       "20",
  number =       "3",
  pages =        "44:1--44:??",
  month =        jun,
  year =         "2015",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2747875",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Aug 7 08:47:44 MDT 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Optimizations related to memory accesses and data
                 storage make a significant difference to the
                 performance and energy of a wide range of
                 data-intensive applications. These techniques need to
                 evolve with modern architectures supporting wide memory
                 accesses. We investigate array interleaving, a data
                 layout transformation technique that achieves energy
                 efficiency by combining the storage of data elements
                 from multiple arrays in contiguous locations, in an
                 attempt to exploit spatial locality. The transformation
                 reduces the number of memory accesses by loading the
                 right set of data into vector registers, thereby
                 minimizing redundant memory fetches. We perform a
                 global analysis of array accesses, and account for
                 possibly different array behavior in different loop
                 nests that might ultimately lead to changes in data
                 layout decisions for the same array across program
                 regions. Our technique relies on detailed estimates of
                 the savings due to interleaving, and also the cost of
                 performing the actual data layout modifications. We
                 also account for the vector register widths and the
                 possibility of choosing the appropriate granularity for
                 interleaving. Experiments on several benchmarks show a
                 6--34\% reduction in memory energy due to the
                 strategy.",
  acknowledgement = ack-nhfb,
  articleno =    "44",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Roy:2015:LAM,
  author =       "Sudip Roy and Partha P. Chakrabarti and Srijan Kumar
                 and Krishnendu Chakrabarty and Bhargab B.
                 Bhattacharya",
  title =        "Layout-Aware Mixture Preparation of Biochemical Fluids
                 on Application-Specific Digital Microfluidic Biochips",
  journal =      j-TODAES,
  volume =       "20",
  number =       "3",
  pages =        "45:1--45:??",
  month =        jun,
  year =         "2015",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2714562",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Aug 7 08:47:44 MDT 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "The recent proliferation of digital microfluidic (DMF)
                 biochips has enabled rapid on-chip implementation of
                 many biochemical laboratory assays or protocols. Sample
                 preprocessing, which includes dilution and mixing of
                 reagents, plays an important role in the preparation of
                 assays. The automation of sample preparation on a
                 digital microfluidic platform often mandates the
                 execution of a mixing algorithm, which determines a
                 sequence of droplet mix-split steps (usually
                 represented as a mixing graph). However, the overall
                 cost and performance of on-chip mixture preparation not
                 only depends on the mixing graph but also on the
                 resource allocation and scheduling strategy, for
                 instance, the placement of boundary reservoirs or
                 dispensers, mixer modules, storage units, and physical
                 design of droplet-routing pathways. In this article, we
                 first present a new mixing algorithm based on a
                 number-partitioning technique that determines a
                 layout-aware mixing tree corresponding to a given
                 target ratio of a number of fluids. The mixing graph
                 produced by the proposed method can be implemented on a
                 chip with a fewer number of crossovers among
                 droplet-routing paths as well as with a reduced
                 reservoir-to-mixer transportation distance. Second, we
                 propose a routing-aware resource-allocation scheme that
                 can be used to improve the performance of a given
                 mixing algorithm on a chip layout. The design
                 methodology is evaluated on various test cases to
                 demonstrate its effectiveness in mixture preparation
                 with the help of two representative mixing algorithms.
                 Simulation results show that on average, the proposed
                 scheme can reduce the number of crossovers among
                 droplet-routing paths by 89.7\% when used in
                 conjunction with the new mixing algorithm, and by
                 75.4\% when an earlier algorithm [Thies et al. 2008] is
                 used.",
  acknowledgement = ack-nhfb,
  articleno =    "45",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Suresh:2015:AGU,
  author =       "Chandra K. H. Suresh and Sule Ozev and Ozgur
                 Sinanoglu",
  title =        "Adaptive Generation of Unique {IDs} for Digital Chips
                 through Analog Excitation",
  journal =      j-TODAES,
  volume =       "20",
  number =       "3",
  pages =        "46:1--46:??",
  month =        jun,
  year =         "2015",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2732408",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Aug 7 08:47:44 MDT 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Globalization of the integrated circuit design and
                 manufacturing flow has successfully ameliorated design
                 complexity and fabrication cost challenges, and helped
                 deliver cost-effective products while meeting stringent
                 time-to-market requirements. On the flip side, it has
                 resulted in various forms of security vulnerabilities
                 in the supply chain that involves designers, fabs, test
                 facilities, and distributors until the end-product
                 reaches customers. One of the biggest threats to
                 semiconductor industry today is the entry of aged,
                 reject, or cloned parts, that is, counterfeit chips,
                 into the supply chain, leading to annual revenue losses
                 in the order of billions of dollars. While traceability
                 of chips between trusted parties can help monitor the
                 supply chain at various points in the flow, existing
                 solutions are in the form of integrating costly
                 hardware units on chip, or utilizing easy-to-circumvent
                 inspection-based detection techniques. In this article,
                 we propose a technique for adaptive unique ID
                 generation that leverages process variations, enabling
                 chip traceability. The proposed method stimulates
                 digital chips with an analog signal from the supply
                 lines, which serve as primary inputs to each gate in
                 the signal path. Using a sinusoidal signal that
                 exercises the transistors as gain components, we create
                 a chip-specific response that can be post-processed
                 into a digital ID. The proposed technique enables quick
                 and cost-effective authenticity validation that
                 requires no on-chip hardware support. Our simulation
                 and experimentation on actual chips show that the
                 proposed technique is capable of generating unique IDs
                 even in the presence of environmental noise.",
  acknowledgement = ack-nhfb,
  articleno =    "46",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Chen:2015:MBF,
  author =       "Hai-Bao Chen and Ying-Chi Li and Sheldon X.-D. Tan and
                 Xin Huang and Hai Wang and Ngai Wong",
  title =        "{$H$}-Matrix-Based Finite-Element-Based Thermal
                 Analysis for {$3$D} {ICs}",
  journal =      j-TODAES,
  volume =       "20",
  number =       "4",
  pages =        "47:1--47:??",
  month =        sep,
  year =         "2015",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2714563",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Sep 29 08:53:54 MDT 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "In this article, we propose an efficient
                 finite-element-based (FE-based) method for both steady
                 and transient thermal analyses of high-performance
                 integrated circuits based on the hierarchical matrix
                 ($H$-matrix) representation. $H$-matrix has been shown to
                 provide a data-sparse way to approximate the matrices
                 and their inverses with almost linear-space and time
                 complexities. In this work, we apply the $H$-matrix
                 concept for solving heating diffusion problems modeled
                 by parabolic partial differential equations (PDEs)
                 based on the finite element method. We show that the
                 matrix from a FE-based steady and transient thermal
                 analysis can be represented by $H$-matrix without any
                 approximation, and its inverse and Cholesky factors can
                 be evaluated by $H$-matrix with controlled accuracy. We
                 then show and prove that the memory and time
                 complexities of the solver are bounded by
                 $ O(k_1 N \log N) $ and $ O(k_1^2 N \log^2 N) $,
                 respectively, where $ k_1 $ is a small quantity
                 determined by accuracy requirements and $N$ is the
                 number of unknowns in the
                 system. The comparison with existing product-quality LU
                 solvers, CSPARSE and UMFPACK, on a number of 3D IC
                 thermal matrices, shows that the new method is much
                 more memory efficient than these methods, which however
                 prevents CPU time comparison with those methods on
                 large examples. But the proposed method can solve all
                 the given thermal circuits with decent scalabilities,
                 which shows good agreement with the predicted
                 theoretical results.",
  acknowledgement = ack-nhfb,
  articleno =    "47",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Heyse:2015:TTM,
  author          = {Karel Heyse and Brahim {Al Farisi} and Karel Bruneel and
                     Dirk Stroobandt},
  title           = {{TCONMAP}: Technology Mapping for Parameterised
                     {FPGA} Configurations},
  journal         = j-TODAES,
  volume          = {20},
  number          = {4},
  pages           = {48:1--48:??},
  month           = sep,
  year            = {2015},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/2751558},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Tue Sep 29 08:53:54 MDT 2015},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {Parameterised configurations are FPGA configuration
                     bitstreams in which the bits are defined as functions of
                     user-defined parameters. From a parameterised
                     configuration, it is possible to quickly and efficiently
                     derive specialised, regular configuration bitstreams by
                     evaluating these functions. The specialised bitstreams
                     have different properties and functionality depending on
                     the chosen values of the parameters. The most important
                     application of parameterised configurations is the
                     generation of specialised configuration bitstreams for
                     Dynamic Circuit Specialisation, a technique for
                     optimising circuits at runtime using partial
                     reconfiguration of the FPGA. Generating and using
                     parameterised configurations requires a new FPGA tool
                     flow. In this article, we present a new technology
                     mapping algorithm for parameterised designs, called
                     TCONMAP, that can be used to produce parameterised
                     configurations in which both the configuration of the
                     logic blocks and routing is a function of the parameters.
                     In our experiments, we demonstrate that in using TCONMAP,
                     the depth and area of the mapped circuit is close to the
                     minimal depth and area attainable. Both Dynamic Circuit
                     Specialisation and fine-grained modular reconfiguration
                     are extracted by TCONMAP from the HDL description of the
                     design requiring only simple parameter annotations.},
  acknowledgement = ack-nhfb,
  articleno       = {48},
  fjournal        = {ACM Transactions on Design Automation of
                     Electronic Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Peter:2015:CBS,
  author          = {Steffen Peter and Tony Givargis},
  title           = {Component-Based Synthesis of Embedded Systems
                     Using Satisfiability Modulo Theories},
  journal         = j-TODAES,
  volume          = {20},
  number          = {4},
  pages           = {49:1--49:??},
  month           = sep,
  year            = {2015},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/2746235},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Tue Sep 29 08:53:54 MDT 2015},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {Constraint programming solvers, such as Satisfiability
                     Modulo Theory (SMT) solvers, are capable tools in finding
                     preferable configurations for embedded systems from large
                     design spaces. However, constructing SMT constraint
                     programs is not trivial, in particular for complex
                     systems that exhibit multiple viewpoints and models. In
                     this article we propose CoDeL: a component-based
                     description language that allows system designers to
                     express components as reusable building blocks of the
                     system with their parameterizable properties, models, and
                     interconnectivity. Systems are synthesized by allocating,
                     connecting, and parameterizing the components to satisfy
                     the requirements of an application. We present an
                     algorithm that transforms component-based design spaces,
                     expressible in CoDeL, to an SMT program, which, solved by
                     state-of-the-art SMT solvers, determines the
                     satisfiability of the synthesis problem, and delivers a
                     correct-by-construction system configuration. Evaluation
                     results for use cases in the domain of scheduling and
                     mapping of distributed real-time processes confirm,
                     first, the performance gain of SMT compared to
                     traditional design space exploration approaches, second,
                     the usability gains by expressing design problems in
                     CoDeL, and third, the capability of the CoDeL/SMT
                     approach to support the design of embedded systems.},
  acknowledgement = ack-nhfb,
  articleno       = {49},
  fjournal        = {ACM Transactions on Design Automation of
                     Electronic Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Mirtar:2015:AAA,
  author =       "Ali Mirtar and Sujit Dey and Anand Raghunathan",
  title =        "An Application Adaptation Approach to Mitigate the
                 Impact of Dynamic Thermal Management on Video
                 Encoding",
  journal =      j-TODAES,
  volume =       "20",
  number =       "4",
  pages =        "50:1--50:??",
  month =        sep,
  year =         "2015",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2753758",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Sep 29 08:53:54 MDT 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Due to limitations of cooling methods such as using
                 fan and heat sink, dynamic thermal management (DTM) is
                 being widely adopted to manage the temperature of
                 computing systems. However, application of DTM can
                 reduce the system performance and thereby affect the
                 quality of real-time applications. Real-time video
                 encoding, which has high computational need and hard
                 deadlines, is a commonly used application that can be
                 severely affected by the usage of DTM. We study the
                 effect of DTM on a widely used H.264 video encoder and
                 formulate a multidimensional optimization problem to
                 maximize video quality and minimize bit rate while
                 ensuring that the video encoder can run in real time in
                 spite of DTM effects. We model the effects of adapting
                 encoding parameters on video quality, bit rate, and
                 encoder speed. We propose a dynamic application
                 adaptation method to efficiently solve the optimization
                 problem by optimally adapting the encoding parameters
                 in response to DTM effects. In addition, we show that
                 the proposed dynamic application adaptation method
                 would reduce the need for cooling methods such as
                 forced convection cooling. We implement the proposed
                 approach on an Intel\reg{} Core\texttrademark{} 2 Duo
                 platform where dynamic voltage and frequency scaling
                 (DVFS) is used for DTM. Our measurements with several
                 videos reveal that when DTM is applied, the video
                 quality is affected significantly. However, using the
                 proposed adaptation algorithm, the encoder can run in
                 real time, and the quality loss is minimized with only
                 a marginal increase in the bit rate.",
  acknowledgement = ack-nhfb,
  articleno =    "50",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Chang:2015:VPI,
  author          = {Da-Wei Chang and Hsin-Hung Chen and Wei-Jian Su},
  title           = {{VSSD}: Performance Isolation in a Solid-State Drive},
  journal         = j-TODAES,
  volume          = {20},
  number          = {4},
  pages           = {51:1--51:??},
  month           = sep,
  year            = {2015},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/2755560},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Tue Sep 29 08:53:54 MDT 2015},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {Performance isolation is critical in shared storage
                     systems, a popular storage solution. In a shared storage
                     system, interference between requests from different
                     users can affect the accuracy of I/O cost accounting,
                     resulting in poor performance isolation. Recently, NAND
                     flash-memory-based solid-state drives (SSDs) have been
                     increasingly used in shared storage systems. However,
                     interference in SSD-based shared storage systems has not
                     been addressed. In this article, two types of
                     interference, namely, queuing delay (QD) interference and
                     garbage collection (GC) interference, are identified in a
                     shared SSD. Additionally, a framework called VSSD is
                     proposed to address these types of interference. VSSD is
                     composed of two components: the FACO credit-based I/O
                     scheduler designed to address QD interference and the
                     ViSA flash translation layer designed to address GC
                     interference. The VSSD framework aims to be implemented
                     in the firmware running on an SSD controller. With VSSD,
                     interference in an SSD can be eliminated and performance
                     isolation can be ensured. Both synthetic and application
                     workloads are used to evaluate the effectiveness of the
                     proposed VSSD framework. The performance results show the
                     following. First, QD and GC interference exists and can
                     result in poor performance isolation between users on
                     SSD-based shared storage systems. Second, VSSD is
                     effective in eliminating the interference and achieving
                     performance isolation between users. Third, the overhead
                     of VSSD is insignificant.},
  acknowledgement = ack-nhfb,
  articleno       = {51},
  fjournal        = {ACM Transactions on Design Automation of
                     Electronic Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Duan:2015:AAP,
  author          = {Qing Duan and Abhishek Koneru and Jun Zeng and Krishnendu
                     Chakrabarty and Gary Dispoto},
  title           = {Accurate Analysis and Prediction of Enterprise
                     Service-Level Performance},
  journal         = j-TODAES,
  volume          = {20},
  number          = {4},
  pages           = {52:1--52:??},
  month           = sep,
  year            = {2015},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/2757279},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Tue Sep 29 08:53:54 MDT 2015},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {An enterprise service-level performance time series is a
                     sequence of data points that quantify demand, throughput,
                     average order-delivery time, quality of service, or
                     end-to-end cost. Analytical and predictive models of such
                     time series can be embedded into an enterprise
                     information system (EIS) in order to provide meaningful
                     insights into potential business problems and generate
                     guidance for appropriate solutions. Time-series analysis
                     includes periodicity detection, decomposition, and
                     correlation analysis. Time-series prediction can be
                     modeled as a regression problem to forecast a sequence of
                     future time-series datapoints based on the given time
                     series. The state-of-the-art (baseline) methods employed
                     in time-series prediction generally apply advanced
                     machine-learning algorithms. In this article, we propose
                     a new univariate method for dealing with midterm
                     time-series prediction. The proposed method first
                     analyzes the hierarchical periodic structure in one time
                     series and decomposes it into trend, season, and noise
                     components. By discarding the noise component, the
                     proposed method only focuses on predicting repetitive
                     season and smoothed trend components. As a result, this
                     method significantly improves upon the performance of
                     baseline methods in midterm time-series prediction.
                     Moreover, we propose a new multivariate method for
                     dealing with short-term time-series prediction. The
                     proposed method utilizes cross-correlation information
                     derived from multiple time series. The amount of data
                     taken from each time series for training the regression
                     model is determined by results from hierarchical
                     cross-correlation analysis. Such a data-filtering
                     strategy leads to improved algorithm efficiency and
                     prediction accuracy. By combining statistical methods
                     with advanced machine-learning algorithms, we have
                     achieved a significantly superior performance in both
                     short-term and midterm time-series predictions compared
                     to state-of-the-art (baseline) methods.},
  acknowledgement = ack-nhfb,
  articleno       = {52},
  fjournal        = {ACM Transactions on Design Automation of
                     Electronic Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Heo:2015:IAS,
  author =       "Ingoo Heo and Minsu Kim and Yongje Lee and Changho
                 Choi and Jinyong Lee and Brent Byunghoon Kang and
                 Yunheung Paek",
  title =        "Implementing an Application-Specific Instruction-Set
                 Processor for System-Level Dynamic Program Analysis
                 Engines",
  journal =      j-TODAES,
  volume =       "20",
  number =       "4",
  pages =        "53:1--53:??",
  month =        sep,
  year =         "2015",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2746238",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Sep 29 08:53:54 MDT 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "In recent years, dynamic program analysis (DPA) has
                 been widely used in various fields such as profiling,
                 finding bugs, and security. However, existing solutions
                 have their own weaknesses. Software solutions provide
                 flexibility in DPA but they suffer from tremendous
                 performance overhead. In contrast, core-level hardware
                 engines rely on specialized integrated logics and
                 attain extremely fast computation, but they have a
                 limited functional extensibility because the logics are
                 tightly coupled with the host processor. To mend this,
                 a prior system-level approach utilizes an existing
                 channel to integrate their hardware without
                 necessitating the host architecture modification and
                 introduced great potential in performance.
                 Nevertheless, the prior work does not address the
                 detailed design and implementation of the engine, which
                 is quite essential to leverage the deployment on real
                 systems. To address this, in this article, we propose
                 an implementation of programmable DPA hardware engine,
                 called program analysis unit (PAU). PAU is an
                 application-specific instruction-set processor (ASIP)
                 whose instruction set is customized to reflect common
                 features of various DPA methods. With the specialized
                 architecture and programmability of software, our PAU
                 aims at fast computation and sufficient flexibility. In
                 our case studies on several DPA techniques, we show
                 that our ASIP approach can be successfully applicable
                 to complex DPA schemes while providing hardware-backed
                 power in performance and software-based flexibility in
                 analysis. Recent experiments on our FPGA prototype
                 revealed that the performance of PAU is 4.7--13.6 times
                 faster than pure software DPA, and the power/area
                 consumption is also acceptably small compared to
                 today's mobile processors.",
  acknowledgement = ack-nhfb,
  articleno =    "53",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Jiang:2015:CLF,
  author =       "Lei Jiang and Bo Zhao and Jun Yang and Youtao Zhang",
  title =        "Constructing Large and Fast On-Chip Cache for Mobile
                 Processors with Multilevel Cell {STT-MRAM}
                 Technology",
  journal =      j-TODAES,
  volume =       "20",
  number =       "4",
  pages =        "54:1--54:??",
  month =        sep,
  year =         "2015",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2764903",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Sep 29 08:53:54 MDT 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Modern mobile processors integrating an increasing
                 number of cores into one single chip demand
                 large-capacity, on-chip, last-level caches (LLCs) in
                 order to achieve scalable performance improvements.
                 However, adopting traditional memory technologies such
                 as SRAM and embedded DRAM (eDRAM) [incurs] leakage and
                 scalability problems. Spin-transfer torque magnetic RAM
                 (STT-MRAM) is a novel nonvolatile memory technology
                 that has emerged as a promising alternative for
                 constructing on-chip caches in high-end mobile
                 processors. STT-MRAM has many advantages, such as short
                 read latency, zero leakage from the memory cell, and
                 better scalability than eDRAM and SRAM. Multilevel cell
                 (MLC) STT-MRAM further enlarges capacity and reduces
                 per-bit cost by storing more bits in one cell. However,
                 MLC STT-MRAM has long write latency which limits the
                 effectiveness of MLC STT-MRAM-based LLCs. In this
                 article, we address this limitation with three novel
                 designs: line pairing (LP), line swapping (LS), and
                 dynamic LP/LS enabler (DLE). LP forms fast cache lines
                 by reorganizing MLC soft bits which are faster to
                 write. LS dynamically stores frequently-written data
                 into these fast cache lines. We then propose a dynamic
                 LP/LS enabler (DLE) to enable LP and LS only if they
                 help to improve the overall cache performance. Our
                 experimental results show that the proposed designs
                 improve system performance by 9--15\% and reduce energy
                 consumption by 14--21\% for various types of mobile
                 processors.",
  acknowledgement = ack-nhfb,
  articleno =    "54",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Samavatian:2015:ALL,
  author          = {Mohammad Hossein Samavatian and Mohammad Arjomand and
                     Ramin Bashizade and Hamid Sarbazi-Azad},
  title           = {Architecting the Last-Level Cache for {GPUs}
                     using {STT}-{RAM} Technology},
  journal         = j-TODAES,
  volume          = {20},
  number          = {4},
  pages           = {55:1--55:??},
  month           = sep,
  year            = {2015},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/2764905},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Tue Sep 29 08:53:54 MDT 2015},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {Future GPUs should have larger L2 caches based on the
                     current trends in VLSI technology and GPU architectures
                     toward increase of processing core count. Larger L2
                     caches inevitably have proportionally larger power
                     consumption. In this article, having investigated the
                     behavior of GPGPU applications, we present an efficient
                     L2 cache architecture for GPUs based on STT-RAM
                     technology. Due to its high-density and low-power
                     characteristics, STT-RAM technology can be utilized in
                     GPUs where numerous cores leave a limited area for
                     on-chip memory banks. They have, however, two important
                     issues, high energy and latency of write operations, that
                     have to be addressed. Low retention time STT-RAMs can
                     reduce the energy and delay of write operations.
                     Nevertheless, employing STT-RAMs with low retention time
                     in GPUs requires a thorough study on the behavior of
                     GPGPU applications. Based on this investigation, we have
                     architectured a two-part STT-RAM-based L2 cache with
                     low-retention (LR) and high-retention (HR) parts. The
                     proposed two-part L2 cache exploits a dynamic threshold
                     regulator (DTR) to efficiently regulate the write
                     threshold for migration of the data blocks from HR to LR,
                     based on the behavior of the applications. Also, a Data
                     and Access type Aware Cache Search mechanism (DAACS) is
                     hired for handling the search of the requested data
                     blocks in two parts of the cache. The STT-RAM L2 cache
                     architecture proposed in this article can improve IPC by
                     up to 171\% (20\% on average), and reduce the average
                     consumed power by 28.9\% compared to a conventional L2
                     cache architecture with equal on-chip area.},
  acknowledgement = ack-nhfb,
  articleno       = {55},
  fjournal        = {ACM Transactions on Design Automation of
                     Electronic Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Indrusiak:2015:FSN,
  author =       "Leandro Soares Indrusiak and James Harbin and Osmar
                 {Marchi Dos Santos}",
  title =        "Fast Simulation of Networks-on-Chip with
                 Priority-Preemptive Arbitration",
  journal =      j-TODAES,
  volume =       "20",
  number =       "4",
  pages =        "56:1--56:??",
  month =        sep,
  year =         "2015",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2755559",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Sep 29 08:53:54 MDT 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "An increasingly time-consuming part of the design flow
                 of on-chip multiprocessors is the simulation of the
                 interconnect architecture. The accurate simulation of
                 state-of-the-art network-on-chip interconnects can take
                 hours, and this process is repeated for each design
                 iteration because it provides valuable insights on
                 communication latencies that can greatly affect the
                 overall performance of the system. In this article, we
                 identify a time-predictable network-on-chip
                 architecture and show that its timing behaviour can be
                 predicted using models which are far less complex than
                 the architecture itself. We then explore such a feature
                 to produce simplified and lightweight simulation models
                 that can produce latency figures with more than 90\%
                 accuracy and simulate more than 1,000 times faster when
                 compared to a cycle-accurate model of the same
                 interconnect.",
  acknowledgement = ack-nhfb,
  articleno =    "56",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Pomeranz:2015:FES,
  author          = {Irith Pomeranz},
  title           = {{FOLD}: Extreme Static Test Compaction by Folding
                     of Functional Test Sequences},
  journal         = j-TODAES,
  volume          = {20},
  number          = {4},
  pages           = {57:1--57:??},
  month           = sep,
  year            = {2015},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/2764455},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Tue Sep 29 08:53:54 MDT 2015},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {This article introduces a new approach to extreme static
                     test compaction for functional test sequences that
                     modifies the sequence in order to enhance the ability to
                     omit test vectors from it and thus compact it. In the new
                     approach, modification of the sequence and omission of
                     test vectors from it are tightly coupled by focusing both
                     subprocedures on subsequences of limited lengths. In a
                     new process that is referred to as folding, a subsequence
                     is partitioned into two halves, and the goal of the
                     modification is to ensure that the two halves are as
                     similar as possible. With similar halves, the expectation
                     is that it will be possible to omit test vectors from the
                     subsequence. Experimental results demonstrate that the
                     procedure produces extremely short functional test
                     sequences for benchmark circuits.},
  acknowledgement = ack-nhfb,
  articleno       = {57},
  fjournal        = {ACM Transactions on Design Automation of
                     Electronic Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Wang:2015:BST,
  author =       "Ran Wang and Krishnendu Chakrabarty and Sudipta
                 Bhawmik",
  title =        "Built-In Self-Test and Test Scheduling for
                 Interposer-Based {2.5D IC}",
  journal =      j-TODAES,
  volume =       "20",
  number =       "4",
  pages =        "58:1--58:??",
  month =        sep,
  year =         "2015",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2757278",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Sep 29 08:53:54 MDT 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Interposer-based 2.5D integrated circuits (ICs) are
                 seen today as a precursor to 3D ICs based on
                 through-silicon vias (TSVs). All the dies and the
                 interposer in a 2.5D IC must be adequately tested for
                 product qualification. We present an efficient built-in
                 self-test (BIST) architecture for targeting defects in
                 dies and in the interposer interconnects. The proposed
                 BIST architecture can also be used for fault diagnosis
                 during interconnect testing. To reduce the overall test
                 cost, we describe a test scheduling and optimization
                 technique under power constraints. We present
                 simulation results to validate the BIST architecture
                 and demonstrate fault detection, synthesis results to
                 evaluate the area overhead of the proposed BIST
                 architecture, and test scheduling results to highlight
                 the effectiveness of the optimization approach.",
  acknowledgement = ack-nhfb,
  articleno =    "58",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Bahar:2015:ISI,
  author          = {R. Iris Bahar and Alex K. Jones and Yuan Xie},
  title           = {Introduction to the Special Issue on Reliable, Resilient,
                     and Robust Design of Circuits and Systems},
  journal         = j-TODAES,
  volume          = {20},
  number          = {4},
  pages           = {59:1--59:??},
  month           = sep,
  year            = {2015},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/2796541},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Tue Sep 29 08:53:54 MDT 2015},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  articleno       = {59},
  fjournal        = {ACM Transactions on Design Automation of
                     Electronic Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

%%% TODAES 20(4), article 60.  The final page is recorded as `??'.
@Article{Kiddie:2015:SEM,
  author = {Bradley T. Kiddie
    and William H. Robinson
    and Daniel B. Limbrick},
  title = {Single-Event Multiple-Transient Characterization and Mitigation
    via Alternative Standard Cell Placement Methods},
  journal = j-TODAES,
  year = {2015},
  month = sep,
  volume = {20},
  number = {4},
  articleno = {60},
  pages = {60:1--60:??},
  doi = {https://doi.org/10.1145/2740962},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  coden = {ATASFO},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems
    (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate = {Tue Sep 29 08:53:54 MDT 2015},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  abstract = {As fabrication technology scales towards smaller transistor
    sizes and lower critical charge, single-event radiation effects are
    more likely to cause errant behavior in multiple, physically adjacent
    devices in modern integrated circuits (ICs), and with higher operating
    frequencies, this risk increasingly impacts design logic over memory
    as well. In order to increase future system reliability, circuit
    designers need greater awareness of multiple-transient charge-sharing
    effects during the early stages of their design flow with standard
    cell placement and routing. To measure the propagation and
    observability of multiple transients from single radiation events,
    this work uses several intra-pipeline combinational logic circuits at
    the 32nm technology node, investigates several different standard cell
    placements of each design, and analyzes those placements with a novel,
    physically realistic transient injection and simulation method. It is
    shown that (1) this simulation methodology, informed by experimental
    data, provides an increased realism over other works in traditional
    fault injection fields, (2) different placements of the same circuit
    where standard cells are grouped by logical hierarchy can result in
    different reliability behavior and benefits especially useful within
    the area of approximate computing, and (3) improved reliability
    through charge-sharing transient mitigation can be gained with no area
    penalty and minimal speed and power penalties by adjusting the
    placement of standard cells.},
}

%%% TODAES 20(4), article 61.  The final page is recorded as `??'.
@Article{Delshadtehrani:2015:SMR,
  author = {Leila Delshadtehrani
    and Hamed Farbeh
    and Seyed Ghassem Miremadi},
  title = {In-Scratchpad Memory Replication: Protecting Scratchpad Memories
    in Multicore Embedded Systems against Soft Errors},
  journal = j-TODAES,
  year = {2015},
  month = sep,
  volume = {20},
  number = {4},
  articleno = {61},
  pages = {61:1--61:??},
  doi = {https://doi.org/10.1145/2770874},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  coden = {ATASFO},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems
    (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate = {Tue Sep 29 08:53:54 MDT 2015},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  abstract = {Scratchpad memories (SPMs) are widely employed in multicore
    embedded processors. Reliability is one of the major constraints in
    the embedded processor design, which is threatened with the increasing
    susceptibility of memory cells to multiple-bit upsets (MBUs) due to
    continuous technology down-scaling. This article proposes a low-cost
    and efficient data replication mechanism, called In-Scratchpad Memory
    Replication (ISMR), to correct MBUs in SPMs of multicore embedded
    processors. The main feature of ISMR is a smart controller, called
    Replication Management Unit (RMU), which is responsible for
    dynamically analyzing the activity of the SPM blocks at runtime and
    efficiently replicating the vulnerable SPM blocks into currently
    inactive SPM blocks. RMU exploits a 2-bit tag for each SPM block,
    where the value of each tag is determined by RMU according to the SPM
    access pattern. Accordingly, the proposed mechanism guarantees the
    replication of all vulnerable SPM blocks to provide error correction
    without decreasing the SPM utilization. To detect errors in SPM
    blocks, ISMR uses a 2-bit interleaved-parity code. As compared with
    the previous E-RAID 1 mechanism, the simulation results illustrate
    that for an 8-core embedded processor, the ISMR mechanism experiences
    81\% less energy consumption overhead and 48\% less performance
    overhead.},
}

%%% TODAES 20(4), article 62.  The final page is recorded as `??'.  The
%%% braces in the title protect `MLC NAND' from style-driven recasing.
@Article{Papandreou:2015:ERM,
  author = {Nikolaos Papandreou
    and Thomas Parnell
    and Haralampos Pozidis
    and Thomas Mittelholzer
    and Evangelos Eleftheriou
    and Charles Camp
    and Thomas Griffin
    and Gary Tressler
    and Andrew Walls},
  title = {Enhancing the Reliability of {MLC NAND} Flash Memory Systems by
    Read Channel Optimization},
  journal = j-TODAES,
  year = {2015},
  month = sep,
  volume = {20},
  number = {4},
  articleno = {62},
  pages = {62:1--62:??},
  doi = {https://doi.org/10.1145/2699866},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  coden = {ATASFO},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems
    (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate = {Tue Sep 29 08:53:54 MDT 2015},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  abstract = {NAND flash memory is not only the ubiquitous storage medium
    in consumer applications but has also started to appear in enterprise
    storage systems as well. MLC and TLC flash technology made it possible
    to store multiple bits in the same silicon area as SLC, thus reducing
    the cost per amount of data stored. However, at current sub-20nm
    technology nodes, MLC flash devices fail to provide the levels of raw
    reliability, mainly cycling endurance, that are required by typical
    enterprise applications. Advanced signal processing and coding schemes
    are needed to improve the flash bit error rate and thus elevate the
    device reliability to the desired level. In this article, we report on
    the use of adaptive voltage thresholds and cell-to-cell interference
    cancellation in the read operation of NAND flash devices. We discuss
    how the optimal read voltage thresholds can be determined and assess
    the benefit of cancelling cell-to-cell interference in terms of
    cycling endurance, data retention, and resilience to read disturb.},
}

%%% TODAES 20(4), article 63.  The final page is recorded as `??'.
@Article{Xu:2015:ICF,
  author = {Cong Xu
    and Dimin Niu
    and Yang Zheng
    and Shimeng Yu
    and Yuan Xie},
  title = {Impact of Cell Failure on Reliable Cross-Point Resistive Memory
    Design},
  journal = j-TODAES,
  year = {2015},
  month = sep,
  volume = {20},
  number = {4},
  articleno = {63},
  pages = {63:1--63:??},
  doi = {https://doi.org/10.1145/2753759},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  coden = {ATASFO},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems
    (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate = {Tue Sep 29 08:53:54 MDT 2015},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  abstract = {Resistive random access memory (ReRAM) technology is an
    emerging candidate for next-generation nonvolatile memory (NVM)
    architecture due to its simple structure, low programming voltage,
    fast switching speed, high on/off ratio, excellent scalability, good
    endurance, and great compatibility with silicon CMOS technology. The
    most attractive of the characteristics of ReRAM is its cross-point
    structure, which features a 4 F$^2$ cell size. In a cross-point
    structure, the existence of sneak current and resulting voltage loss
    due to the wire's resistance might cause read and write failures if
    not designed properly. In addition, a robust ReRAM design needs to
    deal with both soft and hard errors. In this article, we summarize
    mechanisms of both soft and hard errors of ReRAM cells and propose a
    unified model to characterize different failure behaviors. We
    quantitatively analyze the impact of cell failure types on the
    reliability of the cross-point array. We also propose an
    error-resilient architecture, which avoids unnecessary writes in the
    hard error detection unit. Assuming constant soft error rate, our
    approach can extend the lifetime of ReRAM up to 75\% over a design
    without hard error detection and up to 12\% over the design with a
    ``write-verify'' detection mechanism. Our approach yields greater
    significant lifetime improvement when considering postcycling
    retention degradation.},
}

%%% TODAES 20(4), article 64.  The final page is recorded as `??'.  The
%%% braces in the title protect `MOS' from style-driven recasing.
@Article{Zhang:2015:RLP,
  author = {Renyuan Zhang
    and Mineo Kaneko},
  title = {Robust and Low-Power Digitally Programmable Delay Element
    Designs Employing Neuron-{MOS} Mechanism},
  journal = j-TODAES,
  year = {2015},
  month = sep,
  volume = {20},
  number = {4},
  articleno = {64},
  pages = {64:1--64:??},
  doi = {https://doi.org/10.1145/2740963},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  coden = {ATASFO},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems
    (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate = {Tue Sep 29 08:53:54 MDT 2015},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  abstract = {The feasibility of designing digitally programmable delay
    elements (PDEs) employing neuron-MOS mechanism is investigated in this
    work. By coupling the capacitors on the gate of the MOS transistor,
    the current flowing through the transistor can be digitally tuned
    without additional static power consumption. Various switching delays
    are generated by a clock buffer stage in this manner. Two types of
    neuron-MOS-based PDEs are suggested in this article. One of them is
    realized by directly applying capacitor-coupling technology on the
    transistors of an inverter as a clock buffer. The delay
    programmability is realized by tuning the charging/discharging current
    through the neuron-MOS inverter digitally. Since no additional
    transistor is introduced into the charging/discharging path, the
    performance fluctuation due to process variations on MOS transistors
    is reduced. The temperature effect is also partially compensated by
    the proposed neuron-MOS implementation. Another type of PDE circuit is
    proposed by employing a reliable reference-current-generator, where
    the neuron-MOS transistor acts as a linearly tunable resistance. A
    stable reference current is generated and used for
    charging/discharging the inverter as a clock buffer. As a result, the
    switching delay of the inverter is linearly programmed by digital
    input patterns. In general, both types of suggested PDE circuits
    achieve improved or fair performances over the robustness, power
    consumption, and linearity.},
}

%%% TODAES 20(4), article 65.  The final page is recorded as `??'.
%%% The abstract's ``approximately 2,300 times'' is written with
%%% $ \sim $ because a bare `~' is a TeX tie (unbreakable space) and
%%% would leave no visible sign in typeset output.
@Article{Kim:2015:UIL,
  author =       "Hyungjun Kim and Siva Bhanu Krishna Boga and Arseniy
                 Vitkovskiy and Stavros Hadjitheophanous and Paul V.
                 Gratz and Vassos Soteriou and Maria K. Michael",
  title =        "Use It or Lose It: Proactive, Deterministic Longevity
                 in Future Chip Multiprocessors",
  journal =      j-TODAES,
  volume =       "20",
  number =       "4",
  pages =        "65:1--65:??",
  month =        sep,
  year =         "2015",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2770873",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Sep 29 08:53:54 MDT 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Moore's Law scaling continues to yield higher
                 transistor density with each succeeding process
                 generation, leading to today's many-core chip
                 multiprocessors (CMPs) with tens or even hundreds of
                 interconnected cores or tiles. Unfortunately, deep
                 submicron CMOS process technology is marred by
                 increasing susceptibility to wear. Prolonged
                 operational stress gives rise to accelerated wearout
                 and failure due to several physical failure mechanisms,
                 including hot-carrier injection (HCI) and negative-bias
                 temperature instability (NBTI). Each failure mechanism
                 correlates with different usage-based stresses, all of
                 which can eventually generate permanent faults. While
                 the wearout of an individual core in many-core CMPs may
                 not necessarily be catastrophic, a single fault in the
                 interprocessor network-on-chip (NoC) fabric could
                 render the entire chip useless, as it could lead to
                 protocol-level deadlocks, or even partition away vital
                 components such as the memory controller or other
                 critical I/O. In this article, we study HCI- and
                 NBTI-induced wear due to actual stresses caused by real
                 workloads, applied onto the interconnect
                 microarchitecture and develop a critical path model for
                 NBTI-induced wearout. A key finding of this modeling is
                 that, counter to prevailing wisdom, wearout in the
                 CMP's on-chip interconnect is correlated with lack of
                 load observed in the NoC routers rather than high load.
                 We then develop a novel wearout-decelerating scheme in
                 which routers under low load have their wear-sensitive
                 components exercised without significantly impacting
                 cycle time, pipeline depth, area, or power consumption
                 of the overall router. A novel deterministic approach
                 is proposed for the generation of appropriate
                 exercise-mode data, ensuring design parameter targets
                 are met. We subsequently show that the proposed design
                 yields an $ \sim $2,300$ \times $ decrease in the rate
                 of wear.",
  acknowledgement = ack-nhfb,
  articleno =    "65",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

%%% TODAES 20(4), article 66.  The final page is recorded as `??'.
%%% The fourth author's compound surname is braced so that BibTeX keeps
%%% it as one unit.
%%% NOTE(review): the given name is accented and the particle `de' is
%%% lowercased here on the assumption that this is the author's usual
%%% spelling -- confirm against the article itself.
@Article{Kahng:2015:IMR,
  author =       "Andrew B. Kahng and Seokhyeong Kang and Jiajia Li and
                 Jos{\'e} {Pineda de Gyvez}",
  title =        "An Improved Methodology for Resilient Design
                 Implementation",
  journal =      j-TODAES,
  volume =       "20",
  number =       "4",
  pages =        "66:1--66:??",
  month =        sep,
  year =         "2015",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2749462",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Sep 29 08:53:54 MDT 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Resilient design techniques are used to (i) ensure
                 correct operation under dynamic variations and to (ii)
                 improve design performance (e.g., timing speculation).
                 However, significant overheads (e.g., 16\% and 14\%
                 energy penalties due to throughput degradation and
                 additional circuits) are incurred by existing resilient
                 design techniques. For instance, resilient designs
                 require additional circuits to detect and correct
                 timing errors. Further, when there is an error, the
                 additional cycles needed to restore a previous correct
                 state degrade throughput, which diminishes the
                 performance benefit of using resilient designs. In this
                 work, we describe an improved methodology for resilient
                 design implementation to minimize the costs of
                 resilience in terms of power, area, and throughput
                 degradation. Our methodology uses two levers:
                 selective-endpoint optimization (i.e.,
                 sensitivity-based margin insertion) and clock skew
                 optimization. We integrate the two optimization
                 techniques in an iterative optimization flow which
                 comprehends toggle rate information and the trade-off
                 between cost of resilience and margin on combinational
                 paths. Since the error-detection network can result in
                 up to 9\% additional wirelength cost, we also propose a
                 matching-based algorithm for construction of the
                 error-detection network to minimize this resilience
                 overhead. Further, our implementations comprehend the
                 impacts of signoff corners (in particular, hold
                 constraints, and use of typical vs. slow libraries) and
                 process variation, which are typically omitted in
                 previous studies of resilience trade-offs. Our proposed
                 flow achieves energy reductions of up to 21\% and 10\%
                 compared to a conventional (with only margin used to
                 attain robustness) design and a brute-force
                 implementation (i.e., a typical resilient design, where
                 resilient endpoints are (greedily) instantiated at
                 timing-critical endpoints), respectively. We show that
                 these benefits increase in the context of an adaptive
                 voltage scaling strategy.",
  acknowledgement = ack-nhfb,
  articleno =    "66",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

%%% TODAES 21(1), article 1.  The final page is recorded as `??'.  The
%%% braces in the title protect the mixed-case name `FuzzRoute' from
%%% style-driven recasing.  Percentages in the abstract are written
%%% tight (`1\%'), with no space before the percent sign.
@Article{Roy:2015:FTE,
  author =       "Debashri Roy and Prasun Ghosal and Saraju Mohanty",
  title =        "{FuzzRoute}: a Thermally Efficient Congestion-Free
                 Global Routing Method for Three-Dimensional Integrated
                 Circuits",
  journal =      j-TODAES,
  volume =       "21",
  number =       "1",
  pages =        "1:1--1:??",
  month =        nov,
  year =         "2015",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2767127",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Dec 3 10:15:10 MST 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "The high density of interconnects, closer proximity of
                 modules, and routing phase are pivotal during the
                 layout of a performance-centric three-dimensional
                 integrated circuit (3D IC). Heuristic-based approaches
                 are typically used to handle such NP-complete problems
                 of global routing in 3D ICs. To overcome the inherent
                 limitations of deterministic approaches, a novel
                 methodology for multi-objective global routing based on
                 fuzzy logic has been proposed in this article. The
                 guiding information generated after the placement phase
                 is used during routing with the help of a fuzzy expert
                 system to achieve thermally efficient and
                 congestion-free routing. A complete global routing
                 solution is designed based on the proposed algorithms
                 and the results are compared with selected fully
                 established global routers, namely Labyrinth,
                 FastRoute3.0, NTHU-R, BoxRouter 2.0, FGR,
                 NTHU-Route2.0, FastRoute4.0, NCTU-GR, MGR, and
                 NCTU-GR2.0. Experiments are performed over ISPD 1998
                 and 2008 benchmarks. The proposed router, called
                 FuzzRoute, achieves balanced superiority in terms of
                 routability, runtime, and wirelength over others. The
                 improvements on routing time for Labyrinth, BoxRouter
                 2.0, and FGR are 91.81\%, 86.87\%, and 32.16\%,
                 respectively, for ISPD 1998 benchmarks. It may be noted
                 that, though FastRoute3.0 achieves fastest runtime, it
                 fails to generate congestion-free solutions for all
                 benchmarks, which is overcome by the proposed FuzzRoute
                 of the current article. It also shows wirelength
                 improvements of 17.35\%, 2.88\%, 2.44\%, 2.83\%, and
                 2.10\%, respectively, over others for ISPD 1998
                 benchmarks. For ISPD 2008 benchmark circuits it also
                 provides 2.5\%, 2.6\%, 1\%, 1.1\%, and 0.3\% lesser
                 wirelength and averagely runs 1.68$ \times $, 6.42$
                 \times $, 2.21$ \times $, 0.76$ \times $, and 1.54$
                 \times $ faster than NTHU-Route2.0, FastRoute4.0,
                 NCTU-GR, MGR, and NCTU-GR2.0, respectively.",
  acknowledgement = ack-nhfb,
  articleno =    "1",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

%%% TODAES 21(1), article 2.  The final page is recorded as `??'.
@Article{Zhang:2015:LDP,
  author = {Ye Zhang
    and Wai-Shing Luk
    and Yunfeng Yang
    and Hai Zhou
    and Changhao Yan
    and David Z. Pan
    and Xuan Zeng},
  title = {Layout Decomposition with Pairwise Coloring and Adaptive
    Multi-Start for Triple Patterning Lithography},
  journal = j-TODAES,
  year = {2015},
  month = nov,
  volume = {21},
  number = {1},
  articleno = {2},
  pages = {2:1--2:??},
  doi = {https://doi.org/10.1145/2764904},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  coden = {ATASFO},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems
    (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate = {Thu Dec 3 10:15:10 MST 2015},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  abstract = {In this article we present a pairwise coloring (PWC) approach
    to tackle the layout decomposition problem for triple patterning
    lithography (TPL). The main idea is to reduce the problem to a set of
    bi-coloring problems. The overall solution is refined by applying a
    bi-coloring method for pairs of color sets per pass. One obvious
    advantage of this method is that the existing double patterning
    lithography (DPL) techniques can be reused effortlessly. Moreover, we
    observe that each pass can be fulfilled efficiently by integrating an
    SPQR-tree-graph-division-based bi-coloring method. In addition, to
    prevent the solution getting stuck in the local minima, an adaptive
    multi-start (AMS) approach is incorporated. Adaptive starting points
    are generated according to the vote of previous solutions. The
    experimental results show that our method is competitive with other
    works on both solution quality and runtime performance.},
}

%%% TODAES 21(1), article 3.  The final page is recorded as `??'.  The
%%% braces in the title protect the acronym `DARP-MP' from style-driven
%%% recasing.  The parenthetical break after `(DARP)' in the abstract is
%%% a TeX em-dash (`---'), not a hyphen.
@Article{Chen:2015:DMD,
  author =       "Hu Chen and Sanghamitra Roy and Koushik Chakraborty",
  title =        "{DARP-MP}: Dynamically Adaptable Resilient Pipeline
                 Design in Multicore Processors",
  journal =      j-TODAES,
  volume =       "21",
  number =       "1",
  pages =        "3:1--3:??",
  month =        nov,
  year =         "2015",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2755558",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Dec 3 10:15:10 MST 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "In this article, we demonstrate that the sensitized
                 path delays in various microprocessor pipe stages
                 exhibit intriguing temporal and spatial variations
                 during the execution of real-world applications. To
                 effectively exploit these delay variations, we propose
                 dynamically adaptable resilient pipeline (DARP)---a
                 series of runtime techniques to boost power-performance
                 efficiency and fault tolerance in a pipelined
                 microprocessor. DARP employs early error prediction to
                 avoid a major portion of the timing errors. We combine
                 DARP with the state-of-art topologically homogeneous
                 and power-performance heterogeneous (THPH) architecture
                 to build up a new frontier for the energy efficiency of
                 multicore processors (DARP-MP). Using a rigorous
                 circuit-architectural infrastructure, we demonstrate
                 that DARP substantially improves the multicore
                 processor performance (9.4--20\%) and energy efficiency
                 (10--28.6\%) compared to state-of-the-art techniques.
                 The energy-efficiency improvements of DARP-MP are 42\%
                 and 49.9\% compared against the original THPH and
                 another state-of-art multicore power management scheme,
                 respectively.",
  acknowledgement = ack-nhfb,
  articleno =    "3",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

%%% TODAES 21(1), article 4.  The final page is recorded as `??'.
@Article{Kim:2015:MMS,
  author = {Myungsun Kim
    and Jinkyu Koo
    and Hyojung Lee
    and James R. Geraci},
  title = {Memory Management Scheme to Improve Utilization Efficiency and
    Provide Fast Contiguous Allocation without a Statically Reserved Area},
  journal = j-TODAES,
  year = {2015},
  month = nov,
  volume = {21},
  number = {1},
  articleno = {4},
  pages = {4:1--4:??},
  doi = {https://doi.org/10.1145/2770871},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  coden = {ATASFO},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems
    (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate = {Thu Dec 3 10:15:10 MST 2015},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  abstract = {Fast allocation of large blocks of physically contiguous
    memory plays a crucial role to boost the performance of multimedia
    applications in modern memory-constrained portable devices, such as
    smartphones, tablets, etc. Existing systems have addressed this issue
    by provisioning a large statically reserved memory area (SRA) in which
    only dedicated applications can allocate pages. However, this in turn
    degrades the performance of applications that are prohibited to
    utilize the SRA due to the reduced available memory pool. To overcome
    this drawback while maintaining the benefits of the SRA, we propose a
    new memory management scheme that uses a special memory region, called
    page-cache-preferred area (PCPA), in concert with a quick memory
    reclaiming algorithm. The key of the proposed scheme is to enhance the
    memory utilization efficiency by enabling to allocate page-cached
    pages of all applications in the PCPA until predetermined applications
    require to allocate big chunks of contiguous memory. At this point,
    clean page-cached pages in the PCPA are rapidly evicted without
    write-back to a secondary storage. Compared to the SRA scheme,
    experimental results show that the average launch time of real-world
    applications and the execution time of I/O-intensive benchmarks are
    reduced by 9.2\% and 24.7\%, respectively.},
}

@Article{Oboril:2015:EIS,
  author = {Fabian Oboril and Mehdi B. Tahoori},
  title = {Exploiting Instruction Set Encoding for Aging-Aware
    Microprocessor Design},
  journal = j-TODAES,
  volume = {21},
  number = {1},
  pages = {5:1--5:??},
  month = nov,
  year = {2015},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/2783435},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Thu Dec 3 10:15:10 MST 2015},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {Microprocessors fabricated at nanoscale nodes are
    exposed to accelerated transistor aging due to bias
    temperature instability and hot carrier injection. As a
    result, device delays increase over time, reducing the
    mean time to failure (MTTF) and hence lifetime of the
    processor. To address this challenge, many
    (micro)-architectural techniques target the execution
    stage of the instruction pipeline, as this one is
    typically most critical. However, also the decoding
    stages can become aging critical and limit the
    microprocessor lifetime, as we will show in this work.
    Therefore, we propose a novel aging-aware instruction
    set-encoding methodology (ArISE) that improves the
    instruction encoding iteratively using a heuristic
    algorithm. In addition, the switching activities of the
    affected memory elements are considered in order to
    co-optimize lifetime and energy efficiency. Our
    experimental results show that MTTF of the decoding
    stages can be improved by 2.3$ \times $ with negligible
    implementation costs.},
  acknowledgement = ack-nhfb,
  articleno = {5},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{More:2015:LAN,
  author = {Ankit More and Baris Taskin},
  title = {Locality-Aware Network Utilization Balancing in
    {NoCs}},
  journal = j-TODAES,
  volume = {21},
  number = {1},
  pages = {6:1--6:??},
  month = nov,
  year = {2015},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/2743012},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Thu Dec 3 10:15:10 MST 2015},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {Hierarchical and multi-network networks-on-chip (NoCs)
    have been proposed in the literature to improve the
    energy- and performance-efficient scalability of the
    traditional flat-mesh NoC architecture. Theoretically,
    based on a small-world network-based analysis,
    traditional hierarchical NoCs are expected to provide
    good scalability. However, the traditional theoretical
    analysis (e.g. for small-worldness) does not take into
    account the congestion phenomenon experienced in such
    networks. Counterintuitively, as shown in this work,
    breaking the hierarchy in traditional hierarchical NoCs
    and utilizing the proposed locality-aware network
    utilization (NU) balancing technique performs better.
    This improvement in performance is observed through
    experimental analysis, which is contrasted with the
    theoretical analysis that does not account for
    congestion. In addition to the novelties for
    hierarchical networks, the application of the proposed
    locality-aware NU balancing scheme is extended to
    multi-network NoC topologies (with already separated
    networks). Results of the analysis show the superiority
    of applying the locality-aware NU balancing technique
    for a throughput and energy-efficient scaling of the
    multi-network NoC architectures, much like those of the
    hierarchical NoCs. For instance, for a NoC with 1024
    nodes, the proposed NU balancing technique provides up
    to 95\% higher throughput efficiency and consumes up to
    29\% less energy per flit compared to the best NoC
    topology without the NU balancing technique. The
    analysis also helps to render the choice of a NoC
    topology for traffic patterns varying in locality and
    nonlocality on exascale computing CMPs.},
  acknowledgement = ack-nhfb,
  articleno = {6},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Cheng:2015:ABW,
  author = {Hsiang-Yun Cheng and Mary Jane Irwin and Yuan Xie},
  title = {Adaptive Burst-Writes {(ABW)}: Memory Requests
    Scheduling to Reduce Write-Induced Interference},
  journal = j-TODAES,
  volume = {21},
  number = {1},
  pages = {7:1--7:??},
  month = nov,
  year = {2015},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/2753757},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Thu Dec 3 10:15:10 MST 2015},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {Main memory latencies have become a major performance
    bottleneck for chip-multiprocessors (CMPs). Since reads
    are on the critical path, existing memory controllers
    prioritize reads over writes. However, writes must be
    eventually processed when the write queue is full.
    These writes are serviced in a burst to reduce the bus
    turnaround delay and increase the row-buffer locality.
    Unfortunately, a large number of reads may suffer long
    queuing delay when the burst-writes are serviced. The
    long write latency of future nonvolatile memory will
    further exacerbate the long queuing delay of reads
    during burst-writes. In this article, we propose a
    run-time mechanism, Adaptive Burst-Writes (ABW), to
    reduce the queuing delay of reads. Based on the
    row-buffer hit rate of writes and the arrival rate of
    reads, we dynamically control the number of writes
    serviced in a burst to trade off the write service time
    and the queuing latency of reads. For prompt
    adjustment, our history-based mechanism further
    terminates the burst-writes earlier when the row-buffer
    hit rate of writes in the previous burst-writes is low.
    As a result, our policy improves system throughput by
    up to 28\% (average 10\%) and 43\% (average 14\%) in
    CMPs with DRAM-based and PCM-based main memory.},
  acknowledgement = ack-nhfb,
  articleno = {7},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Ochoa-Ruiz:2015:MAR,
  author = {Gilberto Ochoa-Ruiz and S{\'e}bastien Guillet and
    Florent {De Lamotte} and Eric Rutten and El-Bay
    Bourennane and Jean-Philippe Diguet and Guy Gogniat},
  title = {An {MDE} Approach for Rapid Prototyping and
    Implementation of Dynamic Reconfigurable Systems},
  journal = j-TODAES,
  volume = {21},
  number = {1},
  pages = {8:1--8:??},
  month = nov,
  year = {2015},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/2800784},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Thu Dec 3 10:15:10 MST 2015},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {This article presents a co-design methodology based on
    RecoMARTE, an extension to the well-known UML MARTE
    profile, which is used for the specification and
    automatic generation of Dynamic and Partially
    Reconfigurable Systems-on-Chip (DRSoC). This endeavor
    is part of a larger framework in which Model-Driven
    Engineering (MDE) techniques are extensively used for
    modeling and via model transformations, generating
    executable models, which are exploited by
    implementation tools to create reconfigurable systems.
    More specifically, the methodological aspects presented
    in this article are concerned with expediting the
    conception and implementation of the hardware platform
    and the integration of correct by construction
    reconfiguration controller. This article builds upon
    previous research by integrating previously separated
    endeavors to obtain a complete PR system generation
    chain, which aims at shielding the designer of many of
    the burdensome technological and tool-specific
    requirements. The methodology permits for the
    verification of the platform description at different
    stages in the development process (i.e., HDL for
    simulation, static FPGA implementation, controller
    simulation and verification). Furthermore, automation
    capabilities embedded in the flow enable the generation
    of the platform description and the integration of the
    reconfiguration controller executive seamlessly. In
    order to demonstrate the benefits of the proposed
    approach, we present a case study in which we target
    the creation of an image-processing application to be
    deployed onto an FPGA board. We present the required
    modeling strategies and we discuss how the generation
    chains are integrated with the back-end Xilinx tools
    (the most mature version of PR technology) to produce
    the necessary executable artifacts: VHDL for the
    platform description and a C description of the
    reconfiguration controller to be executed by an
    embedded processor.},
  acknowledgement = ack-nhfb,
  articleno = {8},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Huang:2015:CPM,
  author = {Shih-Hsu Huang and Hua-Hsin Yeh and Yow-Tyng Nieh},
  title = {Clock Period Minimization with Minimum Leakage Power},
  journal = j-TODAES,
  volume = {21},
  number = {1},
  pages = {9:1--9:??},
  month = nov,
  year = {2015},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/2778954},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Thu Dec 3 10:15:10 MST 2015},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {In the design of nonzero clock skew circuits, an
    increase of the short-path delay may improve circuit
    speed or reduce leakage power. However, the impact of
    increasing the short-path delay on the trade-off
    between circuit speed and leakage power has not been
    well studied. An analysis of previous works shows that
    they can be classified into two independent groups. One
    group uses extra buffers to increase the short-path
    delay for achieving the lower bound of the clock
    period; however, this group has a large overhead of
    leakage power. The other group uses the combination of
    threshold voltage assignment and gate sizing (TVA/GS)
    to increase the short-path delay as possible for
    reducing leakage power; however, this group often does
    not work with the lower bound of the clock period.
    Accordingly, this article considers the simultaneous
    application of buffer insertion and TVA/GS during clock
    skew scheduling. Our objective is to minimize the
    leakage power for working with the lower bound of the
    clock period. To the best of our knowledge, our
    approach is the first leakage-power-aware clock skew
    scheduling that guarantees working with the lower bound
    of the clock period. Benchmark data consistently show
    that our approach achieves good results in terms of
    both the circuit speed and the leakage power.},
  acknowledgement = ack-nhfb,
  articleno = {9},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

%%% Abstract note: "approximately 10%" is written with $ \sim $ because a
%%% bare ~ is a TeX tie (nonbreaking space) and would not print a tilde.
@Article{Subramaniam:2015:FPM,
  author =       "Anupama R. Subramaniam and Janet Roveda and Yu Cao",
  title =        "A Finite-Point Method for Efficient Gate
                 Characterization Under Multiple Input Switching",
  journal =      j-TODAES,
  volume =       "21",
  number =       "1",
  pages =        "10:1--10:??",
  month =        nov,
  year =         "2015",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2778970",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Dec 3 10:15:10 MST 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Timing characterization of standard cells is one of
                 the essential steps in VLSI design. The traditional
                 static timing analysis (STA) tool assumes single input
                 switching models for the characterization of multiple
                 input gates. However, due to technology scaling,
                 increasing operating frequency, and process variation,
                 the probability of the occurrence of multiple input
                 switching (MIS) is increasing. On the other hand,
                 considering all possible MIS scenarios for the
                 characterization of multiple input logic gates, is
                 computationally intensive. To improve the efficiency,
                 this work proposes a finite-point-based
                 characterization methodology for multiple input gates
                 with the effects of MIS. Furthermore, delay variation
                 due to MIS is integrated into the STA flow through
                 propagation of switching windows. The proposed modeling
                 methodology is validated using benchmark circuits at
                 the 45nm technology node for various operating
                 conditions. Experimental results demonstrate
                 significant reduction in computation cost and data
                 volume with less than $ \sim $10\% error compared to
                 that of traditional SPICE simulation.",
  acknowledgement = ack-nhfb,
  articleno =    "10",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

%%% Abstract note: the minimum-supply-voltage symbol is typeset as a
%%% subscripted math symbol, $ V_{\rm CCMIN} $, instead of V$^{CCMIN}$.
%%% NOTE(review): subscript form assumed from the conventional Vcc-min
%%% notation; confirm against the published abstract.
@Article{Jung:2015:LMS,
  author =       "Dongha Jung and Hokyoon Lee and Seon Wook Kim",
  title =        "Lowering Minimum Supply Voltage for Power-Efficient
                 Cache Design by Exploiting Data Redundancy",
  journal =      j-TODAES,
  volume =       "21",
  number =       "1",
  pages =        "11:1--11:??",
  month =        nov,
  year =         "2015",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2795229",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Dec 3 10:15:10 MST 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Voltage scaling is known to be an efficient way of
                 saving power and energy within a system, and large
                 caches such as LLCs are good candidates for voltage
                 scaling considering their constantly increasing size.
                 However, the $ V_{\rm CCMIN} $ problem, in which the
                 lower bound of scalable voltage is limited by process
                 variation, has made it difficult to exploit the
                 benefits of voltage scaling. Lowering $ V_{\rm CCMIN} $
                 incurs multibit faults, which cannot be efficiently
                 resolved by current technologies due to their high
                 complexity and power consumption. We overcame the
                 limitation by exploiting the data redundancy of memory
                 hierarchy. For example, cache coherence states and
                 several layers of cache organization naturally expose
                 the existence of redundancy within cache blocks. If
                 blocks have redundant copies, their $ V_{\rm CCMIN} $
                 can be lowered; although more faults can occur in the
                 blocks, they can be efficiently detected by simple
                 error detection codes and recovered by reloading the
                 redundant copies. Our scheme requires only minor
                 modifications to the existing cache design. We verified
                 our proposal on a cycle accurate simulator with
                 SPLASH-2 and PARSEC benchmark suites and found that the
                 $ V_{\rm CCMIN} $ of a 2MB L2 cache can be further
                 lowered by 0.1V in 32nm technology with negligible
                 degradation in performance. As a result, we could
                 achieve 15.6\% of reduction in dynamic power and 15.4\%
                 of reduction in static power compared to the previous
                 minimum power.",
  acknowledgement = ack-nhfb,
  articleno =    "11",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Qin:2015:CSE,
  author = {Ying Qin and Shengyu Shen and Qingbo Wu and Huadong
    Dai and Yan Jia},
  title = {Complementary Synthesis for Encoder with Flow Control
    Mechanism},
  journal = j-TODAES,
  volume = {21},
  number = {1},
  pages = {12:1--12:??},
  month = nov,
  year = {2015},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/2794079},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Thu Dec 3 10:15:10 MST 2015},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {Complementary synthesis automatically generates an
    encoder's decoder with the assumption that the
    encoder's all input variables can always be uniquely
    determined by its output symbol sequence. However, to
    prevent the faster encoder from overwhelming the slower
    decoder, many encoders employ flow control mechanism
    that fails this assumption. Such encoders, when their
    output symbol sequences are too fast to be processed by
    the decoders, will stop transmitting data symbols, but
    instead transmitting idle symbols that can only
    uniquely determine a subset of the encoder's input
    variables. And the decoder should recognize and discard
    these idle symbols. This mechanism fails the assumption
    of all complementary synthesis algorithms, because some
    input variables can't be uniquely determined by the
    idle symbol. A novel algorithm is proposed to handle
    such encoders. First, it identifies all input variables
    that can be uniquely determined, and takes them as flow
    control variables. Second, it infers a predicate over
    these flow control variables that enables all other
    input variables to be uniquely determined. Third, it
    characterizes the decoder's Boolean function with Craig
    interpolant. Experimental results on several complex
    encoders indicate that this algorithm can always
    correctly identify the flow control variables, infer
    the predicates and generate the decoder's Boolean
    functions.},
  acknowledgement = ack-nhfb,
  articleno = {12},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Pomeranz:2015:ETC,
  author = {Irith Pomeranz},
  title = {Enhanced Test Compaction for Multicycle Broadside
    Tests by Using State Complementation},
  journal = j-TODAES,
  volume = {21},
  number = {1},
  pages = {13:1--13:??},
  month = nov,
  year = {2015},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/2778953},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Thu Dec 3 10:15:10 MST 2015},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {Multicycle tests support test compaction by allowing
    each test to detect more target faults. The ability of
    multicycle broadside tests to provide test compaction
    depends on the ability of primary input sequences to
    take the circuit between pairs of states that are
    useful for detecting target faults. This ability can be
    enhanced by adding design-for-testability (DFT) logic
    that allows states to be complemented. This article
    describes a test compaction procedure that uses such
    DFT logic to form a compact multicycle broadside test
    set for transition faults where the tests use constant
    primary input vectors. The use of complemented states
    also allows the procedure to increase the transition
    fault coverage beyond the transition fault coverage of
    a broadside test set. The procedure has the option of
    increasing the switching activity of the tests
    gradually in order to explore the tradeoff between the
    number of tests, the fault coverage, and the switching
    activity.},
  acknowledgement = ack-nhfb,
  articleno = {13},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

%%% Abstract note: "low-power" is hyphenated (the source text ran the two
%%% words together), and the comparison is typeset as $ \geq $ rather than
%%% a math ">" followed by a text "=".
@Article{Potluri:2015:DAT,
  author =       "Seetal Potluri and A. Satya Trinadh and Sobhan Babu
                 Ch. and V. Kamakoti and Nitin Chandrachoodan",
  title =        "{DFT} Assisted Techniques for Peak Launch-to-Capture
                 Power Reduction during Launch-On-Shift At-Speed
                 Testing",
  journal =      j-TODAES,
  volume =       "21",
  number =       "1",
  pages =        "14:1--14:??",
  month =        nov,
  year =         "2015",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2790297",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Dec 3 10:15:10 MST 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Scan-based testing is crucial to ensuring correct
                 functioning of chips. In this scheme, the scan and
                 capture phases are interleaved. It is well known that
                 for large designs, excessive switching activity during
                 the launch-to-capture window leads to high voltage
                 droop on the power grid, ultimately resulting in false
                 delay failures during at-speed test. This article
                 proposes a new design-for-testability (DFT) scheme for
                 launch-on-shift (LOS) testing, which ensures that the
                 combinational logic remains undisturbed between the
                 interleaved capture phases, providing
                 computer-aided-design (CAD) tools with extra search
                 space for minimizing launch-to-capture switching
                 activity through test pattern ordering (TPO). We
                 further propose a new TPO algorithm that keeps track of
                 the don't cares during the ordering process, so that
                 the don't care filling step after the ordering process
                 yields a better reduction in launch-to-capture
                 switching activity compared to any other technique in
                 the literature. The proposed DFT-assisted technique,
                 when applied to circuits in ITC99 benchmark suite,
                 produces an average reduction of 17.68\% in peak
                 launch-to-capture switching activity (CSA) compared to
                 the best known low-power TPO technique. Even for
                 circuits whose test cubes are not rich in don't care
                 bits, the proposed technique produces an average
                 reduction of 15\% in peak CSA, while for the circuits
                 with test cubes rich in don't care bits ($ \geq $75\%),
                 the average reduction is 24\%. The proposed technique
                 also reduces the average power dissipation (considering
                 both scan cells and combinational logic) during the
                 scan phase by about 43.5\% on an average, compared to
                 the adjacent filling technique.",
  acknowledgement = ack-nhfb,
  articleno =    "14",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Huang:2015:PDU,
  author = {Chien-Chih Huang and Chin-Long Wey and Jwu-E Chen and
    Pei-Wen Luo},
  title = {Performance-Driven Unit-Capacitor Placement of
    Successive-Approximation-Register {ADCs}},
  journal = j-TODAES,
  volume = {21},
  number = {1},
  pages = {15:1--15:??},
  month = nov,
  year = {2015},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/2770872},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Thu Dec 3 10:15:10 MST 2015},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {The performance of many switched-capacitor analog
    integrated circuits, such as analog-to-digital
    converters (ADCs) and sample and hold circuits, is
    directly related to their accurate capacitance ratios.
    In general, capacitor mismatch can result from two
    sources of errors: random mismatch and systematic
    mismatch. Paralleling unit capacitance (UC) with a
    common-centroid structure can alleviate the random
    mismatch errors. The complexity of generating an
    optimal solution to the UC placement problem is
    extremely high, let alone if both placement and routing
    problems are to be optimized simultaneously. This
    article evaluates the performance of the UC placement
    generated in an existing work and proposes an
    alternative UC placement to achieve optimal ratio
    mismatch M and better linearity performance of SAR ADC
    design. Results show that the proposed UC placement
    achieves a ratio mismatch of M = 0.695, the effective
    number of bits ENOB = 8.314 bits, and the integral
    nonlinearity INL = 0.816 LSB (least significant bits)
    for a 9-bit SAR ADC design.},
  acknowledgement = ack-nhfb,
  articleno = {15},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Sun:2015:NUB,
  author = {Jin Sun and Claudio Talarico and Priyank Gupta and
    Janet Roveda},
  title = {A New Uncertainty Budgeting-Based Method for Robust
    Analog\slash Mixed-Signal Design},
  journal = j-TODAES,
  volume = {21},
  number = {1},
  pages = {16:1--16:??},
  month = nov,
  year = {2015},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/2778959},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Thu Dec 3 10:15:10 MST 2015},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {This article proposes a novel methodology for robust
    analog/mixed-signal IC design by introducing a notion
    of budget of uncertainty. This method employs a new
    conic uncertainty model to capture process variability
    and describes variability-affected circuit design as a
    set-based robust optimization problem. For a
    prespecified yield requirement, the proposed method
    conducts uncertainty budgeting by associating
    performance yield with the size of uncertainty set for
    process variations. Hence the uncertainty budgeting
    problem can be further translated into a tractable
    robust optimization problem. Compared with the existing
    robust design flow based on ellipsoid model, this
    method is able to produce more reliable design
    solutions by allowing varying size of conic uncertainty
    set at different design points. In addition, the
    proposed method addresses the limitation that the size
    of the ellipsoid model is calculated solely relying on
    the distribution of process parameters, while
    neglecting the dependence of circuit performance upon
    these design parameters. The proposed robust design
    framework has been verified on various
    analog/mixed-signal circuits to demonstrate its
    efficiency against the ellipsoid model. Up to 24\%
    reduction of design cost has been achieved by using the
    uncertainty budgeting-based method.},
  acknowledgement = ack-nhfb,
  articleno = {16},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Mitra:2015:OWS,
  author =       "Debasis Mitra and Sarmishtha Ghoshal and Hafizur
                 Rahaman and Krishnendu Chakrabarty and Bhargab B.
                 Bhattacharya",
  title =        "Offline Washing Schemes for Residue Removal in Digital
                 Microfluidic Biochips",
  journal =      j-TODAES,
  volume =       "21",
  number =       "1",
  pages =        "17:1--17:??",
  month =        nov,
  year =         "2015",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2798726",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Dec 3 10:15:10 MST 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "A digital microfluidic biochip (DMB) is often deployed
                 for multiplexing several assays in space and in time.
                 The residue left by one assay may contaminate the
                 droplets used for subsequent assays. Biochemical assays
                 involving cell culture and those based on particle
                 microfluidics also require sweeping of residual media
                 from an active droplet on-chip. Thus, fluidic
                 operations such as washing or residue removal need to
                 be performed routinely either to clean contamination
                 from the droplet pathways or to rinse off certain
                 droplets on the chip. In this work, several graph-based
                 techniques are presented for offline washing of
                 biochips that may have either a regular geometry (e.g.,
                 a 2D array of electrodes), or an irregular geometry
                 (e.g., an application-specific layout). The schemes can
                 be used for total washing, that is, for cleaning the
                 entire biochip or for selective washing of sites or
                 pathways located sparsely on the chip. The problem of
                 reducing the path length and washing time of the
                 droplets is investigated with or without capacity
                 constraints. The proposed algorithms for offline
                 washing make use of several techniques such as graph
                 traversal, integer linear programming (ILP) modeling,
                 and customized heuristics based on the nature of the
                 geometric distribution of the contamination profile.
                 The contaminated pathways are assumed to be Manhattan
                 or curved, and hence the techniques are applicable to
                 the conventional field-actuated DMBs as well as to the
                 emerging classes of light-actuated and active-matrix
                 DMBs. These techniques will be useful in enhancing the
                 reliability of a wide class of emerging digital
                 microfluidic healthcare devices.",
  acknowledgement = ack-nhfb,
  articleno =    "17",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Lin:2015:SAD,
  author =       "Chung-Wei Lin and Bowen Zheng and Qi Zhu and Alberto
                 Sangiovanni-Vincentelli",
  title =        "Security-Aware Design Methodology and Optimization for
                 Automotive Systems",
  journal =      j-TODAES,
  volume =       "21",
  number =       "1",
  pages =        "18:1--18:??",
  month =        nov,
  year =         "2015",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2803174",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Dec 3 10:15:10 MST 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "In this article, we address both security and safety
                 requirements and solve security-aware design problems
                 for the controller area network (CAN) protocol and time
                 division multiple access (TDMA)-based protocols. To
                 provide insights and guidelines for other similar
                 security problems with limited resources and strict
                 timing constraints, we propose a general security-aware
                 design methodology to address security with other
                 design constraints in a holistic framework and optimize
                 design objectives. The security-aware design
                 methodology is further applied to solve a
                 security-aware design problem for vehicle-to-vehicle
                 (V2V) communications with dedicated short-range
                 communication (DSRC) technology. Experimental results
                 demonstrate the effectiveness of our approaches in
                 system design without violating design constraints and
                 indicate that it is necessary to consider security
                 together with other metrics during design stages.",
  acknowledgement = ack-nhfb,
  articleno =    "18",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Zhang:2016:CFS,
  author =       "Daming Zhang and Shuangchen Li and Yongpan Liu and
                 Xiaobo Sharon Hu and Xinyu He and Yining Zhang and Pei
                 Zhang and Huazhong Yang",
  title =        "A {C2RTL} Framework Supporting Partition,
                 Parallelization, and {FIFO} Sizing for Streaming
                 Applications",
  journal =      j-TODAES,
  volume =       "21",
  number =       "2",
  pages =        "19:1--19:??",
  month =        jan,
  year =         "2016",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2797135",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Feb 6 07:43:40 MST 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Developing circuits for streaming applications written
                 in C (or its variants) can benefit greatly from
                 C-to-RTL (C2RTL) synthesis. Yet, most existing C2RTL
                 tools lack system-level options to trade off various
                 design constraints, such as delay and area. This
                 article introduces a systematic way to accomplish C2RTL
                 synthesis for streaming applications containing
                 thousands of lines of C (or its variants) codes.
                 Synthesizing circuits for such large applications
                 presents serious challenges for existing C2RTL tools.
                 Specifically, the proposed approach determines
                 simultaneously the number of pipeline stages and the
                 number of times that each functional block is
                 duplicated in each pipeline stage. A mixed integer
                 linear programming-based solution is formulated for
                 obtaining the optimal solution. Furthermore, a
                 heuristic algorithm is developed for large-scale
                 problems. To accommodate the differences of the data
                 rates between the adjacent hardware modules,
                 first-in-first-out (FIFO) buffers are indispensable,
                 but their overheads are nonnegligible. A
                 parallelism-aware FIFO sizing method is also introduced
                 to determine the optimal sizes of FIFOs. Experimental
                 results on seven real-world applications demonstrate
                 that the algorithms in the synthesis flow can make
                 effective design trade-offs and find superior solutions
                 in a short time compared with existing approaches.
                 Furthermore, the algorithms achieve optimal results in
                 most cases with subsecond running time.",
  acknowledgement = ack-nhfb,
  articleno =    "19",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Pierre:2016:AVT,
  author =       "Laurence Pierre",
  title =        "Auxiliary Variables in Temporal Specifications:
                 Semantic and Practical Analysis for System-Level
                 Requirements",
  journal =      j-TODAES,
  volume =       "21",
  number =       "2",
  pages =        "20:1--20:??",
  month =        jan,
  year =         "2016",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2811260",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Feb 6 07:43:40 MST 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Assertion-based verification (ABV) for IP blocks given
                 as synchronous RTL (register transfer level)
                 descriptions has now widely gained acceptance. The
                 challenge addressed here is ABV for systems on chip
                 (SoC) modeled at the system level in SystemC TLM
                 (Transactional Level Modeling). Requirements to be
                 verified at this level of abstraction usually express
                 temporal constraints on the interactions and
                 communications in the SoC. We use the IEEE standard
                 language PSL to formalize these temporal assertions
                 which represent properties on communication actions and
                 their parameters. Auxiliary variables are often
                 indispensable for this formalization, but their use may
                 induce semantic issues. This article discusses this
                 matter, analyzes various existing approaches and
                 proposes a summary of their advantages and
                 shortcomings. They are also compared to our syntactic
                 and semantic framework, implemented in a verification
                 tool. The proposed operational semantics has the
                 advantages of being simple and intuitive while
                 supporting both global and local auxiliary variables.
                 Experimental results on industrial case studies
                 illustrate its applicability.",
  acknowledgement = ack-nhfb,
  articleno =    "20",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Yan:2016:PDA,
  author =       "Jin-Tai Yan",
  title =        "Performance-Driven Assignment of Buffered {I/O}
                 Signals in Area-{I/O} Flip-Chip Designs",
  journal =      j-TODAES,
  volume =       "21",
  number =       "2",
  pages =        "21:1--21:??",
  month =        jan,
  year =         "2016",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2818642",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Feb 6 07:43:40 MST 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Due to the inappropriate assignment of bump pads or
                 the improper assignment of I/O buffers, the constructed
                 buffered I/O signals in an area-I/O flip-chip design
                 may yield longer maximum delay. In this article, the
                 problem of assigning performance-driven buffered I/O
                 signals in an area-I/O flip-chip design is first
                 formulated. Furthermore, the assignment of the buffered
                 I/O signals can be divided into two sequential phases:
                 Construction of performance-driven I/O signals and
                 Assignment of timing-constrained I/O buffers. Finally,
                 an efficient matching-based approach is proposed to
                 construct the performance-driven I/O signals for the
                 given I/O pins and assign the timing-constrained I/O
                 buffers into the constructed I/O signals in the
                 assignment of the buffered I/O signals in an area-I/O
                 flip-chip design. Compared with the experimental
                 results of seven tested circuits in the Elmore delay
                 model, the experimental results show that the
                 matching-based assignment in our proposed approach can
                 reduce 3.56\% of the total path delay, 9.72\% of the
                 maximum input delay, 5.90\% of the input skew, 5.64\%
                 of the maximum output delay, and 6.25\% of the output
                 skew on average by reassigning the I/O buffers. Our
                 proposed approach can further reduce 38.89\% of the
                 total path delay, 44.00\% of the maximum input delay,
                 49.13\% of the input skew, 44.93\% of the maximum
                 output delay, and 50.82\% of output skew on average by
                 reconstructing the I/O signals and reassigning the I/O
                 buffers into the I/O signals. Compared with the
                 experimental results of seven tested circuits in Peng's
                 [Peng et al. 2006] publication, the experimental
                 results show that our proposed matching-based approach
                 can further reduce 71.06\% of the total path delay,
                 67.83\% of the maximum input delay, 59.84\% of the
                 input skew, 68.87\% of the maximum output delay, and
                 61.46\% of the output skew on average. On the other
                 hand, compared with the experimental results of five
                 tested circuits in Lai's [Lai and Chen 2008]
                 publication, the experimental results show that our
                 proposed approach can further reduce 75.36\% of the
                 total path delay, 48.94\% of the input skew, and
                 52.80\% of the output skew on the average.",
  acknowledgement = ack-nhfb,
  articleno =    "21",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Kritikakou:2016:ASC,
  author =       "Angeliki Kritikakou and Francky Catthoor and Vasilios
                 Kelefouras and Costas Goutis",
  title =        "Array Size Computation under Uniform Overlapping and
                 Irregular Accesses",
  journal =      j-TODAES,
  volume =       "21",
  number =       "2",
  pages =        "22:1--22:??",
  month =        jan,
  year =         "2016",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2818643",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Feb 6 07:43:40 MST 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "The size required to store an array is crucial for an
                 embedded system, as it affects the memory size, the
                 energy per memory access, and the overall system cost.
                 Existing techniques for finding the minimum number of
                 resources required to store an array are less efficient
                 for codes with large loops and not regularly occurring
                 memory accesses. They have to approximate the accessed
                 parts of the array leading to overestimation of the
                 required resources. Otherwise, their exploration time
                 is increased with an increase over the number of the
                 different accessed parts of the array. We propose a
                 methodology to compute the minimum resources required
                 for storing an array which keeps the exploration time
                 low and provides a near-optimal result for regularly
                 and non-regularly occurring memory accesses and
                 overlapping writes and reads.",
  acknowledgement = ack-nhfb,
  articleno =    "22",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Kim:2016:IWP,
  author =       "Youngsik Kim and Sungjoo Yoo and Sunggu Lee",
  title =        "Improving Write Performance by Controlling Target
                 Resistance Distributions in {MLC PRAM}",
  journal =      j-TODAES,
  volume =       "21",
  number =       "2",
  pages =        "23:1--23:??",
  month =        jan,
  year =         "2016",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2820610",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Feb 6 07:43:40 MST 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Multi-level cell (MLC) phase change RAM (PRAM) is
                 expected to offer lower cost main memory than DRAM.
                 However, poor write performance is one of the most
                 critical problems for practical applications of MLC
                 PRAM. In this article, we present two schemes to
                 improve write performance by controlling the target
                 resistance distribution of MLC PRAM cells. First, we
                 propose multiple RESET/SET operations that relax the
                 target resistance bands of intermediate logic levels
                 with additional RESET/SET operations, which reduces the
                 program time of intermediate logic levels, thereby
                 improving write performance. Second, we propose a
                 two-step write scheme consisting of lightweight write
                 and idle-time completion write that exploits the fact
                 that hot dirty data tend to be overwritten in a short
                 time period and the MLC PRAM often has long idle times.
                 Experimental results show that the multiple RESET/SET
                 and two-step write schemes result in an average IPC
                 improvement of 15.7\% and 10.4\%, respectively, on a
                 hybrid DRAM/PRAM main memory subsystem. Furthermore,
                 their integrated solution results in an average IPC
                 improvement of 23.2\% (up to 46.4\%).",
  acknowledgement = ack-nhfb,
  articleno =    "23",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Xiang:2016:NUB,
  author =       "Dong Xiang and Kele Shen",
  title =        "A New Unicast-Based Multicast Scheme for
                 Network-on-Chip Router and Interconnect Testing",
  journal =      j-TODAES,
  volume =       "21",
  number =       "2",
  pages =        "24:1--24:??",
  month =        jan,
  year =         "2016",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2821506",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Feb 6 07:43:40 MST 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "3D technology for networks-on-chip (NOCs) becomes
                 attractive. It is important to present an effective
                 scheme for 3D stacked NOC router and interconnect
                 testing. A new approach to testing of NOC routers is
                 proposed by classifying the routers. Routers with the
                 same number of input/output ports fall into the same
                 class. Routers of the same class are identical if their
                 tests are the same. A test packet is delivered to all
                 the identical routers by a simple unicast-based
                 multicast scheme. It is found that the depth of the
                 consumption buffer at each router has great impact on
                 the test delivery time because test application and
                 test delivery for router testing cannot be handled
                 concurrently. Test delivery must set a router to
                 operational mode. A mathematical model is presented to
                 evaluate the impact of consumption buffer depth on the
                 test delivery time. A new and simple test application
                 scheme is proposed for interconnect testing. Some
                 interesting extensions are presented for further test
                 time reduction and thermal considerations. Sufficient
                 experimental results are presented by comparison with
                 one previous method. The proposed method works for
                 single stuck-at, transition, even small delay faults at
                 routers, and single bridging faults at physical,
                 consumption and injection channels.",
  acknowledgement = ack-nhfb,
  articleno =    "24",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Li:2016:ODM,
  author =       "Zipeng Li and Tsung-Yi Ho and Krishnendu Chakrabarty",
  title =        "Optimization of {$3$D} Digital Microfluidic Biochips
                 for the Multiplexed Polymerase Chain Reaction",
  journal =      j-TODAES,
  volume =       "21",
  number =       "2",
  pages =        "25:1--25:??",
  month =        jan,
  year =         "2016",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2811259",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Feb 6 07:43:40 MST 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "A digital microfluidic biochip (DMFB) is an attractive
                 technology platform for revolutionizing immunoassays,
                 clinical diagnostics, drug discovery, DNA sequencing,
                 and other laboratory procedures in biochemistry. In
                 most of these applications, real-time polymerase chain
                 reaction (PCR) is an indispensable step for amplifying
                 specific DNA segments. To reduce the reaction time to
                 meet the requirement of ``real-time'' applications,
                 multiplexed PCR is widely utilized. In recent years,
                 three-dimensional (3D) DMFBs that integrate
                 photodetectors (i.e., cyberphysical DMFBs) have been
                 developed, which offer the benefits of smaller size,
                 higher sensitivity, and faster result generations.
                 However, current DMFB design methods target
                 optimization in only two dimensions, thus ignoring the
                 3D two-layer structure of a DMFB. Furthermore, these
                 techniques ignore practical constraints related to the
                 interference between on-chip device pairs, the
                 performance-critical PCR thermal loop, and the physical
                 size of devices. Moreover, some practical issues in
                 real scenarios are not stressed (e.g., the avoidance of
                 the cross-contamination for multiplexed PCR). In this
                 article, we describe an optimization solution for a 3D
                 DMFB and present a three-stage algorithm to realize a
                 compact 3D PCR chip layout, which includes: (i) PCR
                 thermal-loop optimization, (ii) 3D global placement
                 based on Strong-Push-Weak-Pull (SPWP) model, and (iii)
                 constraint-aware legalization. To avoid
                 cross-contamination between different DNA samples, we
                 also propose a Minimum-Cost-Maximum-Flow-based
                 (MCMF-based) method for reservoir assignment.
                 Simulation results for four laboratory protocols
                 demonstrate that the proposed approach is effective for
                 the design and optimization of a 3D chip for
                 multiplexed real-time PCR.",
  acknowledgement = ack-nhfb,
  articleno =    "25",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Zhang:2016:PPG,
  author =       "Le Zhang and Vivek Sarin",
  title =        "Parallel Power Grid Analysis Based on Enlarged
                 Partitions",
  journal =      j-TODAES,
  volume =       "21",
  number =       "2",
  pages =        "26:1--26:??",
  month =        jan,
  year =         "2016",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2806885",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Feb 6 07:43:40 MST 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "As the size and complexity of current VLSI circuits
                 grows, faster power grid simulation is becoming more
                 and more desirable. In this article, we present a
                 parallel iterative method for static VLSI power grid
                 simulation. In the proposed enlarged-partition-based
                 preconditioned conjugate gradient (EPPCG) power grid
                 solver, the power grid is divided into disjoint
                 partitions that are subsequently enlarged to obtain
                 accurate solution within each partition. The global
                 solution obtained by solving enlarged partition
                 problems concurrently acts as a highly effective
                 parallel preconditioner. The combination of effective
                 preconditioning and efficient parallelization helps
                 achieve very high performance. The experiments show
                 that our parallel implementation can achieve
                 significant speed improvement [61X--142X] over a
                 state-of-the-art direct solver.",
  acknowledgement = ack-nhfb,
  articleno =    "26",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Jin:2016:CEE,
  author =       "Song Jin and Songwei Pei and Yinhe Han and Huawei Li",
  title =        "A Cost-Effective Energy Optimization Framework of
                 Multicore {SoCs} Based on Dynamically Reconfigurable
                 Voltage-Frequency Islands",
  journal =      j-TODAES,
  volume =       "21",
  number =       "2",
  pages =        "27:1--27:??",
  month =        jan,
  year =         "2016",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2817207",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Feb 6 07:43:40 MST 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Voltage-frequency island (VFI)-based design has been
                 widely exploited for optimizing system energy of
                 embedded multicore chip in recent years. The existing
                 work either constructed a single static VFI partition
                 for all kinds of applications or required per-core
                 voltage domain configuration. However, the former
                 solution is hard to find one optimal VFI partition for
                 diverse applications while the latter one suffers from
                 high hardware cost. In this article, we propose a cost
                 effective energy optimization framework based on
                 dynamically reconfigurable VFI (D-VFI). Our framework
                 treats a small number of cores as dynamic cores
                 (D-cores) and configures each of them with an
                 independent voltage domain. At runtime, the D-cores can
                 be pieced together with neighboring static VFIs by
                 scaling their operating voltages. This can dynamically
                 construct the optimal VFI partitions for different
                 kinds of applications, thus achieving more aggressive
                 energy optimization under low cost. To identify the
                 D-cores, we propose a rules constrained task scheduling
                 and VFI partitioning algorithm. Moreover, we analyze
                 the task schedules to determine the optimal scaling
                 intervals which can accommodate voltage scaling induced
                 latency. Experimental results demonstrate the
                 effectiveness of the proposed scheme.",
  acknowledgement = ack-nhfb,
  articleno =    "27",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Kamal:2016:YSI,
  author =       "Mehdi Kamal and Ali Afzali-Kusha and Saeed Safari and
                 Massoud Pedram",
  title =        "Yield and Speedup Improvements in Extensible
                 Processors by Allocating Extra Cycles to Some Custom
                 Instructions",
  journal =      j-TODAES,
  volume =       "21",
  number =       "2",
  pages =        "28:1--28:??",
  month =        jan,
  year =         "2016",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2830566",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Feb 6 07:43:40 MST 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "In this article, we investigate the application of
                 different techniques for mitigating the impact of
                 process variations on the custom functional unit (CFU)
                 of extensible processors. The techniques include using
                 extra cycles for the CFU and extending the clock period
                 for the extensible processor. The former technique is
                 based on providing an extra clock cycle to those custom
                 instructions (CIs) that have timing yields smaller than
                 one. For this purpose, we make use of a lookup table
                 (LUT) for each fabricated processor. Based on a
                 post-fabrication analysis, the need for an extra clock
                 cycle for some CIs is determined. Consequently, the CI
                 timing violations are prevented, and all manufactured
                 extensible processors will work with a predefined clock
                 cycle time. To study the effect of the objective
                 function (used during the CI selection phase) on the
                 efficacy of the suggested architectural technique, we
                 investigate three different objective functions. In the
                 second technique, the clock period extension is used to
                 guarantee a design yield of one. Our results
                 demonstrate that combining both techniques helps
                 increase the speedup achieved by the extensible
                 processor. To assess the efficacies of the proposed
                 methods, several benchmarks from different application
                 domains are used. Results of the study reveal that the
                 suggested techniques provide considerable improvements
                 in the speedups of the extensible processors when
                 compared to those of approaches that do not consider
                 the impact of process variations.",
  acknowledgement = ack-nhfb,
  articleno =    "28",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@article{Chen:2016:TTS,
  author = {Guoqing Chen and Yi Xu and Xing Hu and Xiangyang Guo
    and Jun Ma and Yu Hu and Yuan Xie},
  title = {{TSocket}: Thermal Sustainable Power Budgeting},
  journal = j-TODAES,
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  volume = {21},
  number = {2},
  articleno = {29},
  pages = {29:1--29:??},
  month = jan,
  year = {2016},
  doi = {https://doi.org/10.1145/2837023},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  coden = {ATASFO},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate = {Sat Feb 6 07:43:40 MST 2016},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  abstract = {As technology scales, thermal management for multicore
    architectures becomes a critical challenge due to
    increasing power density. Existing power budgeting
    techniques focus on maximizing performance under a
    given power budget by optimizing the core
    configurations. In multicore era, a chip-wide power
    budget, however, is not sufficient to ensure thermal
    constraints because the thermal sustainable power
    capacity varies with different threading strategies and
    core configurations. In this article, we propose two
    models to dynamically estimate the thermal sustainable
    power capacity in homogeneous multicore systems:
    uniform power model and nonuniform power model. These
    two models convert the thermal effect of threading
    strategies and core configurations into power capacity,
    which provide a context-based core power capacity for
    power budgeting. Based on these models, we introduce a
    power budgeting framework aiming to improve the
    performance within thermal constraints, named as
    TSocket. Compared to the chip-wide power budgeting
    solution, TSocket shows 19\% average performance
    improvement for the PARSEC benchmarks in single program
    scenario and up to 11\% performance improvement in
    multiprogram scenario. The performance improvement is
    achieved by reducing thermal violations and exploring
    thermal headrooms.},
}

@article{Chen:2016:RAR,
  author = {Liang Chen and Mojtaba Ebrahimi and Mehdi B. Tahoori},
  title = {Reliability-Aware Resource Allocation and Binding in
    High-Level Synthesis},
  journal = j-TODAES,
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  volume = {21},
  number = {2},
  articleno = {30},
  pages = {30:1--30:??},
  month = jan,
  year = {2016},
  doi = {https://doi.org/10.1145/2839300},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  coden = {ATASFO},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate = {Sat Feb 6 07:43:40 MST 2016},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  abstract = {Soft error is nowadays a major reliability issue for
    nanoscale VLSI, and addressing it during high-level
    synthesis is essential to improve the efficiency of
    error mitigation. Motivated by the observation that for
    behavioral designs, especially control-flow intensive
    ones, variables and operations have non-uniform soft
    error vulnerabilities, we propose a novel
    reliability-aware allocation and binding technique to
    explore more effective soft error mitigation during
    high level synthesis. We first perform a comprehensive
    vulnerability analysis at the behavioral level by
    considering error propagation and masking in both
    control and data flows. Then the optimizations based on
    integer linear programming, as well as heuristic
    algorithm, are employed to incorporate the behavioral
    vulnerabilities into the register and functional unit
    binding phases to achieve cost-efficient error
    mitigation. The experimental results reveal that
    compared with the previous techniques which ignored
    behavioral vulnerabilities, the proposed approach can
    achieve up to 85\% reliability improvement with the
    same amount of area budget in the RTL design.},
}

@article{Dubeuf:2016:EPA,
  author = {Jeremy Dubeuf and David Hely and Vincent Beroulle},
  title = {{ECDSA} Passive Attacks, Leakage Sources, and Common
    Design Mistakes},
  journal = j-TODAES,
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  volume = {21},
  number = {2},
  articleno = {31},
  pages = {31:1--31:??},
  month = jan,
  year = {2016},
  doi = {https://doi.org/10.1145/2820611},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  coden = {ATASFO},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate = {Sat Feb 6 07:43:40 MST 2016},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  abstract = {Elliptic Curves Cryptography (ECC) tends to replace
    RSA for public key cryptographic services. ECC is
    involved in many secure schemes such as Elliptic Curve
    Diffie--Hellman (ECDH) key agreement, Elliptic Curve
    Integrated Encryption Scheme (ECIES), and Elliptic
    Curve Digital Signature Algorithm (ECDSA). As for every
    cryptosystem, implementation of such schemes may
    jeopardize the inherent security provided by the
    mathematical properties of the ECC. Unfortunate
    implementation or algorithm choices may create serious
    vulnerabilities. The elliptic curve scalar operation is
    particularly sensitive among these schemes. This
    article surveys passive attacks against well-spread
    elliptic curve scalar multiplication algorithms
    highlighting leakage sources and common mistakes that
    can be used to attack the ECDSA scheme. Experimental
    results are provided to illustrate and demonstrate the
    effectiveness of each vulnerability. Finally, the
    article describes the link between partial leakage and
    lattice attack in order to understand and demonstrate
    the impact of small leakages on the security of ECDSA.
    An example of side channel and lattice attack
    combination on NIST P-256 is provided in the case where
    the elliptic curve scalar multiplication is not
    protected against DPA/CPA and a controllable device is
    not accessible.},
}

@article{Lukasiewycz:2016:SAO,
  author = {Martin Lukasiewycz and Philipp Mundhenk and Sebastian
    Steinhorst},
  title = {Security-Aware Obfuscated Priority Assignment for
    Automotive {CAN} Platforms},
  journal = j-TODAES,
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  volume = {21},
  number = {2},
  articleno = {32},
  pages = {32:1--32:??},
  month = jan,
  year = {2016},
  doi = {https://doi.org/10.1145/2831232},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  coden = {ATASFO},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate = {Sat Feb 6 07:43:40 MST 2016},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  abstract = {Security in automotive in-vehicle networks is an
    increasing problem with the growing connectedness of
    road vehicles. This article proposes a security-aware
    priority assignment for automotive controller area
    network (CAN) platforms with the aim of mitigating
    scaling effects of attacks on vehicle fleets. CAN is
    the dominating field bus in the automotive domain due
    to its simplicity, low cost, and robustness. While
    messages might be encrypted to enhance the security of
    CAN systems, their priorities are usually identical for
    automotive platforms, comprising generally a large
    number of vehicle models. As a result, the identifier
    uniquely defines which message is sent, allowing
    attacks to scale across a fleet of vehicles with the
    same platform. As a remedy, we propose a methodology
    that is capable of determining obfuscated message
    identifiers for each individual vehicle. Since
    identifiers directly represent message priorities, the
    approach has to take the resulting response time
    variations into account while satisfying application
    deadlines for each vehicle schedule separately. Our
    approach relies on Quadratically Constrained Quadratic
    Program (QCQP) solving in two stages, specifying first
    a set of feasible fixed priorities and subsequently
    bounded priorities for each message. With the obtained
    bounds, obfuscated identifiers are determined, using a
    very fast randomized sampling. The experimental
    results, consisting of a large set of synthetic test
    cases and a realistic case study, give evidence of the
    efficiency of the proposed approach in terms of
    scalability. The results also show that the diversity
    of obtained identifiers is effectively optimized with
    our approach, resulting in a very good obfuscation of
    CAN messages in in-vehicle communication.},
}

@article{Suresh:2016:AVD,
  author = {Chandra K. H. Suresh and Ozgur Sinanoglu and Sule
    Ozev},
  title = {Adapting to Varying Distribution of Unknown Response
    Bits},
  journal = j-TODAES,
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  volume = {21},
  number = {2},
  articleno = {33},
  pages = {33:1--33:??},
  month = jan,
  year = {2016},
  doi = {https://doi.org/10.1145/2835489},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  coden = {ATASFO},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate = {Sat Feb 6 07:43:40 MST 2016},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  abstract = {Traditionally, test patterns that are generated for a
    given circuit are applied in an identical manner to all
    manufactured devices until each device under test
    either fails or passes each test. With increasing
    process variations, the statistical diversity of
    manufactured devices is increasing, making such
    one-size-fits-all approaches increasingly inefficient.
    Adaptive test techniques address this problem by
    tailoring the test decisions for the statistical
    characteristics of the device under test. In this
    article, we present several adaptive strategies to
    enable adaptive unknown bit masking for
    faster-than-at-speed testing so as to ensure no yield
    loss while attaining the maximum test quality based on
    tester memory constraints. We also develop a
    tester-enabled compression scheme that helps alleviate
    memory constraints further, shifting the tradeoff space
    favorably to improve test quality.},
}

@article{Tan:2016:ESE,
  author = {Jingweijia Tan and Zhi Li and Mingsong Chen and Xin
    Fu},
  title = {Exploring Soft-Error Robust and Energy-Efficient
    Register File in {GPGPUs} using Resistive Memory},
  journal = j-TODAES,
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  volume = {21},
  number = {2},
  articleno = {34},
  pages = {34:1--34:??},
  month = jan,
  year = {2016},
  doi = {https://doi.org/10.1145/2827697},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  coden = {ATASFO},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate = {Sat Feb 6 07:43:40 MST 2016},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  abstract = {The increasing adoption of graphics processing units
    (GPUs) for high-performance computing raises the
    reliability challenge, which is generally ignored in
    traditional GPUs. GPUs usually support thousands of
    parallel threads and require a sizable register file.
    Such large register file is highly susceptible to soft
    errors and power-hungry. Although ECC has been adopted
    to register file in modern GPUs, it causes considerable
    power overhead, which further increases the power
    stress. Thus, an energy-efficient soft-error protection
    mechanism is more desirable. Besides its extremely low
    leakage power consumption, resistive memory (e.g.,
    spin-transfer torque RAM) is also immune to the
    radiation induced soft errors due to its magnetic field
    based storage. In this article, we propose to LEverage
    reSistive memory to enhance the Soft-error robustness
    and reduce the power consumption (LESS) of registers in
    the General-Purpose computing on GPUs (GPGPUs). Since
    resistive memory experiences longer write latency
    compared to SRAM, we explore the unique characteristics
    of GPGPU applications to obtain the win-win gains:
    achieving the near-full soft-error protection for the
    register file, and meanwhile substantially reducing the
    energy consumption with negligible performance
    degradation. Our experimental results show that LESS is
    able to mitigate the registers soft-error vulnerability
    by 86\% and achieve 61\% energy savings with negligible
    (e.g., 1\%) performance degradation.},
}

@article{Pomeranz:2016:DTF,
  author = {Irith Pomeranz},
  title = {Design-for-Testability for Functional Broadside Tests
    under Primary Input Constraints},
  journal = j-TODAES,
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  volume = {21},
  number = {2},
  articleno = {35},
  pages = {35:1--35:??},
  month = jan,
  year = {2016},
  doi = {https://doi.org/10.1145/2831231},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  coden = {ATASFO},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate = {Sat Feb 6 07:43:40 MST 2016},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  abstract = {Functional broadside tests avoid overtesting of delay
    faults by creating functional operation conditions
    during the clock cycles where delay faults are
    detected. When a circuit is embedded in a larger
    design, a functional broadside test needs to take into
    consideration the functional constraints that the
    design creates for its primary input vectors. At the
    same time, application of primary input vectors as part
    of a scan-based test requires hardware support. An
    earlier work considered the case where a primary input
    vector is held constant during a test. The approach
    described in this article matches the hardware for
    applying primary input vectors to the functional
    constraints that the design creates. This increases the
    transition fault coverage that can be achieved by
    functional broadside tests. This article also considers
    the effect on the transition fault coverage achievable
    using close-to-functional broadside tests.},
}

@article{Young:2016:PSS,
  author = {Evangeline Young and Azadeh Davoodi},
  title = {Preface to Special Section on New Physical Design
    Techniques for the Next Generation of Integration
    Technology},
  journal = j-TODAES,
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  volume = {21},
  number = {3},
  articleno = {36},
  pages = {36:1--36:??},
  month = jul,
  year = {2016},
  doi = {https://doi.org/10.1145/2902365},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  coden = {ATASFO},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate = {Fri Sep 23 15:16:20 MDT 2016},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@article{Darav:2016:EPH,
  author = {Nima Karimpour Darav and Andrew Kennings and Aysa
    Fakheri Tabrizi and David Westwick and Laleh Behjat},
  title = {{Eh?Placer}: a High-Performance Modern
    Technology-Driven Placer},
  journal = j-TODAES,
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  volume = {21},
  number = {3},
  articleno = {37},
  pages = {37:1--37:??},
  month = jul,
  year = {2016},
  doi = {https://doi.org/10.1145/2899381},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  coden = {ATASFO},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate = {Fri Sep 23 15:16:20 MDT 2016},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  abstract = {The placement problem has become more complex and
    challenging due to a wide variety of complicated
    constraints imposed by modern process technologies.
    Some of the most challenging constraints and objectives
    were highlighted during the most recent ACM/IEEE
    International Symposium on Physical Design (ISPD)
    contests. In this article, the framework of Eh?Placer
    and its developed algorithms are elaborated, with the
    main focus on modern technology constraints and
    runtime. The technology constraints considered as part
    of Eh?Placer are fence region, target density, and
    detailed routability constraints. We present a complete
    description on how these constraints are considered in
    different stages of Eh?Placer. The results obtained
    from the contests indicate that Eh?Placer is able to
    efficiently handle modern technology constraints and
    ranks highly among top academic placement tools.},
}

@Article{Livramento:2016:CTA,
  author =       "Vinicius Livramento and Renan Netto and Chrystian Guth
                 and Jos{\'e} Lu{\'\i}s G{\"u}ntzel and Luiz C. V. {Dos
                 Santos}",
  title =        "Clock-Tree-Aware Incremental Timing-Driven Placement",
  journal =      j-TODAES,
  volume =       "21",
  number =       "3",
  pages =        "38:1--38:??",
  month =        jul,
  year =         "2016",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2858793",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Sep 23 15:16:20 MDT 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "The increasing impact of interconnections on overall
                 circuit performance makes timing-driven placement (TDP)
                 a crucial step toward timing closure. Current TDP
                 techniques improve critical paths but overlook the
                 impact of register placement on clock tree quality. On
                 the other hand, register placement techniques found in
                 the literature mainly focus on power consumption,
                 disregarding timing and routability. Indeed, postponing
                 register placement may undermine the optimization
                 achieved by TDP, since the wiring between sequential
                 and combinational elements would be touched. This work
                 proposes a new approach for an effective coupling
                 between register placement and TDP that relies on two
                 key aspects to handle sequential and combinational
                 elements separately: only the registers in the critical
                 paths are touched by TDP (in practice they represent a
                 small percentage of the total number of registers), and
                 the shortening of clock tree wirelength can be obtained
                 with limited variation in signal wirelength and
                 placement density. The approach consists of two steps:
                 (1) incremental register placement guided by a virtual
                 clock tree to reduce clock wiring capacitance while
                 preserving signal wirelength and density, and (2)
                 incremental TDP to minimize the total negative slack.
                 For the first step, we propose a novel technique that
                 combines clock-net contraction and register clustering
                 forces to reduce the clock wirelength. For the second
                 step, we propose a novel Lagrangian Relaxation
                 formulation that minimizes total negative slack for
                 both setup and hold timing violations. To solve the
                 formulation, we propose a TDP technique using a novel
                 discrete search that employs a Euclidean distance to
                 define a proper neighborhood. For the experimental
                 evaluation of the proposed approach, we relied on the
                 ICCAD 2014 TDP contest infrastructure and compared our
                 results with the best results obtained from that
                 contest in terms of timing closure, clock tree
                 compactness, signal wirelength, and density. Assuming a
                 long displacement constraint, our technique achieves
                 worst and total negative slack reductions of around
                 24\% and 26\%, respectively. In addition, our approach
                 leads to 44\% shorter clock tree wirelength with
                 negligible impact on signal wirelength and placement
                 density. In the face of such results, the proposed
                 coupling seems a useful approach to handle the
                 challenges faced by contemporary physical synthesis.",
  acknowledgement = ack-nhfb,
  articleno =    "38",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@article{Wu:2016:PAC,
  author = {Po-Hsun Wu and Mark Po-Hung Lin and Xin Li and
    Tsung-Yi Ho},
  title = {Parasitic-Aware Common-Centroid {FinFET} Placement and
    Routing for Current-Ratio Matching},
  journal = j-TODAES,
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  volume = {21},
  number = {3},
  articleno = {39},
  pages = {39:1--39:??},
  month = jul,
  year = {2016},
  doi = {https://doi.org/10.1145/2856031},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  coden = {ATASFO},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate = {Fri Sep 23 15:16:20 MDT 2016},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  abstract = {The FinFET technology is regarded as a better
    alternative for modern high-performance and low-power
    integrated-circuit design due to more effective channel
    control and lower power consumption. However, the
    gate-misalignment problem resulting from process
    variation and the parasitic resistance resulting from
    interconnecting wires based on the FinFET technology
    becomes even more severe compared with the conventional
    planar CMOS technology. Such gate misalignment and
    unwanted parasitic resistance may increase the
    threshold voltage and decrease the drain current of
    transistors. When applying the FinFET technology to
    analog circuit design, the variation of drain currents
    can destroy current-ratio matching among transistors
    and degrade circuit performance. In this article, we
    present the first FinFET placement and routing
    algorithms for layout generation of a common-centroid
    FinFET array to precisely match the current ratios
    among transistors. Experimental results show that the
    proposed matching-driven FinFET placement and routing
    algorithms can obtain the best current-ratio matching
    compared with the state-of-the-art common-centroid
    placer.},
}

@article{Huang:2016:FTS,
  author = {Jinglei Huang and Song Chen and Wei Zhong and Wenchao
    Zhang and Shengxi Diao and Fujiang Lin},
  title = {Floorplanning and Topology Synthesis for
    Application-Specific Network-on-Chips with
    {RF}-Interconnect},
  journal = j-TODAES,
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  volume = {21},
  number = {3},
  articleno = {40},
  pages = {40:1--40:??},
  month = jul,
  year = {2016},
  doi = {https://doi.org/10.1145/2890499},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  coden = {ATASFO},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate = {Fri Sep 23 15:16:20 MDT 2016},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  abstract = {Application-specific Network-on-Chip (ASNoC) has been
    proposed as a promising solution to address the global
    communication challenges in System-on-Chips. However,
    with the number of cores increasing, the on-chip
    communication becomes more and more complex and the
    power consumption imposes the major challenge for
    designing ASNoCs. In this article, we propose a
    four-stage floorplanning and topology synthesis
    approach for ASNoCs with Radio-Frequency Interconnect
    (RF-I). First, considering the advantage of RF-I in
    long-distance on-chip communication, we integrate the
    floorplanning and clustering to explore the proper
    clustering of cores, where the cores belonging to the
    same cluster will share the same switch for
    communications, form an island, and occupy a contiguous
    physical region. After the switches and network
    interfaces are inserted into the floorplan, the
    allocation of routing paths and the RF-I logical
    channels are integrated in an iterative procedure to
    generate fine-grained dynamically reconfigurable ASNoC
    topologies. Finally, considering the signal integrity
    of RF-I, we adjust the placement of the switches by a
    simulated annealing-based method to reduce the number
    of RF-I routing corners. To evaluate the placement of
    switches, we propose a dynamical programming-based
    method to route the transmission line with the
    minimized number of routing corners in linear time. The
    results show that, using the RF-I, we can reduce the
    power consumption of ASNoCs by 20\% to 26\%.},
}

@Article{Xu:2016:ACS,
  author =       "Chang Xu and Guojie Luo and Peixin Li and Yiyu Shi and
                 Iris Hui-Ru Jiang",
  title =        "Analytical Clustering Score with Application to
                 Postplacement Register Clustering",
  journal =      j-TODAES,
  volume =       "21",
  number =       "3",
  pages =        "41:1--41:??",
  month =        jul,
  year =         "2016",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2894753",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Sep 23 15:16:20 MDT 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Circuit clustering is usually done through discrete
                 optimizations to enable circuit size reduction or
                 design-specific cluster formation. In this article, we
                 are interested in the register-clustering technique for
                 clock-power reduction by leveraging new opportunities
                 introduced by multibit flip-flop (MBFF). Currently,
                 INTEGRA is the only existing postplacement MBFF
                 clustering optimizer with a subquadratic time
                 complexity. However, it severely degrades the
                 wirelength, especially for realistic designs, which may
                 nullify the benefits of MBFF clustering. In contrast,
                 we formulate an analytical clustering score with a
                 nonlinear programming framework, in which the
                 wirelength objective can be seamlessly integrated and
                 the solver has empirical subquadratic time complexity.
                 With the MBFF library, the application of our
                 analytical clustering method achieves comparable clock
                 power to the state-of-the-art techniques, but further
                 reduces the wirelength by about 25\%. Even without the
                 MBFF library, we can still achieve 30\% clock
                 wirelength reduction. In addition, the proposed method
                 can potentially be integrated into an in-placement MBFF
                 clustering solver and be applied to other problems that
                 require formulating clustering scores in their
                 objective functions.",
  acknowledgement = ack-nhfb,
  articleno =    "41",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Xu:2016:PPA,
  author =       "Xiaoqing Xu and Bei Yu and Jhih-Rong Gao and Che-Lun
                 Hsu and David Z. Pan",
  title =        "{PARR}: Pin-Access Planning and Regular Routing for
                 Self-Aligned Double Patterning",
  journal =      j-TODAES,
  volume =       "21",
  number =       "3",
  pages =        "42:1--42:??",
  month =        jul,
  year =         "2016",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2842612",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Sep 23 15:16:20 MDT 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Pin access has become one of the most difficult
                 challenges for detailed routing in advanced technology
                 nodes, for example, in 14nm and below, for which
                 double-patterning lithography has to be used for
                 manufacturing lower metal routing layers with tight
                 pitches, such as M2 and M3. Self-aligned double
                 patterning (SADP) provides better control on line edge
                 roughness and overlay, but it has very restrictive
                 design constraints and prefers regular layout patterns.
                 This article presents a comprehensive pin-access
                 planning and regular routing framework (PARR) for SADP
                 friendliness. Our key techniques include precomputation
                 of both intracell and intercell pin accessibility, as
                 well as local and global pin-access planning to enable
                 handshaking between standard cell-level pin access and
                 detailed routing under SADP constraints. A pin
                 access-driven rip-up and reroute scheme is proposed to
                 improve the ultimate routability. Our experimental
                 results demonstrate that PARR can achieve much better
                 routability and overlay control compared with previous
                 approaches.",
  acknowledgement = ack-nhfb,
  articleno =    "42",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Yu:2016:EOA,
  author =       "Bei Yu and Kun Yuan and Jhih-Rong Gao and Shiyan Hu
                 and David Z. Pan",
  title =        "{EBL} Overlapping Aware Stencil Planning for {MCC}
                 System",
  journal =      j-TODAES,
  volume =       "21",
  number =       "3",
  pages =        "43:1--43:??",
  month =        jul,
  year =         "2016",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2888394",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Sep 23 15:16:20 MDT 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Electron beam lithography (EBL) is a promising,
                 maskless solution for the technology beyond 14nm logic
                 nodes. To overcome its throughput limitation, industry
                 has proposed character projection (CP) technique, where
                 some complex shapes (characters) can be printed in one
                 shot. Recently, the traditional EBL system was extended
                 into a multi-column cell (MCC) system to further
                 improve the throughput. In an MCC system, several
                 independent CPs are used to further speed up the
                 writing process. Because of the area constraint of
                 stencil, the MCC system needs to be packed/planned
                 carefully to take advantage of the characters. In this
                 article, we prove that the overlapping aware stencil
                 planning (OSP) problem is NP-hard. Then we propose
                 E-BLOW, a tool to solve the MCC system OSP problem.
                 E-BLOW involves several novel speedup techniques, such
                 as successive relaxation and dynamic programming.
                 Experimental results show that, compared with previous
                 works, E-BLOW demonstrates better performance for both
                 the conventional EBL system and the MCC system.",
  acknowledgement = ack-nhfb,
  articleno =    "43",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Kim:2016:NAP,
  author =       "Seungwon Kim and Seokhyeong Kang and Ki Jin Han and
                 Youngmin Kim",
  title =        "Novel Adaptive Power-Gating Strategy and Tapered {TSV}
                 Structure in Multilayer {$3$D} {IC}",
  journal =      j-TODAES,
  volume =       "21",
  number =       "3",
  pages =        "44:1--44:??",
  month =        jul,
  year =         "2016",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2894752",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Sep 23 15:16:20 MDT 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Among power dissipation components, leakage power has
                 become more dominant with each successive technology
                 node. Power-gating techniques have been widely used to
                 reduce the standby leakage energy. In this work, we
                 investigate a power-gating strategy for through-silicon
                 via (TSV)-based 3D IC stacking structures. Power-gating
                 control is becoming more complicated as more dies are
                 stacked. We combine the on-chip PDN and TSV in a
                 multilayered 3D IC to perform power-gating analysis of
                 the static and dynamic voltage drops and in-rush
                 current. Then, we propose a novel power-gating strategy
                 that optimizes the in-rush current profile, subject to
                 the voltage-drop constraints. Our power-gating strategy
                 provides a minimal wake-up latency such that the
                 voltage noise safety margins are not violated. In
                 addition, the layer dependency of the 3D IC on the
                 power gating is analyzed in terms of the wake-up time
                 reduction. We achieve an average wake-up time reduction
                 of 43\% for all cases with our adaptive power-gating
                 method that exploits location (or layer) information
                 regarding the aggressors in a 3D IC. A tapered TSV
                 architecture based on the layer dependency has been
                 analyzed; it exhibits up to 18\% wake-up time reduction
                 compared to that of circuits with uniform TSVs.",
  acknowledgement = ack-nhfb,
  articleno =    "44",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Chen:2016:DCV,
  author =       "Gong Chen and Toru Fujimura and Qing Dong and
                 Shigetoshi Nakatake and Bo Yang",
  title =        "{DC} Characteristics and Variability on 90nm {CMOS}
                 Transistor Array-Style Analog Layout",
  journal =      j-TODAES,
  volume =       "21",
  number =       "3",
  pages =        "45:1--45:??",
  month =        jul,
  year =         "2016",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2888395",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Sep 23 15:16:20 MDT 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "In the MOS analog layout, variability suppression is
                 becoming a major issue, as is layout efficiency.
                 Introducing a transistor array (TA) style to analog
                 layout, this article addresses the layout-dependent
                 variability based on the measurement results of test
                 chips on a 90nm CMOS process. In TA style, a large
                 transistor is decomposed into a set of unified
                 subtransistors, which are connected in series or
                 parallel. Focusing on one row layout of diffusion
                 sharing for the multiple gates, we analyze the current
                 direction-dependent variability and the leakage current
                 via off-gates for the electrical isolation.
                 Furthermore, we present several analog design cases on
                 TA including analysis of the impact on the DC
                 characteristics caused by the transistor channel
                 decomposition.",
  acknowledgement = ack-nhfb,
  articleno =    "45",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Wang:2016:MSM,
  author =       "Chao Wang and Chuansheng Dong and Haibo Zeng and
                 Zonghua Gu",
  title =        "Minimizing Stack Memory for Hard Real-Time
                 Applications on Multicore Platforms with Partitioned
                 Fixed-Priority or {EDF} Scheduling",
  journal =      j-TODAES,
  volume =       "21",
  number =       "3",
  pages =        "46:1--46:??",
  month =        jul,
  year =         "2016",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2846096",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Sep 23 15:16:20 MDT 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Multicore processors are increasingly adopted in
                 resource-constrained real-time embedded applications.
                 In the development of such applications, efficient use
                 of RAM memory is as important as the effective
                 scheduling of software tasks. Preemption Threshold
                 Scheduling (PTS) is a well-known technique for
                 controlling the degree of preemption, possibly
                 improving system schedulability, and reducing system
                 stack usage. In this paper, we consider partitioned
                 multi-processor scheduling on a multicore processor
                 with either Fixed-Priority or Earliest Deadline First
                 scheduling algorithms with PTS and address the design
                 optimization problem of mapping tasks to processor
                 cores and assignment of task priorities and preemption
                 thresholds with the optimization objective of
                 minimizing system stack usage. We present both optimal
                 solution techniques based on Mixed Integer Linear
                 Programming and efficient heuristic algorithms that can
                 achieve high-quality results. We perform extensive
                 performance evaluations using both synthetic tasksets
                 and industrial case studies.",
  acknowledgement = ack-nhfb,
  articleno =    "46",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Lee:2016:DWC,
  author =       "Sungkwang Lee and Taemin Lee and Hyunsun Park and
                 Junwhan Ahn and Sungjoo Yoo and Youjip Won and Sunggu
                 Lee",
  title =        "Differential Write-Conscious Software Design on
                 Phase-Change Memory: an {SQLite} Case Study",
  journal =      j-TODAES,
  volume =       "21",
  number =       "3",
  pages =        "47:1--47:??",
  month =        jul,
  year =         "2016",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2842613",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Sep 23 15:16:20 MDT 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Phase-change memory (PCM) has several benefits
                 including low cost, non-volatility,
                 byte-addressability, etc., and limitations such as
                 write endurance. There have been several hardware
                 approaches to exploit the benefits while minimizing the
                 negative impact of limitations. Software approaches
                 could give further improvements, when used together
                 with hardware approaches, by taking advantage of write
                 behavior present in the program, e.g., write behavior
                 on dynamically allocated data, which is hardly captured
                 by hardware approaches. This work proposes a software
                 design methodology to reduce costly PCM writes. First,
                 on top of an existing hardware approach such as
                 Flip-N-Write, we advocate exploiting the capability of
                 PCM bit-level differential write in the software by
                 judiciously reusing previously allocated memory
                 resource. In order to avoid wear-out incurred by the
                 reuse, we present software-based wear-leveling methods
                 that distribute writes across PCM cells. In order to
                 further reduce PCM writes, we propose identifying data,
                 the loss of which does not affect the functionality of
                 the underlying software, and then diverting write
                 traffic for those data items to volatile memory. To
                 evaluate the effectiveness of these methods, as a case
                 study, we applied the proposed methods to the design of
                 journaling in SQLite, which is an important database
                 application commonly used in smartphones. For the
                 experiments, we used an in-house PCM-based prototype
                 board. Our experiments with four representative mobile
                 applications show that the proposed design methods,
                 which are applied on top of the hardware approach,
                 Flip-N-Write, result in 75.2\% further reduction in
                 total bit updates in PCM, on average, without
                 aggravating wear-out compared with the baseline of
                 PCM-based journaling, which is based only on the
                 hardware approach. Also, the proposed design methods
                 result in 49.4\% reduction in energy consumption and
                 52.3\% reduction in runtime compared to a typical FIFO
                 management of free resources.",
  acknowledgement = ack-nhfb,
  articleno =    "47",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Huang:2016:FOF,
  author =       "Xing Huang and Wenzhong Guo and Genggeng Liu and
                 Guolong Chen",
  title =        "{FH-OAOS}: a Fast Four-Step Heuristic for
                 Obstacle-Avoiding Octilinear {Steiner} Tree
                 Construction",
  journal =      j-TODAES,
  volume =       "21",
  number =       "3",
  pages =        "48:1--48:??",
  month =        jul,
  year =         "2016",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2856033",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Sep 23 15:16:20 MDT 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "With the sharp increase of very large-scale integrated
                 (VLSI) circuit density, we are faced with many knotty
                 issues. Particularly in the routing phase of VLSI
                 physical design, the interconnection effects directly
                 relate to the final performance of circuits. However,
                 the optimization capability of traditional rectilinear
                 architecture is limited; thus, both academia and
                 industry have been devoted to nonrectilinear
                 architecture in recent years, especially octilinear
                 architecture, which is the most promising because it
                 can greatly improve the performance of modern chips. In
                 this article, we design FH-OAOS, an obstacle-avoiding
                 algorithm in octilinear architecture, by constructing
                 an obstacle-avoiding octilinear Steiner minimal
                 tree (OAOSMT). Our approach first constructs an
                 obstacle-free Euclidean minimal spanning tree (OFEMST)
                 on the given pins based on Delaunay triangulation (DT).
                 Then, two lookup tables about OFEMST's edge are
                 generated, which can be seen as the information center
                 of FH-OAOS and can provide information support for
                 algorithm operation. Next, an efficient
                 obstacle-avoiding strategy is proposed to convert the
                 OFEMST into an obstacle-avoiding octilinear Steiner
                 tree (OAOST). Finally, the generated OAOST is refined
                 to construct the final OAOSMT by applying three
                 effective strategies. Experimental results on various
                 benchmarks show that FH-OAOS achieves 66.39 times
                 speedup on average, while the average wirelength of the
                 final OAOSMT is only 0.36\% larger than the best
                 existing solution.",
  acknowledgement = ack-nhfb,
  articleno =    "48",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Mittal:2016:STC,
  author =       "Sparsh Mittal",
  title =        "A Survey of Techniques for Cache Locking",
  journal =      j-TODAES,
  volume =       "21",
  number =       "3",
  pages =        "49:1--49:??",
  month =        jul,
  year =         "2016",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2858792",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Sep 23 15:16:20 MDT 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Cache memory, although important for boosting
                 application performance, is also a source of execution
                 time variability, and this makes its use difficult in
                 systems requiring worst-case execution time (WCET)
                 guarantees. Cache locking is a promising approach for
                 simplifying WCET estimation and providing
                 predictability, and hence, several commercial
                 processors provide ability for locking cache. However,
                 cache locking also has several disadvantages (e.g.,
                 extra misses for unlocked blocks, complex algorithms
                 required for selection of locking contents) and hence,
                 a careful management is required to realize the full
                 potential of cache locking. In this article, we present
                 a survey of techniques proposed for cache locking. We
                 categorize the techniques into several groups to
                 underscore their similarities and differences. We also
                 discuss the opportunities and obstacles in using cache
                 locking. We hope that this article will help
                 researchers gain insight into cache locking schemes and
                 will also stimulate further work in this area.",
  acknowledgement = ack-nhfb,
  articleno =    "49",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Venkatasubramanian:2016:PID,
  author =       "Ramachandran Venkatasubramanian and Robert Elio and
                 Sule Ozev",
  title =        "Process Independent Design Methodology for the Active
                 {RC} and Single-Inverter-Based Rail Clamp",
  journal =      j-TODAES,
  volume =       "21",
  number =       "3",
  pages =        "50:1--50:??",
  month =        jul,
  year =         "2016",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2851490",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Sep 23 15:16:20 MDT 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "RC and single-inverter-based rail clamps are widely
                 used in semiconductor products for electrostatic
                 discharge (ESD) protection. We propose a
                 technology-node-independent design methodology for
                 these rail clamp circuits that takes process, voltage,
                 and temperature variations into consideration. The
                 methodology can be used as a cookbook by the designer
                 or be used to automate the entire design process.
                 Tradeoffs between various design metrics such as ESD
                 performance (Human Body Model), leakage, and area are
                 considered. Simplified circuit models for the rail
                 clamp are presented to gain insights into its working
                 and to size the circuit components. A rail clamp for
                 core power domain is designed using the proposed
                 approach in a 40nm low-power process and performance
                 results of the design are also presented. The
                 effectiveness of the design methodology is proven in
                 three different technology nodes by comparing the
                 obtained design with the best design from among 250,000
                 designs obtained by randomly sampling from the design
                 space.",
  acknowledgement = ack-nhfb,
  articleno =    "50",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Kim:2016:SDM,
  author =       "Sangmin Kim and Seokhyeong Kang and Youngsoo Shin",
  title =        "Synthesis of Dual-Mode Circuits Through Library
                 Design, Gate Sizing, and Clock-Tree Optimization",
  journal =      j-TODAES,
  volume =       "21",
  number =       "3",
  pages =        "51:1--51:??",
  month =        jul,
  year =         "2016",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2856032",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Sep 23 15:16:20 MDT 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "A dual-mode circuit is a circuit that has two
                 operating modes: a default high-performance mode at
                 nominal voltage and a secondary low-performance
                 near-threshold voltage (NTV) mode. A key problem that
                 we address is to maximize NTV mode clock frequency.
                 Some cells that are particularly slow in NTV mode are
                 optimized through transistor sizing and stack removal;
                 static noise margin of each gate is extracted and
                 appended in a library so that function failures can be
                 checked and removed during synthesis. A new gate-sizing
                 algorithm is proposed that takes account of timing
                 slacks at both modes. A new sensitivity measure is
                 introduced for this purpose; binary search is then
                 applied to find the maximum NTV mode frequency.
                 Clock-tree synthesis is reformulated to minimize clock
                 skew at both modes. This is motivated by the fact that
                 the proportion of load-dependent delay along clock
                 paths, as well as clock-path delays themselves, should
                 be made equal. Experiments on some test circuits
                 indicate that NTV mode clock period is reduced by 24\%,
                 on average; clock skew at NTV decreases by 13\%, on
                 average; and NTV mode energy-delay product is reduced
                 by 20\%, on average.",
  acknowledgement = ack-nhfb,
  articleno =    "51",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Qian:2016:PEN,
  author =       "Zhiliang Qian and Paul Bogdan and Chi-Ying Tsui and
                 Radu Marculescu",
  title =        "Performance Evaluation of {NoC}-Based Multicore
                 Systems: From Traffic Analysis to {NoC} Latency
                 Modeling",
  journal =      j-TODAES,
  volume =       "21",
  number =       "3",
  pages =        "52:1--52:??",
  month =        jul,
  year =         "2016",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2870633",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Sep 23 15:16:20 MDT 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "In this survey, we review several approaches for
                 predicting performance of Network-on-Chip (NoC)-based
                 multicore systems, starting from the traffic models to
                 the complex NoC models for latency evaluation. We first
                 review typical traffic models to represent the
                 application workloads in NoC. Specifically, we review
                 Markovian and non-Markovian (e.g., self-similar or
                 long-range memory-dependent) traffic models and discuss
                 their applications on multicore platform design. Then,
                 we review the analytical techniques to predict NoC
                 performance under given input traffic. We investigate
                 analytical models for average as well as maximum delay
                 evaluation. We also review the developments and design
                 challenges of NoC simulators. One interesting research
                 direction in NoC performance evaluation consists of
                 combining simulation and analytical models in order to
                 exploit their advantages together. Toward this end, we
                 discuss several newly proposed approaches that use
                 hardware-based or learning-based techniques. Finally,
                 we summarize several open problems and our perspective
                 to address these challenges.",
  acknowledgement = ack-nhfb,
  articleno =    "52",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Kashif:2016:PSR,
  author =       "Hany Kashif and Hiren Patel and Sebastian Fischmeister",
  title =        "Path Selection for Real-Time Communication on Priority-Aware
                 {NoCs}",
  journal =      j-TODAES,
  volume =       "21",
  number =       "3",
  pages =        "53:1--53:??",
  month =        jul,
  year =         "2016",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2866572",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Sep 23 15:16:20 MDT 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "This work investigates selecting paths for communication
                 flows when deploying a hard real-time application on a
                 chip-multiprocessor system. This chip-multiprocessor system
                 uses a priority-aware real-time network-on-chip interconnect
                 between the processors. Given a mapping of the computation
                 tasks onto the chip-multiprocessor, the problem we address in
                 this work is to discover paths the communication flows take
                 such that hard real-time deadlines of flows are met.
                 Furthermore, we must ensure that deadlines are met even in
                 the presence of direct and indirect interference from other
                 flows sharing network links on the path. To achieve this, our
                 algorithm utilizes a stage-level analysis for real-time
                 communication to determine the impact of a network link being
                 used by a flow, and its effect on other flows sharing the
                 link. The path selection algorithm uses heuristics such as
                 selecting links with least interference, and considering
                 lower-priority flows when dedicating links to paths of
                 higher-priority flows since an optimal one is intractable.
                 The algorithm also considers constraints on the number of
                 virtual channels at each router port in the network. The
                 statistically significant experimental results show an
                 improvement in schedulability by 5\% and 12\% over existing
                 path selection algorithms such as Minimum Interference Routing
                 and Widest Shortest Path algorithms, respectively. We also
                 present a set-top box case study to further illustrate the
                 benefits of using the proposed algorithm.",
  acknowledgement = ack-nhfb,
  articleno =    "53",
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems
                 (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Liu:2016:ECM,
  author =       "Chuangwen Liu and Peishan Tu and Pangbo Wu and Haomo Tang and
                 Yande Jiang and Jian Kuang and Evangeline F. Y. Young",
  title =        "An Effective Chemical Mechanical Polishing Fill Insertion
                 Approach",
  journal =      j-TODAES,
  volume =       "21",
  number =       "3",
  pages =        "54:1--54:??",
  month =        jul,
  year =         "2016",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2886097",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Sep 23 15:16:20 MDT 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "To reduce chip-scale topography variation, dummy fill is
                 commonly used to improve the layout density uniformity.
                 Previous works either sought the most uniform density
                 distribution or sought to minimize the inserted dummy fills
                 while satisfying certain density uniformity constraint.
                 However, due to more stringent manufacturing challenges, more
                 criteria, like line deviation and outlier, emerge at newer
                 technology nodes. This article presents a joint optimization
                 scheme to consider variation, total fill, line deviation,
                 outlier, overlap, and running time simultaneously. More
                 specifically, first we decompose the rectilinear polygons and
                 partition fillable regions into rectangles for easier
                 processing. After decomposition, we insert dummy fills into
                 the fillable rectangular regions optimizing the fill metrics
                 simultaneously. We propose three approaches, Fast Median
                 approach, LP approach, and Iterative approach, which are much
                 faster with better quality, compared with the results of the
                 top three contestants in the ICCAD Contest 2014.",
  acknowledgement = ack-nhfb,
  articleno =    "54",
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems
                 (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Zuluaga:2016:SSN,
  author =       "Marcela Zuluaga and Peter Milder and Markus
                 P{\"u}schel",
  title =        "Streaming Sorting Networks",
  journal =      j-TODAES,
  volume =       "21",
  number =       "4",
  pages =        "55:1--55:??",
  month =        sep,
  year =         "2016",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2854150",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Sep 23 15:16:20 MDT 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Sorting is a fundamental problem in computer science
                 and has been studied extensively. Thus, a large variety
                 of sorting methods exist for both software and hardware
                 implementations. For the latter, there is a trade-off
                 between the throughput achieved and the cost (i.e., the
                 logic and storage invested to sort $n$ elements). Two
                 popular solutions are bitonic sorting networks with
                 $ O(n \log^2 n) $ logic and storage, which sort $n$
                 elements per cycle, and linear sorters with $ O(n) $
                 logic and storage, which sort $n$ elements per $n$
                 cycles. In this article, we present new hardware
                 structures that we call streaming sorting networks,
                 which we derive through a mathematical formalism that
                 we introduce, and an accompanying domain-specific
                 hardware generator that translates our formal
                 mathematical description into synthesizable RTL
                 Verilog. With the new networks, we achieve novel and
                 improved cost-performance trade-offs. For example,
                 assuming that $n$ is a two-power and $w$ is any
                 divisor of $n$, one class of these networks can sort in
                 $ n / w $ cycles with $ O(w \log^2 n) $ logic and
                 $ O(n \log^2 n) $ storage; the other class that we
                 present sorts in $ n \log^2 n / w $ cycles with
                 $ O(w) $ logic and $ O(n) $ storage. We carefully
                 analyze the performance of these networks and their
                 cost at three levels of abstraction: (1)
                 asymptotically, (2) exactly in terms of the number of
                 basic elements needed, and (3) in terms of the
                 resources required by the actual circuit when mapped to
                 a field-programmable gate array. The accompanying
                 hardware generator allows us to explore the entire
                 design space, identify the Pareto-optimal solutions,
                 and show superior cost-performance trade-offs compared
                 to prior work.",
  acknowledgement = ack-nhfb,
  articleno =    "55",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Zhao:2016:SRE,
  author =       "Yue Zhao and Taeyoung Kim and Hosoon Shin and Sheldon X.-D.
                 Tan and Xin Li and Haibao Chen and Hai Wang",
  title =        "Statistical Rare-Event Analysis and Parameter Guidance by
                 Elite Learning Sample Selection",
  journal =      j-TODAES,
  volume =       "21",
  number =       "4",
  pages =        "56:1--56:??",
  month =        sep,
  year =         "2016",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2875422",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Sep 23 15:16:20 MDT 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Accurately estimating the failure region of rare events for
                 memory-cell and analog circuit blocks under process variations
                 is a challenging task. In this article, we propose a new
                 statistical method, called EliteScope, to estimate the circuit
                 failure rates in rare-event regions and to provide conditions
                 of parameters to achieve targeted performance. The new method
                 is based on the iterative blockade framework to reduce the
                 number of samples, but consists of two new techniques to
                 improve existing methods. First, the new approach employs an
                 elite-learning sample-selection scheme, which can consider
                 the effectiveness of samples and well coverage for the
                 parameter space. As a result, it can reduce additional
                 simulation costs by pruning less effective samples while
                 keeping the accuracy of failure estimation. Second, the
                 EliteScope identifies the failure regions in terms of
                 parameter spaces to provide a good design guidance to
                 accomplish the performance target. It applies variance-based
                 feature selection to find the dominant parameters and then
                 determine the in-spec boundaries of those parameters. We
                 demonstrate the advantage of our proposed method using several
                 memory and analog circuits with different numbers of process
                 parameters. Experiments on four circuit examples show that
                 EliteScope achieves a significant improvement on
                 failure-region estimation in terms of accuracy and simulation
                 cost over traditional approaches. The 16b 6T-SRAM column
                 example also demonstrates that the new method is scalable for
                 handling large problems with large numbers of process
                 variables.",
  acknowledgement = ack-nhfb,
  articleno =    "56",
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems
                 (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Ewetz:2016:CRC,
  author =       "Rickard Ewetz and Cheng-Kok Koh",
  title =        "Construction of Reconfigurable Clock Trees for {MCMM}
                 Designs Using Mode Separation and Scenario
                 Compression",
  journal =      j-TODAES,
  volume =       "21",
  number =       "4",
  pages =        "57:1--57:??",
  month =        sep,
  year =         "2016",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2883609",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Sep 23 15:16:20 MDT 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "The clock networks of many modern circuits have to
                 operate in multiple corners and multiple modes (MCMM).
                 We propose to construct mode-reconfigurable clock trees
                 (MRCTs) based on mode separation and scenario
                 compression. The technique of scenario compression is
                 proposed to consider the timing constraints in multiple
                 scenarios at the same time, compressing the MCMM
                 problem into an equivalent single-corner multiple-mode
                 (SCMM), or single-corner single-mode (SCSM) problem.
                 The compression is performed by combining the skew
                 constraints of the different scenarios in skew
                 constraint graphs based on delay linearization and
                 dominating skew constraints. An MRCT consists of
                 several clock trees and mode separation involves,
                 depending on the active mode, selecting one of the
                 clock trees to deliver the clock signal. To limit the
                 overhead, the bottom part (closer to the clock sinks)
                 of all the different clock trees are shared and only
                 the top part (closer to the clock source) of the clock
                 network is mode reconfigurable. The reconfiguration is
                 realized using OR-gates and a one-input-multiple-output
                 demultiplexer. The experimental results show that for a
                 set of synthesized MCMM circuits, with 715 to 13,216
                 sequential elements, the proposed approach can achieve
                 high yield.",
  acknowledgement = ack-nhfb,
  articleno =    "57",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Ghasemzadeh:2016:HAE,
  author =       "Hassan Ghasemzadeh and Ramin Fallahzadeh and Roozbeh Jafari",
  title =        "A Hardware-Assisted Energy-Efficient Processing Model for
                 Activity Recognition Using Wearables",
  journal =      j-TODAES,
  volume =       "21",
  number =       "4",
  pages =        "58:1--58:??",
  month =        sep,
  year =         "2016",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2886096",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Sep 23 15:16:20 MDT 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Wearables are being widely utilized in health and wellness
                 applications, primarily due to the recent advances in sensor
                 and wireless communication, which enhance the promise of
                 wearable systems in providing continuous and real-time
                 monitoring and interventions. Wearables are generally composed
                 of hardware/software components for collection, processing,
                 and communication of physiological data. Practical
                 implementation of wearable monitoring in real-life
                 applications is currently limited due to notable obstacles.
                 The wearability and form factor are dominated by the amount
                 of energy needed for sensing, processing, and communication.
                 In this article, we propose an ultra-low-power granular
                 decision-making architecture, also called screening
                 classifier, which can be viewed as a tiered wake-up circuitry,
                 consuming three orders of magnitude-less power than the
                 state-of-the-art low-power microcontrollers. This processing
                 model operates based on computationally simple template
                 matching modules, based on coarse- to fine-grained analysis
                 of the signals with on-demand and gradually increasing the
                 processing power consumption. Initial template matching
                 rejects signals that are clearly not of interest from the
                 signal processing chain, keeping the rest of processing blocks
                 idle. If the signal is likely of interest, the sensitivity and
                 the power of the template matching modules are gradually
                 increased, and ultimately, the main processing unit is
                 activated. We pose optimization techniques to efficiently
                 split a full template into smaller bins, called
                 mini-templates, and activate only a subset of bins during each
                 classification decision. Our experimental results on real data
                 show that this signal screening model reduces power
                 consumption of the processing architecture by a factor of
                 70\% while the sensitivity of detection remains at least
                 80\%.",
  acknowledgement = ack-nhfb,
  articleno =    "58",
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems
                 (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Teman:2016:PAP,
  author =       "Adam Teman and Davide Rossi and Pascal Meinerzhagen
                 and Luca Benini and Andreas Burg",
  title =        "Power, Area, and Performance Optimization of Standard
                 Cell Memory Arrays Through Controlled Placement",
  journal =      j-TODAES,
  volume =       "21",
  number =       "4",
  pages =        "59:1--59:??",
  month =        sep,
  year =         "2016",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2890498",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Sep 23 15:16:20 MDT 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Embedded memory remains a major bottleneck in current
                 integrated circuit design in terms of silicon area,
                 power dissipation, and performance; however, static
                 random access memories (SRAMs) are almost exclusively
                 supplied by a small number of vendors through memory
                 generators, targeted at rather generic design
                 specifications. As an alternative, standard cell
                 memories (SCMs) can be defined, synthesized, and placed
                 and routed as an integral part of a given digital
                 system, providing complete design flexibility, good
                 energy efficiency, low-voltage operation, and even area
                 efficiency for small memory blocks. Yet implementing an
                 SCM block with a standard digital flow often fails to
                 exploit the distinct and regular structure of such an
                 array, leaving room for optimization. In this article,
                 we present a design methodology for optimizing the
                 physical implementation of SCM macros as part of the
                 standard design flow. This methodology introduces
                 controlled placement, leading to a structured,
                 noncongested layout with close to 100\% placement
                 utilization, resulting in a smaller silicon footprint,
                 reduced wire length, and lower power consumption
                 compared to SCMs without controlled placement. This
                 methodology is demonstrated on SCM macros of various
                 sizes and aspect ratios in a state-of-the-art 28 nm
                 fully depleted silicon-on-insulator technology, and
                 compared with equivalent macros designed with the
                 noncontrolled, standard flow, as well as with
                 foundry-supplied SRAM macros. The controlled SCMs
                 provide an average 25\% reduction in area as compared
                 to noncontrolled implementations while achieving a
                 smaller size than SRAM macros of up to 1 Kbyte. Power
                 and performance comparisons of controlled SCM blocks of
                 a commonly found 256 $ \times $ 32 (1 Kbyte) memory
                 with foundry-provided SRAMs show greater than 65\% and
                 10\% reduction in read and write power, respectively,
                 while providing faster access than their SRAM
                 counterparts, despite being of an aspect ratio that is
                 typically unfavorable for SCMs. In addition, the SCM
                 blocks function correctly with a supply voltage as low
                 as 0.3 V, well below the lower limit of even the SRAM
                 macros optimized for low-voltage operation. The
                 controlled placement methodology is applied within a
                 full-chip physical implementation flow of an
                 OpenRISC-based test chip, providing more than 50\%
                 power reduction compared to equivalently sized compiled
                 SRAMs under a benchmark application.",
  acknowledgement = ack-nhfb,
  articleno =    "59",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Narayanaswamy:2016:BRE,
  author =       "Swaminathan Narayanaswamy and Steffen Schlueter and Sebastian
                 Steinhorst and Martin Lukasiewycz and Samarjit Chakraborty and
                 Harry Ernst Hoster",
  title =        "On Battery Recovery Effect in Wireless Sensor Nodes",
  journal =      j-TODAES,
  volume =       "21",
  number =       "4",
  pages =        "60:1--60:??",
  month =        sep,
  year =         "2016",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2890501",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Sep 23 15:16:20 MDT 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "With the perennial demand for longer runtime of
                 battery-powered Wireless Sensor Nodes (WSNs), several
                 techniques have been proposed to increase the battery runtime.
                 One such class of techniques exploiting the battery recovery
                 effect phenomenon claims that performing an intermittent
                 discharge instead of a continuous discharge will increase the
                 usable battery capacity. Several works in the areas of
                 embedded systems and wireless sensor networks have assumed the
                 existence of this recovery effect and proposed different power
                 management techniques in the form of power supply
                 architectures (multiple battery setup) and communication
                 protocols (burst mode transmission) in order to exploit it.
                 However, until now, a systematic experimental evaluation of
                 the recovery effect has not been performed with real battery
                 cells, using high-accuracy battery testers to confirm the
                 existence of this recovery phenomenon. In this article, a
                 systematic evaluation procedure is developed to verify the
                 existence of this battery recovery effect. Using our
                 evaluation procedure, we investigated Alkaline, Nickel-Metal
                 Hydride (NiMH), and Lithium-Ion (Li-Ion) battery chemistries,
                 which are commonly used as power supplies for Wireless Sensor
                 Node (WSN) applications. Our experimental results do not show
                 any evidence of the aforementioned recovery effect in these
                 battery chemistries. In particular, our results show a
                 significant deviation from the stochastic battery models,
                 which were used by many power management techniques.
                 Therefore, the existing power management approaches that rely
                 on this recovery effect do not hold in practice. Instead of a
                 battery recovery effect, our experimental results show the
                 existence of the rate capacity effect, which is the reduction
                 of usable battery capacity with higher discharge power, to be
                 the dominant electrochemical phenomenon that should be
                 considered for maximizing the runtime of WSN applications. We
                 outline power management techniques that minimize the rate
                 capacity effect in order to obtain a higher energy output from
                 the battery.",
  acknowledgement = ack-nhfb,
  articleno =    "60",
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems
                 (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Tannir:2016:AMN,
  author =       "Dani Tannir and Ya Wang and Peng Li",
  title =        "Accurate Modeling of Nonideal Low-Power {PWM} {DC--DC}
                 Converters Operating in {CCM} and {DCM} Using Enhanced
                 Circuit-Averaging Techniques",
  journal =      j-TODAES,
  volume =       "21",
  number =       "4",
  pages =        "61:1--61:??",
  month =        sep,
  year =         "2016",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2890500",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Sep 23 15:16:20 MDT 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "The development of enhanced modeling techniques for
                 the simulation of switched-mode Pulse Width Modulated
                 (PWM) DC--DC power converters using circuit averaging is
                 the main focus of this article. The circuit-averaging
                 technique has traditionally been used to model the
                 behavior of PWM DC--DC converters without considering
                 important nonideal characteristics of the switching
                 devices. As a result, most of these existing approaches
                 present simplified models that are ideal or linearized,
                 and do not accurately account for the performance
                 characteristics of the converter. This is especially
                 problematic for low-power applications. In this
                 article, we present an enhanced nonideal behavioral
                 circuit-averaged model that makes the simulation of
                 DC--DC converters both computationally efficient and
                 accurate, thereby presenting an important tool for
                 circuit designers. Experimentally, we show that our
                 Verilog-A-based new model allows for accurate
                 simulation of both Buck- and Boost-type PWM converters
                 operating in either CCM or DCM modes while providing
                 more than one order of magnitude speedup over the
                 transistor-level simulation.",
  acknowledgement = ack-nhfb,
  articleno =    "61",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Steinhorst:2016:CPC,
  author =       "Sebastian Steinhorst and Matthias Kauer and Arne Meeuw
                 and Swaminathan Narayanaswamy and Martin Lukasiewycz
                 and Samarjit Chakraborty",
  title =        "Cyber-Physical Co-Simulation Framework for Smart Cells
                 in Scalable Battery Packs",
  journal =      j-TODAES,
  volume =       "21",
  number =       "4",
  pages =        "62:1--62:??",
  month =        sep,
  year =         "2016",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2891407",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Sep 23 15:16:20 MDT 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "This article introduces a Cyber-physical Co-Simulation
                 Framework (CPCSF) for design and analysis of smart
                 cells that enable scalable battery pack and Battery
                 Management System (BMS) architectures. In contrast to
                 conventional cells in battery packs, where all cells
                 are monitored and controlled centrally, each smart cell
                 is equipped with its own electronics in the form of a
                 Cell Management Unit (CMU). The CMU maintains the cell
                 in a safe and healthy operating state, while
                 system-level battery management functions are performed
                 by cooperation of the smart cells via communication.
                 Here, the smart cells collaborate in a self-organizing
                 fashion without a central controller instance. This
                 enables maximum scalability and modularity,
                 significantly simplifying integration of battery packs.
                 However, for this emerging architecture, system-level
                 design methodologies and tools have not been
                 investigated yet. By contrast, components are developed
                 individually and then manually tested in a hardware
                 development platform. Consequently, the systematic
                 design of the hardware/software architecture of smart
                 cells requires a cyber-physical multi-level
                 co-simulation of the network of smart cells that has to
                 include all the components from the software,
                 electronic, electric, and electrochemical domains. This
                 comprises distributed BMS algorithms running on the
                 CMUs, the communication network, control circuitry,
                 cell balancing hardware, and battery cell behavior. For
                 this purpose, we introduce a CPCSF that enables rapid
                 design and analysis of smart cell hardware/software
                 architectures. Our framework is then applied to
                 investigate request-driven active cell balancing
                 strategies that make use of the decentralized system
                 architecture. In an exhaustive analysis on a realistic
                 21.6 kWh Electric Vehicle (EV) battery pack containing
                 96 smart cells in series, the CPCSF is able to simulate
                 hundreds of balancing runs together with all system
                 characteristics, using the proposed request-driven
                 balancing strategies at highest accuracy within an
                 overall time frame of several hours. Consequently, the
                 presented CPCSF for the first time allows us to
                 quantitatively and qualitatively analyze the behavior
                 of smart cell architectures for real-world
                 applications.",
  acknowledgement = ack-nhfb,
  articleno =    "62",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Guin:2016:FCS,
  author = {Ujjwal Guin and Qihang Shi and Domenic Forte and Mark
    M. Tehranipoor},
  title = {{FORTIS}: a Comprehensive Solution for Establishing
    Forward Trust for Protecting {IPs} and {ICs}},
  journal = j-TODAES,
  volume = {21},
  number = {4},
  pages = {63:1--63:??},
  month = sep,
  year = {2016},
  coden = {ATASFO},
  doi = {https://doi.org/10.1145/2893183},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  bibdate = {Fri Sep 23 15:16:20 MDT 2016},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {With the advent of globalization in the semiconductor
    industry, it is necessary to prevent unauthorized usage
    of third-party IPs (3PIPs), cloning and unwanted
    modification of 3PIPs, and unauthorized production of
    ICs. Due to the increasing complexity of ICs,
    system-on-chip (SoC) designers use various 3PIPs in
    their design to reduce time-to-market and development
    costs, which creates a trust issue between the SoC
    designer and the IP owners. In addition, as the ICs are
    fabricated around the globe, the SoC designers give
    fabrication contracts to offshore foundries to
    manufacture ICs and have little control over the
    fabrication process, including the total number of
    chips fabricated. Similarly, the 3PIP owners lack
    control over the number of fabricated chips and/or the
    usage of their IPs in an SoC. Existing research only
    partially addresses the problems of IP piracy and IC
    overproduction, and to the best of our knowledge, there
    is no work that considers IP overuse. In this article,
    we present a comprehensive solution for preventing IP
    piracy and IC overproduction by assuring forward trust
    between all entities involved in the SoC design and
    fabrication process. We propose a novel design flow to
    prevent IC overproduction and IP overuse. We use an
    existing logic encryption technique to obfuscate the
    netlist of an SoC or a 3PIP and propose a modification
    to enable manufacturing tests before the activation of
    chips which is absolutely necessary to prevent
    overproduction. We have used asymmetric and symmetric
    key encryption, in a fashion similar to Pretty Good
    Privacy (PGP), to transfer keys from the SoC designer
    or 3PIP owners to the chips. In addition, we also
    propose to attach an IP digest (a cryptographic hash of
    the entire IP) to the header of an IP to prevent
    modification of the IP by the SoC designers. We have
    shown that our approach is resistant to various attacks
    with the cost of minimal area overhead.},
  acknowledgement = ack-nhfb,
  articleno = {63},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Lee:2016:TPD,
  author =       "William Lee and Vikas S. Vij and Kenneth S. Stevens",
  title =        "Timing Path-Driven Cycle Cutting for Sequential
                 Controllers",
  journal =      j-TODAES,
  volume =       "21",
  number =       "4",
  pages =        "64:1--64:??",
  month =        sep,
  year =         "2016",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2893473",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Sep 23 15:16:20 MDT 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Power and performance optimization of integrated
                 circuits is performed by timing-driven algorithms that
                 operate on directed acyclic graphs. Sequential circuits
                 and circuits with topological feedback contain cycles.
                 Cyclic circuits must be represented as directed acyclic
                 graphs to be optimized and evaluated using static
                 timing analysis. Algorithms in commercial electronic
                 design automation tools generate the required acyclic
                 graphs by cutting cycles without considering timing
                 paths. This work reports on a method for generating
                 directed acyclic circuit graphs that do not cut the
                 specified timing paths. The algorithm is applied to
                 over 125 benchmark designs and asynchronous handshake
                 controllers. The runtime is less than 1 second, even
                 for the largest published controllers. Circuit
                 timing graphs generated using this method retain the
                 necessary timing paths, which enables circuit
                 validation and optimization employing the commercial
                 tools. Additional benefits show these designs are on an
                 average a third in size, operate 33.3\% faster, and
                 consume one-fourth the energy.",
  acknowledgement = ack-nhfb,
  articleno =    "64",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Xu:2016:HSL,
  author = {Yang Xu and J{\"u}rgen Teich},
  title = {Hierarchical Statistical Leakage Analysis and Its
    Application},
  journal = j-TODAES,
  volume = {21},
  number = {4},
  pages = {65:1--65:??},
  month = sep,
  year = {2016},
  coden = {ATASFO},
  doi = {https://doi.org/10.1145/2896820},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  bibdate = {Fri Sep 23 15:16:20 MDT 2016},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {In this article, we investigate a hierarchical
    statistical leakage analysis (HSLA) design flow where
    module-level statistical leakage models supplied by IP
    vendors are used to improve the efficiency and capacity
    of SoC statistical leakage power analysis. To solve the
    challenges of incorporating spatial correlations
    between IP modules at system level, we first propose a
    method to extract correlation-inclusive leakage models.
    Then a method to handle the spatial correlations at
    system level is proposed. Using this method, the
    runtime of system statistical leakage analysis (SLA)
    can be significantly improved without disclosing the
    netlists of the IP modules. Experimental results
    demonstrate that the proposed HSLA method is about 100
    times faster than gate-level full-chip SLA methods
    while maintaining the accuracy. In addition, we also
    investigate one application of this HSLA method, a
    leakage-yield-driven floorplanning framework, to
    demonstrate the benefits of such an HSLA method in
    practice. Moreover, an optimized hierarchical leakage
    analysis method dedicated to the floorplanning
    framework is proposed. The effectiveness of the
    floorplanning framework and the optimized method are
    confirmed by extensive experimental results.},
  acknowledgement = ack-nhfb,
  articleno = {65},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{S:2016:EAD,
  author =       "Ramprasath S. and Vinita Vasudevan",
  title =        "Efficient Algorithms for Discrete Gate Sizing and
                 Threshold Voltage Assignment Based on an Accurate
                 Analytical Statistical Yield Gradient",
  journal =      j-TODAES,
  volume =       "21",
  number =       "4",
  pages =        "66:1--66:??",
  month =        sep,
  year =         "2016",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2896819",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Sep 23 15:16:20 MDT 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "In this article, we derive a simple and accurate
                 expression for the change in timing yield due to a
                 change in the gate delay distribution. It is based on
                 analytical bounds that we have derived for the moments
                 of the circuit and path delay. Based on this, we
                 propose computationally efficient algorithms for (1)
                 discrete gate sizing and (2) simultaneous gate sizing
                 and threshold voltage ($V_T$) assignment so that the
                 circuit meets a timing yield specification under
                 parameter variations. The use of this analytical yield
                 gradient within a gradient-based timing yield
                 optimization algorithm results in a significant
                 improvement in the runtime as compared to the numerical
                 method, while achieving the same final yield. It also
                 allows us to explore a larger search space in each
                 iteration more efficiently, which is required in the
                 case of simultaneous resizing and $V_T$ assignment. We
                 also propose heuristics for resizing/changing the $V_T$
                 of multiple gates in each iteration. This makes it
                 possible to optimize the timing yield for large
                 circuits. Results on ITC '99 benchmarks show that the
                 proposed multinode resizing algorithm results in a
                 significant improvement in the runtime with a marginal
                 average area penalty and no cost to the final yield
                 achieved.",
  acknowledgement = ack-nhfb,
  articleno =    "66",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Wang:2016:ERL,
  author = {Hongfei Wang and R. D. (Shawn) Blanton},
  title = {Ensemble Reduction via Logic Minimization},
  journal = j-TODAES,
  volume = {21},
  number = {4},
  pages = {67:1--67:??},
  month = sep,
  year = {2016},
  coden = {ATASFO},
  doi = {https://doi.org/10.1145/2897515},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  bibdate = {Fri Sep 23 15:16:20 MDT 2016},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {An ensemble of machine learning classifiers usually
    improves generalization performance and is useful for
    many applications. However, the extra memory storage
    and computational cost incurred from the combined
    models often limits their potential applications. In
    this article, we propose a new ensemble reduction
    method called CANOPY that significantly reduces memory
    storage and computations. CANOPY uses a technique from
    logic minimization for digital circuits to select and
    combine particular classification models from an
    initial pool in the form of a Boolean function, through
    which the reduced ensemble performs classification.
    Experiments on 20 UCI datasets demonstrate that CANOPY
    either outperforms or is very competitive with the
    initial ensemble and one state-of-the-art ensemble
    reduction method in terms of generalization error, and
    is superior to all existing reduction methods surveyed
    for identifying the smallest numbers of models in the
    reduced ensembles.},
  acknowledgement = ack-nhfb,
  articleno = {67},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Pomeranz:2016:DTS,
  author =       "Irith Pomeranz",
  title =        "{$N$}-Detection Test Sets for Circuits with Multiple
                 Independent Scan Chains",
  journal =      j-TODAES,
  volume =       "21",
  number =       "4",
  pages =        "68:1--68:??",
  month =        sep,
  year =         "2016",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2897514",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Sep 23 15:16:20 MDT 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "In a circuit with multiple independent scan chains, it
                 is possible to operate groups of scan chains
                 independently in functional or shift mode. This
                 design-for-testability approach can be used to increase
                 the quality of a test set. This article describes an
                 $N$-detection test generation procedure for increasing the
                 quality of a transition fault test set in such a
                 circuit. The procedure uses the possibility of applying
                 the same test, with the scan chains operating in
                 different modes, to increase the numbers of detections
                 without increasing the number of tests that need to be
                 generated or stored on a tester. This results in
                 reduced input storage requirements compared with a
                 conventional $N$-detection test set and an increased
                 number of applied tests. The increased quality of the
                 test set is verified by its bridging fault coverage.",
  acknowledgement = ack-nhfb,
  articleno =    "68",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Won:2016:RSC,
  author = {Jae-Yeon Won and Paul V. Gratz and Srinivas Shakkottai
    and Jiang Hu},
  title = {Resource Sharing Centric Dynamic Voltage and Frequency
    Scaling for {CMP} Cores, Uncore, and Memory},
  journal = j-TODAES,
  volume = {21},
  number = {4},
  pages = {69:1--69:??},
  month = sep,
  year = {2016},
  coden = {ATASFO},
  doi = {https://doi.org/10.1145/2897394},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  bibdate = {Fri Sep 23 15:16:20 MDT 2016},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {With the breakdown of Dennard's scaling over the past
    decade, performance growth of modern microprocessor
    design has largely relied on scaling core count in chip
    multiprocessors (CMPs). The challenge of chip power
    density, however, remains and demands new power
    management solutions. This work investigates a
    coordinated CMP systemwide Dynamic Voltage and
    Frequency Scaling (DVFS) policy centered around shared
    resource utilization. This approach represents a new
    angle on the problem, differing from the conventional
    core-workload-driven approaches. The key component of
    our work is per-core DVFS leveraging a technique
    similar to TCP Vegas congestion control from
    networking. This TCP Vegas-based DVFS can potentially
    identify the synergy between power reduction and
    performance improvement. Further, this work includes
    uncore (on-chip interconnect and shared last level
    cache) and main memory DVFS policies coordinated with
    the per-core DVFS policy. Full system simulations on
    PARSEC benchmarks show that our technique reduces total
    energy dissipation by over 47\% across all benchmarks
    with less than 2.3\% performance degradation. Our work
    also leads to 12\% more energy savings compared to a
    prior work CMP DVFS policy.},
  acknowledgement = ack-nhfb,
  articleno = {69},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Ho:2016:AAD,
  author = {Ching-Hsuan Ho and Yung-Chih Chen and Chun-Yao Wang
    and Ching-Yi Huang and Suman Datta and Vijaykrishnan
    Narayanan},
  title = {Area-Aware Decomposition for Single-Electron
    Transistor Arrays},
  journal = j-TODAES,
  volume = {21},
  number = {4},
  pages = {70:1--70:??},
  month = sep,
  year = {2016},
  coden = {ATASFO},
  doi = {https://doi.org/10.1145/2898998},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  bibdate = {Fri Sep 23 15:16:20 MDT 2016},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {Single-electron transistor (SET) at room temperature
    has been demonstrated as a promising device for
    extending Moore's law due to its ultra-low power
    consumption. Existing SET synthesis methods synthesize
    a Boolean network into a large reconfigurable SET array
    where the height of SET array equals the number of
    primary inputs. However, recent experiments on device
    level have shown that this height is restricted to a
    small number, say, 10, rather than arbitrary value due
    to the ultra-low driving strength of SET devices. On
    the other hand, the width of an SET array is also
    suggested to be a small value. Consequently, it is
    necessary to decompose a large SET array into a set of
    small SET arrays where each of them realizes a
    sub-function of the original circuit with no more than
    10 inputs. Thus, this article presents two techniques
    for achieving area-efficient SET array decomposition:
    One is a width minimization algorithm for reducing the
    area of a single SET array; the other is a
    depth-bounded mapping algorithm, which decomposes a
    Boolean network into many sub-functions such that the
    widths of the corresponding SET arrays are balanced.
    The width minimization algorithm leads to a 25\%--41\%
    improvement compared to the state of the art, and the
    mapping algorithm achieves a 60\% reduction in total
    area compared to a na{\"\i}ve approach.},
  acknowledgement = ack-nhfb,
  articleno = {70},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Mao:2016:LBP,
  author =       "Fubing Mao and Yi-Chung Chen and Wei Zhang and Hai
                 (Helen) Li and Bingsheng He",
  title =        "Library-Based Placement and Routing in {FPGAs} with
                 Support of Partial Reconfiguration",
  journal =      j-TODAES,
  volume =       "21",
  number =       "4",
  pages =        "71:1--71:??",
  month =        sep,
  year =         "2016",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2901295",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Sep 23 15:16:20 MDT 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "While traditional Field-Programmable Gate Array design
                 flow usually employs fine-grained tile-based placement,
                 modular placement is increasingly required to speed up
                 the large-scale placement and save the synthesis time.
                 Moreover, the commonly used modules can be
                 pre-synthesized and stored in the library for design
                 reuse to significantly save the design, verification
                 time, and development cost. Previous work mainly
                 focuses on modular floorplanning without module
                 placement information. In this article, we propose a
                 library-based placement and routing flow that best
                 utilizes the pre-placed and routed modules from the
                 library to significantly save the execution time while
                 achieving the minimal area-delay product. The flow
                 supports the static and reconfigurable modules at the
                 same time. The modular information is represented in
                 the B*-Tree structure, and the B*-Tree operations are
                 amended together with Simulated Annealing to enable a
                 fast search of the placement space. Different
                 width-height ratios of the modules are exploited to
                 achieve area-delay product optimization. Partial
                 reconfiguration-aware routing using pin-to-wire
                 abutment is proposed to connect the modules after
                 placement. Our placer can reduce the compilation time
                 by 65\% on average with 17\% area and 8.2\% delay
                 overhead compared with the fine-grained results of
                 Versatile Place and Route through the reuse of module
                 information in the library for the base architecture.
                 For other architectures, the area increase ranges from
                 8.32\% to 25.79\%, the delay varies from $-$13.66\% to
                 19.79\%, and the runtime improves by 43.31\% to
                 77.2\%.",
  acknowledgement = ack-nhfb,
  articleno =    "71",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Bernasconi:2016:IRZ,
  author =       "Anna Bernasconi and Valentina Ciriani",
  title =        "Index-Resilient Zero-Suppressed {BDDs}: Definition and
                 Operations",
  journal =      j-TODAES,
  volume =       "21",
  number =       "4",
  pages =        "72:1--72:??",
  month =        sep,
  year =         "2016",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2905363",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Sep 23 15:16:20 MDT 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Zero-Suppressed Binary Decision Diagrams (ZDDs) are
                 widely used data structures for representing and
                 handling combination sets and Boolean functions. In
                 particular, ZDDs are commonly used in CAD for the
                 synthesis and verification of integrated circuits. The
                 purpose of this article is to design an error-resilient
                 version of this data structure: a self-repairing ZDD.
                 More precisely, we design a new ZDD canonical form,
                 called index-resilient reduced ZDD, such that a faulty
                 index can be reconstructed in time $O(k)$, where $k$ is
                 the number of nodes with a corrupted index. Moreover,
                 we propose new versions of the standard algorithms for
                 ZDD manipulation and construction that are error
                 resilient during their execution and produce an
                 index-resilient ZDD as output. The experimental results
                 validate the proposed approach.",
  acknowledgement = ack-nhfb,
  articleno =    "72",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Wang:2016:HDT,
  author = {Hai Wang and Jian Ma and Sheldon X.-D. Tan and Chi
    Zhang and He Tang and Keheng Huang and Zhenghong
    Zhang},
  title = {Hierarchical Dynamic Thermal Management Method for
    High-Performance Many-Core Microprocessors},
  journal = j-TODAES,
  volume = {22},
  number = {1},
  pages = {1:1--1:??},
  month = dec,
  year = {2016},
  coden = {ATASFO},
  doi = {https://doi.org/10.1145/2891409},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  bibdate = {Fri Jul 21 10:49:29 MDT 2017},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {It is challenging to manage the thermal behavior of
    many-core microprocessors while still keeping them
    running at high performance since the control
    complexity increases as the core number increases. In
    this article, a novel hierarchical dynamic thermal
    management method is proposed to overcome this
    challenge. The new method employs model predictive
    control (MPC) with task migration and a DVFS scheme to
    ensure smooth control behavior and negligible computing
    performance sacrifice. In order to be scalable to
    many-core systems, the hierarchical control scheme is
    designed with two levels. At the lower level, the cores
    are spatially clustered into blocks, and local task
    migration is used to match current power distribution
    with the optimal distribution calculated by MPC. At the
    upper level, global task migration is used with the
    unmatched powers from the lower level. A modified
    iterative minimum cut algorithm is used to assist the
    task migration decision making if the power number is
    large at the upper level. Finally, DVFS is applied to
    regulate the remaining unmatched powers. Experiments
    show that the new method outperforms existing methods
    and is very scalable to manage many-core
    microprocessors with small performance degradation.},
  acknowledgement = ack-nhfb,
  articleno = {1},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Poddar:2016:ECS,
  author = {Sudip Poddar and Sarmishtha Ghoshal and Krishnendu
    Chakrabarty and Bhargab B. Bhattacharya},
  title = {Error-Correcting Sample Preparation with Cyberphysical
    Digital Microfluidic Lab-on-Chip},
  journal = j-TODAES,
  volume = {22},
  number = {1},
  pages = {2:1--2:??},
  month = dec,
  year = {2016},
  coden = {ATASFO},
  doi = {https://doi.org/10.1145/2898999},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  bibdate = {Fri Jul 21 10:49:29 MDT 2017},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {Digital (droplet-based) microfluidic technology offers
    an attractive platform for implementing a wide variety
    of biochemical laboratory protocols, such as
    point-of-care diagnosis, DNA analysis, target
    detection, and drug discovery. A digital microfluidic
    biochip consists of a patterned array of electrodes on
    which tiny fluid droplets are manipulated by electrical
    actuation sequences to perform various fluidic
    operations, for example, dispense, transport, mix, or
    split. However, because of the inherent uncertainty of
    fluidic operations, the outcome of biochemical
    experiments performed on-chip can be erroneous even if
    the chip is tested a priori and deemed to be
    defect-free. In this article, we address an important
    error recoverability problem in the context of sample
    preparation. We assume a cyberphysical environment, in
    which the physical errors, when detected online at
    selected checkpoints with integrated sensors, can be
    corrected through recovery techniques. However, almost
    all prior work on error recoverability used
    checkpointing-based rollback approach, that is,
    re-execution of certain portions of the protocol
    starting from the previous checkpoint. Unfortunately,
    such techniques are expensive both in terms of assay
    completion time and reagent cost, and can never ensure
    full error-recovery in deterministic sense. We consider
    imprecise droplet mix-split operations and present a
    novel roll-forward approach where the erroneous
    droplets, thus produced, are used in the error-recovery
    process, instead of being discarded or remixed. All
    erroneous droplets participate in the dilution process
    and they mutually cancel or reduce the
    concentration-error when the target droplet is reached.
    We also present a rigorous analysis that reveals the
    role of volumetric-error on the concentration of a
    sample to be prepared, and we describe the layout of a
    lab-on-chip that can execute the proposed cyberphysical
    dilution algorithm. Our analysis reveals that fluidic
    errors caused by unbalanced droplet splitting can be
    classified as being either critical or non-critical,
    and only those of the former type require correction to
    achieve error-free sample dilution. Simulation
    experiments on various sample preparation test cases
    demonstrate the effectiveness of the proposed method.},
  acknowledgement = ack-nhfb,
  articleno = {2},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Czerwinski:2016:SAO,
  author =       "Robert Czerwinski and Dariusz Kania",
  title =        "State Assignment and Optimization of Ultra-High-Speed
                 {FSMs} Utilizing Tristate Buffers",
  journal =      j-TODAES,
  volume =       "22",
  number =       "1",
  pages =        "3:1--3:??",
  month =        dec,
  year =         "2016",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2905366",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 21 10:49:29 MDT 2017",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "The logic synthesis of ultra-high-speed FSMs is
                 presented. The state assignment is based on a
                 well-known method that uses output vectors. This
                 technique is adjusted to include elements of two-level
                 minimization and takes into account the limited number
                 of terms contained in the programmable-AND/fixed-OR
                 logic cell. The state assignment is based on a special
                 form of the binary decision tree. The second phase of
                 the FSM design is logic optimization. The optimization
                 method is based on tristate buffers, thus making
                 possible a one-logic-level FSM structure. The key point
                 is to search partition variables that control the
                 tristate buffers. This technique can also be applied to
                 combinational circuits or the output block of FSMs
                 only. Algorithms for state assignment and optimization
                 are presented and richly illustrated by examples. The
                 method is dedicated to using specific features of
                 complex programmable logic devices. Experimental
                 results prove its effectiveness (e.g., the
                 implementation of the 16-bit counter requires 136
                 logic cells and one-logic-cell level instead of 213
                 cells and four levels). The optimization method using
                 tristate buffers and a state assignment binary decision
                 tree can be directly applied to FPGA-dedicated logic
                 synthesis.",
  acknowledgement = ack-nhfb,
  articleno =    "3",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Das:2016:FBP,
  author =       "Shirshendu Das and Hemangee K. Kapoor",
  title =        "A Framework for Block Placement, Migration, and Fast
                 Searching in Tiled-{DNUCA} Architecture",
  journal =      j-TODAES,
  volume =       "22",
  number =       "1",
  pages =        "4:1--4:??",
  month =        dec,
  year =         "2016",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2907946",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 21 10:49:29 MDT 2017",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Multicore processors have proliferated several domains
                 ranging from small-scale embedded systems to large data
                 centers, making tiled CMPs (TCMPs) the essential
                 next-generation scalable architecture. NUCA
                 architectures help in managing the capacity and access
                 time for such larger cache designs. It divides the
                 last-level cache (LLC) into multiple banks connected
                 through an on-chip network. Static NUCA (SNUCA) has a
                 fixed address mapping policy, whereas dynamic NUCA
                 (DNUCA) allows blocks to relocate nearer to the
                 processing cores at runtime. To allow this, DNUCA
                 divides the banks into multiple banksets and a block
                 can be placed in any bank within a particular bankset.
                 The entire bankset may need to be searched to access a
                 block. Optimal bankset searching mechanisms are
                 essential for getting the benefits from DNUCA. This
                 article proposes a DNUCA-based TCMP architecture called
                 TLD-NUCA. It reduces the LLC access time of TCMP and
                 also allows a heavily loaded bank to distribute its
                 load among the underused banks. Instead of other DNUCA
                 designs, TLD-NUCA considers larger banksets. Such
                 relaxations result in more uniform load distribution
                 than existing DNUCA-based TCMP (T-DNUCA). Considering
                 larger banksets improves the utilization factor, but
                 T-DNUCA cannot implement it because of its expensive
                 searching mechanism. TLD-NUCA uses a centralized
                 directory, called TLD, to search a block from all the
                 banks. Also, the proposed block placement policy
                 reduces the instances when the central TLD needs to be
                 contacted. It does not require the expensive
                 simultaneous search as needed by T-DNUCA. Better cache
                 utilization and a reduction in LLC access time improve
                 the miss rate as well as the average memory access time
                 (AMAT). Improving the miss rate and AMAT results in
                 improvements in cycles per instructions (CPI).
                 Experimental analysis found that TLD-NUCA improves
                 performance by 6.5\% as compared to T-DNUCA. The
                 improvement is 13\% as compared to the SNUCA-based TCMP
                 design.",
  acknowledgement = ack-nhfb,
  articleno =    "4",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Wu:2016:OAW,
  author =       "Yu-Wei Wu and Yiyu Shi and Sudip Roy and Tsung-Yi Ho",
  title =        "Obstacle-Avoiding Wind Turbine Placement for Power
                 Loss and Wake Effect Optimization",
  journal =      j-TODAES,
  volume =       "22",
  number =       "1",
  pages =        "5:1--5:??",
  month =        dec,
  year =         "2016",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2905365",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 21 10:49:29 MDT 2017",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "As finite energy resources are being consumed at
                 a faster rate than they can be replaced, renewable energy
                 resources have drawn extensive attention. Wind power
                 development is one such example growing significantly
                 throughout the world. The main difficulty in wind power
                 development is that wind turbines interfere with each
                 other. The produced turbulence---wake effect---directly
                 reduces the power generation. In addition, wirelength
                 of the collection network among wind turbines is not
                 merely an economic factor; it also decides power loss
                 on the wind farm. Moreover, in reality, obstacles
                 (buildings, lakes, etc.) exist on the wind farm, which
                 are unavoidable. Nevertheless, to the best of our
                 knowledge, none of the existing works consider wake
                 effect, wirelength, and avoidance of obstacles all
                 together in the wind turbine placement problem. In this
                 article, we propose an analytical method to obtain the
                 obstacle-avoiding placement of wind turbines, thus
                 minimizing both power loss and wake effect. We also
                 propose a postprocessing method to fine-tune the
                 solution obtained from the analytical method to find a
                 better solution. Simulation results show that our tool
                 is 12x faster than the state-of-the-art industrial tool
                 AWS OpenWind and 203x faster than the state-of-the-art
                 academic tool TDA with almost the same produced
                 power.",
  acknowledgement = ack-nhfb,
  articleno =    "5",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Xiao:2016:HTL,
  author =       "K. Xiao and D. Forte and Y. Jin and R. Karri and S.
                 Bhunia and M. Tehranipoor",
  title =        "Hardware {Trojans}: Lessons Learned after One Decade
                 of Research",
  journal =      j-TODAES,
  volume =       "22",
  number =       "1",
  pages =        "6:1--6:??",
  month =        dec,
  year =         "2016",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2906147",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 21 10:49:29 MDT 2017",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Given the increasing complexity of modern electronics
                 and the cost of fabrication, entities from around the
                 globe have become more heavily involved in all phases
                 of the electronics supply chain. In this environment,
                 hardware Trojans (i.e., malicious modifications or
                 inclusions made by untrusted third parties) pose major
                 security concerns, especially for those integrated
                 circuits (ICs) and systems used in critical
                 applications and cyber infrastructure. While hardware
                 Trojans have been explored significantly in academia
                 over the last decade, there remains room for
                 improvement. In this article, we examine the research
                 on hardware Trojans from the last decade and attempt to
                 capture the lessons learned. A comprehensive
                 adversarial model taxonomy is introduced and used to
                 examine the current state of the art. Then the past
                 countermeasures and publication trends are categorized
                 based on the adversarial model and topic. Through this
                 analysis, we identify what has been covered and the
                 important problems that are underinvestigated. We also
                 identify the most critical lessons for those new to the
                 field and suggest a roadmap for future hardware Trojan
                 research.",
  acknowledgement = ack-nhfb,
  articleno =    "6",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Pomeranz:2016:PSS,
  author =       "Irith Pomeranz",
  title =        "Periodic Scan-In States to Reduce the Input Test Data
                 Volume for Partially Functional Broadside Tests",
  journal =      j-TODAES,
  volume =       "22",
  number =       "1",
  pages =        "7:1--7:??",
  month =        dec,
  year =         "2016",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2911983",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 21 10:49:29 MDT 2017",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/datacompression.bib;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "This article describes a procedure for test data
                 compression targeting functional and partially
                 functional broadside tests. The scan-in state of such a
                 test is either a reachable state or has a known Hamming
                 distance from a reachable state. Reachable states are
                 fully specified, while the popular LFSR-based test
                 data compression methods require the use of
                 incompletely specified test cubes. The test data
                 compression approach considered in this article is
                 based on the use of periodic scan-in states. Such
                 states require the storage of a period that can be
                 significantly shorter than a scan-in state, thus
                 providing test data compression. The procedure computes
                 a set of periods that is sufficient for detecting all
                 the detectable target faults. Considering the scan-in
                 states that the periods produce, the procedure ranks
                 the periods based on the distances of the scan-in
                 states from reachable states, and the lengths of the
                 periods. Functional and partially functional broadside
                 tests are generated preferring shorter periods with
                 smaller Hamming distances. The results are compared
                 with those of an LFSR-based approach.",
  acknowledgement = ack-nhfb,
  articleno =    "7",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Lee:2016:ESM,
  author =       "Jinyong Lee and Ingoo Heo and Yongje Lee and Yunheung
                 Paek",
  title =        "Efficient Security Monitoring with the Core Debug
                 Interface in an Embedded Processor",
  journal =      j-TODAES,
  volume =       "22",
  number =       "1",
  pages =        "8:1--8:??",
  month =        dec,
  year =         "2016",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2907611",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 21 10:49:29 MDT 2017",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "For decades, various concepts in security monitoring
                 have been proposed. In principle, they all in common in
                 regard to the monitoring of the execution behavior of a
                 program (e.g., control-flow or dataflow) running on the
                 machine to find symptoms of attacks. Among the proposed
                 monitoring schemes, software-based ones are known for
                 their adaptability on the commercial products, but
                 there have been concerns that they may suffer from
                 nonnegligible runtime overhead. On the other hand,
                 hardware-based solutions are recognized for their high
                 performance. However, most of them have an inherent
                 problem in that they usually mandate drastic changes to
                 the internal processor architecture. More recent ones
                 have strived to minimize such modifications by
                 employing external hardware security monitors in the
                 system. However, these approaches intrinsically suffer
                 from the overhead caused by communication between the
                 host and the external monitor. Our solution also relies
                 on external hardware for security monitoring, but
                 unlike the others, ours tackles the communication
                 overhead by using the core debug interface (CDI), which
                 is readily available in most commercial processors for
                 debugging. We build our system simply by plugging our
                 monitoring hardware into the processor via CDI,
                 precluding the need for altering the processor
                 internals. To validate the effectiveness of our
                 approach, we implement two well-known monitoring
                 techniques on our proposed framework: dynamic
                 information flow tracking and branch regulation. The
                 experimental results on our FPGA prototype show that
                 our external hardware monitors efficiently perform
                 monitoring tasks with negligible performance overhead,
                 mainly with thanks to the support of CDI, which helps
                 us reduce communication costs substantially.",
  acknowledgement = ack-nhfb,
  articleno =    "8",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Chang:2016:IPE,
  author =       "Yu-Ming Chang and Pi-Cheng Hsiu and Yuan-Hao Chang and
                 Chi-Hao Chen and Tei-Wei Kuo and Cheng-Yuan Michael
                 Wang",
  title =        "Improving {PCM} Endurance with a Constant-Cost Wear
                 Leveling Design",
  journal =      j-TODAES,
  volume =       "22",
  number =       "1",
  pages =        "9:1--9:??",
  month =        dec,
  year =         "2016",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2905364",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 21 10:49:29 MDT 2017",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Improving PCM endurance is a fundamental issue when it
                 is considered as an alternative to replace DRAM as main
                 memory. Memory-based wear leveling (WL) is an effective
                 way to improve PCM endurance, but its major challenge
                 is how to efficiently determine the appropriate memory
                 pages for allocation or swapping. In this article, we
                 present a constant-cost WL design that is compatible
                 with existing memory management. Two implementations,
                 namely bucket-based and array-based WL, with
                 constant-time (or nearly zero) search cost are proposed
                 to be integrated into the OS layer and the hardware
                 layer, respectively, as well as to trade between time
                 and space complexity. The results of experiments
                 conducted based on an implementation in Android, as
                 well as simulations with popular benchmarks, to
                 evaluate the effectiveness of the proposed design are
                 very encouraging.",
  acknowledgement = ack-nhfb,
  articleno =    "9",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{He:2016:RIM,
  author =       "Xu He and Yao Wang and Yang Guo and Evangeline F. Y.
                 Young",
  title =        "{Ripple 2.0}: Improved Movement of Cells in
                 Routability-Driven Placement",
  journal =      j-TODAES,
  volume =       "22",
  number =       "1",
  pages =        "10:1--10:??",
  month =        dec,
  year =         "2016",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2925989",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 21 10:49:29 MDT 2017",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Routability is one of the most important problems in
                 high-performance circuit designs. From the viewpoint of
                 placement design, two major factors cause routing
                 congestion: (i) interconnections between cells and (ii)
                 connections on macro blockages. In this article, we
                 present a routability-driven placer, Ripple 2.0, which
                 emphasizes both kinds of routing congestion. Several
                 techniques will be presented, including (i) cell
                 inflation with routing path consideration, (ii)
                 congested cluster optimization, (iii)
                 routability-driven cell spreading, and (iv)
                 simultaneous routing and placement for routability
                 refinement. With the official evaluation protocol,
                 Ripple 2.0 outperforms other published academic
                 routability-driven placers. Compared with top results
                 in the ICCAD 2012 contest, Ripple 2.0 achieves a better
                 detailed routing solution obtained by a commercial
                 router.",
  acknowledgement = ack-nhfb,
  articleno =    "10",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Mazumdar:2016:CIS,
  author =       "Bodhisatwa Mazumdar and Sk. Subidh Ali and Ozgur
                 Sinanoglu",
  title =        "A Compact Implementation of {Salsa20} and Its Power
                 Analysis Vulnerabilities",
  journal =      j-TODAES,
  volume =       "22",
  number =       "1",
  pages =        "11:1--11:??",
  month =        dec,
  year =         "2016",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2934677",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 21 10:49:29 MDT 2017",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "In this article, we present a compact implementation
                 of the Salsa20 stream cipher that is targeted towards
                 lightweight cryptographic devices such as
                 radio-frequency identification (RFID) tags. The Salsa20
                 stream cipher, an addition-rotation-XOR (ARX) cipher,
                 is used for high-security cryptography in NEON
                 instruction sets embedded in ARM Cortex A8 CPU
                 core-based tablets and smartphones. The existing
                 literature shows that although classical cryptanalysis
                 has been effective on reduced rounds of Salsa20, the
                 stream cipher is immune to software side-channel
                 attacks such as branch timing and cache timing attacks.
                 To the best of our knowledge, this work is the first to
                 perform hardware power analysis attacks, where we
                 evaluate the resistance of all eight keywords in the
                 proposed compact implementation of Salsa20. Our
                 technique targets the three subrounds of the first
                 round of the implemented Salsa20. The correlation power
                 analysis (CPA) attack has an attack complexity of
                 2$^{19}$. Based on extensive experiments on a compact
                 implementation of Salsa20, we demonstrate that all
                 these keywords can be recovered within 20,000 queries
                 on Salsa20. The attacks show a varying resilience of
                 the key words against CPA that has not yet been
                 observed in any stream or block cipher in the present
                 literature. This makes the architecture of this stream
                 cipher interesting from the side-channel analysis
                 perspective. Also, we propose a lightweight
                 countermeasure that mitigates the leakage in the power
                 traces as shown in the results of Welch's $t$-test
                 statistics. The hardware area overhead of the proposed
                 countermeasure is only 14\% and is designed with
                 compact implementation in mind.",
  acknowledgement = ack-nhfb,
  articleno =    "11",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Chakraborty:2016:PDM,
  author =       "Prasenjit Chakraborty and Preeti Ranjan Panda and
                 Sandeep Sen",
  title =        "Partitioning and Data Mapping in Reconfigurable Cache
                 and Scratchpad Memory-Based Architectures",
  journal =      j-TODAES,
  volume =       "22",
  number =       "1",
  pages =        "12:1--12:??",
  month =        dec,
  year =         "2016",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2934680",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 21 10:49:29 MDT 2017",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Scratchpad memory (SPM) is considered a useful
                 component in the memory hierarchy, solely or along with
                 caches, for meeting the power and energy constraints as
                 performance ceases to be the sole criteria for
                 processor design. Although the efficiency of SPM is
                 well known, its use has been restricted owing to
                 difficulties in programmability. Real applications
                 usually have regions that are amenable to exploitation
                 by either SPM or cache and hence can benefit if the two
                 are used in conjunction. Dynamically adjusting the
                 local memory resources to suit application demand can
                 significantly improve the efficiency of the overall
                 system. In this article, we propose a compiler
                 technique to map application data objects to the
                 SPM-cache and also partition the local memory between
                 the SPM and cache depending on the dynamic requirement
                 of the application. First, we introduce a novel
                 graph-based structure to tackle data allocation in an
                 application. Second, we use this to present a data
                 allocation heuristic to map program objects for a
                 fixed-size SPM-cache hybrid system that targets whole
                 program optimization. We finally extend this
                 formulation to adapt the SPM and cache sizes, as well
                 as the data allocation as per the requirement of
                 different application regions. We study the
                 applicability of the technique on various workloads
                 targeted at both SPM-only and hardware reconfigurable
                 memory systems, observing an average of 18\%
                 energy-delay improvement over state-of-the-art
                 techniques.",
  acknowledgement = ack-nhfb,
  articleno =    "12",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Mehri:2016:GAB,
  author =       "Hossein Mehri and Bijan Alizadeh",
  title =        "Genetic-Algorithm-Based {FPGA} Architectural
                 Exploration Using Analytical Models",
  journal =      j-TODAES,
  volume =       "22",
  number =       "1",
  pages =        "13:1--13:??",
  month =        dec,
  year =         "2016",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2939372",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 21 10:49:29 MDT 2017",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "FPGA architectural optimization has emerged as one of
                 the most important digital design challenges. In recent
                 years, experimental methods have been replaced by
                 analytical ones to find the optimized architecture.
                 Time is the main reason for this replacement.
                 Conventional Geometric Programming (GP) is a routine
                 framework to solve analytical models, including area,
                 delay, and power models. In this article, we discuss
                 the application of the Genetic Algorithm (GA) to the
                 design of FPGA architectures. The performance model has
                 been integrated into the Genetic Algorithm framework in
                 order to investigate the impact of various
                 architectural parameters on the performance efficiency
                 of FPGAs. This way, we are able to rapidly analyze FPGA
                 architectures and select the best one. The main
                 advantages of using GA versus GP are concurrency and
                 speed. The results show that concurrent optimization of
                 high-level architecture parameters, including lookup
                 table size (K) and cluster size (N), and low-level
                 parameters, like scaling of transistors, is possible
                 for GA, whereas GP does not capture K and N under its
                 concurrency and it needs to exhaustively search all
                 possible combinations of K and N. The results also show
                 that more than two orders of magnitude in runtime
                 improvement in comparison with GP-based analysis is
                 achieved.",
  acknowledgement = ack-nhfb,
  articleno =    "13",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Gingade:2016:HPM,
  author =       "Ganesh Gingade and Wenyi Chen and Yung-Hsiang Lu and
                 Jan Allebach and Hernan Ildefonso Gutierrez-Vazquez",
  title =        "Hybrid Power Management for Office Equipment",
  journal =      j-TODAES,
  volume =       "22",
  number =       "1",
  pages =        "14:1--14:??",
  month =        dec,
  year =         "2016",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2910582",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 21 10:49:29 MDT 2017",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Office machines (such as printers, scanners, facsimile
                 machines, and copiers) can consume significant amounts
                 of power. Most office machines have sleep modes to save
                 power. Power management of these machines is usually
                 timeout-based: a machine sleeps after being idle long
                 enough. Setting the time-out duration can be difficult:
                 if it is too long, the machine wastes power during
                 idleness. If it is too short, the machine sleeps too
                 soon and too often---the wake-up delay can significantly
                 degrade productivity. Thus, power management is a
                 tradeoff between saving energy and keeping response
                 time short. Many power management policies have been
                 published and one policy may outperform another in some
                 scenarios. There is no definite conclusion regarding
                 which policy is always better. This article describes
                 two methods for office equipment power management. The
                 first method adaptively reduces power based on a
                 constraint of the wake-up delay. The second is a hybrid
                 method with multiple candidate policies and it selects
                 the most appropriate power management policy. Using 6
                 months of request traces from 18 different printers, we
                 demonstrate that the hybrid policy outperforms
                 individual policies. We also discover that power
                 management based on business hours does not produce
                 consistent energy savings.",
  acknowledgement = ack-nhfb,
  articleno =    "14",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Katoen:2016:PMC,
  author =       "Joost-Pieter Katoen and Hao Wu",
  title =        "Probabilistic Model Checking for Uncertain
                 Scenario-Aware Data Flow",
  journal =      j-TODAES,
  volume =       "22",
  number =       "1",
  pages =        "15:1--15:??",
  month =        dec,
  year =         "2016",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2914788",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 21 10:49:29 MDT 2017",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "The Scenario-Aware Dataflow (SADF) model is based on
                 concurrent actors that interact via channels. It
                 combines streaming data and control to capture
                 scenarios while incorporating hard and soft real-time
                 aspects. To model data-flow computations that are
                 subject to uncertainty, SADF models are equipped with
                 random primitives. We propose to use probabilistic
                 model checking to analyze uncertain SADF models. We
                 show how measures such as expected time, long-run
                 objectives like throughput, as well as timed
                 reachability---can a given system configuration be
                 reached within a deadline with high probability?---can be
                 automatically determined. The crux of our method is a
                 compositional semantics of SADF with exponential agent
                 execution times combined with automated abstraction
                 techniques akin to partial-order reduction. We present
                 the semantics in detail and show how it accommodates
                 the incorporation of execution platforms, enabling the
                 analysis of energy consumption. The feasibility of our
                 approach is illustrated by analyzing several
                 quantitative measures of an MPEG-4 decoder and an
                 industrial face recognition application.",
  acknowledgement = ack-nhfb,
  articleno =    "15",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Liu:2016:DAE,
  author          = {Liu, Qixiao and Moreto, Miquel and Abella, Jaume and
                     Cazorla, Francisco J. and Valero, Mateo},
  title           = {{DReAM}: an Approach to Estimate per-Task {DRAM}
                     Energy in Multicore Systems},
  journal         = j-TODAES,
  volume          = {22},
  number          = {1},
  pages           = {16:1--16:??},
  month           = dec,
  year            = {2016},
  coden           = {ATASFO},
  doi             = {https://doi.org/10.1145/2939370},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Fri Jul 21 10:49:29 MDT 2017},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {Accurate per-task energy estimation in multicore
                     systems would allow performing per-task energy-aware
                     task scheduling and energy-aware billing in data
                     centers, among other applications. Per-task energy
                     estimation is challenged by the interaction between
                     tasks in shared resources, which impacts tasks' energy
                     consumption in uncontrolled ways. Some accurate
                     mechanisms have been devised recently to estimate
                     per-task energy consumed on-chip in multicores, but
                     there is a lack of such mechanisms for DRAM memories.
                     This article makes the case for accurate per-task DRAM
                     energy metering in multicores, which opens new paths to
                     energy/performance optimizations. In particular, the
                     contributions of this article are (i) an ideal per-task
                     energy metering model for DRAM memories; (ii) DReAM, an
                     accurate yet low cost implementation of the ideal model
                     (less than 5\% accuracy error when 16 tasks share
                     memory); and (iii) a comparison with standard methods
                     (even distribution and access-count based) proving that
                     DReAM is much more accurate than these other methods.},
  acknowledgement = ack-nhfb,
  articleno       = {16},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Somashekar:2016:NEG,
  author          = {Somashekar, Ahish Mysore and Tragoudas, Spyros and
                     Jayabharathi, Rathish and Gangadhar, Sreenivas},
  title           = {Non-enumerative Generation of Path Delay Distributions
                     and Its Application to Critical Path Selection},
  journal         = j-TODAES,
  volume          = {22},
  number          = {1},
  pages           = {17:1--17:??},
  month           = dec,
  year            = {2016},
  coden           = {ATASFO},
  doi             = {https://doi.org/10.1145/2940327},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Fri Jul 21 10:49:29 MDT 2017},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {A Monte Carlo-based approach is proposed capable of
                     identifying in a non-enumerative and scalable manner
                     the distributions that describe the delay of every path
                     in a combinational circuit. Furthermore, a scalable
                     approach to select critical paths from a potentially
                     exponential number of path candidates is presented.
                     Paths and their delay distributions are stored in Zero
                     Suppressed Binary Decision Diagrams. Experimental
                     results on some of the largest ISCAS-89 and ITC-99
                     benchmarks shows that the proposed method is highly
                     scalable and effective.},
  acknowledgement = ack-nhfb,
  articleno       = {17},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Wang:2016:ADB,
  author          = {Wang, Yi and Qin, Zhiwei and Chen, Renhai and Shao, Zili
                     and Yang, Laurence T.},
  title           = {An Adaptive Demand-Based Caching Mechanism for {NAND}
                     Flash Memory Storage Systems},
  journal         = j-TODAES,
  volume          = {22},
  number          = {1},
  pages           = {18:1--18:??},
  month           = dec,
  year            = {2016},
  coden           = {ATASFO},
  doi             = {https://doi.org/10.1145/2947658},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Fri Jul 21 10:49:29 MDT 2017},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {During past decades, the capacity of NAND flash memory
                     has been increasing dramatically, leading to the use of
                     nonvolatile flash in the system's memory hierarchy. The
                     increasing capacity of NAND flash memory introduces a
                     large RAM footprint to store the logical to physical
                     address mapping. The demand-based approach can
                     effectively reduce and well control the RAM footprint.
                     However, extra address translation overhead is also
                     introduced which may degrade the system performance. In
                     this article, we present CDFTL, an adaptive Caching
                     mechanism for Demand-based Flash Translation Layer, for
                     NAND flash memory storage systems. CDFTL adopts both
                     the fine-grained entry-based caching mechanism to
                     exploit temporal locality and the coarse-grained
                     translation-page-based caching mechanism to exploit
                     spatial locality of workloads. By selectively caching
                     the on-demand address mappings and adaptively changing
                     the space configurations of two granularities, CDFTL
                     can effectively utilize the RAM space and improve the
                     cache hit ratio. We evaluate CDFTL under a real
                     hardware embedded platform using a variety of I/O
                     traces. Experimental results show that our technique
                     can achieve an 11.13\% reduction in average system
                     response time and a 35.21\% reduction in translation
                     block erase counts compared with the previous work.},
  acknowledgement = ack-nhfb,
  articleno       = {18},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Nair:2016:ESP,
  author          = {Nair, Piyoosh Purushothaman and Sarkar, Arnab and
                     Harsha, N. M. and Gandhi, Megha and Chakrabarti, P. P.
                     and Ghose, Sujoy},
  title           = {{ERfair} Scheduler with Processor Suspension for
                     Real-Time Multiprocessor Embedded Systems},
  journal         = j-TODAES,
  volume          = {22},
  number          = {1},
  pages           = {19:1--19:??},
  month           = dec,
  year            = {2016},
  coden           = {ATASFO},
  doi             = {https://doi.org/10.1145/2948979},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Fri Jul 21 10:49:29 MDT 2017},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {Proportional fair schedulers with their ability to
                     provide optimal schedulability along with hard
                     timeliness and quality-of-service guarantees on
                     multiprocessors form an attractive alternative in
                     real-time embedded systems that concurrently run a mix
                     of independent applications with varying timeliness
                     constraints. This article presents ERfair Scheduler
                     with Suspension on Multiprocessors (ESSM), an
                     efficient, optimal proportional fair scheduler that
                     attempts to reduce system wide energy consumption by
                     locally maximizing the processor suspension intervals
                     while not sacrificing the ERfairness timing constraints
                     of the system. The proposed technique takes advantage
                     of higher execution rates of tasks in underloaded
                     ERfair systems and uses a procrastination scheme to
                     search for time points within the schedule where
                     suspension intervals are locally maximal. Evaluation
                     results reveal that ESSM achieves good sleep efficiency
                     and provides up to 50\% higher effective total sleep
                     durations as compared to the Basic-ERfair scheduler on
                     systems consisting of 2 to 20 processors.},
  acknowledgement = ack-nhfb,
  articleno       = {19},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Nguyen:2017:SAA,
  author          = {Nguyen, Phuong Ha and Sahoo, Durga Prasad and
                     Chakraborty, Rajat Subhra and Mukhopadhyay, Debdeep},
  title           = {Security Analysis of Arbiter {PUF} and Its Lightweight
                     Compositions Under Predictability Test},
  journal         = j-TODAES,
  volume          = {22},
  number          = {2},
  pages           = {20:1--20:??},
  month           = mar,
  year            = {2017},
  coden           = {ATASFO},
  doi             = {https://doi.org/10.1145/2940326},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Fri Jul 21 10:49:30 MDT 2017},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {Unpredictability is an important security property of
                     Physically Unclonable Function (PUF) in the context of
                     statistical attacks, where the correlation between
                     challenge-response pairs is explicitly exploited. In
                     the existing literature on PUFs, the Hamming Distance
                     Test, denoted by HDT(t), was proposed to evaluate the
                     unpredictability of PUFs, which is a simplified case of
                     the Propagation Criterion test PC(t). The objective of
                     these test schemes is to estimate the output transition
                     probability when there are t or fewer than t bits
                     flips, and ideally this probability value should be
                     0.5. In this work, we show that aforementioned two test
                     schemes are not enough to ensure the unpredictability
                     of a PUF design. We propose a new test, which is
                     denoted as HDT(e, t). This test scheme is a fine-tuned
                     version of the previous schemes, as it considers the
                     flipping bit pattern vector e along with parameter t.
                     As a contribution, we provide a comprehensive
                     discussion and analytic interpretation of HDT(t),
                     PC(t), and HDT(e, t) test schemes for Arbiter PUF
                     (APUF), Exclusive-OR (XOR) PUF, and Lightweight Secure
                     PUF (LSPUF). Our analysis establishes that HDT(e, t)
                     test is more general in comparison with HDT(t) and
                     PC(t) tests. In addition, we demonstrate a few
                     scenarios where the adversary can exploit the
                     information obtained from the analysis of HDT(e, t)
                     properties of APUF, XOR PUF, and LSPUF to develop
                     statistical attacks on them, if the ideal value of
                     HDT(e, t) = 0.5 is not achieved for a given PUF. We
                     validate our theoretical observations using the
                     simulated and Field Programmable Gate Array (FPGA)
                     implemented APUF, XOR PUF, and LSPUF designs.},
  acknowledgement = ack-nhfb,
  articleno       = {20},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Zhu:2017:CCA,
  author          = {Zhu, Di and Yue, Siyu and Pedram, Massoud and Chen,
                     Lizhong},
  title           = {{CALM}: Contention-Aware Latency-Minimal Application
                     Mapping for Flattened Butterfly On-Chip Networks},
  journal         = j-TODAES,
  volume          = {22},
  number          = {2},
  pages           = {21:1--21:??},
  month           = mar,
  year            = {2017},
  coden           = {ATASFO},
  doi             = {https://doi.org/10.1145/2950045},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Fri Jul 21 10:49:30 MDT 2017},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {With the emergence of many-core multiprocessor
                     system-on-chips (MPSoCs), on-chip networks are facing
                     serious challenges in providing fast communication
                     among various tasks and cores. One promising on-chip
                     network design approach shown in recent studies is to
                     add express channels to traditional mesh network as
                     shortcuts to bypass intermediate routers, thereby
                     reducing packet latency. This approach not only changes
                     the packet latency models, but also greatly affects
                     network traffic behaviors, both of which have not been
                     fully exploited in existing mapping algorithms. In this
                     article, we explore the opportunities in optimizing
                     application mapping for flattened butterfly, a popular
                     express channel-based on-chip network. Specifically, we
                     identify the unique characteristics of flattened
                     butterfly, analyze the opportunities and new
                     challenges, and propose an efficient heuristic mapping
                     algorithm. The proposed algorithm Contention-Aware
                     Latency Minimal (CALM) is able to reduce unnecessary
                     turns that would otherwise impose additional router
                     pipeline latency to packets, as well as adjust
                     forwarding traffic to reduce network contention
                     latency. Simulation results show that the proposed
                     algorithm can achieve, on average, 3.4X reduction in
                     the number of turns, 24.8\% reduction in contention
                     latency, and 14.12\% reduction in the overall packet
                     latency.},
  acknowledgement = ack-nhfb,
  articleno       = {21},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Azarbad:2017:SSB,
  author =       "Mohammad Reza Azarbad and Bijan Alizadeh",
  title =        "Scalable {SMT}-Based Equivalence Checking of Nested
                 Loop Pipelining in Behavioral Synthesis",
  journal =      j-TODAES,
  volume =       "22",
  number =       "2",
  pages =        "22:1--22:??",
  month =        mar,
  year =         "2017",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2953879",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 21 10:49:30 MDT 2017",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "In this article, we present a novel methodology based
                 on SMT-solvers to verify equality of a high-level
                 described specification and a pipelined RTL
                 implementation produced by a high-level synthesis tool.
                 The complex transformations existing in the high-level
                 synthesis process, such as nested loop pipelining,
                 cause the conventional methods of equivalence checking
                 to be inefficient. The proposed equivalence checking
                 method simultaneously attacks the two problems in this
                 context: (1) state space explosion and (2) complex
                 high-level synthesis transformations. To show the
                 scalability and efficiency of the proposed method, the
                 verification results of large designs are compared with
                 those of the SAT-based method, including three
                 different state-of-the-art SAT-solvers: the SMT-based
                 procedure, the modular Horner expansion diagram
                 (M-HED)-based method, and the M-HED partitioning
                 approach. The results show 2470$ \times $, 2540$ \times
                 $, and 142$ \times $ average memory usage reduction and
                 252$ \times $, 28$ \times $, and 914$ \times $ speedup
                 in comparison with M-HED, M-HED partitioning, and
                 SMT-solver without using the proposed method,
                 respectively.",
  acknowledgement = ack-nhfb,
  articleno =    "22",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Zhao:2017:OIM,
  author          = {Zhao, Qingling and Al-Bayati, Zaid and Gu, Zonghua and
                     Zeng, Haibo},
  title           = {Optimized Implementation of Multirate
                     Mixed-Criticality Synchronous Reactive Models},
  journal         = j-TODAES,
  volume          = {22},
  number          = {2},
  pages           = {23:1--23:??},
  month           = mar,
  year            = {2017},
  coden           = {ATASFO},
  doi             = {https://doi.org/10.1145/2968445},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Fri Jul 21 10:49:30 MDT 2017},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {Model-based design using Synchronous Reactive (SR)
                     models enables early design and verification of
                     application functionality in a platform-independent
                     manner, and the implementation on the target platform
                     should guarantee the preservation of application
                     semantic properties. Mixed-Criticality Scheduling (MCS)
                     is an effective approach to addressing diverse
                     certification requirements of safety-critical systems
                     that integrate multiple subsystems with different
                     levels of criticality. This article considers
                     fixed-priority scheduling of mixed-criticality SR
                     models, and considers two scheduling approaches:
                     Adaptive MCS and Elastic MCS. We formulate the
                     optimization problem of minimizing the total system
                     cost of added functional delays in the implementation
                     while guaranteeing schedulability, and present an
                     optimal algorithm based on branch-and-bound search, and
                     an efficient heuristic algorithm.},
  acknowledgement = ack-nhfb,
  articleno       = {23},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Ali:2017:RCD,
  author          = {Ali, Hazem Ismail and Stuijk, Sander and Akesson, Benny
                     and Pinho, Lu{\'\i}s Miguel},
  title           = {Reducing the Complexity of Dataflow Graphs Using
                     Slack-Based Merging},
  journal         = j-TODAES,
  volume          = {22},
  number          = {2},
  pages           = {24:1--24:??},
  month           = mar,
  year            = {2017},
  coden           = {ATASFO},
  doi             = {https://doi.org/10.1145/2956232},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Fri Jul 21 10:49:30 MDT 2017},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {There exist many dataflow applications with timing
                     constraints that require real-time guarantees on safe
                     execution without violating their deadlines. Extraction
                     of timing parameters (offsets, deadlines, periods) from
                     these applications enables the use of real-time
                     scheduling and analysis techniques, and provides
                     guarantees on satisfying timing constraints. However,
                     existing extraction techniques require the
                     transformation of the dataflow application from highly
                     expressive dataflow computational models, for example,
                     Synchronous Dataflow (SDF) and Cyclo-Static Dataflow
                     (CSDF) to Homogeneous Synchronous Dataflow (HSDF). This
                     transformation can lead to an exponential increase in
                     the size of the application graph that significantly
                     increases the runtime of the analysis. In this article,
                     we address this problem by proposing an offline
                     heuristic algorithm called slack-based merging. The
                     algorithm is a novel graph reduction technique that
                     helps in speeding up the process of timing parameter
                     extraction and finding a feasible real-time schedule,
                     thereby reducing the overall design time of the
                     real-time system. It uses two main concepts: (a) the
                     difference between the worst-case execution time of the
                     SDF graph's firings and its timing constraints (slack)
                     to merge firings together and generate a reduced-size
                     HSDF graph, and (b) the novel concept of merging called
                     safe merge, which is a merge operation that we formally
                     prove cannot cause a live HSDF graph to deadlock. The
                     results show that the reduced graph (1) respects the
                     throughput and latency constraints of the original
                     application graph and (2) typically speeds up the
                     process of extracting timing parameters and finding a
                     feasible real-time schedule for real-time dataflow
                     applications. They also show that when the throughput
                     constraint is relaxed with respect to the maximal
                     throughput of the graph, the merging algorithm is able
                     to achieve a larger reduction in graph size, which in
                     turn results in a larger speedup of the real-time
                     scheduling algorithms.},
  acknowledgement = ack-nhfb,
  articleno       = {24},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Mundhenk:2017:SAN,
  author          = {Mundhenk, Philipp and Paverd, Andrew and Mrowca, Artur
                     and Steinhorst, Sebastian and Lukasiewycz, Martin and
                     Fahmy, Suhaib A. and Chakraborty, Samarjit},
  title           = {Security in Automotive Networks: Lightweight
                     Authentication and Authorization},
  journal         = j-TODAES,
  volume          = {22},
  number          = {2},
  pages           = {25:1--25:??},
  month           = mar,
  year            = {2017},
  coden           = {ATASFO},
  doi             = {https://doi.org/10.1145/2960407},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Fri Jul 21 10:49:30 MDT 2017},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {With the increasing amount of interconnections between
                     vehicles, the attack surface of internal vehicle
                     networks is rising steeply. Although these networks are
                     shielded against external attacks, they often do not
                     have any internal security to protect against malicious
                     components or adversaries who can breach the network
                     perimeter. To secure the in-vehicle network, all
                     communicating components must be authenticated, and
                     only authorized components should be allowed to send
                     and receive messages. This is achieved through the use
                     of an authentication framework. Cryptography is widely
                     used to authenticate communicating parties and provide
                     secure communication channels (e.g., Internet
                     communication). However, the real-time performance
                     requirements of in-vehicle networks restrict the types
                     of cryptographic algorithms and protocols that may be
                     used. In particular, asymmetric cryptography is
                     computationally infeasible during vehicle operation. In
                     this work, we address the challenges of designing
                     authentication protocols for automotive systems. We
                     present Lightweight Authentication for Secure
                     Automotive Networks (LASAN), a full lifecycle
                     authentication approach. We describe the core LASAN
                     protocols and show how they protect the internal
                     vehicle network while complying with the real-time
                     constraints and low computational resources of this
                     domain. By leveraging the fixed structure of automotive
                     networks, we minimize bandwidth and computation
                     requirements. Unlike previous work, we also explain how
                     this framework can be integrated into all aspects of
                     the automotive product lifecycle, including
                     manufacturing, vehicle maintenance, and software
                     updates. We evaluate LASAN in two different ways:
                     First, we analyze the security properties of the
                     protocols using established protocol verification
                     techniques based on formal methods. Second, we evaluate
                     the timing requirements of LASAN and compare these to
                     other frameworks using a new highly modular discrete
                     event simulator for in-vehicle networks, which we have
                     developed for this evaluation.},
  acknowledgement = ack-nhfb,
  articleno       = {25},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Zhang:2017:RTV,
  author          = {Zhang, Xianwei and Zhang, Youtao and Childers, Bruce R.
                     and Yang, Jun},
  title           = {On the Restore Time Variations of Future {DRAM}
                     Memory},
  journal         = j-TODAES,
  volume          = {22},
  number          = {2},
  pages           = {26:1--26:??},
  month           = mar,
  year            = {2017},
  coden           = {ATASFO},
  doi             = {https://doi.org/10.1145/2967609},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Fri Jul 21 10:49:30 MDT 2017},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {As the de facto main memory standard, DRAM (Dynamic
                     Random Access Memory) has achieved dramatic density
                     improvement in the past four decades, along with the
                     advancements in process technology. Recent studies
                     reveal that one of the major challenges in scaling DRAM
                     into the deep sub-micron regime is its significant
                     variations on cell restore time, which affect timing
                     constraints such as write recovery time. Adopting
                     traditional approaches results in either low yield rate
                     or large performance degradation. In this article, we
                     propose schemes to expose the variations to the
                     architectural level. By constructing memory chunks with
                     different access speeds and, in particular, exploiting
                     the performance benefits of fast chunks, a
                     variation-aware memory controller can effectively
                     mitigate the performance loss due to relaxed timing
                     constraints. We then proposed restore-time-aware rank
                     construction and page allocation schemes to make better
                     use of fast chunks. Our experimental results show that,
                     compared to traditional designs such as row sparing and
                     Error Correcting Codes, the proposed schemes help to
                     improve system performance by about 16\% and 20\%,
                     respectively, for 20nm and 14nm technology nodes on a
                     four-core multiprocessor system.},
  acknowledgement = ack-nhfb,
  articleno       = {26},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Lin:2017:HDP,
  author =       "Ye-Jyun Lin and Chia-Lin Yang and Hsiang-Pang Li and
                 Cheng-Yuan Michael Wang",
  title =        "A Hybrid {DRAM\slash PCM} Buffer Cache Architecture
                 for Smartphones with {QoS} Consideration",
  journal =      j-TODAES,
  volume =       "22",
  number =       "2",
  pages =        "27:1--27:??",
  month =        mar,
  year =         "2017",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2979143",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 21 10:49:30 MDT 2017",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Flash memory is widely used in mobile phones to store
                 contact information, application files, and other types
                 of data. In an operating system, the buffer cache keeps
                 the I/O blocks in dynamic random access memory (DRAM)
                 to reduce the slow flash accesses. However, in
                 smartphones, we observed two issues which reduce the
                 benefits of the buffer cache. First, a large number of
                 synchronous writes force writing the data from the
                 buffer cache to flash frequently. Second, the large
                 amount of I/O accesses from background applications
                 diminishes the buffer cache efficiency of the
                 foreground application, which degrades the
                 quality-of-service (QoS). In this article, we propose a
                 buffer cache architecture with hybrid DRAM and phase
                 change memory (PCM) memory, which improves the I/O
                 performance and QoS for smartphones. We use a DRAM
                 first-level buffer cache to provide high buffer cache
                 performance and a PCM last-level buffer cache to reduce
                 the impact of frequent synchronous writes. Based on the
                 proposed hierarchical buffer cache architecture, we
                 propose a sub-block management and background flush to
                 reduce the impact of the PCM write limitation and the
                 dirty block write-back overhead, respectively. To
                 improve the QoS, we propose a least-recently-activated
                 first replacement policy (LRA) to keep the data from
                 the applications that are most likely to become the
                 foreground one. The experimental results show that with
                 the proposed mechanisms, our hierarchical buffer cache
                 can improve the I/O response time by 20\% compared to
                 the conventional buffer cache. The proposed LRA can
                 improve the foreground application performance by
                 $ 1.74 \times $ compared to the conventional CLOCK
                 policy.",
  acknowledgement = ack-nhfb,
  articleno =    "27",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@article{Su:2017:EMC,
  author =       {Hang Su and Dakai Zhu and Scott Brandt},
  title =        {An Elastic Mixed-Criticality Task Model and
                 Early-Release {EDF} Scheduling Algorithms},
  journal =      j-TODAES,
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  volume =       {22},
  number =       {2},
  articleno =    {28},
  pages =        {28:1--28:??},
  month =        mar,
  year =         {2017},
  doi =          {https://doi.org/10.1145/2984633},
  issn =         {1084-4309 (print), 1557-7309 (electronic)},
  issn-l =       {1084-4309},
  coden =        {ATASFO},
  journal-url =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate =      {Fri Jul 21 10:49:30 MDT 2017},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  abstract =     {Many algorithms have recently been studied for
                 scheduling mixed-criticality (MC) tasks. However, most
                 existing MC scheduling algorithms guarantee the timely
                 executions of high-criticality (HC) tasks at the
                 expense of discarding low-criticality (LC) tasks, which
                 can cause serious service interruption for such tasks.
                 In this work, aiming at providing guaranteed services
                 for LC tasks, we study an elastic mixed-criticality
                 (E-MC) task model for dual-criticality systems.
                 Specifically, the model allows each LC task to specify
                 its maximum period (i.e., minimum service level) and a
                 set of early-release points. We propose an
                 early-release (ER) mechanism that enables LC tasks to
                 be released more frequently and thus improve their
                 service levels at runtime, with both conservative and
                 aggressive approaches to exploiting system slack being
                 considered, which is applied to both earliest deadline
                 first (EDF) and preference-oriented earliest-deadline
                 schedulers. We formally prove the correctness of the
                 proposed early-release--earliest deadline first
                 scheduler on guaranteeing the timeliness of all tasks
                 through judicious management of the early releases of
                 LC tasks. The proposed model and schedulers are
                 evaluated through extensive simulations. The results
                 show that by moderately relaxing the service
                 requirements of LC tasks in MC task sets (i.e., by
                 having LC tasks' maximum periods in the E-MC model be
                 two to three times their desired MC periods), most
                 transformed E-MC task sets can be successfully
                 scheduled without sacrificing the timeliness of HC
                 tasks. Moreover, with the proposed ER mechanism, the
                 runtime performance of tasks (e.g., execution
                 frequencies of LC tasks, response times, and jitters of
                 HC tasks) can be significantly improved under the ER
                 schedulers when compared to that of the
                 state-of-the-art earliest deadline first-virtual
                 deadline scheduler.},
}

@Article{Pomeranz:2017:CSL,
  author =       "Irith Pomeranz",
  title =        "Computation of Seeds for {LFSR}-Based {$n$}-Detection
                 Test Generation",
  journal =      j-TODAES,
  volume =       "22",
  number =       "2",
  pages =        "29:1--29:??",
  month =        mar,
  year =         "2017",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2994144",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 21 10:49:30 MDT 2017",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/prng.bib;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "This article describes a new procedure that generates
                 seeds for LFSR-based test generation when the goal is
                 to produce an $n$-detection test set. The procedure
                 does not use test cubes in order to avoid the situation
                 where a seed does not exist for a given test cube with
                 a given LFSR. Instead, the procedure starts from a set
                 of seeds that produces a one-detection test set. It
                 modifies seeds to obtain new seeds such that the tests
                 they produce increase the numbers of detections of
                 target faults. The modification procedure also
                 increases the number of faults that each additional
                 seed detects. Experimental results are presented to
                 demonstrate the effectiveness of the procedure.",
  acknowledgement = ack-nhfb,
  articleno =    "29",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@article{Hankendi:2017:SCS,
  author =       {Can Hankendi and Ayse Kivilcim Coskun},
  title =        {Scale \& Cap: Scaling-Aware Resource Management for
                 Consolidated Multi-threaded Applications},
  journal =      j-TODAES,
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  volume =       {22},
  number =       {2},
  articleno =    {30},
  pages =        {30:1--30:??},
  month =        mar,
  year =         {2017},
  doi =          {https://doi.org/10.1145/2994145},
  issn =         {1084-4309 (print), 1557-7309 (electronic)},
  issn-l =       {1084-4309},
  coden =        {ATASFO},
  journal-url =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate =      {Fri Jul 21 10:49:30 MDT 2017},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib;
                 https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib},
  acknowledgement = ack-nhfb,
  abstract =     {As the number of cores per server node increases,
                 designing multi-threaded applications has become
                 essential to efficiently utilize the available hardware
                 parallelism. Many application domains have started to
                 adopt multi-threaded programming; thus, efficient
                 management of multi-threaded applications has become a
                 significant research problem. Efficient execution of
                 multi-threaded workloads on cloud environments, where
                 applications are often consolidated by means of
                 virtualization, relies on understanding the
                 multi-threaded specific characteristics of the
                 applications. Furthermore, energy cost and power
                 delivery limitations require data center server nodes
                 to work under power caps, which bring additional
                 challenges to runtime management of consolidated
                 multi-threaded applications. This article proposes a
                 dynamic resource allocation technique for consolidated
                 multi-threaded applications for power-constrained
                 environments. Our technique takes into account
                 application characteristics specific to multi-threaded
                 applications, such as power and performance scaling, to
                 make resource distribution decisions at runtime to
                 improve the overall performance, while accurately
                 tracking dynamic power caps. We implement and evaluate
                 our technique on state-of-the-art servers and show that
                 the proposed technique improves the application
                 performance by up to 21\% under power caps compared to
                 a default resource manager.},
}

@article{Backer:2017:SFT,
  author =       {Jerry Backer and David Hely and Ramesh Karri},
  title =        {Secure and Flexible Trace-Based Debugging of
                 Systems-on-Chip},
  journal =      j-TODAES,
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  volume =       {22},
  number =       {2},
  articleno =    {31},
  pages =        {31:1--31:??},
  month =        mar,
  year =         {2017},
  doi =          {https://doi.org/10.1145/2994601},
  issn =         {1084-4309 (print), 1557-7309 (electronic)},
  issn-l =       {1084-4309},
  coden =        {ATASFO},
  journal-url =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate =      {Fri Jul 21 10:49:30 MDT 2017},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  abstract =     {This work tackles the conflict between enforcing
                 security of a system-on-chip (SoC) and providing
                 observability during trace-based debugging. On one
                 hand, security objectives require that assets remain
                 confidential at different stages of the SoC life cycle.
                 On the other hand, the trace-based debug infrastructure
                 exposes values of internal signals that can leak the
                 assets to untrusted third parties. We propose a secure
                 trace-based debug infrastructure to resolve this
                 conflict. The secure infrastructure tags each asset to
                 identify its owner (to whom it can be exposed during
                 debug) and nonintrusively enforces the confidentiality
                 of the assets during runtime debug. We implement a
                 prototype of the enhanced infrastructure on an FPGA to
                 validate its functional correctness. ASIC estimations
                 show that our approach incurs practical area and power
                 costs.},
}

@Article{Latifis:2017:MVC,
  author =       "Ioannis Latifis and Karthick Parashar and Grigoris
                 Dimitroulakos and Hans Cappelle and Christakis Lezos
                 and Konstantinos Masselos and Francky Catthoor",
  title =        "A {MATLAB} Vectorizing Compiler Targeting
                 Application-Specific Instruction Set Processors",
  journal =      j-TODAES,
  volume =       "22",
  number =       "2",
  pages =        "32:1--32:28",
  month =        mar,
  year =         "2017",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2996182",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 21 10:49:30 MDT 2017",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/matlab.bib;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "This article discusses a MATLAB-to-C vectorizing
                 compiler that exploits custom instructions, for
                 example, for Single Instruction Multiple Data (SIMD)
                 processing and instructions for complex arithmetic
                 present in Application-Specific Instruction Set
                 Processors (ASIPs). Custom instructions are represented
                 via specialized intrinsic functions in the generated
                 code, and the generated code can be used as input to
                 any C/C++ compiler supporting the target processor.
                 Furthermore, the specialized instruction set of the
                 target processor is described in a parameterized way
                 using a target processor-independent architecture
                 description approach, thus allowing the support of any
                 processor. The compiler has been used for the
                 generation of application code for two different ASIPs
                 for several benchmarks. The code generated by the
                 compiler achieves a speedup between
                 $ 2 \times $--$ 74 \times $ and
                 $ 2 \times $--$ 97 \times $ compared to the
                 code generated by the MathWorks MATLAB-to-C compiler.
                 Experimental results also prove that the compiler
                 efficiently exploits SIMD custom instructions achieving
                 a 3.3 factor speedup compared to cases where no SIMD
                 processing is used. Thus the compiler can be employed
                 to reduce the development time/effort/cost and time to
                 market through raising the abstraction of application
                 design in an embedded systems/system-on-chip
                 development context.",
  acknowledgement = ack-nhfb,
  articleno =    "32",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@article{Santos:2017:SMH,
  author =       {Rui Santos and Shyamsundar Venkataraman and Akash
                 Kumar},
  title =        {Scrubbing Mechanism for Heterogeneous Applications in
                 Reconfigurable Devices},
  journal =      j-TODAES,
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  volume =       {22},
  number =       {2},
  articleno =    {33},
  pages =        {33:1--33:??},
  month =        mar,
  year =         {2017},
  doi =          {https://doi.org/10.1145/2997646},
  issn =         {1084-4309 (print), 1557-7309 (electronic)},
  issn-l =       {1084-4309},
  coden =        {ATASFO},
  journal-url =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate =      {Fri Jul 21 10:49:30 MDT 2017},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  abstract =     {Commercial off-the-shelf (COTS) reconfigurable devices
                 have been recognized as one of the most suitable
                 processing devices to be applied in nano-satellites,
                 since they can satisfy and combine their most important
                 requirements, namely processing performance,
                 reconfigurability, and low cost. However, COTS
                 reconfigurable devices, in particular Static-RAM Field
                 Programmable Gate Arrays, can be affected by cosmic
                 radiation, compromising the overall nano-satellite
                 reliability. Scrubbing has been proposed as a mechanism
                 to repair faults in configuration memory. However, the
                 current scrubbing mechanisms are predominantly static,
                 unable to adapt to heterogeneous applications and their
                 runtime variations. In this article, a dynamically
                 adaptive scrubbing mechanism is proposed. Through a
                 window-based scrubbing scheduling, this mechanism
                 adapts the scrubbing process to heterogeneous
                 applications (composed of periodic/sporadic and
                 streaming/DSP (Digital Signal Processing) tasks), as
                 well as their reconfigurations and modifications at
                 runtime. Conducted simulation experiments show the
                 feasibility and the efficiency of the proposed solution
                 in terms of system reliability metric and memory
                 overhead.},
}

@Article{Enrici:2017:MDE,
  author =       "Andrea Enrici and Ludovic Apvrille and Renaud
                 Pacalet",
  title =        "A Model-Driven Engineering Methodology to Design
                 Parallel and Distributed Embedded Systems",
  journal =      j-TODAES,
  volume =       "22",
  number =       "2",
  pages =        "34:1--34:??",
  month =        mar,
  year =         "2017",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2999537",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 21 10:49:30 MDT 2017",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "In Model-Driven Engineering system-level approaches,
                 the design of communication protocols and patterns is
                 subject to the design of processing operations
                 (computations) and to their mapping onto execution
                 resources. However, this strategy allows us to capture
                 simple communication schemes (e.g.,
                 processor-bus-memory) and prevents us from evaluating
                 the performance of both computations and communications
                 (e.g., impact of application traffic patterns onto the
                 communication interconnect) in a single step. To solve
                 these issues, we introduce a novel design approach---the
                 $ \Psi $-chart---where we design communication patterns
                 and protocols independently of a system's functionality
                 and resources, via dedicated models. At the mapping
                 step, both application and communication models are
                 bound to the platform resources and transformed to
                 explore design alternatives for both computations and
                 communications. We present the $ \Psi $-chart and its
                 implementation (i.e., communication models and Design
                 Space Exploration) in TTool/DIPLODOCUS, a Unified
                 Modeling Language (UML)/SysML framework for the
                 modeling, simulation, formal verification and automatic
                 code generation of data-flow embedded systems. The
                 effectiveness of our solution in terms of better design
                 quality (e.g., portability, time) is demonstrated with
                 the design of the physical layer of a ZigBee (IEEE
                 802.15.4) transmitter onto a multi-processor
                 architecture.",
  acknowledgement = ack-nhfb,
  articleno =    "34",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@article{Basten:2017:SSI,
  author =       {Twan Basten and Orlando Moreira and Robert de Groote},
  title =        {Special Section: Integrating Dataflow, Embedded
                 Computing and Architecture},
  journal =      j-TODAES,
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  volume =       {22},
  number =       {2},
  articleno =    {35},
  pages =        {35:1--35:??},
  month =        mar,
  year =         {2017},
  doi =          {https://doi.org/10.1145/3023455},
  issn =         {1084-4309 (print), 1557-7309 (electronic)},
  issn-l =       {1084-4309},
  coden =        {ATASFO},
  journal-url =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate =      {Fri Jul 21 10:49:30 MDT 2017},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@article{Choi:2017:WCR,
  author =       {Junchul Choi and Soonhoi Ha},
  title =        {Worst-Case Response Time Analysis of a Synchronous
                 Dataflow Graph in a Multiprocessor System with
                 Real-Time Tasks},
  journal =      j-TODAES,
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  volume =       {22},
  number =       {2},
  articleno =    {36},
  pages =        {36:1--36:??},
  month =        mar,
  year =         {2017},
  doi =          {https://doi.org/10.1145/2997644},
  issn =         {1084-4309 (print), 1557-7309 (electronic)},
  issn-l =       {1084-4309},
  coden =        {ATASFO},
  journal-url =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate =      {Fri Jul 21 10:49:30 MDT 2017},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  abstract =     {In this article, we propose a novel technique that
                 estimates a tight upper bound of the worst-case
                 response time (WCRT) of a synchronous dataflow (SDF)
                 graph when the SDF graph shares processors with other
                 real-time tasks. When an SDF graph is executed at
                 runtime under a self-timed or static assignment
                 scheduling policy on a multi-processor system, static
                 scheduling of the SDF graph does not guarantee the
                 satisfaction of latency constraints since changes to
                 the schedule may result in timing anomalies. To
                 estimate the WCRT of an SDF graph with a given mapping
                 and scheduling result, we first construct a task
                 instance dependency graph that depicts the dependency
                 between node executions in a static schedule. The
                 proposed technique combines two techniques in a novel
                 way: schedule time bound analysis and response time
                 analysis. The former is used to consider the
                 interference between task instances in the same SDF
                 graph, and the latter is used to consider the
                 interference from other real-time tasks. Through
                 extensive experiments with synthetic examples and
                 benchmarks, we verify the superior performance of the
                 proposed technique compared to other existent
                 techniques.},
}

@article{Jung:2017:MSM,
  author =       {Hanwoong Jung and Hyunok Oh and Soonhoi Ha},
  title =        {Multiprocessor Scheduling of a Multi-Mode Dataflow
                 Graph Considering Mode Transition Delay},
  journal =      j-TODAES,
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  volume =       {22},
  number =       {2},
  articleno =    {37},
  pages =        {37:1--37:??},
  month =        mar,
  year =         {2017},
  doi =          {https://doi.org/10.1145/2997645},
  issn =         {1084-4309 (print), 1557-7309 (electronic)},
  issn-l =       {1084-4309},
  coden =        {ATASFO},
  journal-url =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate =      {Fri Jul 21 10:49:30 MDT 2017},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  abstract =     {The Synchronous Data Flow (SDF) model is widely used
                 for specifying signal processing or streaming
                 applications. Since modern embedded applications become
                 more complex with dynamic behavior changes at runtime,
                 several extensions of the SDF model have been proposed
                 to specify the dynamic behavior changes while
                 preserving static analyzability of the SDF model. They
                 assume that an application has a finite number of
                 behaviors (or modes), and each behavior (mode) is
                 represented by an SDF graph. They are classified as
                 multi-mode dataflow models in this article. While there
                 exist several scheduling techniques for multi-mode
                 dataflow models, no one allows task migration between
                 modes. By observing that the resource requirement can
                 be additionally reduced if task migration is allowed,
                 we propose a multiprocessor scheduling technique of a
                 multi-mode dataflow graph considering task migration
                 between modes. Based on a genetic algorithm, the
                 proposed technique schedules all SDF graphs in all
                 modes simultaneously to minimize the resource
                 requirement. To satisfy the throughput constraint, the
                 proposed technique calculates the actual throughput
                 requirement of each mode and the output buffer size for
                 tolerating throughput jitter. We compare the proposed
                 technique with a method that analyzes SDF graphs in
                 each execution mode separately, a method that does not
                 allow task migration, and a method that does not allow
                 mode-overlapped schedule for synthetic examples and
                 five real applications: H.264 decoder, lane detection,
                 vocoder, MP3 decoder, and printer pipeline.},
}

@article{Bouakaz:2017:SPD,
  author =       {Adnan Bouakaz and Pascal Fradet and Alain Girault},
  title =        {A Survey of Parametric Dataflow Models of
                 Computation},
  journal =      j-TODAES,
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  volume =       {22},
  number =       {2},
  articleno =    {38},
  pages =        {38:1--38:??},
  month =        mar,
  year =         {2017},
  doi =          {https://doi.org/10.1145/2999539},
  issn =         {1084-4309 (print), 1557-7309 (electronic)},
  issn-l =       {1084-4309},
  coden =        {ATASFO},
  journal-url =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate =      {Fri Jul 21 10:49:30 MDT 2017},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  abstract =     {Dataflow models of computation (MoCs) are widely used
                 to design embedded signal processing and streaming
                 systems. Dozens of dataflow MoCs have been proposed in
                 the past few decades. More recently, several parametric
                 dataflow MoCs have been presented as an interesting
                 tradeoff between analyzability and expressiveness. They
                 offer a controlled form of dynamism under the form of
                 parameters (e.g., parametric rates), along with runtime
                 parameter configuration. This survey provides a
                 comprehensive description of the existing parametric
                 dataflow MoCs (constructs, constraints, properties,
                 static analyses) and compares them using a common
                 example. The main objectives are to help designers of
                 streaming applications choose the most suitable model
                 for their needs and pave the way for the design of new
                 parametric MoCs.},
}

@article{Bouakaz:2017:SAD,
  author =       {Adnan Bouakaz and Pascal Fradet and Alain Girault},
  title =        {Symbolic Analyses of Dataflow Graphs},
  journal =      j-TODAES,
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  volume =       {22},
  number =       {2},
  articleno =    {39},
  pages =        {39:1--39:??},
  month =        mar,
  year =         {2017},
  doi =          {https://doi.org/10.1145/3007898},
  issn =         {1084-4309 (print), 1557-7309 (electronic)},
  issn-l =       {1084-4309},
  coden =        {ATASFO},
  journal-url =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate =      {Fri Jul 21 10:49:30 MDT 2017},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  abstract =     {The synchronous dataflow model of computation is
                 widely used to design embedded stream-processing
                 applications under strict quality-of-service
                 requirements (e.g., buffering size, throughput,
                 input-output latency). The required analyses can either
                 be performed at compile time (for design space
                 exploration) or at runtime (for resource management and
                 reconfigurable systems). However, these analyses have
                 an exponential time complexity, which may cause a huge
                 runtime overhead or make design space exploration
                 unacceptably slow. In this article, we argue that
                 symbolic analyses are more appropriate since they
                 express the system performance as a function of
                 parameters (i.e., input and output rates, execution
                 times). Such functions can be quickly evaluated for
                 each different configuration or checked with respect to
                 different quality-of-service requirements. We provide
                 symbolic analyses for computing the maximal throughput
                 of acyclic synchronous dataflow graphs, the minimum
                 required buffers for which as soon as possible (ASAP)
                 scheduling achieves this throughput, and finally, the
                 corresponding input-output latency of the graph. The
                 article first investigates these problems for a single
                 parametric edge. The results are extended to general
                 acyclic graphs using linear approximation techniques.
                 We assess the proposed analyses experimentally on both
                 synthetic and real benchmarks.},
}

@Article{Park:2017:HHC,
  author =       "Jaehyun Park and Seungcheol Baek and Hyung Gyu Lee and
                 Chrysostomos Nicopoulos and Vinson Young and Junghee
                 Lee and Jongman Kim",
  title =        "{HoPE}: Hot-Cacheline Prediction for Dynamic Early
                 Decompression in Compressed {LLCs}",
  journal =      j-TODAES,
  volume =       "22",
  number =       "3",
  pages =        "40:1--40:??",
  month =        may,
  year =         "2017",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2999538",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 21 10:49:30 MDT 2017",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/datacompression.bib;
                 https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Data compression plays a pivotal role in improving
                 system performance and reducing energy consumption,
                 because it increases the logical effective capacity of
                 a compressed memory system without physically
                 increasing the memory size. However, data compression
                 techniques incur some cost, such as non-negligible
                 compression and decompression overhead. This overhead
                 becomes more severe if compression is used in the
                 cache. In this article, we aim to minimize the read-hit
                 decompression penalty in compressed Last-Level Caches
                 (LLCs) by speculatively decompressing frequently used
                 cachelines. To this end, we propose a Hot-cacheline
                 Prediction and Early decompression (HoPE) mechanism
                 that consists of three synergistic techniques:
                 Hot-cacheline Prediction (HP), Early Decompression
                 (ED), and Hit-history-based Insertion (HBI). HP and HBI
                 efficiently identify the hot compressed cachelines,
                 while ED selectively decompresses hot cachelines, based
                 on their size information. Unlike previous approaches,
                 the HoPE framework considers the performance
                 balance/tradeoff between the increased effective cache
                 capacity and the decompression penalty. To evaluate the
                 effectiveness of the proposed HoPE mechanism, we run
                 extensive simulations on memory traces obtained from
                 multi-threaded benchmarks running on a full-system
                 simulation framework. We observe significant
                 performance improvements over compressed cache schemes
                 employing the conventional Least-Recently Used (LRU)
                 replacement policy, the Dynamic Re-Reference Interval
                 Prediction (DRRIP) scheme, and the Effective Capacity
                 Maximizer (ECM) compressed cache management mechanism.
                 Specifically, HoPE exhibits system performance
                 improvements of approximately 11\%, on average, over
                 LRU, 8\% over DRRIP, and 7\% over ECM by reducing the
                 read-hit decompression penalty by around 65\%, over a
                 wide range of applications.",
  acknowledgement = ack-nhfb,
  articleno =    "40",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Tang:2017:PPE,
  author =       "Li Tang and Richard F. Barrett and Jeanine Cook and X.
                 Sharon Hu",
  title =        "{PeaPaw}: Performance and Energy-Aware Partitioning of
                 Workload on Heterogeneous Platforms",
  journal =      j-TODAES,
  volume =       "22",
  number =       "3",
  pages =        "41:1--41:??",
  month =        may,
  year =         "2017",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2999540",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 21 10:49:30 MDT 2017",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Performance and energy are two major concerns for
                 application development on heterogeneous platforms. It
                 is challenging for application developers to fully
                 exploit the performance/energy potential of
                 heterogeneous platforms. One reason is the lack of
                 reliable prediction of the system's performance/energy
                 before application implementation. Another reason is
                 that a heterogeneous platform presents a large design
                 space for workload partitioning between different
                 processors. To reduce such development cost, this
                 article proposes a framework, PeaPaw, to assist
                 application developers to identify a workload partition
                 (WP) that has high potential leading to high
                 performance or energy efficiency before actual
                 implementation. The PeaPaw framework includes both
                 analytical performance/energy models and two sets of
                 workload partitioning guidelines. Based on the design
                 goal, application developers can obtain a workload
                 partitioning guideline from PeaPaw for a given platform
                 and follow it to design one or multiple WPs for a given
                 workload. Then PeaPaw can be used to estimate the
                 performance/energy of the designed WPs, and the WP with
                 the best estimated performance/energy can be selected
                 for actual implementation. To demonstrate the
                 effectiveness of PeaPaw, we have conducted three case
                 studies. Results from these case studies show that
                 PeaPaw can faithfully estimate the performance/energy
                 relationships of WPs and provide effective workload
                 partitioning guidelines.",
  acknowledgement = ack-nhfb,
  articleno =    "41",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Yang:2017:CCS,
  author =       "Kun Yang and Domenic Forte and Mark M. Tehranipoor",
  title =        "{CDTA}: a Comprehensive Solution for Counterfeit
                 Detection, Traceability, and Authentication in the
                 {IoT} Supply Chain",
  journal =      j-TODAES,
  volume =       "22",
  number =       "3",
  pages =        "42:1--42:??",
  month =        may,
  year =         "2017",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3005346",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 21 10:49:30 MDT 2017",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "The Internet of Things (IoT) is transforming the way
                 we live and work by increasing the connectedness of
                 people and things on a scale that was once
                 unimaginable. However, the vulnerabilities in the IoT
                 supply chain have raised serious concerns about the
                 security and trustworthiness of IoT devices and
                 components within them. Testing for device provenance,
                 detection of counterfeit integrated circuits (ICs) and
                 systems, and traceability of IoT devices are
                 challenging issues to address. In this article, we
                 develop a novel radio-frequency identification
                 (RFID)-based system suitable for counterfeit detection,
                 traceability, and authentication in the IoT supply
                 chain called CDTA. CDTA is composed of different types
                 of on-chip sensors and in-system structures that
                 collect necessary information to detect multiple
                 counterfeit IC types (recycled, cloned, etc.), track
                 and trace IoT devices, and verify the overall system
                 authenticity. Central to CDTA is an RFID tag employed
                 as storage and a channel to read the information from
                 different types of chips on the printed circuit board
                 (PCB) in both power-on and power-off scenarios. CDTA
                 sensor data can also be sent to the remote server for
                 authentication via an encrypted Ethernet channel when
                 the IoT device is deployed in the field. A novel board
                 ID generator is implemented by combining outputs of
                 physical unclonable functions (PUFs) embedded in the
                 RFID tag and different chips on the PCB. A light-weight
                 RFID protocol is proposed to enable mutual
                 authentication between RFID readers and tags. We also
                 implement a secure interchip communication on the PCB.
                 Simulations and experimental results using Spartan 3E
                 FPGAs demonstrate the effectiveness of this system. The
                 efficiency of the radio-frequency (RF) communication
                 has also been verified via a PCB prototype with a
                 printed slot antenna.",
  acknowledgement = ack-nhfb,
  articleno =    "42",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Pomeranz:2017:GTS,
  author =       "Irith Pomeranz",
  title =        "Generation of Transparent-Scan Sequences for Diagnosis
                 of Scan Chain Faults",
  journal =      j-TODAES,
  volume =       "22",
  number =       "3",
  pages =        "43:1--43:??",
  month =        may,
  year =         "2017",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3007207",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 21 10:49:30 MDT 2017",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Diagnosis of scan chain faults is important for yield
                 learning and improvement. Procedures that generate
                 tests for diagnosis of scan chain faults produce
                 scan-based tests with one or more functional capture
                 cycles between a scan-in and a scan-out operation. The
                 approach to test generation referred to as
                 transparent-scan has several advantages in this
                 context. (1) It allows functional capture cycles and
                 scan shift cycles to be interleaved arbitrarily. This
                 increases the flexibility to assign to the scan cells
                 values that are needed for diagnosis. (2) Test
                 generation under transparent-scan considers a circuit
                 model where the scan logic is included explicitly.
                 Consequently, the test generation procedure takes into
                 consideration the full effect of a scan chain fault. It
                 thus produces accurate tests. (3) For the same reason,
                 it can also target faults inside the scan logic. (4)
                 Transparent-scan results in compact test sequences.
                 Compaction is important because of the large volumes of
                 fail data that scan chain faults create. The cost of
                 transparent-scan is that it requires simulation
                 procedures for sequential circuits, and that arbitrary
                 sequences would be applicable to the scan select input.
                 Motivated by the advantages of transparent-scan, and
                 the importance of diagnosing scan chain faults, this
                 article describes a procedure for generating
                 transparent-scan sequences for diagnosis of scan chain
                 faults. The procedure is also applied to produce
                 transparent-scan sequences for diagnosis of faults
                 inside the scan logic.",
  acknowledgement = ack-nhfb,
  articleno =    "43",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Vatanparvar:2017:ASR,
  author =       "Korosh Vatanparvar and Mohammad Abdullah {Al
                 Faruque}",
  title =        "Application-Specific Residential Microgrid Design
                 Methodology",
  journal =      j-TODAES,
  volume =       "22",
  number =       "3",
  pages =        "44:1--44:??",
  month =        may,
  year =         "2017",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3007206",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 21 10:49:30 MDT 2017",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "In power systems, the traditional, non-interactive,
                 and manually controlled power grid has been transformed
                 to a cyber-dominated smart grid. This cyber-physical
                 integration has provided the smart grid with
                 communication, monitoring, computation, and controlling
                 capabilities to improve its reliability, energy
                 efficiency, and flexibility. A microgrid is a localized
                 and semi-autonomous group of smart energy systems that
                 utilizes the above-mentioned capabilities to drive
                 modern technologies such as electric vehicle charging,
                 home energy management, and smart appliances. Design,
                 upgrading, test, and verification of these microgrids
                 can get too complicated to handle manually. The
                 complexity is due to the wide range of solutions and
                 components that are intended to address the microgrid
                 problems. This article presents a novel Model-Based
                 Design (MBD) methodology to model, co-simulate, design,
                 and optimize microgrid and its multi-level controllers.
                 This methodology helps in the design, optimization, and
                 validation of a microgrid for a specific application.
                 The application rules, requirements, and design-time
                 constraints are met in the designed/optimized microgrid
                 while the implementation cost is minimized. Based on
                 our novel methodology, a design automation,
                 co-simulation, and analysis tool, called GridMAT, is
                 implemented. Our experiments have illustrated that
                 implementing a hierarchical controller reduces the
                 average power consumption by 8\% and shifts the peak
                 load for cost saving. Moreover, optimizing the
                 microgrid design using our MBD methodology considering
                 smart controllers has decreased the total
                 implementation cost. Compared to the conventional
                 methodology, the cost decreases by 14\% and compared to
                 the MBD methodology where smart controllers are not
                 considered, it decreases by 5\%.",
  acknowledgement = ack-nhfb,
  articleno =    "44",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Yan:2017:LAE,
  author =       "Jin-Tai Yan",
  title =        "Layer Assignment of Escape Buses with Consecutive
                 Constraints in {PCB} Designs",
  journal =      j-TODAES,
  volume =       "22",
  number =       "3",
  pages =        "45:1--45:??",
  month =        may,
  year =         "2017",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3012010",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 21 10:49:30 MDT 2017",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "It is important for cost and reliability consideration
                 to minimize the number of the used layers in a PCB
                 design. In this article, given a set of $n$ circular
                 escape buses with their escape directions between two
                 adjacent components and a set of $m$ consecutive
                 constraints on the escape buses, the problem of
                 assigning the given escape buses between two adjacent
                 components onto the minimized layers is first
                 formulated for bus-oriented escape routing.
                 Furthermore, an efficient approach is proposed to
                 minimize the number of the used layers for the given
                 escape buses with the consecutive constraints and
                 assign the escape buses onto the available layers.
                 Compared with Yan's approach [Yan and Chen 2012] for
                 the layer assignment of the linear escape buses with no
                 consecutive constraint and Ma's approach [Ma et al.
                 2011a] for the layer assignment of the circular escape
                 buses with consecutive constraints, the experimental
                 results show that the proposed approach obtains the
                 same optimal results on the number of the used layers
                 and reduces 43.6\% and 90.5\% of CPU time for the
                 tested examples on the average, respectively.",
  acknowledgement = ack-nhfb,
  articleno =    "45",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Peng:2017:LSA,
  author =       "Yin-Chi Peng and Chien-Chih Chen and Hsiang-Jen Tsai
                 and Keng-Hao Yang and Pei-Zhe Huang and Shih-Chieh
                 Chang and Wen-Ben Jone and Tien-Fu Chen",
  title =        "{Leak Stopper}: an Actively Revitalized Snoop Filter
                 Architecture with Effective Generation Control",
  journal =      j-TODAES,
  volume =       "22",
  number =       "3",
  pages =        "46:1--46:??",
  month =        may,
  year =         "2017",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3015770",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 21 10:49:30 MDT 2017",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "To alleviate high energy dissipation of unnecessary
                 snooping accesses, snoop filters have been designed to
                 reduce snoop lookups. These filters have the problem of
                 decreasing filtering efficiency, and thus usually rely
                 on partial or whole filter reset by detecting block
                 evictions. Unfortunately, the reset conditions occur
                 infrequently or unevenly (called passive filter
                 deletion). This work proposes the concept of
                 revitalized snoop filter (RSF) design, which can
                 actively renew the destination filter by employing a
                 generation wrapping-around scheme for various reference
                 behaviors. We further utilize a sampling mechanism for
                 RSF to timely trigger precise filter revitalizations,
                 so that unnecessary RSF flushing can be minimized. The
                 proposed RSF can be integrated to various existent
                 inclusive snoop filters with only a minor change to
                 their designs. We evaluate our proposed design and
                 demonstrate that RSF eliminates 58.6\% of snoop energy
                 compared to JETTY on average while inducing only 6.5\%
                 of revitalization energy overhead. In addition, RSF
                 eliminates 45.5\% of snoop energy compared to stream
                 registers on average and only induces 2.5\% of
                 revitalization energy overhead. Overall, these RSFs
                 reduce the total L2 cache energy consumption by 52.1\%
                 (58.6\% $-$ 6.5\%) as compared to JETTY and by 43\%
                 (45.5\% $-$ 2.5\%) as compared to stream registers.
                 Furthermore, RSF improves the overall performance by
                 1\% to 1.4\% on average compared to JETTY and stream
                 registers for various benchmark suites.",
  acknowledgement = ack-nhfb,
  articleno =    "46",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Shi:2017:TAA,
  author =       "Guoyong Shi and Hanbin Hu and Shuwen Deng",
  title =        "Topological Approach to Automatic Symbolic
                 {Macromodel} Generation for Analog Integrated
                 Circuits",
  journal =      j-TODAES,
  volume =       "22",
  number =       "3",
  pages =        "47:1--47:??",
  month =        may,
  year =         "2017",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3015782",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 21 10:49:30 MDT 2017",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "In the field of analog integrated circuit (IC) design,
                 small-signal macromodels play indispensable roles for
                 developing design insight and sizing reference.
                 However, the subject of automatically generating
                 symbolic low-order macromodels in human readable
                 circuit form has not been well studied. Traditionally,
                 work has been published on reducing full-scale symbolic
                 transfer functions to simpler forms but without the
                 guarantee of interpretability. On the other hand,
                 methodologies developed for interconnect circuits
                 (mainly resistor-capacitor-inductor (RCL) networks) are
                 not suitable for analog ICs. In this work, a
                 topological reduction method is introduced that is able
                 to automatically generate interpretable macromodel
                 circuits in symbolic form; that is, the circuit
                 elements in the compact model maintain analytical
                 relations of the parameters of the original full
                 circuit. This type of symbolic macromodel has several
                 benefits that other traditional modeling methods do not
                 offer: First, reusability, namely that designer need
                 not repeatedly generate macromodels for the same
                 circuit even it is re-sized or re-biased; second,
                 interpretability, namely a designer may directly
                 identify circuit parameters (in the original circuit)
                 that are closely related to the dominant frequency
                 characteristics, such as dc gain, gain/phase margins,
                 and dominant poles/zeros. The effectiveness and
                 computational efficiency of the proposed method have
                 been validated by several operational amplifier (opamp)
                 circuit examples.",
  acknowledgement = ack-nhfb,
  articleno =    "47",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Han:2017:CAB,
  author =       "Miseon Han and Youngsun Han and Seon Wook Kim and
                 Hokyoon Lee and Il Park",
  title =        "Content-Aware Bit Shuffling for Maximizing {PCM}
                 Endurance",
  journal =      j-TODAES,
  volume =       "22",
  number =       "3",
  pages =        "48:1--48:??",
  month =        may,
  year =         "2017",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3017445",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 21 10:49:30 MDT 2017",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Recently, phase change memory (PCM) has been emerging
                 as a strong replacement for DRAM owing to its many
                 advantages such as nonvolatility, high capacity, low
                 leakage power, and so on. However, PCM is still
                 restricted for use as main memory because of its
                 limited write endurance. There have been many methods
                 introduced to resolve the problem by either reducing or
                 spreading out bit flips. Although many previous studies
                 have significantly contributed to reducing bit flips,
                 they still have the drawback that lower bits are
                 flipped more often than higher bits because the lower
                 bits frequently change their bit values. Also,
                 interblock wear-leveling schemes are commonly employed
                 for spreading out bit flips by shifting input data, but
                 they increase the number of bit flips per write. In
                 this article, we propose a noble content-aware bit
                 shuffling (CABS) technique that minimizes bit flips and
                 evenly distributes them to maximize the lifetime of PCM
                 at the bit level. We also introduce two additional
                 optimizations, namely, addition of an inversion bit and
                 use of an XOR key, to further reduce bit flips.
                 Moreover, CABS is capable of recovering from stuck-at
                 faults by restricting the change in values of stuck-at
                 cells. Experimental results showed that CABS
                 outperformed the existing state-of-the-art methods in
                 the aspect of PCM lifetime extension with minimal
                 overhead. CABS achieved up to 48.5\% enhanced lifetime
                 compared to the data comparison write (DCW) method only
                 with a few metadata bits. Moreover, CABS obtained
                 approximately 9.7\% of improved write throughput than
                 DCW because it significantly reduced bit flips and
                 evenly distributed them. Also, CABS reduced about 5.4\%
                 of write dynamic energy compared to DCW. Finally, we
                 have also confirmed that CABS is fully applicable to
                 BCH codes as it was able to reduce the maximum number
                 of bit flips in metadata cells by 32.1\%.",
  acknowledgement = ack-nhfb,
  articleno =    "48",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Saha:2017:SSS,
  author =       "Shamik Saha and Prabal Basu and Chidhambaranathan
                 Rajamanikkam and Aatreyi Bal and Koushik Chakraborty
                 and Sanghamitra Roy",
  title =        "{SSAGA}: {SMs} Synthesized for Asymmetric {GPGPU}
                 Applications",
  journal =      j-TODAES,
  volume =       "22",
  number =       "3",
  pages =        "49:1--49:??",
  month =        may,
  year =         "2017",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3014163",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 21 10:49:30 MDT 2017",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "The emergence of GPGPU applications, bolstered by
                 flexible GPU programming platforms, has created a
                 tremendous challenge in maintaining high energy
                 efficiency in modern GPUs. In this article, we
                 demonstrate that customizing a Streaming Multiprocessor
                 (SM) of a GPU at a lower frequency is significantly
                 more energy efficient compared to employing DVFS on an
                 SM designed for a high-frequency operation. Using a
                 system-level CAD technique, we propose
                 SSAGA---Streaming Multiprocessors Synthesized for
                 Asymmetric GPGPU Applications---an energy-efficient GPU
                 design paradigm.
                 SSAGA creates architecturally identical SM cores,
                 customized for different voltage-frequency domains. Our
                 rigorous cross-layer methodology demonstrates an
                 average of 20\% improvement in energy efficiency over a
                 spatially multitasking GPU across a range of GPGPU
                 applications.",
  acknowledgement = ack-nhfb,
  articleno =    "49",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Lu:2017:LPC,
  author =       "Tiantao Lu and Ankur Srivastava",
  title =        "Low-Power Clock Tree Synthesis for {$3$D-ICs}",
  journal =      j-TODAES,
  volume =       "22",
  number =       "3",
  pages =        "50:1--50:??",
  month =        may,
  year =         "2017",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3019610",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 21 10:49:30 MDT 2017",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "We propose efficient algorithms to construct a
                 low-power clock tree for through-silicon-via
                 (TSV)-based 3D-ICs. We use shutdown gates to save clock
                 trees' dynamic power, which selectively turn off
                 certain clock tree branches to avoid unnecessary clock
                 activities when the modules in these tree branches are
                 inactive. While this clock gating technique has been
                 extensively studied in 2D circuits, its application in
                 3D-ICs is unclear. In 3D-ICs, a shutdown gate is
                 connected to a control signal unit through control
                 TSVs, which may cause placement conflicts with existing
                 clock TSVs in the layout due to TSV's large physical
                 dimension. We develop a two-phase clock tree synthesis
                 design flow for 3D-ICs: (1) 3D abstract clock tree
                 generation based on K-means clustering and (2) clock
                 tree embedding with simultaneous shutdown gates'
                 insertion based on simulated annealing (SA) and a
                 force-directed TSV placer. Experimental results
                 indicate that (1) the K-means clustering heuristic
                 significantly reduces the clock power by clustering
                 modules with similar switching behavior and close
                 proximity, and (2) the SA algorithm effectively inserts
                 the shutdown gates to a 3D clock tree, while
                 considering control TSV's placement. Compared with
                 previous 3D clock tree synthesis techniques, our
                 K-means clustering-based approach achieves larger
                 reduction in clock tree power consumption while
                 ensuring zero clock skew.",
  acknowledgement = ack-nhfb,
  articleno =    "50",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Lee:2017:TPT,
  author =       "Woojoo Lee and Kyuseung Han and Yanzhi Wang and
                 Tiansong Cui and Shahin Nazarian and Massoud Pedram",
  title =        "{TEI}-power: Temperature Effect Inversion-Aware
                 Dynamic Thermal Management",
  journal =      j-TODAES,
  volume =       "22",
  number =       "3",
  pages =        "51:1--51:??",
  month =        may,
  year =         "2017",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3019941",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 21 10:49:30 MDT 2017",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "FinFETs have emerged as a promising replacement for
                 planar CMOS devices in sub-20nm technology nodes.
                 However, based on the temperature effect inversion
                 (TEI) phenomenon observed in FinFET devices, the delay
                 characteristics of FinFET circuits in sub-, near-, and
                 superthreshold voltage regimes may be fundamentally
                 different from those of CMOS circuits with nominal
                 voltage operation. For example, FinFET circuits may run
                 faster in higher temperatures. Therefore, the existing
                 CMOS-based and TEI-unaware dynamic power and thermal
                 management techniques would not be applicable. In this
                 article, we present TEI-power, a dynamic voltage and
                 frequency scaling--based dynamic thermal management
                 technique that considers the TEI phenomenon and also
                 the superlinear dependencies of power consumption
                 components on the temperature and outlines a real-time
                 trade-off between delay and power consumption as a
                 function of the chip temperature to provide significant
                 energy savings, with no performance penalty---namely, up
                 to 42\% energy savings for small circuits where the
                 logic cell delay is dominant and up to 36\% energy
                 savings for larger circuits where the interconnect
                 delay is considerable.",
  acknowledgement = ack-nhfb,
  articleno =    "51",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Lee:2017:UCP,
  author =       "Yongje Lee and Jinyong Lee and Ingoo Heo and Dongil
                 Hwang and Yunheung Paek",
  title =        "Using {CoreSight PTM} to Integrate {CRA} Monitoring
                 {IPs} in an {ARM}-Based {SoC}",
  journal =      j-TODAES,
  volume =       "22",
  number =       "3",
  pages =        "52:1--52:??",
  month =        may,
  year =         "2017",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3035965",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 21 10:49:30 MDT 2017",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "The ARM CoreSight Program Trace Macrocell (PTM) has
                 been widely deployed in recent ARM processors for
                 real-time debugging and tracing of software. Using PTM,
                 the external debugger can extract execution behaviors
                 of applications running on an ARM processor. Recently,
                 some researchers have been using this feature for other
                 purposes, such as fault-tolerant computation and
                 security monitoring. This motivated us to develop an
                 external security monitor that can detect control
                 hijacking attacks, of which the goal is to maliciously
                 manipulate the control flow of victim applications at
                 an attacker's disposal. This article focuses on
                 detecting a special type of attack called code reuse
                 attacks (CRA), which use a recently introduced
                 technique that allows attackers to perform arbitrary
                 computation without injecting their code by reusing
                 only existing code fragments. Our external monitor is
                 attached to the outside of the host system via the
                 system bus and ARM CoreSight PTM, and is fed with
                 execution traces of a victim application running on the
                 host. As a majority of CRAs violates the normal
                 execution behaviors of a program, our monitor
                 constantly watches and analyzes the execution traces of
                 the victim application and detects a symptom of attacks
                 when the execution behaviors violate certain rules that
                 normal applications are known to adhere. We present two
                 different implementations for this purpose: a
                 hardware-based solution in which all CRA detection
                 components are implemented in hardware, and a
                 hardware/software mixed solution that can be employed
                 in a more resource-constrained environment where the
                 deployment of full hardware-level CRA detection is
                 burdensome.",
  acknowledgement = ack-nhfb,
  articleno =    "52",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Xue:2017:FCT,
  author =       "Yuankun Xue and Ji Li and Shahin Nazarian and Paul
                 Bogdan",
  title =        "Fundamental Challenges Toward Making the {IoT} a
                 Reachable Reality: a Model-Centric Investigation",
  journal =      j-TODAES,
  volume =       "22",
  number =       "3",
  pages =        "53:1--53:??",
  month =        may,
  year =         "2017",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3001934",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 21 10:49:30 MDT 2017",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Constantly advancing integration capability is paving
                 the way for the construction of the extremely large
                 scale continuum of the Internet where entities or
                 things from vastly varied domains are uniquely
                 addressable and interacting seamlessly to form a giant
                 networked system of systems known as the
                 Internet-of-Things (IoT). In contrast to this visionary
                 networked system paradigm, prior research efforts on
                 the IoT are still very fragmented and confined to
                 disjoint explorations of different applications,
                 architecture, security, services, protocol, and
                 economical domains, thus preventing design exploration
                 and optimization from a unified and global perspective.
                 In this context, this survey article first proposes a
                 mathematical modeling framework that is rich in
                 expressivity to capture IoT characteristics from a
                 global perspective. It also sets forward a set of
                 fundamental challenges in sensing, decentralized
                 computation, robustness, energy efficiency, and
                 hardware security based on the proposed modeling
                 framework. Possible solutions are discussed to shed
                 light on future development of the IoT system
                 paradigm.",
  acknowledgement = ack-nhfb,
  articleno =    "53",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Guo:2017:OBP,
  author =       "Zimu Guo and Jia Di and Mark M. Tehranipoor and
                 Domenic Forte",
  title =        "Obfuscation-Based Protection Framework against Printed
                 Circuit Boards Unauthorized Operation and Reverse
                 Engineering",
  journal =      j-TODAES,
  volume =       "22",
  number =       "3",
  pages =        "54:1--54:??",
  month =        may,
  year =         "2017",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3035482",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 21 10:49:30 MDT 2017",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Printed circuit boards (PCBs) are a basic necessity
                 for all modern electronic systems but are becoming
                 increasingly vulnerable to cloning, overproduction,
                 tampering, and unauthorized operation. Most efforts to
                 prevent such attacks have only focused on the chip
                 level, leaving a void for PCBs and higher levels of
                 abstraction. In this article, we propose the first ever
                 obfuscation-based framework for the protection of PCBs.
                 Central to our approach is a permutation block that
                 hides the inter-chip connections between chips on the
                 PCB and is controlled by a key. If the correct key is
                 applied, then the correct connections between chips are
                 made. Otherwise, the connections are incorrectly
                 permuted, and the PCB/system fails to operate. We
                 propose a permutation network added to the PCB based on
                 a Benes network that can easily be implemented in a
                 complex programmable logic device or field-programmable
                 gate arrays. Based on this implementation, we analyze
                 the security of our approach with respect to (i)
                 brute-force attempts to reverse engineer the PCB, (ii)
                 brute-force attempts at guessing the correct key, and
                 (iii) physical and logistic attacks by a range of
                 adversaries. Performance evaluation results on 12
                 reference designs show that brute force generally
                 requires prohibitive time to break the obfuscation. We
                 also provide detailed requirements for countermeasures
                 that prevent reverse engineering, unauthorized
                 operation, and so on, for different classes of
                 attackers.",
  acknowledgement = ack-nhfb,
  articleno =    "54",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Torabi:2017:FHA,
  author =       "Mohammad Torabi and Lihong Zhang",
  title =        "A Fast Hierarchical Adaptive Analog Routing Algorithm
                 Based on Integer Linear Programming",
  journal =      j-TODAES,
  volume =       "22",
  number =       "3",
  pages =        "55:1--55:??",
  month =        may,
  year =         "2017",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3035464",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 21 10:49:30 MDT 2017",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "The shrinking design window and high parasitic
                 sensitivity in advanced technologies have imposed
                 special challenges on analog and radio frequency (RF)
                 integrated circuit design. The state-of-the-art analog
                 routing research tends to favor linear programming to
                 achieve various analog constraints, which, although
                 effective, fail to offer high routing efficiency on its
                 own. In this article, we propose a new methodology to
                 address such a deficiency based on integer linear
                 programming (ILP) but without compromising the
                 capability of handling any special constraints for the
                 analog routing problems. Our proposed method supports
                 hierarchical routing, which can divide the entire
                 routing area into multiple small heterogeneous regions
                 where the ILP can efficiently derive routing solutions.
                 Distinct from the conventional methods, our algorithm
                 utilizes adaptive resolutions for various routing
                 regions. For a more congested region, a routing grid
                 with higher resolution is employed, whereas a
                 lower-resolution grid is adopted to a less-crowded
                 routing region. For a large empty space, routing
                 efficiency can be even boosted by creating more routing
                 hierarchy levels. This scheme is especially beneficial
                 to the analog and RF layouts, which are far sparser
                 than their digital counterparts. The experimental
                 results show that our proposed adaptive ILP-based
                 router is much faster than the conventional ones, since
                 it spends much less time in the areas that need no
                 accurate routing anyway. The higher efficiency is
                 demonstrated for large circuits and especially sparse
                 layouts along with promising routing quality in terms
                 of analog constraints.",
  acknowledgement = ack-nhfb,
  articleno =    "55",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Song:2017:STV,
  author =       "Yang Song and Kambiz Samadi and Bill Lin",
  title =        "A Single-Tier Virtual Queuing Memory Controller
                 Architecture for Heterogeneous {MPSoCs}",
  journal =      j-TODAES,
  volume =       "22",
  number =       "3",
  pages =        "56:1--56:??",
  month =        may,
  year =         "2017",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3035481",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 21 10:49:30 MDT 2017",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Heterogeneous MPSoCs typically integrate diverse
                 cores, including application CPUs, GPUs, and HD coders.
                 These cores commonly share an off-chip memory to save
                 cost and energy, but their memory accesses often
                 interfere with each other, leading to undesirable
                 consequences like a slowdown of application performance
                 or a failure to sustain real-time performance. The
                 memory controller plays a central role in meeting the
                 QoS needs of real-time cores while maximizing CPU
                 performance. Previous QoS-aware memory controllers are
                 based on a classic two-tier queuing architecture that
                 buffers memory transactions at the first tier, followed
                 by a second tier that buffers translated DRAM commands.
                 In these designs, QoS-aware policies are used to
                 schedule competing transactions at the first stage, but
                 the translated DRAM commands are served in FIFO order
                 at the second stage. Unfortunately, once the scheduled
                 transactions have been forwarded to the command stage,
                 newly arriving transactions that may be more critical
                 cannot be served ahead of those translated commands
                 that are already queued at the second stage. To address
                 this, we propose a scalable memory controller
                 architecture based on single-tier virtual queuing
                 (STVQ) that maintains a single tier of request queues
                 and employs an efficacious scheduler that considers
                 both QoS requirements and DRAM bank states. In
                 comparison with previous QoS-aware memory controllers,
                 the proposed STVQ memory controller reduces CPU
                 slowdown by up to 13.9\% while satisfying all frame
                 rate requirements. We propose further optimizations
                 that can significantly increase row-buffer hits by up
                 to 66.2\% and reduce memory latency by up to 19.8\%.",
  acknowledgement = ack-nhfb,
  articleno =    "56",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Li:2017:ASE,
  author =       "Ji Li and Jeffrey Draper",
  title =        "Accelerated Soft-Error-Rate {(SER)} Estimation for
                 Combinational and Sequential Circuits",
  journal =      j-TODAES,
  volume =       "22",
  number =       "3",
  pages =        "57:1--57:??",
  month =        may,
  year =         "2017",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3035496",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 21 10:49:30 MDT 2017",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Radiation-induced soft errors have posed an increasing
                 reliability challenge to combinational and sequential
                 circuits in advanced CMOS technologies. Therefore, it
                 is imperative to devise fast, accurate and scalable
                 soft error rate (SER) estimation methods as part of
                 cost-effective robust circuit design. This paper
                 presents an efficient SER estimation framework for
                 combinational and sequential circuits, which considers
                 single-event transients (SETs) in combinational logic
                 and multiple cell upsets (MCUs) in sequential elements.
                 A novel top-down memoization algorithm is proposed to
                 accelerate the propagation of SETs, and a general
                 schematic and layout co-simulation approach is proposed
                 to model the MCUs for redundant sequential storage
                 structures. The feedback in sequential logic is
                 analyzed with an efficient time frame expansion method.
                 Experimental results on various ISCAS85 combinational
                 benchmark circuits demonstrate that the proposed
                 approach achieves up to 560.2X times speedup with less
                 than 3\% difference in terms of SER results compared
                 with the baseline algorithm. The average runtime of the
                 proposed framework on a variety of ISCAS89 benchmark
                 circuits is 7.20s, and the runtime is 119.23s for the
                 largest benchmark circuit with more than 3,000
                 flip-flops and 17,000 gates.",
  acknowledgement = ack-nhfb,
  articleno =    "57",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Yan:2017:EEE,
  author =       "Kaige Yan and Lu Peng and Mingsong Chen and Xin Fu",
  title =        "Exploring Energy-Efficient Cache Design in Emerging
                 Mobile Platforms",
  journal =      j-TODAES,
  volume =       "22",
  number =       "4",
  pages =        "58:1--58:??",
  month =        jul,
  year =         "2017",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2843940",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Jan 22 09:03:32 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Mobile devices are quickly becoming the most widely
                 used processors in consumer devices. Since their major
                 power supply is battery, energy-efficient computing is
                 highly desired. In this article, we focus on
                 energy-efficient cache design in emerging mobile
                 platforms. We observe that more than 40\% of L2 cache
                 accesses are OS kernel accesses in interactive
                 smartphone applications. Such frequent kernel accesses
                 cause serious interferences between the user and kernel
                 blocks in the L2 cache, leading to unnecessary block
                 replacements and high L2 cache miss rate. We first
                 propose to statically partition the L2 cache into two
                 separate segments, which can be accessed only by the
                 user code and kernel code, respectively. Meanwhile, the
                 overall size of the two segments is shrunk, which
                 reduces the energy consumption while still maintaining
                 the similar cache miss rate. We then find completely
                 different access behaviors between the two separated
                 kernel and user segments and explore the
                 multi-retention STT-RAM-based user and kernel segments
                 to obtain higher energy savings in this static
                 partition-based cache design. Finally, we propose to
                 dynamically partition the L2 cache into the user and
                 kernel segments to minimize overall cache size. We also
                 integrate the short-retention STT-RAM into this dynamic
                 partition-based cache design for maximal energy
                 savings. The experimental results show that our static
                 technique reduces cache energy consumption by 75\% with
                 2\% performance loss, and our dynamic technique further
                 shows strong capability to reduce cache energy
                 consumption by 85\% with only 3\% performance loss.",
  acknowledgement = ack-nhfb,
  articleno =    "58",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Kim:2017:SBS,
  author =       "Taehyun Kim and Jongbum Lim and Jinku Kim and
                 Woo-Cheol Cho and Eui-Young Chung and Hyuk-Jun Lee",
  title =        "Scalable Bandwidth Shaping Scheme via Adaptively
                 Managed Parallel Heaps in Manycore-Based Network
                 Processors",
  journal =      j-TODAES,
  volume =       "22",
  number =       "4",
  pages =        "59:1--59:??",
  month =        jul,
  year =         "2017",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3065926",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Jan 22 09:03:32 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Scalability of network processor-based routers heavily
                 depends on limitations imposed by memory accesses and
                 associated power consumption. Bandwidth shaping of a
                 flow is a key function, which requires a token bucket
                 per output queue and abuses memory bandwidth. As the
                 number of output queues increases, managing token
                 buckets becomes prohibitively expensive and limits
                 scalability. In this work, we propose a scalable
                 software-based token bucket management scheme that can
                 reduce memory accesses and power consumption
                 significantly. To satisfy real-time and low-cost
                 constraints, we propose novel parallel heap data
                 structures running on a manycore-based network
                 processor. By using cache locking, the performance of
                 heap processing is enhanced significantly and is more
                 predictable. In addition, we quantitatively analyze the
                 performance and memory footprint of the proposed
                 software scheme using stochastic modeling and the
                 Lyapunov central limit theorem. Finally, the proposed
                 scheme provides an adaptive method to limit the size of
                 heaps in the case of oversubscribed queues, which can
                 successfully isolate the queues showing unideal
                 behavior. The proposed scheme reduces memory accesses
                 by up to three orders of magnitude for one million
                 queues sharing a 100Gbps interface of the router while
                 maintaining stability under stressful scenarios.",
  acknowledgement = ack-nhfb,
  articleno =    "59",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Agrawal:2017:OSA,
  author =       "Prabhav Agrawal and Mike Broxterman and Biswadeep
                 Chatterjee and Patrick Cuevas and Kathy H. Hayashi and
                 Andrew B. Kahng and Pranay K. Myana and Siddhartha
                 Nath",
  title =        "Optimal Scheduling and Allocation for {IC} Design
                 Management and Cost Reduction",
  journal =      j-TODAES,
  volume =       "22",
  number =       "4",
  pages =        "60:1--60:??",
  month =        jul,
  year =         "2017",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3035483",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Jan 22 09:03:32 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "A large semiconductor product company spends hundreds
                 of millions of dollars each year on design
                 infrastructure to meet tapeout schedules for multiple
                 concurrent projects. Resources (servers, electronic
                 design automation tool licenses, engineers, and so on)
                 are limited and must be shared --- and the cost per day
                 of schedule slip can be enormous. Co-constraints
                 between resource types (e.g., one license per every two
                 cores (threads)) and dedicated versus shareable
                 resource pools make scheduling and allocation hard. In
                 this article, we formulate two mixed integer-linear
                 programs for optimal multi-project, multi-resource
                 allocation with task precedence and resource
                 co-constraints. Application to a real-world
                 three-project scheduling problem extracted from a
                 leading-edge design center of anonymized Company X
                 shows substantial compute and license costs savings.
                 Compared to the product company, our solution shows
                 that the makespan of schedule of all projects can be
                 reduced by seven days, which not only saves $ \sim $2.7\% of
                 annual labor and infrastructure costs but also enhances
                 market competitiveness. We also demonstrate the
                 capability of scheduling over two dozen chip
                 development projects at the design center level,
                 subject to resource and datacenter capacity limits as
                 well as per-project penalty functions for schedule
                 slips. The design center ended up purchasing 600
                 additional servers, whereas our solution demonstrates
                 that the schedule can be met without having to purchase
                 any additional servers. Application to a four-project
                 scheduling problem extracted from a leading-edge design
                 center in a non-US location shows availability of up to
                 $ \sim $37\% headcount reduction during a half-year schedule
                 for just one type of chip design activity.",
  acknowledgement = ack-nhfb,
  articleno =    "60",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Isenberg:2017:PCH,
  author =       "Tobias Isenberg and Marco Platzner and Heike Wehrheim
                 and Tobias Wiersema",
  title =        "Proof-Carrying Hardware via Inductive Invariants",
  journal =      j-TODAES,
  volume =       "22",
  number =       "4",
  pages =        "61:1--61:??",
  month =        jul,
  year =         "2017",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3054743",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Jan 22 09:03:32 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Proof-carrying hardware (PCH) is a principle for
                 achieving safety for dynamically reconfigurable
                 hardware systems. The producer of a hardware module
                 spends huge effort when creating a proof for a safety
                 policy. The proof is then transferred as a certificate
                 together with the configuration bitstream to the
                 consumer of the hardware module, who can quickly verify
                 the given proof. Previous work utilized SAT solvers and
                 resolution traces to set up a PCH technology and
                 corresponding tool flows. In this article, we present a
                 novel technology for PCH based on inductive invariants.
                 For sequential circuits, our approach is fundamentally
                 stronger than the previous SAT-based one since we avoid
                 the limitations of bounded unrolling. We contrast our
                 technology to existing ones and show that it fits into
                 previously proposed tool flows. We conduct experiments
                 with four categories of benchmark circuits and report
                 consumer and producer runtime and peak memory
                 consumption, as well as the size of the certificates
                 and the distribution of the workload between producer
                 and consumer. Experiments clearly show that our new
                 induction-based technology is superior for sequential
                 circuits, whereas the previous SAT-based technology is
                 the better choice for combinational circuits.",
  acknowledgement = ack-nhfb,
  articleno =    "61",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Bonetti:2017:AID,
  author = {Andrea Bonetti and Nicholas Preyss and Adam Teman and
      Andreas Burg},
  title = {Automated Integration of Dual-Edge Clocking for
      Low-Power Operation in Nanometer Nodes},
  journal = j-TODAES,
  volume = {22},
  number = {4},
  pages = {62:1--62:??},
  month = jul,
  year = {2017},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3054744},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Mon Jan 22 09:03:32 MST 2018},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {Clocking power, including both clock distribution and
      registers, has long been one of the primary factors in
      the total power consumption of many digital systems.
      One straightforward approach to reduce this power
      consumption is to apply dual-edge-triggered (DET)
      clocking, as sequential elements operate at half the
      clock frequency while maintaining the same throughput
      as with conventional single-edge-triggered (SET)
      clocking. However, the DET approach is rarely taken in
      modern integrated circuits, primarily due to the
      perceived complexity of integrating such a clocking
      scheme. In this article, we first identify the most
      promising conditions for achieving low-power operation
      with DET clocking and then introduce a fully automated
      design flow for applying DET to a conventional SET
      design. The proposed design flow is demonstrated on
      three benchmark circuits in a 40nm CMOS technology,
      providing as much as a 50\% reduction in clock
      distribution and register power consumption.},
  acknowledgement = ack-nhfb,
  articleno = {62},
  fjournal = {ACM Transactions on Design Automation of Electronic
      Systems},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

%%% Abstract of Li:2017:DMF: ``comparable performance than previous
%%% methods'' reworded to ``comparable performance to previous methods''.
@Article{Li:2017:DMF,
  author =       "Katherine Shu-Min Li and Sying-Jyan Wang",
  title =        "Design Methodology of Fault-Tolerant Custom {$3$D}
                 Network-on-Chip",
  journal =      j-TODAES,
  volume =       "22",
  number =       "4",
  pages =        "63:1--63:??",
  month =        jul,
  year =         "2017",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3054745",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Jan 22 09:03:32 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "A systematic design methodology is presented for
                 custom Network-on-Chip (NoC) in three-dimensional
                 integrated circuits (3D-ICs). In addition, fault
                 tolerance is supported in the NoC if extra links are
                 included in the NoC topology. In the proposed method,
                 processors and the communication architecture are
                 synthesized simultaneously in the 3D floorplanning
                 process. 3D-IC technology enables ICs to be implemented
                 in smaller size with higher performance; on the flip
                 side, 3D-ICs suffer yield loss due to multiple dies in
                 a 3D stack and lower manufacturing yield of
                 through-silicon vias (TSVs). To alleviate this problem,
                 a known-good-dies (KGD) test can be applied to ensure
                 every die to be packaged into a 3D-IC is fault-free.
                 However, faulty TSVs cannot be tested in the KGD test.
                 In this article, the proposed method deals with the
                 problem by providing fault tolerance in the NoC
                 topology. The efficiency of the proposed method is
                 evaluated using several benchmark circuits, and the
                 experimental results show that the proposed method
                 produces 3D NoCs with comparable performance to
                 previous methods when fault-tolerant features are not
                 realized. With fault tolerance in NoCs, higher yield
                 can be achieved at the cost of performance penalty and
                 elevated power level.",
  acknowledgement = ack-nhfb,
  articleno =    "63",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Pagliari:2017:AEE,
  author = {Daniele Jahier Pagliari and Enrico Macii and Massimo
      Poncino},
  title = {Approximate Energy-Efficient Encoding for Serial
      Interfaces},
  journal = j-TODAES,
  volume = {22},
  number = {4},
  pages = {64:1--64:??},
  month = jul,
  year = {2017},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3041220},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Mon Jan 22 09:03:32 MST 2018},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {Serial buses are ubiquitous interconnections in
      embedded computing systems that are used to interface
      processing elements with peripherals, such as sensors,
      actuators, and I/O controllers. Despite their limited
      wiring, as off-chip connections they can account for a
      significant amount of the total power consumption of a
      system-on-chip device. Encoding the information sent on
      these buses is the most intuitive and affordable way to
      reduce their power contribution; moreover, the encoding
      can be made even more effective by exploiting the fact
      that many embedded applications can tolerate
      intermediate approximations without a significant
      impact on the final quality of results, thus trading
      off accuracy for power consumption. We propose a simple
      yet very effective approximate encoding for reducing
      dynamic energy in serial buses. Our approach uses
      differential encoding as a baseline scheme and extends
      it with bounded approximations to overcome the
      intrinsic limitations of differential encoding for data
      with low temporal correlation. We show that the
      proposed scheme, in addition to yielding extremely
      compact codecs, is superior to all state-of-the-art
      approximate serial encodings over a wide set of traces
      representing data received or sent from/to sensor or
      actuators.},
  acknowledgement = ack-nhfb,
  articleno = {64},
  fjournal = {ACM Transactions on Design Automation of Electronic
      Systems},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

%%% Abstract of Schafer:2017:PHL: typos ``This works presents'' and
%%% ``your proposed explorer'' corrected to ``This work presents'' and
%%% ``our proposed explorer''.
@Article{Schafer:2017:PHL,
  author =       "Benjamin Carrion Schafer",
  title =        "Parallel High-Level Synthesis Design Space Exploration
                 for Behavioral {IPs} of Exact Latencies",
  journal =      j-TODAES,
  volume =       "22",
  number =       "4",
  pages =        "65:1--65:??",
  month =        jul,
  year =         "2017",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3041219",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Jan 22 09:03:32 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "This work presents a Design Space Exploration (DSE)
                 method for Behavioral IPs (BIPs) given in ANSI-C or
                 SystemC to find the smallest micro-architecture for a
                 specific target latency. Previous work on High-Level
                 Synthesis (HLS) DSE mainly focused on finding a
                 tradeoff curve with Pareto-optimal designs. HLS is,
                 however, a single process (component) synthesis method.
                 Very often, the latency of the components requires a
                 specific fixed latency when inserted within a larger
                 system. This work presents a fast multi-threaded method
                 to find the smallest micro-architecture for a given BIP
                 and target latency by discriminating between all
                 different exploration knobs and exploring these
                 concurrently. Experimental results show that our
                 proposed method is very effective and comprehensive
                 results compare the quality of results vs. the speedup
                 of our proposed explorer.",
  acknowledgement = ack-nhfb,
  articleno =    "65",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Moudallal:2017:GCC,
  author = {Zahi Moudallal and Farid N. Najm},
  title = {Generating Current Constraints to Guarantee {RLC}
      Power Grid Safety},
  journal = j-TODAES,
  volume = {22},
  number = {4},
  pages = {66:1--66:??},
  month = jul,
  year = {2017},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3054746},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Mon Jan 22 09:03:32 MST 2018},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {A critical task during early chip design is the
      efficient verification of the chip power distribution
      network. Vectorless verification, developed since the
      mid-2000s as an alternative to traditional
      simulation-based methods, requires the user to specify
      current constraints (budgets) for the underlying
      circuitry and checks if the corresponding voltage
      variations on all grid nodes are within a
      user-specified margin. This framework is extremely
      powerful, as it allows for efficient and early
      verification, but specifying/obtaining current
      constraints remains a burdensome task for users and a
      hurdle to adoption of this framework by the industry.
      Recently, the inverse problem has been introduced:
      Generate circuit current constraints that, if satisfied
      by the underlying logic circuitry, would guarantee grid
      safety from excessive voltage variations. This approach
      has many potential applications, including various grid
      quality metrics, as well as voltage drop-aware
      placement and floorplanning. So far, this framework has
      been developed assuming only resistive and capacitive
      (RC) elements in the power grid model. Inductive
      effects are becoming a significant component of the
      power supply noise and can no longer be ignored. In
      this article, we extend the constraints generation
      approach to allow for inductance. We give a rigorous
      problem definition and develop some key theoretical
      results related to maximality of the current space
      defined by the constraints. Based on this, we then
      develop three constraints generation algorithms that
      target the peak total chip power that is allowed by the
      grid, the uniformity of current distribution across the
      die area, and a combination of both metrics.},
  acknowledgement = ack-nhfb,
  articleno = {66},
  fjournal = {ACM Transactions on Design Automation of Electronic
      Systems},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Pomeranz:2017:TMR,
  author = {Irith Pomeranz and M. Enamul Amyeen and Srikanth
      Venkataraman},
  title = {Test Modification for Reduced Volumes of Fail Data},
  journal = j-TODAES,
  volume = {22},
  number = {4},
  pages = {67:1--67:??},
  month = jul,
  year = {2017},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3065925},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Mon Jan 22 09:03:32 MST 2018},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {As part of a yield improvement process, fail data is
      collected from faulty units. Several approaches exist
      for reducing the tester time and the volume of fail
      data that needs to be collected based on the
      observation that a subset of the fail data is
      sufficient for accurate defect diagnosis. This article
      addresses the volume of fail data by considering the
      test set that is used for collecting fail data. It
      observes that certain faults from a set of target
      faults produce significantly larger numbers of faulty
      output values (and therefore significantly larger
      volumes of fail data) than other faults under a given
      test set. Based on this observation, it describes a
      procedure for modifying the test set to reduce the
      maximum number of faulty output values that a target
      fault produces. When defects are considered in a
      simulation experiment, and a defect diagnosis procedure
      is applied to the fail data that they produce, two
      effects are observed: the maximum and average numbers
      of faulty output values per defect are reduced
      significantly with the modified test set, and the
      quality of diagnosis is similar or even improved with
      the modified test set.},
  acknowledgement = ack-nhfb,
  articleno = {67},
  fjournal = {ACM Transactions on Design Automation of Electronic
      Systems},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Wang:2017:MSS,
  author = {Ya Wang and Di Gao and Dani Tannir and Ning Dong and
      G. Peter Fang and Wei Dong and Peng Li},
  title = {Multiharmonic Small-Signal Modeling of Low-Power {PWM}
      {DC-DC} Converters},
  journal = j-TODAES,
  volume = {22},
  number = {4},
  pages = {68:1--68:??},
  month = jul,
  year = {2017},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3057274},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Mon Jan 22 09:03:32 MST 2018},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {Small-signal models of pulse-width modulation (PWM)
      converters are widely used for analyzing stability and
      play an important role in converter design and control.
      However, existing small-signal models either are based
      on averaged DC behaviors, and hence are unable to
      capture frequency responses that are faster than the
      switching frequency, or greatly approximate these
      high-frequency responses. We address the severe
      limitations of the existing models by proposing a
      multiharmonic model that provides a complete
      small-signal characterization of both DC averages and
      high-order harmonic responses. The proposed model
      captures important high-frequency overshoots and
      undershoots of the converter response, which are
      otherwise unaccounted for by the existing techniques.
      In two converter examples, the proposed model corrects
      the misleading results of the existing models by
      providing truthful characterization of the overall
      converter AC response and offers important guidance for
      converter design and closed-loop control.},
  acknowledgement = ack-nhfb,
  articleno = {68},
  fjournal = {ACM Transactions on Design Automation of Electronic
      Systems},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Albalawi:2017:TFP,
  author = {Hassan Albalawi and Yuanning Li and Xin Li},
  title = {Training Fixed-Point Classifiers for On-Chip Low-Power
      Implementation},
  journal = j-TODAES,
  volume = {22},
  number = {4},
  pages = {69:1--69:??},
  month = jul,
  year = {2017},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3057275},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Mon Jan 22 09:03:32 MST 2018},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {In this article, we develop several novel algorithms
      to train classifiers that can be implemented on chip
      with low-power fixed-point arithmetic with extremely
      small word length. These algorithms are based on Linear
      Discriminant Analysis (LDA), Support Vector Machine
      (SVM), and Logistic Regression (LR), and are referred
      to as LDA-FP, SVM-FP, and LR-FP, respectively. They
      incorporate the nonidealities (i.e., rounding and
      overflow) associated with fixed-point arithmetic into
      the offline training process so that the resulting
      classifiers are robust to these nonidealities.
      Mathematically, LDA-FP, SVM-FP, and LR-FP are
      formulated as mixed integer programming problems that
      can be robustly solved by the branch-and-bound methods
      described in this article. Our numerical experiments
      demonstrate that LDA-FP, SVM-FP, and LR-FP
      substantially outperform the conventional approaches
      for the emerging biomedical applications of brain
      decoding.},
  acknowledgement = ack-nhfb,
  articleno = {69},
  fjournal = {ACM Transactions on Design Automation of Electronic
      Systems},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Hoveida:2017:EMA,
  author = {Mohaddeseh Hoveida and Fatemeh Aghaaliakbari and Ramin
      Bashizade and Mohammad Arjomand and Hamid
      Sarbazi-Azad},
  title = {Efficient Mapping of Applications for Future
      Chip-Multiprocessors in Dark Silicon Era},
  journal = j-TODAES,
  volume = {22},
  number = {4},
  pages = {70:1--70:??},
  month = jul,
  year = {2017},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3055202},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Mon Jan 22 09:03:32 MST 2018},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {The failure of Dennard scaling has led to the
      utilization wall that is the source of dark silicon and
      limits the percentage of a chip that can actively
      switch within a given power budget. To address this
      issue, a structure is needed to guarantee the limited
      power budget along with providing sufficient
      flexibility and performance for different applications
      with various communication requirements. In this
      article, we present a general-purpose platform for
      future many-core Chip-Multiprocessors (CMPs) that
      benefits from the advantages of clustering,
      Network-on-Chip (NoC) resource sharing among cores, and
      power gating the unused components of clusters. We also
      propose two task mapping methods for the proposed
      platform in which active and dark cores are dispersed
      appropriately, so that an excess of power budget can be
      obtained. Our evaluations reveal that the first and
      second proposed mapping mechanisms respectively reduce
      the execution time by up to 28.6\% and 39.2\% and the
      NoC power consumption by up to 11.1\% and 10\%, and
      gain an excess power budget of up to 7.6\% and 13.4\%
      over the baseline architecture.},
  acknowledgement = ack-nhfb,
  articleno = {70},
  fjournal = {ACM Transactions on Design Automation of Electronic
      Systems},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Saha:2017:STS,
  author = {Sangeet Saha and Arnab Sarkar and Amlan Chakrabarti},
  title = {Spatio-Temporal Scheduling of Preemptive Real-Time
      Tasks on Partially Reconfigurable Systems},
  journal = j-TODAES,
  volume = {22},
  number = {4},
  pages = {71:1--71:??},
  month = jul,
  year = {2017},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3056561},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Mon Jan 22 09:03:32 MST 2018},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {Reconfigurable devices that promise to offer the twin
      benefits of flexibility as in general-purpose
      processors along with the efficiency of dedicated
      hardwares often provide a lucrative solution for many
      of today's highly complex real-time embedded systems.
      However, online scheduling of dynamic hard real-time
      tasks on such systems with efficient resource
      utilization in terms of both space and time poses an
      enormously challenging problem. We attempt to solve
      this problem using a combined offline-online approach.
      The offline component generates and stores various
      optional feasible placement solutions for different
      sub-sets of tasks that may possibly be co-mapped
      together. Given a set of periodic preemptive real-time
      tasks that requires to be executed at runtime, the
      online scheduler first carries out an admission control
      procedure and then produces a schedule, which is
      guaranteed to meet all timing constraints provided it
      is spatially feasible to place designated subsets of
      these tasks at specified scheduling points within a
      future time interval. These feasibility checks are done
      and actual placement solutions are obtained through a
      low overhead search of the statically precomputed
      placement solutions. Based on this approach, we have
      proposed a periodic preemptive real-time scheduling
      methodology for runtime partially reconfigurable
      devices. Effectiveness of the proposed strategy has
      been verified through simulation based experiments and
      we observed that the strategy achieves high resource
      utilization with low task rejection rates over various
      simulation scenarios.},
  acknowledgement = ack-nhfb,
  articleno = {71},
  fjournal = {ACM Transactions on Design Automation of Electronic
      Systems},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Abella:2017:MBW,
  author = {Jaume Abella and Maria Padilla and Joan {Del Castillo}
      and Francisco J. Cazorla},
  title = {Measurement-Based Worst-Case Execution Time Estimation
      Using the Coefficient of Variation},
  journal = j-TODAES,
  volume = {22},
  number = {4},
  pages = {72:1--72:??},
  month = jul,
  year = {2017},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3065924},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Mon Jan 22 09:03:32 MST 2018},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {Extreme Value Theory (EVT) has been historically used
      in domains such as finance and hydrology to model
      worst-case events (e.g., major stock market
      incidences). EVT takes as input a sample of the
      distribution of the variable to model and fits the tail
      of that sample to either the Generalised Extreme Value
      (GEV) or the Generalised Pareto Distribution (GPD).
      Recently, EVT has become popular in real-time systems
      to derive worst-case execution time (WCET) estimates of
      programs. However, the application of EVT is not
      straightforward and requires a detailed analysis of,
      and customisation for, the particular problem at hand.
      In this article, we tailor the application of EVT to
      timing analysis. To that end, (1) we analyse the
      response time of different hardware resources (e.g.,
      cache memories) and identify those that may lead to
      radically different types of execution time
      distributions. (2) We show that one of these
      distributions, known as mixture distribution, causes
      problems in the use of EVT. In particular, mixture
      distributions challenge not only properly selecting
      GEV/GPD parameters (i.e., location, scale and shape)
      but also determining the size of the sample to ensure
      that enough tail values are passed to EVT and that only
      tail values are used by EVT to fit GEV/GPD. Failing to
      select these parameters has a negative impact on the
      quality of the derived WCET estimates. We tackle these
      problems, by (3) proposing Measurement-Based
      Probabilistic Timing Analysis using the Coefficient of
      Variation (MBPTA-CV), a new mixture-distribution aware,
      WCET-suited MBPTA method that builds on recent EVT
      developments in other fields (e.g., finance) to
      automatically select the distribution parameters that
      best fit the maxima of the observed execution times.
      Our results on a simulation environment and a real
      board show that MBPTA-CV produces high-quality WCET
      estimates.},
  acknowledgement = ack-nhfb,
  articleno = {72},
  fjournal = {ACM Transactions on Design Automation of Electronic
      Systems},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

%%% Title of Salcic:2017:NHH: the system name is NoC-HMP (NoC-based
%%% Heterogeneous Multi-Processor), as spelled throughout the abstract;
%%% the brace-protected acronym was previously miscapitalised.
@Article{Salcic:2017:NHH,
  author =       "Zoran Salcic and Heejong Park and J{\"u}rgen Teich and
                 Avinash Malik and Muhammad Nadeem",
  title =        "{NoC-HMP}: a Heterogeneous Multicore Processor for
                 Embedded Systems Designed in {SystemJ}",
  journal =      j-TODAES,
  volume =       "22",
  number =       "4",
  pages =        "73:1--73:??",
  month =        jul,
  year =         "2017",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3073416",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Jan 22 09:03:32 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Scalability and performance in multicore processors
                 for embedded and real-time systems usually don't go
                 well each with the other. Networks on Chip (NoCs)
                 provide scalable execution platforms suitable for such
                 kind of embedded systems. This article presents a
                 NoC-based Heterogeneous Multi-Processor system, called
                 NoC-HMP, which is a scalable platform for embedded
                 systems developed in the GALS language SystemJ. NoC-HMP
                 uses a time-predictable TDMA-MIN NoC to guarantee
                 latencies and communication time between the two types
                 of time-predictable cores and can be customized for a
                 specific performance goal through the execution
                 strategy and scheduling of SystemJ program deployed
                 across multiple cores. Examples of different execution
                 strategies are introduced, explored and analyzed via
                 measurements. The number of used cores can be minimized
                 to achieve the target performance of the application.
                 TDMA-MIN allows easy extensions of NoC-HMP with other
                 cores or IP blocks. Experiments show a significant
                 improvement of performance over a single core system
                 and demonstrate how the addition of cores affects the
                 performance of the designed system.",
  acknowledgement = ack-nhfb,
  articleno =    "73",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Behera:2017:TTS,
  author =       "Lalatendu Behera and Purandar Bhaduri",
  title =        "Time-Triggered Scheduling of Mixed-Criticality
                 Systems",
  journal =      j-TODAES,
  volume =       "22",
  number =       "4",
  pages =        "74:1--74:??",
  month =        jul,
  year =         "2017",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3073415",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Jan 22 09:03:32 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Real-time and embedded systems are moving from the
                 traditional design paradigm to integration of multiple
                 functionalities onto a single computing platform. Some
                 of the functionalities are safety critical and subject
                 to certification. The rest of the functionalities are
                 nonsafety critical and do not need to be certified.
                 Designing efficient scheduling algorithms which can be
                 used to meet the certification requirement is
                 challenging. Our research considers the time-triggered
                 approach to scheduling of mixed-criticality jobs with
                 two criticality levels. The first proposed algorithm
                 for the time-triggered approach is based on the OCBP
                 scheduling algorithm which finds a fixed-priority order
                 of jobs. Based on this priority order, the existing
                 algorithm constructs two scheduling tables
                 S$_{LO}^{oc}$ and S$_{HI}^{oc}$. The scheduler uses
                 these tables to find a scheduling strategy. Another
                 time-triggered algorithm called MCEDF was proposed as
                 an improvement over the OCBP-based algorithm. Here we
                 propose an algorithm which directly constructs two
                 scheduling tables without using a priority order.
                 Furthermore, we show that our algorithm schedules a
                 strict superset of instances which can be scheduled by
                 the OCBP-based algorithm as well as by MCEDF. We show
                 that our algorithm outperforms both the OCBP-based
                 algorithm and MCEDF in terms of the number of instances
                 scheduled in a randomly generated set of instances. We
                 generalize our algorithm for jobs with $m$ criticality
                 levels. Subsequently, we extend our algorithm to find
                 scheduling tables for periodic and dependent jobs.
                 Finally, we show that our algorithm is also applicable
                 to mixed-criticality synchronous programs upon
                 uniprocessor platforms and schedules a bigger set of
                 instances than the existing algorithm.",
  acknowledgement = ack-nhfb,
  articleno =    "74",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Liu:2017:ILA,
  author =       "Derong Liu and Bei Yu and Salim Chowdhury and David Z.
                 Pan",
  title =        "Incremental Layer Assignment for Timing Optimization",
  journal =      j-TODAES,
  volume =       "22",
  number =       "4",
  pages =        "75:1--75:??",
  month =        jul,
  year =         "2017",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3083727",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Jan 22 09:03:32 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "With VLSI technology nodes scaling into the nanometer
                 regime, interconnect delay plays an increasingly
                 critical role in timing. For layer assignment, most
                 works deal with via counts or total net delays,
                 ignoring critical paths of each net and resulting in
                 potential timing issues. In this article, we propose an
                 incremental layer assignment framework targeting delay
                 optimization in timing the critical path of each net. A
                 set of novel techniques are presented: self-adaptive
                 quadruple partition based on K $ \times $ K division
                 benefits the runtime; semidefinite programming is
                 utilized for each partition; and the sequential mapping
                 algorithm guarantees integer solutions while satisfying
                 edge capacities; additionally, concurrent mapping
                 offers a global view of assignment and post delay
                 optimization reduces the path timing violations. The
                 effectiveness of our work is verified by ISPD'08
                 benchmarks.",
  acknowledgement = ack-nhfb,
  articleno =    "75",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Bi:2017:OQE,
  author =       "Zhaori Bi and Dian Zhou and Sheng-Guo Wang and Xuan
                 Zeng",
  title =        "Optimization and Quality Estimation of Circuit Design
                 via Random Region Covering Method",
  journal =      j-TODAES,
  volume =       "23",
  number =       "1",
  pages =        "1:1--1:??",
  month =        oct,
  year =         "2017",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3084685",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Jan 22 09:03:33 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Random region covering is a global optimization
                 technique that explores the landscape by introducing
                 multiple random starting points to initiate the local
                 optimization solvers. This study applies the random
                 region covering technique to circuit design automation
                 and proposes a theory to explain why this technique is
                 efficient at searching for the global optimum. In
                 addition to analyzing the efficiency of the random
                 region covering algorithm, the theory gives a
                 probability-based estimation of the goodness of the
                 optimization result. To enhance the efficiency of the
                 random region covering technique, this work evaluates
                 the boundary of top performance regions and proposes a
                 modified random region covering method that only
                 performs the global optimization on the top design
                 region. The results from a large number of mathematical
                 experiments verify the proposed methodology. The
                 optimized designs of a class-E power amplifier and a
                 wide load range operational amplifier outperform both
                 manual designs and other state-of-the-art optimization
                 techniques.",
  acknowledgement = ack-nhfb,
  articleno =    "1",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Jeong:2017:CSP,
  author =       "Jae Woong Jeong and Vishwanath Natarajan and Shreyas
                 Sen and Tm Mak and Jennifer Kitchen and Sule Ozev",
  title =        "A Comprehensive {BIST} Solution for Polar Transceivers
                 Using On-Chip Resources",
  journal =      j-TODAES,
  volume =       "23",
  number =       "1",
  pages =        "2:1--2:??",
  month =        oct,
  year =         "2017",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3084689",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Jan 22 09:03:33 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "This article presents a Built-in self-test (BIST)
                 solution for polar transceivers with low cost and high
                 accuracy. Radio frequency (RF) Polar transceivers are
                 desirable for portable devices due to higher power
                 efficiency compared to traditional RF Cartesian
                 transceivers. Unfortunately, their design is quite
                 challenging due to substantially different signal paths
                 that need to work coherently to ensure signal quality.
                 In the receiver, phase and gain mismatches degrade
                 sensitivity and error vector magnitude. In the
                 transmitter, delay skew between the envelope and phase
                 signals and the finite envelope bandwidth can create
                 intermodulation distortion, which leads to violation of
                 spectral mask requirements. Typically, these parameters
                 are not directly measured but calibrated through
                 spectral analysis using expensive RF equipment, leading
                 to lengthy and costly measurement/calibration cycles.
                 However, characterization and calibration of these
                 parameters with analytical model would reduce the test
                 time and cost considerably. In this article, we propose
                 a technique to measure with the intent to calibrate
                 impairments of the polar transceiver in the loop-back
                 mode. Simulation and hardware measurement results show
                 that the proposed technique can characterize the
                 targeted impairments accurately.",
  acknowledgement = ack-nhfb,
  articleno =    "2",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Vatanparvar:2017:EVO,
  author =       "Korosh Vatanparvar and Mohammad Abdullah {Al
                 Faruque}",
  title =        "Electric Vehicle Optimized Charge and Drive
                 Management",
  journal =      j-TODAES,
  volume =       "23",
  number =       "1",
  pages =        "3:1--3:??",
  month =        oct,
  year =         "2017",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3084686",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Jan 22 09:03:33 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Electric vehicles (EVs) have been considered as a
                 solution to the environmental issues caused by
                 transportation, such as air pollution and greenhouse
                 gas emission. However, limited energy capacity, scarce
                 EV supercharging stations, and long recharging time
                 have brought anxiety to drivers who use EVs as their
                 main means of transportation. Furthermore, EV owners
                 need to deal with a huge battery replacement cost when
                 the battery capacity degrades. Yet in-house EV chargers
                 affect the pattern of the power grid load, which is not
                 favorable to the utilities. The driving route,
                 departure/arrival time of daily trips, and electricity
                 price influence the EV energy consumption, battery
                 lifetime, electricity cost, and EV charger load on the
                 power grid. The EV driving range and battery lifetime
                 issues have been addressed by battery management
                 systems and route optimization methodologies. However,
                 in this article, we are proposing an optimized charge
                 and drive management (OCDM) methodology that selects
                 the optimal driving route, schedules daily trips, and
                 optimizes the EV charging process while considering the
                 driver's timing preference. Our methodology will
                 improve the EV driving range, extend the battery
                 lifetime, reduce the recharging cost, and diminish the
                 influence of EV chargers on the power grid. The
                 performance of our methodology compared to the state of
                 the art has been analyzed by experimenting on three
                 benchmark EVs and three drivers. Our methodology has
                 decreased EV energy consumption by 27\%, improved the
                 battery lifetime by 24.8\%, reduced the electricity
                 cost by 35\%, and diminished the power grid peak load
                 by 17\% while increasing less than 20 minutes of daily
                 driving time. Moreover, the scalability of our OCDM
                 methodology for different parameters (e.g., time
                 resolution and multiday cycles) in terms of execution
                 time and memory usage has been analyzed.",
  acknowledgement = ack-nhfb,
  articleno =    "3",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Wang:2017:WPL,
  author =       "Shuai Wang and Guangshan Duan and Yupeng Li and
                 Qianhao Dong",
  title =        "Word- and Partition-Level Write Variation Reduction
                 for Improving Non-Volatile Cache Lifetime",
  journal =      j-TODAES,
  volume =       "23",
  number =       "1",
  pages =        "4:1--4:??",
  month =        oct,
  year =         "2017",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3084690",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Jan 22 09:03:33 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Non-volatile memory technologies are among the most
                 promising technologies for implementing the main
                 memories and caches in future microprocessors and
                 replacing the traditional DRAM and SRAM technologies.
                 However, one of the most challenging design issues of
                 the non-volatile memory technologies is the limited
                 write. In this article, we first propose to exploit the
                 narrow-width values to improve the lifetime of
                 non-volatile last-level caches with word-level write
                 variation reduction. Leading zeros masking scheme is
                 proposed to reduce the write stress to the upper half
                 of the narrow-width data. To balance the write
                 variations between the upper half and the lower half of
                 the narrow-width data, two swapping schemes, the swap
                 on write (SW) and swap on replacement (SRepl), are
                 proposed. Two existing optimization schemes, the
                 multiple dirty bit (MDB) and read before write (RBW),
                 are adopted with our word-level swapping design. To
                 further reduce the write variation on the partition
                 level, we propose to exploit the cache partitioning
                 design to improve the lifetime. Based on the
                 observation that different applications demonstrate
                 different cache access (write) behaviors, we propose to
                 partition the last-level cache for different
                 applications and balance the write variations by
                 partition swapping. Both software-based and
                 hardware-based partitioning and swapping schemes are
                 proposed and evaluated for different situations. Our
                 experimental results show that both our word- and
                 partition-level designs can improve the lifetime of the
                 non-volatile caches effectively with low performance
                 and energy overheads.",
  acknowledgement = ack-nhfb,
  articleno =    "4",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Trinadh:2017:ODC,
  author =       "A. Satya Trinadh and Seetal Potluri and Sobhan Babu
                 Ch. and V. Kamakoti and Shiv Govind Singh",
  title =        "Optimal Don't Care Filling for Minimizing Peak Toggles
                 During At-Speed Stuck-At Testing",
  journal =      j-TODAES,
  volume =       "23",
  number =       "1",
  pages =        "5:1--5:??",
  month =        oct,
  year =         "2017",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3084684",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Jan 22 09:03:33 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Due to the increase in manufacturing/environmental
                 uncertainties in the nanometer regime, testing digital
                 chips under different operating conditions becomes
                 mandatory. Traditionally, stuck-at tests were applied
                 at slow speed to detect structural defects and
                 transition fault tests were applied at-speed to detect
                 delay defects. Recently, it was shown that certain
                 cell-internal defects can only be detected using
                 at-speed stuck-at testing. Stuck-at test patterns are
                 power hungry, thereby causing excessive voltage droop
                 on the power grid, delaying the test response, and
                 finally leading to false delay failures on the tester.
                 This motivates the need for peak power minimization
                 during at-speed stuck-at testing. In this article, we
                 use input toggle minimization as a means to minimize a
                 circuit's power dissipation during at-speed stuck-at
                 testing under the Combinational State Preservation scan
                 (CSP-scan) Design-For-Testability (DFT) scheme. For
                 circuits whose test sets are dominated by don't cares,
                 this article maps the problem of optimal X-filling for
                 peak input toggle minimization to a variant of the
                 interval coloring problem and proposes a Dynamic
                 Programming (DP) algorithm (DP-fill) for the same along
                 with a theoretical proof for its optimality. For
                 circuits whose test sets are not dominated by don't
                 cares, we propose a max scatter Hamiltonian path
                 algorithm, which ensures that the ordering is done such
                 that the don't cares are evenly distributed in the
                 final ordering of test cubes, thereby leading to better
                 input toggle savings than DP-fill. The proposed
                 algorithms, when experimented on ITC99 benchmarks,
                 produced peak power savings of up to 48\% over the
                 best-known algorithms in literature. We have also
                 pruned the solutions thus obtained using Greedy and
                 Simulated Annealing strategies with iterative 1-bit
                 neighborhood to validate our idea of optimal input
                 toggle minimization as an effective technique for
                 minimizing peak power dissipation during at-speed
                 stuck-at testing.",
  acknowledgement = ack-nhfb,
  articleno =    "5",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Li:2017:TSL,
  author =       "Xingquan Li and Wenxing Zhu",
  title =        "Two-Stage Layout Decomposition for Hybrid E-Beam and
                 Triple Patterning Lithography",
  journal =      j-TODAES,
  volume =       "23",
  number =       "1",
  pages =        "6:1--6:??",
  month =        oct,
  year =         "2017",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3084683",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Jan 22 09:03:33 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Hybrid e-beam lithography (EBL) and triple patterning
                 lithography (TPL) are advanced technologies for the
                 manufacture of integrated circuits. We propose a
                 technology that combines the advantages of EBL and TPL,
                 which is more promising for the pattern product
                 industry. Layout decomposition is a crucial step in
                 this technology. In this article, we propose a
                 two-stage decomposition flow for the hybrid e-beam and
                 triple patterning lithography of the general layout
                 decomposition (HETLD) problem. At the first stage, we
                 formulate two optimization problems: the e-beam and
                 stitch-aware TPL mask assignment (ESTMA) problem and
                 the extended minimum weight dominating set for R$_4$
                 mask assignment (MDS R$_4$ MA) problem. Binary linear
                 program formulations of the two problems are solved by
                 the cutting plane approach. At the second stage,
                 solutions of the first stage problems are legalized to
                 feasible solutions of the HETLD problem by stitch
                 insertion and e-beam shot. To speed up decomposition,
                 we reduce the problem size by removing some vertices
                 and some minor conflict edges before decomposition.
                 Experimental results show the effectiveness of our
                 decomposition methods based on ESTMA and MDS R$_4$
                 MA.",
  acknowledgement = ack-nhfb,
  articleno =    "6",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Das:2017:VBP,
  author =       "Sourav Das and Dongjin Lee and Wonje Choi and
                 Janardhan Rao Doppa and Partha Pratim Pande and
                 Krishnendu Chakrabarty",
  title =        "{VFI}-Based Power Management to Enhance the Lifetime
                 of High-Performance {$3$D} {NoCs}",
  journal =      j-TODAES,
  volume =       "23",
  number =       "1",
  pages =        "7:1--7:??",
  month =        oct,
  year =         "2017",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3092843",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Jan 22 09:03:33 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "The emergence of 3D network-on-chip (NoC) has
                 revolutionized the design of high-performance and
                 energy-efficient manycore chips. However, the
                 anticipated performance gain can be compromised due to
                 the degradation and failure of vertical links (VLs).
                 The Through-Silicon-Via (TSV)-enabled VLs may fail due
                 to workload-induced stress; the failure of a VL can
                 affect the neighboring VLs, thereby causing a cascade
                 of failures and reducing the lifetime of the chip. To
                 enhance the reliability of 3D NoC-enabled manycore
                 chips, we propose to incorporate a voltage-frequency
                 island (VFI)-based power management strategy that helps
                 to reduce the energy consumption and hence, the
                 workload-induced stress of the highly utilized VLs. The
                 adopted power-management strategy relies on control
                 decisions about the voltage/frequency (V/F) levels on
                 VLs. We demonstrate that compared to the well-known
                 spare TSV allocation and adaptive routing strategies,
                 power management is more effective in enhancing the
                 reliability of a 3D NoC. VFI-based power management
                 improves the reliability of the 3D NoC by one order of
                 magnitude compared to both adaptive routing and spare
                 allocation while running popular SPLASH-2 and PARSEC
                 benchmarks. The principal benefit of power management
                 is that it is capable of reducing the operating
                 temperature of the system, which in turn enhances the
                 Mean-Time-To-Failure (MTTF) of the VLs and reliability
                 of the overall 3D NoC.",
  acknowledgement = ack-nhfb,
  articleno =    "7",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Murugesan:2017:NRM,
  author =       "Shanmugakumar Murugesan and Noor Mahammad Sk",
  title =        "A Novel Range Matching Architecture for Packet
                 Classification Without Rule Expansion",
  journal =      j-TODAES,
  volume =       "23",
  number =       "1",
  pages =        "8:1--8:??",
  month =        oct,
  year =         "2017",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3105958",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Jan 22 09:03:33 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "The speed requirement for the routing table lookup and
                 the packet classification is rapidly increasing due to
                 the increase in the number of packets needed to be
                 processed per second. The hardware-based packet
                 classification relies on ternary content addressable
                 memory (TCAM) to meet this speed requirement. However,
                 TCAM consumes huge power and also supports only for
                 longest prefix match and exact match, where the
                 classification rule also has a range match (RM) field.
                 Hence, it is mandatory to encode the RM into prefix
                 match to accommodate the rule in TCAM. In the worst
                 case, one rule is encoded into $ (2 W - 2)^2 $ rules
                 (where $W$ is a number of bits to represent range). This
                 work proposes a novel RM architecture, and a detailed
                 analysis about the range field on the standard dataset
                 and the real-life classifier rules are presented. In
                 the literature, the existing RM architecture is used to
                 avoid the range to prefix conversion, but due to the
                 serial operation, it lacks in performance. For constant
                 time lookup, TCAM is the best option, but it does not
                 support RM. The proposed architecture takes one clock
                 cycle for RM and does not require any
                 encoding/conversion. Hence, there will be a single
                 entry for every rule. It is observed that just 4\% of the
                 two-dimensional range rules are present in this
                 dataset, and it will increase the rule set size by 4
                 times in the best case and nearly 30 times in the worst
                 case. The proposed RM circuit is operated in parallel
                 with TCAM without compromising the speed, and this
                 circuit saves huge power around 70\% and area around
                 61\%, where the range to prefix conversion/encoding is
                 completely avoided. The proposed architecture is well
                 suited for current IPv4- and IPv6-based networks, as
                 well as in software-defined networks in the near
                 future.",
  acknowledgement = ack-nhfb,
  articleno =    "8",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Chithira:2017:HTS,
  author =       "P. R. Chithira and Vinita Vasudevan",
  title =        "A Hierarchical Technique for Statistical Path
                 Selection and Criticality Computation",
  journal =      j-TODAES,
  volume =       "23",
  number =       "1",
  pages =        "9:1--9:??",
  month =        oct,
  year =         "2017",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3107030",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Jan 22 09:03:33 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Due to process variations, every path in the circuit
                 is associated with a probability of being critical and
                 a measure of this probability is the criticality of the
                 path. Identification of critical paths usually proceeds
                 in two steps, namely, generation of a candidate path
                 set followed by computation of path criticality. As
                 criticality computation is expensive, the candidate
                 path set is chosen using simpler metrics. However,
                 these metrics are not directly related to path
                 criticality and, often, the set also contains low
                 criticality paths that do not need to be tested. In
                 this article, we propose a hierarchical technique that
                 directly gives all paths above a global criticality
                 threshold. The circuit is divided into disjoint groups
                 at various levels. We show that the criticality of a
                 group at each level of hierarchy can be computed using
                 criticality of the parent group and the local
                 complementary delay within the group. Low criticality
                 groups are pruned at every level, making the
                 computation efficient. This recursive partitioning and
                 group criticality computation is continued until the
                 group criticality falls below a threshold. Beyond this,
                 the path selection within the group is done using
                 branch-and-bound algorithm with global criticality as
                 the metric. This is possible, since our method for
                 criticality computation is very efficient. Unlike other
                 techniques, path selection and criticality computation
                 are integrated together so that when the path selection
                 is complete, path criticality is also obtained. The
                 proposed algorithm is tested with ISCAS'85, ISCAS'89,
                 and ITC'99 benchmark circuits and the results are
                 verified using Monte Carlo simulation. The experimental
                 results suggest that the proposed method gives better
                 accuracy on average with around 90\% reduction in
                 run-time.",
  acknowledgement = ack-nhfb,
  articleno =    "9",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Moon:2017:ASP,
  author =       "Hyungon Moon and Jinyong Lee and Dongil Hwang and
                 Seonhwa Jung and Jiwon Seo and Yunheung Paek",
  title =        "Architectural Supports to Protect {OS} Kernels from
                 Code-Injection Attacks and Their Applications",
  journal =      j-TODAES,
  volume =       "23",
  number =       "1",
  pages =        "10:1--10:??",
  month =        oct,
  year =         "2017",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3110223",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Jan 22 09:03:33 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "The kernel code injection is a common behavior of
                 kernel-compromising attacks where the attackers aim to
                 gain their goals by manipulating an OS kernel. Several
                 security mechanisms have been proposed to mitigate such
                 threats, but they all suffer from non-negligible
                 performance overhead. This article introduces a
                 hardware reference monitor, called Kargos, which can
                 detect the kernel code injection attacks with nearly
                 zero performance cost. Kargos monitors the behaviors of
                 an OS kernel from outside the CPU through the standard
                 bus interconnect and debug interface available with
                 most major microprocessors. By watching the execution
                 traces and memory access events in the monitored target
                 system, Kargos uncovers attempts to execute malicious
                 code with the kernel privilege. On top of this, we also
                 applied the architectural supports for Kargos to the
                 detection of ROP attacks. KS-Stack is the hardware
                 component that builds and maintains the shadow stacks
                 using the existing supports to detect these ROP attacks.
                 According to our experiments, Kargos detected all the
                 kernel code injection attacks that we tested, yet just
                 increasing the computational loads on the target CPU by
                 less than 1\% on average. The performance overhead of
                 the KS-Stack was also less than 1\%.",
  acknowledgement = ack-nhfb,
  articleno =    "10",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Yang:2017:ELD,
  author =       "Yunfeng Yang and Wai-Shing Luk and Hai Zhou and David
                 Z. Pan and Dian Zhou and Changhao Yan and Xuan Zeng",
  title =        "An Effective Layout Decomposition Method for {DSA}
                 with Multiple Patterning in Contact-Hole Generation",
  journal =      j-TODAES,
  volume =       "23",
  number =       "1",
  pages =        "11:1--11:??",
  month =        oct,
  year =         "2017",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3131847",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Jan 22 09:03:33 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Directed self-assembly (DSA) complemented with
                 multiple patterning (MP) is an attractive next
                 generation lithography (NGL) technique for contact-hole
                 generation. Nevertheless, a high-quality DSA-aware
                 layout decomposer is required to enable the technology.
                 In this article, we introduce an efficient method which
                 incorporates a set packing for generating DSA template
                 candidates and a local search method. Besides, a
                 multi-start strategy is integrated into the framework
                 to prevent the local minima. Our framework encourages
                 the reuse of existing coloring solvers. Hence, the
                 development cost can significantly be reduced. In
                 addition, for DSA multiple patterning where the number
                 of masks is larger than two, we present an efficient
                 iterative partition based method. Experimental results
                 show that compared with the state-of-the-art work, our
                 methods can achieve roughly 100$ \times $ speedup for
                 double patterning, and 78.8\% conflict reduction with
                 5$ \times $ speedup for triple patterning on the dense
                 graphs.",
  acknowledgement = ack-nhfb,
  articleno =    "11",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Chen:2017:AMM,
  author =       "Chao Chen and Giovanni Beltrame",
  title =        "An Adaptive {Markov} Model for the Timing Analysis of
                 Probabilistic Caches",
  journal =      j-TODAES,
  volume =       "23",
  number =       "1",
  pages =        "12:1--12:??",
  month =        oct,
  year =         "2017",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3123877",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Jan 22 09:03:33 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Accurate timing prediction for real-time embedded
                 software execution is becoming a problem due to the
                 increasing complexity of computer architecture, and the
                 presence of mixed-criticality workloads. Probabilistic
                 caches were proposed to set bounds to Worst Case
                 Execution Time (WCET) estimates and help designers
                 improve real-time embedded system resource use. Static
                 Probabilistic Timing Analysis (SPTA) for probabilistic
                 caches is nevertheless difficult to perform, because
                 cache accesses depend on execution history, and the
                 computational complexity of SPTA makes it intractable
                 for calculation as the number of accesses increases. In
                 this paper, we explore and improve SPTA for caches with
                 evict-on-miss random replacement policy using a state
                 space modeling technique. A nonhomogeneous Markov model
                 is employed for single-path programs in discrete-time
                 finite state space representation. To make this Markov
                 model tractable, we limit the number of states and use
                 an adaptive method for state modification. Experiments
                 show that compared to the state-of-the-art methodology,
                 the proposed adaptive Markov chain approach provides
                 better results at the occurrence probability of
                 10$^{-15}$: in terms of accuracy, the state-of-the-art
                 SPTA results are more conservative, by 11\% more on
                 average. In terms of computation time, our approach is
                 not significantly different from the state-of-the-art
                 SPTA.",
  acknowledgement = ack-nhfb,
  articleno =    "12",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Kritikakou:2018:DDS,
  author =       "Angeliki Kritikakou and Thibaut Marty and Matthieu
                 Roy",
  title =        "{DYNASCORE}: {DYNAmic Software COntroller to Increase
                 REsource} Utilization in Mixed-Critical Systems",
  journal =      j-TODAES,
  volume =       "23",
  number =       "2",
  pages =        "13:1--13:??",
  month =        jan,
  year =         "2018",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3110222",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:39 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "In real-time mixed-critical systems, Worst-Case
                 Execution Time (WCET) analysis is required to guarantee
                 that timing constraints are respected---at least for
                 high-criticality tasks. However, the WCET is
                 pessimistic compared to the real execution time,
                 especially for multicore platforms. As WCET computation
                 considers the worst-case scenario, it means that
                 whenever a high-criticality task accesses a shared
                 resource in multicore platforms, it is considered that
                 all cores use the same resource concurrently. This
                 pessimism in WCET computation leads to a dramatic
                 underutilization of the platform resources, or even
                 failing to meet the timing constraints. In order to
                 increase resource utilization while guaranteeing
                 real-time guarantees for high-criticality tasks,
                 previous works proposed a runtime control system to
                 monitor and decide when the interferences from
                 low-criticality tasks cannot be further tolerated.
                 However, in the initial approaches, the points where
                 the controller is executed were statically predefined.
                 In this work, we propose a dynamic runtime control
                 which adapts its observations to online temporal
                 properties, further increasing the dynamism of the
                 approach, and mitigating the unnecessary overhead
                 implied by existing static approaches. Our dynamic
                 adaptive approach allows one to control the ongoing
                 execution of tasks based on runtime information, and
                 further increases the gains in terms of resource
                 utilization compared with static approaches.",
  acknowledgement = ack-nhfb,
  articleno =    "13",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Boukhobza:2018:ENS,
  author =       "Jalil Boukhobza and St{\'e}phane Rubini and Renhai
                 Chen and Zili Shao",
  title =        "Emerging {NVM}: a Survey on Architectural Integration
                 and Research Challenges",
  journal =      j-TODAES,
  volume =       "23",
  number =       "2",
  pages =        "14:1--14:??",
  month =        jan,
  year =         "2018",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3131848",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:39 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "There has been a surge of interest in Non-Volatile
                 Memory (NVM) in recent years. With many advantages,
                 such as density and power consumption, NVM is carving
                 out a place in the memory hierarchy and may eventually
                 change our view of computer architecture. Many NVMs
                 have emerged, such as Magnetoresistive random access
                 memory (MRAM), Phase Change random access memory (PCM),
                 Resistive random access memory (ReRAM), and
                 Ferroelectric random access memory (FeRAM), each with
                 its own peculiar properties and specific challenges.
                 The scientific community has carried out a substantial
                 amount of work on integrating those technologies in the
                 memory hierarchy. As many companies are announcing the
                 imminent mass production of NVMs, we think that it is
                 time to have a step back and discuss the body of
                 literature related to NVM integration. This article
                 surveys state-of-the-art work on integrating NVM into
                 the memory hierarchy. Specially, we introduce the four
                 types of NVM, namely, MRAM, PCM, ReRAM, and FeRAM, and
                 investigate different ways of integrating them into the
                 memory hierarchy from the horizontal or vertical
                 perspectives. Here, horizontal integration means that
                 the new memory is placed at the same level as an
                 existing one, while vertical integration means that the
                 new memory is interleaved between two existing levels.
                 In addition, we describe challenges and opportunities
                 with each NVM technique.",
  acknowledgement = ack-nhfb,
  articleno =    "14",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Gao:2018:ECI,
  author =       "Congming Gao and Liang Shi and Yejia Di and Qiao Li
                 and Chun Jason Xue and Kaijie Wu and Edwin Sha",
  title =        "Exploiting Chip Idleness for Minimizing Garbage
                 Collection-Induced Chip Access Conflict on {SSDs}",
  journal =      j-TODAES,
  volume =       "23",
  number =       "2",
  pages =        "15:1--15:??",
  month =        jan,
  year =         "2018",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3131850",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:39 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Solid state drives (SSDs) are normally constructed
                 with a number of parallel-accessible flash chips, where
                 host I/O requests are processed in parallel. In
                 addition, there are many internal activities in SSDs,
                 such as garbage collection and wear leveling induced
                 read, write, and erase operations, to solve the issues
                 of inability of in-place updates and limited lifetime.
                 When internal activities are triggered on a chip, the
                 chip will be blocked. Our preliminary studies on
                 several workloads show that when internal activities
                 are frequently triggered, the host I/O performance will
                 be significantly impacted because of the access
                 conflict between them. In this work, in order to
                 improve the access conflict induced performance
                 degradation, a novel access conflict minimization
                 scheme is proposed. The basic idea of the scheme is
                 motivated by an interesting observation in SSDs:
                 several chips are idle when other chips are busy with
                 internal activities and host I/O requests. Based on
                 this observation, we propose to schedule internal
                 activities induced operations for minimized access
                 conflict by exploiting the idleness of the multiple
                 chips of SSDs. This approach is realized by two steps:
                 First, read internal activities accessed data to the
                 controller; second, by exploiting the idle chips during
                 internal activities, write internal activities accessed
                 data back to these idle chips. With this scheme, the
                 internal activities can be processed with minimized
                 access conflict to the host requests. Simulation
                 results show that the proposed approach significantly
                 reduces the access conflict, and in turn leads to a
                 significant performance improvement of SSDs.",
  acknowledgement = ack-nhfb,
  articleno =    "15",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Jun:2018:RBD,
  author =       "Jaeyung Jun and Kyu Hyun Choi and Hokwon Kim and Sang
                 Ho Yu and Seon Wook Kim and Youngsun Han",
  title =        "Recovering from Biased Distribution of Faulty Cells in
                 Memory by Reorganizing Replacement Regions through
                 Universal Hashing",
  journal =      j-TODAES,
  volume =       "23",
  number =       "2",
  pages =        "16:1--16:??",
  month =        jan,
  year =         "2018",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3131241",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:39 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/hash.bib;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Recently, scaling down dynamic random access memory
                 (DRAM) has become more of a challenge, with more faults
                 than before and a significant degradation in yield. To
                 improve the yield in DRAM, a redundancy repair
                 technique with intra-subarray replacement has been
                 extensively employed to replace faulty elements (i.e.,
                 rows or columns with defective cells) with spare
                 elements in each subarray. Unfortunately, such
                 technique cannot efficiently handle a biased
                 distribution of faulty cells because each subarray has
                 a fixed number of spare elements. In this article, we
                 propose a novel redundancy repair technique that uses a
                 hashing method to solve this problem. Our hashing
                 technique reorganizes replacement regions by changing
                 the way in which their replacement information is
                 referred, thus making faulty cells become evenly
                 distributed to the regions. We also propose a fast
                 repair algorithm to find the best hash function among
                 all possible candidates. Even if our approach requires
                 little hardware overhead, it significantly improves the
                 yield when compared with conventional redundancy
                 techniques. In particular, the results of our
                 experiment show that our technique saves spare elements
                 by about 57\% and 55\% for a yield of 99\% at BER 1e-6
                 and 5e-7, respectively.",
  acknowledgement = ack-nhfb,
  articleno =    "16",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Zhou:2018:RRD,
  author =       "Hongxia Zhou and Chiu-Wing Sham and Hailong Yao",
  title =        "Revisiting Routability-Driven Placement for Analog and
                 Mixed-Signal Circuits",
  journal =      j-TODAES,
  volume =       "23",
  number =       "2",
  pages =        "17:1--17:??",
  month =        jan,
  year =         "2018",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3131849",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:39 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "The exponential increase in scale and complexity of
                 very large-scale integrated circuits (VLSIs) poses a
                 great challenge to current electronic design automation
                 (EDA) techniques. As an essential step in the whole EDA
                 layout synthesis, placement is attracting more and more
                 attention, especially for analog and mixed-signal
                 integrated circuits. Recently, experts in this field
                 have observed a variety of analog-specific layout
                 constraints to obtain high-performance placement
                 solutions. These constraints include symmetry,
                 alignment, boundary, preplace, abutment, range and
                 maximum separation, and routability of the placement
                 solutions. In this article, the effectiveness of
                 slicing and nonslicing representation is investigated.
                 Additionally, the technique of congestion-based virtual
                 sizing is proposed. Experimental results show that the
                 routability can be improved significantly by applying
                 congestion-based virtual sizing. Results also show that
                 the slicing representation can improve the regularity
                 of the placement solutions and hence improve the
                 routability with higher efficiency compared to the
                 nonslicing representation.",
  acknowledgement = ack-nhfb,
  articleno =    "17",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Wang:2018:ACS,
  author =       "Shao-Chung Wang and Li-Chen Kan and Chao-Lin Lee and
                 Yuan-Shin Hwang and Jenq-Kuen Lee",
  title =        "Architecture and Compiler Support for {GPUs} Using
                 Energy-Efficient Affine Register Files",
  journal =      j-TODAES,
  volume =       "23",
  number =       "2",
  pages =        "18:1--18:??",
  month =        jan,
  year =         "2018",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3133218",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:39 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "A modern GPU can simultaneously process thousands of
                 hardware threads. These threads are grouped into
                 fixed-size SIMD batches executing the same instruction
                 on vectors of data in a lockstep to achieve high
                 throughput and performance. The register files are huge
                 due to each SIMD group accessing a dedicated set of
                 vector registers for fast context switching, and
                 consequently the power consumption of register files
                 has become an important issue. One proposed solution is
                 to replace some of the vector registers by scalar
                 registers, as different threads in a same SIMD group
                 operate on scalar values and so the redundant
                 computations and accesses of these scalar values can be
                 eliminated. However, it has been observed that a
                 significant number of registers containing affine
                 vectors $ \upsilon $ such that $ \upsilon [i] = b + i
                 \times s $ can be represented by base $b$ and stride
                 $s$. Therefore, this article proposes an affine
                 register file design for GPUs that is energy efficient
                 due to it reducing the redundant executions of both the
                 uniform and affine vectors. This design uses a pair of
                 registers to store the base and stride of each affine
                 vector and provides specific affine ALUs to execute
                 affine instructions. A method of compiler analysis has
                 been developed to detect scalars and affine vectors and
                 annotate instructions for facilitating their
                 corresponding scalar and affine computations.
                 Furthermore, a priority-based register allocation
                 scheme has been implemented to assign scalars and
                 affine vectors to appropriate scalar and affine
                 register files. Experimental results show that this
                 design was able to dispatch 43.56\% of the computations
                 to scalar and affine ALUs when using eight scalar and
                 four affine registers per warp. This resulted in the
                 current design also reducing the energy consumption of
                 the register files and ALUs to 21.86\% and 26.54\%,
                 respectively, and it reduced the overall energy
                 consumption of the GPU by an average of 5.18\%.",
  acknowledgement = ack-nhfb,
  articleno =    "18",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Pereira-Santos:2018:RFB,
  author =       "Leonardo Pereira-Santos and Gabriel Luca Nazar and
                 Luigi Carro",
  title =        "Repair of {FPGA}-Based Real-Time Systems With Variable
                 Slacks",
  journal =      j-TODAES,
  volume =       "23",
  number =       "2",
  pages =        "19:1--19:??",
  month =        jan,
  year =         "2018",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3144533",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:39 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Field-programmable gate arrays (FPGAs) based on SRAM
                 cells are an attractive alternative for real-time
                 system designers, as they offer high density, low cost,
                 and high performance. The use of SRAM cells in the
                 FPGA's configuration memory, while enabling these
                 desirable characteristics, also creates a reliability
                 hazard as RAM cells are susceptible to single-event
                 upsets (SEUs). The usual approach is the use of double
                 or triple redundancy allied with a correction
                 mechanism, such as periodic scrubbing. Although
                 scrubbing is an effective technique to remove
                 SEU-induced errors, the repair of real-time systems
                 presents specific challenges, such as avoiding failures
                 by missing real-time deadlines. In this article, a
                 novel approach is proposed to use a deadline-aware
                 scrubbing scheme with negligible area costs that
                 dynamically chooses the scrubbing starting position.
                 Such a scheme allows us to avoid missing real-time
                 deadlines while maximizing the repair probability given
                 a bounded repair time. Our approach reduces the failure
                 rate, considering the probability of missing deadlines
                 due to faults, by 33.39\% on average, with an average
                 area cost of 1.23\%.",
  acknowledgement = ack-nhfb,
  articleno =    "19",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Lin:2018:CMD,
  author =       "Chen-Hsuan Lin and Lu Wan and Deming Chen",
  title =        "{C-Mine}: Data Mining of Logic Common Cases for
                 Improved Timing Error Resilience with Energy
                 Efficiency",
  journal =      j-TODAES,
  volume =       "23",
  number =       "2",
  pages =        "20:1--20:??",
  month =        jan,
  year =         "2018",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3144534",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:39 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "The better-than-worst-case (BTW) design methodology
                 can achieve higher circuit energy efficiency,
                 performance, or reliability by allowing timing errors
                 for rare cases and rectifying them with error
                 correction mechanisms. Therefore, the performance of
                 BTW design heavily depends on the correctness of common
                 cases, which are frequent input patterns in a workload.
                 However, most existing methods do not provide
                 sufficiently scalable solutions and also overlook the
                 whole picture of the design. Thus, we propose a new
                 technique, common-case mining method (C-Mine), which
                 combines two scalable techniques, data mining and
                 Boolean satisfiability (SAT) solving, to overcome these
                 limitations. Data mining can efficiently extract
                 patterns from an enormous dataset, and SAT solving is
                 famous for its scalable verification. In this article,
                 we present two versions of C-Mine, C-Mine-DCT and
                 C-Mine-APR, which aim at faster runtime and better
                 energy saving, respectively. The experimental results
                 show that, compared to a recent publication, C-Mine-DCT
                 can achieve compatible performance with an additional
                 8\% energy savings and 54$ \times $ speedup for bigger
                 benchmarks on average. Furthermore, C-Mine-APR can
                 achieve up to 13\% more energy saving than C-Mine-DCT
                 while confronting designs with more common cases.",
  acknowledgement = ack-nhfb,
  articleno =    "20",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Rosvall:2018:FTA,
  author =       "Kathrin Rosvall and Ingo Sander",
  title =        "Flexible and Tradeoff-Aware Constraint-Based Design
                 Space Exploration for Streaming Applications on
                 Heterogeneous Platforms",
  journal =      j-TODAES,
  volume =       "23",
  number =       "2",
  pages =        "21:1--21:??",
  month =        jan,
  year =         "2018",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3133210",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:39 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Due to its complexity, the problem of mapping and
                 scheduling streaming applications on heterogeneous
                 MPSoCs under real-time and performance constraints has
                 traditionally been tackled by incomplete heuristic
                 algorithms. In recent years, approaches based on
                 Constraint Programming (CP) have shown promising
                 results as complete methods for finding optimal
                 mappings, in particular concerning throughput. However,
                 so far none of the available CP approaches consider the
                 tradeoff between throughput and buffer requirements or
                 throughput and power consumption. This article
                 integrates tradeoff awareness into the CP model and
                 introduces a two-step solving approach that utilizes
                 the advantages of heuristics, while still keeping the
                 completeness property of CP. With a number of
                 experiments considering several streaming applications
                 and different platform models, the article illustrates
                 not only the efficiency of the presented model but also
                 its suitability for solving different problems with
                 various combinations of performance constraints.",
  acknowledgement = ack-nhfb,
  articleno =    "21",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Knechtel:2018:MOF,
  author =       "Johann Knechtel and Jens Lienig and Ibrahim (Abe) M.
                 Elfadel",
  title =        "Multi-Objective {$3$D} Floorplanning with Integrated
                 Voltage Assignment",
  journal =      j-TODAES,
  volume =       "23",
  number =       "2",
  pages =        "22:1--22:??",
  month =        jan,
  year =         "2018",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3149817",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:39 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Voltage assignment is a well-known technique for
                 circuit design, which has been applied successfully to
                 reduce power consumption in classical 2D integrated
                 circuits (ICs). Its usage in the context of 3D ICs has
                 not been fully explored yet although reducing power in
                 3D designs is of crucial importance, for example, to
                 tackle the ever-present challenge of thermal
                 management. In this article, we investigate the
                 effective and efficient partitioning of 3D designs into
                 multiple voltage domains during the floorplanning step
                 of physical design. In particular, we introduce,
                 implement, and evaluate novel algorithms for effective
                 integration of voltage assignment into the inner
                 floorplanning loops. Our algorithms are compatible not
                 only with the traditional objectives of 2D
                 floorplanning but also with the additional objectives
                 and constraints of 3D designs, including the planning
                 of through-silicon vias (TSVs) and the thermal
                 management of stacked dies. We test our 3D floorplanner
                 extensively on the GSRC benchmarks as well as on an
                 augmented version of the IBM-HB+ benchmarks. The 3D
                 floorplans are shown to achieve effective trade-offs
                 for power and delays throughout different
                 configurations --- our results surpass na{\"\i}ve
                 low-power and high-performance voltage assignment by
                 17\% and 10\%, on average. Finally, we release our 3D
                 floorplanning framework as open-source code.",
  acknowledgement = ack-nhfb,
  articleno =    "22",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Yang:2018:HEP,
  author = {Kun Yang and Haoting Shen and Domenic Forte and Swarup Bhunia
    and Mark Tehranipoor},
  title = {Hardware-Enabled Pharmaceutical Supply Chain Security},
  journal = j-TODAES,
  volume = {23},
  number = {2},
  pages = {23:1--23:??},
  month = jan,
  year = {2018},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3144532},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Fri Mar 22 16:58:39 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {The pharmaceutical supply chain is the pathway through
    which prescription and over-the-counter (OTC) drugs are
    delivered from manufacturing sites to patients.
    Technological innovations, price fluctuations of raw
    materials, as well as tax, regulatory, and market
    demands are driving change and making the
    pharmaceutical supply chain more complex. Traditional
    supply chain management methods struggle to protect the
    pharmaceutical supply chain, maintain its integrity,
    enhance customer confidence, and aid regulators in
    tracking medicines. To develop effective measures that
    secure the pharmaceutical supply chain, it is important
    that the community is aware of the state-of-the-art
    capabilities available to the supply chain owners and
    participants. In this article, we will be presenting a
    survey of existing hardware-enabled pharmaceutical
    supply chain security schemes and their limitations. We
    also highlight the current challenges and point out
    future research directions. This survey should be of
    interest to government agencies, pharmaceutical
    companies, hospitals and pharmacies, and all others
    involved in the provenance and authenticity of
    medicines and the integrity of the pharmaceutical
    supply chain.},
  acknowledgement = ack-nhfb,
  articleno = {23},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Noltsis:2018:RSC,
  author = {Michail Noltsis and Dimitrios Rodopoulos and Nikolaos Zompakis
    and Francky Catthoor and Dimitrios Soudris},
  title = {Runtime Slack Creation for Processor Performance Variability
    using System Scenarios},
  journal = j-TODAES,
  volume = {23},
  number = {2},
  pages = {24:1--24:??},
  month = jan,
  year = {2018},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3152158},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Fri Mar 22 16:58:39 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {Modern microprocessors contain a variety of mechanisms
    used to mitigate errors in the logic and memory,
    referred to as Reliability, Availability, and
    Serviceability (RAS) techniques. Many of these
    techniques, such as component disabling, come at a
    performance cost. With the aggressive downscaling of
    device dimensions, it is reasonable to expect that
    chip-wide error rates will intensify in the future and
    perhaps vary throughout system lifetime. As a result,
    it is important to reclaim the temporal RAS overheads
    in a systematic way and enable dependable performance.
    The current article presents a closed-loop control
    scheme that actuates processor's frequency based on
    detected timing interference to ensure performance
    dependability. The concepts of slack and deadline
    vulnerability factor are introduced to support the
    formulation of a discrete time control problem. Default
    application timing is derived using the system scenario
    methodology, the applicability of which is demonstrated
    through simulations. Additionally, the proposed concept
    is demonstrated on a real platform and application: a
    Proportional-Integral-Differential controller,
    implemented within the application, actuates the
    Dynamic Voltage and Frequency Scaling (DVFS) framework
    of the Linux kernel to effectively reclaim temporal
    overheads injected at runtime. The current article
    discusses the responsiveness and energy efficiency of
    the proposed performance dependability scheme. Finally,
    additional formulation is introduced to predict the
    upper bound of timing interference that can be absorbed
    by actuating the DVFS of any processor and is also
    validated on a representative reduction to practice.},
  acknowledgement = ack-nhfb,
  articleno = {24},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Shafiee:2018:DFB,
  author =       "M. Shafiee and N. Beohar and P. Bakliwal and S. Roy
                 and D. Mandal and B. Bakkaloglu and S. Ozev",
  title =        "A Disturbance-Free Built-In Self-Test and Diagnosis
                 Technique for {DC--DC} Converters",
  journal =      j-TODAES,
  volume =       "23",
  number =       "2",
  pages =        "25:1--25:??",
  month =        jan,
  year =         "2018",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3152157",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:39 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Complex electronic systems include multiple power
                 domains and drastically varying dynamic power
                 consumption patterns, requiring the use of multiple
                 power conversion and regulation units. High-frequency
                 switching converters have been gaining prominence in
                 the DC--DC converter market due to their high efficiency
                 and smaller form factor. Unfortunately, they are also
                 subject to higher process variations, and faster
                 in-field degradation, jeopardizing stable operation of
                 the power supply. This article presents a technique to
                 track changes in the dynamic loop characteristics of
                 DC--DC converters without disturbing the normal mode of
                 operation using a white noise-based excitation and
                 correlation. Using multiple points for injection and
                 analysis, we show that the degraded part can be
                 diagnosed to take remedial action. White noise
                 excitation is generated via a pseudo-random disturbance
                 at reference, load current, and pulse-width modulation
                 (PWM) nodes of the converter with the test signal
                 energy being spread over a wide bandwidth, without
                 significantly affecting the converter noise and ripple
                 floor. The impulse response is extracted by correlating
                 the random input sequence with the disturbed output
                 generated. Test signal analysis is achieved by
                 correlating the pseudo-random input sequence with the
                 output response and thereby accumulating the desired
                 behavior over time and pulling it above the noise floor
                 of the measurement set-up. An off-the-shelf power
                 converter, LM27402, is used as the device-under-test
                 (DUT) for experimental verification. Experimental
                 results show that the proposed technique can estimate
                 converter natural frequency and quality factor
                 ($Q$-factor) within $ \pm 2.5$ \% and $ \pm 0.7$ \%
                 error margin respectively, over changes in load
                 inductance and capacitance. For the diagnosis purpose,
                 a measure of inductor's DC resistance (DCR) value,
                 which is the inductor's series resistance and
                 indicative of the degradation in inductor's $Q$-factor,
                 is estimated within less than $ \pm 1.6$ \% error
                 margin.",
  acknowledgement = ack-nhfb,
  articleno =    "25",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Emeretlis:2018:SMA,
  author = {Andreas Emeretlis and George Theodoridis and Panayiotis
    Alefragis and Nikolaos Voros},
  title = {Static Mapping of Applications on Heterogeneous Multi-Core
    Platforms Combining Logic-Based {Benders} Decomposition with
    Integer Linear Programming},
  journal = j-TODAES,
  volume = {23},
  number = {2},
  pages = {26:1--26:??},
  month = jan,
  year = {2018},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3133219},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Fri Mar 22 16:58:39 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {The proper mapping of an application on a multi-core
    platform and the scheduling of its tasks are key
    elements to achieve the maximum performance. In this
    article, a novel hybrid approach based on integrating
    the Logic-Based Benders Decomposition (LBBD) principle
    with a pure Integer Linear Programming (ILP) model is
    introduced for mapping applications described by
    Directed Acyclic Graphs (DAGs) on platforms consisting
    of heterogeneous cores. The LBBD approach combines two
    optimization techniques with complementary strengths,
    namely ILP and Constraint Programming (CP), and is
    employed as a cut generation scheme. The generated
    constraints are utilized by the ILP model to cut
    possible assignment combinations aiming at improving
    the solution or proving the optimality of the
    best-found one. The introduced approach was applied
    both on synthetic DAGs and on DAGs derived from real
    applications. Through the proposed approach, many
    problems were optimally solved that could not be solved
    by any of the above methods (ILP, LBBD) alone within a
    time limit of 2 hours, while the overall solution time
    was also significantly decreased. Specifically, the
    hybrid method exhibited speedups equal to $ 4.2 \times
    $ for the synthetic instances and $ 10 \times $ for the
    real-application DAGs over the LBBD approach and two
    orders of magnitude over the ILP model.},
  acknowledgement = ack-nhfb,
  articleno = {26},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Gomez:2018:SCP,
  author = {Andres F. Gomez and Victor Champac},
  title = {Selection of Critical Paths for Reliable Frequency Scaling
    under {BTI}-Aging Considering Workload Uncertainty and Process
    Variations Effects},
  journal = j-TODAES,
  volume = {23},
  number = {3},
  pages = {27:1--27:??},
  month = apr,
  year = {2018},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3177864},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Fri Mar 22 16:58:39 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {Conventional clock guardbanding to assure a circuit's
    reliable operation under device aging due to NBTI/PBTI
    and process variations introduce significant
    performance loss in modern nanometer circuits. Dynamic
    Frequency Scaling (DFS) is a more efficient technique
    that allows us to adjust the system clock frequency
    according to the process condition and aging
    deterioration of the circuit. At the design phase, the
    DFS technique requires the identification of the logic
    paths to be monitored to introduce the required
    circuitry to monitor their delay. However, critical
    path identification is a complex problem due to three
    major challenges: (1) The critical paths of the circuit
    depend on the stress duty cycle of the devices, which
    are unknown in advance at design phase; (2) the
    critical paths of the circuit depend on the process
    parameters variations, whose impact on delay depend on
    the spatial correlation due to proximity at the circuit
    layout; and (3) the critical paths reordering
    probability may change over time due to aging. This
    article presents a methodology for efficient selection
    of the critical paths to be monitored under a DFS
    framework, addressing the aforementioned challenges.
    Experimental results on ISCAS 85/89 benchmark circuits
    show the feasibility of the proposed approach to select
    a restricted path set while providing reliable aging
    monitoring.},
  acknowledgement = ack-nhfb,
  articleno = {27},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Huang:2018:PSC,
  author = {Sheng-Min Huang and Li-Pin Chang},
  title = {Providing {SLO} Compliance on {NVMe SSDs} Through Parallelism
    Reservation},
  journal = j-TODAES,
  volume = {23},
  number = {3},
  pages = {28:1--28:??},
  month = apr,
  year = {2018},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3174867},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Fri Mar 22 16:58:39 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib;
    https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib},
  abstract = {Non-Volatile Memory Express (NVMe) is a specification
    for next-generation solid-state disks (SSDs). Benefited
    from the massive internal parallelism and the
    high-speed PCIe bus, NVMe SSDs achieve extremely high
    data transfer rates, and they are an ideal solution of
    shared storage in virtualization environments.
    Providing virtual machines with Service Level Objective
    (SLO) compliance on NVMe SSDs is a challenging task,
    because garbage collection activities inside of NVMe
    SSDs globally affect the I/O performance of all virtual
    machines. In this study, we introduce a novel approach,
    called parallelism reservation, which is inspired by
    the rich internal parallelism of NVMe SSDs. The degree
    of parallelism stands for how many flash chips are
    concurrently active. Our basic idea is to reserve
    sufficient degrees of parallelism for read, write, and
    garbage collection operations, making sure that an NVMe
    SSD delivers stable read and write throughput and
    reclaims free space at a constant rate. The stable read
    and write throughput are proportionally distributed
    among virtual machines for SLO compliance. Our
    experimental results show that our parallelism
    reservation approach delivered satisfiable throughput
    and highly predictable response to virtual machines.},
  acknowledgement = ack-nhfb,
  articleno = {28},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Yang:2018:RRE,
  author =       "Kun Yang and Domenic Forte and Mark Tehranipoor",
  title =        "{ReSC}: an {RFID-Enabled} Solution for Defending {IoT}
                 Supply Chain",
  journal =      j-TODAES,
  volume =       "23",
  number =       "3",
  pages =        "29:1--29:??",
  month =        apr,
  year =         "2018",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3174850",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:39 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "The Internet of Things (IoT), an emerging global
                 network of uniquely identifiable embedded computing
                 devices within the existing Internet infrastructure, is
                 transforming how we live and work by increasing the
                 connectedness of people and things on a scale that was
                 once unimaginable. In addition to facilitated
                 information and service exchange between connected
                 objects, enhanced computing power and analytic
                 capabilities of individual objects, and increased
                 interaction between objects and their environments, the
                 IoT also raises new security and privacy challenges.
                 Hardware trust across the IoT supply chain is the
                 foundation of IoT security and privacy. Two major
                 supply chain issues --- disappearance/theft of
                 authentic IoT devices and appearance of unauthentic
                 ones --- have to be addressed to secure the IoT supply
                 chain and lay the foundation for further security and
                 privacy-defensive measures. Comprehensive solutions
                 that enable IoT device authentication and traceability
                 across the entire supply chain (i.e., during
                 distribution and after being provisioned) need to be
                 established. Existing hardware, software, and network
                 protection methods, however, do not address IoT supply
                 chain issues. To mitigate this shortcoming, we propose
                 an RFID-enabled solution called ReSC that aims at
                 defending the IoT supply chain. By incorporating three
                 techniques --- one-to-one mapping between RFID tag
                 identity and control chip identity; unique tag trace,
                 which records tag provenance and history information;
                 and neighborhood attestation of IoT devices --- ReSC is
                 resistant to split attacks (i.e., separating tag from
                 product, swapping tags), counterfeit injection, product
                 theft throughout the entire supply chain, device
                 recycling, and illegal network service access (e.g.,
                 Internet, cable TV, online games, remote firmware
                 updates). Simulations, theoretical analysis, and
                 experimental results based on a printed circuit board
                 (PCB) prototype demonstrate the effectiveness of ReSC.
                 Finally, we evaluate the security of our proposed
                 scheme against various attacks.",
  acknowledgement = ack-nhfb,
  articleno =    "29",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Lee:2018:LBF,
  author =       "Dongwook Lee and Andreas Gerstlauer",
  title =        "Learning-Based, Fine-Grain Power Modeling of
                 System-Level Hardware {IPs}",
  journal =      j-TODAES,
  volume =       "23",
  number =       "3",
  pages =        "30:1--30:??",
  month =        apr,
  year =         "2018",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3177865",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:39 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Accurate power and performance models are needed to
                 enable rapid, early system-level analysis and
                 optimization. There is, however, a lack of fast yet
                 fine-grain power models of hardware components at such
                 high levels of abstraction. In this article, we present
                 novel learning-based approaches for extending fast
                 functional simulation models of accelerators and other
                 hardware intellectual property components (IPs) with
                 accurate cycle-, block-, and invocation-level power
                 estimates. Our proposed power modeling approach is
                 based on annotating functional hardware descriptions
                 with capabilities that, depending on observability,
                 allow capturing data-dependent resource, block, or
                 input and output (I/O) activity without a significant
                 loss in simulation speed. We further leverage advanced
                 machine learning techniques to synthesize abstract
                 power models using novel decomposition techniques that
                 reduce model complexities and increase estimation
                 accuracy. Results of applying our approach to various
                 industrial-strength design examples show that our power
                 models can predict cycle-, basic block-, and
                 invocation-level power consumption to within 10\%, 9\%,
                 and 3\% of a commercial gate-level power estimation
                 tool, respectively, all while running at several order
                 of magnitude faster speeds of 1--10Mcycles/sec. Model
                 training and synthesis takes less than 34 minutes in
                 all cases, including up to 30 minutes for training data
                 and trace generation using gate-level simulations.",
  acknowledgement = ack-nhfb,
  articleno =    "30",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Naderan-Tahan:2018:DCE,
  author = {Mahmood Naderan-Tahan and Hamid Sarbazi-Azad},
  title = {{Domino Cache}: an Energy-Efficient Data Cache for Modern
    Applications},
  journal = j-TODAES,
  volume = {23},
  number = {3},
  pages = {31:1--31:??},
  month = apr,
  year = {2018},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3174848},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Fri Mar 22 16:58:39 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {The energy consumption for processing modern workloads
    is challenging in data centers. Due to the large
    datasets of cloud workloads, the miss rate of the L1
    data cache is high, and with respect to the energy
    efficiency concerns, such misses are costly for memory
    instructions because lower levels of memory hierarchy
    consume more energy per access than the L1. Moreover,
    large last-level caches are not performance effective,
    in contrast to traditional scientific workloads. The
    aim of this article is to propose a large L1 data
    cache, called Domino, to reduce the number of accesses
    to lower levels in order to improve the energy
    efficiency. In designing Domino, we focus on two
    components that use the on-chip area and are not energy
    efficient, which makes them good candidates to use
    their area for enlarging the L1 data cache. Domino is a
    highly associative cache that extends the conventional
    cache by borrowing the prefetcher and last-level-cache
    storage budget and using it as additional ways for data
    cache. In Domino, the additional ways are separated
    from the conventional cache ways; hence, the critical
    path of the first access is not altered. On a miss in
    the conventional part, it searches the added ways in a
    mix of parallel-sequential fashion to compromise the
    latency and energy consumption. Results on the
    Cloudsuite benchmark suite show that read and write
    misses are reduced by 30\%, along with a 28\% reduction
    in snoop messages. The overall energy consumption per
    access is then reduced by 20\% on average (maximum
    38\%) as a result of filtering accesses to the lower
    levels.},
  acknowledgement = ack-nhfb,
  articleno = {31},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Abolmaali:2018:EFP,
  author = {Sheis Abolmaali and Mehdi Kamal and Ali Afzali-Kusha and
    Massoud Pedram},
  title = {An Efficient False Path-Aware Heuristic Critical Path
    Selection Method with High Coverage of the Process Variation
    Space},
  journal = j-TODAES,
  volume = {23},
  number = {3},
  pages = {32:1--32:??},
  month = apr,
  year = {2018},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3177866},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Fri Mar 22 16:58:39 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {In this article, we present a critical path selection
    method that efficiently finds true (sensitizable)
    critical paths of a circuit in the presence of process
    variations. The method, which is based on the viability
    analysis, tries to select the least number of true
    critical paths that cover all of circuit critical
    gates. Critical gates are those that make a path
    critical with a probability higher than a predefined
    threshold value. Selecting fewer critical paths leads
    to less computation time for the algorithm and shorter
    test time of fabricated chips. For this purpose, an
    efficient Statistical Static Timing Analysis- (SSTA)
    based technique is suggested. This technique tries to
    find circuit-critical gates whose process parameter
    variations cover a major part of the process space.
    Improving the process space coverage using fewer paths
    is achieved by considering both spatial (proximity of
    gates) and structural (having common gates)
    correlations in the analysis of choosing the critical
    paths. In the selection process, paths with low
    similarities in their characteristics are preferred. In
    addition, only true paths whose delays affect the
    maximum delay of the circuit are included. The selected
    paths can be used in the test process of the fabricated
    chips to determine if the chip meets its timing
    requirements. Also, a modified viability analysis that
    incorporates statistical computations is used in the
    SSTA. The efficacy of the proposed method is evaluated
    by comparing its results for combinational and
    sequential ISCAS benchmarks with those obtained by
    exhaustive search. Results indicate although, on
    average, only 4.38\% of all the critical paths found by
    the exhaustive search are selected by the proposed
    method, the maximum probability of criticality for the
    paths that are not considered in our method is, on
    average, less than 4\%.},
  acknowledgement = ack-nhfb,
  articleno = {32},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Jalili:2018:ERM,
  author = {Majid Jalili and Hamid Sarbazi-Azad},
  title = {Express Read in {MLC} Phase Change Memories},
  journal = j-TODAES,
  volume = {23},
  number = {3},
  pages = {33:1--33:??},
  month = apr,
  year = {2018},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3177876},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Fri Mar 22 16:58:39 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {In the era of big data, the capability of computer systems
    must be enhanced to support 2.5 quintillion byte/day data delivery.
    Among the components of a computer system, main memory has a great
    impact on overall system performance. DRAM technology has been used
    over the past four decades to build main memories. However, the
    scalability of DRAM technology has faced serious challenges. To keep
    pace with the ever-increasing demand for larger main memory, some new
    alternative technologies have been introduced. Phase change memory
    (PCM) is considered as one of such technologies for substituting
    DRAM. PCM offers some noteworthy properties such as low static power
    consumption, nonvolatility, and capability of storing more than one
    bit per cell (multilevel cell, or MLC). However, the short lifetime
    and long access latency of PCM (specifically MLC PCM) require
    feasible and efficient solutions. In this article, based on the
    observation that applications access a significant number of
    read-friendly data blocks, we propose Express Read to prevent the MLC
    PCM read circuit to spend unnecessary time sensing the cells of a
    memory block. A read-friendly data block (RFDB) is composed of only
    ``11'' and ``00'' bit pairs, and thus upon sensing the most
    significant bit of a cell, the read operation can be early terminated
    to reduce the MLC read time and power consumption. Moreover, we
    increase the number of RFDBs using two simple techniques to better
    exploit the benefits of Express Read. Results obtained from
    full-system simulation near 6\% performance improvement and 21\%
    energy gain, on average, over the baseline system.},
  acknowledgement = ack-nhfb,
  articleno = {33},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Yan:2018:DCR,
  author =       "Jin-Tai Yan",
  title =        "Direction-Constrained Rectangle Escape Routing",
  journal =      j-TODAES,
  volume =       "23",
  number =       "3",
  pages =        "34:1--34:??",
  month =        apr,
  year =         "2018",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3178047",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:39 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Given a set of buses with available escape directions
                 inside a chip, a two-phase algorithm is proposed to
                 assign one feasible escape direction onto any bus such
                 that the number of used layers is minimized and to
                 allocate the pin rectangle and the projection rectangle
                 of any escape bus onto the minimized layers in
                 direction-constrained rectangle escape routing. In our
                 proposed algorithm, based on the concept of
                 two-dimensional maximum density inside a chip, the
                 escape directions of the buses can be first assigned to
                 minimize the number of the used layers by iteratively
                 eliminating unnecessary escape directions for any bus
                 inside a chip. Furthermore, based on the construction
                 of the represented intervals and the assignment
                 constraints for the escape buses, a modified left-edge
                 algorithm can be used to allocate all the escape buses
                 onto the minimized layers. Compared with Ma's integer
                 linear program (ILP)-based algorithm [10] using
                 lp\_solve and Gurobi in rectangle escape routing, the
                 experimental results show that our proposed algorithm
                 obtains the same results but reduces CPU time by 94.2\%
                 and 35.7\% when using lp\_solve and Gurobi for 16 tested
                 examples with no direction constraint on average,
                 respectively. Compared with the modified algorithm from
                 Ma's ILP-based algorithm [10] using lp\_solve and Gurobi
                 in direction-constrained rectangle escape routing, the
                 experimental results show that our proposed algorithm
                 obtains the same results but reduces CPU time by 94.3\%
                 and 37.7\% when using lp\_solve and Gurobi for 16 tested
                 examples with direction constraints on average,
                 respectively. Besides that, compared with Yan's
                 iterative algorithm, the experimental results show that
                 our proposed algorithm increases CPU time by 1.0\% to
                 reduce the number of used layers 11.1\% for 16 tested
                 examples on average.",
  acknowledgement = ack-nhfb,
  articleno =    "34",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Wang:2018:MTI,
  author =       "Shengcheng Wang and Ran Wang and Krishnendu
                 Chakrabarty and Mehdi B. Tahoori",
  title =        "Multicast Testing of Interposer-Based {$ 2.5 $D}
                 {ICs}: Test-Architecture Design and Test Scheduling",
  journal =      j-TODAES,
  volume =       "23",
  number =       "3",
  pages =        "35:1--35:??",
  month =        apr,
  year =         "2018",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3177879",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:39 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Interposer-based 2.5D integrated circuits (ICs) are
                 seen today as a precursor to 3D ICs based on
                 through-silicon vias (TSVs). All the dies in a 2.5D IC
                 must be adequately tested for product qualification.
                 However, due to the limited number of package pins, it
                 is a major challenge to test 2.5D ICs using
                 conventional methods. Moreover, due to higher
                 integration levels, test-application time and test
                 power consumption for 2.5D ICs are also increased
                 compared to their 2D counterparts. Therefore, it is
                 imperative to take these issues into account during
                 2.5D IC testing. In this article, we present an
                 efficient multicast test architecture for targeting
                 defects in dies, in which multiple dies can be tested
                 simultaneously to reduce the test-application time
                 under constraints on test power and fault coverage. We
                 also propose a test scheduling and optimization
                 technique that can be utilized with the multicast test
                 architecture. By considering the trade-off between
                 test-application time, test-power budget, and test
                 quality, the proposed technique provides test schedules
                 with minimum test-application time under constraints on
                 power consumption and fault coverage. Compared to
                 previous work, the proposed technique can reduce
                 test-application time by up to 53.4\% for benchmark
                 designs while achieving higher fault coverage. Since
                 the loss in fault coverage due to multicast testing is
                 extremely small, we can use top-off patterns to achieve
                 full fault coverage for the dies at negligible
                 additional cost.",
  acknowledgement = ack-nhfb,
  articleno =    "35",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Zhai:2018:ENG,
  author =       "Jinyuan Zhai and Changhao Yan and Sheng-Guo Wang and
                 Dian Zhou and Hai Zhou and Xuan Zeng",
  title =        "An Efficient Non-{Gaussian} Sampling Method for High
                 Sigma {SRAM} Yield Analysis",
  journal =      j-TODAES,
  volume =       "23",
  number =       "3",
  pages =        "36:1--36:??",
  month =        apr,
  year =         "2018",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3174866",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:39 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Yield analysis of SRAM is a challenging issue,
                 because the failure rates of SRAM cells are extremely
                 small. In this article, an efficient non-Gaussian
                 sampling method of cross entropy optimization is
                 proposed for estimating the high sigma SRAM yield.
                 Instead of sampling with the Gaussian distribution in
                 existing methods, a non-Gaussian distribution, i.e., a
                 joint one-dimensional generalized Pareto distribution
                 and $ (n - 1) $-dimensional Gaussian distribution, is
                 taken as the function family of practical distribution,
                 which is proved to be more suitable to fit the ideal
                 distribution in the view of extreme failure event. To
                 minimize the cross entropy between practical and ideal
                 distributions, a sequential quadratic programming
                 solver with multiple starting points strategy is
                 applied for calculating the optimal parameters of
                 practical distributions. Experimental results show that
                 the proposed non-Gaussian sampling is a $ 2.2$--$ 4.1
                 \times $ speedup over the Gaussian sampling, on the
                 whole, it is about a $ 1.6$--$ 2.3 \times $ speedup
                 over state-of-the-art methods with low- and
                 high-dimensional cases without loss of accuracy.",
  acknowledgement = ack-nhfb,
  articleno =    "36",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Lu:2018:FDR,
  author =       "Guan-Ruei Lu and Chun-Hao Kuo and Kuen-Cheng Chiang
                 and Ansuman Banerjee and Bhargab B. Bhattacharya and
                 Tsung-Yi Ho and Hung-Ming Chen",
  title =        "Flexible Droplet Routing in Active Matrix-Based
                 Digital Microfluidic Biochips",
  journal =      j-TODAES,
  volume =       "23",
  number =       "3",
  pages =        "37:1--37:??",
  month =        apr,
  year =         "2018",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3184388",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:39 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "The active matrix (AM)-based architecture offers many
                 advantages over conventional digital
                 electrowetting-on-dielectric (EWOD) microfluidic
                 biochips, such as the capability of handling
                 variable-size droplets, more flexible droplet movement,
                 and precise control over droplet navigation. However, a
                 major challenge in choosing the routing paths is to
                 decide when the droplets are to be reshaped depending
                 on the congestion of the intended path, or split- and
                 route sub droplets, and merging them at their respective
                 destinations. As the number of microelectrodes in
                 AM-EWOD chips is large, the path selection problem
                 becomes further complicated. In this article, we
                 propose a negotiation-guided flow based on routing of
                 subdroplets that obviates the explicit need for
                 deciding when the droplets are to be manipulated, yet
                 fully utilizing the power of droplet reshaping,
                 splitting, and merging them to facilitate their
                 journey. The proposed algorithm reduces routing cost
                 and provides more freedom in deadlock avoidance in the
                 presence of multiple routing tasks by assigning certain
                 congestion penalty for sibling subdroplets and fluidic
                 penalty for heterogeneous droplets. Compared to
                 existing techniques, it reduces latest arrival time by
                 an average of 29\% for several benchmark and random
                 test suites. Furthermore, our method is observed to
                 provide 100\% routability of nets for all test cases,
                 whereas existing and baseline routers fail to produce
                 feasible solutions in many instances. We also propose a
                 reliable mode droplet routing strategy where the number
                 of unreliable splitting operations can be reduced by
                 paying a small penalty on latest arrival time.",
  acknowledgement = ack-nhfb,
  articleno =    "37",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Xie:2018:ADI,
  author = {Mimi Xie and Chen Pan and Mengying Zhao and Yongpan Liu and
    Chun Jason Xue and Jingtong Hu},
  title = {Avoiding Data Inconsistency in Energy Harvesting Powered
    Embedded Systems},
  journal = j-TODAES,
  volume = {23},
  number = {3},
  pages = {38:1--38:??},
  month = apr,
  year = {2018},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3182170},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Fri Mar 22 16:58:39 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {Energy harvesting is becoming a favorable alternative to
    power future generation embedded systems, as it is more
    environmentally and user friendly. However, energy harvesting powered
    embedded systems suffer from frequent execution interruption due to
    unstable energy supply. To tackle this problem, nonvolatile memory
    has been deployed to save the whole volatile state for computation.
    When power resumes, the processor can restore the state back to
    volatile memories and continue execution. However, without careful
    consideration, the process of checkpointing and resuming could cause
    inconsistency between volatile and nonvolatile memories, which leads
    to irreversible errors. In this article, we propose a
    consistency-aware adaptive checkpointing scheme that ensures
    correctness for all checkpoints. The proposed technique efficiently
    identifies all possible inconsistency positions in programs and
    inserts auxiliary code to ensure correctness by offline analysis. In
    addition, adaptive checkpointing assisted register file profiling and
    online tracking techniques further reduce the overhead of each
    checkpoint. Evaluation results show that the proposed checkpointing
    strategy can successfully eliminate inconsistency errors and greatly
    reduce the checkpointing overhead.},
  acknowledgement = ack-nhfb,
  articleno = {38},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Arcaro:2018:RTG,
  author = {Lu{\'\i}s Fernando Arcaro and Karila {Palma Silva} and
    R{\^o}mulo {Silva De Oliveira}},
  title = {On the Reliability and Tightness of {GP} and Exponential
    Models for Probabilistic {WCET} Estimation},
  journal = j-TODAES,
  volume = {23},
  number = {3},
  pages = {39:1--39:??},
  month = apr,
  year = {2018},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3185154},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Fri Mar 22 16:58:39 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {As computer architectures evolve, guaranteeing that
    Real-Time Systems' (RTSs') timing requirements are met through Worst
    Case Execution Time (WCET) upper bounds becomes increasingly
    difficult. Techniques such as Measurement-Based Probabilistic Timing
    Analysis (MBPTA) have emerged that estimate WCET bounds exceeded only
    with arbitrarily low probabilities (i.e., pWCETs) through Extreme
    Value Theory (EVT). The Peaks Over Threshold (POT) approach for
    applying EVT involves adjusting a tail-shaped distribution, e.g.,
    Generalized Pareto (GP) or Exponential, to the values that exceed a
    carefully selected high threshold. Several works suggest that GP
    should be used within POT for best representing different tail
    shapes, while others consider the Exponential model more adequate for
    providing upper bounds with increased reliability. This work presents
    empirical reliability and tightness evaluations of the pWCET
    estimates yielded by the GP and Exponential models while applying
    MBPTA through the POT approach. It mainly provides counter-evidence
    to the GP model reliability and evidence of the Exponential model
    adequacy in this context.},
  acknowledgement = ack-nhfb,
  articleno = {39},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Jassi:2018:GGB,
  author =       "Munish Jassi and Yong Hu and Daniel
                 Mueller-Gritschneder and Ulf Schlichtmann",
  title =        "Graph-Grammar-Based {IP}-Integration ({GRIP}) --- An
                 {EDA} Tool for Software-Defined {SoCs}",
  journal =      j-TODAES,
  volume =       "23",
  number =       "3",
  pages =        "40:1--40:??",
  month =        apr,
  year =         "2018",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3139381",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:39 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "In modern system-on-chip (SoC) designs, IP-reuse is
                 considered a driving force to increase productivity. To
                 support various designs, a huge amount of Intellectual
                 Property (IP) hardware blocks have been developed. The
                 integration of those IPs into an SoC may require
                 significant effort --- up to days or weeks depending on
                 experience and complexity. This article presents a
                 novel approach to significantly reduce the design
                 effort to bring-up a working SoC design by automatic IP
                 integration as part of a library-based Software-defined
                 SoC flow. In detail, the IP-supplier prepares a
                 HW-accelerated software library (HASL) for the SoC
                 architect, who wants to use the IP in an SoC design. As
                 a key point of our approach, integration knowledge is
                 encoded in the library as a set of integration rules.
                 These rules are defined in the machine-readable
                 standardized IP-XACT format by the IP supplier, who has
                 a good knowledge of the IP's hardware details. The
                 library preparation step on the IP supplier's side is
                 also partly automated in the proposed flow, including a
                 partial generation of configurable HW drivers,
                 schedulers, and the software library functions. For the
                 SoC architect, we have developed the
                 graph-grammar-based IP-integration (GRIP) tool. The
                 software application is developed using the functions
                 supplied in the HASL. According to the calls to the
                 HASL functions, the GRIP tool automatically integrates
                 IP-blocks using the rule information supplied with the
                 library and runs a full Design Space Exploration. For
                 this, the SoC architecture and rules are transformed
                 into the graph domain to apply graph rewriting methods.
                 The GRIP tool is model-driven and based on the Eclipse
                 Modeling Framework. With code generation techniques,
                 SoC candidate architectures can be transformed to
                 hardware descriptions for the target platform. The
                 HW/SW interfaces between SW library functions and IP
                 blocks can be automatically generated for bare-metal or
                 Linux-based applications. The approach is demonstrated
                 with two case-studies on the Xilinx Zynq-based ZedBoard
                 evaluation board using a HASL for computer vision. It
                 can yield $ 10 \times $--$ 150 \times $ performance
                 improvement for the bare-metal application versions and
                 $ 4 \times $--$ 7 \times $ performance improvement for
                 the Linux-based application versions, when executed on
                 an optimized HW-accelerated SoC architecture compared
                 to a non HW-accelerated SoC. The effort for IP
                 integration is comparable to using a software library,
                 hence, providing a significant advantage over a manual
                 IP integration.",
  acknowledgement = ack-nhfb,
  articleno =    "40",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Chu:2018:ISS,
  author = {Chris Chu and Mustafa Ozdal},
  title = {Introduction to the Special Section on Advances in Physical
    Design Automation},
  journal = j-TODAES,
  volume = {23},
  number = {4},
  pages = {41:1--41:??},
  month = jul,
  year = {2018},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3199220},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Fri Mar 22 16:58:39 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  articleno = {41},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Li:2018:UHP,
  author = {Wuxi Li and Yibo Lin and Meng Li and Shounak Dhar and David
    Z. Pan},
  title = {{UTPlaceF 2.0}: a High-Performance Clock-Aware {FPGA}
    Placement Engine},
  journal = j-TODAES,
  volume = {23},
  number = {4},
  pages = {42:1--42:??},
  month = jul,
  year = {2018},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3174849},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Fri Mar 22 16:58:39 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {Modern field-programmable gate array (FPGA) devices contain
    complex clock architectures on top of configurable logics. Unlike
    application specific integrated circuits (ASICs), the physical
    structure of clock networks in an FPGA is pre-manufactured and cannot
    be adjusted to different applications. Furthermore, clock routing
    resources are typically limited for high-utilization designs.
    Consequently, clock architectures impose extra clock constraints and
    further complicate physical implementation tasks such as placement.
    Traditional ASIC placement techniques only optimize conventional
    design metrics such as wirelength, routability, power, and timing
    without clock legality consideration. It is imperative to have new
    techniques to honor clock constraints during placement for FPGAs. In
    this article, we propose a high-performance FPGA placement engine,
    UTPlaceF 2.0, that optimizes wirelength and routability while
    honoring complex clock constraints. Our proposed approaches consist
    of an iterative minimum-cost-flow-based cell assignment as well as a
    clock-aware packing for producing clock-legal yet high-quality
    placement solutions. UTPlaceF 2.0 won first place in the ISPD'17
    clock-aware FPGA placement contest organized by Xilinx, outperforming
    the second- and the third-place winners by 4.0\% and 10.0\%,
    respectively, in routed wirelength with competitive runtime, on a set
    of industry benchmarks.},
  acknowledgement = ack-nhfb,
  articleno = {42},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Darav:2018:ELH,
  author = {Nima Karimpour Darav and Ismail S. Bustany and Andrew
    Kennings and David Westwick and Laleh Behjat},
  title = {{Eh?Legalizer}: a High Performance Standard-Cell Legalizer
    Observing Technology Constraints},
  journal = j-TODAES,
  volume = {23},
  number = {4},
  pages = {43:1--43:??},
  month = jul,
  year = {2018},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3158215},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Fri Mar 22 16:58:39 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {The legalization step is performed after global placement
    where wire length and routability are optimized or during timing
    optimization where buffer insertion or gate sizing are applied to
    meet timing requirements. Therefore, an ideal legalization approach
    must preserve the quality of the input placement in terms of
    routability, wire length, and timing constraints. These requirements
    indirectly impose maximum and average cell movement constraints
    during legalization. In addition, the legalization step should
    effectively manage white space availability with a highly efficient
    runtime in order to be used in an iterative process such as timing
    optimization. In this article, a robust and fast legalization method
    called Eh?Legalizer for standard-cell placement is presented.
    Eh?Legalizer legalizes input placements while minimizing the maximum
    and average cell movements using a highly efficient novel network
    flow-based approach. In contrast to the traditional network
    flow-based legalizers, areas with high cell utilizations are
    effectively legalized by finding several candidate paths and there is
    no need for a post-process step. The experimental results conducted
    on several benchmarks show that Eh?Legalizer results in 2.5 times and
    3.3 times less the maximum and average cell movement, respectively,
    while its runtime is significantly ($ 18 \times $) lower compared to
    traditional legalizers. In addition, the experimental results
    illustrate the scalability and robustness of Eh?Legalizer with
    respect to the floorplan complexity. Finally, the detailed-routing
    results show detailed-routing violations are reduced on average by
    23\% when Eh?Legalizer is used to generate legal solutions.},
  acknowledgement = ack-nhfb,
  articleno = {43},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Wang:2018:VAG,
  author =       "Chen Wang and Yanan Sun and Shiyan Hu and Li Jiang and
                 Weikang Qian",
  title =        "Variation-Aware Global Placement for Improving
                 Timing-Yield of Carbon-Nanotube Field Effect Transistor
                 Circuit",
  journal =      j-TODAES,
  volume =       "23",
  number =       "4",
  pages =        "44:1--44:??",
  month =        jul,
  year =         "2018",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3175500",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:39 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "As the conventional silicon-based CMOS technology
                 marches toward the sub-10nm region, the problem of high
                 power density becomes increasingly serious. Under this
                 circumstance, the carbon-nanotube field effect
                 transistors (CNFETs) emerge as a promising alternative
                 to the conventional silicon-based CMOS devices.
                 However, they experience a much larger variation than
                 the silicon-based CMOS devices, which results in a
                 large circuit delay variation and hence, a significant
                 timing yield loss. One of the main variation sources is
                 the carbon-nanotube (CNT) density variation. However,
                 it shows a special property not existing for
                 silicon-based CMOS devices, namely the asymmetric
                 spatial correlation. In this work, we propose novel
                 global placement algorithms to reduce the timing yield
                 loss caused by the CNT density variation. To
                 effectively reduce the statistical circuit delay, we
                 first develop a statistical delay measure for a segment
                 of gates. Based on this measure, we further develop a
                 segment-based strategy and a path-based placement
                 strategy to reduce the delays of the statistically
                 critical paths. Experimental results demonstrated that
                 both of our approaches effectively improve the timing
                 yield.",
  acknowledgement = ack-nhfb,
  articleno =    "44",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Lin:2018:MRB,
  author =       "Kuen-Wey Lin and Yeh-Sheng Lin and Yih-Lang Li and
                 Rung-Bin Lin",
  title =        "A Maze Routing-Based Methodology With Bounded
                 Exploration and Path-Assessed Retracing for Constrained
                 Multilayer Obstacle-Avoiding Rectilinear {Steiner} Tree
                 Construction",
  journal =      j-TODAES,
  volume =       "23",
  number =       "4",
  pages =        "45:1--45:??",
  month =        jul,
  year =         "2018",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3177878",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:39 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Owing to existing intellectual properties, prerouted
                 nets, and power/ground wires, the routing of a system
                 on chip design demands to detour around multilayer
                 obstacles. Traditional approaches for the multilayer
                 obstacle-avoiding rectilinear Steiner tree (ML-OARST)
                 problem are thus nonmaze routing-based approaches for
                 runtime issues, yet they cannot be directly applied to
                 deal with additional constraints such as variant edge
                 weights on a routing layer. In this article, we propose
                 the maze routing-based methodology with bounded
                 exploration and path-assessed retracing to reduce
                 runtime and routing cost for the constrained ML-OARST
                 construction problem. The exploration of maze routing
                 is bounded to reduce the runtime; the costs of
                 connecting pins are computed to select Steiner points
                 in the retracing phase. To further reduce the routing
                 cost, we develop a Steiner point-based ripping-up and
                 rebuilding scheme for altering tree topology.
                 Experimental results on industrial and randomly
                 generated benchmarks demonstrate that the proposed
                 methodology can provide a solution with good quality in
                 terms of routing cost and has a significant speedup
                 compared to traditional maze routing. A commercial tool
                 is also used to show the effectiveness of the proposed
                 methodology.",
  acknowledgement = ack-nhfb,
  articleno =    "45",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Jiao:2018:OER,
  author =       "Fengxian Jiao and Sheqin Dong",
  title =        "Ordered Escape Routing with Consideration of
                 Differential Pair and Blockage",
  journal =      j-TODAES,
  volume =       "23",
  number =       "4",
  pages =        "46:1--46:??",
  month =        jul,
  year =         "2018",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3185783",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:39 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Ordered escape routing is a critical issue in
                 high-speed PCB routing. Differential pair and
                 thermal-blockage-avoided are useful in PCB design to
                 obtain high noise immunity and low electromagnetic
                 interference. In this article, a Min-cost
                 Multi-commodity Flow (MMCF) approach is proposed to
                 solve the ordered escape routing. First, the
                 characteristic of grid pin array and staggered pin
                 array is analyzed and then a basic network model is
                 used to convert ordered escape routing to MMCF model.
                 To satisfy the constraints of ordered escape routing,
                 three novel transformations, such as non-crossing
                 transformation, ordering transformation, and capacity
                 transformation, are used to convert the basic network
                 model to the final correct MMCF model. After that, the
                 differential pair in ordered escape routing is
                 discussed. Finally, a method to deal with the blockage
                 issue is proposed. Experimental results show that our
                 method achieves 100\% routability for all the test
                 cases. The method can get both a feasible solution and
                 an optimal solution for ordered escape routing.
                 Compared to published approaches, our method improves
                 in both wire length and CPU time remarkably. At the
                 same time, the proposed method can effectively avoid
                 the blockage and deal with the differential pair.",
  acknowledgement = ack-nhfb,
  articleno =    "46",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Liu:2018:RML,
  author =       "Bo Liu and Gong Chen and Bo Yang and Shigetoshi
                 Nakatake",
  title =        "Routable and Matched Layout Styles for Analog Module
                 Generation",
  journal =      j-TODAES,
  volume =       "23",
  number =       "4",
  pages =        "47:1--47:??",
  month =        jul,
  year =         "2018",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3182169",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:39 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Two novel automatic generation methods for analog
                 layout---a symmetrical twin-row method for MOS
                 transistors and a twisted common-centroid method for
                 capacitor arrays---are introduced. Based on the proposed
                 layout styles and the corresponding algorithms, the
                 symmetry and common-centroid placement patterns for
                 analog devices are realized to guarantee matching
                 properties. On this basis, as the most prominent
                 contribution of this article, channel routing-based
                 algorithms for the proposed layout styles are presented
                 and could achieve 100\% routability due to
                 well-arranged devices and corresponding low routing
                 complexity. The algorithms' benefits include a small
                 layout area that maximizes the diffusion-sharing of MOS
                 transistors and less routing layer usage for
                 common-centroid device arrays. Moreover, we
                 successfully applied our algorithms to the layout
                 designs of two typical analog modules including a
                 two-stage operating amplifier and a Successive
                 Approximation Register Analog-to-Digital Converter
                 (SAR-ADC). The generated layouts and the circuit
                 simulation results demonstrate the effectiveness of our
                 algorithms in terms of their routability and matching
                 properties. Our algorithms can also be extended to
                 apply to a variety of essential MOS analog circuits.",
  acknowledgement = ack-nhfb,
  articleno =    "47",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Lee:2018:ICA,
  author =       "Pei-Yu Lee and Iris Hui-Ru Jiang",
  title =        "{iTimerM}: a Compact and Accurate Timing Macro Model
                 for Efficient Hierarchical Timing Analysis",
  journal =      j-TODAES,
  volume =       "23",
  number =       "4",
  pages =        "48:1--48:??",
  month =        jul,
  year =         "2018",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3149818",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:39 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "As designs continue to grow in size and complexity,
                 EDA paradigm shifts from flat to hierarchical timing
                 analysis. In this article, we present compact and
                 accurate timing macro modeling, which is the key to
                 efficient and accurate hierarchical timing analysis.
                 Our goal is to contain only a minimal amount of
                 interface logic in our timing macro model. The main
                 idea is to separate the interface logic into variant
                 and constant timing regions. Then, the variant timing
                 region is reserved for accuracy, while the constant
                 timing region is reduced for compactness. For reducing
                 the constant timing region, we propose anchor pin
                 insertion and deletion by generalizing existing timing
                 graph reduction techniques. Furthermore, we devise a
                 lookup table index selection technique to achieve high
                 model accuracy over the possible operating condition
                 range. Compared with two common models used in
                 industry, extracted timing model and interface logic
                 model, our model has high model accuracy and small
                 model size. Based on the TAU 2016 and 2017 timing macro
                 modeling contest benchmark suites, our results show
                 that our algorithm delivers superior efficiency and
                 accuracy: Hierarchical timing analysis using our model
                 can significantly reduce runtime and memory compared
                 with flat timing analysis on the original design.
                 Moreover, our algorithm outperforms TAU 2016 and 2017
                 contest winners in model accuracy, model size, model
                 generation performance, and model usage performance.",
  acknowledgement = ack-nhfb,
  articleno =    "48",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Sadat:2018:OAL,
  author =       "Sayed Abdullah Sadat and Mustafa Canbolat and
                 Sel{\c{c}}uk K{\"o}se",
  title =        "Optimal Allocation of {LDOs} and Decoupling Capacitors
                 within a Distributed On-Chip Power Grid",
  journal =      j-TODAES,
  volume =       "23",
  number =       "4",
  pages =        "49:1--49:??",
  month =        jul,
  year =         "2018",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3177877",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:39 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Parallel on-chip voltage regulation, where multiple
                 regulators are connected to the same power grid, has
                 recently attracted significant attention with the
                 proliferation of small on-chip voltage regulators. In
                 this article, the number, size, and location of
                 parallel low-dropout (LDO) regulators and intentional
                 decoupling capacitors are optimized using mixed integer
                 non-linear programming formulation. The proposed
                 optimization function concurrently considers multiple
                 objectives such as area, power noise, and overall power
                 consumption. Certain objectives are optimized by
                 putting constraints on the other objectives with the
                 proposed technique. Additional constraints have been
                 added to avoid the overlap of LDOs and decoupling
                 capacitors in the optimization process. The results of
                 an optimized LDO allocation in the POWER8 chip is
                 compared with the recent LDO allocation in the same IBM
                 chip in a case study where a 20\% reduction in the
                 noise is achieved. The results of the proposed
                 multi-criteria objective function under a different
                 area, power, and noise constraints are also evaluated
                 with a sample ISPD'11 benchmark circuits in another
                 case study.",
  acknowledgement = ack-nhfb,
  articleno =    "49",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Cakir:2018:RED,
  author =       "Burcin Cakir and Sharad Malik",
  title =        "Reverse Engineering Digital {ICs} through Geometric
                 Embedding of Circuit Graphs",
  journal =      j-TODAES,
  volume =       "23",
  number =       "4",
  pages =        "50:1--50:??",
  month =        jul,
  year =         "2018",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3193121",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:39 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Outsourcing of design and manufacturing processes
                 makes integrated circuits (ICs) vulnerable to
                 adversarial changes and raises concerns about their
                 integrity. Reverse engineering the manufactured netlist
                 helps identify malicious insertions. In this article,
                 we present an automated approach that, given a
                 reference design description with high-level blocks,
                 infers these blocks in an untrusted gate-level (test)
                 implementation. Using the graph connectivity of the
                 netlists, we compute a geometric embedding for each
                 wire in the circuits, which, then, is used to compute a
                 bipartite matching between the nodes of the two designs
                 and identify high-level blocks in the test circuit.
                 Experiments to evaluate the efficacy of the proposed
                 technique on various-sized designs, including the
                 multi-core processor OpenSparc T1, show that it can
                 correctly match over 90\% of gates in the test circuit
                 to their corresponding block in the reference model.",
  acknowledgement = ack-nhfb,
  articleno =    "50",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Ittershagen:2018:IFM,
  author =       "Philipp Ittershagen and Kim Gr{\"u}ttner and Wolfgang
                 Nebel",
  title =        "An Integration Flow for Mixed-Critical Embedded
                 Systems on a Flexible Time-Triggered Platform",
  journal =      j-TODAES,
  volume =       "23",
  number =       "4",
  pages =        "51:1--51:??",
  month =        jul,
  year =         "2018",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3190837",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:39 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "The rise of mixed-critical embedded systems imposes
                 novel challenges on the specification, development, and
                 functional validation in a design flow. In the emerging
                 dynamic scheduling context of mixed-criticality
                 platforms, the system behaviour needs to be estimated
                 in an early step in the design flow to assess the
                 integration impact, especially for quality of
                 service-driven, low-critical subsystems. We provide a
                 modelling and integration flow for specifying,
                 estimating, and evaluating software functions, ranging
                 from an initial executable specification to an
                 implementation candidate on an MPSoC. Based on a
                 data-driven model to evaluate dynamic resource
                 consumption effects of high-critical subsystems and the
                 scheduling overhead, we propose a systematic method for
                 constructing workload models of high-critical software
                 components on the target. Our proxies provide an
                 integration environment for low-critical functions by
                 mimicking the high-critical temporal behaviour on the
                 target. By integrating a low-critical video encoding
                 subsystem with a benchmark suite as the high-critical
                 subsystem we show that the performance model allows for
                 evaluating end-to-end execution times in the
                 low-critical function with an average error of 0.37\%
                 and the application proxy only introduces a maximum
                 error of 1.14\% in a performance evaluation.",
  acknowledgement = ack-nhfb,
  articleno =    "51",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Chen:2018:ESA,
  author =       "Yung-Chih Chen",
  title =        "Enhancements to {SAT} Attack: Speedup and Breaking
                 Cyclic Logic Encryption",
  journal =      j-TODAES,
  volume =       "23",
  number =       "4",
  pages =        "52:1--52:??",
  month =        jul,
  year =         "2018",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3190853",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:39 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Logic encryption is an IC protection technique for
                 preventing an IC design from overproduction and
                 unauthorized use. It hides a design's functionality by
                 inserting key gates and key inputs, such that a secret
                 key is required to activate the design and make it
                 function correctly. The security of a logic encryption
                 algorithm is evaluated according to the difficulty of
                 cracking the secret key. The state-of-the-art attack
                 method identifies a secret key with a series of
                 SAT-solving calls to prune all the incorrect keys.
                 Although it can break most of the existing logic
                 encryption algorithms within a few hours, we observe
                 that there exist two enhancements for increasing its
                 efficiency. First, we introduce a preprocess to
                 identify and eliminate redundant key inputs and
                 simplify SAT problems. Second, we present a key
                 checking process for increasing the pruned incorrect
                 keys in each SAT-solving iteration. We conducted the
                 experiments on a set of benchmark circuits encrypted by
                 six different logic encryption algorithms. The
                 simulation results show that the enhanced method can
                 successfully unlock 10 benchmark circuits which
                 originally could not be cracked within 1 hour. For all
                 the benchmark circuits, the average speedup is
                 approximately 2.2x in terms of simulation time.
                 Furthermore, a recent logic encryption method locks a
                 design by creating cyclic paths, which can invalidate
                 the SAT-based attack method. We analyze the impact of
                 cyclic paths and propose an enhancement to break the
                 cyclic logic encryption method.",
  acknowledgement = ack-nhfb,
  articleno =    "52",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Pomeranz:2018:PIP,
  author =       "Irith Pomeranz",
  title =        "Partially Invariant Patterns for {LFSR}-Based
                 Generation of Close-to-Functional Broadside Tests",
  journal =      j-TODAES,
  volume =       "23",
  number =       "4",
  pages =        "53:1--53:??",
  month =        jul,
  year =         "2018",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3201405",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:39 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Close-to-functional scan-based tests are expected to
                 create close-to-functional operation conditions in
                 order to avoid overtesting of delay faults. Existing
                 metrics for the proximity to functional operation
                 conditions are based on the scan-in state. For example,
                 they consider the distance between the scan-in state
                 and a reachable state (a state that the circuit can
                 visit during functional operation). However, the
                 deviation from functional operation conditions can
                 increase during a test beyond the deviation that is
                 measured by the scan-in state. To ensure that the
                 deviation does not increase, this article introduces
                 the concept of a partially invariant pattern. The
                 article describes a procedure for extracting partially
                 invariant patterns from functional broadside tests
                 whose scan-in states are reachable states. Being
                 partially specified, partially invariant patterns are
                 suitable for test data compression. The article studies
                 the use of partially invariant patterns for
                 linear-feedback shift-register (LFSR) based test data
                 compression. Noting that a seed may not exist for a
                 given partially invariant pattern with a given LFSR,
                 the procedure described in this article uses an
                 iterative process that not only matches a seed to a
                 partially invariant pattern, but also adjusts the
                 partially invariant pattern based on the test that the
                 seed produces. The article also addresses the selection
                 of LFSR's for the generation of close-to-functional
                 broadside tests based on partially invariant patterns.
                 Experimental results are presented to demonstrate the
                 feasibility of the procedure.",
  acknowledgement = ack-nhfb,
  articleno =    "53",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Zhao:2018:TSB,
  author =       "Hengyang Zhao and Qi Hua and Hai-Bao Chen and Yaoyao
                 Ye and Hai Wang and Sheldon X.-D. Tan and Esteban
                 Tlelo-Cuautle",
  title =        "Thermal-Sensor-Based Occupancy Detection for Smart
                 Buildings Using Machine-Learning Methods",
  journal =      j-TODAES,
  volume =       "23",
  number =       "4",
  pages =        "54:1--54:??",
  month =        jul,
  year =         "2018",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3200904",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:39 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "In this article, we propose a novel approach to detect
                 the occupancy behavior of a building through the
                 temperature and/or possible heat source information.
                 The new method can be used for energy reduction and
                 security monitoring for emerging smart buildings. Our
                 work is based on a building simulation program,
                 EnergyPlus, from the Department of Energy. EnergyPlus
                 can model various time-series inputs to a building such
                 as ambient temperature; heating, ventilation, and
                 air-conditioning (HVAC) inputs; power consumption of
                 electronic equipment; lighting; and number of occupants
                 in a room, sampled each hour, and produce resulting
                 temperature traces of zones (rooms). Two
                 machine-learning-based approaches for detecting human
                 occupancy of a smart building are applied herein,
                 namely support vector regression (SVR) and recurrent
                 neural network (RNN). Experimental results with SVR
                 show that the four-feature model provides accurate
                 detection rates, giving a 0.638 average error and
                 5.32\% error rate, and the five-feature model delivers
                 a 0.317 average error and 2.64\% error rate. This
                 indicates that SVR is a viable option for occupancy
                 detection. In the RNN method, Elman's RNN can estimate
                 occupancy information of each room of a building with
                 high accuracy. It has local feedback in each layer and,
                 for a five-zone building, it is very accurate for
                 occupancy behavior estimation. The error level, in
                 terms of number of people, can be as low as 0.0056 on
                 average and 0.288 at maximum, considering ambient, room
                 temperatures, and HVAC powers as detectable
                 information. Without knowing HVAC powers, the
                 estimation error can still be 0.044 on average, and
                 only 0.71\% estimated points have errors greater than
                 0.5. Our article further shows that both methods
                 deliver similar accuracy in the occupancy detection.
                 But the SVR model is more stable for adding or removing
                 features of the system, while the RNN method can
                 deliver more accuracy when the features used in the
                 model do not change a lot.",
  acknowledgement = ack-nhfb,
  articleno =    "54",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Shalu:2018:DDS,
  author =       "Shalu and Srijan Kumar and Ananya Singla and Sudip Roy
                 and Krishnendu Chakrabarty and Partha P. Chakrabarti
                 and Bhargab B. Bhattacharya",
  title =        "Demand-Driven Single- and Multitarget Mixture
                 Preparation Using Digital Microfluidic Biochips",
  journal =      j-TODAES,
  volume =       "23",
  number =       "4",
  pages =        "55:1--55:??",
  month =        jul,
  year =         "2018",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3200903",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:39 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Recent studies in algorithmic microfluidics have led
                 to the development of several techniques for automated
                 solution preparation using droplet-based digital
                 microfluidic (DMF) biochips. A major challenge in this
                 direction is to produce a mixture of several reactants
                 with a desired ratio while optimizing reactant cost and
                 preparation time. The sequence of mix-split operations
                 that are to be performed on the droplets is usually
                 represented as a mixing tree (or graph). In this
                 article, we present an efficient mixing algorithm,
                 namely, Mixing Tree with Common Subtrees (MTCS), for
                 preparing single-target mixtures. MTCS attempts to best
                 utilize intermediate droplets, which were otherwise
                 wasted, and uses morphing based on permutation of leaf
                 nodes to further reduce the graph size. The technique
                 can be generalized to produce multitarget ratios, and
                 we present another algorithm, namely, Multiple Target
                 Ratios (MTR). Additionally, in order to enhance the
                 output load, we also propose an algorithm for droplet
                 streaming called Multitarget Multidemand (MTMD).
                 Simulation results on a large set of target ratios show
                 that MTCS can reduce the mean values of the total
                 number of mix-split steps ($ T_{ms}$) and waste
                 droplets ($W$) by 16\% and 29\% over Min-Mix (Thies et
                 al. 2008) and by 22\% and 34\% over RMA (Roy et al.
                 2015), respectively. Experimental results also suggest
                 that MTR can reduce the average values of $T_{ms}$ and
                 $W$ by 23\% and 44\% over the repeated version of
                 Min-Mix, by 30\% and 49\% over the repeated version of
                 RMA, and by 9\% and 22\% over the repeated-version of
                 MTCS, respectively. It is observed that MTMD can reduce
                 the mean values of $T_{ms}$ and $W$ by 64\% and 85\%,
                 respectively, over MTR. Thus, the proposed multitarget
                 techniques MTR and MTMD provide efficient solutions to
                 multidemand, multitarget mixture preparation on a DMF
                 platform.",
  acknowledgement = ack-nhfb,
  articleno =    "55",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Huang:2018:DML,
  author =       "Hantao Huang and Hang Xu and Yuehua Cai and Rai
                 Suleman Khalid and Hao Yu",
  title =        "Distributed Machine Learning on Smart-Gateway Network
                 toward Real-Time Smart-Grid Energy Management with
                 Behavior Cognition",
  journal =      j-TODAES,
  volume =       "23",
  number =       "5",
  pages =        "56:1--56:??",
  month =        oct,
  year =         "2018",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3209888",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:40 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Real-time data analytics for smart-grid energy
                 management is challenging with consideration of both
                 occupant behavior profiles and energy profiles. This
                 article proposes a distributed and networked
                 machine-learning platform on smart-gateway-based
                 smart-grid in residential buildings. It can analyze
                 occupant behaviors, provide short-term load
                 forecasting, and allocate renewable energy resources.
                 First, occupant behavior profile is captured by
                 real-time indoor positioning system with WiFi data
                 analytics; and the energy profile is extracted by
                 real-time meter system with electricity load data
                 analytics. Then, the 24-hour occupant behavior profile
                 and energy profile are fused with prediction using an
                 online distributed machine-learning algorithm with
                 real-time data update. Based on the forecasted occupant
                 behavior profile and energy profile, solar energy
                 source is allocated to reduce peak demand on the main
                 electricity power-grid. The whole management flow can
                 be operated on the distributed smart-gateway network
                 with limited computational resources but with a
                 supported general machine-learning engine. Experimental
                 results on occupant behavior extraction show that the
                 proposed algorithm can achieve 91.2\% positioning
                 accuracy within 3.64 m. Moreover, $ 50 \times $ and $ 38
                 \times $ speed-up is obtained during data testing and
                 training, respectively, when compared to traditional
                 support vector machine (SVM) method. For short-term
                 load forecasting, it is 14.83\% more accurate when
                 compared to SVM-based data analytics. Based on the
                 predicted occupant behavior profile and energy profile,
                 our proposed energy management system can achieve
                 19.66\% more peak load reduction and 26.41\% more cost
                 saving as compared to the SVM-based method.",
  acknowledgement = ack-nhfb,
  articleno =    "56",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Zoni:2018:CSC,
  author =       "Davide Zoni and Alessandro Barenghi and Gerardo Pelosi and
                 William Fornaciari",
  title =        "A Comprehensive Side-Channel Information Leakage Analysis of
                 an In-Order {RISC CPU} Microarchitecture",
  journal =      j-TODAES,
  volume =       "23",
  number =       "5",
  pages =        "57:1--57:??",
  month =        oct,
  year =         "2018",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3212719",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:40 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Side-channel attacks are a prominent threat to the security
                 of embedded systems. To perform them, an adversary evaluates
                 the goodness of fit of a set of key-dependent power
                 consumption models to a collection of side-channel
                 measurements taken from an actual device, identifying the
                 secret key value as the one yielding the best-fitting model.
                 In this work, we analyze for the first time the
                 microarchitectural components of a 32-bit in-order RISC CPU,
                 showing which one of them is accountable for unexpected
                 side-channel information leakage. We classify the leakage
                 sources, identifying the data serialization points in the
                 microarchitecture and providing a set of hints that can be
                 fruitfully exploited to generate implementations resistant
                 against side-channel attacks, either writing or generating
                 proper assembly code.",
  acknowledgement = ack-nhfb,
  articleno =    "57",
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Seo:2018:NIS,
  author =       "Minjun Seo and Roman Lysecky",
  title =        "Non-Intrusive In-Situ Requirements Monitoring of
                 Embedded System",
  journal =      j-TODAES,
  volume =       "23",
  number =       "5",
  pages =        "58:1--58:??",
  month =        oct,
  year =         "2018",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3206213",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:40 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Accounting for all operating conditions of a system at
                 the design stage is typically infeasible for complex
                 systems. Monitoring and verifying system requirements
                 at runtime enable a system to continuously and
                 introspectively ensure the system is operating
                 correctly in the presence of dynamic execution
                 scenarios. In this article, we present a
                 requirements-driven methodology enabling efficient
                 runtime monitoring of embedded systems. The proposed
                 approach extracts a runtime monitoring graph from
                 system requirements specified using UML sequence
                 diagrams. Non-intrusive, on-chip hardware dynamically
                 monitors the system execution, verifies the execution
                 adheres to the requirements model, and in the event of
                 a failure provides detailed information that can be
                 analyzed to determine the root cause. Using case
                 studies of an autonomous vehicle and pacemaker
                 prototypes, we analyze the relationship between event
                 coverage, detection rate, and hardware requirements.",
  acknowledgement = ack-nhfb,
  articleno =    "58",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Pomeranz:2018:DDP,
  author =       "Irith Pomeranz",
  title =        "Dynamically Determined Preferred Values and a
                 Design-for-Testability Approach for Multiplexer Select Inputs
                 under Functional Test Sequences",
  journal =      j-TODAES,
  volume =       "23",
  number =       "5",
  pages =        "59:1--59:??",
  month =        oct,
  year =         "2018",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3219778",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:40 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Earlier works observed that certain primary inputs have
                 preferred values, which help increase the gate-level fault
                 coverage when they appear in a functional test sequence. This
                 article observes that multiplexers present additional
                 opportunities for increasing the fault coverage of a
                 functional test sequence, which are not captured by preferred
                 primary input values. Because multiplexers are prevalent,
                 their effect on the fault coverage can be significant. A
                 static analysis that is independent of any functional test
                 sequence is performed in this article to identify preferred
                 values for the outputs of multiplexers. This is followed by a
                 dynamic analysis that adjusts the select inputs of the
                 multiplexers for a given functional test sequence to ensure
                 that the preferred values appear on the outputs of the
                 multiplexers more often. The analysis yields
                 design-for-testability logic for the select inputs of the
                 multiplexers that have preferred values. The logic is
                 independent of the functional test sequence, and it allows
                 the fault coverage to be increased when the select inputs are
                 not primary inputs, or when the same select inputs are used
                 for different multiplexers. Experimental results are
                 presented to demonstrate that this approach has a significant
                 effect on the fault coverage of functional test sequences.",
  acknowledgement = ack-nhfb,
  articleno =    "59",
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Lee:2018:PTT,
  author =       "Dongjin Lee and Sourav Das and Janardhan Rao Doppa and Partha
                 Pratim Pande and Krishnendu Chakrabarty",
  title =        "Performance and Thermal Tradeoffs for Energy-Efficient
                 Monolithic {$3$D} Network-on-Chip",
  journal =      j-TODAES,
  volume =       "23",
  number =       "5",
  pages =        "60:1--60:??",
  month =        oct,
  year =         "2018",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3223046",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:40 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Three-dimensional (3D) integration enables the design of
                 high-performance and energy-efficient network on chip (NoC)
                 architectures as communication backbones for manycore chips.
                 To exploit the benefits of the vertical dimension of 3D
                 integration, through-silicon-via (TSV) has been predominantly
                 used in state-of-the-art manycore chip design. However, for
                 TSV-based systems, high power density and the resultant
                 thermal hotspot remain major concerns from the perspectives
                 of chip functionality and overall reliability. The power
                 consumption and thermal profiles of 3D NoCs can be improved
                 by incorporating a Voltage-Frequency-Island (VFI)-based power
                 management strategy. However, due to inherent thermal
                 constraints of a TSV-based 3D system, we are unable to fully
                 exploit the benefits offered by the power management
                 methodology. In this context, emergence of monolithic 3D
                 (M3D) integration has opened up new possibility of designing
                 ultra-low-power and high-performance circuits and systems.
                 The smaller dimensions of the inter-layer dielectric (ILD)
                 and monolithic inter-tier vias (MIVs) offer high-density
                 integration, flexibility of partitioning logic blocks across
                 multiple tiers, and significant reduction of total
                 wire-length. In this work, we present the first-ever study of
                 the performance-thermal tradeoffs for energy efficient
                 monolithic 3D manycore chips. In particular, we present a
                 comparative performance evaluation of M3D NoCs with respect
                 to their conventional TSV-based counterparts. We demonstrate
                 that the proposed M3D-based NoC architecture incorporating
                 VFI-based power management achieves a maximum of 29.4\% lower
                 energy-delay-product (EDP) compared to the TSV-based designs
                 for a large set of benchmarks. We also demonstrate that the
                 M3D-based NoC shows up to 29.1\% lower maximum temperature
                 than the TSV-based counterpart for these benchmarks.",
  acknowledgement = ack-nhfb,
  articleno =    "60",
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Han:2018:FCS,
  author =       "Inhak Han and Youngsoo Shin",
  title =        "Folded Circuit Synthesis: Min-Area Logic Synthesis Using
                 Dual-Edge-Triggered Flip-Flops",
  journal =      j-TODAES,
  volume =       "23",
  number =       "5",
  pages =        "61:1--61:??",
  month =        oct,
  year =         "2018",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3229082",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:40 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "The area required by combinational logic of a sequential
                 circuit based on standard flip-flops can be reduced by
                 identifying subcircuits that are identical. Pairs of matching
                 subcircuits can then be replaced by circuits in which
                 dual-edge-triggered flip-flops operate on multiplexed data at
                 the rising and falling edges of the clock signal. We show how
                 to modify the Boolean network describing a combinational
                 logic to increase the opportunities for folding, without
                 affecting its function. Experiments with benchmark circuits
                 achieved an average reduction in circuit area of 18\%.",
  acknowledgement = ack-nhfb,
  articleno =    "61",
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Elmandouh:2018:GFV,
  author =       "Eman M. Elmandouh and Amr G. Wassal",
  title =        "Guiding Formal Verification Orchestration Using Machine
                 Learning Methods",
  journal =      j-TODAES,
  volume =       "23",
  number =       "5",
  pages =        "62:1--62:??",
  month =        oct,
  year =         "2018",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3224206",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:40 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Typical modern HW designs include many blocks associated with
                 thousands of design properties. Having today's commercial
                 formal verifiers utilize a complementary set of state-of-art
                 formal algorithms is a key in enabling the formal
                 verification tools to successfully cope with verification
                 problems of different sizes, types, and complexities. Formal
                 engines orchestration is the methodology used to pick the
                 most appropriate formal engine for a specific verification
                 problem. It assures proper scheduling of the formal engines
                 to minimize the time consumed to solve individual design
                 verification problems, hence highly impacts the time required
                 to verify the overall design properties. This work proposes
                 the utilization of supervised machine learning classification
                 techniques to guide the orchestration step by predicting the
                 formal engines that should be assigned to a design property.
                 Up to 16,500 formal verification runs on RTL designs and
                 their properties are used to train the classifier to create a
                 prediction model. The classifier assigns any new verification
                 problem to an appropriate list of formal engines associated
                 with a probability distribution over the set of engines
                 classes. Our results indicate how the proposed model is able
                 to improve the formal suite total run-time by up to 59\% of
                 its maximum allowable time improvement using
                 multi-classification-based orchestration and to nominate with
                 88\% accuracy the appropriate formal engines for
                 new-to-verify HW designs.",
  acknowledgement = ack-nhfb,
  articleno =    "62",
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{K:2018:AAF,
  author =       "Keerthi K. and Chester Rebeiro and Aritra Hazra",
  title =        "An Algorithmic Approach to Formally Verify an {ECC} Library",
  journal =      j-TODAES,
  volume =       "23",
  number =       "5",
  pages =        "63:1--63:??",
  month =        oct,
  year =         "2018",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3224205",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:40 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "The weakest link in cryptosystems is quite often due to the
                 implementation rather than the mathematical underpinnings. A
                 vast majority of attacks in the recent past have targeted
                 programming flaws and bugs to break security systems. Due to
                 the complexity, empirically verifying such systems is
                 practically impossible, while manual verification as well as
                 testing do not provide adequate guarantees. In this article,
                 we leverage model checking techniques to prove the functional
                 correctness of an elliptic curve cryptography (ECC) library
                 with respect to its formal specification. We demonstrate how
                 the huge state space of the C library can be aptly verified
                 using a hierarchical assume-guarantee verification strategy.
                 To test the scalability of this approach, we verify the
                 correctness of five NIST-specified elliptic curve
                 implementations. We also verify the newer curve25519 elliptic
                 curve, which is finding multiple applications, due to its
                 higher security and simpler implementation. The 192-bit NIST
                 elliptic curve took 1 day to verify. This was the smallest
                 curve we verified. The largest curve with a 521-bit prime
                 field took 26 days to verify. Curve25519 took 1.5 days to
                 verify.",
  acknowledgement = ack-nhfb,
  articleno =    "63",
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Chen:2018:EFM,
  author =       "Tseng-Yi Chen and Yuan-Hao Chang and Yuan-Hung Kuan and
                 Ming-Chang Yang and Yu-Ming Chang and Pi-Cheng Hsiu",
  title =        "Enhancing Flash Memory Reliability by Jointly Considering
                 Write-back Pattern and Block Endurance",
  journal =      j-TODAES,
  volume =       "23",
  number =       "5",
  pages =        "64:1--64:??",
  month =        oct,
  year =         "2018",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3229192",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:40 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Owing to high cell density caused by the advanced
                 manufacturing process, the reliability of flash drives turns
                 out to be rather challenging in flash system designs. To
                 enhance the reliability of flash drives, error-correcting
                 code (ECC) has been widely utilized in flash drives to
                 correct error bits during programming/reading data to/from
                 flash drives. Although ECC can effectively enhance the
                 reliability of flash drives by correcting error bits, the
                 capability of ECC would degrade while the program/erase (P/E)
                 cycles of flash blocks is increased. Finally, ECC could not
                 correct a flash page, because a flash page contains too many
                 error bits. As a result, reducing error bits is an effective
                 solution to further improve the reliability of flash drives
                 when a specific ECC is adopted in the flash drive. This work
                 focuses on how to reduce the probability of producing error
                 bits in a flash page. Thus, we propose a pattern-aware write
                 strategy for flash reliability enhancement. The proposed
                 write strategy considers both the P/E cycle of blocks and the
                 pattern of written data while a flash block is allocated to
                 store the written data. Since the proposed write strategy
                 allocates young blocks (respectively, old blocks) for hot
                 data (respectively, cold data) and flips the bit pattern of
                 the written data to the appropriate bit pattern, the proposed
                 strategy can effectively improve the reliability of flash
                 drives. The experimental results show that the proposed
                 strategy can reduce the number of error pages by up to 50\%,
                 compared with the well-known DFTL solution. Moreover, the
                 proposed strategy is orthogonal with all ECC mechanisms so
                 that the reliability of the flash drives with ECC mechanisms
                 can be further improved by the proposed strategy.",
  acknowledgement = ack-nhfb,
  articleno =    "64",
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Xie:2018:TER,
  author =       "Guoqi Xie and Zhetao Li and Na Yuan and Renfa Li and Keqin
                 Li",
  title =        "Toward Effective Reliability Requirement Assurance for
                 Automotive Functional Safety",
  journal =      j-TODAES,
  volume =       "23",
  number =       "5",
  pages =        "65:1--65:??",
  month =        oct,
  year =         "2018",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3230620",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:40 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Automotive functional safety requirement includes response
                 time and reliability requirements learning from the
                 functional safety standard ISO 26262. These two requirements
                 must be simultaneously satisfied to assure automotive
                 functional safety requirement. However, increasing
                 reliability increases the response time intuitively. This
                 study proposes a method to find the solution with the minimum
                 response time while assuring reliability requirement.
                 Pre-assigning reliability values to unassigned tasks by
                 transferring the reliability requirement of the function to
                 each task is a useful reliability requirement assurance
                 approach proposed in recent years. However, the pre-assigned
                 reliability values in state-of-the-art studies have
                 unbalanced distribution of the reliability of all tasks,
                 thereby resulting in a limited reduction in response time.
                 This study presents the geometric mean-based
                 non-fault-tolerant reliability pre-assignment (GMNRP) and
                 geometric mean-based fault-tolerant reliability
                 pre-assignment (GMFRP) approaches, in which geometric
                 mean-based reliability values are pre-assigned to unassigned
                 tasks. Geometric mean can make the pre-assigned reliability
                 values of unassigned tasks to the central tendency, such that
                 it can distribute the reliability requirements in a more
                 balanced way. Experimental results show that GMNRP and GMFRP
                 can effectively reduce the response time compared with their
                 individual state-of-the-art counterparts.",
  acknowledgement = ack-nhfb,
  articleno =    "65",
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Abuowaimer:2018:GRD,
  author =       "Ziad Abuowaimer and Dani Maarouf and Timothy Martin and
                 Jeremy Foxcroft and Gary Gr{\'e}wal and Shawki Areibi and
                 Anthony Vannelli",
  title =        "{GPlace3.0}: Routability-Driven Analytic Placer for
                 {UltraScale FPGA} Architectures",
  journal =      j-TODAES,
  volume =       "23",
  number =       "5",
  pages =        "66:1--66:??",
  month =        oct,
  year =         "2018",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3233244",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:40 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Optimizing for routability during FPGA placement is becoming
                 increasingly important, as failure to spread and resolve
                 congestion hotspots throughout the chip, especially in the
                 case of large designs, may result in placements that either
                 cannot be routed or that require the router to work
                 excessively hard to obtain success. In this article, we
                 introduce a new, analytic routability-aware placement
                 algorithm for Xilinx UltraScale FPGA architectures. The
                 proposed algorithm, called GPlace3.0, seeks to optimize both
                 wirelength and routability. Our work contains several unique
                 features including a novel window-based procedure for
                 satisfying legality constraints in lieu of packing, an
                 accurate congestion estimation method based on modifications
                 to the pathfinder global router, and a novel detailed
                 placement algorithm that optimizes both wirelength and
                 external pin count. Experimental results show that compared
                 to the top three winners at the recent ISPD'16 FPGA placement
                 contest, GPlace3.0 is able to achieve (on average) a 7.53\%,
                 15.15\%, and 33.50\% reduction in routed wirelength,
                 respectively, while requiring less overall runtime. As well,
                 an additional 360 benchmarks were provided directly from
                 Xilinx Inc. These benchmarks were used to compare GPlace3.0
                 to the most recently improved versions of the first- and
                 second-place contest winners. Subsequent experimental results
                 show that GPlace3.0 is able to outperform the improved
                 placers in a variety of areas including number of best
                 solutions found, fewest number of benchmarks that cannot be
                 routed, runtime required to perform placement, and runtime
                 required to perform routing.",
  acknowledgement = ack-nhfb,
  articleno =    "66",
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Fallahzadeh:2018:TPC,
  author =       "Ramin Fallahzadeh and Hassan Ghasemzadeh",
  title =        "Trading Off Power Consumption and Prediction Performance in
                 Wearable Motion Sensors: an Optimal and Real-Time Approach",
  journal =      j-TODAES,
  volume =       "23",
  number =       "5",
  pages =        "67:1--67:??",
  month =        oct,
  year =         "2018",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3198457",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:40 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Power consumption is identified as one of the main
                 complications in designing practical wearable systems, mainly
                 due to their stringent resource limitations. When designing
                 wearable technologies, several system-level design choices,
                 which directly contribute to the energy consumption of these
                 systems, must be considered. In this article, we propose a
                 computationally lightweight system optimization framework
                 that trades off power consumption and performance in
                 connected wearable motion sensors. While existing approaches
                 exclusively focus on one or a few hand-picked design
                 variables, our framework holistically finds the optimal
                 power-performance solution with respect to the specified
                 application need. Our design tackles a multi-variant
                 non-convex optimization problem that is theoretically hard to
                 solve. To decrease the complexity, we propose a smoothing
                 function that reduces this optimization to a convex problem.
                 The reduced optimization is then solved in linear time using
                 a devised derivative-free optimization approach, namely
                 cyclic coordinate search. We evaluate our framework against
                 several holistic optimization baselines using a real-world
                 wearable activity recognition dataset. We minimize the energy
                 consumption for various activity-recognition performance
                 thresholds ranging from 40\% to 80\% and demonstrate up to
                 64\% energy savings.",
  acknowledgement = ack-nhfb,
  articleno =    "67",
  fjournal =     "ACM Transactions on Design Automation of Electronic Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

%%% TODAES 23(6), article 68, December 2018: primal-dual approximation
%%% algorithm for leakage-power minimization by threshold-voltage
%%% assignment.
@Article{Daboul:2018:AAT,
  author =       "Siad Daboul and Stephan Held and Jens Vygen and Sonja
                 Wittke",
  title =        "An Approximation Algorithm for Threshold Voltage
                 Optimization",
  journal =      j-TODAES,
  volume =       "23",
  number =       "6",
  pages =        "68:1--68:??",
  month =        dec,
  year =         "2018",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3232538",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:40 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "We present a primal-dual approximation algorithm for
                 minimizing the leakage power of an integrated circuit
                 by assigning gate threshold voltages. While most
                 existing techniques do not provide a performance
                 guarantee, we prove an upper bound on the power
                 consumption. The algorithm is practical and works with
                 an industrial sign-off timer. It can be used for
                 post-routing power reduction or for optimizing leakage
                 power throughout the design flow. We demonstrate the
                 practical performance on recent microprocessor units.
                 Our implementation obtains significant leakage power
                 reductions of up to 8\% on top of one of the most
                 successful algorithms for gate sizing and threshold
                 voltage optimization. After timing-aware global
                 routing, we achieve leakage power reductions of up to
                 34\%.",
  acknowledgement = ack-nhfb,
  articleno =    "68",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

%%% TODAES 23(6), article 69, December 2018: CASCA design flow and EDSL
%%% for hardware countermeasures against side-channel attacks.  Also
%%% listed in cryptography2010.bib (see bibsource).
@Article{Delledonne:2018:CDA,
  author =       "Lorenzo Delledonne and Vittorio Zaccaria and Ruggero
                 Susella and Guido Bertoni and Filippo Melzani",
  title =        "{CASCA}: a Design Automation Approach for Designing
                 Hardware Countermeasures Against Side-Channel Attacks",
  journal =      j-TODAES,
  volume =       "23",
  number =       "6",
  pages =        "69:1--69:??",
  month =        dec,
  year =         "2018",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3241047",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:40 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Implementing a cryptographic circuit poses challenges
                 not always acknowledged in the backing mathematical
                 theory. One of them is the vulnerability against
                 side-channel attacks. A side-channel attack is a
                 procedure that uses information leaked by the circuit
                 through, for example, its own power consumption or
                 electromagnetic emissions, to derive sensitive data
                 (e.g., the secret key used for encryption). Nowadays,
                 we design circuitry to keep this sensitive information
                 from leaking (i.e., a countermeasure), but the path
                 from specification down to implementation is far from
                 being fully automatic. As we know, manual refinement
                 steps can be error prone and the sheer potential of
                 these errors can be devastating in a scenario such as
                 the one we are dealing with. In this article, we
                 investigate whether a single embedded domain specific
                 language (EDSL) can, at the same time, help us in
                 specifying and enforcing the functionality of the
                 circuit as well as its protection against side-channel
                 attacks. The EDSL is a fundamental block of an original
                 design flow (named Countermeasure Against Side-Channel
                 Attacks, i.e., CASCA) whose aim is to complement an
                 existing industrial scenario and to provide the
                 necessary guarantee that a secure primitive is not
                 vulnerable up to a first-order attack. As a practical
                 case study, we will show how we applied the proposed
                 tools to ensure both functional and extra-functional
                 correctness of a composite-field Advanced Encryption
                 Standard (AES) S-Box. To ensure the reproducibility of
                 this research, this article is accompanied by an open
                 source release of the EDSL$^1$ that contains the
                 presented S-Box implementation and an additional
                 3-Shares threshold implementation of the Keccak $ \chi
                 $ function [7].",
  acknowledgement = ack-nhfb,
  articleno =    "69",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

%%% TODAES 23(6), article 70, December 2018: signal-processing methods
%%% for detecting unauthorized wireless transmissions hidden in
%%% legitimate signals.  Also listed in cryptography2010.bib.
@Article{Chang:2018:DMU,
  author =       "Doohwang Chang and Ganapati Bhat and Umit Ogras and
                 Bertan Bakkaloglu and Sule Ozev",
  title =        "Detection Mechanisms for Unauthorized Wireless
                 Transmissions",
  journal =      j-TODAES,
  volume =       "23",
  number =       "6",
  pages =        "70:1--70:??",
  month =        dec,
  year =         "2018",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3241046",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:40 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "With increasing diversity of supply chains from design
                 to delivery, there is an increasing risk that
                 unauthorized changes can be made within an IC. One of
                 the motivations for this type of change is to learn
                 important information (such as encryption keys,
                 spreading codes) from the hardware, and transmit this
                 information to a malicious party. To evade detection,
                 such unauthorized communication can be hidden within
                 legitimate bursts of transmit signal. In this article,
                 we present several signal processing techniques to
                 detect unauthorized transmissions which can be hidden
                 within the legitimate signal. We employ a scheme where
                 the legitimate transmission is configured to emit a
                 single sinusoidal waveform. We use time and spectral
                 domain analysis techniques to explore the transmit
                 spectrum. Since every transmission, no matter how low
                 the signal power is, must have a spectral signature, we
                 identify unauthorized transmission by eliminating the
                 desired signal from the spectrum after capture.
                 Experiment results show that when spread spectrum
                 techniques are used, the presence of an unauthorized
                 signal can be determined without the need for decoding
                 the malicious signal. The proposed detection techniques
                 need to be used as enhancements to the regular testing
                 and verification procedures if hardware security is a
                 concern.",
  acknowledgement = ack-nhfb,
  articleno =    "70",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

%%% TODAES 23(6), article 71, December 2018: process-variation-aware
%%% analog sizing and layout retargeting with hybrid OPC.
@Article{Dong:2018:PAA,
  author =       "Xuan Dong and Lihong Zhang",
  title =        "{PV}-Aware Analog Sizing for Robust Analog Layout
                 Retargeting with Optical Proximity Correction",
  journal =      j-TODAES,
  volume =       "23",
  number =       "6",
  pages =        "71:1--71:??",
  month =        dec,
  year =         "2018",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3236624",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:40 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "For analog integrated circuits (ICs) in nanometer
                 technology nodes, process variation (PV) induced by
                 lithography may not only cause serious wafer pattern
                 distortion, but also result in device mismatch, which
                 can readily ruin circuit performance. Although the
                 conventional optical proximity correction (OPC)
                 operations can effectively improve the wafer image
                 fidelity, an analog circuit without robust device sizes
                 is still highly vulnerable to such a mismatch effect.
                 In this article, a PV-aware sizing-inclusive analog
                 layout retargeting framework, which encloses an
                 efficient hybrid OPC scheme for yield enhancement, is
                 proposed. The device sizes are tuned during the layout
                 retargeting process by using a deterministic
                 circuit-sizing algorithm considering PV conditions. Our
                 hybrid OPC method combines global rule-based OPC with
                 local model-based OPC functions to boost the wafer
                 image quality improvement but without degrading the
                 computational efficiency. The experimental results show
                 that our proposed framework can achieve the best wafer
                 image quality and circuit performance preservation
                 compared to any other alternative approaches.",
  acknowledgement = ack-nhfb,
  articleno =    "71",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

%%% TODAES 23(6), article 72, December 2018: overlay-based trigger
%%% circuits for FPGA debug without recompilation.
@Article{Eslami:2018:RTC,
  author =       "Fatemeh Eslami and Steven J. E. Wilton",
  title =        "Rapid Triggering Capability Using an Adaptive Overlay
                 during {FPGA} Debug",
  journal =      j-TODAES,
  volume =       "23",
  number =       "6",
  pages =        "72:1--72:??",
  month =        dec,
  year =         "2018",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3241045",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:40 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Field Programmable Gate Array (FPGA) technology is
                 rapidly gaining traction in a wide range of
                 applications. Nonetheless, FPGAs still require long
                 design and debug cycles. To debug hardware circuits,
                 trace-based instrumentation is inserted into the design
                 that enables capturing data during the circuit
                 execution into on-chip memories for later offline
                 analysis. Since on-chip memories are limited, a trigger
                 circuitry is used to only record data related to
                 specific events during the execution. However, during
                 debugging, a circuit recompilation is required on
                 modifying these instruments. This can be very slow,
                 reducing debug productivity. In this article, we
                 propose a non-intrusive and rapid triggering solution
                 with a tailored overlay fabric and mapping algorithm
                 that seeks to enable fast debug iterations without
                 performing a recompilation. This overlay is specialized
                 for small combinational and sequential circuits with a
                 single output; such circuits are typical of common
                 trigger functions. We present an adaptive strategy to
                 construct the overlay fabric using spare FPGA resources
                 at compile time. At debug time, our proposed trigger
                 mapping algorithms adapt to this specialized overlay to
                 rapidly implement combinational and sequential trigger
                 circuits. Our results show that the overlay fabric can
                 be reconfigured to map different triggering scenarios
                 in less than 40s instead of recompiling the circuit
                 during debug iterations, increasing debug
                 productivity.",
  acknowledgement = ack-nhfb,
  articleno =    "72",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

%%% TODAES 23(6), article 73, December 2018: fault-tolerant
%%% unicast-based multicast test delivery for network-on-chip testing.
@Article{Xiang:2018:FTU,
  author =       "Dong Xiang and Krishnendu Chakrabarty and Hideo
                 Fujiwara",
  title =        "Fault-Tolerant Unicast-Based Multicast for Reliable
                 Network-on-Chip Testing",
  journal =      j-TODAES,
  volume =       "23",
  number =       "6",
  pages =        "73:1--73:??",
  month =        dec,
  year =         "2018",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3243214",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:40 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "We present a unified test technique that targets
                 faults in links, routers, and cores of a
                 network-on-chip design based on test sessions. We call
                 an entire procedure, that delivers test packets to the
                 subset of routers/cores, a test session. Test delivery
                 for router/core testing is formulated as two
                 fault-tolerant multicast algorithms. Test packet
                 delivery for routers is implemented as a fault-tolerant
                 unicast-based multicast scheme via the fault-free links
                 and routers that were identified in the previous test
                 sessions to avoid packet corruption. A new
                 fault-tolerant routing algorithm is also proposed for
                 the unicast-based multicast core test delivery in the
                 whole network. Identical cores share the same test set,
                 and they are tested within the same test session.
                 Simulation results highlight the effectiveness of the
                 proposed method in reducing test time.",
  acknowledgement = ack-nhfb,
  articleno =    "73",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

%%% TODAES 23(6), article 74e, December 2018: guest editorial for the
%%% special issue on IoT system performance, reliability, and security.
%%% The page prefix uses the article number 74e, matching articleno and
%%% keeping this entry distinct from article 74 (Yang:2018:UUE).
@Article{Topaloglu:2018:ETS,
  author =       "Rasit O. Topaloglu and Farinaz Koushanfar",
  title =        "Editorial for {TODAES} Special Issue on {Internet of
                 Things} System Performance, Reliability, and Security",
  journal =      j-TODAES,
  volume =       "23",
  number =       "6",
  pages =        "74e:1--74e:??",
  month =        dec,
  year =         "2018",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3276908",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:40 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  articleno =    "74e",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

%%% TODAES 23(6), article 74, December 2018: unclonable, temperature-
%%% sensitive chipless RFID (UCR) tag for supply-chain protection.
@Article{Yang:2018:UUE,
  author =       "Kun Yang and Ulbert Botero and Haoting Shen and Damon
                 L. Woodard and Domenic Forte and Mark M. Tehranipoor",
  title =        "{UCR}: an Unclonable Environmentally Sensitive
                 Chipless {RFID} Tag For Protecting Supply Chain",
  journal =      j-TODAES,
  volume =       "23",
  number =       "6",
  pages =        "74:1--74:??",
  month =        dec,
  year =         "2018",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3264658",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:40 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Chipless Radio Frequency Identification (RFID) tags
                 that do not include an integrated circuit (IC) in the
                 transponder are more appropriate for supply-chain
                 management of low-cost commodities and have been
                 gaining extensive attention due to their relatively
                 lower price. However, existing chipless RFID tags
                 consume considerable tag area and manufacturing
                 time/cost because of complex fabrication process (e.g.,
                 requiring removing or shorting some resonators on the
                 tag substrate to encode data). Worse still, their
                 identifiers (IDs) are deterministic, clonable, and
                 small in terms of bitwidth. To address these
                 shortcomings and help preserve the cold chain for
                 commodities (e.g., vaccines, pharmaceuticals, etc.)
                 sensitive to temperature, we develop a novel unclonable
                 environmentally sensitive chipless RFID (UCR) tag that
                 intrinsically generates a unique ID from both
                 manufacturing variations and ambient temperature
                 variation. A UCR tag consists of two parts: (i) a
                 certain number of concentric ring slot resonators
                 integrated on a certain laminate (e.g., TACONIC TLX-0),
                 whose resonance frequencies rely on geometric
                 parameters of slot resonators and dielectric constant
                 of substrate material that are sensitive to
                 manufacturing variations, and (ii) a stand-alone
                 circular ring slot resonator integrated on a particular
                 substrate (e.g., grease) that will be melted at a high
                 temperature, whose resonance frequency relies on
                 geometric parameters of slot resonator, dielectric
                 constant of substrate material, and ambient
                 temperature. UCR tags have the capability to track
                 commodities and their temperatures in the supply chain.
                 The area of UCR tag is comparable to regular quick
                 response (QR) code. Experimental results based on UCR
                 tag prototypes have verified their uniqueness and
                 reliability.",
  acknowledgement = ack-nhfb,
  articleno =    "74",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

%%% TODAES 23(6), article 75, December 2018: SHAIP, a mutual
%%% authentication protocol based on secure Hamming-distance
%%% computation over intrinsic PUF responses.  Also listed in
%%% cryptography2010.bib.
@Article{Hussain:2018:SSH,
  author =       "Siam Umar Hussain and M. Sadegh Riazi and Farinaz
                 Koushanfar",
  title =        "{SHAIP}: {Secure Hamming Distance for Authentication
                 of Intrinsic PUFs}",
  journal =      j-TODAES,
  volume =       "23",
  number =       "6",
  pages =        "75:1--75:??",
  month =        dec,
  year =         "2018",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3274669",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:40 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "In this article, we present SHAIP, a secure Hamming
                 distance-based mutual authentication protocol. It
                 allows an unlimited number of authentications by
                 employing an intrinsic Physical Unclonable Function
                 (PUF). PUFs are being increasingly employed for remote
                 authentication of devices. Most of these devices have
                 limited resources. Therefore, the intrinsic PUFs are
                 most suitable for this task as they can be built with
                 little or no modification to the underlying hardware
                 platform. One major drawback of the current
                 authentication schemes is that they expose the PUF
                 response. This makes the intrinsic PUFs, which have a
                 limited number of challenge-response pairs, unusable
                 after a certain number of authentication sessions.
                 Moreover, these schemes are one way in the sense that
                 they only allow one party, the prover, to authenticate
                 herself to the verifier. We propose a symmetric mutual
                 authentication scheme based on secure
                 (privacy-preserving) computation of the Hamming
                 distance between the PUF response from the remote
                 device and reference response stored at the verifier
                 end. This allows both parties to authenticate each
                 other without revealing their respective sets of
                 inputs. We show that our scheme is effective with all
                 state-of-the-art intrinsic PUFs. The proposed scheme is
                 lightweight and does not require any modification to
                 the underlying hardware.",
  acknowledgement = ack-nhfb,
  articleno =    "75",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

%%% TODAES 23(6), article 76, December 2018: hybrid CMOS/STT
%%% reconfigurable LUT gates to resist IC reverse engineering.
@Article{Winograd:2018:PGU,
  author =       "Ted Winograd and Gaurav Shenoy and Hassan Salmani and
                 Hamid Mahmoodi and Setareh Rafatirad and Houman
                 Homayoun",
  title =        "Programmable Gates Using Hybrid {CMOS--STT} Design to
                 Prevent {IC} Reverse Engineering",
  journal =      j-TODAES,
  volume =       "23",
  number =       "6",
  pages =        "76:1--76:??",
  month =        dec,
  year =         "2018",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3236622",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:40 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "This article presents a rigorous step towards
                 design-for-assurance by introducing a new class of
                 logically reconfigurable design resilient to design
                 reverse engineering. Based on the non-volatile spin
                 transfer torque (STT) magnetic technology, we introduce
                 a basic set of non-volatile reconfigurable
                 Look-Up-Table (LUT) logic components (NV-STT-based
                 LUTs). An STT-based LUT with a significantly different
                 set of characteristics compared to CMOS provides new
                 opportunities to enhance design security yet makes it
                 challenging to remain highly competitive with custom
                 CMOS or even SRAM-based LUT in terms of power,
                 performance, and area. To address these challenges, we
                 propose several algorithms to select and replace custom
                 CMOS gates with reconfigurable STT-based LUTs during
                 design implementation such that the functionality of
                 STT-based components and therefore the entire design
                 cannot be determined in any manageable time, rendering
                 any design reverse engineering attack ineffective. Our
                 study, conducted on a large number of standard circuit
                 benchmarks, concludes significant resiliency of hybrid
                 STT-CMOS circuits against various types of attacks.
                 Furthermore, the selection algorithms on average have a
                 small impact on the performance of the circuit. We also
                 tested these techniques against satisfiability attacks
                 developed recently and show that these techniques also
                 render more advanced reverse-engineering techniques
                 computationally infeasible.",
  acknowledgement = ack-nhfb,
  articleno =    "76",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

%%% TODAES 23(6), article 77, December 2018: online learning from
%%% expert advice where experts may abstain (sleeping experts).
@Article{Truong:2018:LSE,
  author =       "Anh Truong and S. Rasoul Etesami and Negar Kiyavash",
  title =        "Learning From Sleeping Experts: Rewarding Informative,
                 Available, and Accurate Experts",
  journal =      j-TODAES,
  volume =       "23",
  number =       "6",
  pages =        "77:1--77:??",
  month =        dec,
  year =         "2018",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3236617",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:40 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "We consider a generalized model of learning from
                 expert advice in which experts could abstain from
                 participating at some rounds. Our proposed online
                 algorithm falls into the class of weighted average
                 predictors and uses a time-varying multiplicative
                 weight update rule. This update rule changes the weight
                 of an expert based on his or her relative performance
                 compared to the average performance of available
                 experts at the current round. This makes the algorithm
                 suitable for recommendation systems in the presence of
                 an adversary with many potential applications in the
                 new emerging area of the Internet of Things. We prove
                 the convergence of our algorithm to the best expert,
                 defined in terms of both availability and accuracy, in
                 the stochastic setting. In particular, we show the
                 applicability of our definition of best expert through
                 convergence analysis of another well-known algorithm in
                 this setting. Finally, through simulation results on
                 synthetic and real datasets, we justify the
                 out-performance of our proposed algorithms compared to
                 the existing ones in the literature.",
  acknowledgement = ack-nhfb,
  articleno =    "77",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

%%% TODAES 23(6), article 78, December 2018: energy/delay tradeoffs in
%%% splitting computation and communication across an IoT network.
@Article{Chopra:2018:OAC,
  author =       "Abhimanyu Chopra and Hakan Aydin and Setareh Rafatirad
                 and Houman Homayoun",
  title =        "Optimal Allocation of Computation and Communication in
                 an {IoT} Network",
  journal =      j-TODAES,
  volume =       "23",
  number =       "6",
  pages =        "78:1--78:??",
  month =        dec,
  year =         "2018",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3236623",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:40 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Internet of things (IoT) is being developed for a wide
                 range of applications from home automation and personal
                 fitness to smart cities. With the extensive growth in
                 adaptation of IoT devices comes the uncoordinated and
                 substandard designs aimed at promptly making products
                 available to the end consumer. This substandard
                 approach restricts the growth of IoT in the near future
                 and necessitates that studies understand requirements
                 for an efficient design. A particular area where IoT
                 applications have grown significantly is surveillance
                 and monitoring. Applications of IoT in this domain are
                 relying on distributed sensors, each equipped with a
                 battery, capable of collecting images, processing
                 images, and communicating the raw or processed data to
                 the nearest node until it reaches the base station for
                 decision making. In such an IoT network where
                 processing can be distributed over the network, the
                 important research question is how much of data each
                 node should process and how much it should communicate
                 for a given objective. This work answers this question
                 and provides a deeper understanding of energy and delay
                 tradeoffs in an IoT network with three different target
                 metrics.",
  acknowledgement = ack-nhfb,
  articleno =    "78",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

%%% TODAES 23(6), article 79, December 2018: P3, privacy-preserving
%%% vehicle localization built on secure function evaluation (GC and
%%% BMR protocols).  Also listed in cryptography2010.bib.
@Article{Hussain:2018:PPP,
  author =       "Siam Umar Hussain and Farinaz Koushanfar",
  title =        "{P3}: Privacy Preserving Positioning for Smart
                 Automotive Systems",
  journal =      j-TODAES,
  volume =       "23",
  number =       "6",
  pages =        "79:1--79:??",
  month =        dec,
  year =         "2018",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3236625",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:40 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "This article presents the first privacy-preserving
                 localization method based on provably secure primitives
                 for smart automotive systems. Using this method, a car
                 that is lost due to unavailability of GPS can compute
                 its location with assistance from three nearby cars,
                 while the locations of all the participating cars
                 including the lost car remain private. Technological
                 enhancement of modern vehicles, especially in
                 navigation and communication, necessitates parallel
                 enhancement in security and privacy. Previous
                 approaches to maintaining user location privacy
                 suffered from one or more of the following drawbacks:
                 trade-off between accuracy and privacy, one-sided
                 privacy, and the need of a trusted third party that
                 presents a single point to attack. The localization
                 method presented here is one of the very first
                 location-based services that eliminates all these
                 drawbacks. Two protocols for computing the location is
                 presented here based on two Secure Function Evaluation
                 (SFE) techniques that allow multiple parties to jointly
                 evaluate a function on inputs that are encrypted to
                 maintain privacy. The first one is based on the
                 two-party protocol named Yao's Garbled Circuit (GC).
                 The second one is based on the Beaver-Micali-Rogaway
                 (BMR) protocol that allows inputs from more than two
                 parties. The two secure localization protocols exhibit
                 trade-offs between performance and resilience against
                 collusion. Along with devising the protocols, we design
                 and optimize netlists for the functions required for
                 location computation by leveraging conventional logic
                 synthesis tools with custom libraries optimized for
                 SFE. Proof-of-concept implementation of the protocol
                 shows that the complete operation can be performed
                 within only 355ms. The fast computing time enables
                 localization of even moving cars.",
  acknowledgement = ack-nhfb,
  articleno =    "79",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Muztoba:2018:IAI,
  author =       "Md Muztoba and Rohit Voleti and Fatih Karabacak and
                 Jaehyun Park and Umit Y. Ogras",
  title =        "Instinctive Assistive Indoor Navigation using
                 Distributed Intelligence",
  journal =      j-TODAES,
  volume =       "23",
  number =       "6",
  pages =        "80:1--80:??",
  month =        dec,
  year =         "2018",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3212720",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:40 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Cyber-physical systems (CPS) and the Internet of
                 Things (IoT) offer a significant potential to improve
                 the effectiveness of assistive technologies for those
                 with physical disabilities. Practical assistive
                 technologies should minimize the number of inputs from
                 users to reduce their cognitive and physical effort.
                 This article presents an energy-efficient framework and
                 algorithm for assistive indoor navigation with
                 multi-modal user input. The goal of the proposed
                 framework is to simplify the navigation tasks and make
                 them more instinctive for the user. Our framework
                 automates indoor navigation using only a few user
                 commands captured through a wearable device. The
                 proposed methodology is evaluated using both a virtual
                 smart building and a prototype. The evaluations for
                 three different floorplans show one order of magnitude
                 reduction in user effort and communication energy
                 required for navigation, when compared to conventional
                 navigation methodologies that require continuous user
                 inputs.",
  acknowledgement = ack-nhfb,
  articleno =    "80",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Karabacak:2018:RDU,
  author =       "Fatih Karabacak and Umit Ogras and Sule Ozev",
  title =        "Remote Detection of Unauthorized Activity via Spectral
                 Analysis",
  journal =      j-TODAES,
  volume =       "23",
  number =       "6",
  pages =        "81:1--81:??",
  month =        dec,
  year =         "2018",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3276770",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:40 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Unauthorized hardware or firmware modifications, known
                 as trojans, can steal information, drain the battery,
                 or damage IoT devices. Since trojans may be triggered
                 in the field at an unknown instance, it is important to
                 detect their presence at runtime. However, it is
                 difficult to run sophisticated detection algorithms on
                 these devices due to limited computational power and
                 energy and, in some cases, lack of accessibility. This
                 article presents a stand-off self-referencing technique
                 for detecting unauthorized activity. The proposed
                 technique processes involuntary electromagnetic
                 emissions on a separate hardware, which is physically
                 decoupled from the device under test. When the device
                 enters the test mode, a predefined test application is
                 run on the device repetitively for a known period. The
                 periodicity ensures that the spectral electromagnetic
                 power of the test application concentrates at known
                 frequencies, leaving the remaining frequencies within
                 the operating bandwidth at the noise level. Any
                 deviations from the noise level for these unoccupied
                 frequency locations indicate the presence of unknown
                 (unauthorized) activity. Hence, we are able to
                 differentiate trojan activity without using a golden
                 reference, or any knowledge of the attributes of the
                 trojan activity. Experiments based on hardware
                 measurements show that the proposed technique achieves
                 close to 100\% detection accuracy at up to 120cm
                 distance.",
  acknowledgement = ack-nhfb,
  articleno =    "81",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Lin:2019:QEO,
  author =       "Chun-Han Lin and Chih-Kai Kang and Pi-Cheng Hsiu",
  title =        "Quality-Enhanced {OLED} Power Savings on Mobile
                 Devices",
  journal =      j-TODAES,
  volume =       "24",
  number =       "1",
  pages =        "1:1--1:??",
  month =        jan,
  year =         "2019",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3243215",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:40 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "In the future, mobile systems will increasingly
                 feature more advanced organic light-emitting diode
                 (OLED) displays. The power consumption of these
                 displays is highly dependent on the image content.
                 However, existing OLED power-saving techniques either
                 change the visual experience of users or degrade the
                 visual quality of images in exchange for a reduction in
                 the power consumption. Some techniques attempt to
                 enhance the image quality by employing a compound
                 objective function. In this article, we present a
                 win-win scheme that always enhances the image quality
                 while simultaneously reducing the power consumption. We
                 define metrics to assess the benefits and cost for
                 potential image enhancement and power reduction. We
                 then introduce algorithms that ensure the
                 transformation of images into their quality-enhanced
                 power-saving versions. Next, the win-win scheme is
                 extended to process videos at a justifiable
                 computational cost. All the proposed algorithms are
                 shown to possess the win-win property without assuming
                 accurate OLED power models. Finally, the proposed
                 scheme is realized through a practical camera
                 application and a video camcorder on mobile devices.
                 The results of experiments conducted on a commercial
                 tablet with a popular image database and on a
                 smartphone with real-world videos are very encouraging
                 and provide valuable insights for future research and
                 practices.",
  acknowledgement = ack-nhfb,
  articleno =    "1",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Amir:2019:SPC,
  author =       "Maral Amir and Frank Vahid and Tony Givargis",
  title =        "Switching Predictive Control Using Reconfigurable
                 State-Based Model",
  journal =      j-TODAES,
  volume =       "24",
  number =       "1",
  pages =        "2:1--2:??",
  month =        jan,
  year =         "2019",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3267126",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:40 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Advanced control methodologies have helped the
                 development of modern vehicles that are capable of path
                 planning and path following. For instance, Model
                 Predictive Control (MPC) employs a predictive model to
                 predict the behavior of the physical system for a
                 specific time horizon in the future. An optimization
                 problem is solved to compute optimal control actions
                 while handling model uncertainties and nonlinearities.
                 However, these prediction routines are computationally
                 intensive and the computational overhead grows with the
                 complexity of the model. Switching MPC addresses this
                 issue by combining multiple predictive models, each
                 with a different precision granularity. In this
                 article, we proposed a novel switching predictive
                 control method based on a model reduction scheme to
                 achieve various model granularities for path following
                 in autonomous vehicles. A state-based model with
                 tunable parameters is proposed to operate as a
                 reconfigurable predictive model of the vehicle. A
                 runtime switching algorithm is presented that selects
                 the best model using machine learning. We employed a
                 metric that formulates the tradeoff between the error
                 and computational savings due to model reduction. Our
                 simulation results show that the use of the predictive
                 model in the switching scheme as opposed to single
                 granularity scheme, yields a 45\% decrease in execution
                 time in tradeoff for a small 12\% loss in accuracy in
                 prediction of future outputs and no loss of accuracy in
                 tracking the reference trajectory.",
  acknowledgement = ack-nhfb,
  articleno =    "2",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Erol:2019:KSB,
  author =       "Osman Emir Erol and Sule Ozev",
  title =        "Knowledge- and Simulation-Based Synthesis of
                 Area-Efficient Passive Loop Filter Incremental
                 {Zoom-ADC} for Built-In Self-Test Applications",
  journal =      j-TODAES,
  volume =       "24",
  number =       "1",
  pages =        "3:1--3:??",
  month =        jan,
  year =         "2019",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3266227",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:40 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "We propose a fully differential, synthesizable
                 zoom-ADC architecture with a passive loop filter for
                 low-frequency Built-In Self-Test (BIST) applications,
                 along with a synthesis tool that can target various
                 design specifications. We present the detailed ADC
                 architecture and a step-by-step process for designing
                 the zoom-ADC. The design flow does not rely on the
                 extensive knowledge of an experienced ADC designer. Two
                 ADCs have been synthesized with different performance
                 requirements in the 65nm CMOS process. The first ADC
                 achieves a 90.4dB Signal-to-Noise Ratio (SNR) in 512 $
                 \mu $ s measurement time and consumes 17 $ \mu $ W
                 power. The second design achieves a 78.2dB SNR in 31.25
                 $ \mu $ s measurement time and consumes 63 $ \mu $ W
                 power.",
  acknowledgement = ack-nhfb,
  articleno =    "3",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Chen:2019:SAT,
  author =       "Yukai Chen and Sara Vinco and Enrico Macii and Massimo
                 Poncino",
  title =        "{SystemC-AMS} Thermal Modeling for the Co-simulation
                 of Functional and Extra-Functional Properties",
  journal =      j-TODAES,
  volume =       "24",
  number =       "1",
  pages =        "4:1--4:??",
  month =        jan,
  year =         "2019",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3267125",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:40 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Temperature is a critical property of smart systems,
                 due to its impact on reliability and to its
                 inter-dependence with power consumption. Unfortunately,
                 the current design flows evaluate thermal evolution
                 ex-post on offline power traces. This does not allow to
                 consider temperature as a dimension in the design loop,
                 and it misses all the complex inter-dependencies with
                 design choices and power evolution. In this article, by
                 adopting the functional language SystemC-AMS (Analog
                 Mixed Signal), we propose a method to enable
                 thermal/power/functional co-simulation. The system
                 thermal model is built by using state-of-the-art
                 circuit equivalent models, by exploiting the support
                 for electrical linear networks intrinsic of
                 SystemC-AMS. The experimental results will show that
                 the choice of SystemC-AMS is a winning strategy for
                 building a simultaneous simulation of multiple
                 functional and extra-functional properties of a system.
                 The generated code exposes an accuracy comparable to
                 that of the reference thermal simulator HotSpot.
                 Additionally, the initial overhead due to the general
                 purpose nature of SystemC-AMS is compensated by the
                 surprisingly high performance of transient simulation,
                 with speedups as high as two orders of magnitude.",
  acknowledgement = ack-nhfb,
  articleno =    "4",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Song:2019:HRB,
  author =       "Yang Song and Olivier Alavoine and Bill Lin",
  title =        "Harvesting Row-Buffer Hits via Orchestrated Last-Level
                 Cache and {DRAM} Scheduling for Heterogeneous Multicore
                 Systems",
  journal =      j-TODAES,
  volume =       "24",
  number =       "1",
  pages =        "5:1--5:??",
  month =        jan,
  year =         "2019",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3269982",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:40 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "In heterogeneous multicore systems, the memory
                 subsystem, including the last-level cache and DRAM, is
                 widely shared among the CPU, the GPU, and the real-time
                 cores. Due to their distinct memory traffic patterns,
                 heterogeneous cores result in more frequent cache
                 misses at the last-level cache. As cache misses travel
                 through the memory subsystem, two schedulers are
                 involved for the last-level cache and DRAM,
                 respectively. Prior studies treated the scheduling of
                 the last-level cache and DRAM as independent stages.
                 However, with no orchestration and limited visibility
                 of memory traffic, neither scheduling stage is able to
                 ensure optimal scheduling decisions for memory
                 efficiency. Unnecessary precharges and row activations
                 happen in DRAM when the memory scheduler is ignorant of
                 incoming cache misses, and DRAM row-buffer states are
                 invisible to the last-level cache. In this article, we
                 propose a unified memory controller for the
                 last-level cache and DRAM with orchestrated schedulers.
                 The memory scheduler harvests row-buffer hit
                 opportunities in cache request buffers during spare
                 time without inducing significant implementation cost.
                 We further introduce a dynamic orchestrated scheduling
                 policy to improve memory efficiency while achieving
                 target CPU IPC. Extensive evaluations show that the
                 proposed controller improves the total memory bandwidth
                 of DRAM by 16.8\% on average and saves DRAM energy by
                 up to 29.7\% while achieving comparable CPU IPCs. With
                 the dynamic scheduling policy, the unified controller
                 achieves the same IPC as the conventional design and
                 increases DRAM bandwidth by 9.2\%. In addition, we
                 explore the potential of the proposed memory controller
                 to attain improvements on both memory bandwidth and CPU
                 IPC.",
  acknowledgement = ack-nhfb,
  articleno =    "5",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Choi:2019:OFT,
  author =       "Junchul Choi and Hoeseok Yang and Soonhoi Ha",
  title =        "Optimization of Fault-Tolerant Mixed-Criticality
                 Multi-Core Systems with Enhanced {WCRT} Analysis",
  journal =      j-TODAES,
  volume =       "24",
  number =       "1",
  pages =        "6:1--6:??",
  month =        jan,
  year =         "2019",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3275154",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:40 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "This article proposes a novel optimization technique
                 of fault-tolerant mixed-criticality multi-core systems
                 with worst-case response time (WCRT) guarantees.
                 Typically, in fault-tolerant multi-core systems, tasks
                 can be replicated or re-executed in order to enhance
                 the reliability. In addition, based on the policy of
                 mixed-criticality scheduling, low-criticality tasks can
                 be dropped at runtime. Such uncertainties caused by
                 hardening and mixed-criticality scheduling make WCRT
                 analysis very difficult. We show that previous analysis
                 techniques are pessimistic as they consider avoidably
                 extreme cases that can be safely ignored within the
                 given reliability constraint. We improve the analysis
                 in order to tighten the pessimism of WCRT estimates by
                 considering the maximum number of faults to be
                 tolerated. Further, we improve the mixed-criticality
                 scheduling by allowing partial dropping of
                 low-criticality tasks. On top of those, we explore the
                 design space of hardening, task-to-core mapping, and
                 quality-of-service of the multi-core mixed-criticality
                 systems. The effectiveness of the proposed technique is
                 verified by extensive experiments with synthetic and
                 real-life benchmarks.",
  acknowledgement = ack-nhfb,
  articleno =    "6",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Pomeranz:2019:BFB,
  author =       "Irith Pomeranz",
  title =        "Boundary-Functional Broadside and Skewed-Load Tests",
  journal =      j-TODAES,
  volume =       "24",
  number =       "1",
  pages =        "7:1--7:??",
  month =        jan,
  year =         "2019",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3276976",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:40 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Close-to-functional broadside tests are used for
                 avoiding overtesting of delay faults that can result
                 from non-functional operation conditions, while
                 avoiding test escapes because of faults that cannot be
                 detected under functional operation conditions. When a
                 close-to-functional broadside test deviates from
                 functional operation conditions, the deviation can
                 affect the entire circuit. This article defines the
                 concept of a boundary-functional broadside test where
                 non-functional operation conditions are prevented from
                 crossing a preselected boundary. Using the procedure
                 described in this article, the boundary maintains the
                 same values under a boundary-functional broadside test
                 as under a functional broadside test from which it is
                 derived. Indirectly, this ensures that the deviations
                 from functional operation conditions throughout the
                 entire circuit are limited. The concept of a
                 boundary-functional broadside test is extended to
                 skewed-load tests, and to partial-boundary-functional
                 tests. Experimental results are presented for benchmark
                 circuits to demonstrate the fault coverage improvements
                 that can be achieved using boundary-functional
                 broadside and skewed-load tests as well as
                 partial-boundary-functional tests of both types.",
  acknowledgement = ack-nhfb,
  articleno =    "7",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Li:2019:SEA,
  author =       "Jiajun Li and Guihai Yan and Wenyan Lu and Shijun Gong
                 and Shuhao Jiang and Jingya Wu and Xiaowei Li",
  title =        "{SynergyFlow}: an Elastic Accelerator Architecture
                 Supporting Batch Processing of Large-Scale Deep Neural
                 Networks",
  journal =      j-TODAES,
  volume =       "24",
  number =       "1",
  pages =        "8:1--8:??",
  month =        jan,
  year =         "2019",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3275243",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:40 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Neural networks (NNs) have achieved great success in a
                 broad range of applications. As NN-based methods are
                 often both computation and memory intensive,
                 accelerator solutions have been proved to be highly
                 promising in terms of both performance and energy
                 efficiency. Although prior solutions can deliver high
                 computational throughput for convolutional layers, they
                 could incur severe performance degradation when
                 accommodating the entire network model, because there
                 exist very diverse computing and memory bandwidth
                 requirements between convolutional layers and fully
                 connected layers and, furthermore, among different NN
                 models. To overcome this problem, we proposed an
                 elastic accelerator architecture, called SynergyFlow,
                 which intrinsically supports layer-level and
                 model-level parallelism for large-scale deep neural
                 networks. SynergyFlow boosts the resource utilization
                 by exploiting the complementary effect of resource
                 demanding in different layers and different NN models.
                 SynergyFlow can dynamically reconfigure itself
                 according to the workload characteristics, maintaining
                 a high performance and high resource utilization among
                 various models. As a case study, we implement
                 SynergyFlow on a P395-AB FPGA board. Under 100MHz
                 working frequency, our implementation improves the
                 performance by 33.8\% on average (up to 67.2\% on
                 AlexNet) compared to comparable provisioned previous
                 architectures.",
  acknowledgement = ack-nhfb,
  articleno =    "8",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Smirnov:2019:AOV,
  author =       "Fedor Smirnov and Felix Reimann and J{\"u}rgen Teich
                 and Michael Gla{\ss}",
  title =        "Automatic Optimization of the {VLAN} Partitioning in
                 Automotive Communication Networks",
  journal =      j-TODAES,
  volume =       "24",
  number =       "1",
  pages =        "9:1--9:??",
  month =        jan,
  year =         "2019",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3278120",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:40 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Dividing the communication network into so-called
                 Virtual Local Area Networks (VLANs), i.e., subnetworks
                 that are isolated at the data link layer (OSI layer 2),
                 is a promising approach to address the increasing
                 security challenges in automotive networks. The
                 automation of the VLAN partitioning is a
                 well-researched problem in the domain of local or
                 metropolitan area networks. However, the approaches
                 used there are hardly applicable for the design of
                 automotive networks as they mainly focus on reducing
                 the amount of broadcast traffic and cannot capture the
                 many design objectives of automotive networks like the
                 message timing or the link load, which are affected by
                 the VLAN partitioning. As a remedy, this article
                 proposes an approach based on a set of Pseudo-Boolean
                 constraints to generate a message routing which is
                 feasible with respect to the VLAN-related routing
                 restrictions in automotive networks. This approach can
                 be used for a design space exploration to optimize not
                 only the VLAN partitioning but also other
                 routing-related objectives. We demonstrate both the
                 efficiency of our message routing approach and the now
                 accessible optimization potential for the complete
                 Electric/Electronic architecture with a
                 mixed-criticality system from the automotive domain.
                 There we thoroughly investigate the impact of the VLAN
                 partitioning on the message timing and the link loads
                 by optimizing these design objectives concurrently.
                 During the exploration of the huge design space, where
                 each resource can be assigned to one of four VLANs, our
                 approach requires less than 40ms for the creation of a
                 valid solution and ensures that all messages satisfy
                 their deadlines and link load bounds.",
  acknowledgement = ack-nhfb,
  articleno =    "9",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Huang:2019:ILA,
  author =       "Bo-Yuan Huang and Hongce Zhang and Pramod Subramanyan
                 and Yakir Vizel and Aarti Gupta and Sharad Malik",
  title =        "Instruction-Level Abstraction {(ILA)}: a Uniform
                 Specification for System-on-Chip {(SoC)} Verification",
  journal =      j-TODAES,
  volume =       "24",
  number =       "1",
  pages =        "10:1--10:??",
  month =        jan,
  year =         "2019",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3282444",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:40 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Modern Systems-on-Chip (SoC) designs are increasingly
                 heterogeneous and contain specialized semi-programmable
                 accelerators in addition to programmable processors. In
                 contrast to the pre-accelerator era, when the ISA
                 played an important role in verification by enabling a
                 clean separation of concerns between software and
                 hardware, verification of these ``accelerator-rich''
                 SoCs presents new challenges. From the perspective of
                 hardware designers, there is a lack of a common
                 framework for formal functional specification of
                 accelerator behavior. From the perspective of software
                 developers, there exists no unified framework for
                 reasoning about software/hardware interactions of
                 programs that interact with accelerators. This article
                 addresses these challenges by providing a formal
                 specification and high-level abstraction for
                 accelerator functional behavior. It formalizes the
                 concept of an Instruction Level Abstraction (ILA),
                 developed informally in our previous work, and shows
                 its application in modeling and verification of
                 accelerators. This formal ILA extends the familiar
                 notion of instructions to accelerators and provides a
                 uniform, modular, and hierarchical abstraction for
                 modeling software-visible behavior of both accelerators
                 and programmable processors. We demonstrate the
                 applicability of the ILA through several case studies
                 of accelerators (for image processing, machine
                 learning, and cryptography), and a general-purpose
                 processor (RISC-V). We show how the ILA model
                 facilitates equivalence checking between two ILAs, and
                 between an ILA and its hardware finite-state machine
                 (FSM) implementation. Further, this equivalence
                 checking supports accelerator upgrades using the notion
                 of ILA compatibility, similar to processor upgrades
                 using ISA compatibility.",
  acknowledgement = ack-nhfb,
  articleno =    "10",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Carpent:2019:RAS,
  author =       "Xavier Carpent and Norrathep Rattanavipanon and Gene
                 Tsudik",
  title =        "Remote Attestation via Self-Measurement",
  journal =      j-TODAES,
  volume =       "24",
  number =       "1",
  pages =        "11:1--11:??",
  month =        jan,
  year =         "2019",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3279950",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:40 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Remote attestation (RA) is a popular means of
                 detecting malware in embedded and IoT devices. RA is
                 usually realized as an interactive protocol, whereby a
                 trusted party (verifier) measures software integrity
                 of a potentially compromised remote device (prover).
                 Early work focused on purely software-based and fully
                 hardware-based techniques, neither of which is ideal
                 for low-end embedded devices. More recent results
                 yielded hybrid (SW/HW) architectures with a minimal set
                 of features to support efficient and secure RA on
                 low-end devices. All prior techniques require on-demand
                 operation, i.e., RA is performed in real time. We
                 identify some drawbacks of this general approach in the
                 context of unattended devices: First, it fails to
                 detect mobile malware that enters and leaves prover
                 between successive RA instances. Second, it requires
                 prover to engage in a potentially expensive (in terms
                 of time and energy) computation, which can be harmful
                 for mission-critical or real-time devices. To address
                 these drawbacks, we introduce the concept of
                 self-measurement, whereby prover periodically and
                 securely measures and records its own software state,
                 based on a pre-established schedule. A (possibly
                 untrusted) verifier occasionally collects and verifies
                 these measurements. We present the design of a concrete
                 technique, called Efficient Remote Attestation via
                 Self-Measurement for Unattended Settings, (ERASMUS),
                 justify its features and evaluate its performance. In
                 the process, we also define a new metric, Quality of
                 Attestation (QoA). We believe that ERASMUS is well
                 suited for time-sensitive and/or safety-critical
                 applications that are not served well by on-demand RA.
                 Finally, we show that ERASMUS is a promising stepping
                 stone toward handling attestation of multiple devices
                 (i.e., a group or swarm) with high mobility.",
  acknowledgement = ack-nhfb,
  articleno =    "11",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Tan:2019:EMI,
  author          = {Jingweijia Tan and Kaige Yan},
  title           = {Efficiently Managing the Impact of Hardware
                     Variability on {GPUs}' Streaming Processors},
  journal         = j-TODAES,
  volume          = {24},
  number          = {1},
  pages           = {12:1--12:??},
  month           = jan,
  year            = {2019},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/3287308},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Fri Mar 22 16:58:40 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {Graphics Processing Units (GPUs) are widely used in
                     general-purpose high-performance computing fields due
                     to their highly parallel architecture. In recent years,
                     a new era with the nanometer scale integrated circuit
                     manufacture process has come. As a consequence, GPUs'
                     computation capability gets even stronger. However, as
                     process technology scales down, hardware variability,
                     e.g., process variations (PVs) and negative bias
                     temperature instability (NBTI), has a higher impact on
                     the chip quality. The parallelism of GPU desires high
                     consistency of hardware units on chip; otherwise, the
                     worst unit will inevitably become the bottleneck. So
                     the hardware variability becomes a pressing concern to
                     further improve GPUs' performance and lifetime, not
                     only in integrated circuit fabrication, but more in GPU
                     architecture design. Streaming Processors (SPs) are the
                     key units in GPUs, which perform most of parallel
                     computing operations. Therefore, in this work, we focus
                     on mitigating the impact of hardware variability in GPU
                     SPs. We first model and analyze SPs' performance
                     variations under hardware variability. Then, we observe
                     that both PV and NBTI have a large impact on SPs'
                     performance. We further observe unbalanced SP
                     utilization, e.g., some SPs are idle when others are
                     active, during program execution. Leveraging this
                     observation, we propose a Hardware Variability-aware
                     SPs' Management policy (HVSM), which dynamically
                     dispatches computation in appropriate SPs to balance
                     the utilizations. In addition, we find that a large
                     portion of compute operations are duplicate. We also
                     propose an Operation Compression (OC) technique to
                     minimize the unnecessary computations to further
                     mitigate the hardware variability effects. Our
                     experimental results show the combined HVSM and OC
                     technique effectively reduces the impact of hardware
                     variability, which can translate to 37\% performance
                     improvement or 18.3\% lifetime extension for a GPU
                     chip.},
  acknowledgement = ack-nhfb,
  articleno       = {12},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Kang:2019:TDF,
  author          = {Ilgweon Kang and Fang Qiao and Dongwon Park and Daniel
                     Kane and Evangeline Fung Yu Young and Chung-Kuan Cheng
                     and Ronald Graham},
  title           = {Three-dimensional Floorplan Representations by Using
                     Corner Links and Partial Order},
  journal         = j-TODAES,
  volume          = {24},
  number          = {1},
  pages           = {13:1--13:??},
  month           = jan,
  year            = {2019},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/3289179},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Fri Mar 22 16:58:40 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {Three-dimensional integrated circuit (3D IC)
                     technology offers a potential breakthrough to enable a
                     paradigm-shift strategy, called ``more than Moore,''
                     with novel features and advantages over the
                     conventional 2D process technology. By having
                     three-dimensional interconnections, 3D IC provides
                     substantial wirelength reduction and a massive amount
                     of bandwidth, which gives significant performance
                     improvement to overcome many of the nontrivial
                     challenges in semiconductor industry. Moreover, 3D
                     integration technology enables to stack disparate
                     technologies with various functionalities into a single
                     system-in-package (SiP), introducing ``true 3D IC''
                     design. As the first physical design (PD) step, IC
                     floorplanning takes a crucial role to determine IC's
                     overall design qualities such as footprint area, timing
                     closure, power distribution, thermal management, and so
                     on. However, lack of efficient 3D floorplanning
                     algorithms that practically implement advantages of 3D
                     integration technology is a critical bottleneck for PD
                     automation of 3D IC design and implementation. 3D
                     floorplanning (or packing, block partitioning) is a
                     well-known NP-hard problem, and most of 3D
                     floorplanning algorithms rely on heuristics and
                     iterative improvements. Thus, developing complete and
                     efficient 3D floorplan representations is important,
                     since floorplan representation provides the foundation
                     of data structure to search the solution space for 3D
                     IC floorplanning. A well-defined floorplan
                     representation provides a well-organized and
                     cost-effective methodology to design high-performance
                     3D IC. We propose a new 3D IC floorplan representation
                     methodology using corner links and partial order. Given
                     a fixed number of cuboidal blocks and their volume,
                     algorithmic 3D floorplan representations describe
                     topological structure and physical
                     positions/orientations of each block relative to the
                     origin in the 3D floorplan space. In this article, (1)
                     we introduce our novel 3D floorplan representation,
                     called corner links representation, (2) we analyze the
                     equivalence relation between the corner links
                     representation and its corresponding partial order
                     representation, and (3) we discuss several key
                     properties of the corner links representation and
                     partial order representation. The corner links
                     representation provides a complete and efficient
                     structure to assemble the original 3D mosaic floorplan.
                     Also, the corner links representation for the
                     non-degenerate 3D mosaic floorplan can be equivalently
                     expressed by the four trees representation. The partial
                     order representation defines the topological structure
                     of the 3D floorplan with three transitive closure
                     graphs (TCG) for each direction and captures all
                     stitching planes in the 3D floorplan in the order of
                     their respective directions. We demonstrate that the
                     corner links representation can be reduced to its
                     corresponding partial order representation, indicating
                     that the corner links representation shares
                     well-defined and -studied features/properties of 3D
                     TCG-based floorplan representation. If the partial
                     order representation describes relations between any
                     pairs of blocks in the 3D floorplan, then the floorplan
                     is a valid floorplan. We show that the partial order
                     representation can restore the absolute coordinates of
                     all blocks in the 3D mosaic floorplan by using the
                     given physical dimensions of blocks.},
  acknowledgement = ack-nhfb,
  articleno       = {13},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Gong:2019:PEH,
  author          = {Yanping Gong and Fengyu Qian and Lei Wang},
  title           = {Probabilistic Evaluation of Hardware Security
                     Vulnerabilities},
  journal         = j-TODAES,
  volume          = {24},
  number          = {2},
  pages           = {14:1--14:??},
  month           = mar,
  year            = {2019},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/3290405},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Fri Mar 22 16:58:41 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL             = {https://dl.acm.org/ft_gateway.cfm?id=3290405},
  abstract        = {Various design techniques can be applied to implement
                     the finite state machine (FSM) functions in order to
                     optimize timing, performance, power, and to reduce
                     overhead. Recently, malicious attacks to hardware
                     systems have emerged as a critical problem. Fault
                     injection attacks, in particular, alter the function or
                     reveal the critical information of a hardware system
                     through precisely controlled fault injection processes.
                     Attackers can utilize the loopholes and vulnerabilities
                     of FSM functions to access the states that are under
                     protection. A probabilistic model is developed in this
                     article to evaluate the potential vulnerabilities of
                     FSM circuits at the design stage. Analysis based on the
                     statistical behaviors of FSM also shows that the
                     induced circuit errors can be exploited to access the
                     protected states. An effective solution based on state
                     re-encoding is proposed to minimize the risk of
                     unauthorized transitions. Simulation results
                     demonstrate that vulnerable transition paths can be
                     protected with small hardware overheads.},
  acknowledgement = ack-nhfb,
  articleno       = {14},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Zheng:2019:HEB,
  author          = {Jianwei Zheng and Chao Lu and Jiefeng Guo and Deming
                     Chen and Donghui Guo},
  title           = {A Hardware-Efficient Block Matching Algorithm and Its
                     Hardware Design for Variable Block Size Motion
                     Estimation in Ultra-High-Definition Video Encoding},
  journal         = j-TODAES,
  volume          = {24},
  number          = {2},
  pages           = {15:1--15:??},
  month           = mar,
  year            = {2019},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/3290408},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Fri Mar 22 16:58:41 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL             = {https://dl.acm.org/ft_gateway.cfm?id=3290408},
  abstract        = {Variable block size motion estimation has contributed
                     greatly to achieving an optimal interframe encoding,
                     but involves high computational complexity and huge
                     memory access, which is the most critical bottleneck in
                     ultra-high-definition video encoding. This article
                     presents a hardware-efficient block matching algorithm
                     with an efficient hardware design that is able to
                     reduce the computational complexity of motion
                     estimation while providing a sustained and steady
                     coding performance for high-quality video encoding. A
                     three-level memory organization is proposed to reduce
                     memory bandwidth requirement while supporting a
                     predictive common search window. By applying multiple
                     search strategies and early termination, the proposed
                     design provides 1.8 to 3.7 times higher hardware
                     efficiency than other works. Furthermore, on-chip
                     memory has been reduced by 96.5\% and off-chip
                     bandwidth requirement has been reduced by 39.4\% thanks
                     to the proposed three-level memory organization. The
                     corresponding power consumption is only 198mW at the
                     highest working frequency of 500MHz. The proposed
                     design is attractive for high-quality video encoding in
                     real-time applications with low power consumption.},
  acknowledgement = ack-nhfb,
  articleno       = {15},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Bakhshalipour:2019:RWT,
  author          = {Mohammad Bakhshalipour and Aydin Faraji and Seyed
                     Armin Vakil Ghahani and Farid Samandi and Pejman
                     Lotfi-Kamran and Hamid Sarbazi-Azad},
  title           = {Reducing Writebacks Through In-Cache Displacement},
  journal         = j-TODAES,
  volume          = {24},
  number          = {2},
  pages           = {16:1--16:??},
  month           = mar,
  year            = {2019},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/3289187},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Fri Mar 22 16:58:41 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL             = {https://dl.acm.org/ft_gateway.cfm?id=3289187},
  abstract        = {Non-Volatile Memory (NVM) technology is a promising
                     solution to fulfill the ever-growing need for higher
                     capacity in the main memory of modern systems. Despite
                     having many great features, however, NVM's poor write
                     performance remains a severe obstacle, preventing it
                     from being used as a DRAM alternative in the main
                     memory. Most of the prior work targeted optimizing
                     writes at the main memory side and neglected the
                     decisive role of upper-level cache management policies
                     on reducing the number of writes. In this article, we
                     propose a novel cache management policy that attempts
                     to maximize write-coalescing in the on-chip SRAM
                     last-level cache (LLC) for the sake of reducing the
                     number of costly writes to the off-chip NVM. We
                     decouple a few physical ways of the LLC to have a
                     dedicated and exclusive storage for the dirty blocks
                     after being evicted from the cache and before being
                     sent to the off-chip memory. By displacing dirty blocks
                     in exclusive storage, they are kept in the cache based
                     on their rewrite distance and are evicted when they are
                     unlikely to be reused shortly. To maximize the
                     effectiveness of exclusive storage, we manage it as a
                     Cuckoo Cache to offer associativity based on the
                     various applications' demands. Through detailed
                     evaluations targeting various single- and
                     multi-threaded applications, we show that our proposal
                     reduces the number of writebacks by 21\%, on average,
                     over the state-of-the-art method and enhances both
                     performance and energy efficiency.},
  acknowledgement = ack-nhfb,
  articleno       = {16},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Bhowmik:2019:PAT,
  author =       "Biswajit Bhowmik and Jatindra Kumar Deka and Santosh
                 Biswas and Bhargab B. Bhattacharya",
  title =        "Performance-Aware Test Scheduling for Diagnosing
                 Coexistent Channel Faults in Topology-Agnostic
                 Networks-on-Chip",
  journal =      j-TODAES,
  volume =       "24",
  number =       "2",
  pages =        "17:1--17:??",
  month =        mar,
  year =         "2019",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3291532",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:41 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3291532",
  abstract =     "High-performance multiprocessor SoCs used in practice
                 require a complex network-on-chip (NoC) as
                 communication architecture, and the channels therein
                 often suffer from various manufacturing defects. Such
                 physical defects cause a multitude of system-level
                 failures and subsequent degradation of reliability,
                 yield, and performance of the computing platform. Most
                 of the existing test approaches consider mesh-based NoC
                 channels only and do not perform well for other regular
                 topologies such as octagons or spidergons, with regard
                 to test time and overhead issues. This article proposes
                 a topology-agnostic test mechanism that is capable of
                 diagnosing on-line, coexistent channel-short, and
                 stuck-at faults in these special NoCs as well as in
                 traditional mesh architectures. We introduce a new test
                 model called Damaru to decompose the network and
                 present an efficient scheduling scheme to reduce test
                 time without compromising resource utilization during
                 testing. Additionally, the proposed scheduling scheme
                 scales well with network size, channel width, and
                 topological diversity. Simulation results show that the
                 method achieves nearly 92\% fault coverage and improves
                 area overhead by almost 60\% and test time by 98\%
                 compared to earlier approaches. As a sequel, packet
                 latency and energy consumption are also improved by
                 67.05\% and 54.69\%, respectively, and they are further
                 improved with increasing network size.",
  acknowledgement = ack-nhfb,
  articleno =    "17",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Pourshirazi:2019:WAL,
  author =       "Bahareh Pourshirazi and Majed Valad Beigi and Zhichun
                 Zhu and Gokhan Memik",
  title =        "Writeback-Aware {LLC} Management for {PCM}-Based Main
                 Memory Systems",
  journal =      j-TODAES,
  volume =       "24",
  number =       "2",
  pages =        "18:1--18:??",
  month =        mar,
  year =         "2019",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3292009",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:41 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3292009",
  abstract =     "With the increase in the number of data-intensive
                 applications on today's workloads, DRAM-based main
                 memories are struggling to satisfy the growing data
                 demand capacity. Phase Change Memory (PCM) is a type of
                 non-volatile memory technology that has been explored
                 as a promising alternative for DRAM-based main memories
                 due to its better scalability and lower leakage energy.
                 Despite its many advantages, PCM also has shortcomings
                 such as long write latency, high write energy
                 consumption, and limited write endurance, which are all
                 related to the write operations. In this article, we
                 propose a novel writeback-aware Last Level Cache (LLC)
                 management scheme named WALL to reduce the number of
                 LLC writebacks and consequently improve performance,
                 energy efficiency, and lifetime of a PCM-based main
                 memory system. First, we investigate the writeback
                 behavior of LLC sets and show that writebacks are not
                 uniformly distributed among sets; some sets observe
                 much higher writeback rates than others. We then
                 propose a writeback-aware set-balancing mechanism,
                 which employs the underutilized LLC sets with few
                 writebacks as an auxiliary storage for the evicted
                 dirty lines from sets with frequent writebacks. We also
                 propose a simple and effective writeback-aware
                 replacement policy to avoid the eviction of the dirty
                 blocks that are highly reused after being evicted from
                 the cache. Our experimental results show that WALL
                 achieves an average of 30.9\% reduction in the total
                 number of LLC writebacks, compared to the baseline
                 scheme, which uses the LRU replacement policy. As a
                 result, WALL can reduce the memory energy consumption
                 by 23.1\% and enhance PCM lifetime by $ 1.29 \times $,
                 on average, on an 8-core system with a 4GB PCM main
                 memory, running memory-intensive applications.",
  acknowledgement = ack-nhfb,
  articleno =    "18",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Muhammad:2019:RBS,
  author          = {Shaheer Muhammad and M. Usman Rafique and Shuai Li and
                     Zili Shao and Qixin Wang and Xue Liu},
  title           = {Reconfigurable Battery Systems: a Survey on Hardware
                     Architecture and Research Challenges},
  journal         = j-TODAES,
  volume          = {24},
  number          = {2},
  pages           = {19:1--19:??},
  month           = mar,
  year            = {2019},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/3301301},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Fri Mar 22 16:58:41 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL             = {https://dl.acm.org/ft_gateway.cfm?id=3301301},
  abstract        = {In a reconfigurable battery pack, the connections
                     among cells can be changed during operation to form
                     different configurations. This can lead a battery, a
                     passive two-terminal device, to a smart battery that
                     can reconfigure itself according to the requirement to
                     enhance operational performance. Several hardware
                     architectures with different levels of complexities
                     have been proposed. Some researchers have used existing
                     hardware and demonstrated improved performance on the
                     basis of novel optimization and scheduling algorithms.
                     The possibility of software techniques to benefit the
                     energy storage systems is exciting, and it is the
                     perfect time for such methods as the need for
                     high-performance and long-lasting batteries is on the
                     rise. This novel field requires new understanding,
                     principles, and evaluation metrics of proposed schemes.
                     In this article, we systematically discuss and
                     critically review the state of the art. This is the
                     first effort to compare the existing hardware
                     topologies in terms of flexibility and functionality.
                     We provide a comprehensive review that encompasses all
                     existing research works, starting from the details of
                     the individual battery including modeling and
                     properties as well as fixed-topology traditional
                     battery packs. To stimulate further research in this
                     area, we highlight key challenges and open problems in
                     this domain.},
  acknowledgement = ack-nhfb,
  articleno       = {19},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Sahoo:2019:FMV,
  author          = {Debiprasanna Sahoo and Swaraj Sha and Manoranjan
                     Satpathy and Madhu Mutyam and S. Ramesh and Partha
                     Roop},
  title           = {Formal Modeling and Verification of a Victim {DRAM}
                     Cache},
  journal         = j-TODAES,
  volume          = {24},
  number          = {2},
  pages           = {20:1--20:??},
  month           = mar,
  year            = {2019},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/3306491},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Fri Mar 22 16:58:41 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL             = {https://dl.acm.org/ft_gateway.cfm?id=3306491},
  abstract        = {The emerging Die-stacking technology enables DRAM to
                     be used as a cache to break the ``Memory Wall''
                     problem. Recent studies have proposed to use DRAM as a
                     victim cache in both CPU and GPU memory hierarchies to
                     improve performance. DRAM caches are large in size and,
                     hence, when realized as a victim cache, non-inclusive
                     design is preferred. This non-inclusive design adds
                     significant differences to the conventional DRAM cache
                     design in terms of its probe, fill, and writeback
                     policies. Design and verification of a victim DRAM
                     cache can be much more complex than that of a
                     conventional DRAM cache. Hence, without rigorous
                     modeling and formal verification, ensuring the
                     correctness of such a system can be difficult. The
                     major focus of this work is to show how formal modeling
                     is applied to design and verify a victim DRAM cache. In
                     this approach, we identify the agents in the victim
                     DRAM cache design and model them in terms of
                     interacting state machines. We derive a set of
                     properties from the specifications of a victim cache
                     and encode them using Linear Temporal Logic. The
                     properties are then proven using symbolic and bounded
                     model checking. Finally, we discuss how these
                     properties are related to the dataflow paths in a
                     victim DRAM cache.},
  acknowledgement = ack-nhfb,
  articleno       = {20},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Gupta:2019:DAD,
  author =       "Ankur Gupta and Juinn-Dar Huang and Shigeru Yamashita
                 and Sudip Roy",
  title =        "Design Automation for Dilution of a Fluid Using
                 Programmable Microfluidic Device-Based Biochips",
  journal =      j-TODAES,
  volume =       "24",
  number =       "2",
  pages =        "21:1--21:??",
  month =        mar,
  year =         "2019",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3306492",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:41 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3306492",
  abstract =     "Microfluidic lab-on-a-chip has emerged as a new
                 technology for implementing biochemical protocols on
                 small-sized portable devices targeting low-cost medical
                 diagnostics. Among various efforts of fabrication of
                 such chips, a relatively new technology is a
                 programmable microfluidic device (PMD) for
                 implementation of flow-based lab-on-a-chip. A PMD chip
                 is suitable for automation due to its symmetric nature.
                 In order to implement a bioprotocol on such a
                 reconfigurable device, it is crucial to automate a
                 sample preparation on-chip as well. In this article, we
                 propose a dilution PMD algorithm (namely DPMD) and its
                 architectural mapping scheme (namely generalized
                 architectural mapping algorithm (GAMA)) for
                 addressing fluidic cells of such a device to perform
                 dilution of a reagent fluid on-chip. We used an
                 optimization function that first minimizes the number
                 of mixing steps and then reduces the waste generation
                 and further reagent requirement. Simulation results
                 show that the proposed DPMD scheme is comparative to
                 the existing state-of-the-art dilution algorithm. The
                 proposed design automation using the architectural
                 mapping scheme reduces the required chip area and,
                 hence, minimizes the valve switching that, in turn,
                 increases the life span of the PMD-chip.",
  acknowledgement = ack-nhfb,
  articleno =    "21",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@article{Jung:2019:ILP,
  author          = {Jinwook Jung and Gi-Joon Nam and Woohyun Chung and
                     Youngsoo Shin},
  title           = {Integrated Latch Placement and Cloning for Timing
                     Optimization},
  journal         = j-TODAES,
  volume          = {24},
  number          = {2},
  pages           = {22:1--22:??},
  month           = mar,
  year            = {2019},
  coden           = {ATASFO},
  doi             = {https://doi.org/10.1145/3301613},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Fri Mar 22 16:58:41 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  url             = {https://dl.acm.org/ft_gateway.cfm?id=3301613},
  abstract        = {This article presents an algorithm for integrated
                     timing-driven latch placement and cloning. Given a
                     circuit placement, the proposed algorithm relocates
                     some latches while circuit timing is improved. Some
                     latches are replicated to further improve the timing;
                     the number of replicated latches along with their
                     locations are automatically determined. After latch
                     cloning, each of the replicated latches is set to drive
                     a subset of the fanouts that have been driven by the
                     original single latch. The proposed algorithm is then
                     extended such that relocation and cloning are applied
                     to some latches together with their neighbor logic
                     gates. Experimental results demonstrate that the worst
                     negative slack and the total negative slack are
                     improved by 24\% and 59\%, respectively, on average of
                     test circuits. The negative impacts on circuit area and
                     power consumption are both marginal, at 0.7\% and 1.9\%
                     respectively.},
  acknowledgement = ack-nhfb,
  articleno       = {22},
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Pomeranz:2019:ITU,
  author =       "Irith Pomeranz",
  title =        "Incomplete Tests for Undetectable Faults to Improve
                 Test Set Quality",
  journal =      j-TODAES,
  volume =       "24",
  number =       "2",
  pages =        "23:1--23:??",
  month =        mar,
  year =         "2019",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3306493",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:41 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3306493",
  abstract =     "The presence of undetectable faults in a set of target
                 faults implies that tests, which may be important for
                 detecting defects, are missing from the test set. This
                 article suggests an approach for addressing missing
                 tests that fits with the rationale for computing an
                 $n$-detection test set. The article defines the concept
                 of an incomplete test that is relevant when a target
                 fault is undetectable. An incomplete test activates the
                 fault but fails to detect it because of one or more
                 assignments that are missing from the test. The
                 procedure described in this article improves the
                 quality of a test set by attempting to ensure that
                 every undetectable fault has $n$ incomplete tests with
                 the smallest possible numbers of missing assignments,
                 for a constant $ n \geq 1 $. The incomplete tests are
                 expected to contribute to the detection of detectable
                 defects around the site of the undetectable fault. The
                 computation of missing assignments for a test is
                 performed in linear time by avoiding fault simulation
                 and considering all the undetectable faults
                 simultaneously. Experimental results demonstrate the
                 extent to which a given test set can be improved
                 without increasing the number of tests.",
  acknowledgement = ack-nhfb,
  articleno =    "23",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@article{Hyun:2019:IAA,
  author          = {Daijoon Hyun and Youngsoo Shin},
  title           = {Integrated Approach of Airgap Insertion for Circuit
                     Timing Optimization},
  journal         = j-TODAES,
  volume          = {24},
  number          = {2},
  pages           = {24:1--24:??},
  month           = mar,
  year            = {2019},
  coden           = {ATASFO},
  doi             = {https://doi.org/10.1145/3306494},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Fri Mar 22 16:58:41 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  url             = {https://dl.acm.org/ft_gateway.cfm?id=3306494},
  abstract        = {Airgap technology enables air to be introduced in
                     inter-metal dielectric (IMD). Airgap between certain
                     wires reduces coupling capacitance due to the reduced
                     permittivity; this can be utilized to decrease circuit
                     delay. We propose an integrated approach of airgap
                     insertion with the goal of circuit timing optimization.
                     It consists of three sub-problems. We first select the
                     layers that employ airgap, called airgap layers, that
                     maximize total negative slack (TNS) improvement; this
                     yields TNS improvement of 7\% to 15\% and worst
                     negative slack (WNS) improvement of 2\% to 8\%,
                     compared to a simple assumption of airgap layers.
                     Second, we reassign the layers of wires such that more
                     wires on critical paths can be placed in airgap layers.
                     This is formulated as integer linear programming (ILP),
                     and a more practical heuristic algorithm is also
                     proposed. It provides an additional 17\% TNS
                     improvement and 6\% WNS improvement. Finally, we
                     perform airgap insertion through ILP formulation, where
                     a number of design rules are modeled with linear
                     constraints. To reduce the heavy runtime of ILP, a
                     layout partitioning technique is also applied. It
                     implements a feasible airgap mask in a manageable time
                     where the amount of inserted airgap is close to the
                     optimal solution.},
  acknowledgement = ack-nhfb,
  articleno       = {24},
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Li:2019:NRM,
  author =       "Taozhong Li and Qin Wang and Yongxin Zhu and Jianfei
                 Jiang and Guanghui He and Jing Jin and Zhigang Mao and
                 Naifeng Jing",
  title =        "A Novel Resistive Memory-based Process-in-memory
                 Architecture for Efficient Logic and Add Operations",
  journal =      j-TODAES,
  volume =       "24",
  number =       "2",
  pages =        "25:1--25:??",
  month =        mar,
  year =         "2019",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3306495",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:41 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3306495",
  abstract =     "The coming era of big data revives the
                 Processing-in-memory (PIM) architecture to relieve the
                 memory wall problem that embarrasses the modern
                 computing system. However, most existing PIM designs
                 just put computing units closer to memory, rather than
                 a complete integration of them due to their
                 incompatibility in CMOS manufacturing. Fortunately, the
                 emerging Resistive-RAM (ReRAM) offers new hope to this
                 dilemma owing to its inherent memory and computing
                 capability using the same device. In this article, we
                 propose a ReRAM memory structure with efficient PIM
                 capability of both logic and add operations. It first
                 leverages non-linearity to suppress sneak current and
                 thus sustains high memory density. Using a differential
                 bit cell, it also enables efficient processing of
                 arbitrary logic functions using the same memory cells
                 with non-destructive operations. Then, a novel PIM
                 adder is proposed, which customizes a sneak current
                 path as the carry-chain for fast carry propagation and
                 improves adder performance significantly. In the
                 experiment, the proposed PIM demonstrates higher
                 efficiency in both computing area and performance for
                 logic and addition, which greatly increases the ReRAM
                 PIM applicability for future computable
                 architectures.",
  acknowledgement = ack-nhfb,
  articleno =    "25",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@article{Nongpoh:2019:ESE,
  author          = {Bernard Nongpoh and Rajarshi Ray and Moumita Das and
                     Ansuman Banerjee},
  title           = {Enhancing Speculative Execution With Selective
                     Approximate Computing},
  journal         = j-TODAES,
  volume          = {24},
  number          = {2},
  pages           = {26:1--26:??},
  month           = mar,
  year            = {2019},
  coden           = {ATASFO},
  doi             = {https://doi.org/10.1145/3307651},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Fri Mar 22 16:58:41 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  url             = {https://dl.acm.org/ft_gateway.cfm?id=3307651},
  abstract        = {Speculative execution is an optimization technique
                     used in modern processors by which predicted
                     instructions are executed in advance with an objective
                     of overlapping the latencies of slow operations. Branch
                     prediction and load value speculation are examples of
                     speculative execution used in modern pipelined
                     processors to avoid execution stalls. However,
                     speculative executions incur a performance penalty as
                     an execution rollback when there is a misprediction. In
                     this work, we propose to aid speculative execution with
                     approximate computing by relaxing the execution
                     rollback penalty associated with a misprediction. We
                     propose a sensitivity analysis method for data and
                     branches in a program to identify the data load and
                     branch instructions that can be executed without any
                     rollback in the pipeline and yet can ensure a certain
                     user-specified quality of service of the application
                     with a probabilistic reliability. Our analysis is based
                     on statistical methods, particularly hypothesis testing
                     and Bayesian analysis. We perform an architectural
                     simulation of our proposed approximate execution and
                     report the benefits in terms of CPU cycles and energy
                     utilization on selected applications from the AxBench,
                     ACCEPT, and Parsec 3.0 benchmarks suite.},
  acknowledgement = ack-nhfb,
  articleno       = {26},
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@article{Vinco:2019:CLV,
  author          = {Sara Vinco and Nicola Bombieri and Daniele Jahier Pagliari
                     and Franco Fummi and Enrico Macii and Massimo Poncino},
  title           = {A Cross-level Verification Methodology for Digital {IPs}
                     Augmented with Embedded Timing Monitors},
  journal         = j-TODAES,
  volume          = {24},
  number          = {3},
  pages           = {27:1--27:23},
  month           = jun,
  year            = {2019},
  coden           = {ATASFO},
  doi             = {https://doi.org/10.1145/3308565},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Thu Jan 30 09:00:30 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  url             = {https://dl.acm.org/doi/abs/10.1145/3308565},
  abstract        = {Smart systems are characterized by the integration in
                     a single device of multi-domain subsystems of different
                     technological domains, namely, analog, digital,
                     discrete and power devices, MEMS, and power sources.
                     Such challenges, emerging from the \ldots{}},
  acknowledgement = ack-nhfb,
  articleno       = {27},
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems},
  journal-url     = {https://dl.acm.org/loi/todaes},
}

@article{Oh:2019:TAS,
  author          = {Deok Keun Oh and Mu Jun Choi and Ju Ho Kim},
  title           = {Thermal-aware {$3$D} Symmetrical Buffered Clock Tree
                     Synthesis},
  journal         = j-TODAES,
  volume          = {24},
  number          = {3},
  pages           = {28:1--28:22},
  month           = jun,
  year            = {2019},
  coden           = {ATASFO},
  doi             = {https://doi.org/10.1145/3313798},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Thu Jan 30 09:00:30 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  url             = {https://dl.acm.org/doi/abs/10.1145/3313798},
  abstract        = {The semiconductor industry has accepted
                     three-dimensional integrated circuits (3D ICs) as a
                     possible solution to address speed and power management
                     problems. In addition, 3D ICs have recently
                     demonstrated a huge potential in reducing wire length
                     and \ldots{}},
  acknowledgement = ack-nhfb,
  articleno       = {28},
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems},
  journal-url     = {https://dl.acm.org/loi/todaes},
}

@article{Schwarzer:2019:CDA,
  author          = {Tobias Schwarzer and Joachim Falk and Simone M{\"u}ller
                     and Martin Letras and Christian Heidorn and
                     Stefan Wildermann and J{\"u}rgen Teich},
  title           = {Compilation of Dataflow Applications for Multi-Cores
                     using Adaptive Multi-Objective Optimization},
  journal         = j-TODAES,
  volume          = {24},
  number          = {3},
  pages           = {29:1--29:23},
  month           = jun,
  year            = {2019},
  coden           = {ATASFO},
  doi             = {https://doi.org/10.1145/3310249},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Thu Jan 30 09:00:30 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  url             = {https://dl.acm.org/doi/abs/10.1145/3310249},
  abstract        = {State-of-the-art system synthesis techniques employ
                     meta-heuristic optimization techniques for Design Space
                     Exploration (DSE) to tailor application execution,
                     e.g., defined by a dataflow graph, for a given target
                     platform. Unfortunately, the performance \ldots{}},
  acknowledgement = ack-nhfb,
  articleno       = {29},
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems},
  journal-url     = {https://dl.acm.org/loi/todaes},
}

@article{Tu:2019:AOS,
  author          = {Chia-Heng Tu and Te-Sheng Lin},
  title           = {Augmenting Operating Systems with {OpenCL} Accelerators},
  journal         = j-TODAES,
  volume          = {24},
  number          = {3},
  pages           = {30:1--30:29},
  month           = jun,
  year            = {2019},
  coden           = {ATASFO},
  doi             = {https://doi.org/10.1145/3315569},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Thu Jan 30 09:00:30 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  url             = {https://dl.acm.org/doi/abs/10.1145/3315569},
  abstract        = {Heterogeneous computing leverages more than one kind
                     of processors to boost the performance of user-space
                     applications with the heterogeneous programming
                     languages, e.g., OpenCL. While some works have been
                     done to accelerate the computations required by
                     \ldots{}},
  acknowledgement = ack-nhfb,
  articleno       = {30},
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems},
  journal-url     = {https://dl.acm.org/loi/todaes},
}

@article{Xu:2019:ESC,
  author          = {Xiaolin Xu and Fahim Rahman and Bicky Shakya and
                     Apostol Vassilev and Domenic Forte and Mark Tehranipoor},
  title           = {Electronics Supply Chain Integrity Enabled by Blockchain},
  journal         = j-TODAES,
  volume          = {24},
  number          = {3},
  pages           = {31:1--31:25},
  month           = jun,
  year            = {2019},
  coden           = {ATASFO},
  doi             = {https://doi.org/10.1145/3315571},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Thu Jan 30 09:00:30 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/bitcoin.bib;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  url             = {https://dl.acm.org/doi/abs/10.1145/3315571},
  abstract        = {Electronic systems are ubiquitous today, playing an
                     irreplaceable role in our personal lives, as well as in
                     critical infrastructures such as power grids, satellite
                     communications, and public transportation. In the past
                     few decades, the security of \ldots{}},
  acknowledgement = ack-nhfb,
  articleno       = {31},
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems},
  journal-url     = {https://dl.acm.org/loi/todaes},
}

@article{Valencia:2019:CPA,
  author          = {Juan Valencia and Dip Goswami and Kees Goossens},
  title           = {Comparing Platform-aware Control Design Flows for
                     Composable and Predictable {TDM}-based Execution
                     Platforms},
  journal         = j-TODAES,
  volume          = {24},
  number          = {3},
  pages           = {32:1--32:26},
  month           = jun,
  year            = {2019},
  coden           = {ATASFO},
  doi             = {https://doi.org/10.1145/3315572},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Thu Jan 30 09:00:30 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  url             = {https://dl.acm.org/doi/abs/10.1145/3315572},
  abstract        = {We compare three platform-aware feedback control
                     design flows that are tailored for a composable and
                     predictable Time Division Multiplexing (TDM)-based
                     execution platform. The platform allows for independent
                     execution of multiple applications. Using the
                     \ldots{}},
  acknowledgement = ack-nhfb,
  articleno       = {32},
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems},
  journal-url     = {https://dl.acm.org/loi/todaes},
}

@article{Lu:2019:DDA,
  author          = {Sixing Lu and Roman Lysecky},
  title           = {Data-driven Anomaly Detection with Timing Features for
                     Embedded Systems},
  journal         = j-TODAES,
  volume          = {24},
  number          = {3},
  pages           = {33:1--33:27},
  month           = jun,
  year            = {2019},
  coden           = {ATASFO},
  doi             = {https://doi.org/10.1145/3279949},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Thu Jan 30 09:00:30 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  url             = {https://dl.acm.org/doi/abs/10.1145/3279949},
  abstract        = {Malware is a serious threat to network-connected
                     embedded systems, as evidenced by the continued and
                     rapid growth of such devices, commonly referred to as
                     the Internet of Things. Their ubiquitous use in
                     critical applications require robust protection to
                     \ldots{}},
  acknowledgement = ack-nhfb,
  articleno       = {33},
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems},
  journal-url     = {https://dl.acm.org/loi/todaes},
}

@article{Metwalli:2019:SAS,
  author          = {Sara Ayman Metwalli and Yuko Hara-Azumi},
  title           = {{SSA-AC}: Static Significance Analysis for Approximate
                     Computing},
  journal         = j-TODAES,
  volume          = {24},
  number          = {3},
  pages           = {34:1--34:17},
  month           = jun,
  year            = {2019},
  coden           = {ATASFO},
  doi             = {https://doi.org/10.1145/3314575},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Thu Jan 30 09:00:30 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  url             = {https://dl.acm.org/doi/abs/10.1145/3314575},
  abstract        = {Recently, the quest to reduce energy consumption in
                     digital systems has been the subject of a number of
                     ongoing studies. One of the most researched focuses is
                     approximate computing (AC). AC is a new computing
                     paradigm in both hardware and software \ldots{}},
  acknowledgement = ack-nhfb,
  articleno       = {34},
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems},
  journal-url     = {https://dl.acm.org/loi/todaes},
}

@article{Monteiro:2019:OCF,
  author          = {Jucemar Monteiro and Marcelo Johann and Laleh Behjat},
  title           = {An Optimized Cost Flow Algorithm to Spread Cells in
                     Detailed Placement},
  journal         = j-TODAES,
  volume          = {24},
  number          = {3},
  pages           = {35:1--35:16},
  month           = jun,
  year            = {2019},
  coden           = {ATASFO},
  doi             = {https://doi.org/10.1145/3317575},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Thu Jan 30 09:00:30 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  url             = {https://dl.acm.org/doi/abs/10.1145/3317575},
  abstract        = {Placement is an important and challenging step in VLSI
                     physical design. The placement solution can
                     significantly impact timing and routability. In
                     sub-nanometric technology nodes, several restrictions
                     have been imposed on the placement solutions. These
                     \ldots{}},
  acknowledgement = ack-nhfb,
  articleno       = {35},
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems},
  journal-url     = {https://dl.acm.org/loi/todaes},
}

@article{Islam:2019:EIT,
  author          = {Md Nazmul Islam and Sandip Kundu},
  title           = {Enabling {IC} Traceability via Blockchain Pegged to
                     Embedded {PUF}},
  journal         = j-TODAES,
  volume          = {24},
  number          = {3},
  pages           = {36:1--36:23},
  month           = jun,
  year            = {2019},
  coden           = {ATASFO},
  doi             = {https://doi.org/10.1145/3315669},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Thu Jan 30 09:00:30 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/bitcoin.bib;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  url             = {https://dl.acm.org/doi/abs/10.1145/3315669},
  abstract        = {Globalization of IC supply chain has increased the
                     risk of counterfeit, tampered, and re-packaged chips in
                     the market. Counterfeit electronics poses a security
                     risk in safety critical applications like avionics,
                     SCADA systems, and defense. It also \ldots{}},
  acknowledgement = ack-nhfb,
  articleno       = {36},
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems},
  journal-url     = {https://dl.acm.org/loi/todaes},
}

@article{Wan:2019:DRP,
  author          = {Bo Wan and Xi Li and Bo Zhang and Caixu Zhao and
                     Xianglan Chen and Chao Wang and Xuehai Zhou},
  title           = {{DCW}: a Reactive and Predictable Programming Framework
                     for {LET}-Based Distributed Real-Time Systems},
  journal         = j-TODAES,
  volume          = {24},
  number          = {3},
  pages           = {37:1--37:35},
  month           = jun,
  year            = {2019},
  coden           = {ATASFO},
  doi             = {https://doi.org/10.1145/3317574},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Thu Jan 30 09:00:30 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  url             = {https://dl.acm.org/doi/abs/10.1145/3317574},
  abstract        = {Real-time systems continuously interact with the
                     physical environment and often have to satisfy
                     stringent timing constraints imposed by their
                     interactions. Those systems involve two main
                     properties: reactivity and predictability. Reactivity
                     allows the \ldots{}},
  acknowledgement = ack-nhfb,
  articleno       = {37},
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems},
  journal-url     = {https://dl.acm.org/loi/todaes},
}

@article{Basu:2019:CBA,
  author          = {Kanad Basu and Samah Mohamed Saeed and Christian Pilato
                     and Mohammed Ashraf and Mohammed Thari Nabeel and
                     Krishnendu Chakrabarty and Ramesh Karri},
  title           = {{CAD-Base}: an Attack Vector into the Electronics
                     Supply Chain},
  journal         = j-TODAES,
  volume          = {24},
  number          = {4},
  pages           = {38:1--38:30},
  month           = jul,
  year            = {2019},
  coden           = {ATASFO},
  doi             = {https://doi.org/10.1145/3315574},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Thu Jan 30 09:00:31 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  url             = {https://dl.acm.org/doi/abs/10.1145/3315574},
  abstract        = {Fabless semiconductor companies design system-on-chips
                     (SoC) by using third-party intellectual property (IP)
                     cores and fabricate them in offshore, potentially
                     untrustworthy foundries. Owing to the globally
                     distributed electronics supply chain, security
                     \ldots{}},
  acknowledgement = ack-nhfb,
  articleno       = {38},
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems},
  journal-url     = {https://dl.acm.org/loi/todaes},
}

@article{Rokni:2019:SLF,
  author          = {Seyed Ali Rokni and Hassan Ghasemzadeh},
  title           = {{Share-n-Learn}: a Framework for Sharing Activity
                     Recognition Models in Wearable Systems With
                     Context-Varying Sensors},
  journal         = j-TODAES,
  volume          = {24},
  number          = {4},
  pages           = {39:1--39:27},
  month           = jul,
  year            = {2019},
  coden           = {ATASFO},
  doi             = {https://doi.org/10.1145/3318044},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Thu Jan 30 09:00:31 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  url             = {https://dl.acm.org/doi/abs/10.1145/3318044},
  abstract        = {Wearable sensors utilize machine learning algorithms
                     to infer important events such as the behavioral
                     routine and health status of their end users from
                     time-series sensor data. A major obstacle in
                     large-scale utilization of these systems is that the
                     \ldots{}},
  acknowledgement = ack-nhfb,
  articleno       = {39},
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems},
  journal-url     = {https://dl.acm.org/loi/todaes},
}

@Article{Zimmermann:2019:ADL,
  author          = {Thomas Zimmermann and Mathias Mora and
                     Sebastian Steinhorst and Daniel Mueller-Gritschneder and
                     Andreas Jossen},
  title           = {Analysis of Dissipative Losses in Modular Reconfigurable
                     Energy Storage Systems Using {SystemC TLM} and
                     {SystemC-AMS}},
  journal         = j-TODAES,
  volume          = {24},
  number          = {4},
  pages           = {40:1--40:33},
  month           = jul,
  year            = {2019},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/3321387},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Jan 30 09:00:31 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3321387},
  abstract        = {Battery storage systems are becoming more popular in the
                     automotive industry as well as in stationary
                     applications. To fulfill the requirements in terms of
                     power and energy, the literature is increasingly
                     discussing electrically reconfigurable \ldots{}},
  acknowledgement = ack-nhfb,
  articleno       = {40},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  journal-URL     = {https://dl.acm.org/loi/todaes},
}

@Article{Sayed:2019:CAP,
  author          = {Nour Sayed and Longfei Mao and Rajendra Bishnoi and
                     Mehdi B. Tahoori},
  title           = {Compiler-Assisted and Profiling-Based Analysis for Fast
                     and Efficient {STT-MRAM} On-Chip Cache Design},
  journal         = j-TODAES,
  volume          = {24},
  number          = {4},
  pages           = {41:1--41:25},
  month           = jul,
  year            = {2019},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/3321693},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Jan 30 09:00:31 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3321693},
  abstract        = {Spin Transfer Torque Magnetic Random Access Memory
                     (STT-MRAM) is a promising candidate for large on-chip
                     memories as a zero-leakage, high-density and
                     non-volatile alternative to the present SRAM technology.
                     Since memories are the dominating component of
                     \ldots{}},
  acknowledgement = ack-nhfb,
  articleno       = {41},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  journal-URL     = {https://dl.acm.org/loi/todaes},
}

@Article{Wang:2019:LRA,
  author          = {Naixing Wang and Irith Pomeranz and Sudhakar M. Reddy and
                     Arani Sinha and Srikanth Venkataraman},
  title           = {Layout Resynthesis by Applying
                     Design-for-manufacturability Guidelines to Avoid
                     Low-coverage Areas of a Cell-based Design},
  journal         = j-TODAES,
  volume          = {24},
  number          = {4},
  pages           = {42:1--42:19},
  month           = jul,
  year            = {2019},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/3325066},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Jan 30 09:00:31 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3325066},
  abstract        = {Design-for-manufacturability (DFM) guidelines are
                     recommended layout design practices intended to capture
                     layout features that are difficult to manufacture
                     correctly. Avoiding such features prevents the
                     occurrence of potential systematic defects. Layout
                     \ldots{}},
  acknowledgement = ack-nhfb,
  articleno       = {42},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  journal-URL     = {https://dl.acm.org/loi/todaes},
}

@Article{Burcea:2019:MIR,
  author          = {Florin Burcea and Andreas Herrmann and Bing Li and
                     Helmut Graeb},
  title           = {{MEMS-IC} Robustness Optimization Considering Electrical
                     and Mechanical Design and Process Parameters},
  journal         = j-TODAES,
  volume          = {24},
  number          = {4},
  pages           = {43:1--43:24},
  month           = jul,
  year            = {2019},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/3325068},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Jan 30 09:00:31 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3325068},
  abstract        = {MEMS-based sensor circuits are traditionally designed
                     separately using CAD tools specific to each energy
                     domain (electrical and mechanical). This article
                     presents a complete approach for combined MEMS-IC
                     robustness optimization. Advanced methods for
                     \ldots{}},
  acknowledgement = ack-nhfb,
  articleno       = {43},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  journal-URL     = {https://dl.acm.org/loi/todaes},
}

@Article{Afacan:2019:CRC,
  author =       "Engin Afacan and G{\"u}nhan D{\"u}ndar and Faik
                 Baskaya and Ali Emre Pusane and Mustafa Berke Yelten",
  title =        "On Chip Reconfigurable {CMOS} Analog Circuit Design
                 and Automation Against Aging Phenomena: Sense and
                 React",
  journal =      j-TODAES,
  volume =       "24",
  number =       "4",
  pages =        "44:1--44:22",
  month =        jul,
  year =         "2019",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3325069",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jan 30 09:00:31 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3325069",
  abstract =     "Performance of analog circuits degrades over time due
                 to several time-dependent degradation mechanisms. Due
                 to the increased aging problems in ever-shrinking
                 dimensions, reliability of complementary
                 metal-oxide-semiconductor analog circuits has become a
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "44",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Li:2019:ATR,
  author          = {Yanjun Li and Ender Yilmaz and Pete Sarson and
                     Sule Ozev},
  title           = {Adaptive Test for {RF}\slash Analog Circuit Using Higher
                     Order Correlations among Measurements},
  journal         = j-TODAES,
  volume          = {24},
  number          = {4},
  pages           = {45:1--45:16},
  month           = jul,
  year            = {2019},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/3308566},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Jan 30 09:00:31 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3308566},
  abstract        = {As process variations increase and devices get more
                     diverse in their behavior, using the same test list for
                     all devices is increasingly inefficient. Methodologies
                     that adapt the test sequence with respect to lot, wafer,
                     or even a device's own behavior \ldots{}},
  acknowledgement = ack-nhfb,
  articleno       = {45},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  journal-URL     = {https://dl.acm.org/loi/todaes},
}

@Article{Wang:2019:CPR,
  author          = {Chengning Wang and Dan Feng and Wei Tong and
                     Jingning Liu and Zheng Li and Jiayi Chang and
                     Yang Zhang and Bing Wu and Jie Xu and Wei Zhao and
                     Yilin Li and Ruoxi Ren},
  title           = {Cross-point Resistive Memory: Nonideal Properties and
                     Solutions},
  journal         = j-TODAES,
  volume          = {24},
  number          = {4},
  pages           = {46:1--46:37},
  month           = jul,
  year            = {2019},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/3325067},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Jan 30 09:00:31 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3325067},
  abstract        = {Emerging computational resistive memory is promising to
                     overcome the challenges of scalability and energy
                     efficiency that DRAM faces and also break through the
                     memory wall bottleneck. However, cell-level and
                     array-level nonideal properties of resistive \ldots{}},
  acknowledgement = ack-nhfb,
  articleno       = {46},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  journal-URL     = {https://dl.acm.org/loi/todaes},
}

@Article{Jun:2019:FTT,
  author          = {Jaeyung Jun and Yoonah Paik and Gyeong Il Min and
                     Seon Wook Kim and Youngsun Han},
  title           = {Fault Tolerance Technique Offlining Faulty Blocks by Heap
                     Memory Management},
  journal         = j-TODAES,
  volume          = {24},
  number          = {4},
  pages           = {47:1--47:25},
  month           = jul,
  year            = {2019},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/3329079},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Jan 30 09:00:31 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3329079},
  abstract        = {As dynamic random access memory (DRAM) cells continue to
                     be scaled down for higher density and capacity, they
                     have more faults. Thus, DRAM reliability becomes a major
                     concern in computer systems. Previous studies have
                     proposed many techniques preserving \ldots{}},
  acknowledgement = ack-nhfb,
  articleno       = {47},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  journal-URL     = {https://dl.acm.org/loi/todaes},
}

@Article{Vegesna:2019:NRM,
  author          = {S. M. Srinivasavarma Vegesna and
                     Ashok Chakravarthy Nara and Noor Mahammad Sk},
  title           = {A Novel Rule Mapping on {TCAM} for Power Efficient Packet
                     Classification},
  journal         = j-TODAES,
  volume          = {24},
  number          = {5},
  pages           = {48:1--48:23},
  month           = oct,
  year            = {2019},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/3328103},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Jan 30 09:00:32 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3328103},
  abstract        = {Packet Classification is the enabling function performed
                     in commodity switches for providing various services
                     such as access control, intrusion detection, load
                     balancing, and so on. Ternary Content Addressable
                     Memories (TCAMs) are the de facto standard \ldots{}},
  acknowledgement = ack-nhfb,
  articleno       = {48},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  journal-URL     = {https://dl.acm.org/loi/todaes},
}

@Article{Wang:2019:ITD,
  author          = {Hongfei Wang and Kun He},
  title           = {Improving Test and Diagnosis Efficiency through Ensemble
                     Reduction and Learning},
  journal         = j-TODAES,
  volume          = {24},
  number          = {5},
  pages           = {49:1--49:26},
  month           = oct,
  year            = {2019},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/3328754},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Jan 30 09:00:32 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3328754},
  abstract        = {Machine learning is a powerful lever for developing,
                     improving, and optimizing test methodologies to cope
                     with the demand from the advanced nodes. Ensemble
                     methods are a particular learning paradigm that uses
                     multiple models to boost performance. In \ldots{}},
  acknowledgement = ack-nhfb,
  articleno       = {49},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  journal-URL     = {https://dl.acm.org/loi/todaes},
}

@Article{Cakir:2019:RCH,
  author          = {Burcin Cakir and Sharad Malik},
  title           = {Revealing Cluster Hierarchy in Gate-level {ICs} Using
                     Block Diagrams and Cluster Estimates of Circuit
                     Embeddings},
  journal         = j-TODAES,
  volume          = {24},
  number          = {5},
  pages           = {50:1--50:19},
  month           = oct,
  year            = {2019},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/3329081},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Jan 30 09:00:32 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3329081},
  abstract        = {Contemporary integrated circuits (ICs) are increasingly
                     being constructed using intellectual property blocks
                     (IPs) obtained from third parties in a globalized supply
                     chain. The increased vulnerability to adversarial
                     changes during this untrusted supply \ldots{}},
  acknowledgement = ack-nhfb,
  articleno       = {50},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  journal-URL     = {https://dl.acm.org/loi/todaes},
}

@Article{Li:2019:SIP,
  author          = {Tengtao Li and Sachin S. Sapatnekar},
  title           = {Stress-Induced Performance Shifts in {$3$D} {DRAMs}},
  journal         = j-TODAES,
  volume          = {24},
  number          = {5},
  pages           = {51:1--51:21},
  month           = oct,
  year            = {2019},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/3331527},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Jan 30 09:00:32 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3331527},
  abstract        = {3D-stacked DRAMs can significantly increase cell density
                     and bandwidth while also lowering power consumption.
                     However, 3D structures experience significant
                     thermomechanical stress due to the differential rate of
                     contraction of the constituent materials, \ldots{}},
  acknowledgement = ack-nhfb,
  articleno       = {51},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  journal-URL     = {https://dl.acm.org/loi/todaes},
}

@Article{Chakraborty:2019:ERL,
  author          = {Shounak Chakraborty and Hemangee K. Kapoor},
  title           = {Exploring the Role of Large Centralised Caches in Thermal
                     Efficient Chip Design},
  journal         = j-TODAES,
  volume          = {24},
  number          = {5},
  pages           = {52:1--52:28},
  month           = oct,
  year            = {2019},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/3339850},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Jan 30 09:00:32 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3339850},
  abstract        = {In the era of short channel length, Dynamic Thermal
                     Management (DTM) has become a challenging task for the
                     architects and designers engineering modern Chip
                     Multi-Processors (CMPs). Ever-increasing demand of
                     processing power along with the developed \ldots{}},
  acknowledgement = ack-nhfb,
  articleno       = {52},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  journal-URL     = {https://dl.acm.org/loi/todaes},
}

@Article{Choi:2019:RDR,
  author          = {Kyu Hyun Choi and Jaeyung Jun and Minseong Kim and
                     Seon Wook Kim},
  title           = {Reducing {DRAM} Refresh Rate Using Retention Time Aware
                     Universal Hashing Redundancy Repair},
  journal         = j-TODAES,
  volume          = {24},
  number          = {5},
  pages           = {53:1--53:31},
  month           = oct,
  year            = {2019},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/3339851},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Jan 30 09:00:32 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/hash.bib;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3339851},
  abstract        = {As the device capacity of Dynamic Random Access Memory
                     (DRAM) increases, refresh operation becomes a
                     significant contributory factor toward total power
                     consumption and memory throughput of the device. To
                     reduce the problems associated with the refresh
                     \ldots{}},
  acknowledgement = ack-nhfb,
  articleno       = {53},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  journal-URL     = {https://dl.acm.org/loi/todaes},
}

@Article{Li:2019:TMF,
  author          = {Xiangwei Li and Douglas L. Maskell},
  title           = {Time-Multiplexed {FPGA} Overlay Architectures: a Survey},
  journal         = j-TODAES,
  volume          = {24},
  number          = {5},
  pages           = {54:1--54:19},
  month           = oct,
  year            = {2019},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/3339861},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Jan 30 09:00:32 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3339861},
  abstract        = {This article presents a comprehensive survey of
                     time-multiplexed (TM) FPGA overlays from the research
                     literature. These overlays are categorized based on
                     their implementation into two groups: processor-based
                     overlays, as their implementation follows \ldots{}},
  acknowledgement = ack-nhfb,
  articleno       = {54},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  journal-URL     = {https://dl.acm.org/loi/todaes},
}

@Article{Gade:2019:EEC,
  author          = {Sri Harsha Gade and M. Meraj Ahmed and Sujay Deb and
                     Amlan Ganguly},
  title           = {Energy Efficient Chip-to-Chip Wireless Interconnection
                     for Heterogeneous Architectures},
  journal         = j-TODAES,
  volume          = {24},
  number          = {5},
  pages           = {55:1--55:27},
  month           = oct,
  year            = {2019},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/3340109},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Jan 30 09:00:32 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3340109},
  abstract        = {Heterogeneous multichip architectures have gained
                     significant interest in high-performance computing
                     clusters to cater to a wide range of applications. In
                     particular, heterogeneous systems with multiple
                     multicore CPUs, GPUs, and memory have become \ldots{}},
  acknowledgement = ack-nhfb,
  articleno       = {55},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  journal-URL     = {https://dl.acm.org/loi/todaes},
}

@Article{Osawa:2019:ADR,
  author          = {Hisashi Osawa and Yuko Hara-Azumi},
  title           = {Approximate Data Reuse-based Accelerator Design for
                     Embedded Processor},
  journal         = j-TODAES,
  volume          = {24},
  number          = {5},
  pages           = {56:1--56:25},
  month           = oct,
  year            = {2019},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/3342098},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Jan 30 09:00:32 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3342098},
  abstract        = {Due to increasing diversity and complexity of
                     applications in embedded systems, accelerator designs
                     trading-off area/energy-efficiency and
                     design-productivity are becoming a further crucial
                     issue. Targeting applications in the category of
                     Recognition, \ldots{}},
  acknowledgement = ack-nhfb,
  articleno       = {56},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  journal-URL     = {https://dl.acm.org/loi/todaes},
}

@Article{Raval:2019:III,
  author          = {Rajkumar K. Raval and Atta Badii},
  title           = {Investigating the Impact of Image Content on the Energy
                     Efficiency of Hardware-accelerated Digital Spatial
                     Filters},
  journal         = j-TODAES,
  volume          = {24},
  number          = {5},
  pages           = {57:1--57:34},
  month           = oct,
  year            = {2019},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/3341819},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Jan 30 09:00:32 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3341819},
  abstract        = {Battery-operated low-power portable computing devices are
                     becoming an inseparable part of human daily life. One of
                     the major goals is to achieve the longest battery life
                     in such a device. Additionally, the need for performance
                     in processing multimedia \ldots{}},
  acknowledgement = ack-nhfb,
  articleno       = {57},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  journal-URL     = {https://dl.acm.org/loi/todaes},
}

@Article{Bonna:2019:MSD,
  author          = {Ricardo Bonna and Denis S. Loubach and
                     George Ungureanu and Ingo Sander},
  title           = {Modeling and Simulation of Dynamic Applications Using
                     Scenario-Aware Dataflow},
  journal         = j-TODAES,
  volume          = {24},
  number          = {5},
  pages           = {58:1--58:29},
  month           = oct,
  year            = {2019},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/3342997},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Jan 30 09:00:32 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3342997},
  abstract        = {The tradeoff between analyzability and expressiveness is
                     a key factor when choosing a suitable dataflow model of
                     computation (MoC) for designing, modeling, and
                     simulating applications considering a formal base. A
                     large number of techniques and analysis \ldots{}},
  acknowledgement = ack-nhfb,
  articleno       = {58},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  journal-URL     = {https://dl.acm.org/loi/todaes},
}

@Article{Jiang:2019:EEQ,
  author =       "Li Jiang and Zhuoran Song and Haiyue Song and Chengwen
                 Xu and Qiang Xu and Naifeng Jing and Weifeng Zhang and
                 Xiaoyao Liang",
  title =        "Energy-Efficient and Quality-Assured Approximate
                 Computing Framework Using a Co-Training Method",
  journal =      j-TODAES,
  volume =       "24",
  number =       "6",
  pages =        "59:1--59:25",
  month =        nov,
  year =         "2019",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3342239",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jan 30 09:00:32 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3342239",
  abstract =     "Approximate computing is a promising design paradigm
                 that introduces a new dimension---error---into the
                 original design space. By allowing the inexact
                 computation in error-tolerance applications,
                 approximate computing can gain both performance and
                 energy \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "59",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Charles:2019:ECR,
  author          = {Subodha Charles and Alif Ahmed and Umit Y. Ogras and
                     Prabhat Mishra},
  title           = {Efficient Cache Reconfiguration Using Machine Learning in
                     {NoC}-Based Many-Core {CMPs}},
  journal         = j-TODAES,
  volume          = {24},
  number          = {6},
  pages           = {60:1--60:23},
  month           = nov,
  year            = {2019},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/3350422},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Jan 30 09:00:32 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3350422},
  abstract        = {Dynamic cache reconfiguration (DCR) is an effective
                     technique to optimize energy consumption in many-core
                     architectures. While early work on DCR has shown
                     promising energy saving opportunities, prior techniques
                     are not suitable for many-core \ldots{}},
  acknowledgement = ack-nhfb,
  articleno       = {60},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  journal-URL     = {https://dl.acm.org/loi/todaes},
}

@Article{Song:2019:COR,
  author          = {Youngsoo Song and Daijoon Hyun and Jingon Lee and
                     Jinwook Jung and Youngsoo Shin},
  title           = {Cut Optimization for Redundant Via Insertion in
                     Self-Aligned Double Patterning},
  journal         = j-TODAES,
  volume          = {24},
  number          = {6},
  pages           = {61:1--61:21},
  month           = nov,
  year            = {2019},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/3355391},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Jan 30 09:00:32 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3355391},
  abstract        = {Redundant via (RV) insertion helps prevent via defects
                     and hence leads to yield enhancement. However, RV
                     insertion in self-aligned double patterning (SADP)
                     processes is challenging since cut optimization has to
                     be considered together. In SADP, parallel \ldots{}},
  acknowledgement = ack-nhfb,
  articleno       = {61},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  journal-URL     = {https://dl.acm.org/loi/todaes},
}

@Article{Lee:2019:IEC,
  author          = {Dongjin Lee and Sourav Das and Janardhan Rao Doppa and
                     Partha Pratim Pande and Krishnendu Chakrabarty},
  title           = {Impact of Electrostatic Coupling on Monolithic
                     {$3$D}-enabled Network on Chip},
  journal         = j-TODAES,
  volume          = {24},
  number          = {6},
  pages           = {62:1--62:22},
  month           = nov,
  year            = {2019},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/3357158},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Jan 30 09:00:32 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3357158},
  abstract        = {Monolithic-3D-integration (M3D) improves the performance
                     and energy efficiency of 3D ICs over conventional
                     through-silicon-vias-based counterparts. The smaller
                     dimensions of monolithic inter-tier vias offer
                     high-density integration, the flexibility of \ldots{}},
  acknowledgement = ack-nhfb,
  articleno       = {62},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  journal-URL     = {https://dl.acm.org/loi/todaes},
}

@Article{Kukkala:2019:JSF,
  author          = {Vipin Kumar Kukkala and Sudeep Pasricha and
                     Thomas Bradley},
  title           = {{JAMS-SG}: a Framework for Jitter-Aware Message
                     Scheduling for Time-Triggered Automotive Networks},
  journal         = j-TODAES,
  volume          = {24},
  number          = {6},
  pages           = {63:1--63:31},
  month           = nov,
  year            = {2019},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/3355392},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Jan 30 09:00:32 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3355392},
  abstract        = {Time-triggered automotive networks use time-triggered
                     protocols (FlexRay, TTEthernet, etc.) for periodic
                     message transmissions that often originate from safety
                     and time-critical applications. One of the major
                     challenges with time-triggered transmissions \ldots{}},
  acknowledgement = ack-nhfb,
  articleno       = {63},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  journal-URL     = {https://dl.acm.org/loi/todaes},
}

@Article{Asgarieh:2019:SHA,
  author = {Yashar Asgarieh and Bill Lin},
  title = {Smart-Hop Arbitration Request Propagation: Avoiding Quadratic
    Arbitration Complexity and False Negatives in {SMART NoCs}},
  journal = j-TODAES,
  year = {2019},
  month = nov,
  volume = {24},
  number = {6},
  pages = {64:1--64:25},
  articleno = {64},
  DOI = {https://doi.org/10.1145/3356235},
  URL = {https://dl.acm.org/doi/abs/10.1145/3356235},
  CODEN = {ATASFO},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
  abstract = {SMART-based NoC designs achieve ultra-low latencies by enabling
    flits to traverse multiple hops within a single clock cycle.
    Notwithstanding the clear performance benefits, SMART-based NoCs suffer
    from several shortcomings: each router must arbitrate \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate = {Thu Jan 30 09:00:32 MST 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

@Article{Shamsi:2019:IPS,
  author = {Kaveh Shamsi and Meng Li and Kenneth Plaks and Saverio Fazzari and
    David Z. Pan and Yier Jin},
  title = {{IP} Protection and Supply Chain Security through Logic
    Obfuscation: a Systematic Overview},
  journal = j-TODAES,
  year = {2019},
  month = nov,
  volume = {24},
  number = {6},
  pages = {65:1--65:36},
  articleno = {65},
  DOI = {https://doi.org/10.1145/3342099},
  URL = {https://dl.acm.org/doi/abs/10.1145/3342099},
  CODEN = {ATASFO},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
  abstract = {The globalization of the semiconductor supply chain introduces
    ever-increasing security and privacy risks. Two major concerns are IP
    theft through reverse engineering and malicious modification of the
    design. The latter concern in part relies on \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate = {Thu Jan 30 09:00:32 MST 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

@Article{Wang:2019:RTS,
  author = {Kankan Wang and Xu Jiang and Nan Guan and Di Liu and Weichen Liu
    and Qingxu Deng},
  title = {Real-Time Scheduling of {DAG} Tasks with Arbitrary Deadlines},
  journal = j-TODAES,
  year = {2019},
  month = nov,
  volume = {24},
  number = {6},
  pages = {66:1--66:22},
  articleno = {66},
  DOI = {https://doi.org/10.1145/3358603},
  URL = {https://dl.acm.org/doi/abs/10.1145/3358603},
  CODEN = {ATASFO},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
  abstract = {Real-time and embedded systems are shifting from single-core to
    multi-core processors, on which the software must be parallelized to
    fully utilize the computation capacity of the hardware. Recently, much
    work has been done on real-time scheduling of \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate = {Thu Jan 30 09:00:32 MST 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

@Article{Chen:2019:OTL,
  author = {Yung-Chih Chen and Li-Cheng Zheng and Fu-Lian Wong},
  title = {Optimization of Threshold Logic Networks with Node Merging and
    Wire Replacement},
  journal = j-TODAES,
  year = {2019},
  month = nov,
  volume = {24},
  number = {6},
  pages = {67:1--67:18},
  articleno = {67},
  DOI = {https://doi.org/10.1145/3358748},
  URL = {https://dl.acm.org/doi/abs/10.1145/3358748},
  CODEN = {ATASFO},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
  abstract = {In this article, we present an optimization method for threshold
    logic networks (TLNs) based on observability don't-care-based node
    merging. To reduce gate count in a TLN, it iteratively merges two gates
    that are functionally equivalent or whose \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate = {Thu Jan 30 09:00:32 MST 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

@Article{Yan:2019:TSN,
  author = {Jin-Tai Yan},
  title = {Two-sided Net Untangling with Internal Detours for Single-layer
    Bus Routing},
  journal = j-TODAES,
  year = {2019},
  month = nov,
  volume = {24},
  number = {6},
  pages = {68:1--68:23},
  articleno = {68},
  DOI = {https://doi.org/10.1145/3363184},
  URL = {https://dl.acm.org/doi/abs/10.1145/3363184},
  CODEN = {ATASFO},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
  abstract = {It is known that one-sided net untangling can be used to untangle
    the twisted nets inside a bus for single-layer bus routing. However,
    limited space behind one pin-row may make one-sided net untangling
    unsuccessful for single-layer bus routing. In this \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate = {Thu Jan 30 09:00:32 MST 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

@Article{Wang:2019:RSE,
  author = {Hai Wang and Tao Xiao and Darong Huang and Lang Zhang and Chi
    Zhang and He Tang and Yuan Yuan},
  title = {Runtime Stress Estimation for Three-dimensional {IC} Reliability
    Management Using Artificial Neural Network},
  journal = j-TODAES,
  year = {2019},
  month = nov,
  volume = {24},
  number = {6},
  pages = {69:1--69:29},
  articleno = {69},
  DOI = {https://doi.org/10.1145/3363185},
  URL = {https://dl.acm.org/doi/abs/10.1145/3363185},
  CODEN = {ATASFO},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
  abstract = {Heat dissipation and the related thermal-mechanical stress
    problems are the major obstacles in the development of the
    three-dimensional integrated circuit (3D IC). Reliability management
    techniques can be used to alleviate such problems and enhance the
    \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate = {Thu Jan 30 09:00:32 MST 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

@Article{Mahfouzi:2020:SAR,
  author = {Rouhollah Mahfouzi and Amir Aminifar and Soheil Samii and Petru
    Eles and Zebo Peng},
  title = {Security-aware Routing and Scheduling for Control Applications on
    {Ethernet TSN} Networks},
  journal = j-TODAES,
  year = {2020},
  month = jan,
  volume = {25},
  number = {1},
  pages = {1:1--1:26},
  articleno = {1},
  DOI = {https://doi.org/10.1145/3358604},
  URL = {https://dl.acm.org/doi/abs/10.1145/3358604},
  CODEN = {ATASFO},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
  abstract = {Today, it is common knowledge in the cyber-physical systems
    domain that the tight interaction between the cyber and physical elements
    provides the possibility of substantially improving the performance of
    these systems that is otherwise impossible. On \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate = {Thu Jan 30 09:00:33 MST 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

@Article{Shi:2020:ASF,
  author = {Guoyong Shi},
  title = {Automatic Stage-form Circuit Reduction for Multistage Opamp Design
    Equation Generation},
  journal = j-TODAES,
  year = {2020},
  month = jan,
  volume = {25},
  number = {1},
  pages = {2:1--2:26},
  articleno = {2},
  DOI = {https://doi.org/10.1145/3363499},
  URL = {https://dl.acm.org/doi/abs/10.1145/3363499},
  CODEN = {ATASFO},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
  abstract = {An automatic stage-form circuit reduction method for multistage
    operational amplifiers (opamps) is proposed. A tool based on this method
    can reduce a multistage opamp into a condensed stage-form macromodel,
    from which design equations can be generated \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate = {Thu Jan 30 09:00:33 MST 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

@Article{Wang:2020:IBT,
  author =       "Chih-Hao Wang and Tong-Yu Hsieh",
  title =        "An Implication-based Test Scheme for Both Diagnosis
                 and Concurrent Error Detection Applications",
  journal =      j-TODAES,
  volume =       "25",
  number =       "1",
  pages =        "3:1--3:27",
  month =        jan,
  year =         "2020",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3364681",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jan 30 09:00:33 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3364681",
  abstract =     "This article describes a diagnosis-aware hybrid
                 concurrent error detection (DAH-CED) scheme that can
                 facilitate both off-line and on-line test applications.
                 By using the proposed scheme, not only the probability
                 of detecting errors (on-line) but also \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "3",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Hoque:2020:HPO,
  author = {Tamzidul Hoque and Kai Yang and Robert Karam and Shahin Tajik and
    Domenic Forte and Mark Tehranipoor and Swarup Bhunia},
  title = {Hidden in Plaintext: an Obfuscation-based Countermeasure against
    {FPGA} Bitstream Tampering Attacks},
  journal = j-TODAES,
  year = {2020},
  month = jan,
  volume = {25},
  number = {1},
  pages = {4:1--4:32},
  articleno = {4},
  DOI = {https://doi.org/10.1145/3361147},
  URL = {https://dl.acm.org/doi/abs/10.1145/3361147},
  CODEN = {ATASFO},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
  abstract = {Field Programmable Gate Arrays (FPGAs) have become an attractive
    choice for diverse applications due to their reconfigurability and unique
    security features. However, designs mapped to FPGAs are prone to
    malicious modifications or tampering of critical \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate = {Thu Jan 30 09:00:33 MST 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

@Article{Bhattacharjee:2020:BCA,
  author = {Sukanta Bhattacharjee and Jack Tang and Sudip Poddar and Mohamed
    Ibrahim and Ramesh Karri and Krishnendu Chakrabarty},
  title = {Bio-chemical Assay Locking to Thwart Bio-{IP} Theft},
  journal = j-TODAES,
  year = {2020},
  month = jan,
  volume = {25},
  number = {1},
  pages = {5:1--5:20},
  articleno = {5},
  DOI = {https://doi.org/10.1145/3365579},
  URL = {https://dl.acm.org/doi/abs/10.1145/3365579},
  CODEN = {ATASFO},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
  abstract = {It is expected that as digital microfluidic biochips (DMFBs)
    mature, the hardware design flow will begin to resemble the current
    practice in the semiconductor industry: design teams send chip layouts to
    third-party foundries for fabrication. These \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate = {Thu Jan 30 09:00:33 MST 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

@Article{Malekpour:2020:HTM,
  author =       "Amin Malekpour and Roshan Ragel and Tuo Li and Haris
                 Javaid and Aleksandar Ignjatovic and Sri Parameswaran",
  title =        "Hardware {Trojan} Mitigation in Pipelined {MPSoCs}",
  journal =      j-TODAES,
  volume =       "25",
  number =       "1",
  pages =        "6:1--6:27",
  month =        jan,
  year =         "2020",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3365578",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jan 30 09:00:33 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3365578",
  abstract =     "Multiprocessor System-on-Chip (MPSoC) has become
                 necessary due to the billions of transistors
                 available to the designer, the need for fast design
                 turnaround times, and the power wall. Thus, present
                 embedded systems are designed with MPSoCs, and one
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "6",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Pan:2020:ARP,
  author = {Renjian Pan and Jun Tao and Yangfeng Su and Dian Zhou and Xuan
    Zeng and Xin Li},
  title = {Analog\slash {RF} Post-silicon Tuning via {Bayesian} Optimization},
  journal = j-TODAES,
  year = {2020},
  month = jan,
  volume = {25},
  number = {1},
  pages = {7:1--7:17},
  articleno = {7},
  DOI = {https://doi.org/10.1145/3365577},
  URL = {https://dl.acm.org/doi/abs/10.1145/3365577},
  CODEN = {ATASFO},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
  abstract = {Tunable analog/RF circuit has emerged as a promising technique
    to address the significant performance uncertainties caused by process
    variations. To optimize these tunable circuits after fabrication, most
    existing post-silicon programming methods are \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate = {Thu Jan 30 09:00:33 MST 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

@Article{Xu:2020:MCM,
  author = {Qi Xu and Hao Geng and Song Chen and Bei Yu and Feng Wu},
  title = {Memristive Crossbar Mapping for Neuromorphic Computing Systems on
    {$3$D} {IC}},
  journal = j-TODAES,
  year = {2020},
  month = jan,
  volume = {25},
  number = {1},
  pages = {8:1--8:19},
  articleno = {8},
  DOI = {https://doi.org/10.1145/3365576},
  URL = {https://dl.acm.org/doi/abs/10.1145/3365576},
  CODEN = {ATASFO},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
  abstract = {In recent years, neuromorphic computing systems based on
    memristive crossbar have provided a promising solution to enable
    acceleration of neural networks. However, most of the neural networks
    used in realistic applications are often sparse. If such \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate = {Thu Jan 30 09:00:33 MST 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

@Article{Parane:2020:LDL,
  author = {Khyamling Parane and Prabhu Prasad B. M. and Basavaraj Talawar},
  title = {{LBNoC}: Design of Low-latency Router Architecture with Lookahead
    Bypass for Network-on-Chip Using {FPGA}},
  journal = j-TODAES,
  year = {2020},
  month = jan,
  volume = {25},
  number = {1},
  pages = {9:1--9:26},
  articleno = {9},
  DOI = {https://doi.org/10.1145/3365994},
  URL = {https://dl.acm.org/doi/abs/10.1145/3365994},
  CODEN = {ATASFO},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
  abstract = {An FPGA-based Network-on-Chip (NoC) using a low-latency router
    with a look-ahead bypass (LBNoC) is discussed in this article. The
    proposed design targets the optimized area with improved network
    performance. The techniques such as single-cycle router \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate = {Thu Jan 30 09:00:33 MST 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

@Article{Roy:2020:HGM,
  author = {Pushpita Roy and Ansuman Banerjee and Robert Wille and Bhargab B.
    Bhattacharya},
  title = {Harnessing the Granularity of Micro-Electrode-Dot-Array
    Architectures for Optimizing Droplet Routing in Biochips},
  journal = j-TODAES,
  year = {2020},
  month = jan,
  volume = {25},
  number = {1},
  pages = {10:1--10:37},
  articleno = {10},
  DOI = {https://doi.org/10.1145/3365993},
  URL = {https://dl.acm.org/doi/abs/10.1145/3365993},
  CODEN = {ATASFO},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
  abstract = {In this article, we consider the problem of droplet routing for
    Microelectrode-Dot-Array (MEDA) biochips. MEDA biochips today provide a
    host of useful features for droplet movement by making it possible to
    manoeuvre droplets at a much finer granularity \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate = {Thu Jan 30 09:00:33 MST 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

@Article{Esmaili:2020:EAS,
  author = {Amirhossein Esmaili and Mahdi Nazemi and Massoud Pedram},
  title = {Energy-aware Scheduling of Task Graphs with Imprecise Computations
    and End-to-end Deadlines},
  journal = j-TODAES,
  year = {2020},
  month = jan,
  volume = {25},
  number = {1},
  pages = {11:1--11:21},
  articleno = {11},
  DOI = {https://doi.org/10.1145/3365999},
  URL = {https://dl.acm.org/doi/abs/10.1145/3365999},
  CODEN = {ATASFO},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
  abstract = {Imprecise computations allow scheduling algorithms developed for
    energy-constrained computing devices to trade off output quality with
    utilization of system resources. The goal of such scheduling algorithms
    is to utilize imprecise computations to find a \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate = {Thu Jan 30 09:00:33 MST 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

@Article{Wang:2020:HER,
  author = {Hongfei Wang and Jianwen Li and Kun He},
  title = {Hierarchical Ensemble Reduction and Learning for
    Resource-constrained Computing},
  journal = j-TODAES,
  year = {2020},
  month = jan,
  volume = {25},
  number = {1},
  pages = {12:1--12:21},
  articleno = {12},
  DOI = {https://doi.org/10.1145/3365224},
  URL = {https://dl.acm.org/doi/abs/10.1145/3365224},
  CODEN = {ATASFO},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
  abstract = {Generic tree ensembles (such as Random Forest, RF) rely on a
    substantial amount of individual models to attain desirable performance.
    The cost of maintaining a large ensemble could become prohibitive in
    applications where computing resources are \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate = {Thu Jan 30 09:00:33 MST 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

@Article{Tseng:2020:MAU,
  author =       "Tien-Hung Tseng and Chung-Han Chou and Kai-Chiang Wu",
  title =        "Making Aging Useful by Recycling Aging-induced Clock
                 Skew",
  journal =      j-TODAES,
  volume =       "25",
  number =       "2",
  pages =        "13:1--13:24",
  month =        mar,
  year =         "2020",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3363186",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jan 30 09:00:34 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3363186",
  abstract =     "Device aging, which causes significant loss on circuit
                 performance and lifetime, has been a primary factor in
                 reliability degradation of nanoscale designs. In this
                 article, we propose to take advantage of aging-induced
                 clock skews (i.e., make them \ldots{}).",
  acknowledgement = ack-nhfb,
  articleno =    "13",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Richthammer:2020:SSD,
  author =       "Valentina Richthammer and Fabian Fassnacht and Michael
                 Gla{\ss}",
  title =        "Search-space Decomposition for System-level Design
                 Space Exploration of Embedded Systems",
  journal =      j-TODAES,
  volume =       "25",
  number =       "2",
  pages =        "14:1--14:32",
  month =        mar,
  year =         "2020",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3369388",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jan 30 09:00:34 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3369388",
  abstract =     "The development of large-scale multi- and many-core
                 platforms and the rising complexity of embedded
                 applications have led to a significant increase in the
                 number of implementation possibilities for a single
                 application. Furthermore, rising demands on \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "14",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{He:2020:LHD,
  author =       "Xu He and Yu Deng and Shizhe Zhou and Rui Li and Yao
                 Wang and Yang Guo",
  title =        "Lithography Hotspot Detection with {FFT}-based Feature
                 Extraction and Imbalanced Learning Rate",
  journal =      j-TODAES,
  volume =       "25",
  number =       "2",
  pages =        "15:1--15:21",
  month =        mar,
  year =         "2020",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3372044",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jan 30 09:00:34 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3372044",
  abstract =     "With the increasing gap between transistor feature
                 size and lithography manufacturing capability, the
                 detection of lithography hotspots becomes a key stage
                 of physical verification flow to enhance manufacturing
                 yield. Although machine learning \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "15",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Tadros:2020:TFT,
  author =       "Ramy N. Tadros and Peter A. Beerel",
  title =        "A Theoretical Foundation for Timing Synchronous
                 Systems Using Asynchronous Structures",
  journal =      j-TODAES,
  volume =       "25",
  number =       "2",
  pages =        "16:1--16:28",
  month =        mar,
  year =         "2020",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3373355",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Mar 18 07:50:32 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3373355",
  abstract =     "Timing of synchronous systems is an everlasting
                 stumbling block to the booming demands for lower power
                 consumption and higher operation speeds in the
                 electronics industry. This hardship is aggravated by
                 the growing levels of variability in state-of-the-art
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "16",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Liang:2020:SAE,
  author =       "Tung-Che Liang and Mohammed Shayan and Krishnendu
                 Chakrabarty and Ramesh Karri",
  title =        "Secure Assay Execution on {MEDA} Biochips to Thwart
                 Attacks Using Real-Time Sensing",
  journal =      j-TODAES,
  volume =       "25",
  number =       "2",
  pages =        "17:1--17:25",
  month =        mar,
  year =         "2020",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3374213",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jan 30 09:00:34 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3374213",
  abstract =     "Digital microfluidic biochips (DMFBs) have emerged as
                 a promising platform for DNA sequencing, clinical
                 chemistry, and point-of-care diagnostics. Recent
                 research has shown that DMFBs are susceptible to
                 various types of malicious attacks. Defenses \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "17",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Pomeranz:2020:TFT,
  author =       "Irith Pomeranz",
  title =        "Target Faults for Test Compaction Based on Multicycle
                 Tests",
  journal =      j-TODAES,
  volume =       "25",
  number =       "2",
  pages =        "18:1--18:14",
  month =        mar,
  year =         "2020",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3375278",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jan 30 09:00:34 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3375278",
  abstract =     "The use of multicycle tests, with several functional
                 capture cycles between scan operations, contributes
                 significantly to the ability to compact a test set.
                 Multicycle tests have the added benefit that they can
                 contribute to the detection of defects \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "18",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Olney:2020:TFB,
  author = {Brooks Olney and Robert Karam},
  title = {Tunable {FPGA} Bitstream Obfuscation with {Boolean} Satisfiability
    Attack Countermeasure},
  journal = j-TODAES,
  year = {2020},
  month = mar,
  volume = {25},
  number = {2},
  pages = {19:1--19:22},
  articleno = {19},
  DOI = {https://doi.org/10.1145/3373638},
  URL = {https://dl.acm.org/doi/abs/10.1145/3373638},
  CODEN = {ATASFO},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
  abstract = {Field Programmable Gate Arrays (FPGAs) are seeing a surge in
    usage in many emerging application domains, where the in-field
    reconfigurability is an attractive characteristic for diverse
    applications with dynamic design requirements, such as cloud \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate = {Wed Mar 18 07:50:32 MDT 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

%%% ACM TODAES 25(2), March 2020, article 20.
@Article{Yang:2020:HSS,
  author          = {Yajun Yang and Zhang Chen and Yuan Liu and Tsung-Yi Ho
                     and Yier Jin and Pingqiang Zhou},
  title           = {How Secure Is Split Manufacturing in Preventing
                     Hardware {Trojan}?},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  volume          = {25},
  number          = {2},
  articleno       = {20},
  pages           = {20:1--20:23},
  month           = mar,
  year            = {2020},
  DOI             = {https://doi.org/10.1145/3378163},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3378163},
  journal-URL     = {https://dl.acm.org/loi/todaes},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  abstract        = {With the trend of outsourcing fabrication, split
                     manufacturing is regarded as a promising way to both
                     acquire the high-end nodes in untrusted external
                     foundries and protect the design from potential
                     attackers. However, in this article, we show that split
                     manufacturing is not inherently secure, that a hardware
                     Trojan attacker can still recover necessary information
                     with a proximity-based or a simulated-annealing-based
                     mapping approach together with a probability-based or
                     net-based pruning method at the placement level. We
                     further propose a defense approach by moving the
                     insecure gates away from their easily attacked
                     candidate locations. Results on benchmark circuits show
                     the effectiveness of our proposed methods.},
  acknowledgement = ack-nhfb,
  bibdate         = {Wed Mar 18 07:50:32 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

%%% ACM TODAES 25(2), March 2020, article 21.
%%% The abstract is a truncated excerpt; it must contain only plain
%%% LaTeX-safe text (no publisher layout directives).
@Article{Pui:2020:LRB,
  author =       "Chak-Wa Pui and Evangeline F. Y. Young",
  title =        "{Lagrangian} Relaxation-Based Time-Division
                 Multiplexing Optimization for Multi-{FPGA} Systems",
  journal =      j-TODAES,
  volume =       "25",
  number =       "2",
  pages =        "21:1--21:23",
  month =        mar,
  year =         "2020",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3377551",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Mar 18 07:50:32 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3377551",
  abstract =     "To increase the resource utilization in multi-FPGA
                 (field-programmable gate array) systems, time-division
                 multiplexing (TDM) is a widely used technique to
                 accommodate a large number of inter-FPGA signals.
                 However, with this technique, the delay \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "21",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

%%% ACM TODAES 25(2), March 2020, article 22.
@Article{Yan:2020:SLO,
  author          = {Jin-Tai Yan},
  title           = {Single-Layer Obstacle-Aware Substrate Routing via
                     Iterative Pin Reassignment and Wire Assignment},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  volume          = {25},
  number          = {2},
  articleno       = {22},
  pages           = {22:1--22:21},
  month           = mar,
  year            = {2020},
  DOI             = {https://doi.org/10.1145/3378162},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3378162},
  journal-URL     = {https://dl.acm.org/loi/todaes},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  abstract        = {It is known that single-layer obstacle-aware substrate
                     routing is necessary for modern IC/Package designs. In
                     this article, given a set of two-pin nets and a set of
                     rectangular obstacles inside a single-layer routing
                     plane, a two-phase routing algorithm \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Wed Mar 18 07:50:32 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

%%% ACM TODAES 25(2), March 2020, article 23.
@Article{Sha:2020:FPT,
  author          = {Shi Sha and Ajinkya S. Bankar and Xiaokun Yang and
                     Wujie Wen and Gang Quan},
  title           = {On Fundamental Principles for Thermal-Aware Design on
                     Periodic Real-Time Multi-Core Systems},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  volume          = {25},
  number          = {2},
  articleno       = {23},
  pages           = {23:1--23:23},
  month           = mar,
  year            = {2020},
  DOI             = {https://doi.org/10.1145/3378063},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3378063},
  journal-URL     = {https://dl.acm.org/loi/todaes},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  abstract        = {With the exponential rise of the transistor count in
                     one chip, the thermal problem has become a pressing
                     issue in computing system design. While there have been
                     extensive methods and techniques published for design
                     optimization with thermal awareness, \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Wed Mar 18 07:50:32 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

%%% ACM TODAES 25(3), May 2020, article 24.
@Article{Nath:2020:RDB,
  author          = {Arijit Nath and Sukarn Agarwal and Hemangee K.
                     Kapoor},
  title           = {Reuse Distance-based Victim Cache for Effective
                     Utilisation of Hybrid Main Memory System},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  volume          = {25},
  number          = {3},
  articleno       = {24},
  pages           = {24:1--24:32},
  month           = may,
  year            = {2020},
  DOI             = {https://doi.org/10.1145/3380732},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3380732},
  journal-URL     = {https://dl.acm.org/loi/todaes},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  abstract        = {Hybrid main memories comprising DRAM and Non-volatile
                     memories (NVM) are projected as potential replacements
                     of the traditional DRAM-based memories. However,
                     traditional cache management policies designed for
                     improving the hit rate lack awareness of the \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue May 19 10:15:25 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

%%% ACM TODAES 25(3), May 2020, article 25.
@Article{Kamal:2020:ADF,
  author          = {Nishant Kamal and Ankur Gupta and Ananya Singla and
                     Shubham Tiwari and Parth Kohli and Sudip Roy and
                     Bhargab B. Bhattacharya},
  title           = {Architectural Design of Flow-Based Microfluidic
                     Biochips for Multi-Target Dilution of Biochemical
                     Fluids},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  volume          = {25},
  number          = {3},
  articleno       = {25},
  pages           = {25:1--25:34},
  month           = may,
  year            = {2020},
  DOI             = {https://doi.org/10.1145/3357604},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3357604},
  journal-URL     = {https://dl.acm.org/loi/todaes},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  abstract        = {Microfluidic technologies enable replacement of
                     time-consuming and complex steps of biochemical
                     laboratory protocols with a tiny chip. Sample
                     preparation (i.e., dilution or mixing of fluids) is one
                     of the primary tasks of any bioprotocol. In real-life
                     \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue May 19 10:15:25 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

%%% ACM TODAES 25(3), May 2020, article 26.
%%% Also listed in the cryptography2020.bib collection (see bibsource).
@Article{Nahiyan:2020:SCF,
  author          = {Adib Nahiyan and Jungmin Park and Miao He and Yousef
                     Iskander and Farimah Farahmandi and Domenic Forte and
                     Mark Tehranipoor},
  title           = {{SCRIPT}: a {CAD} Framework for Power Side-channel
                     Vulnerability Assessment Using Information Flow
                     Tracking and Pattern Generation},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  volume          = {25},
  number          = {3},
  articleno       = {26},
  pages           = {26:1--26:27},
  month           = may,
  year            = {2020},
  DOI             = {https://doi.org/10.1145/3383445},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3383445},
  journal-URL     = {https://dl.acm.org/loi/todaes},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  abstract        = {Power side-channel attacks (SCAs) have been proven to
                     be effective at extracting secret keys from hardware
                     implementations of cryptographic algorithms. Ideally,
                     the power side-channel leakage (PSCL) of hardware
                     designs of a cryptographic algorithm \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue May 19 10:15:25 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

%%% ACM TODAES 25(3), May 2020, article 27.
@Article{Chen:2020:SMB,
  author          = {Huili Chen and Seetal Potluri and Farinaz Koushanfar},
  title           = {Security of Microfluidic Biochip: Practical Attacks
                     and Countermeasures},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  volume          = {25},
  number          = {3},
  articleno       = {27},
  pages           = {27:1--27:29},
  month           = may,
  year            = {2020},
  DOI             = {https://doi.org/10.1145/3382127},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3382127},
  journal-URL     = {https://dl.acm.org/loi/todaes},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  abstract        = {With the advancement of system miniaturization and
                     automation, Lab-on-a-Chip (LoC) technology has
                     revolutionized traditional experimental procedures.
                     Microfluidic Biochip (MFB) is an emerging branch of LoC
                     with wide medical applications such as DNA \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue May 19 10:15:25 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

%%% ACM TODAES 25(3), May 2020, article 28.
@Article{Mandal:2020:EAO,
  author          = {Sumit K. Mandal and Ganapati Bhat and Janardhan Rao
                     Doppa and Partha Pratim Pande and Umit Y. Ogras},
  title           = {An Energy-aware Online Learning Framework for Resource
                     Management in Heterogeneous Platforms},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  volume          = {25},
  number          = {3},
  articleno       = {28},
  pages           = {28:1--28:26},
  month           = may,
  year            = {2020},
  DOI             = {https://doi.org/10.1145/3386359},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3386359},
  journal-URL     = {https://dl.acm.org/loi/todaes},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  abstract        = {Mobile platforms must satisfy the contradictory
                     requirements of fast response time and minimum energy
                     consumption as a function of dynamically changing
                     applications. To address this need, systems-on-chip
                     (SoC) that are at the heart of these devices \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue May 19 10:15:25 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

%%% ACM TODAES 25(3), May 2020, article 29.
@Article{Liu:2020:AFD,
  author          = {Mengyun Liu and Lixue Xia and Yu Wang and Krishnendu
                     Chakrabarty},
  title           = {Algorithmic Fault Detection for {RRAM}-based Matrix
                     Operations},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  volume          = {25},
  number          = {3},
  articleno       = {29},
  pages           = {29:1--29:31},
  month           = may,
  year            = {2020},
  DOI             = {https://doi.org/10.1145/3386360},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3386360},
  journal-URL     = {https://dl.acm.org/loi/todaes},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  abstract        = {An RRAM-based computing system (RCS) provides an
                     energy-efficient hardware implementation of
                     vector--matrix multiplication for machine-learning
                     hardware. However, it is vulnerable to faults due to
                     the immature RRAM fabrication process. We propose an
                     efficient fault tolerance method for RCS; the proposed
                     method, referred to as extended-ABFT (X-ABFT), is
                     inspired by algorithm-based fault tolerance (ABFT). We
                     utilize row checksums and test-input vectors to extract
                     signatures for fault detection and error correction. We
                     present a solution to alleviate the overflow problem
                     caused by the limited number of voltage levels for the
                     test-input signals. Simulation results show that for a
                     Hopfield classifier with faults in 5\% of its RRAM
                     cells, X-ABFT allows us to achieve nearly the same
                     classification accuracy as in the fault-free case.},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue May 19 10:15:25 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

%%% ACM TODAES 25(4), September 2020, article 30.
@Article{Paik:2020:GRT,
  author          = {Yoonah Paik and Seon Wook Kim and Dongha Jung and
                     Minseong Kim},
  title           = {Generating Representative Test Sequences from Real
                     Workload for Minimizing {DRAM} Verification Overhead},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  volume          = {25},
  number          = {4},
  articleno       = {30},
  pages           = {30:1--30:23},
  month           = sep,
  year            = {2020},
  DOI             = {https://doi.org/10.1145/3391891},
  URL             = {https://dl.acm.org/doi/10.1145/3391891},
  journal-URL     = {https://dl.acm.org/loi/todaes},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  abstract        = {Dynamic Random Access Memory (DRAM) standards have
                     evolved for higher bandwidth, larger capacity, and
                     lower power consumption, so their specifications have
                     become complicated to satisfy the design goals. These
                     complex implementations have significantly \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Thu Sep 3 14:31:04 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

%%% ACM TODAES 25(4), September 2020, article 31.
@Article{Jana:2020:HHC,
  author          = {Rajib Lochan Jana and Soumyajit Dey and Pallab
                     Dasgupta},
  title           = {A Hierarchical {HVAC} Control Scheme for Energy-aware
                     Smart Building Automation},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  volume          = {25},
  number          = {4},
  articleno       = {31},
  pages           = {31:1--31:33},
  month           = sep,
  year            = {2020},
  DOI             = {https://doi.org/10.1145/3393666},
  URL             = {https://dl.acm.org/doi/10.1145/3393666},
  journal-URL     = {https://dl.acm.org/loi/todaes},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  abstract        = {Heating ventilation and air conditioning (HVAC)
                     systems usually account for the highest percentage of
                     overall energy usage in large-sized smart building
                     infrastructures. The performance of HVAC control
                     systems for large buildings strongly depend on the
                     \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Thu Sep 3 14:31:04 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

%%% ACM TODAES 25(4), September 2020, article 32.
%%% The abstract is a truncated excerpt; the truncation point is marked
%%% with \ldots{} as in the other entries of this file.
@Article{Chatterjee:2020:MLA,
  author =       "Urbi Chatterjee and Soumi Chatterjee and Debdeep
                 Mukhopadhyay and Rajat Subhra Chakraborty",
  title =        "Machine Learning Assisted {PUF} Calibration for
                 Trustworthy Proof of Sensor Data in {IoT}",
  journal =      j-TODAES,
  volume =       "25",
  number =       "4",
  pages =        "32:1--32:21",
  month =        sep,
  year =         "2020",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3393628",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Sep 3 14:31:04 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3393628",
  abstract =     "Remote integrity verification plays a paramount role
                 in resource-constraint devices owing to emerging
                 applications such as Internet-of-Things (IoT), smart
                 homes, e-health, and so on. The concept of Virtual
                 Proof of Reality (VPoR) proposed by R{\"u}hrmair et al.
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "32",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

%%% ACM TODAES 25(4), September 2020, article 33.
@Article{Vijayan:2020:RIH,
  author          = {Arunkumar Vijayan and Mehdi B. Tahoori and Krishnendu
                     Chakrabarty},
  title           = {Runtime Identification of Hardware {Trojans} by
                     Feature Analysis on Gate-Level Unstructured Data and
                     Anomaly Detection},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  volume          = {25},
  number          = {4},
  articleno       = {33},
  pages           = {33:1--33:23},
  month           = sep,
  year            = {2020},
  DOI             = {https://doi.org/10.1145/3391890},
  URL             = {https://dl.acm.org/doi/10.1145/3391890},
  journal-URL     = {https://dl.acm.org/loi/todaes},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  abstract        = {As the globalization of chip design and manufacturing
                     process becomes popular, malicious hardware inclusions
                     such as hardware Trojans pose a serious threat to the
                     security of digital systems. Advanced Trojans can mask
                     many architectural-level Trojan \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Thu Sep 3 14:31:04 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

%%% ACM TODAES 25(4), September 2020, article 34.
@Article{Alasad:2020:SLO,
  author          = {Qutaiba Alasad and Jiann-Shuin Yuan and Pramod
                     Subramanyan},
  title           = {Strong Logic Obfuscation with Low Overhead against
                     {IC} Reverse Engineering Attacks},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  volume          = {25},
  number          = {4},
  articleno       = {34},
  pages           = {34:1--34:31},
  month           = sep,
  year            = {2020},
  DOI             = {https://doi.org/10.1145/3398012},
  URL             = {https://dl.acm.org/doi/10.1145/3398012},
  journal-URL     = {https://dl.acm.org/loi/todaes},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  abstract        = {Untrusted foundries pose threats of integrated circuit
                     (IC) piracy and counterfeiting, and this has motivated
                     research into logic locking. Strong logic locking
                     approaches potentially prevent piracy and
                     counterfeiting by preventing unauthorized \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Thu Sep 3 14:31:04 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

%%% ACM TODAES 25(4), September 2020, article 35.
@Article{Alam:2020:SSB,
  author          = {Md Mahbub Alam and Adib Nahiyan and Mehdi Sadi and
                     Domenic Forte and Mark Tehranipoor},
  title           = {{Soft-HaT}: Software-Based Silicon Reprogramming for
                     Hardware {Trojan} Implementation},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  volume          = {25},
  number          = {4},
  articleno       = {35},
  pages           = {35:1--35:22},
  month           = sep,
  year            = {2020},
  DOI             = {https://doi.org/10.1145/3396521},
  URL             = {https://dl.acm.org/doi/10.1145/3396521},
  journal-URL     = {https://dl.acm.org/loi/todaes},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  abstract        = {A hardware Trojan is a malicious modification to an
                     integrated circuit (IC) made by untrusted third-party
                     vendors, fabrication facilities, or rogue designers.
                     Although existing hardware Trojans are designed to be
                     stealthy, they can, in theory, be \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Thu Sep 3 14:31:04 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

%%% ACM TODAES 25(5), October 2020, article 36.
%%% Special-issue introduction; this entry carries no abstract field.
@Article{Henkel:2020:ISI,
  author          = {J{\"o}rg Henkel and Hussam Amrouch and Marilyn Wolf},
  title           = {Introduction to the Special Issue on Machine Learning
                     for {CAD}},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  volume          = {25},
  number          = {5},
  articleno       = {36},
  pages           = {36:1--36:2},
  month           = oct,
  year            = {2020},
  DOI             = {https://doi.org/10.1145/3410864},
  URL             = {https://dl.acm.org/doi/10.1145/3410864},
  journal-URL     = {https://dl.acm.org/loi/todaes},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  acknowledgement = ack-nhfb,
  bibdate         = {Sat Oct 3 07:47:57 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

%%% ACM TODAES 25(5), October 2020, article 37.
@Article{Szentimrey:2020:MLC,
  author          = {Hannah Szentimrey and Abeer Al-Hyari and Jeremy
                     Foxcroft and Timothy Martin and David Noel and Gary
                     Grewal and Shawki Areibi},
  title           = {Machine Learning for Congestion Management and
                     Routability Prediction within {FPGA} Placement},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  volume          = {25},
  number          = {5},
  articleno       = {37},
  pages           = {37:1--37:25},
  month           = oct,
  year            = {2020},
  DOI             = {https://doi.org/10.1145/3373269},
  URL             = {https://dl.acm.org/doi/10.1145/3373269},
  journal-URL     = {https://dl.acm.org/loi/todaes},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  abstract        = {Placement for Field Programmable Gate Arrays (FPGAs)
                     is one of the most important but time-consuming steps
                     for achieving design closure. This article proposes the
                     integration of three unique machine learning models
                     into the state-of-the-art analytic \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Sat Oct 3 07:47:57 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

%%% ACM TODAES 25(5), October 2020, article 38.
@Article{Liu:2020:FGA,
  author          = {Mengyun Liu and Renjian Pan and Fangming Ye and Xin Li
                     and Krishnendu Chakrabarty and Xinli Gu},
  title           = {Fine-grained Adaptive Testing Based on Quality
                     Prediction},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  volume          = {25},
  number          = {5},
  articleno       = {38},
  pages           = {38:1--38:25},
  month           = oct,
  year            = {2020},
  DOI             = {https://doi.org/10.1145/3385261},
  URL             = {https://dl.acm.org/doi/10.1145/3385261},
  journal-URL     = {https://dl.acm.org/loi/todaes},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  abstract        = {The ever-increasing complexity of integrated circuits
                     inevitably leads to high test cost. Adaptive testing
                     provides an effective solution for test-cost reduction;
                     this testing framework selects the important test items
                     for each set of chips. However, \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Sat Oct 3 07:47:57 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

%%% ACM TODAES 25(5), October 2020, article 39.
@Article{Last:2020:PMC,
  author          = {Felix Last and Max Haeberlein and Ulf Schlichtmann},
  title           = {Predicting Memory Compiler Performance Outputs Using
                     Feed-forward Neural Networks},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  volume          = {25},
  number          = {5},
  articleno       = {39},
  pages           = {39:1--39:19},
  month           = oct,
  year            = {2020},
  DOI             = {https://doi.org/10.1145/3385262},
  URL             = {https://dl.acm.org/doi/10.1145/3385262},
  journal-URL     = {https://dl.acm.org/loi/todaes},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  abstract        = {Typical semiconductor chips include thousands of
                     mostly small memories. As memories contribute an
                     estimated 25\% to 40\% to the overall power,
                     performance, and area (PPA) of a product, memories must
                     be designed carefully to meet the system's
                     requirements. \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Sat Oct 3 07:47:57 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

%%% ACM TODAES 25(5), October 2020, article 40.
@Article{Goli:2020:PAP,
  author          = {Mehran Goli and Rolf Drechsler},
  title           = {{PREASC}: Automatic Portion Resilience Evaluation for
                     Approximating {SystemC}-based Designs Using Regression
                     Analysis Techniques},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  volume          = {25},
  number          = {5},
  articleno       = {40},
  pages           = {40:1--40:28},
  month           = oct,
  year            = {2020},
  DOI             = {https://doi.org/10.1145/3388140},
  URL             = {https://dl.acm.org/doi/10.1145/3388140},
  journal-URL     = {https://dl.acm.org/loi/todaes},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  abstract        = {The increasing functionality of electronic systems due
                     to the constant evolution of the market requirements
                     makes the non-functional aspects of such systems (e.g.,
                     energy consumption, area overhead, or performance) a
                     major concern in the design process. \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Sat Oct 3 07:47:57 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

%%% ACM TODAES 25(5), October 2020, article 41.
@Article{Nasser:2020:NCM,
  author          = {Yehya Nasser and Carlo Sau and Jean-Christophe
                     Pr{\'e}votet and Tiziana Fanni and Francesca Palumbo
                     and Maryline H{\'e}lard and Luigi Raffo},
  title           = {{NeuPow}: a {CAD} Methodology for High-level Power
                     Estimation Based on Machine Learning},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  volume          = {25},
  number          = {5},
  articleno       = {41},
  pages           = {41:1--41:29},
  month           = oct,
  year            = {2020},
  DOI             = {https://doi.org/10.1145/3388141},
  URL             = {https://dl.acm.org/doi/10.1145/3388141},
  journal-URL     = {https://dl.acm.org/loi/todaes},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  abstract        = {In this article, we present a new, simple, accurate,
                     and fast power estimation technique that can be used to
                     explore the power consumption of digital system designs
                     at an early design stage. We exploit the machine
                     learning techniques to aid the \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Sat Oct 3 07:47:57 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

%%% ACM TODAES 25(5), October 2020, article 42.
@Article{Dey:2020:MLA,
  author          = {Sukanta Dey and Sukumar Nandi and Gaurav Trivedi},
  title           = {Machine Learning Approach for Fast Electromigration
                     Aware Aging Prediction in Incremental Design of Large
                     Scale On-chip Power Grid Network},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  volume          = {25},
  number          = {5},
  articleno       = {42},
  pages           = {42:1--42:29},
  month           = oct,
  year            = {2020},
  DOI             = {https://doi.org/10.1145/3399677},
  URL             = {https://dl.acm.org/doi/10.1145/3399677},
  journal-URL     = {https://dl.acm.org/loi/todaes},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  abstract        = {With the advancement of technology nodes,
                     Electromigration (EM) signoff has become increasingly
                     difficult, which requires a considerable amount of time
                     for an incremental change in the power grid (PG)
                     network design in a chip. The traditional Black's
                     \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Sat Oct 3 07:47:57 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

@Article{Huang:2020:TSD,
  author =       "Qicheng Huang and Chenlei Fang and Soumya Mittal and
                 R. D. (Shawn) Blanton",
  title =        "Towards Smarter Diagnosis: a Learning-based Diagnostic
                 Outcome Previewer",
  journal =      j-TODAES,
  volume =       "25",
  number =       "5",
  pages =        "43:1--43:20",
  month =        oct,
  year =         "2020",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3398267",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Oct 3 07:47:57 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3398267",
  abstract =     "Given the inherent perturbations during the
                 fabrication process of integrated circuits that lead to
                 yield loss, diagnosis of failing chips is a mitigating
                 method employed during both yield ramping and
                 high-volume manufacturing for yield learning.
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "43",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Hu:2020:MLA,
  author =       "Yong Hu and Marcel Mettler and Daniel
                 Mueller-Gritschneder and Thomas Wild and Andreas
                 Herkersdorf and Ulf Schlichtmann",
  title =        "Machine Learning Approaches for Efficient Design Space
                 Exploration of Application-Specific {NoCs}",
  journal =      j-TODAES,
  volume =       "25",
  number =       "5",
  pages =        "44:1--44:27",
  month =        oct,
  year =         "2020",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3403584",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Oct 3 07:47:57 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3403584",
  abstract =     "In many Multi-Processor Systems-on-Chip (MPSoCs),
                 traffic between cores is unbalanced. This motivates the
                 use of an application-specific Network-on-Chip (NoC)
                 that is customized and can provide a high performance
                 at low cost in terms of power and area. \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "44",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Wang:2020:MFS,
  author =       "Yi Wang and Paul D. Franzon and David Smart and Brian
                 Swahn",
  title =        "Multi-Fidelity Surrogate-Based Optimization for
                 Electromagnetic Simulation Acceleration",
  journal =      j-TODAES,
  volume =       "25",
  number =       "5",
  pages =        "45:1--45:21",
  month =        oct,
  year =         "2020",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3398268",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Oct 3 07:47:57 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3398268",
  abstract =     "As circuits' speed and frequency increase, fast and
                 accurate capture of the details of the parasitics in
                 metal structures, such as inductors and clock trees,
                 becomes more critical. However, conducting
                 high-fidelity 3D electromagnetic (EM) simulations
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "45",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Agnesina:2020:IFB,
  author =       "Anthony Agnesina and Sung Kyu Lim and Etienne Lepercq
                 and Jose {Escobedo Del Cid}",
  title =        "Improving {FPGA}-Based Logic Emulation Systems through
                 Machine Learning",
  journal =      j-TODAES,
  volume =       "25",
  number =       "5",
  pages =        "46:1--46:20",
  month =        oct,
  year =         "2020",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3399595",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Oct 3 07:47:57 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3399595",
  abstract =     "We present a machine learning (ML) framework to
                 improve the use of computing resources in the FPGA
                 compilation step of a commercial FPGA-based logic
                 emulation flow. Our ML models enable highly accurate
                 predictability of the final place and route design
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "46",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Xama:2020:MLB,
  author =       "Nektar Xama and Martin Andraud and Jhon Gomez and
                 Baris Esen and Wim Dobbelaere and Ronny Vanhooren and
                 Anthony Coyette and Georges Gielen",
  title =        "Machine Learning-based Defect Coverage Boosting of
                 Analog Circuits under Measurement Variations",
  journal =      j-TODAES,
  volume =       "25",
  number =       "5",
  pages =        "47:1--47:27",
  month =        oct,
  year =         "2020",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3408063",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Oct 3 07:47:57 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3408063",
  abstract =     "Safety-critical and mission-critical systems, such as
                 airplanes or (semi-)autonomous cars, are relying on an
                 ever-increasing number of embedded integrated circuits.
                 Consequently, there is a need for complete defect
                 coverage during the testing of these \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "47",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Liu:2020:APA,
  author =       "Kang Liu and Haoyu Yang and Yuzhe Ma and Benjamin Tan
                 and Bei Yu and Evangeline F. Y. Young and Ramesh Karri
                 and Siddharth Garg",
  title =        "Adversarial Perturbation Attacks on {ML}-based {CAD}:
                 a Case Study on {CNN}-based Lithographic Hotspot
                 Detection",
  journal =      j-TODAES,
  volume =       "25",
  number =       "5",
  pages =        "48:1--48:31",
  month =        oct,
  year =         "2020",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3408288",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Oct 3 07:47:57 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3408288",
  abstract =     "There is substantial interest in the use of machine
                 learning (ML)-based techniques throughout the
                 electronic computer-aided design (CAD) flow,
                 particularly those based on deep learning. However,
                 while deep learning methods have surpassed
                 state-of-the-art \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "48",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Hu:2020:EMN,
  author =       "X. Sharon Hu",
  title =        "Editorial: a Message from the New {Editor-in-Chief}",
  journal =      j-TODAES,
  volume =       "25",
  number =       "6",
  pages =        "49e:1--49e:2",
  month =        oct,
  year =         "2020",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3419376",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Oct 13 08:53:01 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3419376",
  acknowledgement = ack-nhfb,
  articleno =    "49e",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Torabi:2020:LAA,
  author =       "Mohammad Torabi and Lihong Zhang",
  title =        "{LDE}-aware Analog Layout Migration with
                 {OPC}-inclusive Routing",
  journal =      j-TODAES,
  volume =       "25",
  number =       "6",
  pages =        "49:1--49:22",
  month =        oct,
  year =         "2020",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3398190",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Oct 13 08:53:01 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3398190",
  abstract =     "Performance degradation in analog circuits due to
                 layout dependent effects (LDEs) has become increasingly
                 challenging in advanced technologies. To address this
                 issue, LDEs have to be seriously considered as
                 performance constraints in the physical design
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "49",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Ma:2020:MEF,
  author =       "Chenlin Ma and Yi Wang and Zhaoyan Shen and Renhai
                 Chen and Zhu Wang and Zili Shao",
  title =        "{MNFTL}: an Efficient Flash Translation Layer for {MLC
                 NAND} Flash Memory",
  journal =      j-TODAES,
  volume =       "25",
  number =       "6",
  pages =        "50:1--50:19",
  month =        oct,
  year =         "2020",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3398037",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Oct 13 08:53:01 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3398037",
  abstract =     "The write constraints of Multi-Level Cell (MLC) NAND
                 flash memory make most of the existing flash
                 translation layer (FTL) schemes inefficient or
                 inapplicable. In this article, we solve several
                 fundamental problems in the design of MLC flash
                 translation \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "50",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Lezos:2020:LOL,
  author =       "Christakis Lezos and Grigoris Dimitroulakos and
                 Ioannis Latifis and Konstantinos Masselos",
  title =        "A Locality Optimizer for Loop-dominated Applications
                 Based on Reuse Distance Analysis",
  journal =      j-TODAES,
  volume =       "25",
  number =       "6",
  pages =        "51:1--51:26",
  month =        oct,
  year =         "2020",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3398189",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Oct 13 08:53:01 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3398189",
  abstract =     "Source code optimization can heavily improve software
                 code implementation quality while still being
                 complementary to conventional compilers' optimizations.
                 Source code analysis tools are very useful in
                 supporting source code optimization. This article
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "51",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Tan:2020:EEG,
  author =       "Jingweijia Tan and Kaige Yan and Shuaiwen Leon Song
                 and Xin Fu",
  title =        "Energy-Efficient {GPU} {L2} Cache Design Using
                 Instruction-Level Data Locality Similarity",
  journal =      j-TODAES,
  volume =       "25",
  number =       "6",
  pages =        "52:1--52:18",
  month =        oct,
  year =         "2020",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3408060",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Oct 13 08:53:01 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3408060",
  abstract =     "This article presents a novel energy-efficient cache
                 design for massively parallel, throughput-oriented
                 architectures like GPUs. Unlike L1 data cache on modern
                 GPUs, L2 cache shared by all of the streaming
                 multiprocessors is not the primary performance
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "52",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Charles:2020:RNC,
  author =       "Subodha Charles and Prabhat Mishra",
  title =        "Reconfigurable Network-on-Chip Security Architecture",
  journal =      j-TODAES,
  volume =       "25",
  number =       "6",
  pages =        "53:1--53:25",
  month =        oct,
  year =         "2020",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3406661",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Oct 13 08:53:01 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3406661",
  abstract =     "Growth of the Internet-of-things has led to complex
                 system-on-chips (SoCs) being used in the edge devices
                 in IoT applications. The increased complexity is
                 demanding designers to consider several critical
                 factors, such as dynamic requirement changes,
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "53",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Pendyala:2020:IAS,
  author =       "Shilpa Pendyala and Sheikh Ariful Islam and Srinivas
                 Katkoori",
  title =        "Interval Arithmetic and Self-Similarity Based {RTL}
                 Input Vector Control for Datapath Leakage
                 Minimization",
  journal =      j-TODAES,
  volume =       "25",
  number =       "6",
  pages =        "54:1--54:26",
  month =        oct,
  year =         "2020",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3408061",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Oct 13 08:53:01 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3408061",
  abstract =     "With technology scaling, subthreshold leakage has
                 dominated the overall power consumption in a
                 design. Input vector control is an effective technique
                 to minimize subthreshold leakage. Low leakage input
                 vector determination is not often possible due to large
                 design space and simulation time. Similarly, applying
                 an appropriate minimum leakage vector (MLV) to each
                 Register Transfer Level (RTL) module instance in a
                 design often results in a low leakage state with
                 significant area overhead. In this work, we propose a
                 top-down and bottom-up approach for propagating the
                 input vector interval to identify low leakage input
                 vector at primary inputs of an RTL datapath. For each
                 module, via Monte Carlo simulation, we identify a set
                 of MLV intervals such that maximum leakage is within
                 (say) 10\% of the lowest leakage points. As the module
                 bit width increases, exhaustive simulation to find the
                 low leakage vector is not feasible. Further, we need to
                 uniformly search the entire input space to obtain as
                 many low leakage intervals as possible. Based on
                 empirical observations, we observe self-similarity in
                 the subthreshold leakage distribution of adder\slash
                 multiplier modules with highly regular bit-slice
                 architectures when input space is partitioned into
                 smaller cells. This property enables the uniform search
                 of low leakage vectors in the entire input space where
                 the time taken for characterization increases linearly
                 with the module size. We further process the reduced
                 interval set with simulated annealing to arrive at the
                 best low-leakage vector at the primary inputs. We also
                 propose to reduce area overhead (in some cases to 0\%)
                 by choosing Primary Input (PI) MLVs such that resultant
                 inputs to internal nodes are also MLVs. Compared to
                 existing work, experimental results for DSP filters
                 simulated in 16nm technology demonstrated leakage
                 savings of 93.6\% and 89.2\% for top-down and bottom-up
                 approaches with no area overhead.",
  acknowledgement = ack-nhfb,
  articleno =    "54",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Chi:2020:WLO,
  author =       "Hao Yu Chi and Chien Nan Jimmy Liu and Hung Ming
                 Chen",
  title =        "Wire Load Oriented Analog Routing with Matching
                 Constraints",
  journal =      j-TODAES,
  volume =       "25",
  number =       "6",
  pages =        "55:1--55:26",
  month =        oct,
  year =         "2020",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3403932",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Oct 13 08:53:01 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3403932",
  abstract =     "As design complexity is increased exponentially,
                 electronic design automation (EDA) tools are essential
                 to reduce design efforts. However, the analog layout
                 design has still been done manually for decades because
                 it is a sensitive and error-prone task. \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "55",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Goel:2021:MNN,
  author =       "Abhinav Goel and Sara Aghajanzadeh and Caleb Tung and
                 Shuo-Han Chen and George K. Thiruvathukal and
                 Yung-Hsiang Lu",
  title =        "Modular Neural Networks for Low-Power Image
                 Classification on Embedded Devices",
  journal =      j-TODAES,
  volume =       "26",
  number =       "1",
  pages =        "1:1--1:35",
  month =        jan,
  year =         "2021",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3408062",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Feb 25 10:17:13 MST 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3408062",
  abstract =     "Embedded devices are generally small, battery-powered
                 computers with limited hardware resources. It is
                 difficult to run deep neural networks (DNNs) on these
                 devices, because DNNs perform millions of operations
                 and consume significant amounts of energy. \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "1",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Roy:2021:FAA,
  author =       "Indrani Roy and Chester Rebeiro and Aritra Hazra and
                 Swarup Bhunia",
  title =        "{FaultDroid}: an Algorithmic Approach for
                 Fault-Induced Information Leakage Analysis",
  journal =      j-TODAES,
  volume =       "26",
  number =       "1",
  pages =        "2:1--2:27",
  month =        jan,
  year =         "2021",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3410336",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Feb 25 10:17:13 MST 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3410336",
  abstract =     "Fault attacks belong to a potent class of
                 implementation-based attacks that can compromise a
                 crypto-device within a few milliseconds. Out of the
                 large numbers of faults that can occur in the device,
                 only a very few are exploitable in terms of leaking
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "2",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Li:2021:MNI,
  author =       "Jun Li and Bowen Huang and Zhibing Sha and Zhigang Cai
                 and Jianwei Liao and Balazs Gerofi and Yutaka
                 Ishikawa",
  title =        "Mitigating Negative Impacts of Read Disturb in
                 {SSDs}",
  journal =      j-TODAES,
  volume =       "26",
  number =       "1",
  pages =        "3:1--3:24",
  month =        jan,
  year =         "2021",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3410332",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Feb 25 10:17:13 MST 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3410332",
  abstract =     "Read disturb is a circuit-level noise in solid-state
                 drives (SSDs), which may corrupt existing data in SSD
                 blocks and then cause high read error rate and longer
                 read latency. The approach of read refresh is commonly
                 used to avoid read disturb errors by \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "3",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Mondal:2021:IFS,
  author =       "Ankit Mondal and Ankur Srivastava",
  title =        "{Ising-FPGA}: a Spintronics-based Reconfigurable
                 {Ising} Model Solver",
  journal =      j-TODAES,
  volume =       "26",
  number =       "1",
  pages =        "4:1--4:27",
  month =        jan,
  year =         "2021",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3411511",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Feb 25 10:17:13 MST 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3411511",
  abstract =     "The Ising model has been explored as a framework for
                 modeling NP-hard problems, with several diverse systems
                 proposed to solve it. The Magnetic Tunnel Junction-
                 (MTJ) based Magnetic RAM is capable of replacing CMOS
                 in memory chips. In this article, we \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "4",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Rokni:2021:TMS,
  author =       "Seyed Ali Rokni and Marjan Nourollahi and Parastoo
                 Alinia and Iman Mirzadeh and Mahdi Pedram and Hassan
                 Ghasemzadeh",
  title =        "{TransNet}: Minimally Supervised Deep Transfer
                 Learning for Dynamic Adaptation of Wearable Systems",
  journal =      j-TODAES,
  volume =       "26",
  number =       "1",
  pages =        "5:1--5:31",
  month =        jan,
  year =         "2021",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3414062",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Feb 25 10:17:13 MST 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3414062",
  abstract =     "Wearables are poised to transform health and wellness
                 through automation of cost-effective, objective, and
                 real-time health monitoring. However, machine learning
                 models for these systems are designed based on labeled
                 data collected, and feature \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "5",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Islam:2021:HLS,
  author =       "Sheikh Ariful Islam and Love Kumar Sah and Srinivas
                 Katkoori",
  title =        "High-Level Synthesis of Key-Obfuscated {RTL IP} with
                 Design Lockout and Camouflaging",
  journal =      j-TODAES,
  volume =       "26",
  number =       "1",
  pages =        "6:1--6:35",
  month =        jan,
  year =         "2021",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3410337",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Feb 25 10:17:13 MST 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3410337",
  abstract =     "We propose three orthogonal techniques to secure
                 Register-Transfer-Level (RTL) Intellectual Property
                 (IP). In the first technique, the key-based RTL
                 obfuscation scheme is proposed at an early design phase
                 during High-Level Synthesis (HLS). Given a \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "6",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Poddar:2021:RMT,
  author =       "Sudip Poddar and Tapalina Banerjee and Robert Wille
                 and Bhargab B. Bhattacharya",
  title =        "Robust Multi-Target Sample Preparation on {MEDA}
                 Biochips Obviating Waste Production",
  journal =      j-TODAES,
  volume =       "26",
  number =       "1",
  pages =        "7:1--7:29",
  month =        jan,
  year =         "2021",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3414061",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Feb 25 10:17:13 MST 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3414061",
  abstract =     "Digital microfluidic biochips have fueled a paradigm
                 shift in implementing bench-top laboratory experiments
                 on a single tiny chip, thus replacing costly and bulky
                 equipment. However, because of imprecise fluidic
                 functions, several volumetric split \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "7",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Zhang:2021:DPR,
  author =       "Ying Zhang and Xinpeng Hong and Zhongsheng Chen and
                 Zebo Peng and Jianhui Jiang",
  title =        "A Deterministic-Path Routing Algorithm for Tolerating
                 Many Faults on Very-Large-Scale Network-on-Chip",
  journal =      j-TODAES,
  volume =       "26",
  number =       "1",
  pages =        "8:1--8:26",
  month =        jan,
  year =         "2021",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3414060",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Feb 25 10:17:13 MST 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3414060",
  abstract =     "Very-large-scale network-on-chip (VLS-NoC) has become
                 a promising fabric for supercomputers, but this fabric
                 may encounter the many-fault problem. This article
                 proposes a deterministic routing algorithm to tolerate
                 the effects of many faults in VLS-NoCs. \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "8",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Goncalves:2021:SAT,
  author =       "St{\`e}phano M. M. Gon{\c{c}}alves and da Rosa, Jr.,
                 Leomar S. and Felipe S. Marques",
  title =        "{SmartDR}: Algorithms and Techniques for Fast Detailed
                 Routing with Good Design Rule Handling",
  journal =      j-TODAES,
  volume =       "26",
  number =       "2",
  pages =        "9:1--9:38",
  month =        feb,
  year =         "2021",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3417133",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Feb 25 10:17:14 MST 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3417133",
  abstract =     "Detailed routing is one of the most time-consuming
                 steps of physical synthesis of integrated circuits.
                 Also, it is very challenging due to the complexity of
                 the design rules that the router must obey. In this
                 article, we present SmartDR, a detailed \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "9",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

%%% NOTE: the title names the g_m/I_D (transconductance over drain
%%% current) sizing methodology; both symbols carry subscripts, and the
%%% ratio is kept in one braced math group so styles cannot recase it.
@Article{Liao:2021:EPA,
  author =       "Tuotian Liao and Lihong Zhang",
  title =        "Efficient Parasitic-aware {$ g_m / I_D $}-based Hybrid
                 Sizing Methodology for Analog and {RF} Integrated
                 Circuits",
  journal =      j-TODAES,
  volume =       "26",
  number =       "2",
  pages =        "10:1--10:31",
  month =        feb,
  year =         "2021",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3416946",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Feb 25 10:17:14 MST 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3416946",
  abstract =     "As the primary second-order effect, parasitic issues
                 have to be seriously addressed when synthesizing
                 high-performance analog and RF integrated circuits
                 (ICs). In this article, a two-phase hybrid sizing
                 methodology for analog and RF ICs is proposed to
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "10",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Wu:2021:CPO,
  author          = {Nan Wu and Lei Deng and Guoqi Li and Yuan Xie},
  title           = {Core Placement Optimization for Multi-chip Many-core
                     Neural Network Systems with Reinforcement Learning},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  volume          = {26},
  number          = {2},
  pages           = {11:1--11:27},
  articleno       = {11},
  month           = feb,
  year            = {2021},
  DOI             = {https://doi.org/10.1145/3418498},
  URL             = {https://dl.acm.org/doi/10.1145/3418498},
  journal-URL     = {https://dl.acm.org/loi/todaes},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  abstract        = {Multi-chip many-core neural network systems are
                     capable of providing high parallelism benefited from
                     decentralized execution, and they can be scaled to very
                     large systems with reasonable fabrication costs. As
                     multi-chip many-core systems scale up, \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Thu Feb 25 10:17:14 MST 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

@Article{Siddhu:2021:LAD,
  author          = {Lokesh Siddhu and Rajesh Kedia and Preeti Ranjan
                     Panda},
  title           = {Leakage-Aware Dynamic Thermal Management of {$3$D}
                     Memories},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  volume          = {26},
  number          = {2},
  pages           = {12:1--12:31},
  articleno       = {12},
  month           = feb,
  year            = {2021},
  DOI             = {https://doi.org/10.1145/3419468},
  URL             = {https://dl.acm.org/doi/10.1145/3419468},
  journal-URL     = {https://dl.acm.org/loi/todaes},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  abstract        = {3D memory systems offer several advantages in terms of
                     area, bandwidth, and energy efficiency. However,
                     thermal issues arising out of higher power densities
                     have limited their widespread use. While prior works
                     have looked at reducing dynamic power \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Thu Feb 25 10:17:14 MST 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

@Article{Ghosh:2021:PDP,
  author          = {Sumana Ghosh and Soumyajit Dey and Pallab Dasgupta},
  title           = {Performance-Driven Post-Processing of Control Loop
                     Execution Schedules},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  volume          = {26},
  number          = {2},
  pages           = {13:1--13:27},
  articleno       = {13},
  month           = feb,
  year            = {2021},
  DOI             = {https://doi.org/10.1145/3421505},
  URL             = {https://dl.acm.org/doi/10.1145/3421505},
  journal-URL     = {https://dl.acm.org/loi/todaes},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  abstract        = {The increasing demand for mapping diverse embedded
                     features onto shared electronic control units has
                     brought about novel ways to co-design control tasks and
                     their schedules. These techniques replace traditional
                     implementations of control with new \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Thu Feb 25 10:17:14 MST 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

@Article{Luo:2021:TMF,
  author          = {Yingyi Luo and Joshua C. Zhao and Arnav Aggarwal and
                     Seda Ogrenci-Memik and Kazutomo Yoshii},
  title           = {Thermal Management for {FPGA} Nodes in {HPC} Systems},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  volume          = {26},
  number          = {2},
  pages           = {14:1--14:17},
  articleno       = {14},
  month           = feb,
  year            = {2021},
  DOI             = {https://doi.org/10.1145/3423494},
  URL             = {https://dl.acm.org/doi/10.1145/3423494},
  journal-URL     = {https://dl.acm.org/loi/todaes},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  abstract        = {The integration of FPGAs into large-scale computing
                     systems is gaining attention. In these systems,
                     real-time data handling for networking, tasks for
                     scientific computing, and machine learning can be
                     executed with customized datapaths on reconfigurable
                     \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Thu Feb 25 10:17:14 MST 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

%%% NOTE(review): the last author is entered given-name-first (Yao-Wen
%%% Chang) so that BibTeX takes `Chang' as the surname, like the other
%%% names in the list; confirm against the article's title page.
@Article{Chen:2021:RMB,
  author =       "Jianli Chen and Ziran Zhu and Wenxing Zhu and Yao-Wen
                 Chang",
  title =        "A Robust Modulus-Based Matrix Splitting Iteration
                 Method for Mixed-Cell-Height Circuit Legalization",
  journal =      j-TODAES,
  volume =       "26",
  number =       "2",
  pages =        "15:1--15:28",
  month =        feb,
  year =         "2021",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3423326",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Feb 25 10:17:14 MST 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3423326",
  abstract =     "Modern circuits often contain standard cells of
                 different row heights to meet various design
                 requirements. Taller cells give larger drive strengths
                 and higher speed at the cost of larger areas and power.
                 Multi-row height standard cells incur challenging
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "15",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Arka:2021:HHM,
  author          = {Aqeeb Iqbal Arka and Biresh Kumar Joardar and Ryan
                     Gary Kim and Dae Hyun Kim and Janardhan Rao Doppa and
                     Partha Pratim Pande},
  title           = {{HeM$3$D}: Heterogeneous Manycore Architecture Based
                     on Monolithic {$3$D} Vertical Integration},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  volume          = {26},
  number          = {2},
  pages           = {16:1--16:21},
  articleno       = {16},
  month           = feb,
  year            = {2021},
  DOI             = {https://doi.org/10.1145/3424239},
  URL             = {https://dl.acm.org/doi/10.1145/3424239},
  journal-URL     = {https://dl.acm.org/loi/todaes},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  abstract        = {Heterogeneous manycore architectures are the key to
                     efficiently execute compute- and data-intensive
                     applications. Through-silicon-via (TSV)-based 3D
                     manycore system is a promising solution in this
                     direction as it enables the integration of disparate
                     \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Thu Feb 25 10:17:14 MST 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

@Article{Deb:2021:CRC,
  author          = {Dipika Deb and John Jose and Maurizio Palesi},
  title           = {{COPE}: Reducing Cache Pollution and Network
                     Contention by Inter-tile Coordinated Prefetching in
                     {NoC}-based {MPSoCs}},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  volume          = {26},
  number          = {3},
  pages           = {17:1--17:31},
  articleno       = {17},
  month           = feb,
  year            = {2021},
  DOI             = {https://doi.org/10.1145/3428149},
  URL             = {https://dl.acm.org/doi/10.1145/3428149},
  journal-URL     = {https://dl.acm.org/loi/todaes},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  abstract        = {Prefetching helps in reducing the memory access
                     latency in multi-banked NUCA architecture, where the
                     Last Level Cache (LLC) is shared. In such systems, an
                     application running on core generates significant
                     traffic on the shared resources, the underlying
                     \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Thu Feb 25 10:17:15 MST 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

@Article{Letras:2021:MOO,
  author          = {Martin Letras and Joachim Falk and Tobias Schwarzer
                     and J{\"u}rgen Teich},
  title           = {Multi-objective Optimization of Mapping Dataflow
                     Applications to {MPSoCs} Using a Hybrid Evaluation
                     Combining Analytic Models and Measurements},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  volume          = {26},
  number          = {3},
  pages           = {18:1--18:33},
  articleno       = {18},
  month           = feb,
  year            = {2021},
  DOI             = {https://doi.org/10.1145/3431814},
  URL             = {https://dl.acm.org/doi/10.1145/3431814},
  journal-URL     = {https://dl.acm.org/loi/todaes},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  abstract        = {Dataflow modeling is well suited for a large variety
                     of applications for modern multi-core architectures,
                     e.g., from the signal processing and the control
                     domain. Furthermore, Design Space Exploration (DSE) can
                     be used to explore mappings of tasks to \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Thu Feb 25 10:17:15 MST 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

@Article{Pomeranz:2021:LDH,
  author          = {Irith Pomeranz and M. Enamul Amyeen},
  title           = {Logic Diagnosis with Hybrid Fail Data},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  volume          = {26},
  number          = {3},
  pages           = {19:1--19:13},
  articleno       = {19},
  month           = feb,
  year            = {2021},
  DOI             = {https://doi.org/10.1145/3433929},
  URL             = {https://dl.acm.org/doi/10.1145/3433929},
  journal-URL     = {https://dl.acm.org/loi/todaes},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  abstract        = {Yield improvement requires information about the
                     defects present in faulty units. This information is
                     derived by applying a logic diagnosis procedure to the
                     fail data collected by a tester from faulty units. It
                     is typical in the early stages of yield \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Thu Feb 25 10:17:15 MST 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

@Article{Ince:2021:FBB,
  author          = {Mehmet Ince and Ender Yilmaz and Wei Fu and Joonsung
                     Park and Krishnaswamy Nagaraj and Leroy Winemberg and
                     Sule Ozev},
  title           = {Fault-based Built-in Self-test and Evaluation of Phase
                     Locked Loops},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  volume          = {26},
  number          = {3},
  pages           = {20:1--20:18},
  articleno       = {20},
  month           = feb,
  year            = {2021},
  DOI             = {https://doi.org/10.1145/3427911},
  URL             = {https://dl.acm.org/doi/10.1145/3427911},
  journal-URL     = {https://dl.acm.org/loi/todaes},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  abstract        = {With the increasing pressure to obtain near-zero
                     defect rates for the automotive industry, there is a
                     need to explore built-in self-test and other
                     non-traditional test techniques for embedded
                     mixed-signal components, such as PLLs, DC-DC
                     converters, and \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Thu Feb 25 10:17:15 MST 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

%%% NOTE(review): the first author's surname is spelled `Gebregiorgis';
%%% the citation key deliberately keeps its original spelling so that
%%% existing citations continue to resolve.  Confirm the spelling
%%% against the article's title page.
@Article{Gebregirogis:2021:ALF,
  author =       "Anteneh Gebregiorgis and Mehdi Tahoori",
  title =        "Approximate Learning and Fault-Tolerant Mapping for
                 Energy-Efficient Neuromorphic Systems",
  journal =      j-TODAES,
  volume =       "26",
  number =       "3",
  pages =        "21:1--21:23",
  month =        feb,
  year =         "2021",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3436491",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Feb 25 10:17:15 MST 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3436491",
  abstract =     "Brain-inspired deep neural networks such as
                 Convolutional Neural Network (CNN) have shown great
                 potential in solving difficult cognitive problems such
                 as object recognition and classification. However, such
                 architectures have high computational energy \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "21",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Lyu:2021:MSC,
  author          = {Yangdi Lyu and Prabhat Mishra},
  title           = {{MaxSense}: Side-channel Sensitivity Maximization for
                     {Trojan} Detection Using Statistical Test Patterns},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  volume          = {26},
  number          = {3},
  pages           = {22:1--22:21},
  articleno       = {22},
  month           = feb,
  year            = {2021},
  DOI             = {https://doi.org/10.1145/3436820},
  URL             = {https://dl.acm.org/doi/10.1145/3436820},
  journal-URL     = {https://dl.acm.org/loi/todaes},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  abstract        = {Detection of hardware Trojans is vital to ensure the
                     security and trustworthiness of System-on-Chip (SoC)
                     designs. Side-channel analysis is effective for Trojan
                     detection by analyzing various side-channel signatures
                     such as power, current, and delay. In \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Thu Feb 25 10:17:15 MST 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

%%% NOTE(review): the abstract is a truncated excerpt; its final
%%% hyphenated term is completed as `close-to-functional' so that the
%%% excerpt does not end on a dangling hyphen.  Confirm against the
%%% publisher's abstract.
@Article{Pomeranz:2021:CTH,
  author =       "Irith Pomeranz",
  title =        "Covering Test Holes of Functional Broadside Tests",
  journal =      j-TODAES,
  volume =       "26",
  number =       "3",
  pages =        "23:1--23:15",
  month =        feb,
  year =         "2021",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3441282",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Feb 25 10:17:15 MST 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3441282",
  abstract =     "Functional broadside tests were developed to avoid
                 overtesting of delay faults. The tests achieve this
                 goal by creating functional operation conditions during
                 their functional capture cycles. To increase the
                 achievable fault coverage, close-to-functional
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "23",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Roy:2021:MLS,
  author          = {Urmimala Roy and Tanmoy Pramanik and Subhendu Roy and
                     Avhishek Chatterjee and Leonard F. Register and Sanjay
                     K. Banerjee},
  title           = {Machine Learning for Statistical Modeling: The Case of
                     Perpendicular Spin-Transfer-Torque Random Access
                     Memory},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  volume          = {26},
  number          = {3},
  pages           = {24:1--24:17},
  articleno       = {24},
  month           = feb,
  year            = {2021},
  DOI             = {https://doi.org/10.1145/3440014},
  URL             = {https://dl.acm.org/doi/10.1145/3440014},
  journal-URL     = {https://dl.acm.org/loi/todaes},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  abstract        = {We propose a methodology to perform process
                     variation-aware device and circuit design using fully
                     physics-based simulations within limited computational
                     resources, without developing a compact model. Machine
                     learning (ML), specifically a support vector \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Thu Feb 25 10:17:15 MST 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

@Article{Pomeranz:2021:EFU,
  author          = {Irith Pomeranz},
  title           = {Equivalent Faults under Launch-on-Shift {(LOS)} Tests
                     with Equal Primary Input Vectors},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  volume          = {26},
  number          = {4},
  pages           = {25:1--25:15},
  articleno       = {25},
  month           = apr,
  year            = {2021},
  DOI             = {https://doi.org/10.1145/3440013},
  URL             = {https://dl.acm.org/doi/10.1145/3440013},
  journal-URL     = {https://dl.acm.org/loi/todaes},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  abstract        = {A recent work showed that it is possible to transform
                     a single-cycle test for stuck-at faults into a
                     launch-on-shift (LOS) test that is guaranteed to detect
                     the same stuck-at faults without any logic or fault
                     simulation. The LOS test also detects \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Apr 27 08:06:34 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

@Article{Witharana:2021:DTG,
  author          = {Hasini Witharana and Yangdi Lyu and Prabhat Mishra},
  title           = {Directed Test Generation for Activation of Security
                     Assertions in {RTL} Models},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  volume          = {26},
  number          = {4},
  pages           = {26:1--26:28},
  articleno       = {26},
  month           = apr,
  year            = {2021},
  DOI             = {https://doi.org/10.1145/3441297},
  URL             = {https://dl.acm.org/doi/10.1145/3441297},
  journal-URL     = {https://dl.acm.org/loi/todaes},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  abstract        = {Assertions are widely used for functional validation
                     as well as coverage analysis for both software and
                     hardware designs. Assertions enable runtime error
                     detection as well as faster localization of errors.
                     While there is a vast literature on both \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Apr 27 08:06:34 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

@Article{Mohammadzadeh:2021:EOP,
  author          = {Naser Mohammadzadeh and Robert Wille and Oliver
                     Keszocze},
  title           = {Efficient One-pass Synthesis for Digital Microfluidic
                     Biochips},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  volume          = {26},
  number          = {4},
  pages           = {27:1--27:21},
  articleno       = {27},
  month           = apr,
  year            = {2021},
  DOI             = {https://doi.org/10.1145/3446880},
  URL             = {https://dl.acm.org/doi/10.1145/3446880},
  journal-URL     = {https://dl.acm.org/loi/todaes},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  abstract        = {Digital microfluidics biochips are a promising
                     emerging technology that provides fluidic experimental
                     capabilities on a chip (i.e., following the
                     lab-on-a-chip paradigm). However, the design of such
                     biochips still constitutes a challenging task that is
                     \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Apr 27 08:06:34 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

@Article{Jain:2021:TTA,
  author          = {Ayush Jain and Ziqi Zhou and Ujjwal Guin},
  title           = {{TAAL}: Tampering Attack on Any Key-based Logic Locked
                     Circuits},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  volume          = {26},
  number          = {4},
  pages           = {28:1--28:22},
  articleno       = {28},
  month           = apr,
  year            = {2021},
  DOI             = {https://doi.org/10.1145/3442379},
  URL             = {https://dl.acm.org/doi/10.1145/3442379},
  journal-URL     = {https://dl.acm.org/loi/todaes},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  abstract        = {Due to the globalization of semiconductor
                     manufacturing and test processes, the system-on-a-chip
                     (SoC) designers no longer design the complete SoC and
                     manufacture chips on their own. This outsourcing of the
                     design and manufacturing of Integrated \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Apr 27 08:06:34 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

@Article{Rahman:2021:SAD,
  author          = {M. Sazadur Rahman and Adib Nahiyan and Fahim Rahman
                     and Saverio Fazzari and Kenneth Plaks and Farimah
                     Farahmandi and Domenic Forte and Mark Tehranipoor},
  title           = {Security Assessment of Dynamically Obfuscated Scan
                     Chain Against Oracle-guided Attacks},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  volume          = {26},
  number          = {4},
  pages           = {29:1--29:27},
  articleno       = {29},
  month           = apr,
  year            = {2021},
  DOI             = {https://doi.org/10.1145/3444960},
  URL             = {https://dl.acm.org/doi/10.1145/3444960},
  journal-URL     = {https://dl.acm.org/loi/todaes},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  abstract        = {Logic locking has emerged as a promising solution to
                     protect integrated circuits against piracy and
                     tampering. However, the security provided by existing
                     logic locking techniques is often thwarted by Boolean
                     satisfiability (SAT)-based oracle-guided \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Apr 27 08:06:34 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

@Article{Sinha:2021:DSO,
  author          = {Mitali Sinha and Gade Sri Harsha and Pramit
                     Bhattacharyya and Sujay Deb},
  title           = {Design Space Optimization of Shared Memory
                     Architecture in Accelerator-rich Systems},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  volume          = {26},
  number          = {4},
  pages           = {30:1--30:31},
  articleno       = {30},
  month           = apr,
  year            = {2021},
  DOI             = {https://doi.org/10.1145/3446001},
  URL             = {https://dl.acm.org/doi/10.1145/3446001},
  journal-URL     = {https://dl.acm.org/loi/todaes},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  abstract        = {Shared memory architectures, as opposed to
                     private-only memories, provide a viable alternative to
                     meet the ever-increasing memory requirements of
                     multi-accelerator systems to achieve high performance
                     under stringent area and energy constraints. However,
                     \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Apr 27 08:06:34 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

@Article{Palchaudhuri:2021:DAT,
  author          = {Ayan Palchaudhuri and Sandeep Sharma and Anindya
                     Sundar Dhar},
  title           = {Design Automation for Tree-based Nearest
                     Neighborhood-aware Placement of High-speed Cellular
                     Automata on {FPGA} with Scan Path Insertion},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  volume          = {26},
  number          = {4},
  pages           = {31:1--31:34},
  articleno       = {31},
  month           = apr,
  year            = {2021},
  DOI             = {https://doi.org/10.1145/3446206},
  URL             = {https://dl.acm.org/doi/10.1145/3446206},
  journal-URL     = {https://dl.acm.org/loi/todaes},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  abstract        = {Cellular Automata (CA) is attractive for high-speed
                     VLSI implementation due to modularity, cascadability,
                     and locality of interconnections confined to
                     neighboring logic cells. However, this outcome is not
                     easily transferable to tree-structured CA, since
                     \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Apr 27 08:06:34 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

@Article{Yellu:2021:STA,
  author          = {Pruthvy Yellu and Landon Buell and Miguel Mark and
                     Michel A. Kinsy and Dongpeng Xu and Qiaoyan Yu},
  title           = {Security Threat Analyses and Attack Models for
                     Approximate Computing Systems: From Hardware and
                     Micro-architecture Perspectives},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  volume          = {26},
  number          = {4},
  pages           = {32:1--32:31},
  articleno       = {32},
  month           = apr,
  year            = {2021},
  DOI             = {https://doi.org/10.1145/3442380},
  URL             = {https://dl.acm.org/doi/10.1145/3442380},
  journal-URL     = {https://dl.acm.org/loi/todaes},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  abstract        = {Approximate computing (AC) represents a paradigm shift
                     from conventional precise processing to inexact
                     computation but still satisfying the system requirement
                     on accuracy. The rapid progress on the development of
                     diverse AC techniques allows us to apply \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Apr 27 08:06:34 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

@Article{Jayasinghe:2021:QQB,
  author          = {Darshana Jayasinghe and Aleksandar Ignjatovic and
                     Roshan Ragel and Jude Angelo Ambrose and Sri
                     Parameswaran},
  title           = {{QuadSeal}: Quadruple Balancing to Mitigate Power
                     Analysis Attacks with Variability Effects and
                     Electromagnetic Fault Injection Attacks},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  volume          = {26},
  number          = {5},
  pages           = {33:1--33:36},
  articleno       = {33},
  month           = jun,
  year            = {2021},
  DOI             = {https://doi.org/10.1145/3443706},
  URL             = {https://dl.acm.org/doi/10.1145/3443706},
  journal-URL     = {https://dl.acm.org/loi/todaes},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  abstract        = {Side channel analysis attacks employ the emanated side
                     channel information to deduce the secret keys from
                     cryptographic implementations by analyzing the power
                     traces during execution or scrutinizing faulty outputs.
                     To be effective, a countermeasure must \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Jun 22 08:18:59 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

@Article{Wu:2021:DHC,
  author =       {Chin-Hsien Wu and Hao-Wei Zhang and Chia-Wei Liu and
                 Ta-Ching Yu and Chi-Yen Yang},
  title =        {A Dynamic {Huffman} Coding Method for Reliable {TLC
                 NAND} Flash Memory},
  journal =      j-TODAES,
  volume =       {26},
  number =       {5},
  pages =        {34:1--34:25},
  month =        jun,
  year =         {2021},
  CODEN =        {ATASFO},
  DOI =          {https://doi.org/10.1145/3446771},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  bibdate =      {Tue Jun 22 08:18:59 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3446771},
  abstract =     {With the progress of the manufacturing process, NAND
                 flash memory has evolved from the single-level cell and
                 multi-level cell into the triple-level cell (TLC). NAND
                 flash memory has physical problems such as the
                 characteristic of erase-before-write and \ldots{}},
  acknowledgement = ack-nhfb,
  articleno =    {34},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
}

@Article{Jeong:2021:DMB,
  author =       {Eunjin Jeong and Dowhan Jeong and Soonhoi Ha},
  title =        {Dataflow Model-based Software Synthesis Framework for
                 Parallel and Distributed Embedded Systems},
  journal =      j-TODAES,
  volume =       {26},
  number =       {5},
  pages =        {35:1--35:38},
  month =        jun,
  year =         {2021},
  CODEN =        {ATASFO},
  DOI =          {https://doi.org/10.1145/3447680},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  bibdate =      {Tue Jun 22 08:18:59 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3447680},
  abstract =     {Existing software development methodologies mostly
                 assume that an application runs on a single device
                 without concern about the non-functional requirements
                 of an embedded system such as latency and resource
                 consumption. Besides, embedded software is \ldots{}},
  acknowledgement = ack-nhfb,
  articleno =    {35},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
}

@Article{Xie:2021:DFM,
  author =       {Guoqi Xie and Hao Peng and Xiongren Xiao and Yao Liu
                 and Renfa Li},
  title =        {Design Flow and Methodology for Dynamic and Static
                 Energy-constrained Scheduling Framework in
                 Heterogeneous Multicore Embedded Devices},
  journal =      j-TODAES,
  volume =       {26},
  number =       {5},
  pages =        {36:1--36:18},
  month =        jun,
  year =         {2021},
  CODEN =        {ATASFO},
  DOI =          {https://doi.org/10.1145/3450448},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  bibdate =      {Tue Jun 22 08:18:59 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3450448},
  abstract =     {With Internet of things technologies, billions of
                 embedded devices, including smart gateways, smart
                 phones, and mobile robots, are connected and deeply
                 integrated. Almost all these embedded devices are
                 battery-constrained and energy-limited systems. In
                 \ldots{}},
  acknowledgement = ack-nhfb,
  articleno =    {36},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
}

@Article{Park:2021:PPD,
  author =       "Heechun Park and Bon Woong Ku and Kyungwook Chang and
                 Da Eun Shim and Sung Kyu Lim",
  title =        "Pseudo-{$3$D} Physical Design Flow for Monolithic
                 {$3$D} {ICs}: Comparisons and Enhancements",
  journal =      j-TODAES,
  volume =       "26",
  number =       "5",
  pages =        "37:1--37:25",
  month =        jun,
  year =         "2021",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3453480",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Jun 22 08:18:59 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3453480",
  abstract =     "Studies have shown that monolithic 3D (M3D) ICs
                 outperform the existing through-silicon-via
                 (TSV)-based 3D ICs in terms of power, performance, and
                 area (PPA) metrics, primarily due to the orders of
                 magnitude denser vertical interconnections offered by
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "37",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Hassanpourghadi:2021:MLG,
  author =       {Mohsen Hassanpourghadi and Rezwan A. Rasul and Mike
                 Shuo-Wei Chen},
  title =        {A Module-Linking Graph Assisted Hybrid Optimization
                 Framework for Custom Analog and Mixed-Signal Circuit
                 Parameter Synthesis},
  journal =      j-TODAES,
  volume =       {26},
  number =       {5},
  pages =        {38:1--38:22},
  month =        jun,
  year =         {2021},
  CODEN =        {ATASFO},
  DOI =          {https://doi.org/10.1145/3456722},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  bibdate =      {Tue Jun 22 08:18:59 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3456722},
  abstract =     {Analog and mixed-signal (AMS) computer-aided design
                 tools are of increasing interest owing to demand for
                 the wide range of AMS circuit specifications in the
                 modern system on a chip and faster time to market
                 requirement. Traditionally, to accelerate the
                 \ldots{}},
  acknowledgement = ack-nhfb,
  articleno =    {38},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
}

@Article{Feng:2021:FRT,
  author =       {Lang Feng and Jeff Huang and Jiang Hu and Abhijith
                 Reddy},
  title =        {{FastCFI}: Real-time Control-Flow Integrity Using
                 {FPGA} without Code Instrumentation},
  journal =      j-TODAES,
  volume =       {26},
  number =       {5},
  pages =        {39:1--39:39},
  month =        jun,
  year =         {2021},
  CODEN =        {ATASFO},
  DOI =          {https://doi.org/10.1145/3458471},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  bibdate =      {Tue Jun 22 08:18:59 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3458471},
  abstract =     {Control-Flow Integrity (CFI) is an effective defense
                 technique against a variety of memory-based cyber
                 attacks. CFI is usually enforced through software
                 methods, which entail considerable performance
                 overhead. Hardware-based CFI techniques can largely
                 \ldots{}},
  acknowledgement = ack-nhfb,
  articleno =    {39},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
}

@Article{Huang:2021:MLE,
  author =       "Guyue Huang and Jingbo Hu and Yifan He and Jialong Liu
                 and Mingyuan Ma and Zhaoyang Shen and Juejian Wu and
                 Yuanfan Xu and Hengrui Zhang and Kai Zhong and Xuefei
                 Ning and Yuzhe Ma and Haoyu Yang and Bei Yu and
                 Huazhong Yang and Yu Wang",
  title =        "Machine Learning for Electronic Design Automation: A
                 Survey",
  journal =      j-TODAES,
  volume =       "26",
  number =       "5",
  pages =        "40:1--40:46",
  month =        jun,
  year =         "2021",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3451179",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Jun 22 08:18:59 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3451179",
  abstract =     "With the down-scaling of CMOS technology, the design
                 complexity of very large-scale integrated is
                 increasing. Although the application of machine
                 learning (ML) techniques in electronic design
                 automation (EDA) can trace its history back to the
                 1990s, the \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "40",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Chattopadhyay:2021:CCP,
  author =       {Saranyu Chattopadhyay and Pranesh Santikellur and
                 Rajat Subhra Chakraborty and Jimson Mathew and Marco
                 Ottavi},
  title =        {A Conditionally Chaotic Physically Unclonable Function
                 Design Framework with High Reliability},
  journal =      j-TODAES,
  volume =       {26},
  number =       {6},
  pages =        {41:1--41:24},
  month =        nov,
  year =         {2021},
  CODEN =        {ATASFO},
  DOI =          {https://doi.org/10.1145/3460004},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  bibdate =      {Thu Aug 19 08:44:49 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3460004},
  abstract =     {Physically Unclonable Function (PUF) circuits are
                 promising low-overhead hardware security primitives,
                 but are often gravely susceptible to machine
                 learning-based modeling attacks. Recently, chaotic PUF
                 circuits have been proposed that show greater
                 \ldots{}},
  acknowledgement = ack-nhfb,
  articleno =    {41},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
}

@Article{Jiang:2021:PDM,
  author =       {Chen Jiang and Bo Yuan and Tsung-Yi Ho and Xin Yao},
  title =        {Placement of Digital Microfluidic Biochips via a New
                 Evolutionary Algorithm},
  journal =      j-TODAES,
  volume =       {26},
  number =       {6},
  pages =        {42:1--42:22},
  month =        nov,
  year =         {2021},
  CODEN =        {ATASFO},
  DOI =          {https://doi.org/10.1145/3460230},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  bibdate =      {Thu Aug 19 08:44:49 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3460230},
  abstract =     {Digital microfluidic biochips (DMFBs) have been a
                 revolutionary platform for automating and miniaturizing
                 laboratory procedures with the advantages of
                 flexibility and reconfigurability. The placement
                 problem is one of the most challenging issues in the
                 \ldots{}},
  acknowledgement = ack-nhfb,
  articleno =    {42},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
}

@Article{Gnad:2021:VBC,
  author =       {Dennis R. E. Gnad and Cong Dang Khoa Nguyen and Syed
                 Hashim Gillani and Mehdi B. Tahoori},
  title =        {Voltage-Based Covert Channels Using {FPGAs}},
  journal =      j-TODAES,
  volume =       {26},
  number =       {6},
  pages =        {43:1--43:25},
  month =        nov,
  year =         {2021},
  CODEN =        {ATASFO},
  DOI =          {https://doi.org/10.1145/3460229},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  bibdate =      {Thu Aug 19 08:44:49 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3460229},
  abstract =     {Field Programmable Gate Arrays (FPGAs) are
                 increasingly used in cloud applications and being
                 integrated into Systems-on-Chip. For these systems,
                 various side-channel attacks on cryptographic
                 implementations have been reported, motivating one to
                 apply \ldots{}},
  acknowledgement = ack-nhfb,
  articleno =    {43},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
}

@Article{Ning:2021:FND,
  author =       {Xuefei Ning and Guangjun Ge and Wenshuo Li and Zhenhua
                 Zhu and Yin Zheng and Xiaoming Chen and Zhen Gao and Yu
                 Wang and Huazhong Yang},
  title =        {{FTT-NAS}: Discovering Fault-tolerant Convolutional
                 Neural Architecture},
  journal =      j-TODAES,
  volume =       {26},
  number =       {6},
  pages =        {44:1--44:24},
  month =        nov,
  year =         {2021},
  CODEN =        {ATASFO},
  DOI =          {https://doi.org/10.1145/3460288},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  bibdate =      {Thu Aug 19 08:44:49 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3460288},
  abstract =     {With the fast evolvement of embedded deep-learning
                 computing systems, applications powered by deep
                 learning are moving from the cloud to the edge. When
                 deploying neural networks (NNs) onto the devices under
                 complex environments, there are various types of
                 \ldots{}},
  acknowledgement = ack-nhfb,
  articleno =    {44},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
}

@Article{Lu:2021:RRD,
  author =       {Anni Lu and Xiaochen Peng and Yandong Luo and Shanshi
                 Huang and Shimeng Yu},
  title =        {A Runtime Reconfigurable Design of
                 Compute-in-Memory-Based Hardware Accelerator for Deep
                 Learning Inference},
  journal =      j-TODAES,
  volume =       {26},
  number =       {6},
  pages =        {45:1--45:18},
  month =        nov,
  year =         {2021},
  CODEN =        {ATASFO},
  DOI =          {https://doi.org/10.1145/3460436},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  bibdate =      {Thu Aug 19 08:44:49 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3460436},
  abstract =     {Compute-in-memory (CIM) is an attractive solution to
                 address the ``memory wall'' challenges for the
                 extensive computation in deep learning hardware
                 accelerators. For custom ASIC design, a specific chip
                 instance is restricted to a specific network during
                 \ldots{}},
  acknowledgement = ack-nhfb,
  articleno =    {45},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
}

@Article{Roy:2021:FVS,
  author =       {Pushpita Roy and Ansuman Banerjee},
  title =        {A Framework for Validation of Synthesized
                 {MicroElectrode} Dot Array Actuations for Digital
                 Microfluidic Biochips},
  journal =      j-TODAES,
  volume =       {26},
  number =       {6},
  pages =        {46:1--46:36},
  month =        nov,
  year =         {2021},
  CODEN =        {ATASFO},
  DOI =          {https://doi.org/10.1145/3460437},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  bibdate =      {Thu Aug 19 08:44:49 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3460437},
  abstract =     {Digital Microfluidics is an emerging technology for
                 automating laboratory procedures in biochemistry. With
                 more and more complex biochemical protocols getting
                 mapped to biochip devices and microfluidics receiving a
                 wide adoption, it is becoming \ldots{}},
  acknowledgement = ack-nhfb,
  articleno =    {46},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
}

@Article{Li:2021:VAH,
  author =       {Xi Li and Soheil Nazar Shahsavani and Xuan Zhou and
                 Massoud Pedram and Peter A. Beerel},
  title =        {A Variation-aware Hold Time Fixing Methodology for
                 Single Flux Quantum Logic Circuits},
  journal =      j-TODAES,
  volume =       {26},
  number =       {6},
  pages =        {47:1--47:17},
  month =        nov,
  year =         {2021},
  CODEN =        {ATASFO},
  DOI =          {https://doi.org/10.1145/3460289},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  bibdate =      {Thu Aug 19 08:44:49 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3460289},
  abstract =     {Single flux quantum (SFQ) logic is a promising
                 technology to replace complementary
                 metal-oxide-semiconductor logic for future exa-scale
                 supercomputing but requires the development of reliable
                 EDA tools that are tailored to the unique
                 characteristics of \ldots{}},
  acknowledgement = ack-nhfb,
  articleno =    {47},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
}

@Article{Park:2021:HTN,
  author =       {Naebeom Park and Sungju Ryu and Jaeha Kung and
                 Jae-Joon Kim},
  title =        {High-throughput Near-Memory Processing on {CNNs} with
                 {$3$D} {HBM}-like Memory},
  journal =      j-TODAES,
  volume =       {26},
  number =       {6},
  pages =        {48:1--48:20},
  month =        nov,
  year =         {2021},
  CODEN =        {ATASFO},
  DOI =          {https://doi.org/10.1145/3460971},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  bibdate =      {Thu Aug 19 08:44:49 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3460971},
  abstract =     {This article discusses the high-performance
                 near-memory neural network (NN) accelerator
                 architecture utilizing the logic die in
                 three-dimensional (3D) High Bandwidth Memory- (HBM)
                 like memory. As most of the previously reported 3D
                 memory-based near-memory \ldots{}},
  acknowledgement = ack-nhfb,
  articleno =    {48},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
}

@Article{Maleki:2021:EEI,
  author =       {Mohammad-Ali Maleki and Alireza Nabipour-Meybodi and
                 Mehdi Kamal and Ali Afzali-Kusha and Massoud Pedram},
  title =        {An Energy-Efficient Inference Method in Convolutional
                 Neural Networks Based on Dynamic Adjustment of the
                 Pruning Level},
  journal =      j-TODAES,
  volume =       {26},
  number =       {6},
  pages =        {49:1--49:20},
  month =        nov,
  year =         {2021},
  CODEN =        {ATASFO},
  DOI =          {https://doi.org/10.1145/3460972},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  bibdate =      {Thu Aug 19 08:44:49 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3460972},
  abstract =     {In this article, we present a low-energy inference
                 method for convolutional neural networks in image
                 classification applications. The lower energy
                 consumption is achieved by using a highly pruned
                 (lower-energy) network if the resulting network can
                 provide \ldots{}},
  acknowledgement = ack-nhfb,
  articleno =    {49},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
}

@Article{Lin:2021:DAS,
  author =       {Dave Y.-W. Lin and Charles H.-P. Wen},
  title =        {A Delay-Adjustable, Self-Testable Flip-Flop for
                 Soft-Error Tolerability and Delay-Fault Testability},
  journal =      j-TODAES,
  volume =       {26},
  number =       {6},
  pages =        {50:1--50:12},
  month =        nov,
  year =         {2021},
  CODEN =        {ATASFO},
  DOI =          {https://doi.org/10.1145/3462171},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  bibdate =      {Thu Aug 19 08:44:49 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3462171},
  abstract =     {As the demand of safety-critical applications (e.g.,
                 automobile electronics) increases, various
                 radiation-hardened flip-flops are proposed for
                 enhancing design reliability. Among all flip-flops,
                 Delay-Adjustable D-Flip-Flop (DAD-FF) is specialized in
                 \ldots{}},
  acknowledgement = ack-nhfb,
  articleno =    {50},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
}

@Article{Polychronou:2022:CSA,
  author =       {Nikolaos-Foivos Polychronou and Pierre-Henri Thevenon
                 and Maxime Puys and Vincent Beroulle},
  title =        {A Comprehensive Survey of Attacks without Physical
                 Access Targeting Hardware Vulnerabilities in {IoT\slash
                 IIoT} Devices, and Their Detection Mechanisms},
  journal =      j-TODAES,
  volume =       {27},
  number =       {1},
  pages =        {1:1--1:35},
  month =        jan,
  year =         {2022},
  CODEN =        {ATASFO},
  DOI =          {https://doi.org/10.1145/3471936},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  bibdate =      {Fri Jan 7 08:25:44 MST 2022},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3471936},
  abstract =     {With the advances in the field of the Internet of
                 Things (IoT) and Industrial IoT (IIoT), these devices
                 are increasingly used in daily life or industry. To
                 reduce costs related to the time required to develop
                 these devices, security features are usually \ldots{}},
  acknowledgement = ack-nhfb,
  articleno =    {1},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
}

@Article{Gade:2022:NHC,
  author =       {Sri Harsha Gade and Sujay Deb},
  title =        {A Novel Hybrid Cache Coherence with Global Snooping
                 for Many-core Architectures},
  journal =      j-TODAES,
  volume =       {27},
  number =       {1},
  pages =        {2:1--2:31},
  month =        jan,
  year =         {2022},
  CODEN =        {ATASFO},
  DOI =          {https://doi.org/10.1145/3462775},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  bibdate =      {Fri Jan 7 08:25:44 MST 2022},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3462775},
  abstract =     {Cache coherence ensures correctness of cached data in
                 multi-core processors. Traditional implementations of
                 existing protocols make them unscalable for many core
                 architectures. While snoopy coherence requires
                 unscalable ordered networks, directory \ldots{}},
  acknowledgement = ack-nhfb,
  articleno =    {2},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
}

@Article{Han:2022:EEF,
  author =       {Ding Han and Guohui Li and Quan Zhou and Jianjun Li
                 and Yong Yang and Xiaofei Hu},
  title =        {An Efficient Execution Framework of Two-Part Execution
                 Scenario Analysis},
  journal =      j-TODAES,
  volume =       {27},
  number =       {1},
  pages =        {3:1--3:24},
  month =        jan,
  year =         {2022},
  CODEN =        {ATASFO},
  DOI =          {https://doi.org/10.1145/3465474},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  bibdate =      {Fri Jan 7 08:25:44 MST 2022},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3465474},
  abstract =     {Response Time Analysis (RTA) is an important and
                 promising technique for analyzing the schedulability of
                 real-time tasks under both Global Fixed-Priority (G-FP)
                 scheduling and Global Earliest Deadline First (G-EDF)
                 scheduling. Most existing RTA methods \ldots{}},
  acknowledgement = ack-nhfb,
  articleno =    {3},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
}

@Article{He:2022:DME,
  author =       {Jingyu He and Yao Xiao and Corina Bogdan and Shahin
                 Nazarian and Paul Bogdan},
  title =        {A Design Methodology for Energy-Aware Processing in
                 Unmanned Aerial Vehicles},
  journal =      j-TODAES,
  volume =       {27},
  number =       {1},
  pages =        {4:1--4:20},
  month =        jan,
  year =         {2022},
  CODEN =        {ATASFO},
  DOI =          {https://doi.org/10.1145/3470451},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  bibdate =      {Fri Jan 7 08:25:44 MST 2022},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3470451},
  abstract =     {Unmanned Aerial Vehicles (UAVs) have rapidly become
                 popular for monitoring, delivery, and actuation in many
                 application domains such as environmental management,
                 disaster mitigation, homeland security, energy,
                 transportation, and manufacturing. However, \ldots{}},
  acknowledgement = ack-nhfb,
  articleno =    {4},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
}

@Article{Cui:2022:ILD,
  author =       {Lanlan Cui and Fei Wu and Xiaojian Liu and Meng Zhang
                 and Renzhi Xiao and Changsheng Xie},
  title =        {Improving {LDPC} Decoding Performance for {$3$D TLC
                 NAND} Flash by {LLR} Optimization Scheme for Hard and
                 Soft Decision},
  journal =      j-TODAES,
  volume =       {27},
  number =       {1},
  pages =        {5:1--5:20},
  month =        jan,
  year =         {2022},
  CODEN =        {ATASFO},
  DOI =          {https://doi.org/10.1145/3473305},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  bibdate =      {Fri Jan 7 08:25:44 MST 2022},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3473305},
  abstract =     {Low-density parity-check (LDPC) codes have been widely
                 adopted in NAND flash in recent years to enhance data
                 reliability. There are two types of decoding,
                 hard-decision and soft-decision decoding. However, for
                 the two types, their error correction \ldots{}},
  acknowledgement = ack-nhfb,
  articleno =    {5},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
}

@Article{Li:2022:NSI,
  author =       {Bo Li and Guoyong Shi},
  title =        {A Native {SPICE} Implementation of Memristor Models
                 for Simulation of Neuromorphic Analog Signal Processing
                 Circuits},
  journal =      j-TODAES,
  volume =       {27},
  number =       {1},
  pages =        {6:1--6:24},
  month =        jan,
  year =         {2022},
  CODEN =        {ATASFO},
  DOI =          {https://doi.org/10.1145/3474364},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  bibdate =      {Fri Jan 7 08:25:44 MST 2022},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3474364},
  abstract =     {Since the memristor emerged as a programmable analog
                 storage device, it has stimulated research on the
                 design of analog/mixed-signal circuits with the
                 memristor as the enabler of in-memory computation. Due
                 to the difficulty in evaluating the circuit-level
                 \ldots{}},
  acknowledgement = ack-nhfb,
  articleno =    {6},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
}

@Article{Poddar:2022:DDM,
  author =       {Sudip Poddar and Sukanta Bhattacharjee and Shao-Yun
                 Fang and Tsung-Yi Ho and B. B. Bhattacharya},
  title =        {Demand-Driven Multi-Target Sample Preparation on
                 Resource-Constrained Digital Microfluidic Biochips},
  journal =      j-TODAES,
  volume =       {27},
  number =       {1},
  pages =        {7:1--7:21},
  month =        jan,
  year =         {2022},
  CODEN =        {ATASFO},
  DOI =          {https://doi.org/10.1145/3474392},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  bibdate =      {Fri Jan 7 08:25:44 MST 2022},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3474392},
  abstract =     {Microfluidic lab-on-chips offer promising technology
                 for the automation of various biochemical laboratory
                 protocols on a minuscule chip. Sample preparation (SP)
                 is an essential part of any biochemical experiments,
                 which aims to produce dilution of a \ldots{}},
  acknowledgement = ack-nhfb,
  articleno =    {7},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
}

@Article{Liu:2022:FIA,
  author =       "Qiang Liu and Honghui Tang and Peiran Zhang",
  title =        "Fault Injection Attack Emulation Framework for Early
                 Evaluation of {IC} Designs",
  journal =      j-TODAES,
  volume =       "27",
  number =       "1",
  pages =        "8:1--8:25",
  month =        jan,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3480962",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jan 7 08:25:44 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3480962",
  abstract =     "Fault injection attack (FIA) has become a serious
                 threat to the confidentiality and fault tolerance of
                 integrated circuits (ICs). Circuit designers need an
                 effective method to evaluate the countermeasures of the
                 IC designs against the FIAs at the design \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "8",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Ge:2022:SBN,
  author =       "Mengke Ge and Xiaobing Ni and Xu Qi and Song Chen and
                 Jinglei Huang and Yi Kang and Feng Wu",
  title =        "Synthesizing Brain-network-inspired Interconnections
                 for Large-scale Network-on-chips",
  journal =      j-TODAES,
  volume =       "27",
  number =       "1",
  pages =        "9:1--9:30",
  month =        jan,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3480961",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jan 7 08:25:44 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3480961",
  abstract =     "Brain network is a large-scale complex network with
                 scale-free, small-world, and modularity properties,
                 which largely supports this high-efficiency massive
                 system. In this article, we propose to synthesize
                 brain-network-inspired interconnections for
                 large-\ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "9",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Alaghi:2022:ISI,
  author =       "Armin Alaghi and Eva Darulova and Andreas Gerstlauer
                 and Phillip Stanley-Marbell",
  title =        "Introduction to the Special Issue on Approximate
                 Systems",
  journal =      j-TODAES,
  volume =       "27",
  number =       "2",
  pages =        "10:1--10:2",
  month =        mar,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3488726",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Feb 17 07:56:21 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3488726",
  acknowledgement = ack-nhfb,
  articleno =    "10",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Bu:2022:TFG,
  author =       "Tiancong Bu and Kaige Yan and Jingweijia Tan",
  title =        "Towards Fine-Grained Online Adaptive Approximation
                 Control for Dense {SLAM} on Embedded {GPUs}",
  journal =      j-TODAES,
  volume =       "27",
  number =       "2",
  pages =        "11:1--11:19",
  month =        mar,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3486612",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Feb 17 07:56:21 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3486612",
  abstract =     "Dense SLAM is an important application on an embedded
                 environment. However, embedded platforms usually fail
                 to provide enough computation resources for
                 high-accuracy real-time dense SLAM, even with
                 high-parallelism architecture such as GPUs. To tackle
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "11",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Singh:2022:PFE,
  author =       "Somesh Singh and Tejas Shah and Rupesh Nasre",
  title =        "{ParTBC}: Faster Estimation of Top-$k$ Betweenness
                 Centrality Vertices on {GPU}",
  journal =      j-TODAES,
  volume =       "27",
  number =       "2",
  pages =        "12:1--12:25",
  month =        mar,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3486613",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Feb 17 07:56:21 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3486613",
  acknowledgement = ack-nhfb,
  articleno =    "12",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Liu:2022:AAF,
  author =       "Liu Liu and Sibren Isaacman and Ulrich Kremer",
  title =        "An Adaptive Application Framework with Customizable
                 Quality Metrics",
  journal =      j-TODAES,
  volume =       "27",
  number =       "2",
  pages =        "13:1--13:33",
  month =        mar,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3477428",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Feb 17 07:56:21 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3477428",
  abstract =     "Many embedded environments require applications to
                 produce outcomes under different, potentially changing,
                 resource constraints. Relaxing application semantics
                 through approximations enables trading off resource
                 usage for outcome quality. Although quality \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "13",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Chowdhury:2022:LAH,
  author =       "Prattay Chowdhury and Benjamin Carrion Schafer",
  title =        "Leveraging Automatic High-Level Synthesis Resource
                 Sharing to Maximize Dynamical Voltage Overscaling with
                 Error Control",
  journal =      j-TODAES,
  volume =       "27",
  number =       "2",
  pages =        "14:1--14:18",
  month =        mar,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3473909",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Feb 17 07:56:21 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3473909",
  abstract =     "Approximate Computing has emerged as an alternative
                 way to further reduce the power consumption of
                 integrated circuits (ICs) by trading off errors at the
                 output with simpler, more efficient logic. So far the
                 main approaches in approximate computing have
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "14",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Han:2022:DSL,
  author =       "Ming Han and Ye Wang and Jian Dong and Gang Qu",
  title =        "Double-Shift: a Low-Power {DNN} Weights Storage and
                 Access Framework based on Approximate Decomposition and
                 Quantization",
  journal =      j-TODAES,
  volume =       "27",
  number =       "2",
  pages =        "15:1--15:16",
  month =        mar,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3477047",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Feb 17 07:56:21 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3477047",
  abstract =     "One major challenge in deploying Deep Neural Network
                 (DNN) in resource-constrained applications, such as
                 edge nodes, mobile embedded systems, and IoT devices,
                 is its high energy cost. The emerging approximate
                 computing methodology can effectively reduce \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "15",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Ebrahimi:2022:PCL,
  author =       "Zahra Ebrahimi and Dennis Klar and Mohammad Aasim
                 Ekhtiyar and Akash Kumar",
  title =        "Plasticine: a Cross-layer Approximation Methodology
                 for Multi-kernel Applications through Minimally Biased,
                 High-throughput, and Energy-efficient {SIMD} Soft
                 Multiplier-divider",
  journal =      j-TODAES,
  volume =       "27",
  number =       "2",
  pages =        "16:1--16:33",
  month =        mar,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3486616",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Feb 17 07:56:21 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3486616",
  abstract =     "The rapid evolution of error-resilient programs
                 intertwined with their quest for high throughput has
                 motivated the use of Single Instruction, Multiple Data
                 (SIMD) components in Field-Programmable Gate Arrays
                 (FPGAs). Particularly, to exploit the
                 error-\ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "16",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Lee:2022:DAA,
  author =       "Jaechul Lee and C{\'e}dric Killian and Sebastien {Le
                 Beux} and Daniel Chillet",
  title =        "Distance-aware Approximate Nanophotonic Interconnect",
  journal =      j-TODAES,
  volume =       "27",
  number =       "2",
  pages =        "17:1--17:30",
  month =        mar,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3484309",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Feb 17 07:56:21 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3484309",
  abstract =     "The energy consumption of manycore architectures is
                 dominated by data movement, which calls for
                 energy-efficient and high-bandwidth interconnects. To
                 overcome the bandwidth limitation of electrical
                 interconnects, integrated optics appear as a promising
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "17",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Angizi:2022:MRN,
  author =       "Shaahin Angizi and Navid Khoshavi and Andrew Marshall
                 and Peter Dowben and Deliang Fan",
  title =        "{MeF-RAM}: a New Non-Volatile Cache Memory Based on
                 Magneto-Electric {FET}",
  journal =      j-TODAES,
  volume =       "27",
  number =       "2",
  pages =        "18:1--18:18",
  month =        mar,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3484222",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Feb 17 07:56:21 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3484222",
  abstract =     "Magneto-Electric FET (MEFET) is a recently developed
                 post-CMOS FET, which offers intriguing characteristics
                 for high-speed and low-power design in both logic and
                 memory applications. In this article, we present
                 MeF-RAM, a non-volatile cache memory design \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "18",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Shi:2022:CHD,
  author =       "Xiao Shi and Hao Yan and Qiancun Huang and Chengzhen
                 Xuan and Lei He and Longxing Shi",
  title =        "A Compact High-Dimensional Yield Analysis Method using
                 Low-Rank Tensor Approximation",
  journal =      j-TODAES,
  volume =       "27",
  number =       "2",
  pages =        "19:1--19:23",
  month =        mar,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3483941",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Feb 17 07:56:21 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3483941",
  abstract =     "``Curse of dimensionality'' has become the major
                 challenge for existing high-sigma yield analysis
                 methods. In this article, we develop a meta-model using
                 Low-Rank Tensor Approximation (LRTA) to substitute
                 expensive SPICE simulation. The polynomial degree of
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "19",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Cai:2022:EDL,
  author =       "Han Cai and Ji Lin and Yujun Lin and Zhijian Liu and
                 Haotian Tang and Hanrui Wang and Ligeng Zhu and Song
                 Han",
  title =        "Enable Deep Learning on Mobile Devices: Methods,
                 Systems, and Applications",
  journal =      j-TODAES,
  volume =       "27",
  number =       "3",
  pages =        "20:1--20:50",
  month =        may,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3486618",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Mar 24 16:05:33 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3486618",
  abstract =     "Deep neural networks (DNNs) have achieved
                 unprecedented success in the field of artificial
                 intelligence (AI), including computer vision, natural
                 language processing, and speech recognition. However,
                 their superior performance comes at the considerable
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "20",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{S:2022:EEE,
  author =       "Skandha Deepsita S. and Dhayala Kumar M. and Noor
                 Mahammad SK",
  title =        "Energy Efficient Error Resilient Multiplier Using
                 Low-power Compressors",
  journal =      j-TODAES,
  volume =       "27",
  number =       "3",
  pages =        "21:1--21:26",
  month =        may,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3488837",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Mar 24 16:05:33 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3488837",
  abstract =     "The approximate hardware design can save huge energy
                 at the cost of errors incurred in the design. This
                 article proposes the approximate algorithm for
                 low-power compressors, utilized to build approximate
                 multiplier with low energy and acceptable error
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "21",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Oldja:2022:HSS,
  author =       "Mari-Liis Oldja and Jangryul Kim and Dowhan Jeong and
                 Soonhoi Ha",
  title =        "Hierarchical Scheduling of an {SDF/L} Graph onto
                 Multiple Processors",
  journal =      j-TODAES,
  volume =       "27",
  number =       "3",
  pages =        "22:1--22:23",
  month =        may,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3489469",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Mar 24 16:05:33 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3489469",
  abstract =     "Although dataflow models are known to thrive at
                 exploiting task-level parallelism of an application, it
                 is difficult to exploit the parallelism of data,
                 represented well with loop structures, since these
                 structures are not explicitly specified in existing
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "22",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Chen:2022:UTB,
  author =       "Si Chen and Guoqi Xie and Renfa Li and Keqin Li",
  title =        "Uncertainty Theory Based Partitioning for
                 Cyber-Physical Systems with Uncertain Reliability
                 Analysis",
  journal =      j-TODAES,
  volume =       "27",
  number =       "3",
  pages =        "23:1--23:19",
  month =        may,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3490177",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Mar 24 16:05:33 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3490177",
  abstract =     "Reasonable partitioning is a critical issue for
                 cyber-physical system (CPS) design. Traditional CPS
                 partitioning methods run in a determined context and
                 depend on the parameter pre-estimations, but they
                 ignore the uncertainty of parameters and hardly
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "23",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Luo:2022:FDF,
  author =       "Yukui Luo and Shijin Duan and Xiaolin Xu",
  title =        "{FPGAPRO}: a Defense Framework Against
                 Crosstalk-Induced Secret Leakage in {FPGA}",
  journal =      j-TODAES,
  volume =       "27",
  number =       "3",
  pages =        "24:1--24:31",
  month =        may,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3491214",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Mar 24 16:05:33 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3491214",
  abstract =     "With the emerging cloud-computing development, FPGAs
                 are being integrated with cloud servers for higher
                 performance. Recently, it has been explored to enable
                 multiple users to share the hardware resources of a
                 remote FPGA, i.e., to execute their own \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "24",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Feng:2022:TTO,
  author =       "Lang Feng and Jiayi Huang and Jeff Huang and Jiang
                 Hu",
  title =        "Toward Taming the Overhead Monster for Data-flow
                 Integrity",
  journal =      j-TODAES,
  volume =       "27",
  number =       "3",
  pages =        "25:1--25:24",
  month =        may,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3490176",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Mar 24 16:05:33 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3490176",
  abstract =     "Data-Flow Integrity (DFI) is a well-known approach to
                 effectively detecting a wide range of software attacks.
                 However, its real-world application has been quite
                 limited so far because of the prohibitive performance
                 overhead it incurs. Moreover, the \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "25",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Mahalat:2022:ICA,
  author =       "Mahabub Hasan Mahalat and Suraj Mandal and Anindan
                 Mondal and Bibhash Sen and Rajat Subhra Chakraborty",
  title =        "Implementation, Characterization and Application of
                 Path Changing Switch based Arbiter {PUF} on {FPGA} as a
                 lightweight Security Primitive for {IoT}",
  journal =      j-TODAES,
  volume =       "27",
  number =       "3",
  pages =        "26:1--26:26",
  month =        may,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3491212",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Mar 24 16:05:33 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3491212",
  abstract =     "Secure authentication of any Internet-of-Things (IoT)
                 device becomes the utmost necessity due to the lack of
                 specifically designed IoT standards and intrinsic
                 vulnerabilities with limited resources and
                 heterogeneous technologies. Despite the suitability
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "26",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Baker:2022:CMA,
  author =       "Timothy J. Baker and John P. Hayes",
  title =        "{CeMux}: Maximizing the Accuracy of Stochastic Mux
                 Adders and an Application to Filter Design",
  journal =      j-TODAES,
  volume =       "27",
  number =       "3",
  pages =        "27:1--27:26",
  month =        may,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3491213",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Mar 24 16:05:33 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3491213",
  abstract =     "Stochastic computing (SC) is a low-cost computational
                 paradigm that has promising applications in digital
                 filter design, image processing, and neural networks.
                 Fundamental to these applications is the weighted
                 addition operation, which is most often \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "27",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Elangovan:2022:ABA,
  author =       "Reena Elangovan and Shubham Jain and Anand
                 Raghunathan",
  title =        "{Ax-BxP}: Approximate Blocked Computation for
                 Precision-reconfigurable Deep Neural Network
                 Acceleration",
  journal =      j-TODAES,
  volume =       "27",
  number =       "3",
  pages =        "28:1--28:20",
  month =        may,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3492733",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Mar 24 16:05:33 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3492733",
  abstract =     "Precision scaling has emerged as a popular technique
                 to optimize the compute and storage requirements of
                 Deep Neural Networks (DNNs). Efforts toward creating
                 ultra-low-precision (sub-8-bit) DNNs for efficient
                 inference suggest that the minimum precision \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "28",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Pilato:2022:ISS,
  author =       "Christian Pilato and Zhenman Fang and Yuko Hara-Azumi
                 and Jim Hwang",
  title =        "Introduction to the Special Section on High-level
                 Synthesis for {FPGA}: Next-generation Technologies and
                 Applications",
  journal =      j-TODAES,
  volume =       "27",
  number =       "4",
  pages =        "29:1--29:2",
  month =        jul,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3519279",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed May 25 08:20:01 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3519279",
  acknowledgement = ack-nhfb,
  articleno =    "29",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Ramanathan:2022:CPF,
  author =       "Nadesh Ramanathan and George A. Constantinides and
                 John Wickerson",
  title =        "A Case for Precise, Fine-Grained Pointer Synthesis in
                 High-Level Synthesis",
  journal =      j-TODAES,
  volume =       "27",
  number =       "4",
  pages =        "30:1--30:26",
  month =        jul,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3491430",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed May 25 08:20:01 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3491430",
  abstract =     "This article combines two practical approaches to
                 improve pointer synthesis within HLS tools. Both
                 approaches focus on inefficiencies in how HLS tools
                 treat the points-to graph---a mapping that connects each
                 instruction to the memory locations that it might
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "30",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Sun:2022:CMO,
  author =       "Qi Sun and Tinghuan Chen and Siting Liu and Jianli
                 Chen and Hao Yu and Bei Yu",
  title =        "Correlated Multi-objective Multi-fidelity Optimization
                 for {HLS} Directives Design",
  journal =      j-TODAES,
  volume =       "27",
  number =       "4",
  pages =        "31:1--31:27",
  month =        jul,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3503540",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed May 25 08:20:01 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3503540",
  abstract =     "High-level synthesis (HLS) tools have gained great
                 attention in recent years because it emancipates
                 engineers from the complicated and heavy hardware
                 description language writing and facilitates the
                 implementations of modern applications (e.g., deep
                 \ldots{})",
  acknowledgement = ack-nhfb,
  articleno =    "31",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Sohrabizadeh:2022:AES,
  author =       "Atefeh Sohrabizadeh and Cody Hao Yu and Min Gao and
                 Jason Cong",
  title =        "{AutoDSE}: Enabling Software Programmers to Design
                 Efficient {FPGA} Accelerators",
  journal =      j-TODAES,
  volume =       "27",
  number =       "4",
  pages =        "32:1--32:27",
  month =        jul,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3494534",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed May 25 08:20:01 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3494534",
  abstract =     "Adopting FPGA as an accelerator in datacenters is
                 becoming mainstream for customized computing, but the
                 fact that FPGAs are hard to program creates a steep
                 learning curve for software programmers. Even with the
                 help of high-level synthesis (HLS), \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "32",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Gautier:2022:SMO,
  author = {Quentin Gautier and Alric Althoff and Christopher L. Crutchfield
    and Ryan Kastner},
  title = {{Sherlock}: a Multi-Objective Design Space Exploration Framework},
  journal = j-TODAES,
  volume = {27},
  number = {4},
  pages = {33:1--33:20},
  month = jul,
  year = {2022},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3511472},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Wed May 25 08:20:01 MDT 2022},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL = {https://dl.acm.org/doi/10.1145/3511472},
  abstract = {Design space exploration (DSE) provides intelligent methods to
    tune the large number of optimization parameters present in modern FPGA
    high-level synthesis tools. High-level synthesis parameter tuning is a
    time-consuming process due to lengthy hardware \ldots{}},
  acknowledgement = ack-nhfb,
  articleno = {33},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
}

%%% The second author's family name is the two-word compound "Carrion
%%% Schafer"; it is braced so that BibTeX keeps it together as the surname.
@Article{Wang:2022:LPE,
  author =       "Zi Wang and Benjamin {Carrion Schafer}",
  title =        "Learning from the Past: Efficient High-level Synthesis
                 Design Space Exploration for {FPGAs}",
  journal =      j-TODAES,
  volume =       "27",
  number =       "4",
  pages =        "34:1--34:23",
  month =        jul,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3495531",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed May 25 08:20:01 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3495531",
  abstract =     "The quest to democratize the use of Field-Programmable
                 Gate Arrays (FPGAs) has given High-Level Synthesis
                 (HLS) the final push to be widely accepted with FPGA
                 vendors strongly supporting this VLSI design
                 methodology to expand the FPGA user base. HLS takes
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "34",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Sjovall:2022:HLS,
  author = {Panu Sj{\"o}vall and Ari Lemmetti and Jarno Vanne and Sakari
    Lahti and Timo D. H{\"a}m{\"a}l{\"a}inen},
  title = {High-Level Synthesis Implementation of an Embedded Real-Time
    {HEVC} Intra Encoder on {FPGA} for Media Applications},
  journal = j-TODAES,
  volume = {27},
  number = {4},
  pages = {35:1--35:34},
  month = jul,
  year = {2022},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3491215},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Wed May 25 08:20:01 MDT 2022},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL = {https://dl.acm.org/doi/10.1145/3491215},
  abstract = {High Efficiency Video Coding (HEVC) is the key enabling
    technology for numerous modern media applications. Overcoming its
    computational complexity and customizing its rich features for
    real-time HEVC encoder implementations, calls for automated design
    \ldots{}},
  acknowledgement = ack-nhfb,
  articleno = {35},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
}

@Article{Liu:2022:LOH,
  author = {Yanjiang Liu and Tongzhou Qu and Zibin Dai},
  title = {A Low-Overhead and High-Security Cryptographic Circuit Design
    Utilizing the {TIGFET}-Based Three-Phase Single-Rail Pulse Register
    against Side-Channel Attacks},
  journal = j-TODAES,
  volume = {27},
  number = {4},
  pages = {36:1--36:13},
  month = jul,
  year = {2022},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3498339},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Wed May 25 08:20:01 MDT 2022},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL = {https://dl.acm.org/doi/10.1145/3498339},
  abstract = {Side-channel attack (SCA) reveals confidential information by
    statistically analyzing physical manifestations, which is the serious
    threat to cryptographic circuits. Various SCA circuit-level
    countermeasures have been proposed as fundamental solutions to
    \ldots{}},
  acknowledgement = ack-nhfb,
  articleno = {36},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
}

@Article{Huang:2022:AHS,
  author = {Shanshi Huang and Xiaoyu Sun and Xiaochen Peng and Hongwu Jiang
    and Shimeng Yu},
  title = {Achieving High In Situ Training Accuracy and Energy Efficiency
    with Analog Non-Volatile Synaptic Devices},
  journal = j-TODAES,
  volume = {27},
  number = {4},
  pages = {37:1--37:19},
  month = jul,
  year = {2022},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3500929},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Wed May 25 08:20:01 MDT 2022},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL = {https://dl.acm.org/doi/10.1145/3500929},
  abstract = {On-device embedded artificial intelligence prefers the adaptive
    learning capability when deployed in the field, and thus in situ
    training is required. The compute-in-memory approach, which exploits
    the analog computation within the memory array, is a \ldots{}},
  acknowledgement = ack-nhfb,
  articleno = {37},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
}

@Article{Uysal:2022:SCN,
  author = {Necati Uysal and Rickard Ewetz},
  title = {Synthesis of Clock Networks with a Mode-Reconfigurable Topology},
  journal = j-TODAES,
  volume = {27},
  number = {4},
  pages = {38:1--38:22},
  month = jul,
  year = {2022},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3503538},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Wed May 25 08:20:01 MDT 2022},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL = {https://dl.acm.org/doi/10.1145/3503538},
  abstract = {Modern digital circuits are often required to operate in
    multiple modes to cater to variable frequency and power requirements.
    Consequently, the clock networks for such circuits must be synthesized,
    meeting different timing constraints in different \ldots{}},
  acknowledgement = ack-nhfb,
  articleno = {38},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
}

@Article{Handique:2022:FLS,
  author =       "Mousum Handique and Jantindra Kumar Deka and Santosh
                 Biswas",
  title =        "Fault Localization Scheme for Missing Gate Faults in
                 Reversible Circuits",
  journal =      j-TODAES,
  volume =       "27",
  number =       "4",
  pages =        "39:1--39:29",
  month =        jul,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3503539",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed May 25 08:20:01 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3503539",
  abstract =     "This article introduces a fault localization method to
                 extract the exact location of single and multiple
                 missing gate faults in reversible \( k \)-CNOT-based
                 circuits. The primary target of the proposed method is
                 to obtain the complete test set for \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "39",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Guo:2022:SSD,
  author = {Wenzhong Guo and Sihuang Lian and Chen Dong and Zhenyi Chen and
    Xing Huang},
  title = {A Survey on Security of Digital Microfluidic Biochips: Technology,
    Attack, and Defense},
  journal = j-TODAES,
  volume = {27},
  number = {4},
  pages = {40:1--40:33},
  month = jul,
  year = {2022},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3494697},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Wed May 25 08:20:01 MDT 2022},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL = {https://dl.acm.org/doi/10.1145/3494697},
  abstract = {As an emerging lab-on-a-chip technology platform, digital
    microfluidic biochips (DMFBs) have been widely used for executing
    various laboratory procedures in biochemistry and biomedicine such as
    gene sequencing and near-patient diagnosis, with the \ldots{}},
  acknowledgement = ack-nhfb,
  articleno = {40},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
}

@Article{Chandra:2022:ISS,
  author = {Vikas Chandra and Yiran Chen and Sungjoo Yoo},
  title = {Introduction to the Special Section on Energy-Efficient {AI}
    Chips},
  journal = j-TODAES,
  volume = {27},
  number = {5},
  pages = {41:1--41:2},
  month = sep,
  year = {2022},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3538502},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Wed Sep 28 11:01:08 MDT 2022},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL = {https://dl.acm.org/doi/10.1145/3538502},
  acknowledgement = ack-nhfb,
  articleno = {41},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
}

@Article{Lee:2022:MEC,
  author = {Sunjung Lee and Jaewan Choi and Wonkyung Jung and Byeongho Kim
    and Jaehyun Park and Hweesoo Kim and Jung Ho Ahn},
  title = {{MVP}: an Efficient {CNN} Accelerator with Matrix, Vector, and
    Processing-Near-Memory Units},
  journal = j-TODAES,
  volume = {27},
  number = {5},
  pages = {42:1--42:25},
  month = sep,
  year = {2022},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3497745},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Wed Sep 28 11:01:08 MDT 2022},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL = {https://dl.acm.org/doi/10.1145/3497745},
  abstract = {Mobile and edge devices become common platforms for inferring
    convolutional neural networks (CNNs) due to superior privacy and
    service quality. To reduce the computational costs of convolution
    (CONV), recent CNN models adopt depth-wise CONV (DW-CONV) and
    \ldots{}},
  acknowledgement = ack-nhfb,
  articleno = {42},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
}

@Article{Cicek:2022:EEB,
  author = {Nihat Mert Cicek and Xipeng Shen and Ozcan Ozturk},
  title = {Energy Efficient Boosting of {GEMM} Accelerators for {DNN} via
    Reuse},
  journal = j-TODAES,
  volume = {27},
  number = {5},
  pages = {43:1--43:26},
  month = sep,
  year = {2022},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3503469},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Wed Sep 28 11:01:08 MDT 2022},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL = {https://dl.acm.org/doi/10.1145/3503469},
  abstract = {Reuse-centric convolutional neural networks (CNN) acceleration
    speeds up CNN inference by reusing computations for similar neuron
    vectors in CNN's input layer or activation maps. This new paradigm of
    optimizations is, however, largely limited by the \ldots{}},
  acknowledgement = ack-nhfb,
  articleno = {43},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
}

@Article{Chen:2022:EEL,
  author =       "Zhe Chen and Hugh T. Blair and Jason Cong",
  title =        "Energy-Efficient {LSTM} Inference Accelerator for
                 Real-Time Causal Prediction",
  journal =      j-TODAES,
  volume =       "27",
  number =       "5",
  pages =        "44:1--44:19",
  month =        sep,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3495006",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Sep 28 11:01:08 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3495006",
  abstract =     "Ever-growing edge applications often require short
                 processing latency and high energy efficiency to meet
                 strict timing and power budget. In this work, we
                 propose that the compact long short-term memory (LSTM)
                 model can approximate conventional acausal \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "44",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Shiri:2022:EEE,
  author = {Aidin Shiri and Uttej Kallakuri and Hasib-Al Rashid and Bharat
    Prakash and Nicholas R. Waytowich and Tim Oates and Tinoosh Mohsenin},
  title = {{E2HRL}: an Energy-efficient Hardware Accelerator for Hierarchical
    Deep Reinforcement Learning},
  journal = j-TODAES,
  volume = {27},
  number = {5},
  pages = {45:1--45:19},
  month = sep,
  year = {2022},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3498327},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Wed Sep 28 11:01:08 MDT 2022},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL = {https://dl.acm.org/doi/10.1145/3498327},
  abstract = {Recently, Reinforcement Learning (RL) has shown great
    performance in solving sequential decision-making and control in
    dynamic environment problems. Despite its achievements, deploying Deep
    Neural Network (DNN)-based RL is expensive in terms of time and
    \ldots{}},
  acknowledgement = ack-nhfb,
  articleno = {45},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
}

@Article{Laubeuf:2022:DQR,
  author =       "Nathan Laubeuf and Jonas Doevenspeck and Ioannis A.
                 Papistas and Michele Caselli and Stefan Cosemans and
                 Peter Vrancx and Debjyoti Bhattacharjee and Arindam
                 Mallik and Peter Debacker and Diederik Verkest and
                 Francky Catthoor and Rudy Lauwereins",
  title =        "Dynamic Quantization Range Control for
                 Analog-in-Memory Neural Networks Acceleration",
  journal =      j-TODAES,
  volume =       "27",
  number =       "5",
  pages =        "46:1--46:21",
  month =        sep,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3498328",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Sep 28 11:01:08 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3498328",
  abstract =     "Analog in Memory Computing (AiMC) based neural network
                 acceleration is a promising solution to increase the
                 energy efficiency of deep neural networks deployment.
                 However, the quantization requirements of these analog
                 systems are not compatible with state-of-the-art
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "46",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Gong:2022:AMB,
  author =       "Yifan Gong and Geng Yuan and Zheng Zhan and Wei Niu
                 and Zhengang Li and Pu Zhao and Yuxuan Cai and Sijia
                 Liu and Bin Ren and Xue Lin and Xulong Tang and Yanzhi
                 Wang",
  title =        "Automatic Mapping of the Best-Suited {DNN} Pruning
                 Schemes for Real-Time Mobile Acceleration",
  journal =      j-TODAES,
  volume =       "27",
  number =       "5",
  pages =        "47:1--47:26",
  month =        sep,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3495532",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Sep 28 11:01:08 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3495532",
  abstract =     "Weight pruning is an effective model compression
                 technique to tackle the challenges of achieving
                 real-time deep neural network (DNN) inference on mobile
                 devices. However, prior pruning schemes have limited
                 application scenarios due to accuracy degradation,
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "47",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Lee:2022:ION,
  author = {Jooyeon Lee and Junsang Park and Seunghyun Lee and Jaeha Kung},
  title = {Implication of Optimizing {NPU} Dataflows on Neural Architecture
    Search for Mobile Devices},
  journal = j-TODAES,
  volume = {27},
  number = {5},
  pages = {48:1--48:24},
  month = sep,
  year = {2022},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3513085},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Wed Sep 28 11:01:08 MDT 2022},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL = {https://dl.acm.org/doi/10.1145/3513085},
  abstract = {Recent advances in deep learning have made it possible to
    implement artificial intelligence in mobile devices. Many studies have
    put a lot of effort into developing lightweight deep learning models
    optimized for mobile devices. To overcome the performance \ldots{}},
  acknowledgement = ack-nhfb,
  articleno = {48},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
}

@Article{Tang:2022:ETE,
  author = {Yue Tang and Xinyi Zhang and Peipei Zhou and Jingtong Hu},
  title = {{EF-Train}: Enable Efficient On-device {CNN} Training on {FPGA}
    through Data Reshaping for Online Adaptation or Personalization},
  journal = j-TODAES,
  volume = {27},
  number = {5},
  pages = {49:1--49:36},
  month = sep,
  year = {2022},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3505633},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Wed Sep 28 11:01:08 MDT 2022},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL = {https://dl.acm.org/doi/10.1145/3505633},
  abstract = {Conventionally, DNN models are trained once in the cloud and
    deployed in edge devices such as cars, robots, or unmanned aerial
    vehicles (UAVs) for real-time inference. However, there are many cases
    that require the models to adapt to new environments, \ldots{}},
  acknowledgement = ack-nhfb,
  articleno = {49},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
}

@Article{Li:2022:DDN,
  author = {Chaojian Li and Wuyang Chen and Yuchen Gu and Tianlong Chen and
    Yonggan Fu and Zhangyang Wang and Yingyan Lin},
  title = {{DANCE}: {DAta-Network Co-optimization for Efficient} Segmentation
    Model Training and Inference},
  journal = j-TODAES,
  volume = {27},
  number = {5},
  pages = {50:1--50:20},
  month = sep,
  year = {2022},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3510835},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Wed Sep 28 11:01:08 MDT 2022},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL = {https://dl.acm.org/doi/10.1145/3510835},
  abstract = {Semantic segmentation for scene understanding is nowadays
    widely demanded, raising significant challenges for the algorithm
    efficiency, especially its applications on resource-limited platforms.
    Current segmentation models are trained and evaluated on \ldots{}},
  acknowledgement = ack-nhfb,
  articleno = {50},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
}

@Article{Kee:2022:LPP,
  author = {Minkwan Kee and Gi-Ho Park},
  title = {A Low-power Programmable Machine Learning Hardware Accelerator
    Design for Intelligent Edge Devices},
  journal = j-TODAES,
  volume = {27},
  number = {5},
  pages = {51:1--51:13},
  month = sep,
  year = {2022},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3531479},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Wed Sep 28 11:01:08 MDT 2022},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL = {https://dl.acm.org/doi/10.1145/3531479},
  abstract = {With the advent of the machine learning and IoT, many low-power
    edge devices, such as wearable devices with various sensors, are used
    for machine learning-based intelligent applications, such as healthcare
    or motion recognition. While these applications \ldots{}},
  acknowledgement = ack-nhfb,
  articleno = {51},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
}

@Article{Wen:2022:MCT,
  author = {Chenyi Wen and Xiao Dong and Baixin Chen and Umamaheswara Rao
    Tida and Yiyu Shi and Cheng Zhuo},
  title = {Magnetic Core {TSV}-Inductor Design and Optimization for On-chip
    {DC-DC} Converter},
  journal = j-TODAES,
  volume = {27},
  number = {5},
  pages = {52:1--52:23},
  month = sep,
  year = {2022},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3507700},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Wed Sep 28 11:01:08 MDT 2022},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL = {https://dl.acm.org/doi/10.1145/3507700},
  abstract = {The conventional on-chip spiral inductor consumes a significant
    top-metal routing area, thereby preventing its popularity in many
    on-chip applications. Recently through-silicon-via- (TSV) based
    inductor (also known as a TSV-inductor) with a magnetic core \ldots{}},
  acknowledgement = ack-nhfb,
  articleno = {52},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
}

@Article{Dewan:2022:DAA,
  author = {Monzurul Islam Dewan and Dae Hyun Kim},
  title = {Design Automation Algorithms for the {NP}-Separate {VLSI} Design
    Methodology},
  journal = j-TODAES,
  volume = {27},
  number = {5},
  pages = {53:1--53:20},
  month = sep,
  year = {2022},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3508375},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Wed Sep 28 11:01:08 MDT 2022},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL = {https://dl.acm.org/doi/10.1145/3508375},
  abstract = {The NP-Separate design methodology for very-large-scale
    integration (VLSI) design fine-controls the sizes of transistors,
    thereby achieving significant power, performance, and area improvement
    compared to the conventional standard-cell-based design \ldots{}},
  acknowledgement = ack-nhfb,
  articleno = {53},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
}

@Article{Pomeranz:2022:IFC,
  author = {Irith Pomeranz},
  title = {Increasing the Fault Coverage of a Truncated Test Set},
  journal = j-TODAES,
  volume = {27},
  number = {6},
  pages = {54:1--54:??},
  month = nov,
  year = {2022},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3508459},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Fri Nov 25 09:11:49 MST 2022},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL = {https://dl.acm.org/doi/10.1145/3508459},
  abstract = {Defect-aware, cell-aware, and gate-exhaustive faults are
    described by input patterns of subcircuits or cells that are expected
    to activate defects. Even with single-cycle faults, an \( n \)-input
    subcircuit can have up to \( 2^n \) faults with unique \ldots{}},
  acknowledgement = ack-nhfb,
  articleno = {54},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
}

@Article{Jagadheesh:2022:NAM,
  author = {Samala Jagadheesh and P. Veda Bhanu and Soumya J.},
  title = {{NoC} Application Mapping Optimization Using Reinforcement
    Learning},
  journal = j-TODAES,
  volume = {27},
  number = {6},
  pages = {55:1--55:??},
  month = nov,
  year = {2022},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3510381},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Fri Nov 25 09:11:49 MST 2022},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL = {https://dl.acm.org/doi/10.1145/3510381},
  abstract = {Application mapping is one of the early stage design processes
    aimed to improve the performance of Network-on-Chip. Mapping is an
    NP-hard problem. A massive amount of high-quality supervised data is
    required to solve the application mapping problem using \ldots{}},
  acknowledgement = ack-nhfb,
  articleno = {55},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
}

%%% The third author's family name is published only as the initials
%%% "P. D."; they are braced so that BibTeX treats both as the surname
%%% rather than taking "D." alone.
@Article{Kolhe:2022:BDS,
  author =       "Gaurav Kolhe and Tyler David Sheaves and Sai Manoj
                 {P. D.} and Hamid Mahmoodi and Setareh Rafatirad and
                 Avesta Sasan and Houman Homayoun",
  title =        "Breaking the Design and Security Trade-off of
                 Look-up-table-based Obfuscation",
  journal =      j-TODAES,
  volume =       "27",
  number =       "6",
  pages =        "56:1--56:??",
  month =        nov,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3510421",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Nov 25 09:11:49 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3510421",
  abstract =     "Logic locking and Integrated Circuit (IC) camouflaging
                 are the most prevalent protection schemes that can
                 thwart most hardware security threats. However, the
                 state-of-the-art attacks, including Boolean
                 Satisfiability (SAT) and approximation-based attacks,
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "56",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Li:2022:NAD,
  author =       {Taozhong Li and Naifeng Jing and Jianfei Jiang and Qin
                 Wang and Zhigang Mao and Yiran Chen},
  title =        {A Novel Architecture Design for Output Significance
                 Aligned Flow with Adaptive Control in {ReRAM}-based
                 Neural Network Accelerator},
  journal =      j-TODAES,
  volume =       {27},
  number =       {6},
  pages =        {57:1--57:??},
  month =        nov,
  year =         {2022},
  CODEN =        {ATASFO},
  DOI =          {https://doi.org/10.1145/3510819},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  bibdate =      {Fri Nov 25 09:11:49 MST 2022},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3510819},
  abstract =     {Resistive-RAM-based (ReRAM-based) computing shows
                 great potential on accelerating DNN inference by its
                 highly parallel structure. Regrettably, computing
                 accuracy in practical is much lower than expected due
                 to the non-ideal ReRAM device. Conventional \ldots{}},
  acknowledgement = ack-nhfb,
  articleno =    {57},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
}

@Article{Brunner:2022:THR,
  author =       {Michaela Brunner and Alexander Hepp and Johanna Baehr
                 and Georg Sigl},
  title =        {Toward a Human-Readable State Machine Extraction},
  journal =      j-TODAES,
  volume =       {27},
  number =       {6},
  pages =        {58:1--58:??},
  month =        nov,
  year =         {2022},
  CODEN =        {ATASFO},
  DOI =          {https://doi.org/10.1145/3513086},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  bibdate =      {Fri Nov 25 09:11:49 MST 2022},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3513086},
  abstract =     {The target of sequential reverse engineering is to
                 extract the state machine of a design. Sequential
                 reverse engineering of a gate-level netlist consists of
                 the identification of so-called state flip-flops
                 (sFFs), as well as the extraction of the state
                 \ldots{}},
  acknowledgement = ack-nhfb,
  articleno =    {58},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
}

@Article{Zhou:2022:QCT,
  author =       {Xiangzhen Zhou and Yuan Feng and Sanjiang Li},
  title =        {Quantum Circuit Transformation: a {Monte Carlo} Tree
                 Search Framework},
  journal =      j-TODAES,
  volume =       {27},
  number =       {6},
  pages =        {59:1--59:??},
  month =        nov,
  year =         {2022},
  CODEN =        {ATASFO},
  DOI =          {https://doi.org/10.1145/3514239},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  bibdate =      {Fri Nov 25 09:11:49 MST 2022},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3514239},
  abstract =     {In the noisy intermediate-scale quantum era, quantum
                 processing units suffer from, among others, highly
                 limited connectivity between physical qubits. To make a
                 quantum circuit effectively executable, a circuit
                 transformation process is necessary to \ldots{}},
  acknowledgement = ack-nhfb,
  articleno =    {59},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
}

@Article{Hong:2022:TNB,
  author =       {Xin Hong and Xiangzhen Zhou and Sanjiang Li and Yuan
                 Feng and Mingsheng Ying},
  title =        {A Tensor Network based Decision Diagram for
                 Representation of Quantum Circuits},
  journal =      j-TODAES,
  volume =       {27},
  number =       {6},
  pages =        {60:1--60:??},
  month =        nov,
  year =         {2022},
  CODEN =        {ATASFO},
  DOI =          {https://doi.org/10.1145/3514355},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  bibdate =      {Fri Nov 25 09:11:49 MST 2022},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3514355},
  abstract =     {Tensor networks have been successfully applied in
                 simulation of quantum physical systems for decades.
                 Recently, they have also been employed in classical
                 simulation of quantum computing, in particular, random
                 quantum circuits. This article proposes a \ldots{}},
  acknowledgement = ack-nhfb,
  articleno =    {60},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
}

@Article{Choudhury:2022:SHC,
  author =       {Dwaipayan Choudhury and Reet Barik and Aravind
                 Sukumaran Rajam and Ananth Kalyanaraman and Partha
                 Pratim Pande},
  title =        {Software\slash Hardware Co-design of {$3$D}
                 {NoC}-based {GPU} Architectures for Accelerated Graph
                 Computations},
  journal =      j-TODAES,
  volume =       {27},
  number =       {6},
  pages =        {61:1--61:??},
  month =        nov,
  year =         {2022},
  CODEN =        {ATASFO},
  DOI =          {https://doi.org/10.1145/3514354},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  bibdate =      {Fri Nov 25 09:11:49 MST 2022},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3514354},
  abstract =     {Manycore GPU architectures have become the mainstay
                 for accelerating graph computations. One of the primary
                 bottlenecks to performance of graph computations on
                 manycore architectures is the data movement. Since most
                 of the accesses in graph processing are \ldots{}},
  acknowledgement = ack-nhfb,
  articleno =    {61},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
}

@Article{Jiang:2022:ELH,
  author =       {Yiyang Jiang and Fan Yang and Bei Yu and Dian Zhou and
                 Xuan Zeng},
  title =        {Efficient Layout Hotspot Detection via Neural
                 Architecture Search},
  journal =      j-TODAES,
  volume =       {27},
  number =       {6},
  pages =        {62:1--62:??},
  month =        nov,
  year =         {2022},
  CODEN =        {ATASFO},
  DOI =          {https://doi.org/10.1145/3517130},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  bibdate =      {Fri Nov 25 09:11:49 MST 2022},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3517130},
  abstract =     {Layout hotspot detection is of great importance in the
                 physical verification flow. Deep neural network models
                 have been applied to hotspot detection and achieved
                 great success. Despite their success, high-performance
                 neural networks are still quite \ldots{}},
  acknowledgement = ack-nhfb,
  articleno =    {62},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
}

@Article{Abel:2022:FSS,
  author =       {Inga Abel and Helmut Graeb},
  title =        {{FUBOCO}: Structure Synthesis of Basic Op-Amps by
                 {FUnctional BlOck COmposition}},
  journal =      j-TODAES,
  volume =       {27},
  number =       {6},
  pages =        {63:1--63:??},
  month =        nov,
  year =         {2022},
  CODEN =        {ATASFO},
  DOI =          {https://doi.org/10.1145/3522738},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  bibdate =      {Fri Nov 25 09:11:49 MST 2022},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3522738},
  abstract =     {This article presents a method to automatically
                 synthesize the structure and initial sizing of an
                 operational amplifier. It is positioned between
                 approaches with fixed design plans and a small search
                 space of structures and approaches with generic
                 \ldots{}},
  acknowledgement = ack-nhfb,
  articleno =    {63},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
}

@Article{Sha:2022:DMB,
  author =       "Zhibing Sha and Jun Li and Zhigang Cai and Min Huang
                 and Jianwei Liao and Fran{\c{c}}ois Trahay",
  title =        "Degraded Mode-benefited {I/O} Scheduling to Ensure
                 {I/O} Responsiveness in {RAID}-enabled {SSDs}",
  journal =      j-TODAES,
  volume =       "27",
  number =       "6",
  pages =        "64:1--64:??",
  month =        nov,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3522755",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Nov 25 09:11:49 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3522755",
  abstract =     "RAID-enabled SSDs commonly have unbalanced I/O
                 workloads on their components (e.g., SSD channels), as
                 the data/parity chunks in the same stripe may have
                 varied access frequency, which greatly impacts I/O
                 responsiveness. This article proposes a I/O \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "64",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Bai:2022:RER,
  author =       {Yunkai Bai and Andrew Stern and Jungmin Park and Mark
                 Tehranipoor and Domenic Forte},
  title =        {{RASCv2}: Enabling Remote Access to Side-Channels for
                 Mission Critical and {IoT} Systems},
  journal =      j-TODAES,
  volume =       {27},
  number =       {6},
  pages =        {65:1--65:??},
  month =        nov,
  year =         {2022},
  CODEN =        {ATASFO},
  DOI =          {https://doi.org/10.1145/3524123},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  bibdate =      {Fri Nov 25 09:11:49 MST 2022},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3524123},
  abstract =     {The Internet of Things (IoT) and smart devices are
                 currently being deployed in systems such as autonomous
                 vehicles and medical monitoring devices. The
                 introduction of IoT devices into these systems enables
                 network connectivity for data transfer, cloud
                 \ldots{}},
  acknowledgement = ack-nhfb,
  articleno =    {65},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
}

@Article{Hung:2023:DDR,
  author =       {Jos{\'e} Romero Hung and Chao Li and Taolei Wang and
                 Jinyang Guo and Pengyu Wang and Chuanming Shao and Jing
                 Wang and Guoyong Shi and Xiangwen Liu and Hanqing Wu},
  title =        {{DRAGON}: Dynamic Recurrent Accelerator for Graph
                 Online Convolution},
  journal =      j-TODAES,
  volume =       {28},
  number =       {1},
  pages =        {1:1--1:??},
  month =        jan,
  year =         {2023},
  CODEN =        {ATASFO},
  DOI =          {https://doi.org/10.1145/3524124},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  bibdate =      {Wed Apr 5 10:07:22 MDT 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3524124},
  abstract =     {Despite the extraordinary applicative potentiality
                 that dynamic graph inference may entail, its
                 practical-physical implementation has been a topic
                 seldom explored in literature. Although graph inference
                 through neural networks has received plenty of
                 \ldots{}},
  acknowledgement = ack-nhfb,
  articleno =    {1},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
}

@Article{Minakova:2023:MTT,
  author =       "Svetlana Minakova and Todor Stefanov",
  title =        "Memory-Throughput Trade-off for {CNN}-Based
                 Applications at the Edge",
  journal =      j-TODAES,
  volume =       "28",
  number =       "1",
  pages =        "2:1--2:??",
  month =        jan,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3527457",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Apr 5 10:07:22 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3527457",
  abstract =     "Many modern applications require execution of
                 Convolutional Neural Networks (CNNs) on edge devices,
                 such as mobile phones or embedded platforms. This can
                 be challenging, as the state-of-the-art CNNs are memory
                 costly, whereas the memory budget of edge \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "2",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Chhabria:2023:EDN,
  author =       {Vidya A. Chhabria and Vipul Ahuja and Ashwath Prabhu
                 and Nikhil Patil and Palkesh Jain and Sachin S.
                 Sapatnekar},
  title =        {Encoder-Decoder Networks for Analyzing Thermal and
                 Power Delivery Networks},
  journal =      j-TODAES,
  volume =       {28},
  number =       {1},
  pages =        {3:1--3:??},
  month =        jan,
  year =         {2023},
  CODEN =        {ATASFO},
  DOI =          {https://doi.org/10.1145/3526115},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  bibdate =      {Wed Apr 5 10:07:22 MDT 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3526115},
  abstract =     {Power delivery network (PDN) analysis and thermal
                 analysis are computationally expensive tasks that are
                 essential for successful integrated circuit (IC)
                 design. Algorithmically, both these analyses have
                 similar computational structure and complexity as
                 \ldots{}},
  acknowledgement = ack-nhfb,
  articleno =    {3},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
}

@Article{Spieck:2023:LBM,
  author =       {Jan Spieck and Stefan Wildermann and J{\"u}rgen
                 Teich},
  title =        {A Learning-based Methodology for Scenario-aware
                 Mapping of Soft Real-time Applications onto
                 Heterogeneous {MPSoCs}},
  journal =      j-TODAES,
  volume =       {28},
  number =       {1},
  pages =        {4:1--4:??},
  month =        jan,
  year =         {2023},
  CODEN =        {ATASFO},
  DOI =          {https://doi.org/10.1145/3529230},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  bibdate =      {Wed Apr 5 10:07:22 MDT 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3529230},
  abstract =     {Soft real-time streaming applications often process
                 input data that evoke varying workloads for their
                 tasks. This may lead to high energy consumption or
                 deadline misses in case their mapping onto a
                 heterogeneous MPSoC target architecture is not adapted,
                 \ldots{}},
  acknowledgement = ack-nhfb,
  articleno =    {4},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
}

@Article{Li:2023:EES,
  author =       {Chunqiao Li and Chengtao An and Fan Yang and Xuan
                 Zeng},
  title =        {{ESPSim}: an Efficient Scalable Power Grid Simulator
                 Based on Parallel Algebraic Multigrid},
  journal =      j-TODAES,
  volume =       {28},
  number =       {1},
  pages =        {5:1--5:??},
  month =        jan,
  year =         {2023},
  CODEN =        {ATASFO},
  DOI =          {https://doi.org/10.1145/3529533},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  bibdate =      {Wed Apr 5 10:07:22 MDT 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3529533},
  abstract =     {Fast verification for the extremely large-scale power
                 grid is demanding as CMOS technology advances
                 consistently. In this work, we propose ESPSim, an
                 efficient scalable power grid simulator based on a
                 parallel smoothed aggregation-based algebraic
                 \ldots{}},
  acknowledgement = ack-nhfb,
  articleno =    {5},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
}

@Article{Huang:2023:RRB,
  author =       {Chenglong Huang and Nuo Xu and Junwei Zeng and Wenqing
                 Wang and Yihong Hu and Liang Fang and Desheng Ma and
                 Yanting Chen},
  title =        {Rescuing {ReRAM}-based Neural Computing Systems from
                 Device Variation},
  journal =      j-TODAES,
  volume =       {28},
  number =       {1},
  pages =        {6:1--6:??},
  month =        jan,
  year =         {2023},
  CODEN =        {ATASFO},
  DOI =          {https://doi.org/10.1145/3533706},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  bibdate =      {Wed Apr 5 10:07:22 MDT 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3533706},
  abstract =     {Resistive random-access memory (ReRAM)-based crossbar
                 array (RCA) is a promising platform to accelerate
                 vector-matrix multiplication in deep neural networks
                 (DNNs). There are, however, some practical issues,
                 especially device variation, that hinder the \ldots{}},
  acknowledgement = ack-nhfb,
  articleno =    {6},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
}

@Article{Ding:2023:MAP,
  author =       "Bo Ding and Jinglei Huang and Qi Xu and Junpeng Wang
                 and Song Chen and Yi Kang",
  title =        "Memory-aware Partitioning, Scheduling, and
                 Floorplanning for Partially Dynamically Reconfigurable
                 Systems",
  journal =      j-TODAES,
  volume =       "28",
  number =       "1",
  pages =        "7:1--7:??",
  month =        jan,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3534968",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Apr 5 10:07:22 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3534968",
  abstract =     "Partially dynamic reconfiguration (PDR) technology can
                 accelerate the reconfiguration process and overcome
                 hardware resource constraints when facing the challenge
                 of high performance with respect to applications and
                 resources constraints on field-programmable \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "7",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Zeng:2023:AMM,
  author =       {Junwei Zeng and Nuo Xu and Yabo Chen and Chenglong
                 Huang and Zhiwei Li and Liang Fang},
  title =        {{AIMCU-MESO}: an In-Memory Computing Unit Constructed
                 by {MESO} Device},
  journal =      j-TODAES,
  volume =       {28},
  number =       {1},
  pages =        {8:1--8:??},
  month =        jan,
  year =         {2023},
  CODEN =        {ATASFO},
  DOI =          {https://doi.org/10.1145/3539575},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  bibdate =      {Wed Apr 5 10:07:22 MDT 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3539575},
  abstract =     {Traditional CMOS-based von-Neumann computer
                 architecture faces the issue of memory wall that the
                 limitation of bus-bandwidth and the speed mismatch
                 between processor and memory restrict the efficiency of
                 data processing along with an irreducible energy
                 \ldots{}},
  acknowledgement = ack-nhfb,
  articleno =    {8},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
}

@Article{Das:2023:CCV,
  author =       {Sourav Das and Sayandeep Sanyal and Aritra Hazra and
                 Pallab Dasgupta},
  title =        {{CoVerPlan}: a Comprehensive Verification Planning
                 Framework Leveraging {PSS} Specifications},
  journal =      j-TODAES,
  volume =       {28},
  number =       {1},
  pages =        {9:1--9:??},
  month =        jan,
  year =         {2023},
  CODEN =        {ATASFO},
  DOI =          {https://doi.org/10.1145/3543175},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  bibdate =      {Wed Apr 5 10:07:22 MDT 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3543175},
  abstract =     {With increasing design complexity, the portability of
                 tests across different designs and platforms becomes a
                 key criterion for accelerating verification closure.
                 The Portable Test and Stimulus Standard (PSS) is an
                 emerging industry standard prepared by \ldots{}},
  acknowledgement = ack-nhfb,
  articleno =    {9},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
}

@Article{Song:2023:VEE,
  author =       {Zhuoran Song and Naifeng Jing and Xiaoyao Liang},
  title =        {{E$^2$-VOR}: an End-to-End En\slash Decoder
                 Architecture for Efficient Video Object Recognition},
  journal =      j-TODAES,
  volume =       {28},
  number =       {1},
  pages =        {10:1--10:??},
  month =        jan,
  year =         {2023},
  CODEN =        {ATASFO},
  DOI =          {https://doi.org/10.1145/3543852},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  bibdate =      {Wed Apr 5 10:07:22 MDT 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3543852},
  abstract =     {High-resolution video object recognition (VOR) evolves
                 so fast but is very compute-intensive. This is because
                 VOR leverages compute-intensive deep neural network
                 (DNN) for better accuracy. Although many works have
                 been proposed for speedup, they mostly \ldots{}},
  acknowledgement = ack-nhfb,
  articleno =    {10},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
}

@Article{Zhao:2023:MSF,
  author =       {Zhiqiang Zhao and Zhuo Feng},
  title =        {A Multilevel Spectral Framework for Scalable
                 Vectorless Power\slash Thermal Integrity Verification},
  journal =      j-TODAES,
  volume =       {28},
  number =       {1},
  pages =        {11:1--11:??},
  month =        jan,
  year =         {2023},
  CODEN =        {ATASFO},
  DOI =          {https://doi.org/10.1145/3529534},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  bibdate =      {Wed Apr 5 10:07:22 MDT 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3529534},
  abstract =     {Vectorless integrity verification is becoming
                 increasingly critical to the robust design of nanoscale
                 integrated circuits. This article introduces a general
                 vectorless integrity verification framework that allows
                 computing the worst-case voltage drops or \ldots{}},
  acknowledgement = ack-nhfb,
  articleno =    {11},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
}

@Article{Huang:2023:SDP,
  author =       {Kai Huang and Bowen Li and Dongliang Xiong and Haitian
                 Jiang and Xiaowen Jiang and Xiaolang Yan and Luc
                 Claesen and Dehong Liu and Junjian Chen and Zhili Liu},
  title =        {Structured Dynamic Precision for Deep Neural Networks
                 Quantization},
  journal =      j-TODAES,
  volume =       {28},
  number =       {1},
  pages =        {12:1--12:??},
  month =        jan,
  year =         {2023},
  CODEN =        {ATASFO},
  DOI =          {https://doi.org/10.1145/3549535},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  bibdate =      {Wed Apr 5 10:07:22 MDT 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3549535},
  abstract =     {Deep Neural Networks (DNNs) have achieved remarkable
                 success in various Artificial Intelligence
                 applications. Quantization is a critical step in DNNs
                 compression and acceleration for deployment. To further
                 boost DNN execution efficiency, many works \ldots{}},
  acknowledgement = ack-nhfb,
  articleno =    {12},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
}

@Article{Ebrahimi-Azandaryani:2023:ACA,
  author =       {Farhad Ebrahimi-Azandaryani and Omid Akbari and Mehdi
                 Kamal and Ali Afzali-Kusha and Massoud Pedram},
  title =        {Accuracy Configurable Adders with Negligible Delay
                 Overhead in Exact Operating Mode},
  journal =      j-TODAES,
  volume =       {28},
  number =       {1},
  pages =        {13:1--13:??},
  month =        jan,
  year =         {2023},
  CODEN =        {ATASFO},
  DOI =          {https://doi.org/10.1145/3549936},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  bibdate =      {Wed Apr 5 10:07:22 MDT 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3549936},
  abstract =     {In this paper, two accuracy configurable adders
                 capable of operating in approximate and exact modes are
                 proposed. In the adders, which include a block-based
                 carry propagate and a parallel prefix structure, the
                 carry chains are cut off in the approximate \ldots{}},
  acknowledgement = ack-nhfb,
  articleno =    {13},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
}

@Article{Lin:2023:ISI,
  author =       {Yibo Lin and Avi Ziv and Haoxing Ren},
  title =        {Introduction to the Special Issue on Machine Learning
                 for {CAD\slash EDA}},
  journal =      j-TODAES,
  volume =       {28},
  number =       {2},
  pages =        {14:1--14:??},
  month =        mar,
  year =         {2023},
  CODEN =        {ATASFO},
  DOI =          {https://doi.org/10.1145/3586208},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  bibdate =      {Wed Apr 5 10:07:23 MDT 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3586208},
  acknowledgement = ack-nhfb,
  articleno =    {14},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
}

@Article{Sanchez:2023:CSE,
  author =       {Daniela S{\'a}nchez and Lorenzo Servadei and Gamze Naz
                 Kiprit and Robert Wille and Wolfgang Ecker},
  title =        {A Comprehensive Survey on Electronic Design Automation
                 and Graph Neural Networks: Theory and Applications},
  journal =      j-TODAES,
  volume =       {28},
  number =       {2},
  pages =        {15:1--15:??},
  month =        mar,
  year =         {2023},
  CODEN =        {ATASFO},
  DOI =          {https://doi.org/10.1145/3543853},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  bibdate =      {Wed Apr 5 10:07:23 MDT 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3543853},
  abstract =     {Driven by Moore's law, the chip design complexity is
                 steadily increasing. Electronic Design Automation (EDA)
                 has been able to cope with the challenging very
                 large-scale integration process, assuring scalability,
                 reliability, and proper time-to-market. \ldots{}},
  acknowledgement = ack-nhfb,
  articleno =    {15},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
}

@Article{Koblah:2023:SPA,
  author =       "David Koblah and Rabin Acharya and Daniel Capecci and
                 Olivia Dizon-Paradis and Shahin Tajik and Fatemeh Ganji
                 and Damon Woodard and Domenic Forte",
  title =        "A Survey and Perspective on Artificial Intelligence
                 for Security-Aware Electronic Design Automation",
  journal =      j-TODAES,
  volume =       "28",
  number =       "2",
  pages =        "16:1--16:??",
  month =        mar,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3563391",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Apr 5 10:07:23 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3563391",
  abstract =     "Artificial intelligence (AI) and machine learning (ML)
                 techniques have been increasingly used in several
                 fields to improve performance and the level of
                 automation. In recent years, this use has exponentially
                 increased due to the advancement of high-performance
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "16",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Fan:2023:PCC,
  author =       "Shaoze Fan and Shun Zhang and Jianbo Liu and Ningyuan
                 Cao and Xiaoxiao Guo and Jing Li and Xin Zhang",
  title =        "Power Converter Circuit Design Automation Using
                 Parallel {Monte Carlo} Tree Search",
  journal =      j-TODAES,
  volume =       "28",
  number =       "2",
  pages =        "17:1--17:??",
  month =        mar,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3549538",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Apr 5 10:07:23 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3549538",
  abstract =     "The tidal waves of modern electronic/electrical
                 devices have led to increasing demands for ubiquitous
                 application-specific power converters. A conventional
                 manual design procedure of such power converters is
                 computation- and labor-intensive, which \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "17",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Song:2023:MLA,
  author =       "Ling-Yen Song and Chih-Yun Chou and Tung-Chieh Kuo and
                 Chien-Nan Liu and Juinn-Dar Huang",
  title =        "Machine Learning Assisted Circuit Sizing Approach for
                 Low-Voltage Analog Circuits with Efficient
                 Variation-Aware Optimization",
  journal =      j-TODAES,
  volume =       "28",
  number =       "2",
  pages =        "18:1--18:??",
  month =        mar,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3567422",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Apr 5 10:07:23 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3567422",
  abstract =     "Low-power analog design is a hot topic for various
                 power efficient applications. Sizing low-power analog
                 circuits is not easy because the increasing
                 uncertainties from low-voltage techniques magnify
                 process variation effects on the design yield.
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "18",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Li:2023:PDW,
  author =       "Yaguang Li and Yishuang Lin and Meghna Madhusudan and
                 Arvind Sharma and Sachin Sapatnekar and Ramesh Harjani
                 and Jiang Hu",
  title =        "Performance-driven Wire Sizing for Analog Integrated
                 Circuits",
  journal =      j-TODAES,
  volume =       "28",
  number =       "2",
  pages =        "19:1--19:??",
  month =        mar,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3559542",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Apr 5 10:07:23 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3559542",
  abstract =     "Analog IC performance has a strong dependence on
                 interconnect RC parasitics, which are significantly
                 affected by wire sizes in recent technologies, where
                 minimum-width wires have high resistance. However,
                 performance-driven wire sizing for analog ICs has
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "19",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Cheng:2023:MLD,
  author =       "Jiawen Cheng and Yong Xiao and Yun Shao and Guanghai
                 Dong and Songlin Lyu and Wenjian Yu",
  title =        "Machine-learning-driven Architectural Selection of
                 Adders and Multipliers in Logic Synthesis",
  journal =      j-TODAES,
  volume =       "28",
  number =       "2",
  pages =        "20:1--20:??",
  month =        mar,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3560712",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Apr 5 10:07:23 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3560712",
  abstract =     "Designing high-performance adders and multiplier
                 components for diverse specifications and constraints
                 is of practical concern. However, selecting the best
                 architecture for adder or multiplier, which largely
                 affects the performance of synthesized circuits,
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "20",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Liu:2023:GFG,
  author =       "Yiting Liu and Ziyi Ju and Zhengming Li and Mingzhi
                 Dong and Hai Zhou and Jia Wang and Fan Yang and Xuan
                 Zeng and Li Shang",
  title =        "{GraphPlanner}: Floorplanning with Graph Neural
                 Network",
  journal =      j-TODAES,
  volume =       "28",
  number =       "2",
  pages =        "21:1--21:??",
  month =        mar,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3555804",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Apr 5 10:07:23 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3555804",
  abstract =     "Chip floorplanning has long been a critical task with
                 high computation complexity in the physical
                 implementation of VLSI chips. Its key objective is to
                 determine the initial locations of large chip modules
                 with minimized wirelength while adhering to the
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "21",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Fang:2023:ETC,
  author =       "Chenlei Fang and Qicheng Huang and Zeye Liu and
                 Ruizhou Ding and Ronald D. Blanton",
  title =        "Efficient Test Chip Design via Smart Computation",
  journal =      j-TODAES,
  volume =       "28",
  number =       "2",
  pages =        "22:1--22:??",
  month =        mar,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3558393",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Apr 5 10:07:23 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3558393",
  abstract =     "Submitted to the Special Issue on Machine Learning for
                 CAD (ML-CAD). Competitive strength in semiconductor
                 field depends on yield. The challenges associated with
                 designing and manufacturing of leading-edge integrated
                 circuits (ICs) have increased that \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "22",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Lozano:2023:LBP,
  author =       "Erika Susana Alcorta Lozano and Andreas Gerstlauer",
  title =        "Learning-based Phase-aware Multi-core {CPU} Workload
                 Forecasting",
  journal =      j-TODAES,
  volume =       "28",
  number =       "2",
  pages =        "23:1--23:??",
  month =        mar,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3564929",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Apr 5 10:07:23 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3564929",
  abstract =     "Predicting workload behavior during workload execution
                 is essential for dynamic resource optimization in
                 multi-processor systems. Recent studies have proposed
                 advanced machine learning techniques for dynamic
                 workload prediction. Workload prediction can be
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "23",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Li:2023:MLB,
  author =       "Benzheng Li and Xi Zhang and Hailong You and Zhongdong
                 Qi and Yuming Zhang",
  title =        "Machine Learning Based Framework for Fast Resource
                 Estimation of {RTL} Designs Targeting {FPGAs}",
  journal =      j-TODAES,
  volume =       "28",
  number =       "2",
  pages =        "24:1--24:??",
  month =        mar,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3555047",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Apr 5 10:07:23 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3555047",
  abstract =     "Field-programmable gate arrays (FPGAs) have grown to
                 be an important platform for integrated circuit design
                 and hardware emulation. However, with the dramatic
                 increase in design scale, it has become a key challenge
                 to partition very large scale \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "24",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Ferretti:2023:GNN,
  author =       "Lorenzo Ferretti and Andrea Cini and Georgios
                 Zacharopoulos and Cesare Alippi and Laura Pozzi",
  title =        "Graph Neural Networks for High-Level Synthesis Design
                 Space Exploration",
  journal =      j-TODAES,
  volume =       "28",
  number =       "2",
  pages =        "25:1--25:??",
  month =        mar,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3570925",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Apr 5 10:07:23 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3570925",
  abstract =     "High-level Synthesis (HLS) Design-Space Exploration
                 (DSE) aims at identifying Pareto-optimal synthesis
                 configurations whose exhaustive search is unfeasible
                 due to the design-space dimensionality and the
                 prohibitive computational cost of the synthesis
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "25",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Last:2023:TPM,
  author =       "Felix Last and Ulf Schlichtmann",
  title =        "Training {PPA} Models for Embedded Memories on a
                 Low-data Diet",
  journal =      j-TODAES,
  volume =       "28",
  number =       "2",
  pages =        "26:1--26:??",
  month =        mar,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3556539",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Apr 5 10:07:23 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3556539",
  abstract =     "Supervised machine learning requires large amounts of
                 labeled data for training. In power, performance, and
                 area (PPA) estimation of embedded memories, every new
                 memory compiler version is considered independently of
                 previous compiler versions. Since the \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "26",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Xing:2023:BPB,
  author =       "Wei W. Xing and Xiang Jin and Tian Feng and Dan Niu
                 and Weisheng Zhao and Zhou Jin",
  title =        "{BoA-PTA}: a {Bayesian Optimization Accelerated PTA}
                 Solver for {SPICE} Simulation",
  journal =      j-TODAES,
  volume =       "28",
  number =       "2",
  pages =        "27:1--27:??",
  month =        mar,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3555805",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Apr 5 10:07:23 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3555805",
  abstract =     "One of the greatest challenges in integrated circuit
                 design is the repeated executions of computationally
                 expensive SPICE simulations, particularly when highly
                 complex chip testing/verification is involved.
                 Recently, pseudo-transient analysis (PTA) has
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "27",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Dai:2023:SAD,
  author =       "Ruochen Dai and Tuba Yavuz",
  title =        "A Symbolic Approach to Detecting Hardware {Trojans}
                 Triggered by Don't Care Transitions",
  journal =      j-TODAES,
  volume =       "28",
  number =       "2",
  pages =        "28:1--28:??",
  month =        mar,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3558392",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Apr 5 10:07:23 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3558392",
  abstract =     "Due to the globalization of Integrated Circuit supply
                 chain, hardware Trojans and the attacks that can
                 trigger them have become an important security issue.
                 One type of hardware Trojans leverages the ``don't care
                 transitions'' in Finite-state Machines (FSMs).
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "28",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Chen:2023:AMC,
  author =       "Zhisheng Chen and Wenzhong Guo and Genggeng Liu and
                 Xing Huang",
  title =        "Application Mapping and Control-system Design for
                 Microfluidic Biochips with Distributed Channel
                 Storage",
  journal =      j-TODAES,
  volume =       "28",
  number =       "2",
  pages =        "29:1--29:??",
  month =        mar,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3564288",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Apr 5 10:07:23 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3564288",
  abstract =     "Continuous-flow microfluidic biochips have emerged as
                 a potential low-cost and fast-responsive lab-on-chip
                 platform. They have attracted much attention due to
                 their capability of performing various biochemical
                 applications concurrently and automatically \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "29",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Choudhury:2023:AGC,
  author =       "Dwaipayan Choudhury and Lizhi Xiang and Aravind Rajam
                 and Anantharaman Kalyanaraman and Partha Pratim Pande",
  title =        "Accelerating Graph Computations on {$3$D}
                 {NoC}-Enabled {PIM} Architectures",
  journal =      j-TODAES,
  volume =       "28",
  number =       "3",
  pages =        "30:1--30:??",
  month =        may,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3564290",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed May 17 08:06:20 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3564290",
  abstract =     "Graph application workloads are dominated by random
                 memory accesses with the poor locality. To tackle the
                 irregular and sparse nature of computation, ReRAM-based
                 Processing-in-Memory (PIM) architectures have been
                 proposed recently. Most of these ReRAM \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "30",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Lee:2023:VEL,
  author =       "Jayoung Lee and Pengcheng Wang and Ran Xu and Sarthak
                 Jain and Venkat Dasari and Noah Weston and Yin Li and
                 Saurabh Bagchi and Somali Chaterji",
  title =        "Virtuoso: Energy- and Latency-aware Streamlining of
                 Streaming Videos on Systems-on-Chips",
  journal =      j-TODAES,
  volume =       "28",
  number =       "3",
  pages =        "31:1--31:??",
  month =        may,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3564289",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed May 17 08:06:20 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3564289",
  abstract =     "Efficient and adaptive computer vision systems have
                 been proposed to make computer vision tasks, such as
                 image classification and object detection, optimized
                 for embedded or mobile devices. These solutions, quite
                 recent in their origin, focus on \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "31",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Bommana:2023:DST,
  author =       "Ashish Reddy Bommana and Susheel Ujwal Siddamshetty
                 and Dhilleswararao Pudi and Arvind Thumatti K. R. and
                 Srinivas Boppu and M. Sabarimalai Manikandan and Linga
                 Reddy Cenkeramaddi",
  title =        "Design of Synthesis-time Vectorized Arithmetic
                 Hardware for Tapered Floating-point Addition and
                 Subtraction",
  journal =      j-TODAES,
  volume =       "28",
  number =       "3",
  pages =        "32:1--32:??",
  month =        may,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3567423",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed May 17 08:06:20 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/fparith.bib;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3567423",
  abstract =     "Energy efficiency has become the new performance
                 criterion in this era of pervasive embedded computing;
                 thus, accelerator-rich multi-processor system-on-chips
                 are commonly used in embedded computing hardware. Once
                 computationally intensive machine learning applications
                 gained much traction, they are now deployed in many
                 application domains due to abundant and cheaply
                 available computational capacity. In addition, there is
                 a growing trend toward developing hardware accelerators
                 for machine learning applications for embedded edge
                 devices where performance and energy efficiency are
                 critical. Although these hardware accelerators
                 frequently use floating-point operations for accuracy,
                 reduced-width floating-point formats are also used to
                 reduce hardware complexity; thus, power consumption
                 while maintaining accuracy. Vectorization concepts can
                 also be used to improve performance, energy efficiency,
                 and memory bandwidth. We propose the design of a
                 vectorized floating-point adder/subtractor that
                 supports arbitrary length floating-point formats with
                 varying exponent and mantissa widths in this article.
                 In comparison to existing designs in the literature,
                 the proposed design is 2.57$ \times $ area- and 1.56$
                 \times $ power-efficient, and it supports true
                 vectorization with no restrictions on exponent and
                 mantissa widths.",
  acknowledgement = ack-nhfb,
  articleno =    "32",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Yang:2023:ATF,
  author =       "Chun-Chieh Yang and Yi-Ru Chen and Hui-Hsin Liao and
                 Yuan-Ming Chang and Jenq-Kuen Lee",
  title =        "Auto-tuning Fixed-point Precision with {TVM} on
                 {RISC-V} Packed {SIMD} Extension",
  journal =      j-TODAES,
  volume =       "28",
  number =       "3",
  pages =        "33:1--33:??",
  month =        may,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3569939",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed May 17 08:06:20 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/fparith.bib;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3569939",
  abstract =     "Today, as deep learning (DL) is applied more often in
                 daily life, dedicated processors such as CPUs and GPUs
                 have become very important for accelerating model
                 executions. With the growth of technology, people are
                 becoming accustomed to using edge devices, such as
                 mobile phones, smart watches, and VR devices in their
                 daily lives. A variety of technologies using DL are
                 gradually being applied to these edge devices. However,
                 there is a large number of computations in DL. It faces
                 a challenging problem how to provide solutions in the
                 edge devices. In this article, the proposed method
                 enables a flow with the RISC-V Packed extension (P
                 extension) in TVM. TVM, an open deep learning compiler
                 for neural network models, is growing as a key
                 infrastructure for DL computing. RISC-V is an open
                 instruction set architecture (ISA) with customized and
                 flexible features. The Packed-SIMD extension is a
                 RISC-V extension that enables subword
                 single-instruction multiple-data (SIMD) computations in
                 RISC-V architectures to support fallback engines in AI
                 computing. In the proposed flow, a fixed-point type
                 that is supported by an integer of 16-bit type and
                 saturation instructions is added to replace the
                 original 32-bit float type. In addition, an auto-tuning
                 method is proposed to use a uniform selector mechanism
                 (USM) to find the binary point position for fixed-point
                 type use. The tensorization feature of TVM can be used
                 to optimize specific hardware such as subword SIMD
                 instructions with RISC-V P extension. With our
                 experiment on the Spike simulator, the proposed method
                 with the USM can improve performance by approximately
                 2.54 to 6.15$ \times $ in terms of instruction counts
                 with little accuracy loss.",
  acknowledgement = ack-nhfb,
  articleno =    "33",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Huang:2023:HAQ,
  author =       "Shanshi Huang and Hongwu Jiang and Shimeng Yu",
  title =        "Hardware-aware Quantization\slash Mapping Strategies
                 for Compute-in-Memory Accelerators",
  journal =      j-TODAES,
  volume =       "28",
  number =       "3",
  pages =        "34:1--34:??",
  month =        may,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3569940",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed May 17 08:06:20 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3569940",
  abstract =     "The emerging non-volatile memory (eNVM) based
                 mixed-signal Compute-in-Memory (CIM) accelerators are
                 of great interest in today's AI accelerators design due
                 to their high energy efficiency. Various CIM
                 architectures and circuit-level designs have been
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "34",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Feng:2023:GGA,
  author =       "Lang Feng and Wenjian Liu and Chuliang Guo and Ke Tang
                 and Cheng Zhuo and Zhongfeng Wang",
  title =        "{GANDSE}: Generative Adversarial Network-based Design
                 Space Exploration for Neural Network Accelerator
                 Design",
  journal =      j-TODAES,
  volume =       "28",
  number =       "3",
  pages =        "35:1--35:??",
  month =        may,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3570926",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed May 17 08:06:20 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3570926",
  abstract =     "With the popularity of deep learning, the hardware
                 implementation platform of deep learning has received
                 increasing interest. Unlike the general purpose
                 devices, e.g., CPU or GPU, where the deep learning
                 algorithms are executed at the software level,
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "35",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Wang:2023:DDD,
  author =       "Junpeng Wang and Haitao Du and Bo Ding and Qi Xu and
                 Song Chen and Yi Kang",
  title =        "{DDAM}: Data Distribution-Aware Mapping of {CNNs} on
                 Processing-In-Memory Systems",
  journal =      j-TODAES,
  volume =       "28",
  number =       "3",
  pages =        "36:1--36:??",
  month =        may,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3576196",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed May 17 08:06:20 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3576196",
  abstract =     "Convolution neural networks (CNNs) are widely used
                 algorithms in image processing, natural language
                 processing and many other fields. The large amount of
                 memory access of CNNs is one of the major concerns in
                 CNN accelerator designs that influences the \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "36",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Rawat:2023:SNB,
  author =       "Bhawna Rawat and Poornima Mittal",
  title =        "A Switching {NMOS} Based Single Ended Sense Amplifier
                 for High Density {SRAM} Applications",
  journal =      j-TODAES,
  volume =       "28",
  number =       "3",
  pages =        "37:1--37:??",
  month =        may,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3576198",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed May 17 08:06:20 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3576198",
  abstract =     "The demand for single ended static random access
                 memory is growing, driven by the decreasing technology
                 node and increasing processing load. This mandates the
                 need for a single ended sense amplifier to be used
                 along with the memory. Consequently, a single
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "37",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Pereira:2023:IED,
  author =       "Danny Pereira and Anirban Ghose and Sumana Ghosh and
                 Soumyajit Dey",
  title =        "Inferencing on Edge Devices: a Time- and Space-aware
                 Co-scheduling Approach",
  journal =      j-TODAES,
  volume =       "28",
  number =       "3",
  pages =        "38:1--38:??",
  month =        may,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3576197",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed May 17 08:06:20 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3576197",
  abstract =     "Neural Network (NN)-based real-time inferencing tasks
                 are often co-scheduled on GPGPU-style edge platforms.
                 Existing works advocate using different NN parameters
                 for the same detection task in different environments.
                 However, realizing such approaches \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "38",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Huang:2023:CFD,
  author =       "Yanze Huang and Kui Wen and Limei Lin and Li Xu and
                 Sun-Yuan Hsieh",
  title =        "Component Fault Diagnosability of Hierarchical Cubic
                 Networks",
  journal =      j-TODAES,
  volume =       "28",
  number =       "3",
  pages =        "39:1--39:??",
  month =        may,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3577018",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed May 17 08:06:20 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3577018",
  abstract =     "The fault diagnosability of a network indicates the
                 self-diagnosis ability of the network, thus it is an
                 important measure of robustness of the network. As a
                 neoteric feature for measuring fault diagnosability,
                 the $r$-component diagnosability \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron. Syst.",
  articleno =    "39",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Nie:2023:CMD,
  author =       "Qi Nie and Sharad Malik",
  title =        "{CNNFlow}: Memory-driven Data Flow Optimization for
                 Convolutional Neural Networks",
  journal =      j-TODAES,
  volume =       "28",
  number =       "3",
  pages =        "40:1--40:??",
  month =        may,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3577017",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed May 17 08:06:20 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3577017",
  abstract =     "Convolution Neural Networks (CNNs) are widely deployed
                 in computer vision applications. The datasets are
                 large, and the data reuse across different parts is
                 heavily interleaved. Given that memory access (SRAM and
                 especially DRAM) is more expensive in both \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron. Syst.",
  articleno =    "40",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{deOliveira:2023:MOO,
  author =       "Ricardo Gonzalez de Oliveira and Nicolas Navet and
                 Achim Henkel",
  title =        "Multi-Objective Optimization for Safety-Related
                 Available {E\slash E} Architectures Scoping Highly
                 Automated Driving Vehicles",
  journal =      j-TODAES,
  volume =       "28",
  number =       "3",
  pages =        "41:1--41:??",
  month =        may,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3582004",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed May 17 08:06:20 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3582004",
  abstract =     "Megatrends such as Highly Automated Driving (HAD) (SAE
                 $\geq$ Level 3), electrification, and connectivity are
                 reshaping the automotive industry. Together with the
                 new technologies, the business models will also evolve,
                 opening up new possibilities and new \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron. Syst.",
  articleno =    "41",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Mahmoud:2023:LEP,
  author =       "Mervat M. A. Mahmoud and Nahla E. Elashkar and Heba H.
                 Draz",
  title =        "Low-energy Pipelined Hardware Design for Approximate
                 Medium Filter",
  journal =      j-TODAES,
  volume =       "28",
  number =       "3",
  pages =        "42:1--42:??",
  month =        may,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3582005",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed May 17 08:06:20 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3582005",
  abstract =     "Image and video processing algorithms are currently
                 crucial for many applications. Hardware implementation
                 of these algorithms provides higher speed for large
                 computation applications. Removing noise is often a
                 typical pre-processing step to enhance the \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron. Syst.",
  articleno =    "42",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Cardona:2023:AMC,
  author =       "Jordi Cardona and Carles Hern{\'a}ndez and Jaume
                 Abella and Enrico Mezzetti and Francisco J. Cazorla",
  title =        "Accurately Measuring Contention in Mesh {NoCs} in
                 Time-Sensitive Embedded Systems",
  journal =      j-TODAES,
  volume =       "28",
  number =       "3",
  pages =        "43:1--43:??",
  month =        may,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3582006",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed May 17 08:06:20 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3582006",
  abstract =     "The computing capacity demanded by embedded systems is
                 on the rise as software implements more
                 functionalities, ranging from best-effort entertainment
                 functions to performance-guaranteed safety-related
                 functions. Heterogeneous manycore processors, using
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron. Syst.",
  articleno =    "43",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Du:2023:TLR,
  author =       "Yajuan Du and Siyi Huang and Yao Zhou and Qiao Li",
  title =        "Towards {LDPC} Read Performance of {$3$D} Flash
                 Memories with Layer-induced Error Characteristics",
  journal =      j-TODAES,
  volume =       "28",
  number =       "3",
  pages =        "44:1--44:??",
  month =        may,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3585075",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed May 17 08:06:20 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3585075",
  abstract =     "3D flash memories have been widely developed to
                 further increase the storage capacity of SSDs by
                 vertically stacking multiple layers. However, this
                 special physical structure brings new error
                 characteristics. Existing studies have discovered that
                 there \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron. Syst.",
  articleno =    "44",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Zhou:2023:FAO,
  author =       "Yuhao Zhou and Zhenxue He and Jianhui Jiang and Jia
                 Liu and Juncai He and Tao Wang and Limin Xiao and Xiang
                 Wang",
  title =        "Fast Area Optimization Approach for {XNOR\slash
                 OR}-based Fixed Polarity {Reed--Muller} Logic Circuits
                 based on Multi-strategy Wolf Pack Algorithm",
  journal =      j-TODAES,
  volume =       "28",
  number =       "3",
  pages =        "45:1--45:??",
  month =        may,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3587818",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed May 17 08:06:20 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3587818",
  abstract =     "Area optimization is one of the most important
                 contents of circuits logic synthesis. The smaller area
                 has stronger testability and lower cost. However,
                 searching for a circuit with the smallest area in a
                 large-scale space of polarity is a combinatorial
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron. Syst.",
  articleno =    "45",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Wang:2023:TPI,
  author =       "Senling Wang and Xihong Zhou and Yoshinobu Higami and
                 Hiroshi Takahashi and Hiroyuki Iwata and Yoichi Maeda
                 and Jun Matsushima",
  title =        "Test Point Insertion for Multi-Cycle Power-On
                 Self-Test",
  journal =      j-TODAES,
  volume =       "28",
  number =       "3",
  pages =        "46:1--46:??",
  month =        may,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3563552",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed May 17 08:06:20 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3563552",
  abstract =     "Under the functional safety standard ISO26262,
                 automotive systems require testing in the field, such
                 as the power-on self-test (POST). Unlike the production
                 test, the POST requires reducing the test application
                 time to meet the indispensable test quality \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron. Syst.",
  articleno =    "46",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Le:2023:PBM,
  author =       "Trung Le and Zhao Zhang and Zhichun Zhu",
  title =        "Polling-Based Memory Interface",
  journal =      j-TODAES,
  volume =       "28",
  number =       "3",
  pages =        "47:1--47:??",
  month =        may,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3572919",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed May 17 08:06:20 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3572919",
  abstract =     "Non-volatile memory has been extensively researched as
                 the alternative for a DRAM-based system; however, the
                 traditional memory controller cannot efficiently track
                 and schedule operations for all the memory devices in
                 heterogeneous systems due to \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron. Syst.",
  articleno =    "47",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@article{Markov:2023:GEI,
  author          = {Igor Markov and Fan Yang and Li Shang and Hai Zhou},
  title           = {{Guest Editor}'s Introduction: Machine Learning for
                     {VLSI} Physical Design},
  journal         = j-TODAES,
  year            = {2023},
  month           = jul,
  volume          = {28},
  number          = {4},
  pages           = {48:1--48:??},
  articleno       = {48},
  doi             = {https://doi.org/10.1145/3592606},
  url             = {https://dl.acm.org/doi/10.1145/3592606},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  ajournal        = {ACM Transact. Des. Automat. Electron. Syst.},
  journal-url     = {https://dl.acm.org/loi/todaes},
  acknowledgement = ack-nhfb,
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  bibdate         = {Mon Sep 18 09:07:08 MDT 2023}
}

@article{Kashyap:2023:IIM,
  author          = {Suhas Krishna Kashyap and Sule Ozev},
  title           = {{IMPRoVED}: Integrated Method to Predict {PostRouting}
                     setup Violations in Early Design Stages},
  journal         = j-TODAES,
  year            = {2023},
  month           = jul,
  volume          = {28},
  number          = {4},
  pages           = {49:1--49:??},
  articleno       = {49},
  doi             = {https://doi.org/10.1145/3572546},
  url             = {https://dl.acm.org/doi/10.1145/3572546},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  ajournal        = {ACM Transact. Des. Automat. Electron. Syst.},
  journal-url     = {https://dl.acm.org/loi/todaes},
  abstract        = {The detail routing process is by far the most time
                     consuming during the physical design flow. Routing
                     starts with an estimation of timing slacks and aims to
                     meet the timing specifications at signoff. In this
                     paper, we propose an improved method to predict
                     \ldots{}},
  acknowledgement = ack-nhfb,
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  bibdate         = {Mon Sep 18 09:07:08 MDT 2023}
}

@article{Hyun:2023:ROE,
  author          = {Daijoon Hyun and Sunwha Koh and Younggwang Jung and
                     Taeyoung Kim and Youngsoo Shin},
  title           = {Routability Optimization of Extreme Aspect Ratio
                     Design through Non-uniform Placement Utilization and
                     Selective Flip-flop Stacking},
  journal         = j-TODAES,
  year            = {2023},
  month           = jul,
  volume          = {28},
  number          = {4},
  pages           = {50:1--50:??},
  articleno       = {50},
  doi             = {https://doi.org/10.1145/3573387},
  url             = {https://dl.acm.org/doi/10.1145/3573387},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  ajournal        = {ACM Transact. Des. Automat. Electron. Syst.},
  journal-url     = {https://dl.acm.org/loi/todaes},
  abstract        = {Circuits that are placed with very low (or high)
                     aspect ratio are susceptible to routing overflows. Such
                     designs are difficult to close and usually end up with
                     larger area with low area utilization. In this article,
                     we propose two routability optimization \ldots{}},
  acknowledgement = ack-nhfb,
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  bibdate         = {Mon Sep 18 09:07:08 MDT 2023}
}

@article{Utyamishev:2023:MPP,
  author          = {Dmitry Utyamishev and Inna Partin-Vaisband},
  title           = {Multiterminal Pathfinding in Practical {VLSI} Systems
                     with Deep Neural Networks},
  journal         = j-TODAES,
  year            = {2023},
  month           = jul,
  volume          = {28},
  number          = {4},
  pages           = {51:1--51:??},
  articleno       = {51},
  doi             = {https://doi.org/10.1145/3564930},
  url             = {https://dl.acm.org/doi/10.1145/3564930},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  ajournal        = {ACM Transact. Des. Automat. Electron. Syst.},
  journal-url     = {https://dl.acm.org/loi/todaes},
  abstract        = {A multiterminal obstacle-avoiding pathfinding approach
                     is proposed. The approach is inspired by deep image
                     learning. The key idea is based on training a
                     conditional generative adversarial network (cGAN) to
                     interpret a pathfinding task as a graphical \ldots{}},
  acknowledgement = ack-nhfb,
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  bibdate         = {Mon Sep 18 09:07:08 MDT 2023}
}

@Article{Cheng:2023:DDG,
  author =       "Chung-Kuan Cheng and Chester Holtz and Andrew B. Kahng
                 and Bill Lin and Uday Mallappa",
  title =        "{DAGSizer}: a Directed Graph Convolutional Network
                 Approach to Discrete Gate Sizing of {VLSI} Graphs",
  journal =      j-TODAES,
  volume =       "28",
  number =       "4",
  pages =        "52:1--52:??",
  month =        jul,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3577019",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Sep 18 09:07:08 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3577019",
  abstract =     "The objective of a leakage recovery step is to make
                 use of positive slack and reduce power by performing
                 appropriate standard-cell swaps such as
                 threshold-voltage ($V_{th}$) or channel-length
                 reassignments. The resulting engineering change order
                 netlist needs \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron. Syst.",
  articleno =    "52",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@article{Huang:2023:RDP,
  author          = {Ping-Wei Huang and Yao-Wen Chang},
  title           = {Routability-driven Power\slash Ground Network
                     Optimization Based on Machine Learning},
  journal         = j-TODAES,
  year            = {2023},
  month           = jul,
  volume          = {28},
  number          = {4},
  pages           = {53:1--53:??},
  articleno       = {53},
  doi             = {https://doi.org/10.1145/3587817},
  url             = {https://dl.acm.org/doi/10.1145/3587817},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  ajournal        = {ACM Transact. Des. Automat. Electron. Syst.},
  journal-url     = {https://dl.acm.org/loi/todaes},
  abstract        = {The dynamic IR drop of a power/ground (PG) network is
                     a critical problem in modern circuit designs. Excessive
                     IR drop slows down circuit performance and causes
                     potential functional failures. Most industrial
                     practices tend to over-design the PG network for
                     \ldots{}},
  acknowledgement = ack-nhfb,
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  bibdate         = {Mon Sep 18 09:07:08 MDT 2023}
}

@Article{Dong:2023:WCP,
  author =       "Xiao Dong and Yufei Chen and Jun Chen and Yucheng Wang
                 and Ji Li and Tianming Ni and Zhiguo Shi and Xunzhao
                 Yin and Cheng Zhuo",
  title =        "Worst-case Power Integrity Prediction Using
                 Convolutional Neural Network",
  journal =      j-TODAES,
  volume =       "28",
  number =       "4",
  pages =        "54:1--54:??",
  month =        jul,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3564932",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Sep 18 09:07:08 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3564932",
  abstract =     "Power integrity analysis is an essential step in power
                 distribution network (PDN) sign-off to ensure the
                 performance and reliability of chips. However, with the
                 growing PDN size and increasing scenarios to be
                 validated, it becomes very time- and resource-
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron. Syst.",
  articleno =    "54",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Lu:2023:EGS,
  author =       "Yi-Chen Lu and Siddhartha Nath and Sai Pentapati and
                 Sung Kyu Lim",
  title =        "{ECO-GNN}: Signoff Power Prediction Using Graph Neural
                 Networks with Subgraph Approximation",
  journal =      j-TODAES,
  volume =       "28",
  number =       "4",
  pages =        "55:1--55:??",
  month =        jul,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3569942",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Sep 18 09:07:08 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3569942",
  abstract =     "Modern electronic design automation flows depend on
                 both implementation and signoff tools to perform
                 timing-constrained power optimization through
                 Engineering Change Orders (ECOs), which involve gate
                 sizing and threshold-voltage ($V_{th}$)-assignment of
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron. Syst.",
  articleno =    "55",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@article{Yang:2023:CCE,
  author          = {Dingcheng Yang and Haoyuan Li and Wenjian Yu and
                     Yuanbo Guo and Wenjie Liang},
  title           = {{CNN-Cap}: Effective Convolutional Neural
                     Network-based Capacitance Models for Interconnect
                     Capacitance Extraction},
  journal         = j-TODAES,
  year            = {2023},
  month           = jul,
  volume          = {28},
  number          = {4},
  pages           = {56:1--56:??},
  articleno       = {56},
  doi             = {https://doi.org/10.1145/3564931},
  url             = {https://dl.acm.org/doi/10.1145/3564931},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  ajournal        = {ACM Transact. Des. Automat. Electron. Syst.},
  journal-url     = {https://dl.acm.org/loi/todaes},
  abstract        = {Accurate capacitance extraction is becoming more
                     important for designing integrated circuits under
                     advanced process technology. The pattern matching-based
                     full-chip extraction methodology delivers fast
                     computational speed but suffers from large error and
                     \ldots{}},
  acknowledgement = ack-nhfb,
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  bibdate         = {Mon Sep 18 09:07:08 MDT 2023}
}

@article{Hou:2023:DLF,
  author          = {Tianshu Hou and Peining Zhen and Zhigang Ji and
                     Hai-Bao Chen},
  title           = {A Deep Learning Framework for Solving Stress-based
                     Partial Differential Equations in Electromigration
                     Analysis},
  journal         = j-TODAES,
  year            = {2023},
  month           = jul,
  volume          = {28},
  number          = {4},
  pages           = {57:1--57:??},
  articleno       = {57},
  doi             = {https://doi.org/10.1145/3567424},
  url             = {https://dl.acm.org/doi/10.1145/3567424},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  ajournal        = {ACM Transact. Des. Automat. Electron. Syst.},
  journal-url     = {https://dl.acm.org/loi/todaes},
  abstract        = {The electromigration-induced reliability issues (EM)
                     in very large scale integration (VLSI) circuits have
                     attracted continuous attention due to technology
                     scaling. Traditional EM methods lead to inaccurate
                     results incompatible with the advanced technology
                     \ldots{}},
  acknowledgement = ack-nhfb,
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  bibdate         = {Mon Sep 18 09:07:08 MDT 2023}
}

@article{Zhang:2023:CCM,
  author          = {Qing Zhang and Huajie Huang and Jizuo Li and Yuhang
                     Zhang and Yongfu Li},
  title           = {{CmpCNN}: {CMP} Modeling with Transfer Learning {CNN}
                     Architecture},
  journal         = j-TODAES,
  year            = {2023},
  month           = jul,
  volume          = {28},
  number          = {4},
  pages           = {58:1--58:??},
  articleno       = {58},
  doi             = {https://doi.org/10.1145/3569941},
  url             = {https://dl.acm.org/doi/10.1145/3569941},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  ajournal        = {ACM Transact. Des. Automat. Electron. Syst.},
  journal-url     = {https://dl.acm.org/loi/todaes},
  abstract        = {Performing chemical mechanical polishing (CMP)
                     modeling for physical verification on an integrated
                     circuit (IC) chip is vital to minimize its
                     manufacturing yield loss. Traditional CMP models
                     calculate post-CMP topography height of the IC's layout
                     based on \ldots{}},
  acknowledgement = ack-nhfb,
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  bibdate         = {Mon Sep 18 09:07:08 MDT 2023}
}

@Article{Aseeri:2023:PTA,
  author =       "Ahmad O. Aseeri",
  title =        "A Problem-tailored Adversarial Deep Neural
                 Network-Based Attack Model for Feed-Forward Physical
                 Unclonable Functions",
  journal =      j-TODAES,
  volume =       "28",
  number =       "4",
  pages =        "59:1--59:??",
  month =        jul,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3557742",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Sep 18 09:07:08 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3557742",
  abstract =     "With the exceeding advancement in technology, the
                 sophistication of attacks is considerably increasing.
                 Standard security methods fall short of achieving the
                 security essentials of IoT against physical attacks due
                 to the nature of IoTs being resource- \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron. Syst.",
  articleno =    "59",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Bhattacharjee:2023:SGG,
  author =       "Abhiroop Bhattacharjee and Priyadarshini Panda",
  title =        "{SwitchX}: {Gmin-Gmax} Switching for Energy-efficient
                 and Robust Implementation of Binarized Neural Networks
                 on {ReRAM} Xbars",
  journal =      j-TODAES,
  volume =       "28",
  number =       "4",
  pages =        "60:1--60:??",
  month =        jul,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3576195",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Sep 18 09:07:08 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3576195",
  abstract =     "Memristive crossbars can efficiently implement
                 Binarized Neural Networks (BNNs) wherein the weights
                 are stored in high-resistance states (HRS) and
                 low-resistance states (LRS) of the synapses. We propose
                 SwitchX mapping of BNN weights onto ReRAM crossbars
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron. Syst.",
  articleno =    "60",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Huang:2023:STB,
  author =       "Po-Hsuan Huang and Chia-Heng Tu and Shen-Ming Chung
                 and Pei-Yuan Wu and Tung-Lin Tsai and Yi-An Lin and
                 Chun-Yi Dai and Tzu-Yi Liao",
  title =        "{SecureTVM}: a {TVM}-based Compiler Framework for
                 Selective Privacy-preserving Neural Inference",
  journal =      j-TODAES,
  volume =       "28",
  number =       "4",
  pages =        "61:1--61:??",
  month =        jul,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3579049",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Sep 18 09:07:08 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3579049",
  abstract =     "Privacy-preserving neural inference helps protect both
                 the user input data and the model weights from being
                 leaked to others during the inference of a deep
                 learning model. To achieve data protection, the
                 inference is often performed within a secure domain,
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron. Syst.",
  articleno =    "61",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@article{Ibrahim:2023:OPR,
  author          = {Abrar A. Ibrahim and Ahmed M. Y. Ibrahim and Mohamed
                     Watheq El-Kharashi and Mona Safar},
  title           = {Optimal Pattern Retargeting in {IEEE 1687} Networks: a
                     {SAT}-based Upper-Bound Computation},
  journal         = j-TODAES,
  year            = {2023},
  month           = jul,
  volume          = {28},
  number          = {4},
  pages           = {62:1--62:??},
  articleno       = {62},
  doi             = {https://doi.org/10.1145/3585074},
  url             = {https://dl.acm.org/doi/10.1145/3585074},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  ajournal        = {ACM Transact. Des. Automat. Electron. Syst.},
  journal-url     = {https://dl.acm.org/loi/todaes},
  abstract        = {A growing number of embedded instruments is being
                     integrated into System-on-Chips for testing,
                     monitoring, and several other purposes. To standardize
                     their access protocols, the IEEE 1687 (IJTAG) standard
                     has defined a flexible network infrastructure.
                     \ldots{}},
  acknowledgement = ack-nhfb,
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  bibdate         = {Mon Sep 18 09:07:08 MDT 2023}
}

@article{Ferres:2023:CFF,
  author =       {Bruno Ferres and Olivier Muller and Fr{\'e}d{\'e}ric
                 Rousseau},
  title =        {A Chisel Framework for Flexible Design Space
                 Exploration through a Functional Approach},
  journal =      j-TODAES,
  volume =       {28},
  number =       {4},
  pages =        {63:1--63:??},
  month =        jul,
  year =         {2023},
  coden =        {ATASFO},
  doi =          {https://doi.org/10.1145/3590769},
  issn =         {1084-4309 (print), 1557-7309 (electronic)},
  issn-l =       {1084-4309},
  bibdate =      {Mon Sep 18 09:07:08 MDT 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  url =          {https://dl.acm.org/doi/10.1145/3590769},
  abstract =     {As the need for efficient digital circuits is ever
                 growing in the industry, the design of such systems
                 remains daunting, requiring both expertise and time. In
                 an attempt to close the gap between software
                 development and hardware design, powerful features
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  articleno =    {63},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-url =  {https://dl.acm.org/loi/todaes},
}

@article{Khan:2023:HEC,
  author =       {Muhammad Imran Khan},
  title =        {Harmonic Estimation and Comparative Analysis of
                 Ultra-High Speed Flip-Flop and Latch Topologies for Low
                 Power and High Performance Future Generation
                 Micro-\slash Nano Electronic Systems},
  journal =      j-TODAES,
  volume =       {28},
  number =       {4},
  pages =        {64:1--64:??},
  month =        jul,
  year =         {2023},
  coden =        {ATASFO},
  doi =          {https://doi.org/10.1145/3590770},
  issn =         {1084-4309 (print), 1557-7309 (electronic)},
  issn-l =       {1084-4309},
  bibdate =      {Mon Sep 18 09:07:08 MDT 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  url =          {https://dl.acm.org/doi/10.1145/3590770},
  abstract =     {This paper presents estimation and analysis of the
                 higher order harmonics, power features, and real
                 performance of flip-flop and master-slave latch
                 topologies. This research article outlines the impact
                 of transistor model quality and input signal \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  articleno =    {64},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-url =  {https://dl.acm.org/loi/todaes},
}

@article{He:2023:SEM,
  author =       {Xu He and Yao Wang and Chang Liu and Qiang Wu and Juan
                 Luo and Yang Guo},
  title =        {A Soft-Error Mitigation Approach Using Pulse Quenching
                 Enhancement at Detailed Placement for Combinational
                 Circuits},
  journal =      j-TODAES,
  volume =       {28},
  number =       {4},
  pages =        {65:1--65:??},
  month =        jul,
  year =         {2023},
  coden =        {ATASFO},
  doi =          {https://doi.org/10.1145/3595637},
  issn =         {1084-4309 (print), 1557-7309 (electronic)},
  issn-l =       {1084-4309},
  bibdate =      {Mon Sep 18 09:07:08 MDT 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  url =          {https://dl.acm.org/doi/10.1145/3595637},
  abstract =     {As technology continuously shrinks, radiation-induced
                 soft errors have become a great threat to the circuit
                 reliability. Among all the causes, the Single-Event
                 Transient (SET) effect is the dominating one for the
                 radiation-induced soft errors. SET-induced \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  articleno =    {65},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-url =  {https://dl.acm.org/loi/todaes},
}

@article{Kazerooni-Zand:2023:MBM,
  author =       {Reza Kazerooni-Zand and Mehdi Kamal and Ali
                 Afzali-Kusha and Massoud Pedram},
  title =        {Memristive-based Mixed-signal {CGRA} for Accelerating
                 Deep Neural Network Inference},
  journal =      j-TODAES,
  volume =       {28},
  number =       {4},
  pages =        {66:1--66:??},
  month =        jul,
  year =         {2023},
  coden =        {ATASFO},
  doi =          {https://doi.org/10.1145/3595638},
  issn =         {1084-4309 (print), 1557-7309 (electronic)},
  issn-l =       {1084-4309},
  bibdate =      {Mon Sep 18 09:07:08 MDT 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  url =          {https://dl.acm.org/doi/10.1145/3595638},
  abstract =     {In this paper, a mixed-signal coarse-grained
                 reconfigurable architecture (CGRA) for accelerating
                 inference in deep neural networks (DNNs) is presented.
                 It is based on performing dot-product computations
                 using analog computing to achieve a considerable
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  articleno =    {66},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-url =  {https://dl.acm.org/loi/todaes},
}

@article{Chu:2023:ADC,
  author =       {Cheng Chu and Cheng Liu and Dawen Xu and Ying Wang and
                 Tao Luo and Huawei Li and Xiaowei Li},
  title =        {Accelerating Deformable Convolution Networks with
                 Dynamic and Irregular Memory Accesses},
  journal =      j-TODAES,
  volume =       {28},
  number =       {4},
  pages =        {67:1--67:??},
  month =        jul,
  year =         {2023},
  coden =        {ATASFO},
  doi =          {https://doi.org/10.1145/3597431},
  issn =         {1084-4309 (print), 1557-7309 (electronic)},
  issn-l =       {1084-4309},
  bibdate =      {Mon Sep 18 09:07:08 MDT 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  url =          {https://dl.acm.org/doi/10.1145/3597431},
  abstract =     {Deformable convolution networks (DCNs) proposed to
                 address image recognition with geometric or photometric
                 variations typically involve deformable convolution
                 that convolves on arbitrary locations of input
                 features. The locations change with different
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  articleno =    {67},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-url =  {https://dl.acm.org/loi/todaes},
}

@article{Jiang:2023:ISS,
  author =       {Iris Hru Jiang and David Chinnery and Gracieli Posser
                 and Jens Lienig},
  title =        {Introduction to the Special Section on Advances in
                 Physical Design Automation},
  journal =      j-TODAES,
  volume =       {28},
  number =       {5},
  pages =        {68:1--68:??},
  month =        sep,
  year =         {2023},
  coden =        {ATASFO},
  doi =          {https://doi.org/10.1145/3604593},
  issn =         {1084-4309 (print), 1557-7309 (electronic)},
  issn-l =       {1084-4309},
  bibdate =      {Mon Sep 18 09:07:10 MDT 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  url =          {https://dl.acm.org/doi/10.1145/3604593},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  articleno =    {68},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-url =  {https://dl.acm.org/loi/todaes},
}

@article{Gopalakrishnan:2023:GMW,
  author =       {Ramprasath Srinivasa Gopalakrishnan and Meghna
                 Madhusudan and Arvind K. Sharma and Jitesh Poojary and
                 Soner Yaldiz and Ramesh Harjani and Steven M. Burns and
                 Sachin S. Sapatnekar},
  title =        {A Generalized Methodology for Well Island Generation
                 and Well-tap Insertion in Analog\slash Mixed-signal
                 Layouts},
  journal =      j-TODAES,
  volume =       {28},
  number =       {5},
  pages =        {69:1--69:??},
  month =        sep,
  year =         {2023},
  coden =        {ATASFO},
  doi =          {https://doi.org/10.1145/3580477},
  issn =         {1084-4309 (print), 1557-7309 (electronic)},
  issn-l =       {1084-4309},
  bibdate =      {Mon Sep 18 09:07:10 MDT 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  url =          {https://dl.acm.org/doi/10.1145/3580477},
  abstract =     {Well island generation and well tap placement is an
                 important problem in analog/mixed-signal (AMS)
                 circuits. Well taps can only prevent latchups within a
                 certain radius of influence within a well island, and
                 hence must be appropriately inserted to cover
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  articleno =    {69},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-url =  {https://dl.acm.org/loi/todaes},
}

@article{Wei:2023:APP,
  author =       {Min Wei and Xingyu Tong and Yuan Wen and Jianli Chen
                 and Jun Yu and Wenxing Zhu and Yao-Wen Chang},
  title =        {Analytical Placement with {$3$D} {Poisson}'s Equation
                 and {ADMM}-based Optimization for Large-scale {2.5D}
                 Heterogeneous {FPGAs}},
  journal =      j-TODAES,
  volume =       {28},
  number =       {5},
  pages =        {70:1--70:??},
  month =        sep,
  year =         {2023},
  coden =        {ATASFO},
  doi =          {https://doi.org/10.1145/3582554},
  issn =         {1084-4309 (print), 1557-7309 (electronic)},
  issn-l =       {1084-4309},
  bibdate =      {Mon Sep 18 09:07:10 MDT 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  url =          {https://dl.acm.org/doi/10.1145/3582554},
  abstract =     {As design complexity keeps increasing, the 2.5D
                 field-programmable gate array (FPGA) with large logic
                 capacity has become popular in modern circuit
                 applications. A 2.5D FPGA consists of multiple dies
                 connected through super long lines (SLLs) on an
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  articleno =    {70},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-url =  {https://dl.acm.org/loi/todaes},
}

@article{Hougardy:2023:FOD,
  author =       {Stefan Hougardy and Meike Neuwohner and Ulrike
                 Schorr},
  title =        {A Fast Optimal Double-row Legalization Algorithm},
  journal =      j-TODAES,
  volume =       {28},
  number =       {5},
  pages =        {71:1--71:??},
  month =        sep,
  year =         {2023},
  coden =        {ATASFO},
  doi =          {https://doi.org/10.1145/3579844},
  issn =         {1084-4309 (print), 1557-7309 (electronic)},
  issn-l =       {1084-4309},
  bibdate =      {Mon Sep 18 09:07:10 MDT 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  url =          {https://dl.acm.org/doi/10.1145/3579844},
  abstract =     {In Placement Legalization, it is often assumed that
                 (almost) all standard cells possess the same height and
                 can therefore be aligned in cell rows, which can then
                 be treated independently. However, this is no longer
                 true for recent technologies, where a \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  articleno =    {71},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-url =  {https://dl.acm.org/loi/todaes},
}

@article{Daboul:2023:GIO,
  author =       {Siad Daboul and Stephan Held and Bento Natura and
                 Daniel Rotter},
  title =        {Global Interconnect Optimization},
  journal =      j-TODAES,
  volume =       {28},
  number =       {5},
  pages =        {72:1--72:??},
  month =        sep,
  year =         {2023},
  coden =        {ATASFO},
  doi =          {https://doi.org/10.1145/3587044},
  issn =         {1084-4309 (print), 1557-7309 (electronic)},
  issn-l =       {1084-4309},
  bibdate =      {Mon Sep 18 09:07:10 MDT 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  url =          {https://dl.acm.org/doi/10.1145/3587044},
  abstract =     {We propose a new comprehensive solution to global
                 interconnect optimization. Traditional buffering
                 algorithms mostly insert repeaters on a net-by-net
                 basis based on slacks and possibly guided by global
                 wires. We show how to integrate routing congestion,
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  articleno =    {72},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-url =  {https://dl.acm.org/loi/todaes},
}

@article{Zhou:2023:MMR,
  author =       {Zhonghua Zhou and Yuxuan Pan and Guy G. F. Lemieux and
                 Andr{\'e} Ivanov},
  title =        {{MEDUSA}: a Multi-Resolution Machine Learning
                 Congestion Estimation Method for {$2$D} and {$3$D}
                 Global Routing},
  journal =      j-TODAES,
  volume =       {28},
  number =       {5},
  pages =        {73:1--73:??},
  month =        sep,
  year =         {2023},
  coden =        {ATASFO},
  doi =          {https://doi.org/10.1145/3590768},
  issn =         {1084-4309 (print), 1557-7309 (electronic)},
  issn-l =       {1084-4309},
  bibdate =      {Mon Sep 18 09:07:10 MDT 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  url =          {https://dl.acm.org/doi/10.1145/3590768},
  abstract =     {Routing congestion is one of the many factors that
                 need to be minimized during the physical design phase
                 of large integrated circuits. In this article, we
                 propose a novel congestion estimation method, called
                 MEDUSA, that consists of three parts: (1) a \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  articleno =    {73},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-url =  {https://dl.acm.org/loi/todaes},
}

@article{Zheng:2023:BVD,
  author =       {Su Zheng and Hao Geng and Chen Bai and Bei Yu and
                 Martin D. F. Wong},
  title =        {Boosting {VLSI} Design Flow Parameter Tuning with
                 Random Embedding and Multi-objective Trust-region
                 {Bayesian} Optimization},
  journal =      j-TODAES,
  volume =       {28},
  number =       {5},
  pages =        {74:1--74:??},
  month =        sep,
  year =         {2023},
  coden =        {ATASFO},
  doi =          {https://doi.org/10.1145/3597931},
  issn =         {1084-4309 (print), 1557-7309 (electronic)},
  issn-l =       {1084-4309},
  bibdate =      {Mon Sep 18 09:07:10 MDT 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  url =          {https://dl.acm.org/doi/10.1145/3597931},
  abstract =     {Modern very large-scale integration (VLSI) design
                 requires the implementation of integrated circuits
                 using electronic design automation (EDA) tools. Due to
                 the complexity of EDA algorithms, there are numerous
                 tool parameters that have imperative impacts \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  articleno =    {74},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-url =  {https://dl.acm.org/loi/todaes},
}

@Article{Murali:2023:PSR,
  author =       "Gauthaman Murali and Anthony Agnesina and Sung Kyu
                 Lim",
  title =        "A {PPA} Study of Reinforced Placement Parameter
                 Autotuning: Pseudo-{$3$D} vs. True-{$3$D} Placers",
  journal =      j-TODAES,
  volume =       "28",
  number =       "5",
  pages =        "75:1--75:??",
  month =        sep,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3582007",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Sep 18 09:07:10 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3582007",
  abstract =     "3D Place and Route (P\&R) flows either involve true-3D
                 placement algorithms or use commercial 2D tools to
                 transform a 2D design into a 3D design. Irrespective of
                 the nature of the placers, several placement parameters
                 in these tools affect the quality of \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron. Syst.",
  articleno =    "75",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@article{Vanna-Iampikul:2023:GBM,
  author =       {Pruek Vanna-Iampikul and Yi-Chen Lu and Da Eun Shim
                 and Sung Kyu Lim},
  title =        {{GNN}-based Multi-bit Flip-flop Clustering and
                 Post-clustering Design Optimization for
                 Energy-efficient {$3$D} {ICs}},
  journal =      j-TODAES,
  volume =       {28},
  number =       {5},
  pages =        {76:1--76:??},
  month =        sep,
  year =         {2023},
  coden =        {ATASFO},
  doi =          {https://doi.org/10.1145/3588570},
  issn =         {1084-4309 (print), 1557-7309 (electronic)},
  issn-l =       {1084-4309},
  bibdate =      {Mon Sep 18 09:07:10 MDT 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  url =          {https://dl.acm.org/doi/10.1145/3588570},
  abstract =     {In high-performance three-dimensional Integrated
                 Circuits (3D ICs), clock networks consume a large
                 portion of the full-chip power. However, no previous 3D
                 IC work has ever optimized 3D clock networks for both
                 power and performance simultaneously, which \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  articleno =    {76},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-url =  {https://dl.acm.org/loi/todaes},
}

@Article{Wu:2023:IBS,
  author =       "Jun-Sheng Wu and Chi-An Pan and Yi-Yu Liu",
  title =        "{ILP}-based Substrate Routing with Mismatched Via
                 Dimension Consideration for Wire-bonding {FBGA} Package
                 Design",
  journal =      j-TODAES,
  volume =       "28",
  number =       "5",
  pages =        "77:1--77:??",
  month =        sep,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3579843",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Sep 18 09:07:10 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3579843",
  abstract =     "With the rapidly growing demand for system-level
                 integration, package substrates have become one of the
                 most important carriers in semiconductor industry. Fine
                 pitch ball grid array (FBGA) packaging is a widely used
                 technology thanks to its relative cost- \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron. Syst.",
  articleno =    "77",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@article{Liu:2023:CPN,
  author =       {Yanjiang Liu and Junwei Li and Tongzhou Qu and Zibin
                 Dai},
  title =        {{CBDC-PUF}: a Novel Physical Unclonable Function
                 Design Framework Utilizing Configurable Butterfly Delay
                 Chain Against Modeling Attack},
  journal =      j-TODAES,
  volume =       {28},
  number =       {5},
  pages =        {78:1--78:??},
  month =        sep,
  year =         {2023},
  coden =        {ATASFO},
  doi =          {https://doi.org/10.1145/3588435},
  issn =         {1084-4309 (print), 1557-7309 (electronic)},
  issn-l =       {1084-4309},
  bibdate =      {Mon Sep 18 09:07:10 MDT 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  url =          {https://dl.acm.org/doi/10.1145/3588435},
  abstract =     {Physical unclonable function (PUF) is a promising
                 security-based primitive, which provides an extremely
                 large number of responses for key generation and
                 authentication applications. Various PUFs have been
                 developed as central building blocks in \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  articleno =    {78},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-url =  {https://dl.acm.org/loi/todaes},
}

@article{Aghaeekiasaraee:2023:CFR,
  author =       {Erfan Aghaeekiasaraee and Aysa Fakheri Tabrizi and
                 Tiago Augusto Fontana and Renan Netto and Sheiny Fabre
                 Almeida and Upma Gandhi and Jos{\'e} Lu{\'\i}s
                 G{\"u}ntzel and David Westwick and Laleh Behjat},
  title =        {{CRP2.0}: a Fast and Robust Cooperation between
                 Routing and Placement in Advanced Technology Nodes},
  journal =      j-TODAES,
  volume =       {28},
  number =       {5},
  pages =        {79:1--79:??},
  month =        sep,
  year =         {2023},
  coden =        {ATASFO},
  doi =          {https://doi.org/10.1145/3590962},
  issn =         {1084-4309 (print), 1557-7309 (electronic)},
  issn-l =       {1084-4309},
  bibdate =      {Mon Sep 18 09:07:10 MDT 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  url =          {https://dl.acm.org/doi/10.1145/3590962},
  abstract =     {Traditionally, the placement and routing stages of a
                 physical design are performed separately. Because of
                 the additional complexities arising in advanced
                 technology nodes, they have become more interdependent.
                 Therefore, creating efficient cooperation \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  articleno =    {79},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-url =  {https://dl.acm.org/loi/todaes},
}

@article{Zhu:2023:DSE,
  author =       {Binwu Zhu and Xinyun Zhang and Yibo Lin and Bei Yu and
                 Martin Wong},
  title =        {{DRC-SG 2.0}: Efficient Design Rule Checking Script
                 Generation via Key Information Extraction},
  journal =      j-TODAES,
  volume =       {28},
  number =       {5},
  pages =        {80:1--80:??},
  month =        sep,
  year =         {2023},
  coden =        {ATASFO},
  doi =          {https://doi.org/10.1145/3594666},
  issn =         {1084-4309 (print), 1557-7309 (electronic)},
  issn-l =       {1084-4309},
  bibdate =      {Mon Sep 18 09:07:10 MDT 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  url =          {https://dl.acm.org/doi/10.1145/3594666},
  abstract =     {Design Rule Checking (DRC) is a critical step in
                 integrated circuit design. DRC requires formatted
                 scripts as the input to design rule checkers. However,
                 these scripts are manually generated in the foundry,
                 which is tedious and error prone for generation
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  articleno =    {80},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-url =  {https://dl.acm.org/loi/todaes},
}

@article{Kritikakou:2023:MMS,
  author =       {Angeliki Kritikakou and Stefanos Skalistis},
  title =        {Mitigating Mode-switch through Run-time Computation of
                 Response Time},
  journal =      j-TODAES,
  volume =       {28},
  number =       {5},
  pages =        {81:1--81:??},
  month =        sep,
  year =         {2023},
  coden =        {ATASFO},
  doi =          {https://doi.org/10.1145/3597432},
  issn =         {1084-4309 (print), 1557-7309 (electronic)},
  issn-l =       {1084-4309},
  bibdate =      {Mon Sep 18 09:07:10 MDT 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  url =          {https://dl.acm.org/doi/10.1145/3597432},
  abstract =     {Mixed-critical systems consist of applications with
                 different criticality. In these systems, different
                 confidence levels of Worst-Case Execution Time (WCET)
                 estimations are used. Dual criticality systems use a
                 less pessimistic, but with lower level of \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  articleno =    {81},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-url =  {https://dl.acm.org/loi/todaes},
}

@article{Wang:2023:BIH,
  author =       {Zilu Wang and Xinming Shi and Xin Yao},
  title =        {A Brain-Inspired Hardware Architecture for
                 Evolutionary Algorithms Based on Memristive Arrays},
  journal =      j-TODAES,
  volume =       {28},
  number =       {5},
  pages =        {82:1--82:??},
  month =        sep,
  year =         {2023},
  coden =        {ATASFO},
  doi =          {https://doi.org/10.1145/3598421},
  issn =         {1084-4309 (print), 1557-7309 (electronic)},
  issn-l =       {1084-4309},
  bibdate =      {Mon Sep 18 09:07:10 MDT 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  url =          {https://dl.acm.org/doi/10.1145/3598421},
  abstract =     {Brain-inspired computing takes inspiration from the
                 brain to create energy-efficient hardware systems for
                 information processing, capable of performing highly
                 sophisticated tasks. Systems built with emerging
                 electronics, such as memristive devices, can \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  articleno =    {82},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-url =  {https://dl.acm.org/loi/todaes},
}

@article{Monjur:2023:HSR,
  author =       {Mohammad Monjur and Joshua Calzadillas and Qiaoyan
                 Yu},
  title =        {Hardware Security Risks and Threat Analyses in
                 Advanced Manufacturing Industry},
  journal =      j-TODAES,
  volume =       {28},
  number =       {5},
  pages =        {83:1--83:??},
  month =        sep,
  year =         {2023},
  coden =        {ATASFO},
  doi =          {https://doi.org/10.1145/3603502},
  issn =         {1084-4309 (print), 1557-7309 (electronic)},
  issn-l =       {1084-4309},
  bibdate =      {Mon Sep 18 09:07:10 MDT 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  url =          {https://dl.acm.org/doi/10.1145/3603502},
  abstract =     {The advanced manufacturing industry (AMI) faces many
                 unique challenges from the cyber-physical domain.
                 Security threats are originated from two integral
                 parts: software and hardware. Over the past decade,
                 software security has been addressed extensively,
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  articleno =    {83},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-url =  {https://dl.acm.org/loi/todaes},
}

@article{Narang:2023:DPM,
  author =       {Gaurav Narang and Aryan Deshwal and Raid Ayoub and
                 Michael Kishinevsky and Janardhan Rao Doppa and Partha
                 Pratim Pande},
  title =        {Dynamic Power Management in Large Manycore Systems: a
                 Learning-to-Search Framework},
  journal =      j-TODAES,
  volume =       {28},
  number =       {5},
  pages =        {84:1--84:??},
  month =        sep,
  year =         {2023},
  coden =        {ATASFO},
  doi =          {https://doi.org/10.1145/3603501},
  issn =         {1084-4309 (print), 1557-7309 (electronic)},
  issn-l =       {1084-4309},
  bibdate =      {Mon Sep 18 09:07:10 MDT 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  url =          {https://dl.acm.org/doi/10.1145/3603501},
  abstract =     {The complexity of manycore System-on-chips (SoCs) is
                 growing faster than our ability to manage them to
                 reduce the overall energy consumption. Further, as SoC
                 design moves toward three-dimensional (3D)
                 architectures, the core's power density increases
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  articleno =    {84},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-url =  {https://dl.acm.org/loi/todaes},
}

@article{Tan:2023:IPC,
  author =       {Jingweijia Tan and Weiren Wang and Maodi Ma and
                 Xiaohui Wei and Kaige Yan},
  title =        {Improving the Performance of {CNN} Accelerator
                 Architecture under the Impact of Process Variations},
  journal =      j-TODAES,
  volume =       {28},
  number =       {5},
  pages =        {85:1--85:??},
  month =        sep,
  year =         {2023},
  coden =        {ATASFO},
  doi =          {https://doi.org/10.1145/3604236},
  issn =         {1084-4309 (print), 1557-7309 (electronic)},
  issn-l =       {1084-4309},
  bibdate =      {Mon Sep 18 09:07:10 MDT 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  url =          {https://dl.acm.org/doi/10.1145/3604236},
  abstract =     {Convolutional neural network (CNN) accelerators are
                 popular specialized platforms for efficient CNN
                 processing. As semiconductor manufacturing technology
                 scales down to nano scale, process variation
                 dramatically affects the chip's quality. Process
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  articleno =    {85},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-url =  {https://dl.acm.org/loi/todaes},
}

@Article{Li:2023:CAT,
  author =       {Meng-Jing Li and Yu-Chuan Yen and Yi-Ting Li and
                 Yung-Chih Chen and Chun-Yao Wang},
  title =        {A Constructive Approach for Threshold Function
                 Identification},
  journal =      j-TODAES,
  volume =       {28},
  number =       {5},
  pages =        {86:1--86:??},
  articleno =    {86},
  month =        sep,
  year =         {2023},
  DOI =          {https://doi.org/10.1145/3606371},
  URL =          {https://dl.acm.org/doi/10.1145/3606371},
  CODEN =        {ATASFO},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  journal-URL =  {https://dl.acm.org/loi/todaes},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  abstract =     {Threshold Function (TF) is a subset of Boolean
                 function that can be represented with a single linear
                 threshold gate (LTG). In the research about threshold
                 logic, the identification of TF is an important task
                 that determines whether a given function is a
                 \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate =      {Mon Sep 18 09:07:10 MDT 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

@Article{Yamin:2023:UAE,
  author =       {Nuzhat Yamin and Ganapati Bhat},
  title =        {Uncertainty-aware Energy Harvest Prediction and
                 Management for {IoT} Devices},
  journal =      j-TODAES,
  volume =       {28},
  number =       {5},
  pages =        {87:1--87:??},
  articleno =    {87},
  month =        sep,
  year =         {2023},
  DOI =          {https://doi.org/10.1145/3606372},
  URL =          {https://dl.acm.org/doi/10.1145/3606372},
  CODEN =        {ATASFO},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  journal-URL =  {https://dl.acm.org/loi/todaes},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  abstract =     {Internet of things (IoT) devices are popular in
                 several high-impact applications such as mobile
                 healthcare and digital agriculture. However, IoT
                 devices have limited operating lifetime due to their
                 small form factor. Harvesting energy from ambient
                 sources \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate =      {Mon Sep 18 09:07:10 MDT 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

@Article{Zhang:2023:SKR,
  author =       {Ruisi Zhang and Shehzeen Hussain and Huili Chen and
                 Mojan Javaheripi and Farinaz Koushanfar},
  title =        {Systemization of Knowledge: Robust Deep Learning using
                 Hardware--Software Co-design in Centralized and
                 Federated Settings},
  journal =      j-TODAES,
  volume =       {28},
  number =       {6},
  pages =        {88:1--88:??},
  articleno =    {88},
  month =        nov,
  year =         {2023},
  DOI =          {https://doi.org/10.1145/3616868},
  URL =          {https://dl.acm.org/doi/10.1145/3616868},
  CODEN =        {ATASFO},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  journal-URL =  {https://dl.acm.org/loi/todaes},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  abstract =     {Deep learning (DL) models are enabling a significant
                 paradigm shift in a diverse range of fields, including
                 natural language processing and computer vision, as
                 well as the design and automation of complex integrated
                 circuits. While the deep models --- and \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate =      {Fri Nov 10 09:53:53 MST 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

@Article{Lu:2023:SPI,
  author =       {Huaixi Lu and Yue Xing and Aarti Gupta and Sharad
                 Malik},
  title =        {{SoC} Protocol Implementation Verification Using
                 Instruction-Level Abstraction Specifications},
  journal =      j-TODAES,
  volume =       {28},
  number =       {6},
  pages =        {89:1--89:??},
  articleno =    {89},
  month =        nov,
  year =         {2023},
  DOI =          {https://doi.org/10.1145/3610292},
  URL =          {https://dl.acm.org/doi/10.1145/3610292},
  CODEN =        {ATASFO},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  journal-URL =  {https://dl.acm.org/loi/todaes},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  abstract =     {In modern systems-on-chips, several hardware protocols
                 are used for communication and interaction among
                 different modules. These protocols are complex and need
                 to be implemented correctly for correct operation of
                 the system-on-chip. Therefore, protocol \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate =      {Fri Nov 10 09:53:53 MST 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

@Article{He:2023:GLP,
  author =       {Xu He and Yao Wang and Zhiyong Fu and Yipei Wang and
                 Yang Guo},
  title =        {A General Layout Pattern Clustering Using Geometric
                 Matching-based Clip Relocation and Lower-bound Aided
                 Optimization},
  journal =      j-TODAES,
  volume =       {28},
  number =       {6},
  pages =        {90:1--90:??},
  articleno =    {90},
  month =        nov,
  year =         {2023},
  DOI =          {https://doi.org/10.1145/3610293},
  URL =          {https://dl.acm.org/doi/10.1145/3610293},
  CODEN =        {ATASFO},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  journal-URL =  {https://dl.acm.org/loi/todaes},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  abstract =     {With the continuous shrinking of feature size,
                 detection of lithography hotspots has been raised as
                 one of the major concerns in
                 Design-for-Manufacturability (DFM) of semiconductor
                 processing. Hotspot detection, along with other DFM
                 measures, trades off \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate =      {Fri Nov 10 09:53:53 MST 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

@Article{Chang:2023:HPM,
  author =       {Yajing Chang and Yingjian Yan and Chunsheng Zhu and
                 Yanjiang Liu},
  title =        {A High-performance Masking Design Approach for {Saber}
                 against High-order Side-channel Attack},
  journal =      j-TODAES,
  volume =       {28},
  number =       {6},
  pages =        {91:1--91:??},
  articleno =    {91},
  month =        nov,
  year =         {2023},
  DOI =          {https://doi.org/10.1145/3611670},
  URL =          {https://dl.acm.org/doi/10.1145/3611670},
  CODEN =        {ATASFO},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  journal-URL =  {https://dl.acm.org/loi/todaes},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  abstract =     {Post-quantum cryptography (PQC) has become the most
                 promising cryptographic scheme against the threat of
                 quantum computing to conventional public-key
                 cryptographic schemes. Saber, as the finalist in the
                 third round of the PQC standardization procedure,
                 \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate =      {Fri Nov 10 09:53:53 MST 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

@Article{Venieris:2023:MMW,
  author =       {Stylianos I. Venieris and Javier Fernandez-Marques and
                 Nicholas D. Lane},
  title =        {Mitigating Memory Wall Effects in {CNN} Engines with
                 On-the-Fly Weights Generation},
  journal =      j-TODAES,
  volume =       {28},
  number =       {6},
  pages =        {92:1--92:??},
  articleno =    {92},
  month =        nov,
  year =         {2023},
  DOI =          {https://doi.org/10.1145/3611673},
  URL =          {https://dl.acm.org/doi/10.1145/3611673},
  CODEN =        {ATASFO},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  journal-URL =  {https://dl.acm.org/loi/todaes},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  abstract =     {The unprecedented accuracy of convolutional neural
                 networks (CNNs) across a broad range of AI tasks has
                 led to their widespread deployment in mobile and
                 embedded settings. In a pursuit for high-performance
                 and energy-efficient inference, significant \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate =      {Fri Nov 10 09:53:53 MST 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

@Article{Choudhury:2023:EPF,
  author =       {Muhtadi Choudhury and Minyan Gao and Avinash Varna and
                 Elad Peer and Domenic Forte},
  title =        {Enhanced {PATRON}: Fault Injection and Power-aware
                 {FSM} Encoding Through Linear Programming},
  journal =      j-TODAES,
  volume =       {28},
  number =       {6},
  pages =        {93:1--93:??},
  articleno =    {93},
  month =        nov,
  year =         {2023},
  DOI =          {https://doi.org/10.1145/3611669},
  URL =          {https://dl.acm.org/doi/10.1145/3611669},
  CODEN =        {ATASFO},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  journal-URL =  {https://dl.acm.org/loi/todaes},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  abstract =     {Since finite state machines (FSMs) regulate the
                 control flow in circuits, a computing system's security
                 might be breached by attacking the FSM. Physical
                 attacks are especially worrisome because they can
                 bypass software countermeasures. For example, an
                 \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate =      {Fri Nov 10 09:53:53 MST 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

@Article{Dahiya:2023:MDS,
  author =       {Ayush Dahiya and Poornima Mittal and Rajesh Rohilla},
  title =        {Modified Decoupled Sense Amplifier with Improved
                 Sensing Speed for Low-Voltage Differential {SRAM}},
  journal =      j-TODAES,
  volume =       {28},
  number =       {6},
  pages =        {94:1--94:??},
  articleno =    {94},
  month =        nov,
  year =         {2023},
  DOI =          {https://doi.org/10.1145/3611672},
  URL =          {https://dl.acm.org/doi/10.1145/3611672},
  CODEN =        {ATASFO},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  journal-URL =  {https://dl.acm.org/loi/todaes},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  abstract =     {A modified decoupled sense amplifier (MDSA) and
                 modified decoupled sense amplifier with NMOS
                 foot-switch is proposed for improved sensing in
                 differential SRAM for low-voltage operation at the
                 22-nm technology node. The MDSA and MDSANF both offer
                 notable \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate =      {Fri Nov 10 09:53:53 MST 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

@Article{Naseer:2023:QGA,
  author =       {Mahum Naseer and Osman Hasan and Muhammad Shafique},
  title =        {{QuanDA}: {GPU} Accelerated Quantitative Deep Neural
                 Network Analysis},
  journal =      j-TODAES,
  volume =       {28},
  number =       {6},
  pages =        {95:1--95:??},
  articleno =    {95},
  month =        nov,
  year =         {2023},
  DOI =          {https://doi.org/10.1145/3611671},
  URL =          {https://dl.acm.org/doi/10.1145/3611671},
  CODEN =        {ATASFO},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  journal-URL =  {https://dl.acm.org/loi/todaes},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  abstract =     {Over the past years, numerous studies demonstrated the
                 vulnerability of deep neural networks (DNNs) to make
                 correct classifications in the presence of small noise.
                 This motivated the formal analysis of DNNs to ensure
                 that they delineate acceptable \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate =      {Fri Nov 10 09:53:53 MST 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

@Article{Rawat:2023:RSB,
  author =       {Bhawna Rawat and Poornima Mittal},
  title =        {A Reconfigurable {7T} {SRAM} Bit Cell for High Speed,
                 Power Saving and Low Voltage Application},
  journal =      j-TODAES,
  volume =       {28},
  number =       {6},
  pages =        {96:1--96:??},
  articleno =    {96},
  month =        nov,
  year =         {2023},
  DOI =          {https://doi.org/10.1145/3616872},
  URL =          {https://dl.acm.org/doi/10.1145/3616872},
  CODEN =        {ATASFO},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  journal-URL =  {https://dl.acm.org/loi/todaes},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  abstract =     {The decreasing operational voltage and scaled
                 technology node for memory designing has widened the
                 gap between two crucial parameters for an SRAM ---
                 delay and power. As the demand for internet of things
                 is increasing, the need for round the clock \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate =      {Fri Nov 10 09:53:53 MST 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

@Article{Sivakumar:2023:SAL,
  author =       {S. Sivakumar and John Jose},
  title =        {Self Adaptive Logical Split Cache Techniques for
                 Delayed Aging of {NVM LLC}},
  journal =      j-TODAES,
  volume =       {28},
  number =       {6},
  pages =        {97:1--97:??},
  articleno =    {97},
  month =        nov,
  year =         {2023},
  DOI =          {https://doi.org/10.1145/3616871},
  URL =          {https://dl.acm.org/doi/10.1145/3616871},
  CODEN =        {ATASFO},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  journal-URL =  {https://dl.acm.org/loi/todaes},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  abstract =     {Due to the technological advancements in the last few
                 decades, several applications have emerged that demand
                 more computing power and on-chip and off-chip memories.
                 However, the scaling of memory technologies is not at
                 par with computing throughput of \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate =      {Fri Nov 10 09:53:53 MST 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

@Article{Esper:2023:ASF,
  author =       {Khalil Esper and Stefan Wildermann and J{\"u}rgen
                 Teich},
  title =        {Automatic Synthesis of {FSMs} for Enforcing
                 Non-functional Requirements on {MPSoCs} Using
                 Multi-objective Evolutionary Algorithms},
  journal =      j-TODAES,
  volume =       {28},
  number =       {6},
  pages =        {98:1--98:??},
  articleno =    {98},
  month =        nov,
  year =         {2023},
  DOI =          {https://doi.org/10.1145/3617832},
  URL =          {https://dl.acm.org/doi/10.1145/3617832},
  CODEN =        {ATASFO},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  journal-URL =  {https://dl.acm.org/loi/todaes},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  abstract =     {Embedded system applications often require guarantees
                 regarding non-functional properties when executed on a
                 given MPSoC platform. Examples of such requirements
                 include real-time, energy, or safety properties on
                 corresponding programs. One option to \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate =      {Fri Nov 10 09:53:53 MST 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

@Article{Senapati:2023:TTA,
  author =       {Debabrata Senapati and Kousik Rajesh and Chandan Karfa
                 and Arnab Sarkar},
  title =        {{TMDS}: Temperature-aware Makespan Minimizing {DAG}
                 Scheduler for Heterogeneous Distributed Systems},
  journal =      j-TODAES,
  volume =       {28},
  number =       {6},
  pages =        {99:1--99:??},
  articleno =    {99},
  month =        nov,
  year =         {2023},
  DOI =          {https://doi.org/10.1145/3616869},
  URL =          {https://dl.acm.org/doi/10.1145/3616869},
  CODEN =        {ATASFO},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  journal-URL =  {https://dl.acm.org/loi/todaes},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  abstract =     {To meet application-specific performance demands,
                 recent embedded platforms often involve the use of
                 intricate micro-architectural designs and very small
                 feature sizes leading to complex chips with
                 multi-million gates. Such ultra-high gate densities
                 often \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate =      {Fri Nov 10 09:53:53 MST 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

@Article{Hong:2023:PMC,
  author =       {Qinghui Hong and Richeng Huang and Pingdan Xiao and
                 Jun Li and Jingru Sun and Jiliang Zhang},
  title =        {Programmable In-memory Computing Circuit of {Fast
                 Hartley Transform}},
  journal =      j-TODAES,
  volume =       {28},
  number =       {6},
  pages =        {100:1--100:??},
  articleno =    {100},
  month =        nov,
  year =         {2023},
  DOI =          {https://doi.org/10.1145/3618112},
  URL =          {https://dl.acm.org/doi/10.1145/3618112},
  CODEN =        {ATASFO},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  journal-URL =  {https://dl.acm.org/loi/todaes},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  abstract =     {Discrete Hartley transform is a core component of
                 digital signal processing because of its advantages of
                 fast computing speed and less power consumption.
                 Traditional FPGA-based implementation methods have the
                 disadvantage of high latency, which cannot \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate =      {Fri Nov 10 09:53:53 MST 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

@Article{Kundu:2023:MTF,
  author =       "Debraj Kundu and Sudip Roy",
  title =        "Multi-target Fluid Mixing in {MEDA} Biochips: Theory
                 and an Attempt toward Waste Minimization",
  journal =      j-TODAES,
  volume =       "28",
  number =       "6",
  pages =        "101:1--101:??",
  month =        nov,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3622785",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Nov 10 09:53:53 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3622785",
  abstract =     "Sample preparation is an inherent procedure of many
                 biochemical applications, and digital microfluidic
                 biochips (DMBs) have proved to be very effective in
                 performing such a procedure. In a single mixing step,
                 conventional DMBs can mix two droplets in a 1:1
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron. Syst.",
  articleno =    "101",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Zhou:2023:SLR,
  author =       {Shanglin Zhou and Mikhail A. Bragin and Deniz Gurevin
                 and Lynn Pepin and Fei Miao and Caiwen Ding},
  title =        {Surrogate {Lagrangian} Relaxation: a Path to
                 Retrain-Free Deep Neural Network Pruning},
  journal =      j-TODAES,
  volume =       {28},
  number =       {6},
  pages =        {102:1--102:??},
  articleno =    {102},
  month =        nov,
  year =         {2023},
  DOI =          {https://doi.org/10.1145/3624476},
  URL =          {https://dl.acm.org/doi/10.1145/3624476},
  CODEN =        {ATASFO},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  journal-URL =  {https://dl.acm.org/loi/todaes},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  abstract =     {Network pruning is a widely used technique to reduce
                 computation cost and model size for deep neural
                 networks. However, the typical three-stage pipeline
                 (i.e., training, pruning, and retraining (fine-tuning))
                 significantly increases the overall training \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate =      {Fri Nov 10 09:53:53 MST 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

@Article{Ding:2023:TMP,
  author =       {Bo Ding and Jinglei Huang and Junpeng Wang and Qi Xu
                 and Song Chen and Yi Kang},
  title =        {Task Modules Partitioning, Scheduling and
                 Floorplanning for Partially Dynamically Reconfigurable
                 Systems with Heterogeneous Resources},
  journal =      j-TODAES,
  volume =       {28},
  number =       {6},
  pages =        {103:1--103:??},
  articleno =    {103},
  month =        nov,
  year =         {2023},
  DOI =          {https://doi.org/10.1145/3625295},
  URL =          {https://dl.acm.org/doi/10.1145/3625295},
  CODEN =        {ATASFO},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  journal-URL =  {https://dl.acm.org/loi/todaes},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  abstract =     {Some field programmable gate arrays (FPGAs) can be
                 partially dynamically reconfigurable with heterogeneous
                 resources distributed on the chip. FPGA-based partially
                 dynamically reconfigurable system (FPGA-PDRS) can be
                 used to accelerate computing and \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate =      {Fri Nov 10 09:53:53 MST 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

@Article{Lin:2023:SRB,
  author =       {Wenxiong Lin and Haojie Wu and Peng Gao and Wenjun Luo
                 and Shuting Cai and Xiaoming Xiong},
  title =        {Sequential Routing-based Time-division Multiplexing
                 Optimization for Multi-{FPGA} Systems},
  journal =      j-TODAES,
  volume =       {28},
  number =       {6},
  pages =        {104:1--104:??},
  articleno =    {104},
  month =        nov,
  year =         {2023},
  DOI =          {https://doi.org/10.1145/3626322},
  URL =          {https://dl.acm.org/doi/10.1145/3626322},
  CODEN =        {ATASFO},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  journal-URL =  {https://dl.acm.org/loi/todaes},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  abstract =     {Multi-field programming gate array (FPGA) systems are
                 widely used in various circuit design-related areas,
                 such as hardware emulation, virtual prototypes, and
                 chiplet design methodologies. However, a physical
                 resource clash between inter-FPGA signals and
                 \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate =      {Fri Nov 10 09:53:53 MST 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

@Article{Praveen:2023:DER,
  author =       "Pushkar Praveen and R. K. Singh",
  title =        "Design of Enhanced Reversible {9T} {SRAM} Design for
                 the Reduction in Sub-threshold Leakage Current with
                 14nm {FinFET} Technology",
  journal =      j-TODAES,
  volume =       "28",
  number =       "6",
  pages =        "105:1--105:??",
  month =        nov,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3616538",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Nov 10 09:53:53 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3616538",
  abstract =     "Power dissipation is considered one of the important
                 issues in low power Very-large-scale integration (VLSI)
                 circuit design and is related to the threshold voltage.
                 Generally, the sub-threshold leakage current and the
                 leakage power dissipation are \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron. Syst.",
  articleno =    "105",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Ni:2024:ISI,
  author =       {Tianming Ni and Xiaoqing Wen and Hussam Amrouch and
                 Cheng Zhuo and Peilin Song},
  title =        {Introduction to the Special Issue on Design for
                 Testability and Reliability of Security-aware
                 Hardware},
  journal =      j-TODAES,
  volume =       {29},
  number =       {1},
  pages =        {1:1--1:??},
  articleno =    {1},
  month =        jan,
  year =         {2024},
  DOI =          {https://doi.org/10.1145/3631476},
  URL =          {https://dl.acm.org/doi/10.1145/3631476},
  CODEN =        {ATASFO},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  journal-URL =  {https://dl.acm.org/loi/todaes},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  abstract =     {The research on design for testability and reliability
                 of security-aware hardware has been important in both
                 academia and industry. With ever-growing globalization,
                 commercial hardware design, manufacturing,
                 transportation, and supply now involve many \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate =      {Mon Jan 15 11:14:18 MST 2024},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

@Article{Cui:2024:ERO,
  author =       "Yijun Cui and Jiang Li and Yunpeng Chen and Chenghua
                 Wang and Chongyan Gu and M{\'a}ire O'Neill and Weiqiang
                 Liu",
  title =        "An Efficient Ring Oscillator {PUF} Using Programmable
                 Delay Units on {FPGA}",
  journal =      j-TODAES,
  volume =       "29",
  number =       "1",
  pages =        "2:1--2:??",
  month =        jan,
  year =         "2024",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3593807",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Jan 15 11:14:18 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/prng.bib;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3593807",
  abstract =     "The ring oscillator (RO) PUF can be implemented on
                 different FPGA platforms with high uniqueness and
                 reliability. To decrease the hardware cost of
                 conventional RO PUFs, a new design using the
                 programmable delay units is proposed, namely, PRO PUF.
                 The \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron. Syst.",
  articleno =    "2",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Li:2024:PLC,
  author =       {Taixin Li and Boran Sun and Hongtao Zhong and Yixin Xu
                 and Vijaykrishnan Narayanan and Liang Shi and Tianyi
                 Wang and Yao Yu and Thomas K{\"a}mpfe and Kai Ni and
                 Huazhong Yang and Xueqing Li},
  title =        {{ProtFe}: Low-Cost Secure Power Side-Channel
                 Protection for General and Custom {FeFET}-Based
                 Memories},
  journal =      j-TODAES,
  volume =       {29},
  number =       {1},
  pages =        {3:1--3:??},
  articleno =    {3},
  month =        jan,
  year =         {2024},
  DOI =          {https://doi.org/10.1145/3604589},
  URL =          {https://dl.acm.org/doi/10.1145/3604589},
  CODEN =        {ATASFO},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  journal-URL =  {https://dl.acm.org/loi/todaes},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  abstract =     {Ferroelectric Field Effect Transistors (FeFETs) have
                 spurred increasing interest in both memories and
                 computing applications, thanks to their CMOS
                 compatibility, low-power operation, and high
                 scalability. However, new security threats to the
                 FeFET-based \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate =      {Mon Jan 15 11:14:18 MST 2024},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}

%%% TODAES 29(1), January 2024, article 4.  Abstract is a truncated excerpt.
@Article{Pan:2024:CEP,
  author =       {Zijin Pan and Xunyu Li and Weiquan Hao and Runyu Miao
                 and Albert Wang},
  title =        {On-chip {ESD} Protection Design Methodologies by {CAD}
                 Simulation},
  journal =      j-TODAES,
  year =         {2024},
  month =        jan,
  volume =       {29},
  number =       {1},
  articleno =    {4},
  pages =        {4:1--4:??},
  DOI =          {https://doi.org/10.1145/3593808},
  URL =          {https://dl.acm.org/doi/10.1145/3593808},
  CODEN =        {ATASFO},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
  abstract =     {Electrostatic discharge (ESD) can cause malfunction or
                 failure of integrated circuits (ICs). On-chip ESD
                 protection design is a major IC design-for-reliability
                 (DfR) challenge, particularly for complex chips made in
                 advanced technology nodes. Traditional \ldots{}},
  bibdate =      {Mon Jan 15 11:14:18 MST 2024},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

%%% TODAES 29(1), January 2024, article 5.  Abstract is a truncated excerpt.
@Article{Bian:2024:RAS,
  author =       {Jingchang Bian and Zhengfeng Huang and Peng Ye and
                 Zhao Yang and Huaguo Liang},
  title =        {A Reliability-Aware Splitting Duty-Cycle Physical
                 Unclonable Function Based on Trade-off Process,
                 Voltage, and Temperature Variations},
  journal =      j-TODAES,
  year =         {2024},
  month =        jan,
  volume =       {29},
  number =       {1},
  articleno =    {5},
  pages =        {5:1--5:??},
  DOI =          {https://doi.org/10.1145/3594667},
  URL =          {https://dl.acm.org/doi/10.1145/3594667},
  CODEN =        {ATASFO},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
  abstract =     {The physical unclonable function (PUF) is a hardware
                 security primitive that can be used to prevent
                 malicious attacks aimed at obtaining device information
                 at the hardware level. The ring oscillator (RO) PUF has
                 attracted considerable research attention. \ldots{}},
  bibdate =      {Mon Jan 15 11:14:18 MST 2024},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

%%% TODAES 29(1), January 2024, article 6.  Abstract is a truncated excerpt.
@Article{Zhang:2024:HTS,
  author =       {Yuan Zhang and Jiliang Zhang},
  title =        {A High Throughput {STR}-based {TRNG} by Jitter Precise
                 Quantization Superposing},
  journal =      j-TODAES,
  year =         {2024},
  month =        jan,
  volume =       {29},
  number =       {1},
  articleno =    {6},
  pages =        {6:1--6:??},
  DOI =          {https://doi.org/10.1145/3606373},
  URL =          {https://dl.acm.org/doi/10.1145/3606373},
  CODEN =        {ATASFO},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
  abstract =     {With the rapid development of integrated circuits and
                 the continuous progress of computing capability, higher
                 demands have been placed on the security and speed of
                 data encryption in security systems. As a basic
                 hardware security primitive, the true \ldots{}},
  bibdate =      {Mon Jan 15 11:14:18 MST 2024},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/prng.bib;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

%%% TODAES 29(1), January 2024, article 7.  Abstract is a truncated excerpt.
@Article{Xiang:2024:TCL,
  author =       {Dong Xiang},
  title =        {Test Compression for Launch-on-Capture Transition
                 Fault Testing},
  journal =      j-TODAES,
  year =         {2024},
  month =        jan,
  volume =       {29},
  number =       {1},
  articleno =    {7},
  pages =        {7:1--7:??},
  DOI =          {https://doi.org/10.1145/3597433},
  URL =          {https://dl.acm.org/doi/10.1145/3597433},
  CODEN =        {ATASFO},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
  abstract =     {A new low-power test compression scheme, called
                 Dcompress, is proposed for launch-on-capture transition
                 fault testing by using a new seed encoding scheme, a
                 new design for testability architecture, and a new
                 low-power test application procedure. The new
                 \ldots{}},
  bibdate =      {Mon Jan 15 11:14:18 MST 2024},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

%%% TODAES 29(1), January 2024, article 8.  Abstract is a truncated excerpt.
@Article{Bi:2024:AVA,
  author =       {Yongtian Bi and Qi Xu and Hao Geng and Song Chen and
                 Yi Kang},
  title =        {{AD$^2$VNCS}: Adversarial Defense and Device
                 Variation-tolerance in Memristive Crossbar-based
                 Neuromorphic Computing Systems},
  journal =      j-TODAES,
  year =         {2024},
  month =        jan,
  volume =       {29},
  number =       {1},
  articleno =    {8},
  pages =        {8:1--8:??},
  DOI =          {https://doi.org/10.1145/3600231},
  URL =          {https://dl.acm.org/doi/10.1145/3600231},
  CODEN =        {ATASFO},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
  abstract =     {In recent years, memristive crossbar-based
                 neuromorphic computing systems (NCS) have obtained
                 extremely high performance in neural network
                 acceleration. However, adversarial attacks and
                 conductance variations of memristors bring reliability
                 challenges to \ldots{}},
  bibdate =      {Mon Jan 15 11:14:18 MST 2024},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

%%% TODAES 29(1), January 2024, article 9.  Abstract is a truncated excerpt.
@Article{Calzada:2024:HIS,
  author =       {Paul E. Calzada and Md. Sami {Ul Islam Sami} and Kimia
                 Zamiri Azar and Fahim Rahman and Farimah Farahmandi and
                 Mark Tehranipoor},
  title =        {Heterogeneous Integration Supply Chain Integrity
                 Through Blockchain and {CHSM}},
  journal =      j-TODAES,
  year =         {2024},
  month =        jan,
  volume =       {29},
  number =       {1},
  articleno =    {9},
  pages =        {9:1--9:??},
  DOI =          {https://doi.org/10.1145/3625823},
  URL =          {https://dl.acm.org/doi/10.1145/3625823},
  CODEN =        {ATASFO},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
  abstract =     {Over the past few decades, electronics have become
                 commonplace in government, commercial, and social
                 domains. These devices have developed rapidly, as seen
                 in the prevalent use of system-on-chips rather than
                 separate integrated circuits on a single \ldots{}},
  bibdate =      {Mon Jan 15 11:14:18 MST 2024},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

%%% TODAES 29(1), January 2024, article 10.  Abstract is a truncated excerpt.
@Article{Cui:2024:RAA,
  author =       {Xiaole Cui and Mingqi Yin and Hanqing Liu and Xiaoxin
                 Cui},
  title =        {The Resistance Analysis Attack and Security
                 Enhancement of the {IMC LUT} Based on the Complementary
                 Resistive Switch Cells},
  journal =      j-TODAES,
  year =         {2024},
  month =        jan,
  volume =       {29},
  number =       {1},
  articleno =    {10},
  pages =        {10:1--10:??},
  DOI =          {https://doi.org/10.1145/3616870},
  URL =          {https://dl.acm.org/doi/10.1145/3616870},
  CODEN =        {ATASFO},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
  abstract =     {The resistive random access memory (RRAM) based
                 in-memory computing (IMC) is an emerging architecture
                 to address the challenge of the ``memory wall''
                 problem. The complementary resistive switch (CRS) cell
                 connects two bipolar RRAM elements anti-serially to
                 \ldots{}},
  bibdate =      {Mon Jan 15 11:14:18 MST 2024},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

%%% TODAES 29(1), January 2024, article 11.  Abstract is a truncated excerpt.
@Article{Xiao:2024:IRI,
  author =       {Jie Xiao and Yingying Ge and Ru Wang and Jungang Lou},
  title =        {{ICP-RL}: Identifying Critical Paths for Fault
                 Diagnosis Using Reinforcement Learning},
  journal =      j-TODAES,
  year =         {2024},
  month =        jan,
  volume =       {29},
  number =       {1},
  articleno =    {11},
  pages =        {11:1--11:??},
  DOI =          {https://doi.org/10.1145/3610294},
  URL =          {https://dl.acm.org/doi/10.1145/3610294},
  CODEN =        {ATASFO},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
  abstract =     {Identifying the critical paths is crucial to reducing
                 the complexity of performance analysis and reliability
                 calculation for logic circuits. In this article, we
                 propose a method for identifying the critical path in a
                 combination circuit using a \ldots{}},
  bibdate =      {Mon Jan 15 11:14:18 MST 2024},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

%%% TODAES 29(1), January 2024, article 12.  Abstract is a truncated excerpt.
@Article{Guo:2024:YOA,
  author =       {Nanlin Guo and Fulin Peng and Jiahe Shi and Fan Yang
                 and Jun Tao and Xuan Zeng},
  title =        {Yield Optimization for Analog Circuits over Multiple
                 Corners via {Bayesian} Neural Networks: Enhancing
                 Circuit Reliability under Environmental Variation},
  journal =      j-TODAES,
  year =         {2024},
  month =        jan,
  volume =       {29},
  number =       {1},
  articleno =    {12},
  pages =        {12:1--12:??},
  DOI =          {https://doi.org/10.1145/3626321},
  URL =          {https://dl.acm.org/doi/10.1145/3626321},
  CODEN =        {ATASFO},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
  abstract =     {The reliability of circuits is significantly affected
                 by process variations in manufacturing and
                 environmental variation during operation. Current yield
                 optimization algorithms take process variations into
                 consideration to improve circuit reliability.
                 \ldots{}},
  bibdate =      {Mon Jan 15 11:14:18 MST 2024},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

%%% TODAES 29(1), January 2024, article 13.  Abstract is a truncated excerpt.
@Article{Peng:2024:CTD,
  author =       {Qingsong Peng and Jingchang Bian and Zhengfeng Huang
                 and Senling Wang and Aibin Yan},
  title =        {A Compact {TRNG} Design for {FPGA} Based on the
                 Metastability of {RO}-driven Shift Registers},
  journal =      j-TODAES,
  year =         {2024},
  month =        jan,
  volume =       {29},
  number =       {1},
  articleno =    {13},
  pages =        {13:1--13:??},
  DOI =          {https://doi.org/10.1145/3610295},
  URL =          {https://dl.acm.org/doi/10.1145/3610295},
  CODEN =        {ATASFO},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
  abstract =     {True random number generators (TRNGs), as an important
                 component of security systems, have received a lot of
                 attention for their related research. The previous
                 researches have provided a large number of TRNG
                 solutions, however, they still failed to reach
                 \ldots{}},
  bibdate =      {Mon Jan 15 11:14:18 MST 2024},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/prng.bib;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

%%% TODAES 29(1), January 2024, article 14.  Abstract is a truncated excerpt.
@Article{Sun:2024:LLD,
  author =       {Rihui Sun and Pengfei Qiu and Yongqiang Lyu and Jian
                 Dong and Haixia Wang and Dongsheng Wang and Gang Qu},
  title =        {{Lightning}: Leveraging {DVFS-induced} Transient Fault
                 Injection to Attack Deep Learning Accelerator of
                 {GPUs}},
  journal =      j-TODAES,
  year =         {2024},
  month =        jan,
  volume =       {29},
  number =       {1},
  articleno =    {14},
  pages =        {14:1--14:??},
  DOI =          {https://doi.org/10.1145/3617893},
  URL =          {https://dl.acm.org/doi/10.1145/3617893},
  CODEN =        {ATASFO},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
  abstract =     {Graphics Processing Units (GPU) are widely used as
                 deep learning accelerators because of its high
                 performance and low power consumption. Additionally, it
                 remains secure against hardware-induced transient fault
                 injection attacks, a classic type of attacks \ldots{}},
  bibdate =      {Mon Jan 15 11:14:18 MST 2024},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

%%% TODAES 29(1), January 2024, article 15.  Abstract is a truncated excerpt.
@Article{Saglican:2024:MDV,
  author =       {Enes Sa{\u{g}}lican and Engin Afacan},
  title =        {{MOEA\slash D} vs. {NSGA-II}: a Comprehensive
                 Comparison for Multi\slash Many Objective Analog\slash
                 {RF} Circuit Optimization through a Generic Benchmark},
  journal =      j-TODAES,
  year =         {2024},
  month =        jan,
  volume =       {29},
  number =       {1},
  articleno =    {15},
  pages =        {15:1--15:??},
  DOI =          {https://doi.org/10.1145/3626096},
  URL =          {https://dl.acm.org/doi/10.1145/3626096},
  CODEN =        {ATASFO},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
  abstract =     {Thanks to the enhanced computational capacity of
                 modern computers, even sophisticated analog/radio
                 frequency (RF) circuit sizing problems can be solved
                 via electronic design automation (EDA) tools. Recently,
                 several analog/RF circuit optimization \ldots{}},
  bibdate =      {Mon Jan 15 11:14:18 MST 2024},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

%%% TODAES 29(1), January 2024, article 16.  Abstract is a truncated excerpt.
@Article{Rapp:2024:NAI,
  author =       {Martin Rapp and Heba Khdr and Nikita Krohmer and
                 J{\"o}rg Henkel},
  title =        {{NPU}-Accelerated Imitation Learning for Thermal
                 Optimization of {QoS}-Constrained Heterogeneous
                 Multi-Cores},
  journal =      j-TODAES,
  year =         {2024},
  month =        jan,
  volume =       {29},
  number =       {1},
  articleno =    {16},
  pages =        {16:1--16:??},
  DOI =          {https://doi.org/10.1145/3626320},
  URL =          {https://dl.acm.org/doi/10.1145/3626320},
  CODEN =        {ATASFO},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
  abstract =     {Thermal optimization of a heterogeneous clustered
                 multi-core processor under user-defined QoS targets
                 requires application migration and DVFS. However,
                 selecting the core to execute each application and the
                 VF levels of each cluster is a complex problem
                 \ldots{}},
  bibdate =      {Mon Jan 15 11:14:18 MST 2024},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

%%% TODAES 29(1), January 2024, article 17.  Abstract is a truncated excerpt.
@Article{Dewan:2024:CAM,
  author =       {Monzurul Islam Dewan and Sheng-En David Lin and Dae
                 Hyun Kim},
  title =        {Construction of All Multilayer Monolithic {RSMTs} and
                 Its Application to Monolithic {$3$D} {IC} Routing},
  journal =      j-TODAES,
  year =         {2024},
  month =        jan,
  volume =       {29},
  number =       {1},
  articleno =    {17},
  pages =        {17:1--17:??},
  DOI =          {https://doi.org/10.1145/3626958},
  URL =          {https://dl.acm.org/doi/10.1145/3626958},
  CODEN =        {ATASFO},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
  abstract =     {Monolithic three-dimensional (M3D) integration allows
                 ultra-thin silicon tier stacking in a single package.
                 The high-density stacking is acquiring interest and is
                 becoming more popular for smaller footprint areas,
                 shorter wirelength, higher performance, \ldots{}},
  bibdate =      {Mon Jan 15 11:14:18 MST 2024},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

%%% TODAES 29(1), January 2024, article 18.  Abstract is a truncated excerpt.
@Article{Chhabria:2024:MLA,
  author =       {Vidya A. Chhabria and Wenjing Jiang and Andrew B.
                 Kahng and Sachin S. Sapatnekar},
  title =        {A Machine Learning Approach to Improving Timing
                 Consistency between Global Route and Detailed Route},
  journal =      j-TODAES,
  year =         {2024},
  month =        jan,
  volume =       {29},
  number =       {1},
  articleno =    {18},
  pages =        {18:1--18:??},
  DOI =          {https://doi.org/10.1145/3626959},
  URL =          {https://dl.acm.org/doi/10.1145/3626959},
  CODEN =        {ATASFO},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
  abstract =     {Due to the unavailability of routing information in
                 design stages prior to detailed routing (DR), the tasks
                 of timing prediction and optimization pose major
                 challenges. Inaccurate timing prediction wastes design
                 effort, hurts circuit performance, and may \ldots{}},
  bibdate =      {Mon Jan 15 11:14:18 MST 2024},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

%%% TODAES 29(1), January 2024, article 19.  Abstract is a truncated excerpt.
@Article{Pandey:2024:NDT,
  author =       {Shailja Pandey and Lokesh Siddhu and Preeti Ranjan
                 Panda},
  title =        {{NeuroCool}: Dynamic Thermal Management of {$3$D}
                 {DRAM} for Deep Neural Networks through Customized
                 Prefetching},
  journal =      j-TODAES,
  year =         {2024},
  month =        jan,
  volume =       {29},
  number =       {1},
  articleno =    {19},
  pages =        {19:1--19:??},
  DOI =          {https://doi.org/10.1145/3630012},
  URL =          {https://dl.acm.org/doi/10.1145/3630012},
  CODEN =        {ATASFO},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
  abstract =     {Deep neural network (DNN) implementations are
                 typically characterized by huge datasets and concurrent
                 computation, resulting in a demand for high memory
                 bandwidth due to intensive data movement between
                 processors and off-chip memory. Performing DNN
                 \ldots{}},
  bibdate =      {Mon Jan 15 11:14:18 MST 2024},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

%%% TODAES 29(1), January 2024, article 20.  Abstract has no truncation ellipsis.
@Article{Bai:2024:BER,
  author =       {Chen Bai and Qi Sun and Jianwang Zhai and Yuzhe Ma and
                 Bei Yu and Martin D. F. Wong},
  title =        {{BOOM-Explorer}: {RISC-V} {BOOM} Microarchitecture
                 Design Space Exploration},
  journal =      j-TODAES,
  year =         {2024},
  month =        jan,
  volume =       {29},
  number =       {1},
  articleno =    {20},
  pages =        {20:1--20:??},
  DOI =          {https://doi.org/10.1145/3630013},
  URL =          {https://dl.acm.org/doi/10.1145/3630013},
  CODEN =        {ATASFO},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
  abstract =     {Microarchitecture parameters tuning is critical in the
                 microprocessor design cycle. It is a non-trivial design
                 space exploration (DSE) problem due to the large
                 solution space, cycle-accurate simulators' modeling
                 inaccuracy, and high simulation runtime for performance
                 evaluations. Previous methods require massive expert
                 efforts to construct interpretable equations or high
                 computing resource demands to train black-box
                 prediction models. This article follows the black-box
                 methods due to better solution qualities than
                 analytical methods in general. We summarize two learned
                 lessons and propose BOOM-Explorer accordingly. First,
                 embedding microarchitecture domain knowledge in the DSE
                 improves the solution quality. Second, BOOM-Explorer
                 makes the microarchitecture DSE for
                 register-transfer-level designs within the limited time
                 budget feasible. We enhance BOOM-Explorer with the
                 diversity-guidance, further improving the algorithm
                 performance. Experimental results with RISC-V
                 Berkeley-Out-of-Order Machine under 7-nm technology
                 show that our proposed methodology achieves an average
                 of 18.75\% higher Pareto hypervolume, 35.47\% less
                 average distance to reference set, and 65.38\% less
                 overall running time compared to previous approaches.},
  bibdate =      {Mon Jan 15 11:14:18 MST 2024},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/risc-v.bib;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

%%% TODAES 29(1), January 2024, article 21.  Abstract is a truncated excerpt.
@Article{Li:2024:MFO,
  author =       {Wanqian Li and Yinhe Han and Xiaoming Chen},
  title =        {Mathematical Framework for Optimizing Crossbar
                 Allocation for {ReRAM}-based {CNN} Accelerators},
  journal =      j-TODAES,
  year =         {2024},
  month =        jan,
  volume =       {29},
  number =       {1},
  articleno =    {21},
  pages =        {21:1--21:??},
  DOI =          {https://doi.org/10.1145/3631523},
  URL =          {https://dl.acm.org/doi/10.1145/3631523},
  CODEN =        {ATASFO},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
  abstract =     {The resistive random-access memory (ReRAM) has widely
                 been used to accelerate convolutional neural networks
                 (CNNs) thanks to its analog in-memory computing
                 capability. ReRAM crossbars not only store layers'
                 weights, but also perform in-situ matrix-vector
                 \ldots{}},
  bibdate =      {Mon Jan 15 11:14:18 MST 2024},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

%%% TODAES 29(1), January 2024, article 22.  Abstract is a truncated excerpt.
%%% Author names must not carry publisher footnote markers such as `*':
%%% BibTeX takes the last token of each name as the surname, so a trailing
%%% marker would replace the real family name in labels and sorting.
@Article{Wu:2024:FDC,
  author =       "Dan Wu and Peng Chen and Thilini Kaushalya Bandara
                 and Zhaoying Li and Tulika Mitra",
  title =        "{Flip}: Data-centric Edge {CGRA} Accelerator",
  journal =      j-TODAES,
  volume =       "29",
  number =       "1",
  pages =        "22:1--22:??",
  month =        jan,
  year =         "2024",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3631118",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Jan 15 11:14:18 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3631118",
  abstract =     "Coarse-Grained Reconfigurable Arrays (CGRA) are
                 promising edge accelerators due to the outstanding
                 balance in flexibility, performance, and energy
                 efficiency. Classic CGRAs statically map compute
                 operations onto the processing elements (PE) and route
                 the \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron. Syst.",
  articleno =    "22",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

%%% TODAES 29(1), January 2024, article 23.  Abstract has no truncation ellipsis.
@Article{Wu:2024:SAM,
  author =       {Ying Wu and Chuangtao Chen and Weihua Xiao and Xuan
                 Wang and Chenyi Wen and Jie Han and Xunzhao Yin and
                 Weikang Qian and Cheng Zhuo},
  title =        {A Survey on Approximate Multiplier Designs for Energy
                 Efficiency: From Algorithms to Circuits},
  journal =      j-TODAES,
  year =         {2024},
  month =        jan,
  volume =       {29},
  number =       {1},
  articleno =    {23},
  pages =        {23:1--23:??},
  DOI =          {https://doi.org/10.1145/3610291},
  URL =          {https://dl.acm.org/doi/10.1145/3610291},
  CODEN =        {ATASFO},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
  abstract =     {Given the stringent requirements of energy efficiency
                 for Internet-of-Things edge devices, approximate
                 multipliers, as a basic component of many processors
                 and accelerators, have been constantly proposed and
                 studied for decades, especially in error-resilient
                 applications. The computation error and energy
                 efficiency largely depend on how and where the
                 approximation is introduced into a design. Thus, this
                 article aims to provide a comprehensive review of the
                 approximation techniques in multiplier designs ranging
                 from algorithms and architectures to circuits. We have
                 implemented representative approximate multiplier
                 designs in each category to understand the impact of
                 the design techniques on accuracy and efficiency. The
                 designs can then be effectively deployed in high-level
                 applications, such as machine learning, to gain energy
                 efficiency at the cost of slight accuracy loss.},
  bibdate =      {Mon Jan 15 11:14:18 MST 2024},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/fparith.bib;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

%%% TODAES 29(2), March 2024, article 24.  Abstract is a truncated excerpt.
@Article{Liang:2024:DAU,
  author =       {Tung-Che Liang and Yi-Chen Chang and Zhanwei Zhong and
                 Yaas Bigdeli and Tsung-Yi Ho and Krishnendu Chakrabarty
                 and Richard Fair},
  title =        {Dynamic Adaptation Using Deep Reinforcement Learning
                 for Digital Microfluidic Biochips},
  journal =      j-TODAES,
  year =         {2024},
  month =        mar,
  volume =       {29},
  number =       {2},
  articleno =    {24},
  pages =        {24:1--24:??},
  DOI =          {https://doi.org/10.1145/3633458},
  URL =          {https://dl.acm.org/doi/10.1145/3633458},
  CODEN =        {ATASFO},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
  abstract =     {We describe an exciting new application domain for
                 deep reinforcement learning (RL): droplet routing on
                 digital microfluidic biochips (DMFBs). A DMFB consists
                 of a two-dimensional electrode array, and it
                 manipulates droplets of liquid to automatically
                 \ldots{}},
  bibdate =      {Tue Mar 19 08:17:52 MDT 2024},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@Article{Qian:2024:ERL,
  author =       {Yu Qian and Xuegong Zhou and Hao Zhou and Lingli
                 Wang},
  title =        {An Efficient Reinforcement Learning Based Framework
                 for Exploring Logic Synthesis},
  journal =      j-TODAES,
  volume =       {29},
  number =       {2},
  pages =        {25:1--25:??},
  month =        mar,
  year =         {2024},
  CODEN =        {ATASFO},
  DOI =          {https://doi.org/10.1145/3632174},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  bibdate =      {Tue Mar 19 08:17:52 MDT 2024},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3632174},
  abstract =     {Logic synthesis is a crucial step in electronic design
                 automation tools. The rapid developments of
                 reinforcement learning (RL) have enabled the automated
                 exploration of logic synthesis. Existing RL based
                 methods may lead to data inefficiency, and the
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  articleno =    {25},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
}

@Article{Wang:2024:SSG,
  author =       {Bo Wang and Sheng Ma and Shengbai Luo and Lizhou Wu
                 and Jianmin Zhang and Chunyuan Zhang and Tiejun Li},
  title =        {{SparGD}: a Sparse {GEMM} Accelerator with Dynamic
                 Dataflow},
  journal =      j-TODAES,
  volume =       {29},
  number =       {2},
  pages =        {26:1--26:??},
  month =        mar,
  year =         {2024},
  CODEN =        {ATASFO},
  DOI =          {https://doi.org/10.1145/3634703},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  bibdate =      {Tue Mar 19 08:17:52 MDT 2024},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3634703},
  abstract =     {Deep learning has become a highly popular research
                 field, and previously deep learning algorithms ran
                 primarily on CPUs and GPUs. However, with the rapid
                 development of deep learning, it was discovered that
                 existing processors could not meet the specific
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  articleno =    {26},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
}

@Article{Kaur:2024:RRS,
  author =       {Jaspinder Kaur and Shirshendu Das},
  title =        {{RSPP}: Restricted Static Pseudo-Partitioning for
                 Mitigation of Cross-Core Covert Channel Attacks},
  journal =      j-TODAES,
  volume =       {29},
  number =       {2},
  pages =        {27:1--27:??},
  month =        mar,
  year =         {2024},
  CODEN =        {ATASFO},
  DOI =          {https://doi.org/10.1145/3637222},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  bibdate =      {Tue Mar 19 08:17:52 MDT 2024},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3637222},
  abstract =     {Cache timing channel attacks exploit the inherent
                 properties of cache memories: hit and miss time along
                 with the shared nature of the cache to leak secret
                 information. The side channel and covert channel are
                 the two well-known cache timing channel \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  articleno =    {27},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
}

@Article{Kim:2024:OMP,
  author =       {Seok Young Kim and Jaewook Lee and Yoonah Paik and
                 Chang Hyun Kim and Won Jun Lee and Seon Wook Kim},
  title =        {Optimal Model Partitioning with Low-Overhead Profiling
                 on the {PIM}-based Platform for Deep Learning
                 Inference},
  journal =      j-TODAES,
  volume =       {29},
  number =       {2},
  pages =        {28:1--28:??},
  month =        mar,
  year =         {2024},
  CODEN =        {ATASFO},
  DOI =          {https://doi.org/10.1145/3628599},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  bibdate =      {Tue Mar 19 08:17:52 MDT 2024},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3628599},
  abstract =     {Recently Processing-in-Memory (PIM) has become a
                 promising solution to achieve energy-efficient
                 computation in data-intensive applications by placing
                 computation near or inside the memory. In most Deep
                 Learning (DL) frameworks, a user manually partitions
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  articleno =    {28},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
}

@Article{Niu:2024:ECS,
  author =       {Linwei Niu and Danda B. Rawat and Jonathan Musselwhite
                 and Zonghua Gu and Qingxu Deng},
  title =        {Energy-Constrained Scheduling for Weakly Hard
                 Real-Time Systems Using Standby-Sparing},
  journal =      j-TODAES,
  volume =       {29},
  number =       {2},
  pages =        {29:1--29:??},
  month =        mar,
  year =         {2024},
  CODEN =        {ATASFO},
  DOI =          {https://doi.org/10.1145/3631587},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  bibdate =      {Tue Mar 19 08:17:52 MDT 2024},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3631587},
  abstract =     {For real-time embedded systems, QoS (Quality of
                 Service), fault tolerance, and energy budget constraint
                 are among the primary design concerns. In this
                 research, we investigate the problem of energy
                 constrained standby-sparing for both periodic and
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  articleno =    {29},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
}

@Article{Ardalani:2024:DCS,
  author =       "Newsha Ardalani and Saptadeep Pal and Puneet Gupta",
  title =        "{DeepFlow}: a Cross-Stack Pathfinding Framework for
                 Distributed {AI} Systems",
  journal =      j-TODAES,
  volume =       "29",
  number =       "2",
  pages =        "30:1--30:??",
  month =        mar,
  year =         "2024",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3635867",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Mar 19 08:17:52 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3635867",
  abstract =     "Over the past decade, machine learning model
                 complexity has grown at an extraordinary rate, as has
                 the scale of the systems training such large models.
                 However, there is an alarmingly low hardware
                 utilization (5--20\%) in large scale AI systems. The low
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron. Syst.",
  articleno =    "30",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{S:2024:SAS,
  author =       {Deepanjali S. and Noor Mahammad SK},
  title =        {Scalable and Accelerated Self-healing Control Circuit
                 Using Evolvable Hardware},
  journal =      j-TODAES,
  volume =       {29},
  number =       {2},
  pages =        {31:1--31:??},
  month =        mar,
  year =         {2024},
  CODEN =        {ATASFO},
  DOI =          {https://doi.org/10.1145/3634682},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  bibdate =      {Tue Mar 19 08:17:52 MDT 2024},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3634682},
  abstract =     {Controllers are mission-critical components of any
                 electronic design. By sending control signals, they
                 decide which and when other data path elements must
                 operate. Faults, especially Single Event Upset (SEU)
                 occurrence in these components, can lead to \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  articleno =    {31},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
}

@Article{Lu:2024:GPA,
  author =       {Yi-Chen Lu and Haoxing Ren and Hao-Hsiang Hsiao and
                 Sung Kyu Lim},
  title =        {{GAN-Place}: Advancing Open Source Placers to
                 Commercial-quality Using Generative Adversarial
                 Networks and Transfer Learning},
  journal =      j-TODAES,
  volume =       {29},
  number =       {2},
  pages =        {32:1--32:??},
  month =        mar,
  year =         {2024},
  CODEN =        {ATASFO},
  DOI =          {https://doi.org/10.1145/3636461},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  bibdate =      {Tue Mar 19 08:17:52 MDT 2024},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3636461},
  abstract =     {Recently, GPU-accelerated placers such as DREAMPlace
                 and Xplace have demonstrated their superiority over
                 traditional CPU-reliant placers by achieving orders of
                 magnitude speed up in placement runtime. However, due
                 to their limited focus in placement \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  articleno =    {32},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
}

@Article{Deng:2024:ERT,
  author =       {Libing Deng and Gang Zeng and Ryo Kurachi and Hiroaki
                 Takada and Xiongren Xiao and Renfa Li and Guoqi Xie},
  title =        {Enhanced Real-time Scheduling of {AVB} Flows in
                 Time-Sensitive Networking},
  journal =      j-TODAES,
  volume =       {29},
  number =       {2},
  pages =        {33:1--33:??},
  month =        mar,
  year =         {2024},
  CODEN =        {ATASFO},
  DOI =          {https://doi.org/10.1145/3637878},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  bibdate =      {Tue Mar 19 08:17:52 MDT 2024},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3637878},
  abstract =     {Time-Sensitive Networking (TSN) realizes high
                 bandwidth and time determinism for data transmission
                 and thus becomes the crucial communication technology
                 in time-critical systems. The Gate Control List (GCL)
                 is used to control the transmission of different
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  articleno =    {33},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
}

@Article{Sankar:2024:TTA,
  author =       {Syam Sankar and Ruchika Gupta and John Jose and
                 Sukumar Nandi},
  title =        {{TROP}: {TRust-aware OPportunistic} Routing in {NoC}
                 with Hardware {Trojans}},
  journal =      j-TODAES,
  volume =       {29},
  number =       {2},
  pages =        {34:1--34:??},
  month =        mar,
  year =         {2024},
  CODEN =        {ATASFO},
  DOI =          {https://doi.org/10.1145/3639821},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  bibdate =      {Tue Mar 19 08:17:52 MDT 2024},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3639821},
  abstract =     {Multiple software and hardware intellectual property
                 (IP) components are combined on a single chip to form
                 Multi-Processor Systems-on-Chips (MPSoCs). Due to the
                 rigid time-to-market constraints, some of the IPs are
                 from outsourced third parties. Due to \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  articleno =    {34},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
}

@Article{Huang:2024:ALV,
  author =       "Bo-Yuan Huang and Steven Lyubomirsky and Yi Li and
                 Mike He and Gus Henry Smith and Thierry Tambe and Akash
                 Gaonkar and Vishal Canumalla and Andrew Cheung and
                 Gu-Yeon Wei and Aarti Gupta and Zachary Tatlock and
                 Sharad Malik",
  title =        "Application-level Validation of Accelerator Designs
                 Using a Formal Software\slash Hardware Interface",
  journal =      j-TODAES,
  volume =       "29",
  number =       "2",
  pages =        "35:1--35:??",
  month =        mar,
  year =         "2024",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3639051",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Mar 19 08:17:52 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3639051",
  abstract =     "Ideally, accelerator development should be as easy as
                 software development. Several recent design
                 languages\slash tools are working toward this goal, but
                 actually testing early designs on real applications
                 end-to-end remains prohibitively difficult due to the
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron. Syst.",
  articleno =    "35",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}

@Article{Tang:2024:MIP,
  author =       {Ke Tang and Lang Feng and Zhongfeng Wang},
  title =        {Mixed Integer Programming based Placement Refinement
                 by {RSMT} Model with Movable Pins},
  journal =      j-TODAES,
  volume =       {29},
  number =       {2},
  pages =        {36:1--36:??},
  month =        mar,
  year =         {2024},
  CODEN =        {ATASFO},
  DOI =          {https://doi.org/10.1145/3639365},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  bibdate =      {Tue Mar 19 08:17:52 MDT 2024},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3639365},
  abstract =     {Placement is a critical step in the physical design
                 for digital application specific integrated circuits
                 (ASICs), as it can directly affect the design qualities
                 such as wirelength and timing. For many domain specific
                 designs, the demands for high \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  articleno =    {36},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
}

@Article{NS:2024:POA,
  author =       {Karthik Somayaji NS and Peng Li},
  title =        {{Pareto} Optimization of Analog Circuits Using
                 Reinforcement Learning},
  journal =      j-TODAES,
  volume =       {29},
  number =       {2},
  pages =        {37:1--37:??},
  month =        mar,
  year =         {2024},
  CODEN =        {ATASFO},
  DOI =          {https://doi.org/10.1145/3640463},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  bibdate =      {Tue Mar 19 08:17:52 MDT 2024},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3640463},
  abstract =     {Analog circuit optimization and design presents a
                 unique set of challenges in the IC design process. Many
                 applications require the designer to optimize for
                 multiple competing objectives, which poses a crucial
                 challenge. Motivated by these practical \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  articleno =    {37},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
}

@Article{Jiang:2024:RHF,
  author =       {Danping Jiang and Zibin Dai and Yanjiang Liu and
                 Zongren Zhang},
  title =        {{RGMU}: a High-flexibility and Low-cost Reconfigurable
                 Galois Field Multiplication Unit Design Approach for
                 {CGRCA}},
  journal =      j-TODAES,
  volume =       {29},
  number =       {2},
  pages =        {38:1--38:??},
  month =        mar,
  year =         {2024},
  CODEN =        {ATASFO},
  DOI =          {https://doi.org/10.1145/3639820},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  bibdate =      {Tue Mar 19 08:17:52 MDT 2024},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3639820},
  abstract =     {Finite field multiplication is a non-linear
                 transformation operator that appears in the majority of
                 symmetric cryptographic algorithms. Numerous specified
                 finite field multiplication units have been proposed as
                 a fundamental module in the coarse-grained \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  articleno =    {38},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
}

@Article{Wang:2024:MLC,
  author =       {Jianfeng Wang and Zhonghao Chen and Jiahao Zhang and
                 Yixin Xu and Tongguang Yu and Ziheng Zheng and Enze Ye
                 and Sumitha George and Huazhong Yang and Yongpan Liu
                 and Kai Ni and Vijaykrishnan Narayanan and Xueqing Li},
  title =        {A Module-Level Configuration Methodology for
                 Programmable Camouflaged Logic},
  journal =      j-TODAES,
  volume =       {29},
  number =       {2},
  pages =        {39:1--39:??},
  month =        mar,
  year =         {2024},
  CODEN =        {ATASFO},
  DOI =          {https://doi.org/10.1145/3640462},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  bibdate =      {Tue Mar 19 08:17:52 MDT 2024},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3640462},
  abstract =     {Logic camouflage is a widely adopted technique that
                 mitigates the threat of intellectual property (IP)
                 piracy and overproduction in the integrated circuit
                 (IC) supply chain. Camouflaged logic achieves
                 functional obfuscation through physical-level
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  articleno =    {39},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
}

@Article{Weerasena:2024:SEO,
  author =       {Hansika Weerasena and Prabhat Mishra},
  title =        {Security of Electrical, Optical, and Wireless On-chip
                 Interconnects: a Survey},
  journal =      j-TODAES,
  volume =       {29},
  number =       {2},
  pages =        {40:1--40:??},
  month =        mar,
  year =         {2024},
  CODEN =        {ATASFO},
  DOI =          {https://doi.org/10.1145/3631117},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L =       {1084-4309},
  bibdate =      {Tue Mar 19 08:17:52 MDT 2024},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3631117},
  abstract =     {The advancement of manufacturing technologies has
                 enabled the integration of more intellectual property
                 (IP) cores on the same system-on-chip (SoC). Scalable
                 and high throughput on-chip communication architecture
                 has become a vital component in today's \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Transact. Des. Automat. Electron. Syst.},
  articleno =    {40},
  fjournal =     {ACM Transactions on Design Automation of Electronic
                 Systems},
  journal-URL =  {https://dl.acm.org/loi/todaes},
}