%%% -*-BibTeX-*-
%%% ====================================================================
%%% BibTeX-file{
%%%     author          = "Nelson H. F. Beebe",
%%%     version         = "1.55",
%%%     date            = "25 August 2014",
%%%     time            = "19:04:53 MDT",
%%%     filename        = "todaes.bib",
%%%     address         = "University of Utah
%%%                        Department of Mathematics, 110 LCB
%%%                        155 S 1400 E RM 233
%%%                        Salt Lake City, UT 84112-0090
%%%                        USA",
%%%     telephone       = "+1 801 581 5254",
%%%     FAX             = "+1 801 581 4148",
%%%     URL             = "http://www.math.utah.edu/~beebe",
%%%     checksum        = "63965 28490 147749 1472922",
%%%     email           = "beebe at math.utah.edu, beebe at acm.org,
%%%                        beebe at computer.org (Internet)",
%%%     codetable       = "ISO/ASCII",
%%%     keywords        = "bibliography; BibTeX; ACM Transactions on
%%%                        Design Automation of Electronic Systems;
%%%                        TODAES",
%%%     license         = "public domain",
%%%     supported       = "yes",
%%%     docstring       = "This is a COMPLETE BibTeX bibliography for
%%%                        ACM Transactions on Design Automation of
%%%                        Electronic Systems (CODEN ATASFO, ISSN
%%%                        1084-4309), completely covering all issues
%%%                        from volume 1, number 1, January 1996 to
%%%                        date.
%%%
%%%                        The ACM maintains World Wide Web pages with
%%%                        journal tables of contents for 1996--date at
%%%
%%%                            http://www.acm.org/todaes/
%%%                            http://www.acm.org/pubs/contents/journals/todaes/
%%%                            http://portal.acm.org/browse_dl.cfm?idx=J776
%%%
%%%                        That data has been automatically converted to
%%%                        BibTeX form, corrected for spelling and page
%%%                        number errors, and merged into this file.
%%%
%%%                        At version 1.55, the COMPLETE year coverage
%%%                        looks like this:
%%%
%%%                             1996 (  20)    2003 (  30)    2010 (  35)
%%%                             1997 (  19)    2004 (  21)    2011 (  40)
%%%                             1998 (  29)    2005 (  35)    2012 (  67)
%%%                             1999 (  17)    2006 (  44)    2013 (  55)
%%%                             2000 (  35)    2007 (  52)    2014 (  32)
%%%                             2001 (  28)    2008 (  68)
%%%                             2002 (  31)    2009 (  65)
%%%
%%%                             Article:        723
%%%
%%%                             Total entries:  723
%%%
%%%                        Numerous errors in the sources noted above
%%%                        have been corrected.   Spelling has been
%%%                        verified with the UNIX spell and GNU ispell
%%%                        programs using the exception dictionary
%%%                        stored in the companion file with extension
%%%                        .sok.
%%%
%%%                        ACM copyrights explicitly permit abstracting
%%%                        with credit, so article abstracts, keywords,
%%%                        and subject classifications have been
%%%                        included in this bibliography wherever
%%%                        available.  Article reviews have been
%%%                        omitted, until their copyright status has
%%%                        been clarified.
%%%
%%%                        bibsource keys in the bibliography entries
%%%                        below indicate the entry originally came
%%%                        from the computer science bibliography
%%%                        archive, even though it has likely since
%%%                        been corrected and updated.
%%%
%%%                        URL keys in the bibliography point to
%%%                        World Wide Web locations of additional
%%%                        information about the entry.
%%%
%%%                        BibTeX citation tags are uniformly chosen
%%%                        as name:year:abbrev, where name is the
%%%                        family name of the first author or editor,
%%%                        year is a 4-digit number, and abbrev is a
%%%                        3-letter condensation of important title
%%%                        words. Citation tags were automatically
%%%                        generated by software developed by the
%%%                        author for the BibNet Project.
%%%
%%%                        In this bibliography, entries are sorted
%%%                        by journal, and then by publication order,
%%%                        with the help of ``bibsort -byvolume''.  The
%%%                        bibsort utility is available from
%%%                        ftp://ftp.math.utah.edu/pub/tex/bib.
%%%
%%%                        The author will be grateful for reports of
%%%                        errors of any kind in this bibliography.
%%%
%%%                        The checksum field above contains a CRC-16
%%%                        checksum as the first value, followed by the
%%%                        equivalent of the standard UNIX wc (word
%%%                        count) utility output of lines, words, and
%%%                        characters.  This is produced by Robert
%%%                        Solovay's checksum utility."
%%%     }
%%% ====================================================================

%%% LaTeX code that BibTeX passes through to the generated bibliography:
%%% it inputs bibnames.sty and then supplies fallback definitions of
%%% \circled, \reg, and \TM, each one only if that macro is still
%%% undefined when the bibliography is read.
@Preamble{
    "\input bibnames.sty"
  # "\ifx \undefined \circled \def \circled #1{(#1)}\fi"
  # "\ifx \undefined \reg \def \reg {\circled{R}}\fi"
  # "\ifx \undefined \TM \def \TM {${}^{\sc TM}$} \fi"
}

%%% ====================================================================
%%% Acknowledgement abbreviations:

%%% ack-nhfb: postal, telephone, e-mail, and Web contact details for
%%% Nelson H. F. Beebe; the entries below reference it from their
%%% acknowledgement field.
@String{ack-nhfb = "Nelson H. F. Beebe,
                    University of Utah,
                    Department of Mathematics, 110 LCB,
                    155 S 1400 E RM 233,
                    Salt Lake City, UT 84112-0090, USA,
                    Tel: +1 801 581 5254,
                    FAX: +1 801 581 4148,
                    e-mail: \path|beebe@math.utah.edu|,
                            \path|beebe@acm.org|,
                            \path|beebe@computer.org| (Internet),
                    URL: \path|http://www.math.utah.edu/~beebe/|"}

%%% ====================================================================
%%% Journal abbreviations:

%%% j-TODAES: the full journal title; the entries below use it as the
%%% value of their journal field.
@String{j-TODAES                = "ACM Transactions on Design Automation of
                                   Electronic Systems"}

%%% ====================================================================
%%% Bibliography entries from ACM Transactions on Design Automation of
%%% Electronic Systems.

%%% Volume 1, number 1 (January 1996), pages 3--56: survey of CAD
%%% methodologies for low-power digital CMOS design.
@Article{Pedram:1996:PMI,
  author =       "Massoud Pedram",
  title =        "Power minimization in {IC} design: principles and
                 applications",
  journal =      j-TODAES,
  volume =       "1",
  number =       "1",
  pages =        "3--56",
  month =        jan,
  year =         "1996",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1996-1-1/p3-pedram/p3-pedram.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1996-1-1/p3-pedram/",
  abstract =     "Low power has emerged as a principal theme in today's
                 electronics industry. The need for low power has caused
                 a major paradigm shift in which power dissipation is as
                 important as performance and area. This article
                 presents an in-depth survey of CAD methodologies and
                 techniques for designing low power digital CMOS
                 circuits and systems and describes the many issues
                 facing designers at architectural, logical, and
                 physical levels of design abstraction. It reviews some
                 of the techniques and tools that have been proposed to
                 overcome these difficulties and outlines the future
                 challenges that must be met to design low power, high
                 performance systems.",
  acknowledgement = ack-nhfb,
  generalterms = "Algorithms; Design; Experimentation; Performance",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "adiabatic circuits; CMOS circuits; computer-aided
                 design of VLSI; dynamic power dissipation; energy-delay
                 product; gated clocks; layout; low power layout; low
                 power synthesis; lower-power design; power analysis and
                 estimation; power management; power minimization and
                 management; probabilistic analysis;
                 silicon-on-insulator technology; statistical sampling;
                 switched capacitance; switching activity; symbolic
                 simulation; synthesis; system design",
  subject =      "Hardware --- Integrated Circuits --- Types and Design
                 Styles (B.7.1): {\bf VLSI (very large scale
                 integration)}; Computer Applications --- Computer-Aided
                 Engineering (J.6): {\bf Computer-aided design (CAD)};
                 Hardware --- Integrated Circuits --- General (B.7.0)",
}

%%% Volume 1, number 1 (January 1996), pages 57--79: functional vector
%%% generation from an extended finite state machine (EFSM) model.
@Article{Cheng:1996:AGF,
  author =       "Kwang-Ting Cheng and A. S. Krishnakumar",
  title =        "Automatic generation of functional vectors using the
                 extended finite state machine model",
  journal =      j-TODAES,
  volume =       "1",
  number =       "1",
  pages =        "57--79",
  month =        jan,
  year =         "1996",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1996-1-1/p57-cheng/p57-cheng.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1996-1-1/p57-cheng/",
  abstract =     "We present a method of automatic generation of
                 functional vectors for sequential circuits. These
                 vectors can be used for design verification,
                 manufacturing testing, or power estimation. A
                 high-level description of the circuit in VHDL or C is
                 assumed available. Our method automatically transforms
                 the high-level description of a circuit in VHDL or C
                 into an extended finite state machine (EFSM) model that
                 is used to generate functional vectors. The EFSM model
                 is a generalization of the traditional state machine
                 model. It is a compact representation of models with
                 local data variables and preserves many nice properties
                 of a traditional state machine model. The theoretical
                 background of the EFSM model is addressed in this
                 article. Our method guarantees that the generated
                 vectors cover every statement in the high-level
                 description at least once. Experimental results show
                 that a set of comprehensive functional vectors for
                 sequential circuits with more than a hundred flip-flops
                 can be generated automatically in a few minutes of CPU
                 time using our prototype system.",
  acknowledgement = ack-nhfb,
  generalterms = "Algorithms; Experimentation; Languages; Theory;
                 Verification",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "automatic test generation; design verification;
                 extended finite state machines; functional testing",
  subject =      "Hardware --- Integrated Circuits --- Design Aids
                 (B.7.2): {\bf Verification}; Hardware --- Logic Design
                 --- Design Styles (B.6.1): {\bf Sequential circuits};
                 Theory of Computation --- Computation by Abstract
                 Devices --- Models of Computation (F.1.1): {\bf
                 Automata}; Mathematics of Computing --- Discrete
                 Mathematics --- Graph Theory (G.2.2): {\bf Graph
                 algorithms}; Hardware --- Integrated Circuits ---
                 Reliability and Testing** (B.7.3): {\bf Testability**};
                 Hardware --- Logic Design --- Design Aids (B.6.3): {\bf
                 Hardware description languages}; Hardware ---
                 Register-Transfer-Level Implementation --- Design Aids
                 (B.5.2)",
}

%%% Volume 1, number 1 (January 1996), pages 80--101: a class of
%%% universal switch modules for FPGA routing.
@Article{Chang:1996:USM,
  author =       "Yao-Wen Chang and D. F. Wong and C. K. Wong",
  title =        "Universal switch modules for {FPGA} design",
  journal =      j-TODAES,
  volume =       "1",
  number =       "1",
  pages =        "80--101",
  month =        jan,
  year =         "1996",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1996-1-1/p80-chang/p80-chang.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1996-1-1/p80-chang/",
  abstract =     "A switch module $M$ with $W$ terminals on each side is
                 said to be {\em universal\/} if every set of nets
                 satisfying the dimensional constraint (i.e., the number
                 of nets on each side of $M$ is at most $W$) is
                 simultaneously routable through $M$. In this article,
                 we present a class of universal switch modules. Each of
                 our switch modules has $6W$ switches and {\em
                 switch-module flexibility\/} three (i.e., $F_S = 3$).
                 We prove that no switch module with less than $6W$
                 switches can be universal. We also compare our switch
                 modules with those used in the Xilinx XC4000 family
                 FPGAs and the {\em antisymmetric\/} switch modules
                 (with $F_S = 3$) suggested by Rose and Brown [1991].
                 Although these two kinds of switch modules also have
                 $F_S = 3$ and $6W$ switches, we show that they are not
                 universal. Based on combinatorial counting techniques,
                 we show that each of our universal switch modules can
                 accommodate up to 25\% more routing instances, compared
                 with the XC4000-type switch module of the same size.
                 Experimental results demonstrate that our universal
                 switch modules improve routability at the chip level.
                 Finally, our work also provides a theoretical insight
                 into the important observation by Rose and Brown [1991]
                 (based on extensive experiments) that $F_S = 3$ is
                 often sufficient to provide high routability.",
  acknowledgement = ack-nhfb,
  generalterms = "Algorithms; Design; Experimentation; Measurement;
                 Performance; Theory; Verification",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  subject =      "Hardware --- Integrated Circuits --- Types and Design
                 Styles (B.7.1): {\bf Gate arrays}; Hardware ---
                 Integrated Circuits --- Design Aids (B.7.2): {\bf
                 Placement and routing}",
}

%%% Volume 1, number 1 (January 1996), pages 102--122: FPGA logic module
%%% design based on series-parallel (SP) functions.
@Article{Thakur:1996:SPF,
  author =       "Shashidhar Thakur and D. F. Wong",
  title =        "Series-parallel functions and {FPGA} logic module
                 design",
  journal =      j-TODAES,
  volume =       "1",
  number =       "1",
  pages =        "102--122",
  month =        jan,
  year =         "1996",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1996-1-1/p102-thakur/p102-thakur.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1996-1-1/p102-thakur/",
  abstract =     "The need for a two-way interaction between logic
                 synthesis and FPGA logic module design has been
                 stressed recently. Having a logic module that can
                 implement many functions is a good idea only if one can
                 also give a synthesis strategy that makes efficient use
                 of this functionality. Traditionally, technology
                 mapping algorithms have been developed after the logic
                 architecture has been designed. We follow a dual
                 approach, by focusing on a specific technology mapping
                 algorithm, namely, the structural tree-based mapping
                 algorithm, and designing a logic module that can be
                 mapped efficiently by this algorithm. It is known that
                 the tree-based mapping algorithm makes optimal use of a
                 library of functions, each of which can be represented
                 by a tree of AND, OR, and NOT gates (series-parallel or
                 SP functions). We show how to design a SP function with
                 a minimum number of inputs that can implement all
                 possible SP functions with a specified number of
                 inputs. For instance, we demonstrate a seven-input SP
                 function that can implement all four-input SP
                 functions. Mapping results show that, on an average,
                 the number of blocks of this function needed to map
                 benchmark circuits are 12\% less than those for Actel's
                 ACT1 logic modules. Our logic modules show a 4\%
                 improvement over ACT1, if the block count is scaled to
                 take into account the number of transistors needed to
                 implement different logic modules.",
  acknowledgement = ack-nhfb,
  generalterms = "Algorithms; Design; Experimentation; Performance;
                 Theory; Verification",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "field programmable gate arrays; series-parallel
                 technology mapping; tree-based technology mapping
                 algorithm; universal logic modules",
  subject =      "Hardware --- Integrated Circuits --- Types and Design
                 Styles (B.7.1): {\bf Gate arrays}; Hardware --- Logic
                 Design --- Design Styles (B.6.1): {\bf Combinational
                 logic}; Hardware --- Logic Design --- Design Aids
                 (B.6.3); Computer Applications --- Computer-Aided
                 Engineering (J.6): {\bf Computer-aided design (CAD)};
                 Mathematics of Computing --- Discrete Mathematics ---
                 Graph Theory (G.2.2): {\bf Trees}",
}

%%% Volume 1, number 1 (January 1996), pages 123--143: polynomial-time
%%% algorithms for folding standard and custom cells into chip rows.
@Article{Thanvantri:1996:OFS,
  author =       "Venkat Thanvantri and Sartaj Sahni",
  title =        "Optimal folding of standard and custom cells",
  journal =      j-TODAES,
  volume =       "1",
  number =       "1",
  pages =        "123--143",
  month =        jan,
  year =         "1996",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1996-1-1/p123-thanvantri/p123-thanvantri.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1996-1-1/p123-thanvantri/",
  abstract =     "We study the problem of folding an ordered list of
                 standard and custom cells into rows of a chip so as to
                 minimize either the routing area or the total chip
                 area. Nine versions of the folding problem are
                 formulated and fast polynomial time algorithms are
                 obtained for each. Two of our formulations correspond
                 to problems formulated in Paik and Sahni [1993] for the
                 folding of a stack of bit-slice components. Our
                 algorithms for these two formulations are
                 asymptotically superior to those of Paik and Sahni
                 [1993].",
  acknowledgement = ack-nhfb,
  generalterms = "Algorithms; Design; Experimentation; Measurement;
                 Performance; Theory",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "custom cell folding; layout area; standard cell
                 folding",
  subject =      "Hardware --- Integrated Circuits --- Types and Design
                 Styles (B.7.1): {\bf Gate arrays}; Hardware ---
                 Integrated Circuits --- Design Aids (B.7.2): {\bf
                 Layout}; Theory of Computation --- Analysis of
                 Algorithms and Problem Complexity --- Nonnumerical
                 Algorithms and Problems (F.2.2): {\bf Routing and
                 layout}",
}

%%% Volume 1, number 2 (April 1996), pages 145--204: survey of
%%% combinational logic synthesis results for LUT based FPGAs.
@Article{Cong:1996:CLS,
  author =       "Jason Cong and Yuzheng Ding",
  title =        "Combinational logic synthesis for {LUT} based field
                 programmable gate arrays",
  journal =      j-TODAES,
  volume =       "1",
  number =       "2",
  pages =        "145--204",
  month =        apr,
  year =         "1996",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1996-1-2/p145-cong/p145-cong.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1996-1-2/p145-cong/",
  abstract =     "The increasing popularity of the field programmable
                 gate-array (FPGA) technology has generated a great deal
                 of interest in the algorithmic study and tool
                 development for FPGA-specific design automation
                 problems. The most widely used FPGAs are LUT based
                 FPGAs, in which the basic logic element is a $K$-input
                 one-output lookup-table (LUT) that can implement any
                 Boolean function of up to $K$ variables. This unique
                 feature of the LUT has brought new challenges to logic
                 synthesis and optimization, resulting in many new
                 techniques reported in recent years. This article
                 summarizes the research results on combinational logic
                 synthesis for LUT based FPGAs under a coherent
                 framework. These results were dispersed in various
                 conference proceedings and journals and under various
                 formulations and terminologies. We first present
                 general problem formulations, various optimization
                 objectives and measurements, then focus on a set of
                 commonly used basic concepts and techniques, and
                 finally summarize existing synthesis algorithms and
                 systems. We classify and summarize the basic techniques
                 into two categories, namely, {\em logic optimization\/}
                 and {\em technology mapping}, and describe the existing
                 algorithms and systems in terms of how they use the
                 classified basic techniques. A comprehensive list of
                 references is compiled in the attached bibliography.",
  acknowledgement = ack-nhfb,
  generalterms = "Algorithms; Design; Experimentation; Measurement;
                 Performance; Theory",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "area minimization; computer-aided design of VLSI;
                 decomposition; delay minimization; delay modeling;
                 FPGA; logic optimization; power minimization;
                 programmable logic; routing; simplification; synthesis;
                 system design; technology mapping",
  subject =      "Hardware --- Logic Design --- Design Styles (B.6.1):
                 {\bf Combinational logic}; Hardware --- Logic Design
                 --- Design Aids (B.6.3): {\bf Automatic synthesis};
                 Hardware --- Logic Design --- Design Aids (B.6.3): {\bf
                 Optimization}; Hardware --- Integrated Circuits ---
                 Types and Design Styles (B.7.1): {\bf Gate arrays};
                 Computer Applications --- Computer-Aided Engineering
                 (J.6): {\bf Computer-aided design (CAD)}",
}

%%% Volume 1, number 2 (April 1996), pages 205--250: transformational
%%% synthesis of synchronous hardware from VHDL specifications.  The
%%% abstract has three paragraphs, separated by \par and a blank line.
@Article{Middelhoek:1996:VEF,
  author =       "Peter F. A. Middelhoek and Sreeranga P. Rajan",
  title =        "From {VHDL} to efficient and first-time-right designs:
                 a formal approach",
  journal =      j-TODAES,
  volume =       "1",
  number =       "2",
  pages =        "205--250",
  month =        apr,
  year =         "1996",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1996-1-2/p205-middelhoek/p205-middelhoek.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1996-1-2/p205-middelhoek/",
  abstract =     "In this article we provide a practical
                 transformational approach to the synthesis of correct
                 synchronous digital hardware designs from high-level
                 specifications. We do this while taking into account
                 the complete life cycle of a design from early
                 prototype to full custom implementation. Besides
                 time-to-market, both flexibility with respect to target
                 architecture and efficiency issues are addressed by the
                 methodology. The utilization of user-selected
                 behavior-preserving transformation steps ensures
                 first-time-right design while exploiting the
                 experience, flexibility, and creativity of the
                 designer. \par

                 To ensure that design transformations are indeed
                 behavior-preserving a novel mechanized approach to the
                 specification and verification of design
                 transformations on control data flow graphs which is
                 independent of a specific behavioral model or graph
                 size has been developed. \par

                 As a demonstration of an industrial application we use
                 a video processing algorithm needed for the conversion
                 from a line-interlaced to progressively scanned video
                 format. Both a video signal processor-based prototype
                 implementation as well as a very efficient full custom
                 implementation are developed starting from a single
                 high-level behavioral specification of the algorithm in
                 VHDL. Results are compared with those previously
                 obtained using different tools and methodologies.",
  acknowledgement = ack-nhfb,
  generalterms = "Design; Human Factors; Languages; Theory;
                 Verification",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "CDFG; correctness by construction; design methodology;
                 rapid system prototyping; SFG; transformational design;
                 VHDL",
  subject =      "Hardware --- Register-Transfer-Level Implementation
                 --- Design (B.5.1): {\bf Arithmetic and logic units};
                 Hardware --- Register-Transfer-Level Implementation ---
                 Design (B.5.1): {\bf Control design}; Hardware ---
                 Register-Transfer-Level Implementation --- Design
                 (B.5.1): {\bf Data-path design}; Hardware ---
                 Register-Transfer-Level Implementation --- Design
                 (B.5.1): {\bf Styles}; Hardware ---
                 Register-Transfer-Level Implementation --- Design Aids
                 (B.5.2): {\bf Automatic synthesis}; Hardware ---
                 Register-Transfer-Level Implementation --- Design Aids
                 (B.5.2): {\bf Hardware description languages}; Hardware
                 --- Register-Transfer-Level Implementation --- Design
                 Aids (B.5.2): {\bf Verification}; Hardware --- Logic
                 Design --- Design Aids (B.6.3): {\bf Automatic
                 synthesis}; Hardware --- Logic Design --- Design Aids
                 (B.6.3): {\bf Hardware description languages}; Hardware
                 --- Logic Design --- Design Aids (B.6.3): {\bf
                 Optimization}; Hardware --- Logic Design --- Design
                 Aids (B.6.3): {\bf Verification}; Software ---
                 Programming Languages --- Language Classifications
                 (D.3.2): {\bf Applicative (functional) languages};
                 Software --- Programming Languages --- Language
                 Classifications (D.3.2): {\bf Data-flow languages};
                 Theory of Computation --- Logics and Meanings of
                 Programs --- Specifying and Verifying and Reasoning
                 about Programs (F.3.1): {\bf Mechanical verification};
                 Theory of Computation --- Mathematical Logic and Formal
                 Languages --- Mathematical Logic (F.4.1): {\bf
                 Mechanical theorem proving}; Computer Applications ---
                 Computer-Aided Engineering (J.6): {\bf Computer-aided
                 design (CAD)}; Hardware --- Register-Transfer-Level
                 Implementation --- Design Aids (B.5.2): {\bf
                 Optimization}; Software --- Software Engineering ---
                 Software/Program Verification (D.2.4): {\bf Correctness
                 proofs}; Hardware --- Logic Design --- Design Aids
                 (B.6.3): {\bf VHDL}",
}

%%% TODAES 1(2), April 1996, pp. 251--279: optimal (spill-minimizing)
%%% register assignment to loops for embedded code generation.
@article{Kolson:1996:ORA,
  author = {David J. Kolson and Alexandru Nicolau and Nikil Dutt
    and Ken Kennedy},
  title = {Optimal register assignment to loops for embedded code
    generation},
  journal = j-TODAES,
  volume = {1},
  number = {2},
  pages = {251--279},
  month = apr,
  year = {1996},
  coden = {ATASFO},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate = {Fri Jul 27 10:05:33 MDT 2001},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  url = {http://www.acm.org/pubs/articles/journals/todaes/1996-1-2/p251-kolson/p251-kolson.pdf;
    http://www.acm.org/pubs/citations/journals/todaes/1996-1-2/p251-kolson/},
  abstract = {One of the challenging tasks in code generation for
    embedded systems is register assignment. When more live
    variables than registers exist, some variables will
    necessarily be accessed from data memory. Because loops
    are typically executed many times and are often
    time-critical, good register assignment in loops is
    exceedingly important as accessing data memory can
    degrade performance. The issue of finding an optimal
    register assignment to loops has been open for some
    time. In this article, we present a technique for
    optimal (i.e., spill minimizing) register assignment to
    loops. First we present a technique for register
    assignment to architecture styles that are
    characterized by a consolidated register file. Then we
    extend the technique to include architecture styles
    that are characterized by distributed memories and/or a
    combination of general- and special-purpose registers.
    Experimental results demonstrate that although the
    optimal algorithm may be computationally prohibitive,
    heuristic versions obtain results with performance
    better than that of an existing graph coloring
    approach.},
  acknowledgement = ack-nhfb,
  generalterms = {Algorithms; Languages},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {code generation; embedded systems; system design},
  subject = {Software --- Programming Languages --- Processors
    (D.3.4): {\bf Compilers}; Software --- Programming
    Languages --- Processors (D.3.4): {\bf Optimization};
    Software --- Programming Languages --- Processors
    (D.3.4): {\bf Code generation}},
}

%%% TODAES 1(2), April 1996, pp. 280--300: transistor reordering in
%%% CMOS gates to minimize power under a delay constraint.
@article{Prasad:1996:TRP,
  author = {S. C. Prasad and K. Roy},
  title = {Transistor reordering for power minimization under
    delay constraint},
  journal = j-TODAES,
  volume = {1},
  number = {2},
  pages = {280--300},
  month = apr,
  year = {1996},
  coden = {ATASFO},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate = {Fri Jul 27 10:05:33 MDT 2001},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  url = {http://www.acm.org/pubs/articles/journals/todaes/1996-1-2/p280-prasad/p280-prasad.pdf;
    http://www.acm.org/pubs/citations/journals/todaes/1996-1-2/p280-prasad/},
  abstract = {In this article we address the problem of optimization
    of VLSI circuits to minimize power consumption while
    meeting performance goals. We present a method of
    estimating power consumption of a basic or complex CMOS
    gate which takes the internal capacitances of the gate
    into account. This method is used to select an ordering
    of series-connected transistors found in CMOS gates to
    achieve lower power consumption. The method is very
    efficient when used by library-based design styles. We
    describe a multipass algorithm that makes use of
    transistor reordering to optimize performance and power
    consumption of circuits, has a linear time complexity
    per pass, and converges to a solution in a small number
    of passes. Transformations in addition to transistor
    reordering can be used by the algorithm. The algorithm
    has been benchmarked on several large examples and the
    results are presented.},
  acknowledgement = ack-nhfb,
  generalterms = {Algorithms; Design; Performance},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {circuit optimization; critical path enumeration; gate
    input reordering; power estimation; transistor
    reordering},
  subject = {Hardware --- Logic Design --- Design Aids (B.6.3):
    {\bf Optimization}; Hardware --- Integrated Circuits
    --- Types and Design Styles (B.7.1): {\bf VLSI (very
    large scale integration)}},
}

%%% TODAES 1(3), July 1996, pp. 301--314: hardware-software
%%% cosynthesis guided by an object-oriented specification.
@article{Wolf:1996:OOC,
  author = {Wayne Wolf},
  title = {Object-oriented cosynthesis of distributed embedded
    systems},
  journal = j-TODAES,
  volume = {1},
  number = {3},
  pages = {301--314},
  month = jul,
  year = {1996},
  coden = {ATASFO},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate = {Fri Jul 27 10:05:33 MDT 2001},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  url = {http://www.acm.org/pubs/articles/journals/todaes/1996-1-3/p301-wolf/p301-wolf.pdf;
    http://www.acm.org/pubs/citations/journals/todaes/1996-1-3/p301-wolf/},
  abstract = {This article describes a new hardware-software
    cosynthesis algorithm that takes advantage of the
    structure inherent in an object-oriented specification.
    The algorithm creates a distributed system
    implementation with arbitrary topology, using the
    object-oriented structure to partition functionality in
    addition to scheduling and allocating processes.
    Process partitioning is an especially important
    optimization for such systems because the specification
    will not, in general, take into account the process
    structure required for efficient execution on the
    distributed engine. The object-oriented specification
    naturally provides both coarse-grained and fine-grained
    partitions of the system. Our algorithm uses that
    multilevel structure to guide synthesis. Experimental
    results show that our algorithm takes advantage of the
    object-oriented specification to quickly converge on
    high-quality implementations.},
  acknowledgement = ack-nhfb,
  generalterms = {Design},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {distributed embedded systems; hardware-software
    co-design; object-oriented co-synthesis},
  subject = {Computer Systems Organization --- Special-Purpose and
    Application-Based Systems (C.3): {\bf
    Microprocessor/microcomputer applications}; Computer
    Systems Organization --- Special-Purpose and
    Application-Based Systems (C.3): {\bf Real-time and
    embedded systems}},
}

%%% TODAES 1(3), July 1996, pp. 315--340: low-power synthesis of finite
%%% state machines by decomposition into coupled submachines.
@Article{Chow:1996:LPR,
  author =       "Sue-Hong Chow and Yi-Cheng Ho and TingTing Hwang and
                 C. L. Liu",
  title =        "Low power realization of finite state machines --- a
                 decomposition approach",
  journal =      j-TODAES,
  volume =       "1",
  number =       "3",
  pages =        "315--340",
  month =        jul,
  year =         "1996",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1996-1-3/p315-chow/p315-chow.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1996-1-3/p315-chow/",
  abstract =     "We present in this article a new approach to the
                 synthesis problem for finite state machines with the
                 reduction of power dissipation as a design objective. A
                 finite state machine is decomposed into a number of
                 {\em coupled\/} submachines. Most of the time, only one
                 of the submachines will be activated which,
                 consequently, could lead to substantial savings in
                 power consumption. The key steps in our approach are:
                 (1) decomposition of a finite state machine into
                 submachines so that there is a high probability that
                 state transitions will be confined to the smaller of
                 the submachines most of the time, and (2) synthesis of
                 the coupled submachines to optimize the logic circuits.
                 Experimental results confirmed that our approach
                 produced very good results (in particular, for finite
                 state machines with a large number of states).",
  acknowledgement = ack-nhfb,
  generalterms = "Design; Performance",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "decomposition of finite state machines; lower power
                 design; state assignment",
  subject =      "Hardware --- Logic Design --- Design Styles (B.6.1):
                 {\bf Sequential circuits}; Hardware --- Logic Design
                 --- Design Aids (B.6.3): {\bf Automatic synthesis};
                 Hardware --- Logic Design --- Design Aids (B.6.3): {\bf
                 Optimization}; Computer Applications --- Computer-Aided
                 Engineering (J.6): {\bf Computer-aided design (CAD)}",
}

%%% TODAES 1(3), July 1996, pp. 341--351: quadratic-time, minimum-cost
%%% flow algorithm for minimizing the number of FPGA modules.
@article{Kagaris:1996:FAM,
  author = {Dimitrios Kagaris and Spyros Tragoudas},
  title = {A fast algorithm for minimizing {FPGA} combinational
    and sequential modules},
  journal = j-TODAES,
  volume = {1},
  number = {3},
  pages = {341--351},
  month = jul,
  year = {1996},
  coden = {ATASFO},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate = {Fri Jul 27 10:05:33 MDT 2001},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  url = {http://www.acm.org/pubs/articles/journals/todaes/1996-1-3/p341-kagaris/p341-kagaris.pdf;
    http://www.acm.org/pubs/citations/journals/todaes/1996-1-3/p341-kagaris/},
  abstract = {We present a quadratic-time algorithm for minimizing
    the number of modules in an FPGA with combinational and
    sequential modules (like the C-modules and S-modules of
    the ACT2 and ACT3 architectures). The constraint is
    that a combinational module can be combined with one
    flip-flop in a single sequential module, only if the
    combinational module drives no other combinational
    modules. Our algorithm uses a minimum-cost flow
    formulation to solve the problem with a significant
    time improvement over a previous approach that used a
    general linear program.},
  acknowledgement = ack-nhfb,
  generalterms = {Algorithms; Design},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {field programmable gate arrays; retiming},
  subject = {Hardware --- Logic Design --- Design Aids (B.6.3):
    {\bf Automatic synthesis}; Hardware --- Logic Design
    --- Design Aids (B.6.3): {\bf Optimization}; Hardware
    --- Integrated Circuits --- Types and Design Styles
    (B.7.1): {\bf Gate arrays}},
}

%%% TODAES 1(3), July 1996, pp. 352--370: selecting a slack-minimal
%%% clock period for scheduling in behavioral synthesis.
@article{Chang:1996:OCP,
  author = {En-Shou Chang and Daniel D. Gajski and Sanjiv
    Narayan},
  title = {An optimal clock period selection method based on
    slack minimization criteria},
  journal = j-TODAES,
  volume = {1},
  number = {3},
  pages = {352--370},
  month = jul,
  year = {1996},
  coden = {ATASFO},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate = {Fri Jul 27 10:05:33 MDT 2001},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  url = {http://www.acm.org/pubs/articles/journals/todaes/1996-1-3/p352-chang/p352-chang.pdf;
    http://www.acm.org/pubs/citations/journals/todaes/1996-1-3/p352-chang/},
  abstract = {An important decision in synthesizing a hardware
    implementation from a behavioral description is
    selecting the clock period to schedule the datapath
    operations into control steps. Prior to scheduling,
    most existing behavioral synthesis systems either
    require the designer to specify the clock period
    explicitly or require that the delays of the operators
    used in the design be specified in multiples of the
    clock period. An unfavorable choice of clock period
    could result in operations being idle for a large
    portion of the clock period and, consequently, affect
    the performance of the synthesized design. In this
    article, we demonstrate the effect of clock slack on
    the performance of designs and present an algorithm to
    find a slack-minimal clock period. We prove the
    optimality of our method and apply it to several
    examples to demonstrate its effectiveness in maximizing
    design performance.},
  acknowledgement = ack-nhfb,
  generalterms = {Design; Measurement; Performance},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {clock period; clock slack; performance estimation;
    scheduling},
  subject = {Hardware --- Register-Transfer-Level Implementation
    --- Design Aids (B.5.2)},
}

%%% TODAES 1(3), July 1996, pp. 371--395: decomposing rectilinear
%%% polygons into L-shapes and rectangles for corner stitching.
@Article{Lopez:1996:EDP,
  author =       "Mario A. Lopez and Dinesh P. Mehta",
  title =        "Efficient decomposition of polygons into {L-shapes}
                 with application to {VLSI} layouts",
  journal =      j-TODAES,
  volume =       "1",
  number =       "3",
  pages =        "371--395",
  month =        jul,
  year =         "1996",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1996-1-3/p371-lopez/p371-lopez.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1996-1-3/p371-lopez/",
  abstract =     "We present two practical algorithms for partitioning
                 circuit components represented by rectilinear polygons
                 so that they can be stored using the L-shaped corner
                 stitching data structure; that is, our algorithms
                 decompose a simple polygon into a set of nonoverlapping
                 L-shapes and rectangles by using horizontal cuts only.
                 The more general of our algorithms computes an optimal
                 configuration for a wide variety of optimization
                 functions, whereas the other computes a minimum
                 configuration of rectangles and L-shapes. Both
                 algorithms run in $O(n + h \log h)$ time, where $n$ is
                 the number of vertices in the polygon and $h$ is the
                 number of H-pairs. Because for VLSI data $h$ is small,
                 in practice these algorithms are linear in $n$.
                 Experimental results on actual VLSI data compare our
                 algorithms and demonstrate the gains in performance for
                 corner stitching (as measured by different objective
                 functions) obtained by using them instead of more
                 traditional rectangular partitioning algorithms.",
  acknowledgement = ack-nhfb,
  generalterms = "Algorithms",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "corner stitching; L-shapes; partition; rectangle;
                 rectilinear polygons",
  subject =      "Hardware --- Integrated Circuits --- Design Aids
                 (B.7.2): {\bf Layout}; Theory of Computation ---
                 Analysis of Algorithms and Problem Complexity ---
                 Nonnumerical Algorithms and Problems (F.2.2): {\bf
                 Geometrical problems and computations}; Mathematics of
                 Computing --- Discrete Mathematics --- Graph Theory
                 (G.2.2): {\bf Graph algorithms}",
}

%%% TODAES 1(3), July 1996, pp. 396--403: probabilistic estimation of
%%% register count in unscheduled or partially scheduled dataflow graphs.
@Article{Moreno:1996:REU,
  author =       "R. Moreno and R. Hermida and M. Fern{\'a}ndez",
  title =        "Register estimation in unscheduled dataflow graphs",
  journal =      j-TODAES,
  volume =       "1",
  number =       "3",
  pages =        "396--403",
  month =        jul,
  year =         "1996",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1996-1-3/p396-moreno/p396-moreno.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1996-1-3/p396-moreno/",
  abstract =     "A method for register number estimation in unscheduled
                 or partially scheduled dataflow graphs is presented.
                 The strategy consists of studying the probability that
                 an edge between two nodes crosses the boundary between
                 two control steps, and it is based on a model that
                 associates probabilities with the different scheduling
                 alternatives of each node. These probabilities are
                 computed by means of an analytic method that takes into
                 account the distribution of operations in the dataflow
                 graph and the hardware modules available in the
                 library. The results highlight that the estimation
                 method is very accurate because the error between the
                 estimated value and the real value is always within a
                 narrow margin.",
  acknowledgement = ack-nhfb,
  generalterms = "Design",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "area estimation; high-level synthesis; probabilities;
                 register estimation; scheduling",
  subject =      "Hardware --- Register-Transfer-Level Implementation
                 --- Design (B.5.1): {\bf Data-path design}",
}

%%% TODAES 1(4), October 1996, pp. 405--442: review of gate-level
%%% automatic test pattern generation for sequential circuits.
@article{Cheng:1996:GLT,
  author = {Kwang-Ting Cheng},
  title = {Gate-level test generation for sequential circuits},
  journal = j-TODAES,
  volume = {1},
  number = {4},
  pages = {405--442},
  month = oct,
  year = {1996},
  coden = {ATASFO},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate = {Fri Jul 27 10:05:33 MDT 2001},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  url = {http://www.acm.org/pubs/articles/journals/todaes/1996-1-4/p405-cheng/p405-cheng.pdf;
    http://www.acm.org/pubs/citations/journals/todaes/1996-1-4/p405-cheng/},
  abstract = {This paper discusses the gate-level automatic test
    pattern generation (ATPG) methods and techniques for
    sequential circuits. The basic concepts, examples,
    advantages, and limitations of representative methods
    are reviewed in detail. The relationship between
    gate-level sequential circuit ATPG and the partial scan
    design is also discussed.},
  acknowledgement = ack-nhfb,
  generalterms = {Algorithms; Reliability; Verification},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {automatic test generation; IC testing; sequential
    circuit test generation; testing},
  subject = {Hardware --- Integrated Circuits --- Reliability and
    Testing** (B.7.3); Hardware --- Integrated Circuits ---
    Types and Design Styles (B.7.1)},
}

%%% TODAES 1(4), October 1996, pp. 443--455: recursive lower-bound
%%% estimation of the performance of data path schedules.
@Article{Langevin:1996:RTC,
  author =       "M. Langevin and E. Cerny",
  title =        "A recursive technique for computing lower-bound
                 performance of schedules",
  journal =      j-TODAES,
  volume =       "1",
  number =       "4",
  pages =        "443--455",
  month =        oct,
  year =         "1996",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1996-1-4/p443-langevin/p443-langevin.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1996-1-4/p443-langevin/",
  abstract =     "We present a fast recursive technique for estimating
                 lower-bound performance of data path schedules. The
                 method relies on the determination of an ASAPUC (As
                 Soon As Possible Under Constraint) time-step value for
                 each node of the DFG (Data-Flow Graph) that is based on
                 the ASAPUC values of its predecessor nodes. That is,
                 the lower-bound estimation is applied to each subgraph
                 permitting the derivation of a tight lower bound on the
                 performance of the complete DFG. Applying the greedy
                 lower-bound estimator of Rim and Jain [1994] to each
                 subgraph improves the complete lower bound in more than
                 50\% of the experiments reported in Rim and Jain
                 [1994], and the CPU time is only about twice as long.
                 The recursive methodology can be extended to exploit
                 other lower-bound techniques, for example, considering
                 other constraints such as the number of busses or
                 registers.",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "dataflow graph; lower-bound on performance; microcode
                 optimization; resource constraints; scheduling",
  subject =      "Hardware --- Register-Transfer-Level Implementation
                 --- Design Aids (B.5.2): {\bf Optimization}; Hardware
                 --- Register-Transfer-Level Implementation --- Design
                 Aids (B.5.2): {\bf Automatic synthesis}",
}

%%% TODAES 1(4), October 1996, pp. 456--477: the Unison algorithm for
%%% bit-parallel evaluation of Boolean expressions in software.
@Article{Sosic:1996:UAF,
  author =       "Rok Sosi{\v{c}} and Jun Gu and Robert R. Johnson",
  title =        "The {Unison} algorithm: fast evaluation of {Boolean}
                 expressions",
  journal =      j-TODAES,
  volume =       "1",
  number =       "4",
  pages =        "456--477",
  month =        oct,
  year =         "1996",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Tue Oct 22 15:33:01 2002",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1996-1-4/p456-sosic/p456-sosic.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1996-1-4/p456-sosic/",
  abstract =     "We present a Unison algorithm to evaluate arbitrarily
                 complex Boolean expressions. This novel algorithm,
                 based on the total differential of a Boolean function,
                 enables fast evaluation of Boolean expressions in
                 software. Any combination of Boolean operations can be
                 packed into the bits of one computer word and evaluated
                 in parallel by bitwise logical operations. Sample runs
                 of the Unison algorithm show that many Boolean
                 operations can be evaluated in one clock cycle. The
                 Unison algorithm is able to evaluate Boolean
                 expressions at an execution speed that is comparable to
                 compiled evaluation while retaining the flexibility of
                 interpreted approaches. The algorithm lends itself well
                 to many practical applications.",
  acknowledgement = ack-nhfb,
  generalterms = "Algorithms; Design; Performance; Reliability;
                 Verification",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Boolean differential; Boolean evaluation; Boolean
                 expressions; Unison algorithm",
  subject =      "Hardware --- Logic Design --- General (B.6.0); Theory
                 of Computation --- Analysis of Algorithms and Problem
                 Complexity --- Nonnumerical Algorithms and Problems
                 (F.2.2)",
}

%%% TODAES 1(4), October 1996, pp. 478--511: optimal wiresizing for
%%% multisource nets under the RC tree and Elmore delay models.
@article{Cong:1996:OWI,
  author = {Jason Cong and Lei He},
  title = {Optimal wiresizing for interconnects with multiple
    sources},
  journal = j-TODAES,
  volume = {1},
  number = {4},
  pages = {478--511},
  month = oct,
  year = {1996},
  coden = {ATASFO},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate = {Fri Jul 27 10:05:33 MDT 2001},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  url = {http://www.acm.org/pubs/articles/journals/todaes/1996-1-4/p478-cong/p478-cong.pdf;
    http://www.acm.org/pubs/citations/journals/todaes/1996-1-4/p478-cong/},
  abstract = {In this paper, we study the optimal wiresizing problem
    for nets with multiple sources under the RC tree model
    and the Elmore delay model. We decompose the routing
    tree for a multisource net into the source subtree
    (SST) and a set of loading subtrees (LSTs), and show
    that the optimal wiresizing solution satisfies a number
    of interesting properties, including: LST separability,
    the LST monotone property, the SST local monotone
    property, and the dominance property. Furthermore, we
    study the optimal wiresizing problem using a variable
    segment-division rather than an a priori fixed
    segment-division as in all previous works and reveal
    the bundled refinement property. These properties lead
    to efficient algorithms to compute the optimal
    solutions. We have tested our algorithm on nets
    extracted from the multilayer layout for a
    high-performance Intel microprocessor. Accurate SPICE
    simulation shows that our methods reduce the average
    delay by up to 23.5\% and the maximum delay by up to
    37.8\%, respectively, for the submicron CMOS technology
    when compared to the minimal wire width solution. In
    addition, the algorithm based on the variable
    segment-division yields a speedup of over 100$\times$
    time and does not lose any accuracy, when compared with
    the algorithm based on the a priori fixed
    segment-division.},
  acknowledgement = ack-nhfb,
  generalterms = {Algorithms; Design; Experimentation; Performance},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {bundled refinement; decomposition of multi-source
    routing tree; dominance property; Elmore delay;
    fidelity; high performance; interconnect optimization;
    layout optimization; local refinement; multi-source
    net; multi-source routing tree; optimal wiresizing;
    variable segment-division},
  subject = {Hardware --- Integrated Circuits --- Design Aids
    (B.7.2): {\bf Placement and routing}; Hardware ---
    Integrated Circuits --- Design Aids (B.7.2): {\bf
    Simulation}; Computer Applications --- Computer-Aided
    Engineering (J.6): {\bf Computer-aided design (CAD)};
    Hardware --- Integrated Circuits --- Design Aids
    (B.7.2): {\bf Layout}; Mathematics of Computing ---
    Discrete Mathematics --- Graph Theory (G.2.2); Hardware
    --- Integrated Circuits --- Types and Design Styles
    (B.7.1); Hardware --- Integrated Circuits --- Design
    Aids (B.7.2): {\bf SPICE}; Hardware --- Input/Output
    and Data Communications --- Interconnections
    (Subsystems) (B.4.3)},
}

%%% TODAES 1(4), October 1996, pp. 512--522: full-set decomposition for
%%% optimal thumbnail rectilinear Steiner trees on a small integer grid.
@Article{Ganley:1996:RST,
  author =       "Joseph L. Ganley and James P. Cohoon",
  title =        "Rectilinear {Steiner} trees on a checkerboard",
  journal =      j-TODAES,
  volume =       "1",
  number =       "4",
  pages =        "512--522",
  month =        oct,
  year =         "1996",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1996-1-4/p512-ganley/p512-ganley.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1996-1-4/p512-ganley/",
  abstract =     "The rectilinear Steiner tree problem is to find a
                 minimum-length set of horizontal and vertical line
                 segments that interconnect a given set of points in the
                 plane. Here we study the {\em thumbnail rectilinear
                 Steiner tree\/} problem, where the input points are
                 drawn from a small integer grid. Specifically, we
                 devise a full-set decomposition algorithm for
                 computing optimal thumbnail rectilinear Steiner trees.
                 We then present experimental results comparing the
                 performance of this algorithm with two existing
                 algorithms for computing optimal rectilinear Steiner
                 trees. The thumbnail rectilinear Steiner tree problem
                 has applications in VLSI placement algorithms, based on
                 geometric partitioning, global routing of
                 field-programmable gate arrays, and routing estimation
                 during floorplanning.",
  acknowledgement = ack-nhfb,
  generalterms = "Algorithms; Theory",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "exact algorithms; full-set decomposition; rectilinear
                 Steiner tree; routing",
  subject =      "Hardware --- Integrated Circuits --- Design Aids
                 (B.7.2): {\bf Placement and routing}; Theory of
                 Computation --- Analysis of Algorithms and Problem
                 Complexity --- Nonnumerical Algorithms and Problems
                 (F.2.2): {\bf Geometrical problems and computations};
                 Mathematics of Computing --- Discrete Mathematics ---
                 Graph Theory (G.2.2): {\bf Graph algorithms};
                 Mathematics of Computing --- Discrete Mathematics ---
                 Graph Theory (G.2.2): {\bf Trees}",
}

%%% Survey article in TODAES volume 2, number 1 (January 1997).
%%% NOTE(review): the abstract reads ``The need ... are discussed'';
%%% presumably verbatim from the publisher's abstract, so it is left
%%% unchanged here --- confirm against the published text before
%%% correcting the grammar.
@Article{Lin:1997:RDH,
  author =       "Youn-Long Lin",
  title =        "Recent developments in high-level synthesis",
  journal =      j-TODAES,
  volume =       "2",
  number =       "1",
  pages =        "2--21",
  month =        jan,
  year =         "1997",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1997-2-1/p2-lin/p2-lin.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1997-2-1/p2-lin/",
  abstract =     "We survey recent developments in high level synthesis
                 technology for VLSI design. The need for higher-level
                 design automation tools are discussed first. We then
                 describe some basic techniques for various subtasks of
                 high-level synthesis. Techniques that have been
                 proposed in the past few years (since 1994) for various
                 subtasks of high-level synthesis are surveyed. We also
                 survey some new synthesis objectives including
                 testability, power efficiency, and reliability.",
  acknowledgement = ack-nhfb,
  generalterms = "Design; Experimentation; Languages; Reliability",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "design automation; design methodology; high level
                 synthesis; VLSI design",
  subject =      "Hardware --- Register-Transfer-Level Implementation
                 --- Design (B.5.1): {\bf Data-path design}; Hardware
                 --- Register-Transfer-Level Implementation --- Design
                 Aids (B.5.2): {\bf Automatic synthesis}; Hardware ---
                 Register-Transfer-Level Implementation --- Design Aids
                 (B.5.2): {\bf Hardware description languages}; Hardware
                 --- Register-Transfer-Level Implementation --- Design
                 Aids (B.5.2): {\bf Optimization}",
}

%%% NOTE(review): the keywords field says ``software-hardware codesign''
%%% while the title and abstract say ``hardware-software codesign'';
%%% presumably both follow the publisher's data --- confirm before
%%% normalizing either spelling.
@Article{Gong:1997:MRH,
  author =       "Jie Gong and Daniel D. Gajski and Smita Bakshi",
  title =        "Model refinement for hardware-software codesign",
  journal =      j-TODAES,
  volume =       "2",
  number =       "1",
  pages =        "22--41",
  month =        jan,
  year =         "1997",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1997-2-1/p22-gong/p22-gong.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1997-2-1/p22-gong/",
  abstract =     "Hardware-software codesign, which implements a given
                 specification with a set of system components such as
                 ASICs and processors, includes several key tasks such
                 as system component allocation, functional
                 partitioning, quality metrics estimation, and model
                 refinement. In this work, we focus on the model
                 refinement task which transforms a specification from
                 an original functional model to a refined
                 implementation model. First, we categorize several
                 commonly used implementation models and describe a set
                 of refinement procedures to transform a specification
                 to each of these implementation models. We also present
                 a set of experimental results to compare the
                 implementation models and to demonstrate how the
                 proposed approach can be used to explore different
                 implementation styles.",
  acknowledgement = ack-nhfb,
  generalterms = "Algorithms; Design; Experimentation; Languages;
                 Measurement",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "functional model; implementation model; model
                 refinement; software-hardware codesign",
  subject =      "Computer Applications --- Computer-Aided Engineering
                 (J.6): {\bf Computer-aided design (CAD)}; Computer
                 Systems Organization --- General (C.0): {\bf
                 Hardware/software interfaces}; Hardware ---
                 Register-Transfer-Level Implementation --- General
                 (B.5.0); Computer Systems Organization --- General
                 (C.0): {\bf Modeling of computer architecture}",
}

%%% The compound surname ``de Abreu Moreira'' is braced in the author
%%% field so that BibTeX treats it as a single name token.
@Article{deAbreuMoreira:1997:ADC,
  author =       "Dilvan {de Abreu Moreira} and Les T. Walczowski",
  title =        "{AGENTS}: a distributed client-server system for leaf
                 cell generation",
  journal =      j-TODAES,
  volume =       "2",
  number =       "1",
  pages =        "42--61",
  month =        jan,
  year =         "1997",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Oct 31 06:28:35 2003",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1997-2-1/p42-moreira/p42-moreira.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1997-2-1/p42-moreira/",
  abstract =     "The AGENTS system is a set of programs designed to
                 generate automatically the mask-level layout of full
                 custom CMOS, BICMOS, and bipolar leaf cells. The system
                 is formed from four server programs: the placer, router,
                 database, and broker. \par

                 The placer places components in a cell, the router
                 wires the circuits sent to it, the database stores all
                 the information that is dependent upon the fabrication
                 process, such as the design rules, and the Broker makes
                 the services of the other servers available. \par

                 These servers communicate over a computer network using
                 the TCP/IP Internet Protocol. The Placer server
                 receives from its client the description and netlist of
                 the circuit to be generated using EDIF (Electronic
                 Design Interchange Format). The output to its client is
                 the mask layout of the circuit, again codified in EDIF.
                 The concept of agents as software components which have
                 the ability to communicate and cooperate with each
                 other is at the heart of the AGENTS system. This
                 concept is not only used at the higher level, for the
                 four servers, but at a lower level as well, inside the
                 Router and Placer servers, where small relatively
                 simple agents work together to accomplish complex
                 tasks. These small agents are responsible for all the
                 reasoning carried out by the two servers, as they hold
                 the basic inference routines and the knowledge needed
                 by the servers. The system's philosophy is that
                 competence should emerge out of the collective behavior
                 of a large number of relatively simple agents. In
                 addition and integrated to these small agents, the
                 system uses a genetic algorithm to improve components'
                 placement before routing.",
  acknowledgement = ack-nhfb,
  generalterms = "Design",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "client/server model; genetic algorithms; software
                 agents",
  subject =      "Hardware --- Integrated Circuits --- Design Aids
                 (B.7.2): {\bf Placement and routing}; Hardware ---
                 Integrated Circuits --- Types and Design Styles
                 (B.7.1)",
}

%%% The braces around {IC\slash MCM} in the title protect the acronyms
%%% from case changes made by bibliography styles; \slash is the LaTeX
%%% macro for a slash after which a line break is permitted.
@Article{Esbensen:1997:PDI,
  author =       "Henrik Esbensen and Ernest S. Kuh",
  title =        "A performance-driven {IC\slash MCM} placement
                 algorithm featuring explicit design space exploration",
  journal =      j-TODAES,
  volume =       "2",
  number =       "1",
  pages =        "62--80",
  month =        jan,
  year =         "1997",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1997-2-1/p62-esbensen/p62-esbensen.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1997-2-1/p62-esbensen/",
  abstract =     "A genetic algorithm for building-block placement of
                 ICs and MCMs is presented that simultaneously minimizes
                 layout area and an Elmore-based estimate of the maximum
                 path delay while trying to meet a target aspect ratio.
                 Explicit design space exploration is performed by using
                 a vector-valued, 3-dimensional cost function and
                 searching for a set of distinct solutions representing
                 the best trade-offs of the cost dimensions. From the
                 output solutions, the designer can choose the solution
                 with the preferred trade-off. In contrast to existing
                 approaches, the required properties of the output
                 solutions are specified without using weights or
                 bounds. Consequently, the practical problems of
                 specifying these quantities are eliminated. Promising
                 experimental results are obtained for various placement
                 problems, including a real-world design. Solution sets
                 representing good, balanced cost trade-offs are found
                 using a reasonable amount of runtime. Furthermore, the
                 performance is shown to be comparable to that of
                 simulated annealing in the special case of
                 1-dimensional optimization, in which direct comparison
                 is possible.",
  acknowledgement = ack-nhfb,
  generalterms = "Algorithms; Design",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "design space exploration; timing-driven building-block
                 placement",
  subject =      "Hardware --- Integrated Circuits --- Types and Design
                 Styles (B.7.1): {\bf VLSI (very large scale
                 integration)}; Hardware --- Integrated Circuits ---
                 Design Aids (B.7.2): {\bf Placement and routing};
                 Computing Methodologies --- Artificial Intelligence ---
                 Problem Solving, Control Methods, and Search (I.2.8):
                 {\bf Heuristic methods}",
}

@Article{Lin:1997:STV,
  author =       "Yann-Rue Lin and Cheng-Tsung Hwang and Allen C.-H.
                 Wu",
  title =        "Scheduling techniques for variable voltage low power
                 designs",
  journal =      j-TODAES,
  volume =       "2",
  number =       "2",
  pages =        "81--97",
  month =        apr,
  year =         "1997",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1997-2-2/p81-lin/p81-lin.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1997-2-2/p81-lin/",
  abstract =     "This paper presents an integer linear programming
                 (ILP) model and a heuristic for the variable voltage
                 scheduling problem. We present the variable voltage
                 scheduling techniques that consider in turn timing
                 constraints alone, resource constraints alone, and
                 timing and resource constraints together for design
                 space exploration. Experimental results show that our
                 heuristic produces results competitive with those of
                 the ILP method in a fraction of the run-time. The
                 results also show that a wide range of design
                 alternatives can be generated using our design space
                 exploration method. Using different cost/delay
                 combinations, power consumption in a single design can
                 differ by as much as a factor of 6 when using mixed
                 3.3V and 5V supply voltages.",
  acknowledgement = ack-nhfb,
  generalterms = "Algorithms; Design; Performance",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "high-level synthesis; lower power design; scheduling;
                 variable voltage",
  subject =      "Hardware --- Register-Transfer-Level Implementation
                 --- Design Aids (B.5.2); Hardware ---
                 Register-Transfer-Level Implementation --- Design
                 (B.5.1): {\bf Styles}; Hardware --- Integrated Circuits
                 --- Types and Design Styles (B.7.1): {\bf VLSI (very
                 large scale integration)}",
}

@Article{Fummi:1997:FDT,
  author =       "F. Fummi and U. Rovati and D. Sciuto",
  title =        "Functional design for testability of control-dominated
                 architectures",
  journal =      j-TODAES,
  volume =       "2",
  number =       "2",
  pages =        "98--122",
  month =        apr,
  year =         "1997",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1997-2-2/p98-fummi/p98-fummi.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1997-2-2/p98-fummi/",
  abstract =     "Control-dominated architectures are usually described
                 in a hardware description language (HDL) by means of
                 interacting FSMs. A VHDL or Verilog specification can
                 be translated into an interacting FSM (IFSM)
                 representation as described here. The IFSM model allows
                 us to approach the testable synthesis problem at the
                 level of each FSM. The functionality is modified by the
                 addition of transparency to data flow. The complete
                 testability of the IFSM implementation is thus achieved
                 by connecting fully testable implementations of each
                 modified FSM. In this way, test sequences separately
                 generated for each FSM are directly applied to the IFSM
                 to achieve complete fault coverage. The addition of
                 test functionality to each FSM description, and its
                 simultaneous synthesis with the FSM functionality,
                 produces a lower area overhead than that necessary for
                 the application of a partial-scan technique. Moreover,
                 the test generation problem is highly simplified since
                 it is reduced to the test generation for each separate
                 FSM.",
  acknowledgement = ack-nhfb,
  generalterms = "Measurement; Performance; Reliability",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "functional testing; interacting FSMs",
  subject =      "Hardware --- Register-Transfer-Level Implementation
                 --- Reliability and Testing** (B.5.3): {\bf Test
                 generation**}; Hardware --- Register-Transfer-Level
                 Implementation --- Reliability and Testing** (B.5.3):
                 {\bf Testability**}; Hardware --- Logic Design ---
                 Design Aids (B.6.3): {\bf Hardware description
                 languages}",
}

@Article{Kormicki:1997:PLS,
  author =       "Maciek Kormicki and Ausif Mahmood and Bradley S.
                 Carlson",
  title =        "Parallel logic simulation on a network of workstations
                 using parallel virtual machine",
  journal =      j-TODAES,
  volume =       "2",
  number =       "2",
  pages =        "123--134",
  month =        apr,
  year =         "1997",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1997-2-2/p123-kormicki/p123-kormicki.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1997-2-2/p123-kormicki/",
  abstract =     "This paper explores parallel logic simulation on a
                 network of workstations using a parallel virtual
                 machine (PVM). A novel parallel implementation of the
                 centralized-time event-driven logic simulation
                 algorithm is carried out such that no global
                 controlling workstation is needed to synchronize the
                 advance of simulation time. Further advantages of our
                 new approach include a random partitioning of the
                 circuit onto available workstations and a pipelined
                 execution of the different phases of the simulation
                 algorithm. To achieve a better load balance, we employ
                 a semioptimistic scheme for gate evaluations (in
                 conjunction with a centralized-time algorithm) such
                 that no rollback is required. The performance of this
                 implementation has been evaluated using the ISCAS
                 benchmark circuits. Speedups improve with the size of
                 the circuit and the activity level in the circuit.
                 Analyses of the communication overhead show that the
                 techniques developed here will yield even higher gains
                 as newer networking technologies like ATM are employed
                 to connect workstations.",
  acknowledgement = ack-nhfb,
  generalterms = "Algorithms; Performance; Verification",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "distributed computing; parallel logic simulation; PVM;
                 synchronous simulation",
  subject =      "Hardware --- Logic Design --- Design Aids (B.6.3):
                 {\bf Simulation}; Hardware --- Integrated Circuits ---
                 Design Aids (B.7.2): {\bf Simulation}",
}

@Article{Yang:1997:HFM,
  author =       "Cheng-Hsing Yang and Chia-Chun Tsai and Jan-Ming Ho
                 and Sao-Jie Chen",
  title =        "Hmap: a fast mapper for {EPGAs} using extended {GBDD}
                 hash tables",
  journal =      j-TODAES,
  volume =       "2",
  number =       "2",
  pages =        "135--150",
  month =        apr,
  year =         "1997",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1997-2-2/p135-yang/p135-yang.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1997-2-2/p135-yang/",
  abstract =     "A fast and efficient algorithm for technology mapping
                 of electrically programmable gate arrays (EPGAs) is
                 proposed. This Hmap algorithm covers the Boolean
                 network with programmed logic modules bottom-up. The
                 covering operation is based on collapsing the fanins of
                 a node to form a bigger supernode such that fewer
                 clusters are needed to be detected. Then Boolean
                 matching is used to detect whether the collapsed
                 supernode can be mapped into a logic module by looking
                 up an extended GBDD hash table. The use of this table
                 look-up matching can shorten the matching time
                 significantly. As shown in the experiments, the average
                 running time of Hmap is 20 times faster than that of
                 MIS-pga2.",
  acknowledgement = ack-nhfb,
  generalterms = "Design",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  subject =      "Hardware --- Integrated Circuits --- Design Aids
                 (B.7.2); Hardware --- Integrated Circuits --- Types and
                 Design Styles (B.7.1): {\bf Gate arrays}",
}

@Article{Mak:1997:BLM,
  author =       "Wai-Kei Mak and D. F. Wong",
  title =        "Board-level multiterminal net routing for {FPGA-based}
                 logic emulation",
  journal =      j-TODAES,
  volume =       "2",
  number =       "2",
  pages =        "151--167",
  month =        apr,
  year =         "1997",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1997-2-2/p151-mak/p151-mak.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1997-2-2/p151-mak/",
  abstract =     "We consider a board-level routing problem applicable
                 to FPGA-based logic emulation systems such as the
                 Realizer System [Varghese et al. 1993] and the
                 Enterprise Emulation System [Maliniak 1992]
                 manufactured by Quickturn Design Systems. Optimal
                 algorithms have been proposed for the case where all
                 nets are two-terminal nets [Chan and Schlag 1993; Mak
                 and Wong 1995]. We show how multiterminal nets can be
                 handled by decomposition into two-terminal nets. We
                 show that the multiterminal net decomposition problem
                 can be modeled as a bounded-degree hypergraph-to-graph
                 transformation problem where hyperedges are transformed
                 to spanning trees. A network flow-based algorithm that
                 solves both problems is proposed. It determines if
                 there is a feasible decomposition and gives one
                 whenever such a decomposition exists.",
  acknowledgement = ack-nhfb,
  generalterms = "Algorithms; Verification",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "board-level routing; crossbars; field programmable
                 gate arrays; logic emulation; multi-terminal net
                 decomposition",
  subject =      "Hardware --- Integrated Circuits --- Types and Design
                 Styles (B.7.1): {\bf Gate arrays}; Hardware ---
                 Integrated Circuits --- Design Aids (B.7.2): {\bf
                 Placement and routing}; Hardware --- Integrated
                 Circuits --- Design Aids (B.7.2): {\bf Verification}",
}

@Article{Kahng:1997:ARI,
  author =       "Andrew B. Kahng and Sudhakar Muddu",
  title =        "Analysis of {RC} interconnections under ramp input",
  journal =      j-TODAES,
  volume =       "2",
  number =       "2",
  pages =        "168--192",
  month =        apr,
  year =         "1997",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1997-2-2/p168-kahng/p168-kahng.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1997-2-2/p168-kahng/",
  abstract =     "We give new methods for calculating the time-domain
                 response for a finite-length distributed {\em RC\/}
                 line that is stimulated by a ramp input. The following
                 are our contributions. First, we obtain the solution of
                 the diffusion equation for a semi-infinite distributed
                 {\em RC\/} line with ramp input. We then present a
                 general and, in the limit, {\em exact\/} approach to
                 compute the time-domain response for finite-length {\em
                 RC\/} lines under ramp input by summing distinct
                 diffusions starting at either end of the line. Next, we
                 obtain analytical expressions for the finite
                 time-domain voltage response for an open-ended finite
                 {\em RC\/} line and for a finite {\em RC\/} line with
                 capacitive load. The delay estimates using this method
                 are very close to SPICE-computed delays. Finally, we
                 present a general recursive equation for computing the
                 higher-order diffusion components due to reflections at
                 the source and load ends. Future work extends our
                 method to response computations in general
                 interconnection trees by modeling both reflection and
                 transmission coefficients at discontinuities.",
  acknowledgement = ack-nhfb,
  generalterms = "Algorithms; Design; Performance; Theory;
                 Verification",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "diffusion equation analysis; ramp input response; VLSI
                 interconnects",
  subject =      "Hardware --- Integrated Circuits --- Types and Design
                 Styles (B.7.1): {\bf VLSI (very large scale
                 integration)}; Hardware --- Integrated Circuits ---
                 Design Aids (B.7.2): {\bf Layout}",
}

@Article{Benini:1997:SBM,
  author =       "Luca Benini and Giovanni {De Micheli}",
  title =        "A survey of {Boolean} matching techniques for library
                 binding",
  journal =      j-TODAES,
  volume =       "2",
  number =       "3",
  pages =        "193--226",
  month =        jul,
  year =         "1997",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1997-2-3/p193-benini/p193-benini.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1997-2-3/p193-benini/",
  abstract =     "When binding a logic network to a set of cells, a
                 fundamental problem is recognizing whether a cell can
                 implement a portion of the network. Boolean matching
                 means solving this task using a formalism based on
                 Boolean algebra. In its simplest form, Boolean matching
                 can be posed as a tautology check. We review several
                 approaches to Boolean matching as well as to its
                 generalization to cases involving {\em don't care\/}
                 conditions and its restriction to specific libraries
                 such as those typical of anti-fuse based FPGAs. We then
                 present a general formulation of Boolean matching
                 supporting multiple-output logic cells.",
  acknowledgement = ack-nhfb,
  generalterms = "Design; Measurement; Performance",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  subject =      "Hardware --- Logic Design --- Design Styles (B.6.1)",
}

@Article{Johnson:1997:DSM,
  author =       "Mark C. Johnson and Kaushik Roy",
  title =        "Datapath scheduling with multiple supply voltages and
                 level converters",
  journal =      j-TODAES,
  volume =       "2",
  number =       "3",
  pages =        "227--248",
  month =        jul,
  year =         "1997",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1997-2-3/p227-johnson/p227-johnson.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1997-2-3/p227-johnson/",
  abstract =     "We present an algorithm called MOVER (Multiple
                 Operating Voltage Energy Reduction) to minimize
                 datapath energy dissipation through use of multiple
                 supply voltages. In a single voltage design, the
                 critical path length, clock period, and number of
                 control steps limit minimization of voltage and power.
                 Multiple supply voltages permit localized voltage
                 reductions to take up remaining schedule slack. MOVER
                 initially finds one minimum voltage for an entire
                 datapath. It then determines a second voltage for
                 operations where there is still schedule slack. New
                 voltages can be introduced and minimized until no
                 schedule slack remains. MOVER was exercised for a
                 variety of DSP datapath examples. Energy savings ranged
                 from 0\% to 50\% when comparing dual to single voltage
                 results. The benefit of going from two to three
                 voltages never exceeded 15\%. Power supply costs are
                 not reflected in these savings, but a simple analysis
                 shows that energy savings can be achieved even with
                 relatively inefficient DC-DC converters. Datapath
                 resource requirements were found to vary greatly with
                 respect to number of supplies. Area penalties ranged
                 from 0\% to 170\%. Implications of multiple voltage
                 design for IC layout and power supply requirements are
                 discussed.",
  acknowledgement = ack-nhfb,
  generalterms = "Algorithms; Design",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "datapath scheduling; DSP; high-level synthesis; level
                 conversion; low power design; multiple voltage; power
                 optimization; scheduling",
  subject =      "Hardware --- Register-Transfer-Level Implementation
                 --- Design (B.5.1): {\bf Data-path design}; Hardware
                 --- Register-Transfer-Level Implementation --- Design
                 Aids (B.5.2): {\bf Optimization}; Mathematics of
                 Computing --- Numerical Analysis --- Optimization
                 (G.1.6): {\bf Integer programming}",
}

%%% Volume 2, number 3 of this quarterly journal is the July 1997 issue.
@Article{Yalcin:1997:EPC,
  author =       "Hakan Yalcin and John P. Hayes",
  title =        "Event propagation conditions in circuit delay
                 computation",
  journal =      j-TODAES,
  volume =       "2",
  number =       "3",
  pages =        "249--280",
  month =        jul,
  year =         "1997",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1997-2-3/p249-yalcin/p249-yalcin.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1997-2-3/p249-yalcin/",
  abstract =     "Accurate and efficient computation of delays is a
                 central problem in computer-aided design of complex
                 VLSI circuits. Delays are determined by events (signal
                 transitions) propagated from the inputs of a circuit to
                 its outputs, so precise characterization of event
                 propagation is required for accurate delay computation.
                 Although many different propagation conditions (PCs)
                 have been proposed for delay computation, their
                 properties and relationships have been far from clear.
                 We present a systematic analysis of delay computation
                 based on a series of waveform models that capture
                 signal behavior rigorously at different levels of
                 details. The most general model, called the exact or W0
                 model, specifies each event occurring in a circuit
                 signal. A novel method is presented that generates
                 approximate waveforms by progressively eliminating
                 signal values from the exact model. For each waveform
                 model, we derive the PCs that correctly capture the
                 requirements under which an event propagates along a
                 path. The waveform models and their PCs are shown to
                 form a well-defined hierarchy, which provides a means
                 to trade accuracy for computational effort. The
                 relationships among the derived PCs and existing ones
                 are analyzed in depth. It is proven that though many
                 PCs, such as the popular floating mode condition,
                 produce a correct upper bound on the circuit delay,
                 they can fail to recognize event propagation in some
                 instances. This analysis further enables us to derive
                 new and useful PCs. We describe such a PC, called safe
                 static. Experimental results demonstrate that safe
                 static provides an excellent accuracy/efficiency
                 tradeoff.",
  acknowledgement = ack-nhfb,
  generalterms = "Performance; Theory; Verification",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "delay computation; event propagation; false path; path
                 sensitization; propagation condition; timing analysis;
                 waveform modeling",
  subject =      "Hardware --- Integrated Circuits --- Design Aids
                 (B.7.2): {\bf Verification}; Hardware --- Logic Design
                 --- Design Aids (B.6.3): {\bf Verification}",
}

%%% Volume 2, number 3 of this quarterly journal is the July 1997 issue.
@Article{Thadikaran:1997:ACB,
  author =       "Paul Thadikaran and Sreejit Chakravarty and Janak
                 Patel",
  title =        "Algorithms to compute bridging fault coverage of
                 {IDDQ} test sets",
  journal =      j-TODAES,
  volume =       "2",
  number =       "3",
  pages =        "281--305",
  month =        jul,
  year =         "1997",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1997-2-3/p281-thadikaran/p281-thadikaran.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1997-2-3/p281-thadikaran/",
  abstract =     "We present two algorithms, called list-based scheme
                 and tree-based scheme, to compute bridging fault (BF)
                 coverage of $I_{DDQ}$ tests. These algorithms use
                 the novel idea of ``indistinguishable pairs,'' which
                 makes it more efficient and versatile than known fault
                 simulation algorithms. Unlike known algorithms, the two
                 algorithms can be used for combinational as well as
                 sequential circuits and for arbitrary sets of BFs.
                 Experiments show that the tree-based scheme is, in
                 general, better than the list-based scheme. But the
                 list-based scheme is better for some classes of
                 faults.",
  acknowledgement = ack-nhfb,
  generalterms = "Algorithms; Design",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  subject =      "Hardware --- Logic Design --- Design Aids (B.6.3):
                 {\bf Simulation}",
}

%%% Check page gap: 306--311 between issues 3 and 4 of volume 2 ??

%%% Volume 2, number 4 of this quarterly journal is the October 1997 issue.
@Article{Xu:1997:LDR,
  author =       "Min Xu and Fadi J. Kurdahi",
  title =        "Layout-driven {RTL} binding techniques for high-level
                 synthesis using accurate estimators",
  journal =      j-TODAES,
  volume =       "2",
  number =       "4",
  pages =        "312--343",
  month =        oct,
  year =         "1997",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1997-2-4/p312-xu/p312-xu.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1997-2-4/p312-xu/",
  abstract =     "The importance of effective and efficient accounting
                 of layout effects is well established in High-Level
                 Synthesis (HLS), since it allows more realistic
                 exploration of the design space and the generation of
                 solutions with predictable metrics. This feature is
                 highly desirable in order to avoid unnecessary
                 iterations through the design process. In this article,
                 we address the problem of layout-driven
                 register-transfer-level (RTL) binding as this step has
                 a direct relevance to the final performance of the
                 design. By producing not only an RTL design but also an
                 approximate physical topology of the chip-level
                 implementation, we ensure that the solution will
                 perform at the predicted metric once implemented, thus
                 avoiding unnecessary delays in the design process.",
  acknowledgement = ack-nhfb,
  generalterms = "Algorithms; Design; Measurement; Performance",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  subject =      "Hardware --- Register-Transfer-Level Implementation
                 --- Design (B.5.1); Hardware --- Integrated Circuits
                 --- Types and Design Styles (B.7.1): {\bf Gate arrays};
                 Hardware --- Integrated Circuits --- Design Aids
                 (B.7.2): {\bf Layout}",
}

%%% Volume 2, number 4 of this quarterly journal is the October 1997 issue.
@Article{Munch:1997:EIB,
  author =       "Michael M{\"u}nch and Norbert Wehn and Manfred
                 Glesner",
  title =        "An efficient {ILP-based} scheduling algorithm for
                 control-dominated {VHDL} descriptions",
  journal =      j-TODAES,
  volume =       "2",
  number =       "4",
  pages =        "344--364",
  month =        oct,
  year =         "1997",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1997-2-4/p344-munch/p344-munch.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1997-2-4/p344-munch/",
  abstract =     "To adopt behavioral synthesis techniques in existing
                 design flows, the synthesis methodology must provide
                 the designer with a mechanism to specify a component's
                 interface timing. This will permit pre- and
                 postsynthesis validation through cosimulation with
                 other subsystems or even through formal verification.
                 In control-flow dominated designs, additional timing
                 constraints will result in a complex
                 specification/constraint system for which the
                 scheduling problem has been shown to be NP-complete. In
                 this article, we present a mathematical framework for
                 solving a special instance of the scheduling problem in
                 control-flow dominated behavioral VHDL descriptions
                 given that the timing of I/O signals has been
                 completely or partially specified. It is based on a
                 code-transformation approach that fully preserves the
                 VHDL semantics. The scheduling problem is mapped onto
                 an integer linear program (ILP) solvable in polynomial
                 time assuming a restricted partial order on selected
                 statements. It captures both control-flow and timing
                 constraints in a single model and also exploits
                 dataflow information to optimize the statement sequence
                 across basic block boundaries.",
  acknowledgement = ack-nhfb,
  generalterms = "Algorithms",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  subject =      "Hardware --- Register-Transfer-Level Implementation
                 --- Design (B.5.1): {\bf Control design}; Hardware ---
                 Register-Transfer-Level Implementation --- Design Aids
                 (B.5.2): {\bf Automatic synthesis}; Hardware ---
                 Register-Transfer-Level Implementation --- Design Aids
                 (B.5.2): {\bf Optimization}",
}

%%% Volume 2, number 4 of this quarterly journal is the October 1997 issue.
@Article{Freund:1997:CEA,
  author =       "L. Freund and M. Israel and F. Rousseau and J. M.
                 Berg{\'e} and M. Auguin and C. Belleudy and G.
                 Gogniat",
  title =        "A codesign experiment in acoustic echo cancellation
                 {GMDF}",
  journal =      j-TODAES,
  volume =       "2",
  number =       "4",
  pages =        "365--383",
  month =        oct,
  year =         "1997",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1997-2-4/p365-freund/p365-freund.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1997-2-4/p365-freund/",
  abstract =     "Continuous advances in processor and ASIC technologies
                 enable the integration of more and more complex
                 embedded systems. Embedded systems have become
                 commonplace in recent years. Since their
                 implementations generally require the use of
                 heterogeneous resources (e.g., processor cores, ASICs)
                 in one system with hard design constraints, the
                 importance of hardware/software codesign methodologies
                 increases steadily. HW/SW codesign approaches consist
                 generally of HW/SW partitioning and scheduling,
                 constrained code generation, and hardware and interface
                 synthesis. This article presents the codesign of an
                 industrial experiment in acoustic echo cancellation
                 (GMDF algorithm); and emphasizes the partitioning and
                 communication synthesis steps. This experiment brings
                 to light interesting problems such as data and program
                 distribution between system memories and the modeling
                 of communications in the partitioning process.",
  acknowledgement = ack-nhfb,
  generalterms = "Design; Experimentation",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  subject =      "Computer Systems Organization --- Special-Purpose and
                 Application-Based Systems (C.3)",
}

%%% Volume 2, number 4 of this quarterly journal is the October 1997 issue.
@Article{Panda:1997:MDO,
  author =       "Preeti Ranjan Panda and Nikil D. Dutt and Alexandru
                 Nicolau",
  title =        "Memory data organization for improved cache
                 performance in embedded processor applications",
  journal =      j-TODAES,
  volume =       "2",
  number =       "4",
  pages =        "384--409",
  month =        oct,
  year =         "1997",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1997-2-4/p384-panda/p384-panda.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1997-2-4/p384-panda/",
  abstract =     "Code generation for embedded processors opens up the
                 possibility for several performance optimization
                 techniques that have been ignored by traditional
                 compilers due to compilation time constraints. We
                 present techniques that take into account the
                 parameters of the data caches for organizing scalar and
                 array variables declared in embedded code into memory,
                 with the objective of improving data cache performance.
                 We present techniques for clustering variables to
                 minimize compulsory cache misses, and for solving the
                 memory assignment problem to minimize conflict cache
                 misses. Our experiments with benchmark code kernels
                 from DSP and other domains on the CW4001 embedded
                 processor from LSI Logic indicate significant
                 improvements in data cache performance by the
                 application of our memory organization technique.",
  acknowledgement = ack-nhfb,
  generalterms = "Performance; Verification",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "cache memory; data cache; memory synthesis; system
                 design; system synthesis",
  subject =      "Hardware --- Memory Structures --- Design Styles
                 (B.3.2): {\bf Cache memories}; Software --- Programming
                 Languages --- Processors (D.3.4): {\bf Compilers}",
}

%%% Volume 2, number 4 of this quarterly journal is the October 1997 issue.
@Article{Tomiyama:1997:CPT,
  author =       "Hiroyuki Tomiyama and Hiroto Yasuura",
  title =        "Code placement techniques for cache miss rate
                 reduction",
  journal =      j-TODAES,
  volume =       "2",
  number =       "4",
  pages =        "410--429",
  month =        oct,
  year =         "1997",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1997-2-4/p410-tomiyama/p410-tomiyama.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1997-2-4/p410-tomiyama/",
  abstract =     "In the design of embedded systems with cache memories,
                 it is important to minimize the cache miss rates to
                 reduce power consumption of the systems as well as
                 improve the performance. In this article, we propose
                 two code placement methods (a simplified method and a
                 refined one) to reduce miss rates of instruction
                 caches. We first define a simplified code placement
                 problem without an attempt to minimize the code size.
                 The problem is formulated as an integer linear
                 programming (ILP) problem, by which an optimal
                 placement can be found. Experimental results show that
                 the simplified method reduces cache misses by an
                 average of 30\% (max. 77\%). However, the code size
                 obtained by the simplified method tends to be large,
                 which inevitably leads to a larger memory size. In
                 order to overcome this limitation, we further propose a
                 refined code placement method in which the code size
                 provided by the system designers must be satisfied. The
                 effectiveness of the refined method is also
                 demonstrated.",
  acknowledgement = ack-nhfb,
  generalterms = "Design; Performance",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  subject =      "Software --- Programming Languages --- Processors
                 (D.3.4): {\bf Code generation}; Hardware --- Control
                 Structures and Microprogramming --- Microprogram Design
                 Aids (B.1.4): {\bf Languages and compilers}; Software
                 --- Programming Languages --- Processors (D.3.4): {\bf
                 Optimization}; Hardware --- Control Structures and
                 Microprogramming --- Microprogram Design Aids (B.1.4):
                 {\bf Optimization}",
}

%%% Opens volume 3: number 1, January 1998, pages 1--20.
@Article{Johnson:1998:MAS,
  author          = {E. W. Johnson and J. B. Brockman},
  title           = {Measurement and analysis of sequential design processes},
  journal         = j-TODAES,
  volume          = {3},
  number          = {1},
  pages           = {1--20},
  month           = jan,
  year            = {1998},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Fri Jul 27 10:05:33 MDT 2001},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL             = {http://www.acm.org/pubs/articles/journals/todaes/1998-3-1/p1-johnson/p1-johnson.pdf;
                     http://www.acm.org/pubs/citations/journals/todaes/1998-3-1/p1-johnson/},
  abstract        = {As design processes continue to increase in complexity it
                     is important to base process-improvement decisions on
                     quantitative analysis. We describe the development of an
                     analytical approach for evaluating sequential
                     design-process completion time and for determining the
                     sensitivities of design time with respect to individual
                     task durations and transition probabilities. Techniques
                     are also detailed for collecting process metadata and
                     calibrating a design process model. Example applications
                     illustrate the use of the methodology in analyzing and
                     improving software and hardware design processes.},
  acknowledgement = ack-nhfb,
  generalterms    = {Design; Documentation; Human Factors; Management;
                     Measurement},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {management science; sensitivity analysis; workflow},
  subject         = {Computer Applications --- Computer-Aided Engineering
                     (J.6); Computing Milieux --- Computers and Education ---
                     Computer and Information Science Education (K.3.2)},
}

%%% The shortest-path bound quoted in the abstract is cubic, $O(n^3)$.
@Article{Khordoc:1998:SVA,
  author =       "K. Khordoc and E. Cerny",
  title =        "Semantics and verification of action diagrams with
                 linear timing",
  journal =      j-TODAES,
  volume =       "3",
  number =       "1",
  pages =        "21--50",
  month =        jan,
  year =         "1998",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1998-3-1/p21-khordoc/p21-khordoc.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1998-3-1/p21-khordoc/",
  abstract =     "Specifications containing linear timing constraints,
                 such as found in action diagrams (timing diagrams)
                 defining interface behaviors, are often used in
                 practice. Although efficient $O(n^3)$ shortest path
                 algorithms exist for computing the minimum and maximum
                 time distances between actions, subject to the timing
                 constraints, there is so far no accurate method that
                 can decide (a) whether a specification of this kind is
                 realizable (i.e., can be simulated by a causal system),
                 and (b) given the action diagrams of the interfaces of
                 two or more communicating systems, whether the systems
                 implementing such independent specifications will
                 correctly interoperate (i.e., satisfy the respective
                 protocols and timing assumptions). First we illustrate
                 the weakness of existing action diagram verification
                 techniques: the causality issue is not addressed, and
                 the proposed methods to answer the compatibility
                 (interoperability) question yield false negative
                 answers in many practical situations. We then define
                 the meaning of causality in an action diagram
                 specification and state a set of sufficient conditions
                 for causality to hold. This development then leads to
                 an exact procedure for the verification of the
                 interface compatibility of communicating action
                 diagrams. The results are illustrated on a practical
                 example.",
  acknowledgement = ack-nhfb,
  generalterms = "Algorithms; Design; Theory; Verification",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "causality; compatibility of interfaces; hardware
                 interfaces; timing diagrams; timing verification",
  subject =      "Hardware --- Integrated Circuits --- Design Aids
                 (B.7.2); Software --- Software Engineering ---
                 Requirements/Specifications (D.2.1)",
}

%%% Volume 3, number 1 (January 1998), pages 51--75.
@Article{Liao:1998:NVC,
  author          = {S. Liao and K. Keutzer and S. Tjiang and S. Devadas},
  title           = {A new viewpoint on code generation for directed acyclic
                     graphs},
  journal         = j-TODAES,
  volume          = {3},
  number          = {1},
  pages           = {51--75},
  month           = jan,
  year            = {1998},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Fri Jul 27 10:05:33 MDT 2001},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL             = {http://www.acm.org/pubs/articles/journals/todaes/1998-3-1/p51-liao/p51-liao.pdf;
                     http://www.acm.org/pubs/citations/journals/todaes/1998-3-1/p51-liao/},
  abstract        = {We present a new viewpoint on code generation for directed
                     acyclic graphs (DAGs). Our formulation is based on
                     {\em binate covering}, the problem of satisfying, with
                     minimum cost, a set of disjunctive clauses, and can take
                     into account commutativity of operators and of the
                     machine model. An important contribution of this work is
                     a set of necessary and sufficient conditions for a valid
                     schedule to be derived, based on the notion of
                     {\em worms\/} and {\em worm-partitions}. This set of
                     conditions can be compactly expressed with clauses that
                     relate scheduling to code selection. For the case of
                     one-register machines, we can derive clauses that lead to
                     generation of optimal code for the DAG. Recent advances
                     in exact binate covering algorithms allows us to use this
                     strategy to generate optimal code for large basic blocks.
                     The optimal code generated by our algorithm results in
                     significant reductions in overall code size.},
  acknowledgement = ack-nhfb,
  generalterms    = {Algorithms; Design; Theory},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {binate covering; code generation; directed acyclic
                     graphs},
  subject         = {Software --- Programming Languages --- Processors (D.3.4);
                     Mathematics of Computing --- Discrete Mathematics ---
                     Graph Theory (G.2.2)},
}

%%% The title names the framework, then describes it after a colon.
@Article{Shi:1998:CCT,
  author =       "C.-J. Shi and J. A. Brzozowski",
  title =        "Cluster-cover: a theoretical framework for a class of
                 {VLSI-CAD} optimization problems",
  journal =      j-TODAES,
  volume =       "3",
  number =       "1",
  pages =        "76--107",
  month =        jan,
  year =         "1998",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1998-3-1/p76-shi/p76-shi.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1998-3-1/p76-shi/",
  abstract =     "This article introduces a mathematical framework
                 called cluster-cover. We show that this framework
                 captures the combinatorial structure of a class of VLSI
                 design optimization problems, including two-level logic
                 minimization, constrained encoding, multilayer
                 topological planar routing, application timing
                 assignment for delay-fault testing, and minimization of
                 monitoring logic for BIST enhancement. These apparently
                 unrelated problems can all be cast into two
                 metaproblems in our framework: finding a maximum
                 cluster and finding a minimum cover. We describe
                 paradigms for developing algorithms for these problems.
                 First, a simple heuristic called greedy peeling is
                 presented and characterized. We derive sufficient
                 conditions that guarantee optimum solutions by greedy
                 peeling. We generalize the performance analysis of a
                 multilayer topological planar routing heuristic to
                 greedy peeling for the general cluster-cover problems.
                 We propose a performance bound of greedy set covering
                 that can be computed efficiently for a given problem
                 instance; this bound is much tighter than the
                 previously known bounds. Second, prime covering ---
                 originally developed for logic minimization --- is
                 generalized to finding exact solutions for
                 cluster-cover problems. Previously, only the connection
                 between logic minimization and constrained encoding was
                 known.",
  acknowledgement = ack-nhfb,
  generalterms = "Design; Theory",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "cluster-cover; logic minimization; NP-completeness;
                 self-checking logic design; state assignment;
                 topological routing",
  subject =      "Hardware --- Logic Design --- Design Aids (B.6.3);
                 Hardware --- Integrated Circuits --- General (B.7.0);
                 Theory of Computation --- Analysis of Algorithms and
                 Problem Complexity --- Nonnumerical Algorithms and
                 Problems (F.2.2)",
}

%%% Volume 3, number 2 (April 1998), pages 109--135.
@Article{Hsiung:1998:IIC,
  author          = {Pao-Ann Hsiung and Chung-Hwang Chen and Trong-Yen Lee and
                     Sao-Jie Chen},
  title           = {{ICOS}: an intelligent concurrent object-oriented
                     synthesis methodology for multiprocessor systems},
  journal         = j-TODAES,
  volume          = {3},
  number          = {2},
  pages           = {109--135},
  month           = apr,
  year            = {1998},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Fri Jul 27 10:05:33 MDT 2001},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL             = {http://www.acm.org/pubs/articles/journals/todaes/1998-3-2/p109-hsiung/p109-hsiung.pdf;
                     http://www.acm.org/pubs/citations/journals/todaes/1998-3-2/p109-hsiung/},
  abstract        = {The design of multiprocessor architectures differs from
                     uniprocessor systems in that the number of processors and
                     their interconnection must be considered. This leads to
                     an enormous increase in the design-space exploration
                     time, which is exponential in the total number of system
                     components. The methodology proposed here, called
                     {\em Intelligent Concurrent Object-Oriented Synthesis\/}
                     (ICOS) methodology, makes feasible the synthesis of
                     complex multiprocessor systems through the application of
                     several techniques that speed up the design process. ICOS
                     is based on {\em Performance Synthesis Methodology\/}
                     (PSM), a recently proposed object-oriented system-level
                     design methodology. Four major techniques:
                     object-oriented design, fuzzy design-space exploration,
                     concurrent design, and intelligent reuse of complete
                     subsystems are integrated in ICOS. First, object-oriented
                     modeling and design, through the use of object-oriented
                     relationships and operators, make the whole design
                     process manageable and maintainable in ICOS. Second,
                     fuzzy comparison applied to the specializations or
                     instances of components reduces the exponential growth of
                     design-space exploration in ICOS. Third, independent
                     components from different design alternatives are
                     synthesized in parallel; this design concurrency shortens
                     the overall design time. Lastly, the resynthesis of
                     complete subsystems can be avoided through the
                     application of learning, thus making the methodology
                     intelligent enough to reuse previous design
                     configurations. Experiments show that all these applied
                     techniques contribute to the synthesis efficiency and the
                     degree of automation in ICOS.},
  acknowledgement = ack-nhfb,
  generalterms    = {Design},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {concurrent object-oriented system-level synthesis; fuzzy
                     design-space exploration; learning},
  subject         = {Computer Applications --- Computer-Aided Engineering
                     (J.6): {\bf Computer-aided design (CAD)}; Computing
                     Methodologies --- Artificial Intelligence --- Learning
                     (I.2.6): {\bf Knowledge acquisition}; Computing
                     Methodologies --- Artificial Intelligence --- Learning
                     (I.2.6): {\bf Analogies}; Computing Methodologies ---
                     Artificial Intelligence --- Deduction and Theorem Proving
                     (I.2.3): {\bf Deduction}; Computer Systems Organization
                     --- Processor Architectures --- Multiple Data Stream
                     Architectures (Multiprocessors) (C.1.2)},
}

@Article{Araujo:1998:CGF,
  author =       "Guido Araujo and Sharad Malik",
  title =        "Code generation for fixed-point {DSPs}",
  journal =      j-TODAES,
  volume =       "3",
  number =       "2",
  pages =        "136--161",
  month =        apr,
  year =         "1998",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1998-3-2/p136-araujo/p136-araujo.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1998-3-2/p136-araujo/",
  abstract =     "This paper examines the problem of code-generation for
                 Digital Signal Processors (DSPs). We make two major
                 contributions. First, for an important class of DSP
                 architectures, we propose an optimal $O(n)$ algorithm
                 for the tasks of register allocation and instruction
                 scheduling for expression trees. Optimality is
                 guaranteed by sufficient conditions derived from a
                 structural representation of the processor Instruction
                 Set Architecture (ISA). Second, we develop heuristics
                 for the case when basic blocks are Directed Acyclic
                 Graphs (DAGs).",
  acknowledgement = ack-nhfb,
  generalterms = "Algorithms",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "code generation; register allocation; scheduling",
  subject =      "Software --- Programming Languages --- Processors
                 (D.3.4): {\bf Optimization}; Software --- Programming
                 Languages --- Processors (D.3.4): {\bf Code
                 generation}",
}

@Article{Tiruvuri:1998:ELB,
  author =       "Giri Tiruvuri and Moon Chung",
  title =        "Estimation of lower bounds in scheduling algorithms
                 for high-level synthesis",
  journal =      j-TODAES,
  volume =       "3",
  number =       "2",
  pages =        "162--180",
  month =        apr,
  year =         "1998",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1998-3-2/p162-tiruvuri/p162-tiruvuri.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1998-3-2/p162-tiruvuri/",
  abstract =     "To produce efficient design, a high-level synthesis
                 system should be able to analyze a variety of
                 cost-performance tradeoffs. The system can use
                 lower-bound performance estimate methods to identify
                 and prune inferior designs without producing complete
                 designs. We present a lower-bound performance estimate
                 method that is not only faster than existing methods,
                 but also produces better lower bounds. In most cases,
                 the lower bound produced by our algorithm is tight.
                 \par

                 Scheduling algorithms such as branch-and-bound need
                 fast and effective lower-bound estimate methods, often
                 for a large number of partially scheduled dataflow
                 graphs, to reduce the search space. We extend our
                 method to efficiently estimate completion time of
                 partial schedules. This problem is not addressed by
                 existing methods in the literature. Our lower-bound
                 estimate is shown to be very effective in reducing the
                 size of the search space when used in a
                 branch-and-bound scheduling algorithm. \par

                 Our methods can handle multicycle operations, pipelined
                 functional units, and chaining of operations. We also
                 present an extension to handle conditional branches. A
                 salient feature of the extended method is its
                 applicability to speculative execution as well as
                 C-select implementation of conditional branches.",
  acknowledgement = ack-nhfb,
  generalterms = "Algorithms; Design; Experimentation; Measurement;
                 Performance",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "dynamic programming; high-level synthesis; lower-bound
                 estimated; scheduling",
  subject =      "Hardware --- Register-Transfer-Level Implementation
                 --- General (B.5.0); Hardware --- Performance and
                 Reliability --- Performance Analysis and Design Aids
                 (B.8.2); Theory of Computation --- Analysis of
                 Algorithms and Problem Complexity --- Nonnumerical
                 Algorithms and Problems (F.2.2): {\bf Sequencing and
                 scheduling}; Hardware --- Integrated Circuits ---
                 General (B.7.0)",
}

@Article{Vahid:1998:FPI,
  author =       "Frank Vahid and Thuy Dm Le and Yu-Chin Hsu",
  title =        "Functional partitioning improvements over structural
                 partitioning for packaging constraints and synthesis:
                 tool performance",
  journal =      j-TODAES,
  volume =       "3",
  number =       "2",
  pages =        "181--208",
  month =        apr,
  year =         "1998",
  CODEN =        "ATASFO",
  DOI =          "http://dx.doi.org/10.1145/290833.290841",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1998-3-2/p181-vahid/p181-vahid.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1998-3-2/p181-vahid/",
  abstract =     "Incorporating functional partitioning into a synthesis
                 methodology leads to several important advantages. In
                 functional partitioning, we first partition a
                 functional specification into smaller subspecifications
                 and then synthesize structure for each, in contrast to
                 the current approach of first synthesizing structure
                 for the entire specification and then partitioning that
                 structure. One advantage is the improvement in I/O
                 performance and package count, when partitioning among
                 hardware blocks with size and I/O constraints, such as
                 FPGAs or blocks within an ASIC. A second advantage is
                 reduction in synthesis runtimes. We describe these
                 important advantages, concluding that further research
                 on functional partitioning can lead to improved results
                 from synthesis environments.",
  acknowledgement = ack-nhfb,
  generalterms = "Design",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "behavioral synthesis; functional partitioning;
                 system-level design",
  subject =      "Hardware --- Register-Transfer-Level Implementation
                 --- Design Aids (B.5.2): {\bf Automatic synthesis};
                 Hardware --- Register-Transfer-Level Implementation ---
                 Design Aids (B.5.2): {\bf Hardware description
                 languages}; Hardware --- Register-Transfer-Level
                 Implementation --- Design Aids (B.5.2): {\bf
                 Optimization}; Computer Applications --- Computer-Aided
                 Engineering (J.6): {\bf Computer-aided design (CAD)}",
}

@Article{Koch:1998:BBD,
  author =       "Gernot H. Koch and W. Rosenstiel and U. Kebschull",
  title =        "Breakpoints and breakpoint detection in source-level
                 emulation",
  journal =      j-TODAES,
  volume =       "3",
  number =       "2",
  pages =        "209--230",
  month =        apr,
  year =         "1998",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1998-3-2/p209-koch/p209-koch.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1998-3-2/p209-koch/",
  abstract =     "We present an approach for accelerating the validation
                 speed of behavioral system descriptions through
                 hardware emulation. The method allows source-level
                 debugging of running hardware specified in behavioral
                 VHDL in a way similar to source-level debugging in
                 software programming languages. We discuss breakpoints
                 in source-level emulation and how the circuit generated
                 by high-level synthesis has to be modified to work with
                 breakpoints. Breakpoint encoding and detection are
                 shown in detail. Our approach allows breakpoint
                 detection by hardware without seriously slowing the
                 circuit or dramatically increasing its size.",
  acknowledgement = ack-nhfb,
  generalterms = "Design; Performance; Verification",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "debugging; emulation; high-level synthesis",
  subject =      "Hardware --- Integrated Circuits --- Design Aids
                 (B.7.2): {\bf Simulation}; Hardware --- Logic Design
                 --- Design Aids (B.6.3): {\bf VHDL}",
}

@Article{Pomeranz:1998:FTG,
  author =       "Irith Pomeranz and Sudhakar M. Reddy",
  title =        "Functional test generation for delay faults in
                 combinational circuits",
  journal =      j-TODAES,
  volume =       "3",
  number =       "2",
  pages =        "231--248",
  month =        apr,
  year =         "1998",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1998-3-2/p231-pomeranz/p231-pomeranz.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1998-3-2/p231-pomeranz/",
  abstract =     "We propose a functional fault model for delay faults
                 in combinational circuits and describe a functional
                 test generation procedure based on this model. The
                 proposed method is most suitable when a gate-level
                 description of the circuit-under-test, necessary for
                 employing existing gate-level delay fault test
                 generators, is not available or does not accurately
                 describe the circuit. It is also suitable for
                 generating tests in early design stages of a circuit,
                 before a gate-level implementation is selected. In
                 addition, it can potentially be employed to supplement
                 conventional test generators for gate-level circuits to
                 reduce the cost of handling large numbers of paths. A
                 parameter called $\Delta$ is used to control the number of
                 functional faults targeted and thus the number of tests
                 generated. If $\Delta$ is unlimited, the functional test set
                 detects every robustly testable path delay fault in any
                 gate-level implementation of the given circuit. An
                 appropriate subset of tests can be selected once the
                 implementation is known. The test sets generated for
                 various values of $\Delta$ are fault simulated on gate-level
                 realizations to demonstrate their effectiveness. The
                 experiments indicate that functional test sets may be
                 able to identify functions whose realizations have low
                 path delay fault coverage.",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "delay faults; function-robust tests; functional delay
                 fault model; path delay faults; robust tests",
  subject =      "Hardware --- Performance and Reliability ---
                 Reliability, Testing, and Fault-Tolerance (B.8.1);
                 Hardware --- Integrated Circuits --- Types and Design
                 Styles (B.7.1)",
}

@Article{Chen:1998:SDI,
  author =       "X. T. Chen and F. J. Meyer and F. Lombardi",
  title =        "Structural diagnosis of interconnects by coloring",
  journal =      j-TODAES,
  volume =       "3",
  number =       "2",
  pages =        "249--271",
  month =        apr,
  year =         "1998",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1998-3-2/p249-chen/p249-chen.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1998-3-2/p249-chen/",
  abstract =     "This paper presents a new approach for diagnosing
                 shorts in interconnects in which the adjacencies
                 between nets are known. This structural approach
                 exploits different graph coloring techniques to
                 generate a test set with no aliasing and confounding,
                 i.e., full diagnosis (detection and location) is
                 accomplished. Initially, a simple coloring approach
                 based on a greedy condition of the adjacency graph is
                 proposed for fault detection. Then, the conditions for
                 aliasing and confounding are analyzed with respect to
                 the sizes of the possible shorts. These results are
                 used to generate new colors using a process called
                 color mixing. Color mixing guarantees that additional
                 tests, required in order to avoid aliasing/confounding,
                 will use appropriate codes. The characteristics of
                 unbalanced/balanced codes for encoding the colors in
                 the vector-generation process of interconnect diagnosis
                 are discussed and are proved to yield full diagnosis
                 using a novel method. An algorithm for full diagnosis
                 is then presented; this algorithm has an execution
                 complexity of $O(\max(N^2, N \times D^3))$ where $N$ is
                 the number of nets and $D$ is the maximum degree of the
                 nodes in the adjacency graph. Simulation results show
                 that the proposed approach requires a smaller number of
                 test vectors than previous approaches.",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "balanced code; diagnosis; graph coloring;
                 interconnect; syndrome",
  subject =      "Mathematics of Computing --- Discrete Mathematics ---
                 Graph Theory (G.2.2); Computer Applications ---
                 Computer-Aided Engineering (J.6); Hardware ---
                 Performance and Reliability --- Reliability, Testing,
                 and Fault-Tolerance (B.8.1)",
}

@Article{Mehta:1998:ESR,
  author =       "Dinesh P. Mehta",
  title =        "Estimating the storage requirements of the rectangular
                 and {L-shaped} corner stitching data structures",
  journal =      j-TODAES,
  volume =       "3",
  number =       "2",
  pages =        "272--284",
  month =        apr,
  year =         "1998",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1998-3-2/p272-mehta/p272-mehta.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1998-3-2/p272-mehta/",
  abstract =     "This paper proposes a technique for estimating the
                 storage requirements of the Rectangular Corner
                 Stitching (RCS) data structure [Ousterhout 1984] and
                 the L-shaped Corner Stitching (LCS) data structure
                 [Mehta and Blust 1997] on a given circuit by studying
                 its (the circuit's) geometric properties. This provides
                 a method for estimating the storage requirements of a
                 circuit without having to implement the corner
                 stitching data structure, which is a tedious and
                 time-consuming task. This technique can also be used to
                 estimate the amount of space saved by employing the LCS
                 data structure over the RCS data structure on a given
                 circuit.",
  acknowledgement = ack-nhfb,
  generalterms = "Algorithms",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "corner stitching; data structures; L-shapes; memory
                 requirements analysis; rectangle; rectilinear
                 polygons",
  subject =      "Hardware --- Integrated Circuits --- Design Aids
                 (B.7.2): {\bf Layout}; Data --- Data Storage
                 Representations (E.2): {\bf Linked representations};
                 Theory of Computation --- Analysis of Algorithms and
                 Problem Complexity --- Nonnumerical Algorithms and
                 Problems (F.2.2): {\bf Geometrical problems and
                 computations}",
}

@Article{Bhattacharya:1998:ERS,
  author =       "Subhrajit Bhattacharya and Sujit Dey and Franc
                 Brglez",
  title =        "Effects of resource sharing on circuit delay: an
                 assignment algorithm for clock period optimization",
  journal =      j-TODAES,
  volume =       "3",
  number =       "2",
  pages =        "285--307",
  month =        apr,
  year =         "1998",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1998-3-2/p285-bhattacharya/p285-bhattacharya.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1998-3-2/p285-bhattacharya/",
  abstract =     "This paper analyzes the effect of resource sharing and
                 assignment on the clock period of the synthesized
                 circuit. The assignment phase assigns or binds
                 operations of the scheduled behavioral description to a
                 set of allocated resources. We focus on control-flow
                 intensive descriptions, characterized by the presence
                 of mutually exclusive paths due to the presence of
                 nested conditional branches and loops. \par

                 We show that clustering multiple operations in the same
                 state of the schedule, possibly leading to chaining of
                 functional units (FUs) in the RTL circuit, is an
                 effective way to minimize the total number of clock
                 cycles, and hence total execution time. We present an
                 assignment algorithm that is particularly effective for
                 such design styles by minimizing data chaining and
                 hence the clock period of the circuit, thereby leading
                 to further reduction in total execution time.
                 \par

                 Existing resource sharing and assignment approaches for
                 reducing the clock period of the resulting circuit
                 either increase the resource allocation or use faster
                 modules, both leading to larger area
                 requirements. In this paper we show that even when the
                 type of available resource units and the number of
                 resource units of each type is fixed, different
                 assignments may lead to circuits with significant
                 differences in clock period. \par

                 We provide a comprehensive analysis of how resource
                 sharing and assignment introduces long paths in the
                 circuit. Based on the analysis, we develop an
                 assignment algorithm that uses a high-level delay
                 estimator to assign operations to a fixed set of
                 available resources so as to minimize the clock period
                 of the resultant circuit, with no or minimal effect on
                 the area of the circuit. Experimental results on
                 several conditional-intensive designs demonstrate the
                 effectiveness of the assignment algorithm.",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "clock period; high-level synthesis; resource sharing",
  subject =      "Hardware --- Register-Transfer-Level Implementation
                 --- Design Aids (B.5.2): {\bf Optimization}",
}

@Article{Cabodi:1998:AVB,
  author =       "Gianpiero Cabodi and Paolo Camurati and Stefano Quer",
  title =        "Auxiliary variables for {BDD-based} representation and
                 manipulation of {Boolean} functions",
  journal =      j-TODAES,
  volume =       "3",
  number =       "3",
  pages =        "309--340",
  month =        jul,
  year =         "1998",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1998-3-3/p309-cabodi/p309-cabodi.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1998-3-3/p309-cabodi/",
  abstract =     "BDDs are the state-of-the-art technique for
                 representing and manipulating Boolean functions. Their
                 introduction caused a major leap forward in synthesis,
                 verification, and testing. However, they are often
                 unmanageable because of the large amount of nodes. To
                 attack this problem, we insert auxiliary variables that
                 decompose monolithic BDDs in smaller ones. This method
                 works very well for Boolean function representation. As
                 far as combinational circuits are concerned,
                 representing their functions is the main issue. Going
                 into the sequential domain, we focus on traversal
                 techniques. We show that, once we have Boolean
                 functions in decomposed form, symbolic manipulations
                 are viable and efficient. We investigate the relation
                 between auxiliary variables and static and dynamic
                 ordering strategies. Experimental evidence shows that
                 we achieve a certain degree of independence from
                 variable ordering. Thus, this approach can be an
                 alternative to dynamic re-ordering. Experimental
                 results on Boolean function representation, and exact
                 and approximate forward symbolic traversal of FSMs,
                 demonstrate the benefits both in terms of memory
                 requirements and of CPU time.",
  acknowledgement = ack-nhfb,
  generalterms = "Design; Verification",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "binary decision diagrams; finite state machines;
                 functional decompositions; reachability analysis",
  subject =      "Hardware --- Logic Design --- Design Aids (B.6.3):
                 {\bf Verification}",
}

@Article{Cong:1998:BSC,
  author =       "Jason Cong and Andrew B. Kahng and Cheng-Kok Koh and
                 C.-W. Albert Tsao",
  title =        "Bounded-skew clock and {Steiner} routing",
  journal =      j-TODAES,
  volume =       "3",
  number =       "3",
  pages =        "341--388",
  month =        jul,
  year =         "1998",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1998-3-3/p341-cong/p341-cong.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1998-3-3/p341-cong/",
  abstract =     "We study the minimum-cost bounded-skew routing tree
                 problem under the pathlength (linear) and Elmore delay
                 models. This problem captures several engineering
                 tradeoffs in the design of routing topologies with
                 controlled skew. Our bounded-skew routing algorithm,
                 called the BST/DME algorithm, extends the DME algorithm
                 for exact zero-skew trees via the concept of {\em a
                 merging region}. For a {\em prescribed topology},
                 BST/DME constructs a bounded-skew tree (BST) in two
                 phases: (i) a bottom-up phase to construct a binary
                 tree of merging regions which represent the loci of
                 possible embedding points of the internal nodes, and
                 (ii) a top-down phase to determine the exact locations
                 of the internal nodes. We present two approaches to
                 construct the merging regions: (i) the {\em Boundary
                 Merging and Embedding\/} (BME) method which utilizes
                 merging points that are restricted to the {\em
                 boundaries\/} of merging regions, and (ii) the {\em
                 Interior Merging and Embedding\/} (IME) algorithm which
                 employs a sampling strategy and a dynamic
                 programming-based selection technique to consider
                 merging points that are {\em interior\/} to, as well as
                 on the boundary of, the merging regions. When the
                 topology is not prescribed, we propose a new {\em
                 Greedy\/}-BST/DME algorithm which combines the merging
                 region computation with topology generation. The
                 Greedy-BST/DME algorithm very closely matches the best
                 known heuristics for the zero-skew case and for the
                 unbounded-skew case (i.e., the Steiner minimal tree
                 problem). Experimental results show that our BST
                 algorithms can produce a set of routing solutions with
                 smooth skew and wire length tradeoffs.",
  acknowledgement = ack-nhfb,
  generalterms = "Algorithms; Design; Experimentation; Performance",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "(inter)connection; boundary merging and embedding;
                 bounded-skew; clock tree; Elmore delay; interior
                 merging and embedding; low power; merging region;
                 merging segment; pathlength delay; Steiner tree;
                 synchronization; VLSI; zero-skew",
  subject =      "Hardware --- Integrated Circuits --- Design Aids
                 (B.7.2): {\bf Placement and routing}; Computer
                 Applications --- Computer-Aided Engineering (J.6): {\bf
                 Computer-aided design (CAD)}",
}

@Article{Jone:1998:CAD,
  author =       "Wen-Ben Jone and K. S. Tsai",
  title =        "Confidence analysis for defect-level estimation of
                 {VLSI} random testing",
  journal =      j-TODAES,
  volume =       "3",
  number =       "3",
  pages =        "389--407",
  month =        jul,
  year =         "1998",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1998-3-3/p389-jone/p389-jone.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1998-3-3/p389-jone/",
  abstract =     "The defect level in circuit testing is the percentage
                 of circuits, such as chips, that are defective and
                 shipped for use after testing. Our previously published
                 results showed that the defect level of circuit
                 fabrication and testing should be a probability
                 distribution, rather than a single value, and the
                 concept of confidence degree was proposed [Gondalia et
                 al. 1993; Jone et al. 1995]. In this work, defect level
                 is represented by a confidence interval which is more
                 conventional and easier to interpret. The point
                 estimate of defect level analysis and conditions to
                 avoid meaningless confidence intervals are also
                 investigated. Methods for adaptive random test length
                 determination driven by different confidence intervals
                 or interval length are proposed to meet both test
                 requirements and test costs tradeoff. Finally, a
                 complete test plan that can direct the test flow from
                 fabrication infancy to maturity is suggested.",
  acknowledgement = ack-nhfb,
  generalterms = "Experimentation; Measurement; Performance;
                 Reliability",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "defect level analysis; random testing; test confidence
                 analysis; test quality; VLSI testing",
  subject =      "Hardware --- Performance and Reliability ---
                 Reliability, Testing, and Fault-Tolerance (B.8.1)",
}

@Article{Mathur:1998:RAE,
  author =       "Anmol Mathur and Ali Dasdan and Rajesh K. Gupta",
  title =        "Rate analysis for embedded systems",
  journal =      j-TODAES,
  volume =       "3",
  number =       "3",
  pages =        "408--436",
  month =        jul,
  year =         "1998",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1998-3-3/p408-mathur/p408-mathur.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1998-3-3/p408-mathur/",
  abstract =     "Embedded systems consist of interacting components
                 that are required to deliver a specific functionality
                 under constraints on execution rates and relative time
                 separation of the components. In this article, we model
                 an embedded system using concurrent processes
                 interacting through synchronization. We assume that
                 there are rate constraints on the execution rates of
                 processes imposed by the designer or the environment of
                 the system, where the execution rate of a process is
                 the number of its executions per unit time. We address
                 the problem of computing bounds on the execution rates
                 of processes constituting an embedded system, and
                 propose an interactive rate analysis framework. As part
                 of the rate analysis framework we present an efficient
                 algorithm for checking the consistency of the rate
                 constraints. Bounds on the execution rate of each
                 process are computed using an efficient algorithm based
                 on the relationship between the execution rate of a
                 process and the maximum mean delay cycles in the
                 process graph. Finally, if the computed rates violate
                 some of the rate constraints, some of the processes in
                 the system are redesigned using information from the
                 rate analysis step. This rate analysis framework is
                 implemented in a tool called RATAN. We illustrate by an
                 example how RATAN can be used in an embedded system
                 design.",
  acknowledgement = ack-nhfb,
  generalterms = "Algorithms; Design; Performance; Theory",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "average execution rate; concurrent system modeling;
                 embedded systems; interactive rate violation debugging;
                 rate analysis; rate constraints",
  subject =      "Computer Systems Organization --- Performance of
                 Systems (C.4): {\bf Modeling techniques}; Computer
                 Systems Organization --- Performance of Systems (C.4):
                 {\bf Performance attributes}; Computer Systems
                 Organization --- Special-Purpose and Application-Based
                 Systems (C.3): {\bf Real-time and embedded systems};
                 Computer Systems Organization --- Performance of
                 Systems (C.4): {\bf Design studies}",
}

@Article{Pan:1998:OCP,
  author          = {Peichen Pan and C. L. Liu},
  title           = {Optimal clock period {FPGA} technology mapping for
                     sequential circuits},
  journal         = j-TODAES,
  volume          = {3},
  number          = {3},
  pages           = {437--462},
  month           = jul,
  year            = {1998},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Fri Jul 27 10:05:33 MDT 2001},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL             = {http://www.acm.org/pubs/articles/journals/todaes/1998-3-3/p437-pan/p437-pan.pdf;
                     http://www.acm.org/pubs/citations/journals/todaes/1998-3-3/p437-pan/},
  abstract        = {We study the technology mapping problem for sequential
                     circuits for look-up table (LUT) based field
                     programmable gate arrays (FPGAs). Existing approaches
                     to the problem simply remove the flip-flops (FFs), then
                     map the remaining combinational logic, and finally put
                     the FFs back. These approaches ignore the sequential
                     nature of a circuit and assume the positions of the FFs
                     are fixed. However, FFs in a sequential circuit can be
                     repositioned by a functionality-preserving
                     transformation called retiming. As a result, existing
                     approaches can only consider a very small portion of
                     the available solution space. We propose in this paper
                     a novel approach to the technology mapping problem. In
                     our approach, retiming is integrated into the
                     technology mapping process so as to consider the full
                     solution space. We then present a polynomial technology
                     mapping algorithm that, for a given circuit, produces a
                     mapping solution with the minimum clock period among
                     all possible ways of retiming. The effectiveness of the
                     algorithm is also demonstrated experimentally.},
  acknowledgement = ack-nhfb,
  generalterms    = {Algorithms; Performance},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {clock period; field-programmable gate arrays; FPGAs;
                     logic replication; look-up tables; retiming; sequential
                     synthesis; technology mapping},
  subject         = {Hardware --- Logic Design --- Design Styles (B.6.1):
                     {\bf Sequential circuits}; Hardware --- Logic Design
                     --- Design Aids (B.6.3): {\bf Automatic synthesis};
                     Hardware --- Logic Design --- Design Aids (B.6.3): {\bf
                     Optimization}},
}

@Article{Riepe:1998:EBD,
  author          = {Michael A. Riepe and Karem A. Sakallah},
  title           = {The edge-based design rule model revisited},
  journal         = j-TODAES,
  volume          = {3},
  number          = {3},
  pages           = {463--486},
  month           = jul,
  year            = {1998},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Fri Jul 27 10:05:33 MDT 2001},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL             = {http://www.acm.org/pubs/articles/journals/todaes/1998-3-3/p463-riepe/p463-riepe.pdf;
                     http://www.acm.org/pubs/citations/journals/todaes/1998-3-3/p463-riepe/},
  abstract        = {A model for integrated circuit design rules based on
                     rectangle edge constraints has been proposed by
                     Jeppson, Christensson, and Hedenstierna. This model
                     appears to be the most rigorous proposed to date for
                     the description of such edge-based design rules.
                     However, in certain rare circumstances their model is
                     unable to express the correct design rule when the
                     constrained edges are not adjacent in the layout. We
                     introduce a new notation, called an edge path, which
                     allows us to extend their model to allow for
                     constraints between edges separated by an arbitrary
                     number of intervening edges. Using this notation we
                     enumerate all edge paths that are required to correctly
                     model the original design rule macros of the JCH model,
                     and prove that these macros are sufficient to model the
                     most common rules. We also show how this notation
                     allows us to directly specify many kinds of conditional
                     design rules that required ad hoc specification under
                     the JCH model.},
  acknowledgement = ack-nhfb,
  generalterms    = {Theory; Verification},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {design rule checking; design rules; layout
                     verification},
  subject         = {Computer Applications --- Computer-Aided Engineering
                     (J.6); Hardware --- Integrated Circuits --- Design Aids
                     (B.7.2): {\bf Verification}},
}

@Article{Su:1998:EFL,
  author          = {Alan Su and Yu-Chin Hsu and Ta-Yung Liu and Mike
                     Tien-Chien Lee},
  title           = {Eliminating false loops caused by sharing in control
                     path},
  journal         = j-TODAES,
  volume          = {3},
  number          = {3},
  pages           = {487--495},
  month           = jul,
  year            = {1998},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Fri Jul 27 10:05:33 MDT 2001},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL             = {http://www.acm.org/pubs/articles/journals/todaes/1998-3-3/p487-su/p487-su.pdf;
                     http://www.acm.org/pubs/citations/journals/todaes/1998-3-3/p487-su/},
  abstract        = {In high-level synthesis, resource sharing may result
                     in a circuit containing false loops that create great
                     difficulty in timing validation during the design
                     sign-off phase. It is hence desirable to avoid
                     generating any false loops in a synthesized circuit.
                     Previous work [Stok 1992; Huang et al. 1995] considered
                     mainly data path sharing for false loop elimination.
                     However, for a complete circuit with both data path and
                     control path, false loops can be created due to control
                     logic sharing. In this article, we present a novel
                     approach to detect and eliminate the false loops caused
                     by control logic sharing. An effective filter is
                     devised to reduce the computational complexity of false
                     loop detection, which is based on checking the level
                     numbers that are propagated from data path operators to
                     inputs and outputs of the control path. Only the
                     input/output pairs of the control path identified by
                     the filter are further investigated by traversing into
                     the data path for false loop detection. A removal
                     algorithm is then applied to eliminate the detected
                     false loops, followed by logic minimization to further
                     optimize the circuit. Experimental results show that
                     for the nine example circuits we tested, the final
                     designs after false loop removal and logic minimization
                     give only slightly larger area than the original ones
                     that contain false loops.},
  acknowledgement = ack-nhfb,
  generalterms    = {Algorithms; Design},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {control path; false loop},
  subject         = {Hardware --- Register-Transfer-Level Implementation
                     --- Design Aids (B.5.2): {\bf Automatic synthesis};
                     Hardware --- Register-Transfer-Level Implementation ---
                     Design Aids (B.5.2): {\bf Hardware description
                     languages}; Hardware --- Register-Transfer-Level
                     Implementation --- Design Aids (B.5.2): {\bf
                     Optimization}; Hardware --- Register-Transfer-Level
                     Implementation --- Design Aids (B.5.2): {\bf
                     Verification}},
}

@Article{Zhou:1998:ORR,
  author          = {Hai Zhou and D. F. Wong},
  title           = {Optimal river routing with crosstalk constraints},
  journal         = j-TODAES,
  volume          = {3},
  number          = {3},
  pages           = {496--514},
  month           = jul,
  year            = {1998},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Fri Jul 27 10:05:33 MDT 2001},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL             = {http://www.acm.org/pubs/articles/journals/todaes/1998-3-3/p496-zhou/p496-zhou.pdf;
                     http://www.acm.org/pubs/citations/journals/todaes/1998-3-3/p496-zhou/},
  abstract        = {With the increasing density of VLSI circuits, the
                     interconnection wires are being packed even closer.
                     This has increased the effect of interaction among
                     these wires on circuit performance and hence, the
                     importance of controlling crosstalk. In this article,
                     we consider river routing with crosstalk constraints.
                     Given the positions of the pins in a single-layer
                     routing channel and the maximum tolerable crosstalk
                     between each pair of neighboring nets, we give a
                     polynomial time algorithm to decide whether there is a
                     feasible river routing solution and produce one with
                     minimum crosstalk when it is feasible.},
  acknowledgement = ack-nhfb,
  generalterms    = {Algorithms; Design; Experimentation; Performance},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {crosstalk; river routing},
  subject         = {Hardware --- Integrated Circuits --- Design Aids
                     (B.7.2): {\bf Placement and routing}; Mathematics of
                     Computing --- Discrete Mathematics --- Graph Theory
                     (G.2.2): {\bf Network problems}; Computer Applications
                     --- Computer-Aided Engineering (J.6): {\bf
                     Computer-aided design (CAD)}},
}

@Article{Passerone:1998:MRS,
  author =       "C. Passerone and C. Sansoe and L. Lavagno and R.
                 McGeer and J. Martin and R. Passerone and A.
                 Sangiovanni-Vincentelli",
  title =        "Modeling reactive systems in {Java}",
  journal =      j-TODAES,
  volume =       "3",
  number =       "4",
  pages =        "515--523",
  month =        oct,
  year =         "1998",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1998-3-4/p515-passerone/p515-passerone.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1998-3-4/p515-passerone/",
  abstract =     "We present an application of the Java\texttrademark{}
                 programming language to specify and implement reactive
                 real-time systems. We have developed and tested a
                 collection of classes and methods to describe
                 concurrent modules and their asynchronous communication
                 by means of signals. The control structures are closely
                 patterned after those of the synchronous language {\em
                 Esterel}, succinctly describing concurrency, sequencing
                 and preemption. We show the user-friendliness and
                 efficiency of the proposed technique by using an
                 example from the automotive domain.",
  acknowledgement = ack-nhfb,
  generalterms = "Design; Languages; Theory",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "high level design; Java; prototyping; simulation",
  subject =      "Hardware --- Logic Design --- Design Aids (B.6.3):
                 {\bf Hardware description languages}; Computing
                 Methodologies --- Simulation and Modeling --- Model
                 Validation and Analysis (I.6.4); Computer Applications
                 --- Physical Sciences and Engineering (J.2): {\bf
                 Electronics}; Computer Applications --- Computer-Aided
                 Engineering (J.6): {\bf Computer-aided design (CAD)}",
}

@Article{Wang:1998:MEV,
  author          = {Li-C. Wang and Magdy S. Abadir and Jing Zeng},
  title           = {On measuring the effectiveness of various design
                     validation approaches for {PowerPC} microprocessor
                     embedded arrays},
  journal         = j-TODAES,
  volume          = {3},
  number          = {4},
  pages           = {524--532},
  month           = oct,
  year            = {1998},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Fri Jul 27 10:05:33 MDT 2001},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL             = {http://www.acm.org/pubs/articles/journals/todaes/1998-3-4/p524-wang/p524-wang.pdf;
                     http://www.acm.org/pubs/citations/journals/todaes/1998-3-4/p524-wang/},
  abstract        = {Design validation for embedded arrays remains as a
                     challenging problem in today's microprocessor design
                     environment. At Somerset, validation of array designs
                     relies on both formal verification and vector
                     simulation. Although several methods for array design
                     validation have been proposed and had great success
                     [Ganguly et al. 1996; Pandey et al. 1996, 1997; Wang
                     and Abadir 1997], little evidence has been reported for
                     the effectiveness of these methods with respect to the
                     detection of design errors. In this paper, we measure
                     the effectiveness of different validation approaches
                     based on automatic design error injection and
                     simulation. The technique provides a systematic way to
                     evaluate various validation approaches at both logic
                     and transistor levels. Experimental results on recent
                     PowerPC microprocessor arrays will be discussed and
                     reported.},
  acknowledgement = ack-nhfb,
  generalterms    = {Design; Verification},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {assertion test generation; ATPG; design error model;
                     logic verification; symbolic trajectory evaluation;
                     validation},
  subject         = {Hardware --- Logic Design --- Design Aids (B.6.3):
                     {\bf Simulation}; Hardware --- Logic Design --- Design
                     Aids (B.6.3): {\bf Verification}; Hardware ---
                     Integrated Circuits --- Design Aids (B.7.2): {\bf
                     Simulation}; Hardware --- Integrated Circuits ---
                     Design Aids (B.7.2): {\bf Verification}},
}

@Article{Dasdan:1998:TDD,
  author          = {Ali Dasdan and Dinesh Ramanathan and Rajesh K. Gupta},
  title           = {A timing-driven design and validation methodology for
                     embedded real-time systems},
  journal         = j-TODAES,
  volume          = {3},
  number          = {4},
  pages           = {533--553},
  month           = oct,
  year            = {1998},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Fri Jul 27 10:05:33 MDT 2001},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL             = {http://www.acm.org/pubs/articles/journals/todaes/1998-3-4/p533-dasdan/p533-dasdan.pdf;
                     http://www.acm.org/pubs/citations/journals/todaes/1998-3-4/p533-dasdan/},
  abstract        = {We address the problem of timing constraint derivation
                     and validation for reactive and real-time embedded
                     systems. We assume that such a system is structured
                     into its tasks, and the structure is modeled using a
                     task graph. Our solution uses the timing behavior
                     committed by the environment to the system first to
                     derive the timing constraints on the system's internal
                     behavior and then use them to derive and validate the
                     timing constraints on the system's external behavior.
                     Our solution consists of the following contributions: a
                     generalized task graph model, a comprehensive
                     classification of timing constraints, algorithms for
                     derivation and validation of timing constraints of the
                     system modeled in the generalized task graph model, a
                     codesign methodology that combines the model and the
                     algorithms, and the implementation of this methodology
                     in a tool called RADHA-RATAN. The main advantages of
                     our solution are that it simplifies the problem of
                     ensuring timing correctness of the system by reducing
                     the complexity of the problem from system level to task
                     level, and that it makes the codesign methodology
                     timing-driven in that our solution makes it possible to
                     maintain a handle on the system's timing correctness
                     from very early stages in the system's design flow.},
  acknowledgement = ack-nhfb,
  generalterms    = {Design; Performance; Verification},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {performance verification; period assignment; period
                     derivation; rate assignment; rate derivation;
                     requirements analysis; system-level design; timing
                     analysis; timing-driven codesign},
  subject         = {Computer Systems Organization --- General (C.0): {\bf
                     Systems specification methodology}; Computer Systems
                     Organization --- Special-Purpose and Application-Based
                     Systems (C.3): {\bf Real-time and embedded systems};
                     Computer Systems Organization --- Performance of
                     Systems (C.4): {\bf Modeling techniques}; Computer
                     Systems Organization --- Performance of Systems (C.4):
                     {\bf Performance attributes}; Software --- Operating
                     Systems --- Organization and Design (D.4.7): {\bf
                     Real-time systems and embedded systems}; Software ---
                     Operating Systems --- Performance (D.4.8): {\bf
                     Modeling and prediction}; Computer Applications ---
                     Computer-Aided Engineering (J.6): {\bf Computer-aided
                     design (CAD)}},
}

@Article{Rajan:1998:ASD,
  author =       "S. P. Rajan and M. Fujita and K. Yuan and M. T.-C.
                 Lee",
  title =        "{ATM} switch design by high-level modeling, formal
                 verification and high-level synthesis",
  journal =      j-TODAES,
  volume =       "3",
  number =       "4",
  pages =        "554--562",
  month =        oct,
  year =         "1998",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1998-3-4/p554-rajan/p554-rajan.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1998-3-4/p554-rajan/",
  abstract =     "Asynchronous Transfer Mode (ATM) has emerged as a
                 backbone for high-speed broadband telecommunication
                 networks. In this paper, we present ATM switch design,
                 starting from a parametric high-level model and
                 debugging the model using a combination of formal
                 verification and simulation. The model has been used to
                 synthesize ATM switches according to customers'
                 choices, by choosing concrete values for each of the
                 generic parameters. We provide a pragmatic combination
                 of simulation, model checking, and theorem proving to
                 gain confidence in the ATM switch design correctness.",
  acknowledgement = ack-nhfb,
  generalterms = "Design; Verification",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "ATM switch; high-level design; synthesis;
                 verification",
  subject =      "Computer Applications --- Computer-Aided Engineering
                 (J.6): {\bf Computer-aided design (CAD)}",
}

@Article{Huggins:1998:SVP,
  author          = {James K. Huggins and David {Van Campenhout}},
  title           = {Specification and verification of pipelining in the
                     {ARM2} {RISC} microprocessor},
  journal         = j-TODAES,
  volume          = {3},
  number          = {4},
  pages           = {563--580},
  month           = oct,
  year            = {1998},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Fri Jul 27 10:05:33 MDT 2001},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL             = {http://www.acm.org/pubs/articles/journals/todaes/1998-3-4/p563-huggins/p563-huggins.pdf;
                     http://www.acm.org/pubs/citations/journals/todaes/1998-3-4/p563-huggins/},
  abstract        = {Gurevich Abstract State Machines (ASMs) provide a
                     sound mathematical basis for the specification and
                     verification of systems. An application of the ASM
                     methodology to the verification of a pipelined
                     microprocessor (an ARM2 implementation) is described.
                     Both the sequential execution model and final pipelined
                     model are formalized using ASMs. A series of
                     intermediate models are introduced that gradually
                     expose the complications of pipelining. The first
                     intermediate model is proven equivalent to the
                     sequential model in the absence of structural, control,
                     and data hazards. In the following steps, these
                     simplifying assumptions are lifted one by one, and the
                     original proof is refined to establish the equivalence
                     of each intermediate model with the sequential model,
                     leading ultimately to a full proof of equivalence of
                     the sequential and pipelined models.},
  acknowledgement = ack-nhfb,
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {abstract state machines; ARM processor; design
                     verification; formal verification; pipelined
                     processors; pipelining},
  subject         = {Hardware --- Register-Transfer-Level Implementation
                     --- Design Aids (B.5.2); Computer Systems Organization
                     --- General (C.0): {\bf Systems specification
                     methodology}; Computer Systems Organization ---
                     Processor Architectures --- Single Data Stream
                     Architectures (C.1.1)},
}

@Article{VanCampenhout:1998:HLD,
  author          = {D. {Van Campenhout} and H. Al-Asaad and J. P. Hayes
                     and T. Mudge and R. B. Brown},
  title           = {High-level design verification of microprocessors via
                     error modeling},
  journal         = j-TODAES,
  volume          = {3},
  number          = {4},
  pages           = {581--599},
  month           = oct,
  year            = {1998},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Fri Jul 27 10:05:33 MDT 2001},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL             = {http://www.acm.org/pubs/articles/journals/todaes/1998-3-4/p581-campenhout/p581-campenhout.pdf;
                     http://www.acm.org/pubs/citations/journals/todaes/1998-3-4/p581-campenhout/},
  abstract        = {A design verification methodology for microprocessor
                     hardware based on modeling design errors and generating
                     simulation vectors for the modeled errors via physical
                     fault testing techniques is presented. We have
                     systematically collected design error data from a
                     number of microprocessor design projects. The error
                     data is used to derive error models suitable for design
                     verification testing. A class of basic error models is
                     identified and shown to yield tests that provide good
                     coverage of common error types. To improve coverage for
                     more complex errors, a new class of conditional error
                     models is introduced. An experiment to evaluate the
                     effectiveness of our methodology is presented. Single
                     actual design errors are injected into a correct
                     design, and it is determined if the methodology will
                     generate a test that detects the actual errors. The
                     experiment has been conducted for two microprocessor
                     designs and the results indicate that very high
                     coverage of actual design errors can be obtained with
                     test sets that are complete for a small number of
                     synthetic error models.},
  acknowledgement = ack-nhfb,
  generalterms    = {Verification},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {design errors; design verification; error modeling},
  subject         = {Hardware --- General (B.0); Hardware ---
                     Register-Transfer-Level Implementation --- Design Aids
                     (B.5.2)},
}

%%% Hasteer, Mathur, and Banerjee: TODAES 3(4), pages 600--625, October 1998.
%%% The three contribution items in the abstract are introduced by em-dashes
%%% (---), and ``binary decision diagram'' is a single keyword.
@Article{Hasteer:1998:EEC,
  author =       "G. Hasteer and A. Mathur and P. Banerjee",
  title =        "Efficient equivalence checking of multi-phase designs
                 using phase abstraction and retiming",
  journal =      j-TODAES,
  volume =       "3",
  number =       "4",
  pages =        "600--625",
  month =        oct,
  year =         "1998",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1998-3-4/p600-hasteer/p600-hasteer.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1998-3-4/p600-hasteer/",
  abstract =     "Equivalence checking of finite state machines (FSMs)
                 traditionally assumes single phase machines where a
                 single clock (implicit or explicit) synchronizes the
                 state of the FSM. We extend the equivalence checking
                 paradigm to FSMs with multi-phase clocks. Such designs
                 are becoming increasingly popular in high performance
                 microprocessors since they result in lower
                 synchronization overhead. In addition, aggressive
                 pipelining and the use of ``sparse'' encodings results
                 in designs where the ratio of steady states to the
                 total state space is very low. In this paper, we show
                 that automatically transforming such designs to ones
                 that have more ``dense'' encodings can result in
                 significant benefits in using implicit BDD-based
                 techniques for their verification. We explore two such
                 techniques: {\em phase abstraction\/} and {\em
                 retiming\/} and demonstrate their utility in the
                 context of FSM equivalence checking. The main
                 contributions of our work are: \par

                 --- We show that a multi-phase FSM can be transformed
                 to a functionally equivalent one phase FSM and this
                 phase abstraction leads to significant improvement in
                 the size of FSMs that can be checked for equivalence.
                 \par

                 --- We show that min-latch retiming preserves
                 equivalence and can be performed efficiently in
                 multi-phase designs, even when latch borrowing and
                 discarding is allowed at the primary inputs and
                 outputs. \par

                 --- We demonstrate the utility of our approach on
                 several controller FSMs from the industry.",
  acknowledgement = ack-nhfb,
  annote =       "Article title page incorrectly has Bannerjee instead
                 of Banerjee.",
  generalterms = "Algorithms; Design; Performance; Theory;
                 Verification",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "binary decision diagram; encoding density;
                 multi-phase FSM; product machine; sequential hardware
                 equivalence; steady states",
  subject =      "Hardware --- Logic Design --- Design Aids (B.6.3);
                 Hardware --- Logic Design --- Design Aids (B.6.3): {\bf
                 Verification}; Computer Applications --- Computer-Aided
                 Engineering (J.6): {\bf Computer-aided design (CAD)}",
}

%%% Benso, Prinetto, Rebaudengo, and Sonza Reorda: TODAES 3(4), pages
%%% 626--634, October 1998.  The last author's family name is the two-word
%%% ``Sonza Reorda''; it is braced so that BibTeX does not treat ``Sonza''
%%% as part of the given name.
@Article{Benso:1998:ELC,
  author =       "A. Benso and P. Prinetto and M. Rebaudengo and M.
                 {Sonza Reorda}",
  title =        "{EXFI}: a low-cost fault injection system for embedded
                 microprocessor-based boards",
  journal =      j-TODAES,
  volume =       "3",
  number =       "4",
  pages =        "626--634",
  month =        oct,
  year =         "1998",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1998-3-4/p626-benso/p626-benso.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1998-3-4/p626-benso/",
  abstract =     "Evaluating the faulty behavior of low-cost embedded
                 microprocessor-based boards is an increasingly
                 important issue, due to their adoption in many safety
                 critical systems. The architecture of a complete Fault
                 Injection environment is proposed, integrating a module
                 for generating a collapsed list of faults, and another
                 for performing their injection and gathering the
                 results. To address this issue, the paper describes a
                 software-implemented Fault Injection approach based on
                 the Trace Exception Mode available in most
                 microprocessors. The authors describe EXFI, a
                 prototypical system implementing the approach, and
                 provide data about some sample benchmark applications.
                 The main advantages of EXFI are the low cost, the good
                 portability, and the high efficiency.",
  acknowledgement = ack-nhfb,
  generalterms = "Design; Experimentation; Measurement",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "fault coverage; fault injection; microprocessor
                 systems; software-implemented fault injection; trace
                 exception mode",
  subject =      "Hardware --- Performance and Reliability ---
                 Reliability, Testing, and Fault-Tolerance (B.8.1);
                 Hardware --- Performance and Reliability ---
                 Performance Analysis and Design Aids (B.8.2)",
}

@article{Gasteier:1999:BBC,
  author          = {Michael Gasteier and Manfred Glesner},
  title           = {Bus-based communication synthesis on system level},
  journal         = j-TODAES,
  volume          = {4},
  number          = {1},
  pages           = {1--11},
  month           = jan,
  year            = {1999},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Fri Jul 27 10:05:33 MDT 2001},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
    http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  url             = {http://www.acm.org/pubs/articles/journals/todaes/1999-4-1/p1-gasteier/p1-gasteier.pdf;
    http://www.acm.org/pubs/citations/journals/todaes/1999-4-1/p1-gasteier/},
  abstract        = {In this article, we present an approach to automatic
    generation of communication topologies for statically scheduled systems
    of subsystems. Given a specification containing a set of processes that
    communicate via abstract send and receive functions, we show how a
    cost-efficient communication topology consisting of one or more buses
    without arbitration scheme can be set up for such applications.},
  acknowledgement = ack-nhfb,
  generalterms    = {Algorithms; Design},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {bus generation; bus without arbitration; communication
    synthesis; statically scheduled systems; transfer scheduling},
  subject         = {Hardware --- Input/Output and Data Communications ---
    Interconnections (Subsystems) (B.4.3)},
}

%%% Liao, Devadas, and Keutzer: TODAES 4(1), pages 12--38, January 1999.
%%% NOTE(review): the received abstract had ``TMS3220C25''; it is given here
%%% as TMS320C25, presumably the intended Texas Instruments part --- confirm
%%% against the printed article.
@Article{Liao:1999:TCB,
  author =       "Stan Liao and Srinivas Devadas and Kurt Keutzer",
  title =        "A text-compression-based method for code size
                 minimization in embedded systems",
  journal =      j-TODAES,
  volume =       "4",
  number =       "1",
  pages =        "12--38",
  month =        jan,
  year =         "1999",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1999-4-1/p12-liao/p12-liao.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1999-4-1/p12-liao/",
  abstract =     "We address the problem of code-size minimization in
                 VLSI systems with embedded DSP processors. Reducing
                 code size reduces the production cost of embedded
                 systems. \par

                 We use data-compression methods to develop code-size
                 minimization strategies. In our framework, the
                 compressed program consists of a skeleton and a
                 dictionary. We show that the dictionary can be computed
                 by solving a set-covering problem derived from the
                 original program. To execute the compressed code, we
                 describe two methods that have different performance
                 characteristics and different degrees of freedom in
                 compressing the code. We also address performance
                 considerations, and show that they can be incorporated
                 easily into the set-covering formulation, and present
                 experimental results obtained with Texas Instruments'
                 optimizing TMS320C25 compiler.",
  acknowledgement = ack-nhfb,
  generalterms = "Algorithms; Experimentation; Measurement;
                 Performance",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "code size optimization; compression",
  subject =      "Software --- Programming Languages --- Processors
                 (D.3.4): {\bf Compilers}; Software --- Programming
                 Languages --- Processors (D.3.4): {\bf Optimization};
                 Data --- Coding and Information Theory (E.4): {\bf Data
                 compaction and compression}",
}

%%% Song and Wang: TODAES 4(1), pages 39--51, January 1999.
%%% NOTE(review): the cited author name is spelled ``Marek-Sadowska''
%%% throughout the abstract (the received text had two different
%%% misspellings of it); ``Groenveld'' is kept as received --- verify
%%% against the printed article.
@Article{Song:1999:CDP,
  author =       "Xiaoyu Song and Yuke Wang",
  title =        "On the crossing distribution problem",
  journal =      j-TODAES,
  volume =       "4",
  number =       "1",
  pages =        "39--51",
  month =        jan,
  year =         "1999",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1999-4-1/p39-song/p39-song.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1999-4-1/p39-song/",
  abstract =     "VLSI layout design is typically decomposed into four
                 steps: {\em placement, global routing, routing region
                 definition, and detailed routing}. The crossing
                 distribution problem occurs prior to detailed routing
                 [Groenveld 1989; Marek-Sadowska and Sarrafzadeh 1995;
                 Wang and Shung 1992]. A {\em crossing\/} is defined as
                 the intersection of two nets. The problem of net
                 crossing distribution is important in layout design,
                 such as design of dense chips, multichip modules (MCM),
                 critical net routing, and analog circuits [Groenveld
                 1989; Sarrafzadeh 1995; Wang and Shung 1992]. It is
                 observed that nets crossing each other are more
                 difficult to route than those that do not cross. The
                 layout of crossing nets has to be realized in more than
                 two layers and requires a larger number of {\em vias}.
                 In this paper we study the crossing distribution
                 problem of two-terminal nets between two regions. We
                 present an optimal $O(n^2)$ time algorithm for
                 two-sided nets, where $n$ is the number of nets. Our
                 results are superior to previous ones [Marek-Sadowska
                 and Sarrafzadeh 1995; Wang and Shung 1992]. We give an
                 optimal $O(n^2)$ time algorithm for the crossing
                 distribution problem with one-sided nets. We solve
                 optimally the complete version of the crossing
                 distribution problem for two-terminal nets in two
                 regions that has not been studied before.",
  acknowledgement = ack-nhfb,
  generalterms = "Algorithms; Design; Experimentation",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "crossings; VLSI layout",
  subject =      "Hardware --- Integrated Circuits (B.7); Hardware ---
                 Integrated Circuits --- Design Aids (B.7.2); Theory of
                 Computation --- Analysis of Algorithms and Problem
                 Complexity (F.2); Theory of Computation --- Analysis of
                 Algorithms and Problem Complexity --- Nonnumerical
                 Algorithms and Problems (F.2.2): {\bf Sequencing and
                 scheduling}",
}

@article{Tseng:1999:TLL,
  author          = {Jyh-Mou Tseng and Jing-Yang Jou},
  title           = {Two-level logic minimization for low power},
  journal         = j-TODAES,
  volume          = {4},
  number          = {1},
  pages           = {52--69},
  month           = jan,
  year            = {1999},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Fri Jul 27 10:05:33 MDT 2001},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
    http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  url             = {http://www.acm.org/pubs/articles/journals/todaes/1999-4-1/p52-tseng/p52-tseng.pdf;
    http://www.acm.org/pubs/citations/journals/todaes/1999-4-1/p52-tseng/},
  abstract        = {In this paper we present a complete Boolean method for
    reducing the power consumption in two-level combinational circuits. The
    two-level logic optimizer performs the logic minimization for low power
    targeting static PLA, general logic gates, and dynamic PLA
    implementations. We modify the Espresso algorithm by adding our
    heuristics, which bias logic minimization toward lowering power
    dissipation. In our heuristics, signal probabilities and transition
    densities are two important parameters. The experimental results are
    promising.},
  acknowledgement = ack-nhfb,
  generalterms    = {Algorithms; Design; Performance; Theory},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {logic synthesis; low power design; programmable logic
    array; two-level logic minimization},
  subject         = {Hardware --- Logic Design --- Design Styles (B.6.1):
    {\bf Combinational logic}; Hardware --- Logic Design --- Design Styles
    (B.6.1): {\bf Logic arrays}; Hardware --- Logic Design --- Design Aids
    (B.6.3): {\bf Automatic synthesis}; Hardware --- Integrated Circuits ---
    Types and Design Styles (B.7.1): {\bf VLSI (very large scale
    integration)}},
}

@article{Vahid:1999:PCT,
  author          = {Frank Vahid},
  title           = {Procedure cloning: a transformation for improved
    system-level functional partitioning},
  journal         = j-TODAES,
  volume          = {4},
  number          = {1},
  pages           = {70--96},
  month           = jan,
  year            = {1999},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Fri Jul 27 10:05:33 MDT 2001},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
    http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  url             = {http://www.acm.org/pubs/articles/journals/todaes/1999-4-1/p70-vahid/p70-vahid.pdf;
    http://www.acm.org/pubs/citations/journals/todaes/1999-4-1/p70-vahid/},
  abstract        = {Functional partitioning assigns the functions of a
    system's program-like specification among system components, such as
    standard-software and custom-hardware processors. We introduce a new
    transformation, called procedure cloning, that significantly improves
    functional partitioning results. The transformation creates a clone of a
    procedure for sole use by a particular procedure caller, so the clone
    can be assigned to the caller's processor, which in turn improves
    performance through reduced communication. Heuristics are used to
    prevent the exponential size increase that could occur if cloning were
    done indiscriminately. We introduce a variety of cloning heuristics,
    highlight experiments demonstrating the improvements obtained using
    cloning, and compare the various cloning heuristics.},
  acknowledgement = ack-nhfb,
  generalterms    = {Design},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {behavioral synthesis; embedded systems; functional
    partitioning; hardware/software codesign; replication; system-level
    design; system-on-a-chip; transformations},
  subject         = {Hardware --- General (B.0); Hardware ---
    Register-Transfer-Level Implementation --- Design Aids (B.5.2): {\bf
    Automatic synthesis}; Hardware --- Register-Transfer-Level
    Implementation --- Design Aids (B.5.2): {\bf Hardware description
    languages}; Hardware --- Register-Transfer-Level Implementation ---
    Design Aids (B.5.2): {\bf Optimization}; Computer Applications ---
    Computer-Aided Engineering (J.6): {\bf Computer-aided design (CAD)}},
}

@article{Wang:1999:PRP,
  author          = {Qi Wang and Sarma B. K. Vrudhula and Gary Yeap and
    Shantanu Ganguly},
  title           = {Power reduction and power-delay trade-offs using logic
    transformations},
  journal         = j-TODAES,
  volume          = {4},
  number          = {1},
  pages           = {97--121},
  month           = jan,
  year            = {1999},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Fri Jul 27 10:05:33 MDT 2001},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
    http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  url             = {http://www.acm.org/pubs/articles/journals/todaes/1999-4-1/p97-wang/p97-wang.pdf;
    http://www.acm.org/pubs/citations/journals/todaes/1999-4-1/p97-wang/},
  abstract        = {We present an efficient technique to reduce the
    switching activity in a technology-mapped CMOS combinational circuit
    based on local logic transformations. The transformations consist of
    adding redundant connections or gates so as to reduce switching
    activity. We describe simple and efficient procedures, based on logic
    implication, for identifying the sources and targets of the redundant
    connections. Additionally, we give procedures that permit the designer
    to trade-off power and delay after the transformations. Results of
    experiments on both the MCNC benchmark circuits and the circuits of a
    PowerPC microprocessor chip are given. The results indicate that
    significant power reduction of a CMOS combinational circuit can be
    achieved with very low area overhead, delay penalty, and computational
    cost.},
  acknowledgement = ack-nhfb,
  generalterms    = {Design; Experimentation; Performance},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {CMOS logic; logic optimization; logic synthesis; low
    power; power estimation},
  subject         = {Hardware --- Integrated Circuits --- General (B.7.0);
    Hardware --- Logic Design --- Design Styles (B.6.1): {\bf Combinational
    logic}},
}

@article{Kern:1999:FVH,
  author          = {Christoph Kern and Mark R. Greenstreet},
  title           = {Formal verification in hardware design: a survey},
  journal         = j-TODAES,
  volume          = {4},
  number          = {2},
  pages           = {123--193},
  month           = apr,
  year            = {1999},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Fri Jul 27 10:05:33 MDT 2001},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
    http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  url             = {http://www.acm.org/pubs/articles/journals/todaes/1999-4-2/p123-kern/p123-kern.pdf;
    http://www.acm.org/pubs/citations/journals/todaes/1999-4-2/p123-kern/},
  abstract        = {In recent years, formal methods have emerged as an
    alternative approach to ensuring the quality and correctness of hardware
    designs, overcoming some of the limitations of traditional validation
    techniques such as simulation and testing. \par

    There are two main aspects to the application of formal methods in a
    design process: the formal framework used to specify desired properties
    of a design and the verification techniques and tools used to reason
    about the relationship between a specification and a corresponding
    implementation. We survey a variety of frameworks and techniques
    proposed in the literature and applied to actual designs. The
    specification frameworks we describe include temporal logics, predicate
    logic, abstraction and refinement, as well as containment between
    $\omega$-regular languages. The verification techniques presented
    include model checking, automata-theoretic techniques, automated theorem
    proving, and approaches that integrate the above methods. \par

    In order to provide insight into the scope and limitations of currently
    available techniques, we present a selection of case studies where
    formal methods were applied to industrial-scale designs, such as
    microprocessors, floating-point hardware, protocols, memory subsystems,
    and communications hardware.},
  acknowledgement = ack-nhfb,
  generalterms    = {Design; Verification},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {case studies; formal methods; formal verification;
    hardware verification; language containment; model checking; survey;
    theorem proving},
  subject         = {General Literature --- Introductory and Survey (A.1);
    Hardware --- Integrated Circuits --- Design Aids (B.7.2): {\bf
    Verification}},
}

%%% Lee, Tang, and Huang: TODAES 4(2), pages 194--218, April 1999.
%%% The quiescent supply current is typeset as a subscripted symbol,
%%% $I_{\mathrm{DDQ}}$, rather than as the two italic words ``I DDQ''.
@Article{Lee:1999:BBI,
  author =       "Kuen-Jong Lee and Jing-Jou Tang and Tsung-Chu Huang",
  title =        "{BIFEST}: a built-in intermediate fault effect sensing
                 and test generation system for {CMOS} bridging faults",
  journal =      j-TODAES,
  volume =       "4",
  number =       "2",
  pages =        "194--218",
  month =        apr,
  year =         "1999",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1999-4-2/p194-lee/p194-lee.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1999-4-2/p194-lee/",
  abstract =     "This paper presents BIFEST, an ATPG system that
                 employs the built-in intermediate voltage test
                 technique in an efficient ATPG process to deal with
                 CMOS bridging faults. Fast and accurate calculations of
                 the intermediate bridging voltages and the variant
                 threshold tolerance margins on a resistive bridging
                 fault model are presented. A PODEM-like, PPSFP-based
                 ATPG process is developed to generate test patterns for
                 faults that are conventionally logic-testable. The
                 remaining faults are then dealt with by special
                 circuits, called built-in intermediate voltage sensors
                 (BIVSs). By this methodology, almost the same fault
                 coverage as that employing $I_{\mathrm{DDQ}}$ testing
                 can be achieved with only logic monitoring required.",
  acknowledgement = ack-nhfb,
  generalterms = "Design; Experimentation; Reliability",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  subject =      "Hardware --- Integrated Circuits --- General (B.7.0)",
}

@article{Thornton:1999:BSC,
  author          = {M. A. Thornton and V. S. S. Nair},
  title           = {Behavioral synthesis of combinational logic using
    spectral-based heuristics},
  journal         = j-TODAES,
  volume          = {4},
  number          = {2},
  pages           = {219--230},
  month           = apr,
  year            = {1999},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Fri Jul 27 10:05:33 MDT 2001},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
    http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  url             = {http://www.acm.org/pubs/articles/journals/todaes/1999-4-2/p219-thornton/p219-thornton.pdf;
    http://www.acm.org/pubs/citations/journals/todaes/1999-4-2/p219-thornton/},
  abstract        = {A prototype system developed to convert a behavioral
    representation of a Boolean function in OBDD form into an initial
    structural representation is described and experimental results are
    given. The system produces a multilevel circuit using heuristic rules
    based on properties of a subset of spectral coefficients. Since the
    behavioral description is in OBDD form, efficient methods are used to
    quickly compute the small subset of spectral coefficients needed for the
    application of the heuristics. The heuristics guide subsequent
    decompositions of the OBDD, resulting in an iterative construction of
    the structural form. At each stage of the translation, the form of the
    decomposition is chosen in order to achieve optimization goals.},
  acknowledgement = ack-nhfb,
  generalterms    = {Design},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {automatic synthesis; decision diagram; decision
    diagrams; design aids; logic design; spectral methods},
  subject         = {Hardware --- Logic Design --- Design Aids (B.6.3)},
}

%%% Cheng and Lin: TODAES 4(3), pages 231--256, July 1999.
%%% The dash that introduces the two processor features in the abstract is
%%% an em-dash (---), not an en-dash.
@Article{Cheng:1999:CGN,
  author =       "Wei-Kai Cheng and Youn-Long Lin",
  title =        "Code generation of nested loops for {DSP} processors
                 with heterogeneous registers and structural
                 pipelining",
  journal =      j-TODAES,
  volume =       "4",
  number =       "3",
  pages =        "231--256",
  month =        jul,
  year =         "1999",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1999-4-3/p231-cheng/p231-cheng.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1999-4-3/p231-cheng/",
  abstract =     "We propose a microcode-optimizing method targeting a
                 programmable DSP processor. Efficient generation of
                 microcodes is essential to better utilize the
                 computation power of a DSP processor. Since most
                 state-of-the-art DSP processors feature some sort of
                 irregular architectures and most DSP applications have
                 nested loop constructs, their code generation is a
                 nontrivial task. In this paper, we consider two
                 features frequently found in contemporary DSP
                 processors --- structural pipelining and heterogeneous
                 registers. We propose a code generator that performs
                 instruction scheduling and register allocation
                 simultaneously. The proposed approach has been
                 implemented and evaluated using a set of benchmark core
                 algorithms. Simulation of the generated codes targeted
                 towards the TI TMS320C40 DSP processor shows that our
                 system is indeed more effective compared with a
                 commercial optimizing DSP compiler.",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "code generation; DSP",
  subject =      "Computer Systems Organization --- Special-Purpose and
                 Application-Based Systems (C.3): {\bf Real-time and
                 embedded systems}",
}

@article{Li:1999:PEE,
  author          = {Yau-Tsun Steven Li and Sharad Malik and Andrew Wolfe},
  title           = {Performance estimation of embedded software with
    instruction cache modeling},
  journal         = j-TODAES,
  volume          = {4},
  number          = {3},
  pages           = {257--279},
  month           = jul,
  year            = {1999},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Fri Jul 27 10:05:33 MDT 2001},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
    http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  url             = {http://www.acm.org/pubs/articles/journals/todaes/1999-4-3/p257-li/p257-li.pdf;
    http://www.acm.org/pubs/citations/journals/todaes/1999-4-3/p257-li/},
  abstract        = {Embedded systems generally interact in some way with
    the outside world. This may involve measuring sensors and controlling
    actuators, communicating with other systems, or interacting with users.
    These functions impose real-time constraints on system design.
    Verification of these specifications requires computing an upper bound
    on the worst-case execution time (WCET) of a hardware/software system.
    Furthermore, it is critical to derive a tight upper bound on WCET in
    order to make efficient use of system resources. \par

    The problem of bounding WCET is particularly difficult on modern
    processors. These processors use cache-based memory systems that vary
    memory access time based on the dynamic memory access pattern of the
    program. This must be accurately modeled in order to tightly bound WCET.
    Several analysis methods have been proposed to bound WCET on processors
    with instruction caches. Existing approaches either search all possible
    program paths, an intractable problem, or they use highly pessimistic
    assumptions to limit the search space. In this paper we present a more
    effective method for modeling instruction cache activity and computing a
    tight bound on WCET. The method uses an integer linear programming
    formulation and does not require explicit enumeration of program paths.
    The method is implemented in the program {\tt cinderella} and we present
    some experimental results of this implementation.},
  acknowledgement = ack-nhfb,
  generalterms    = {Design; Experimentation; Performance; Theory},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  subject         = {Computer Systems Organization --- Performance of
    Systems (C.4): {\bf Modeling techniques}; Computer Systems Organization
    --- Special-Purpose and Application-Based Systems (C.3): {\bf Real-time
    and embedded systems}},
}

%%% TODAES 4(3), July 1999, pp. 280--312.  The braces around {Robust} in
%%% the title protect its capital letter from sentence-casing styles.
@Article{Shi:1999:SSL,
  author =       "C.-J. Richard Shi and Michael W. Tian",
  title =        "Simulation and sensitivity of linear analog circuits
                 under parameter variations by {Robust} interval
                 analysis",
  journal =      j-TODAES,
  volume =       "4",
  number =       "3",
  pages =        "280--312",
  month =        jul,
  year =         "1999",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1999-4-3/p280-shi/p280-shi.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1999-4-3/p280-shi/",
  abstract =     "An interval-mathematic approach is presented for
                 frequency-domain simulation and sensitivity analysis of
                 linear analog circuits under parameter variations. With
                 uncertain parameters represented as intervals, bounding
                 frequency-domain responses is formulated as the problem
                 of solving systems of linear interval equations. The
                 formulation is based on a variant of modified nodal
                 analysis, and is particularly amenable to interval
                 analysis. Some characterizations of the solution sets
                 of systems of linear interval equations are derived.
                 With these characterizations, an elegant and efficient
                 algorithm is proposed to solve systems of linear
                 interval equations. While the widely used Monte Carlo
                 approach requires many circuit simulations to achieve
                 even moderate accuracy, the computational cost of the
                 proposed approach is about twice that of one circuit
                 simulation. The computed response bounds contain
                 provably, or are usually very close to, the actual
                 response bounds. Further, sensitivity under parameter
                 variations can be computed from the response bounds at
                 minor computational cost. The algorithms are
                 implemented in SPICE3F5, using sparse-matrix techniques
                 and tested on several practical analog circuits.",
  acknowledgement = ack-nhfb,
  generalterms = "Algorithms; Design; Verification",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "interval mathematics; process variations; sensitivity;
                 uncertainty; worst-case analysis",
  subject =      "Hardware --- Integrated Circuits --- Design Aids
                 (B.7.2): {\bf Simulation}; Hardware --- Integrated
                 Circuits --- Design Aids (B.7.2): {\bf Verification};
                 Mathematics of Computing --- Numerical Analysis ---
                 Numerical Linear Algebra (G.1.3): {\bf Linear systems
                 (direct and iterative methods)}; Mathematics of
                 Computing --- Numerical Analysis --- Numerical Linear
                 Algebra (G.1.3): {\bf Sparse, structured, and very
                 large systems (direct and iterative methods)}; Computer
                 Applications --- Computer-Aided Engineering (J.6): {\bf
                 Computer-aided manufacturing (CAM)}",
}

%%% TODAES 4(3), July 1999, pp. 313--350.
@Article{Wurth:1999:FMO,
  author = {Bernd Wurth and Ulf Schlichtmann and Klaus Eckl and
    Kurt J. Antreich},
  title = {Functional multiple-output decomposition with
    application to technology mapping for lookup
    table-based {FPGAs}},
  journal = j-TODAES,
  year = 1999,
  month = jul,
  volume = 4,
  number = 3,
  pages = {313--350},
  CODEN = {ATASFO},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  URL = {http://www.acm.org/pubs/articles/journals/todaes/1999-4-3/p313-wurth/p313-wurth.pdf;
    http://www.acm.org/pubs/citations/journals/todaes/1999-4-3/p313-wurth/},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate = {Fri Jul 27 10:05:33 MDT 2001},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  abstract = {Functional decomposition is an important technique for
    technology mapping to look up table-based FPGA
    architectures. We present the theory of and a novel
    approach to functional disjoint decomposition of
    multiple-output functions, in which common subfunctions
    are extracted during technology mapping. \par

    While a Boolean function usually has a very large
    number of subfunctions, we show that not all of them
    are useful for multiple-output decomposition. We use a
    partition of the set of bound set vertices as the basis
    to compute {\em preferable\/} decomposition functions,
    which are sufficient for an optimal multiple-output
    decomposition. \par

    We propose several new algorithms that deal with
    central issues of functional multiple-output
    decomposition. First, an efficient algorithm to solve
    the variable partitioning problem is described. Second,
    we show how to implicitly compute all preferable
    functions of a single-output function and how to
    identify all common preferable functions of a
    multiple-output function. Due to implicit computation
    in the crucial steps, the algorithm is very efficient.
    Experimental results show significant reductions in
    area.},
  generalterms = {Algorithms; Design; Experimentation; Performance;
    Theory},
  keywords = {assignable functions; Boolean functions;
    computer-aided design of VLSI; decomposition; FPGA
    technology; implicit BDD-based methods; mapping
    synthesis; multiple-output decomposition; preferable
    functions; subfunction sharing gain; subfunction
    sharing potential; TOS; variable partitioning for
    decomposition},
  subject = {Computer Applications --- Computer-Aided Engineering
    (J.6); Hardware --- Integrated Circuits --- Types and
    Design Styles (B.7.1): {\bf Gate arrays}},
}

%%% TODAES 4(4), October 1999, pp. 351--375.  {De Micheli} is braced so
%%% that BibTeX keeps it together as a two-word surname.
@Article{Benini:1999:SSC,
  author = {L. Benini and G. {De Micheli} and E. Macii and
    M. Poncino and R. Scarsi},
  title = {Symbolic synthesis of clock-gating logic for power
    optimization of synchronous controllers},
  journal = j-TODAES,
  year = 1999,
  month = oct,
  volume = 4,
  number = 4,
  pages = {351--375},
  CODEN = {ATASFO},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  URL = {http://www.acm.org/pubs/articles/journals/todaes/1999-4-4/p351-benini/p351-benini.pdf;
    http://www.acm.org/pubs/citations/journals/todaes/1999-4-4/p351-benini/},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate = {Fri Jul 27 10:05:33 MDT 2001},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  abstract = {Recent results have shown that dynamic power
    management is effective in reducing the total power
    consumption of sequential circuits. In this paper, we
    propose a bottom-up approach for the automatic
    extraction and synthesis of dynamic power management
    circuitry starting from structural logic-level
    specifications. Our techniques leverage the compact
    BDD-based representation of Boolean and pseudo-Boolean
    functions to detect idle conditions where the clock can
    be stopped without compromising functional correctness.
    Moreover, symbolic techniques allow accurate
    probabilistic computations; in particular, they enable
    the use of non-equiprobable primary input
    distributions, a key step in the construction of models
    that match the behavior of real hardware devices with a
    high degree of fidelity. The results are encouraging,
    since power savings of up to 34\% have been obtained on
    standard benchmark circuits.},
  generalterms = {Design},
  subject = {Hardware --- Logic Design --- Design Styles (B.6.1):
    {\bf Sequential circuits}; Hardware --- Logic Design
    --- Design Aids (B.6.3): {\bf Automatic synthesis};
    Hardware --- Logic Design --- Design Aids (B.6.3): {\bf
    Optimization}},
}

%%% TODAES 4(4), October 1999, pp. 376--404.
@Article{Choi:1999:FDA,
  author = {Kyumyung Choi and Steven P. Levitan},
  title = {A flexible datapath allocation method for
    architectural synthesis},
  journal = j-TODAES,
  year = 1999,
  month = oct,
  volume = 4,
  number = 4,
  pages = {376--404},
  CODEN = {ATASFO},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  URL = {http://www.acm.org/pubs/articles/journals/todaes/1999-4-4/p376-choi/p376-choi.pdf;
    http://www.acm.org/pubs/citations/journals/todaes/1999-4-4/p376-choi/},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate = {Fri Jul 27 10:05:33 MDT 2001},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  abstract = {We present a robust datapath allocation method that is
    flexible enough to handle constraints imposed by a
    variety of target architectures. Key features of this
    method are its ability to handle accurate modeling of
    datapath units and the simultaneous optimization of
    direct objective functions. The proposed method
    consists of a new binding model construction scheme and
    an optimization technique based on simulated annealing.
    To illustrate the flexibility of this method, two
    datapath allocation procedures have been developed for
    two problem environments: (1) a procedure that
    incorporates interconnection area and delay estimates,
    where floor-planning is tightly integrated into
    datapath allocation; and (2) a procedure that handles
    registers, register files, and multiport memories for
    data storage, as well as random and linear topologies
    for interconnection architectures. Results from these
    two applications show our method produces competitive
    designs for benchmark circuits, as well as being
    flexible enough to be used for a variety of different
    domains.},
  keywords = {allocation and binding; high-level synthesis},
  subject = {Hardware --- Register-Transfer-Level Implementation
    --- Design Aids (B.5.2): {\bf Automatic synthesis};
    Hardware --- Register-Transfer-Level Implementation ---
    Design Aids (B.5.2): {\bf Optimization}; Hardware ---
    Integrated Circuits --- Design Aids (B.7.2): {\bf
    Placement and routing}; Mathematics of Computing ---
    Numerical Analysis --- Optimization (G.1.6); Computer
    Applications --- Computer-Aided Engineering (J.6): {\bf
    Computer-aided design (CAD)}},
}

%%% TODAES 4(4), October 1999, pp. 405--429.
@Article{Hong:1999:POU,
  author = {Inki Hong and Miodrag Potkonjak and Ramesh Karri},
  title = {Power optimization using divide-and-conquer techniques
    for minimization of the number of operations},
  journal = j-TODAES,
  year = 1999,
  month = oct,
  volume = 4,
  number = 4,
  pages = {405--429},
  CODEN = {ATASFO},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  URL = {http://www.acm.org/pubs/articles/journals/todaes/1999-4-4/p405-hong/p405-hong.pdf;
    http://www.acm.org/pubs/citations/journals/todaes/1999-4-4/p405-hong/},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate = {Fri Jul 27 10:05:33 MDT 2001},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  abstract = {We introduce an approach for power optimization using
    a set of compilation and architectural techniques. The
    key technical innovation is a novel divide-and-conquer
    compilation technique to minimize the number of
    operations for general computations. Our technique
    optimizes not only a significantly wider set of
    computations than the previously published techniques,
    but also outperforms (or performs at least as well as
    other techniques) on all examples. Along the
    architectural dimension, we investigate coordinated
    impact of compilation techniques on the number of
    processors which provide optimal trade-off between cost
    and power. We demonstrate that proper compilation
    techniques can significantly reduce power with bounded
    hardware cost. The effectiveness of all techniques and
    algorithms is documented on numerous real-life
    designs.},
  generalterms = {Algorithms},
  keywords = {code generation; transformations},
  subject = {Software --- Programming Languages --- Processors
    (D.3.4): {\bf Compilers}; Software --- Programming
    Languages --- Processors (D.3.4): {\bf Optimization}},
}

%%% TODAES 4(4), October 1999, pp. 430--459.  The parenthetical dashes in
%%% the abstract are typed as --- so that TeX sets them as em-dashes.
@Article{Potkonjak:1999:MAD,
  author =       "Miodrag Potkonjak and Wayne Wolf",
  title =        "A methodology and algorithms for the design of hard
                 real-time multitasking {ASICs}",
  journal =      j-TODAES,
  volume =       "4",
  number =       "4",
  pages =        "430--459",
  month =        oct,
  year =         "1999",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1999-4-4/p430-potkonjak/p430-potkonjak.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1999-4-4/p430-potkonjak/",
  abstract =     "Traditional high-level synthesis concentrates on the
                 implementation of a single task (e.g., filter, linear
                 controller, A/D converter). However, many
                 applications---multifunctional embedded controllers,
                 intelligent wireless end-points, and DSP and multimedia
                 servers---are defined as sets of several computational
                 tasks. This paper describes new techniques for the
                 synthesis of ASIC implementations that realize multiple
                 computational processes under hard real-time
                 constraints. Our synthesis methodology establishes
                 connections between two important computer engineering
                 domains: operating systems and behavioral synthesis.
                 Our hierarchical approach starts from an
                 incompletely-specified preliminary solution and uses,
                 interchangeably, operating system and behavioral
                 synthesis techniques to derive increasingly more
                 detailed and accurate design solutions. We have
                 experimented with both optimal and heuristic algorithms
                 to implement this methodology. The optimal algorithm
                 uses several heuristics to speed up the average run
                 time of an exhaustive branch-and-bound search.
                 Force-directed optimization is the core of the
                 heuristic synthesis method. Analysis of the proposed
                 algorithms and the experiments shows that matching the
                 number of bits and type of operations in tasks
                 assigned to the same application-specific processor was
                 the most important factor in obtaining area-efficient
                 designs.",
  acknowledgement = ack-nhfb,
  generalterms = "Algorithms; Design; Performance",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  subject =      "Hardware --- Integrated Circuits --- Types and Design
                 Styles (B.7.1): {\bf Algorithms implemented in
                 hardware}",
}

%%% TODAES 5(1), January 2000, pp. 1--33.  The braced groups keep the
%%% capitalized particles (Dos, Van) attached to their surnames.
@Article{DosSantos:2000:CMP,
  author = {Luiz C. V. {Dos Santos} and M. J. M. Heijligers and
    C. A. J. {Van Eijk} and J. {Van Eijndhoven} and
    J. A. G. Jess},
  title = {A code-motion pruning technique for global
    scheduling},
  journal = j-TODAES,
  year = 2000,
  month = jan,
  volume = 5,
  number = 1,
  pages = {1--33},
  CODEN = {ATASFO},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  URL = {http://www.acm.org/pubs/articles/journals/todaes/2000-5-1/p1-dos_santos/p1-dos_santos.pdf;
    http://www.acm.org/pubs/citations/journals/todaes/2000-5-1/p1-dos_santos/},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate = {Fri Jul 27 09:50:12 MDT 2001},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

%%% TODAES 5(1), January 2000, pp. 34--50.
@Article{Fang:2000:MFP,
  author = {Wen-Jong Fang and Allen C.-H. Wu},
  title = {Multiway {FPGA} partitioning by fully exploiting
    design hierarchy},
  journal = j-TODAES,
  year = 2000,
  month = jan,
  volume = 5,
  number = 1,
  pages = {34--50},
  CODEN = {ATASFO},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  URL = {http://www.acm.org/pubs/articles/journals/todaes/2000-5-1/p34-fang/p34-fang.pdf;
    http://www.acm.org/pubs/citations/journals/todaes/2000-5-1/p34-fang/},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate = {Fri Jul 27 10:05:33 MDT 2001},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  abstract = {In this paper, we present a new integrated synthesis
    and partitioning method for multiple-FPGA applications.
    Our approach bridges the gap between HDL synthesis and
    physical partitioning by fully exploiting the design
    hierarchy. We propose a novel multiple-FPGA synthesis
    and partitioning method which is performed in three
    phases: (1) fine-grained synthesis, (2)
    functional-based clustering, and (3) hierarchical
    set-covering partitioning. This method first
    synthesizes a design specification in a fine-grained
    way so that functional clusters can be preserved based
    on the structural nature of the design specification.
    Then, it applies a hierarchical set-covering
    partitioning method to form the final FPGA partitions.
    Experimental results on a number of benchmarks and
    industrial designs demonstrate that IO limits are the
    bottleneck for CLB utilization when applying a
    traditional multiple-FPGA synthesis method on flattened
    netlists. In contrast, by fully exploiting the design
    structural hierarchy during the multiple-FPGA
    partitioning, our proposed method produces fewer FPGA
    partitions with higher CLB and lower IO-pin
    utilizations.},
  generalterms = {Design; Experimentation; Performance},
  keywords = {fine-grained synthesis; functional clustering;
    multi-way partitioning; multiple-FPGA synthesis},
  subject = {Hardware --- Integrated Circuits --- Types and Design
    Styles (B.7.1): {\bf Gate arrays}; Hardware ---
    Integrated Circuits --- Design Aids (B.7.2)},
}

%%% TODAES 5(1), January 2000, pp. 51--81.  {CMAPS} is braced in the
%%% title so that sentence-casing styles leave the acronym in capitals.
@Article{Hsiung:2000:CCM,
  author =       "Pao-Ann Hsiung",
  title =        "{CMAPS}: a cosynthesis methodology for
                 application-oriented parallel systems",
  journal =      j-TODAES,
  volume =       "5",
  number =       "1",
  pages =        "51--81",
  month =        jan,
  year =         "2000",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2000-5-1/p51-hsiung/p51-hsiung.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2000-5-1/p51-hsiung/",
  abstract =     "Currently, a lot of research is devoted to {\em system
                 design}, and little work is done on {\em requirements
                 analysis}. Besides going from specification to design,
                 one of our main objectives is to show how an
                 application problem can be transformed into
                 specifications. Working from the hardware-software
                 codesign perspective, a system is designed starting
                 from an application problem itself, rather than the
                 detailed behavioral specifications. Given an
                 application problem specified as a directed acyclic
                 graph of elementary problems, a hardware-software
                 solution is derived such that the synthesized software,
                 a parallel pseudoprogram, can be scheduled and executed
                 on the synthesized hardware, a set of system-level
                 parallel computer specifications, with heuristically
                 optimal performance.
                 This is known as system-level cosynthesis of
                 application-oriented general-purpose parallel systems
                 for which a novel methodology called {\em Cosynthesis
                 Methodology for Application-Oriented Parallel
                 Systems\/} (CMAPS), is presented. Since parallel
                 programs and multiprocessor architectures are largely
                 interdependent, CMAPS explores the relationship between
                 hardware designs and software algorithms by
                 interleaving the modeling phases and the synthesis
                 phases of both hardware and software. High scalability
                 in terms of problem complexity and easy upgradability
                 to new technologies are achieved through modularization
                 of the input problem specification, of the software
                 algorithms, and of the hardware subsystem models. The
                 work presented in this paper will be beneficial to
                 designers of general-purpose parallel computer systems
                 which must be oriented toward solving some
                 user-specified problem such as the global controller of
                 an industry automation process or a multiprocessor
                 video server. Some application examples are given to
                 illustrate various codesign phases of CMAPS and its
                 feasibility.",
  acknowledgement = ack-nhfb,
  generalterms = "Design",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "application-oriented general-purpose multiprocessors;
                 hardware-software modeling and cosynthesis;
                 requirements analysis",
  subject =      "Computer Applications --- Computer-Aided Engineering
                 (J.6): {\bf Computer-aided design (CAD)}; Computer
                 Systems Organization --- General (C.0): {\bf System
                 architectures}; Computer Systems Organization ---
                 General (C.0): {\bf Systems specification methodology};
                 Computer Systems Organization --- Processor
                 Architectures --- Multiple Data Stream Architectures
                 (Multiprocessors) (C.1.2); Computer Systems
                 Organization --- Computer System Implementation ---
                 General (C.5.0); Computer Systems Organization ---
                 Processor Architectures --- Parallel Architectures
                 (C.1.4)",
}

%%% TODAES 5(1), January 2000, pp. 82--97.
@Article{Mehta:2000:UFR,
  author =       "Dinesh P. Mehta and Naveed Sherwani",
  title =        "On the use of flexible, rectilinear blocks to obtain
                 minimum-area floorplans in mixed block and cell
                 designs",
  journal =      j-TODAES,
  volume =       "5",
  number =       "1",
  pages =        "82--97",
  month =        jan,
  year =         "2000",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2000-5-1/p82-mehta/p82-mehta.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2000-5-1/p82-mehta/",
  abstract =     "This paper presents three minimum-area floorplanning
                 algorithms that use flexible arbitrary rectilinear
                 shapes for the standard cell regions in MBC design. The
                 first algorithm (pure HCST) introduces a grid traversal
                 technique which guarantees a minimum-area floorplan.
                 The second algorithm (Hybrid-BF) uses a combination of
                 HCST and Breadth First (BF) traversals to give a
                 practical solution that approximately places flexible
                 blocks at specified locations called {\em seeds}. The
                 third algorithm (Hybrid-MBF) improves on the shapes of
                 the flexible blocks generated by Hybrid-BF by using a
                 combination of HCST and a Modified Breadth First (MBF)
                 traversal. All three algorithms are polynomial in the
                 number of grid squares. Optimized implementations of
                 Hybrid-BF and Hybrid-MBF required less than two seconds
                 on a SUN SPARCstation 10.",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "floorplanning; mixed block and cell designs;
                 rectilinear polygons",
  subject =      "Hardware --- Integrated Circuits --- Design Aids
                 (B.7.2): {\bf Layout}; Theory of Computation ---
                 Analysis of Algorithms and Problem Complexity ---
                 Nonnumerical Algorithms and Problems (F.2.2): {\bf
                 Routing and layout}; Mathematics of Computing ---
                 Discrete Mathematics --- Graph Theory (G.2.2): {\bf
                 Graph algorithms}",
}

%%% TODAES 5(1), January 2000, pp. 98--114.
@Article{Sapatnekar:2000:PDO,
  author = {Sachin S. Sapatnekar and Weitong Chuang},
  title = {Power-delay optimizations in gate sizing},
  journal = j-TODAES,
  year = 2000,
  month = jan,
  volume = 5,
  number = 1,
  pages = {98--114},
  CODEN = {ATASFO},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  URL = {http://www.acm.org/pubs/articles/journals/todaes/2000-5-1/p98-sapatnekar/p98-sapatnekar.pdf;
    http://www.acm.org/pubs/citations/journals/todaes/2000-5-1/p98-sapatnekar/},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate = {Fri Jul 27 10:05:33 MDT 2001},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  abstract = {The problem of power-delay tradeoffs in transistor
    sizing is examined using a nonlinear optimization
    formulation. Both the dynamic and the short-circuit
    power are considered, and a new modeling technique is
    used to calculate the short-circuit power. The notion
    of transition density is used, with an enhancement that
    considers the effect of gate delays on the transition
    density. When the short-circuit power is neglected, the
    minimum power circuit is identical to the minimum area
    circuit. However, under our more realistic models, our
    experimental results on several circuits show that the
    minimum power circuit is not necessarily the same as
    the minimum area circuit.},
  keywords = {optimization; power estimation; transistor sizing;
    VLSI layout},
  subject = {Hardware --- Integrated Circuits --- Design Aids
    (B.7.2): {\bf Layout}},
}

%%% TODAES 5(2), pp. 115--192.  NOTE(review): volume 5, number 2 is
%%% believed to be the April 2000 issue --- confirm against the ACM
%%% table of contents.  {De Micheli} is braced so that BibTeX treats it
%%% as a two-word surname rather than parsing a lowercase "de" as a
%%% von-particle; this matches the spelling used in Benini:1999:SSC.
@Article{Benini:2000:SLPa,
  author =       "Luca Benini and Giovanni {De Micheli}",
  title =        "System-level power optimization: techniques and
                 tools",
  journal =      j-TODAES,
  volume =       "5",
  number =       "2",
  pages =        "115--192",
  month =        apr,
  year =         "2000",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2000-5-2/p115-benini/p115-benini.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2000-5-2/p115-benini/",
  abstract =     "This tutorial surveys design methods for
                 energy-efficient system-level design. We consider
                 electronic systems consisting of a hardware platform
                 and software layers. We consider the three major
                 constituents of hardware that consume energy, namely
                 computation, communication, and storage units, and we
                 review methods of reducing their energy consumption. We
                 also study models for analyzing the energy cost of
                 software, and methods for energy-efficient software
                 design and compilation. This survey is organized around
                 three main phases of a system design: conceptualization
                 and modeling, design and implementation, and runtime
                 management. For each phase, we review recent techniques
                 for energy-efficient design of both hardware and
                 software.",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  subject =      "Hardware --- Integrated Circuits --- Design Aids
                 (B.7.2); Hardware --- Performance and Reliability ---
                 Performance Analysis and Design Aids (B.8.2); Computer
                 Systems Organization --- Processor Architectures ---
                 General (C.1.0); Software --- Software Engineering ---
                 Design Tools and Techniques (D.2.2)",
}

@Article{Cong:2000:SGD,
  author =       "Jason Cong and Yean-Yow Hwang",
  title =        "Structural gate decomposition for depth-optimal
                 technology mapping in {LUT-based} {FPGA} designs",
  journal =      j-TODAES,
  volume =       "5",
  number =       "2",
  pages =        "193--225",
  month =        apr,
  year =         "2000",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2000-5-2/p193-cong/p193-cong.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2000-5-2/p193-cong/",
  abstract =     "In this paper we study structural gate decomposition
                 in general, simple gate networks for depth-optimal
                 technology mapping using $K$-input Lookup-Tables
                 ($K$-LUTs). We show that (1) structural gate
                 decomposition in any $K$-bounded network results in an
                 optimal mapping depth smaller than or equal to that of
                 the original network, regardless of the decomposition
                 method used; and (2) the problem of structural gate
                 decomposition for depth-optimal technology mapping is
                 NP-hard for $K$-unbounded networks when $K \geq 3$ and
                 remains NP-hard for $K$-bounded networks when $K \geq
                 5$. Based on these results, we propose two new
                 structural gate decomposition algorithms, named {\tt
                 DOGMA} and {\tt DOGMA-m}, which combine the
                 level-driven node-packing technique (used in FlowMap)
                 and the network flow-based labeling technique (used in
                 {\tt Chortle-d}) for depth-optimal technology mapping.
                 Experimental results show that (1) among five
                 structural gate decomposition algorithms, {\tt DOGMA-m}
                 results in the best mapping solutions; and (2) compared
                 with {\tt speed\_up} (an algebraic algorithm) and {\tt
                 TOS} (a Boolean approach), {\tt DOGMA-m} completes
                 decomposition of all tested benchmarks in a short time
                 while {\tt speed\_up} and {\tt TOS} fail in several
                 cases. However, {\tt speed\_up} results in the smallest
                 depth and area in the following technology mapping
                 steps.",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "computer-aided design of VLSI; decomposition; delay
                 minimization; FPGA; logic optimization; programmable
                 logic; simplification; synthesis; system design;
                 technology mapping",
  subject =      "Hardware --- Logic Design --- Design Styles (B.6.1);
                 Hardware --- Logic Design --- Design Aids (B.6.3);
                 Hardware --- Logic Design --- Design Aids (B.6.3): {\bf
                 Automatic synthesis}; Hardware --- Integrated Circuits
                 --- Types and Design Styles (B.7.1)",
}

@Article{Hwang:2000:PSS,
  author =       "Chi-Hong Hwang and Allen C.-H. Wu",
  title =        "A predictive system shutdown method for energy saving
                 of event-driven computation",
  journal =      j-TODAES,
  volume =       "5",
  number =       "2",
  pages =        "226--241",
  month =        apr,
  year =         "2000",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2000-5-2/p226-hwang/p226-hwang.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2000-5-2/p226-hwang/",
  abstract =     "This paper presents a system-level power management
                 technique for energy savings of event-driven
                 applications. We present a new predictive
                 system-shutdown method to exploit sleep mode operations
                 for energy saving. We use an exponential-average
                 approach to predict the upcoming idle period. We
                 introduce two mechanisms, prediction-miss correction
                 and prewake-up, to improve the hit ratio and to reduce
                 the delay overhead. Experiments on four different
                 event-driven applications show that our proposed method
                 achieves high hit ratios in a wide range of delay
                 overheads, which results in a high degree of energy
                 saving with low delay penalties.",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "event-driven; power management; predictive; sleep
                 mode; system shutdown",
  subject =      "Computer Applications --- Computer-Aided Engineering
                 (J.6)",
}

@Article{Sudarsanam:2000:SRA,
  author =       "Ashok Sudarsanam and Sharad Malik",
  title =        "Simultaneous reference allocation in code generation
                 for dual data memory bank {ASIPs}",
  journal =      j-TODAES,
  volume =       "5",
  number =       "2",
  pages =        "242--264",
  month =        apr,
  year =         "2000",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2000-5-2/p242-sudarsanam/p242-sudarsanam.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2000-5-2/p242-sudarsanam/",
  abstract =     "We address the problem of code generation for DSP
                 systems on a chip. In such systems, the amount of
                 silicon devoted to program ROM is limited, so
                 application software must be sufficiently dense.
                 Additionally, the software must be written so as to
                 meet various high-performance constraints, which may
                 include hard real-time constraints. Unfortunately,
                 current compiler technology is unable to generate
                 high-quality code for DSPs, whose architectures are
                 highly irregular. Thus, designers often resort to
                 programming application software in assembly---a
                 time-consuming task. In this paper, we focus on
                 providing support for an architectural feature of DSPs
                 that makes code generation difficult, namely multiple
                 data memory banks. This feature increases memory
                 bandwidth by permitting multiple data memory accesses
                 to occur in parallel when the referenced variables
                 belong to different data memory banks and the registers
                 involved conform to a strict set of conditions. We
                 present an algorithm that attempts to maximize the
                 benefit of this architectural feature. While previous
                 approaches have decoupled the phases of register
                 allocation and memory bank assignment, thereby
                 compromising code quality, our algorithm performs these
                 two phases simultaneously. Experimental results
                 demonstrate that our algorithm not only generates
                 high-quality compiled code, but also improves the
                 quality of completely-referenced code.",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "code generation; code optimization; graph labelling;
                 memory bank assignment; register allocation",
  subject =      "Software --- Programming Languages --- Processors
                 (D.3.4); Software --- Programming Languages ---
                 Processors (D.3.4): {\bf Code generation}; Software ---
                 Programming Languages --- Processors (D.3.4): {\bf
                 Compilers}; Software --- Programming Languages ---
                 Processors (D.3.4): {\bf Optimization}",
}

@Article{Irwin:2000:E,
  author =       "Mary Jane Irwin",
  title =        "Editorial",
  journal =      j-TODAES,
  volume =       "5",
  number =       "3",
  pages =        "265--266",
  month =        jul,
  year =         "2000",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p265-irwin/p265-irwin.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p265-irwin/",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Bahar:2000:POT,
  author =       "R. Iris Bahar and Ernest T. Lampe and Enrico Macii",
  title =        "Power optimization of technology-dependent circuits
                 based on symbolic computation of logic implications",
  journal =      j-TODAES,
  volume =       "5",
  number =       "3",
  pages =        "267--293",
  month =        jul,
  year =         "2000",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p267-bahar/p267-bahar.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p267-bahar/",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "aids; automation; design synthesis; logic design",
  subject =      "Hardware --- Logic Design --- Design Styles (B.6.1):
                 {\bf Combinational logic}; Hardware --- Control
                 Structures and Microprogramming --- Microprogram Design
                 Aids (B.1.4): {\bf Optimization}; Hardware --- Logic
                 Design --- Design Aids (B.6.3): {\bf Optimization};
                 Hardware --- Performance and Reliability --- General
                 (B.8.0); Computer Applications --- Physical Sciences
                 and Engineering (J.2): {\bf Electronics}",
}

@Article{Balakrishnan:2000:AFS,
  author =       "M. Balakrishnan and Heman Khanna",
  title =        "Allocation of {FIFO} structures in {RTL} data paths",
  journal =      j-TODAES,
  volume =       "5",
  number =       "3",
  pages =        "294--310",
  month =        jul,
  year =         "2000",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p294-balakrishnan/p294-balakrishnan.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p294-balakrishnan/",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "data path; FIFO; ILP; RTL; synthesis",
  subject =      "Hardware --- Register-Transfer-Level Implementation
                 --- Design (B.5.1); Mathematics of Computing ---
                 Probability and Statistics (G.3): {\bf Queueing
                 theory}",
}

@Article{Benini:2000:SLPb,
  author =       "Luca Benini and Giovanni {De Micheli}",
  title =        "Synthesis of low-power selectively-clocked systems
                 from high-level specification",
  journal =      j-TODAES,
  volume =       "5",
  number =       "3",
  pages =        "311--321",
  month =        jul,
  year =         "2000",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p311-benini/p311-benini.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p311-benini/",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "algorithms; design; gated clock; high-level synthesis;
                 low power",
  subject =      "Hardware --- Control Structures and Microprogramming
                 --- Control Structure Performance Analysis and Design
                 Aids (B.1.2); Hardware --- Performance and Reliability
                 --- General (B.8.0); Theory of Computation ---
                 Computation by Abstract Devices --- Models of
                 Computation (F.1.1): {\bf Unbounded-action devices}",
}

@Article{Blythe:2000:EOD,
  author =       "Stephen A. Blythe and Robert A. Walker",
  title =        "Efficient optimal design space characterization
                 methodologies",
  journal =      j-TODAES,
  volume =       "5",
  number =       "3",
  pages =        "322--336",
  month =        jul,
  year =         "2000",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p322-blythe/p322-blythe.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p322-blythe/",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "bounding; clock-length determination; design space
                 exploration; efficient searching; high-level synthesis;
                 module selection; scheduling",
  subject =      "Computer Applications --- Computer-Aided Engineering
                 (J.6); Hardware --- Performance and Reliability ---
                 General (B.8.0); Computing Methodologies --- Simulation
                 and Modeling --- General (I.6.0); Computer Applications
                 --- Physical Sciences and Engineering (J.2): {\bf
                 Electronics}",
}

@Article{Bogliolo:2000:RBR,
  author =       "Alessandro Bogliolo and Luca Benini and Giovanni {De
                 Micheli}",
  title =        "Regression-based {RTL} power modeling",
  journal =      j-TODAES,
  volume =       "5",
  number =       "3",
  pages =        "337--372",
  month =        jul,
  year =         "2000",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p337-bogliolo/p337-bogliolo.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p337-bogliolo/",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "adaptive characterization; functional macros;
                 regression models; RTL design; RTL power modeling",
  subject =      "Hardware --- Register-Transfer-Level Implementation
                 --- Design Aids (B.5.2); Hardware --- Logic Design ---
                 Design Aids (B.6.3); Hardware --- Performance and
                 Reliability --- General (B.8.0); Computing Milieux ---
                 Management of Computing and Information Systems ---
                 Installation Management (K.6.2): {\bf Benchmarks}",
}

@Article{Bommu:2000:RBF,
  author =       "Surendra Bommu and Niall O'Neill and Maciej
                 Ciesielski",
  title =        "Retiming-based factorization for sequential logic
                 optimization",
  journal =      j-TODAES,
  volume =       "5",
  number =       "3",
  pages =        "373--398",
  month =        jul,
  year =         "2000",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p373-bommu/p373-bommu.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p373-bommu/",
  abstract =     "Current sequential optimization techniques apply a
                 variety of logic transformations that mainly target the
                 combinational logic component of the circuit. Retiming
                 is typically applied as a postprocessing step to the
                 gate-level implementation obtained after technology
                 mapping. This paper introduces a new sequential logic
                 transformation which integrates retiming with logic
                 transformations at the technology-independent level.
                 This transformation is based on implicit retiming
                 across logic blocks and fanout stems during logic
                 optimization. Its application to sequential network
                 synthesis results in the optimization of logic across
                 register boundaries. It can be used in conjunction with
                 any measure of circuit quality for which a fast and
                 reliable gain estimation method can be obtained. We
                 implemented our new technique within the SIS framework
                 and demonstrated its effectiveness in terms of
                 cycle-time minimization on a set of sequential benchmark
                 circuits.",
  acknowledgement = ack-nhfb,
  generalterms = "Algorithms; Design",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "finite state machines; retiming; sequential synthesis",
  subject =      "Hardware --- General (B.0); Hardware --- Logic Design
                 (B.6)",
}

@Article{Carchiolo:2000:HSS,
  author =       "Vincenza Carchiolo and Michele Malgeri and Giuseppe
                 Mangioni",
  title =        "Hardware\slash software synthesis of formal
                 specifications in codesign of embedded systems",
  journal =      j-TODAES,
  volume =       "5",
  number =       "3",
  pages =        "399--432",
  month =        jul,
  year =         "2000",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p399-carchiolo/p399-carchiolo.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p399-carchiolo/",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "codesign; embedded system; hardware and software
                 synthesis",
  subject =      "Hardware --- Register-Transfer-Level Implementation
                 --- Design Aids (B.5.2); Computer Systems Organization
                 --- Special-Purpose and Application-Based Systems
                 (C.3): {\bf Real-time and embedded systems}; Computer
                 Systems Organization --- General (C.0); Software ---
                 Software Engineering --- Requirements/Specifications
                 (D.2.1); Theory of Computation --- Mathematical Logic
                 and Formal Languages --- Formal Languages (F.4.3)",
}

@Article{Chang:2000:TDR,
  author =       "Yao-Wen Chang and Kai Zhu and D. F. Wong",
  title =        "Timing-driven routing for symmetrical array-based
                 {FPGAs}",
  journal =      j-TODAES,
  volume =       "5",
  number =       "3",
  pages =        "433--450",
  month =        jul,
  year =         "2000",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p433-chang/p433-chang.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p433-chang/",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "computer-aided design of VLSI; field-programmable gate
                 array; layout; synthesis",
  subject =      "Hardware --- Integrated Circuits --- Types and Design
                 Styles (B.7.1): {\bf Gate arrays}; Theory of
                 Computation --- Analysis of Algorithms and Problem
                 Complexity --- Nonnumerical Algorithms and Problems
                 (F.2.2): {\bf Routing and layout}; Hardware ---
                 Integrated Circuits --- Design Aids (B.7.2): {\bf
                 Placement and routing}; Computer Applications ---
                 Computer-Aided Engineering (J.6)",
}

@Article{Gelosh:2000:MLT,
  author =       "Donald S. Gelosh and Dorothy E. Setliff",
  title =        "Modeling layout tools to derive forward estimates of
                 area and delay at the {RTL} level",
  journal =      j-TODAES,
  volume =       "5",
  number =       "3",
  pages =        "451--491",
  month =        jul,
  year =         "2000",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p451-gelosh/p451-gelosh.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p451-gelosh/",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "estimation; estimation techniques; layout; machine
                 learning; VLSI CAD",
  subject =      "Hardware --- Input/Output and Data Communications ---
                 Performance Analysis and Design Aids** (B.4.4);
                 Hardware --- Register-Transfer-Level Implementation ---
                 Design Aids (B.5.2): {\bf Automatic synthesis};
                 Computer Applications --- Computer-Aided Engineering
                 (J.6); Hardware --- Integrated Circuits --- Types and
                 Design Styles (B.7.1): {\bf VLSI (very large scale
                 integration)}; Computing Methodologies --- Artificial
                 Intelligence --- Learning (I.2.6): {\bf Concept
                 learning}; Computing Methodologies --- Simulation and
                 Modeling --- Simulation Output Analysis (I.6.6)",
}

@Article{Gogniat:2000:CBE,
  author =       "G. Gogniat and M. Auguin and L. Bianco and A.
                 Pegatoquet",
  title =        "A codesign back-end approach for embedded system
                 design",
  journal =      j-TODAES,
  volume =       "5",
  number =       "3",
  pages =        "492--509",
  month =        jul,
  year =         "2000",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p492-gogniat/p492-gogniat.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p492-gogniat/",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "codesign; communications synthesis; HW\slash SW
                 integration; template architecture",
  subject =      "Computer Systems Organization --- Special-Purpose and
                 Application-Based Systems (C.3): {\bf Real-time and
                 embedded systems}; Computer Applications ---
                 Computer-Aided Engineering (J.6); Hardware ---
                 Integrated Circuits --- Types and Design Styles
                 (B.7.1): {\bf Advanced technologies}",
}

@Article{Gupta:2000:CIP,
  author =       "Avaneendra Gupta and John P. Hayes",
  title =        "{CLIP}: integer-programming-based optimal layout
                 synthesis of {$2$D CMOS} cells",
  journal =      j-TODAES,
  volume =       "5",
  number =       "3",
  pages =        "510--547",
  month =        jul,
  year =         "2000",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p510-gupta/p510-gupta.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p510-gupta/",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "circuit clustering; CMOS networks; diffusion sharing;
                 integer linear programming; integer programming; layout
                 optimization; leaf cell synthesis; module generation;
                 transistor chains; two-dimensional layout",
  subject =      "Hardware --- Integrated Circuits --- Types and Design
                 Styles (B.7.1): {\bf Memory technologies}; Hardware ---
                 Integrated Circuits --- Design Aids (B.7.2): {\bf
                 Layout}; Hardware --- Integrated Circuits --- Design
                 Aids (B.7.2): {\bf Simulation}; Mathematics of
                 Computing --- Numerical Analysis --- Optimization
                 (G.1.6): {\bf Integer programming}; Software ---
                 Programming Languages --- Language Classifications
                 (D.3.2): {\bf Specialized application languages};
                 Computer Applications --- Computer-Aided Engineering
                 (J.6)",
}

@Article{Hsiao:2000:DST,
  author =       "Michael S. Hsiao and Elizabeth M. Rudnick and Janak H.
                 Patel",
  title =        "Dynamic state traversal for sequential circuit test
                 generation",
  journal =      j-TODAES,
  volume =       "5",
  number =       "3",
  pages =        "548--565",
  month =        jul,
  year =         "2000",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p548-hsiao/p548-hsiao.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p548-hsiao/",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "automatic test pattern generation (ATPG);
                 finite-state-machine traversal; genetic algorithms;
                 sequential circuits; simulation-based; testing",
  subject =      "Hardware --- Performance and Reliability ---
                 Reliability, Testing, and Fault-Tolerance (B.8.1);
                 Hardware --- Logic Design --- Design Styles (B.6.1):
                 {\bf Sequential circuits}; Computer Applications ---
                 Computer-Aided Engineering (J.6); Computing
                 Methodologies --- Artificial Intelligence --- Problem
                 Solving, Control Methods, and Search (I.2.8): {\bf
                 Heuristic methods}",
}

@Article{Jha:2000:HLL,
  author =       "Pradip K. Jha and Nikil D. Dutt",
  title =        "High-level library mapping for memories",
  journal =      j-TODAES,
  volume =       "5",
  number =       "3",
  pages =        "566--603",
  month =        jul,
  year =         "2000",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p566-jha/p566-jha.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p566-jha/",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "high-level synthesis; memory libraries;
                 technology-mapping",
  subject =      "Hardware --- Memory Structures --- Design Styles
                 (B.3.2): {\bf Primary memory}; Hardware ---
                 Register-Transfer-Level Implementation --- Design
                 (B.5.1): {\bf Memory design}; Computer Applications ---
                 Computer-Aided Engineering (J.6); Hardware ---
                 Register-Transfer-Level Implementation --- Design Aids
                 (B.5.2): {\bf Automatic synthesis}",
}

@Article{Lalgudi:2000:OCE,
  author =       "Kumar N. Lalgudi and Marios C. Papaefthymiou and
                 Miodrag Potkonjak",
  title =        "Optimizing computations for effective
                 block-processing",
  journal =      j-TODAES,
  volume =       "5",
  number =       "3",
  pages =        "604--630",
  month =        jul,
  year =         "2000",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p604-lalgudi/p604-lalgudi.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p604-lalgudi/",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "combinatorial optimization; computation dataflow
                 graphs; embedded systems; high-level synthesis; integer
                 linear programming; retiming; scheduling;
                 vectorization",
  subject =      "Computer Systems Organization --- Special-Purpose and
                 Application-Based Systems (C.3): {\bf Signal processing
                 systems}; Computing Methodologies --- Pattern
                 Recognition --- Applications (I.5.4): {\bf Signal
                 processing}; Mathematics of Computing --- Numerical
                 Analysis --- Optimization (G.1.6): {\bf Integer
                 programming}; Mathematics of Computing --- Discrete
                 Mathematics --- General (G.2.0); Theory of Computation
                 --- Analysis of Algorithms and Problem Complexity ---
                 Nonnumerical Algorithms and Problems (F.2.2): {\bf
                 Sequencing and scheduling}; Computing Methodologies ---
                 Artificial Intelligence --- Problem Solving, Control
                 Methods, and Search (I.2.8): {\bf Scheduling}; Computer
                 Applications --- Computer-Aided Engineering (J.6)",
}

@Article{Long:2000:FFA,
  author =       "David E. Long and Mahesh A. Iyer and Miron
                 Abramovici",
  title =        "{FILL} and {FUNI}: algorithms to identify illegal
                 states and sequentially untestable faults",
  journal =      j-TODAES,
  volume =       "5",
  number =       "3",
  pages =        "631--657",
  month =        jan,
  year =         "2000",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p631-long/p631-long.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p631-long/",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "automatic test generation; illegal states; sequential
                 circuits; untestable faults",
  subject =      "Hardware --- Performance and Reliability --- General
                 (B.8.0); Hardware --- Control Structures and
                 Microprogramming --- General (B.1.0); Hardware ---
                 Arithmetic and Logic Structures --- General (B.2.0);
                 Computer Applications --- Computer-Aided Engineering
                 (J.6); Hardware --- Arithmetic and Logic Structures ---
                 High-Speed Arithmetic (B.2.4): {\bf Algorithms};
                 Hardware --- Logic Design --- Design Styles (B.6.1):
                 {\bf Sequential circuits}; Hardware --- Logic Design
                 --- Design Aids (B.6.3); Hardware --- Integrated
                 Circuits --- Types and Design Styles (B.7.1)",
}

@Article{Marculescu:2000:SSM,
  author =       "Diana Marculescu and Radu Marculescu and Massoud
                 Pedram",
  title =        "Stochastic sequential machine synthesis with
                 application to constrained sequence generation",
  journal =      j-TODAES,
  volume =       "5",
  number =       "3",
  pages =        "658--681",
  month =        jan,
  year =         "2000",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p658-marculescu/p658-marculescu.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p658-marculescu/",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "algorithms; design; performance; theory",
  subject =      "Data --- Coding and Information Theory (E.4): {\bf
                 Data compaction and compression}; Computer Applications
                 --- Computer-Aided Engineering (J.6); Hardware ---
                 Logic Design --- Design Aids (B.6.3); Hardware ---
                 Integrated Circuits --- Types and Design Styles
                 (B.7.1): {\bf VLSI (very large scale integration)};
                 Hardware --- Performance and Reliability --- General
                 (B.8.0); Theory of Computation --- Computation by
                 Abstract Devices --- Models of Computation (F.1.1);
                 Mathematics of Computing --- Probability and Statistics
                 (G.3): {\bf Stochastic processes}",
}

@Article{Panda:2000:CVC,
  author =       "Preeti Ranjan Panda and Nikil D. Dutt and Alexandru
                 Nicolau",
  title =        "On-chip vs. off-chip memory: the data partitioning
                 problem in embedded processor-based systems",
  journal =      j-TODAES,
  volume =       "5",
  number =       "3",
  pages =        "682--704",
  month =        jan,
  year =         "2000",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p682-panda/p682-panda.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p682-panda/",
  abstract =     "Efficient utilization of on-chip memory space is
                 extremely important in modern embedded system
                 applications based on processor cores. In addition to a
                 data cache that interfaces with slower off-chip memory,
                 a fast on-chip SRAM, called Scratch-Pad memory, is
                 often used in several applications, so that critical
                 data can be stored there with a guaranteed fast access
                 time. We present a technique for efficiently exploiting
                 on-chip Scratch-Pad memory by partitioning the
                 application's scalar and arrayed variables into
                 off-chip DRAM and on-chip Scratch-Pad SRAM, with the
                 goal of minimizing the total execution time of embedded
                 applications. We also present extensions of our
                 proposed memory assignment strategy to handle context
                 switching between multiple programs, as well as a
                 generalized memory hierarchy. Our experiments on code
                 kernels from typical applications show that our
                 technique results in significant performance
                 improvements.",
  acknowledgement = ack-nhfb,
  generalterms = "Algorithms; Measurement; Performance",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "data cache; data partitioning; memory synthesis;
                 on-chip memory; scratch-pad memory; system design;
                 system synthesis",
  subject =      "Hardware --- Memory Structures --- Design Styles
                 (B.3.2): {\bf Cache memories}; Software --- Programming
                 Languages --- Processors (D.3.4): {\bf Compilers}",
}

@Article{Raimi:2000:EML,
  author =       "Richard Raimi and Ramin Hojati and Kedar S. Namjoshi",
  title =        "Environment modeling and language universality",
  journal =      j-TODAES,
  volume =       "5",
  number =       "3",
  pages =        "705--725",
  month =        jan,
  year =         "2000",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p705-raimi/p705-raimi.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p705-raimi/",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "abstraction; environment modeling; language
                 universality; model checking",
  subject =      "Hardware --- Performance and Reliability ---
                 Reliability, Testing, and Fault-Tolerance (B.8.1);
                 Computer Systems Organization --- Performance of
                 Systems (C.4); Computer Applications --- Computer-Aided
                 Engineering (J.6); Theory of Computation ---
                 Computation by Abstract Devices --- Models of
                 Computation (F.1.1): {\bf Automata}; Software ---
                 Software Engineering --- Software/Program Verification
                 (D.2.4): {\bf Model checking}; Theory of Computation
                 --- Computation by Abstract Devices --- Models of
                 Computation (F.1.1): {\bf Unbounded-action devices}",
}

@Article{Yan:2000:TLB,
  author =       "Jin-Tai Yan",
  title =        "Three-layer bubble-sorting-based {nonManhattan}
                 channel routing",
  journal =      j-TODAES,
  volume =       "5",
  number =       "3",
  pages =        "726--734",
  month =        jan,
  year =         "2000",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p726-yan/p726-yan.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p726-yan/",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "bubble-sorting algorithm; channel routing; three-layer
                 nonManhattan routing model",
  subject =      "Theory of Computation --- Analysis of Algorithms and
                 Problem Complexity --- Nonnumerical Algorithms and
                 Problems (F.2.2): {\bf Routing and layout}; Hardware
                 --- Integrated Circuits --- Design Aids (B.7.2): {\bf
                 Placement and routing}; Hardware --- Integrated
                 Circuits --- Design Aids (B.7.2): {\bf Verification};
                 Hardware --- Performance and Reliability --- General
                 (B.8.0); Computer Applications --- Computer-Aided
                 Engineering (J.6); Hardware --- Input/Output and Data
                 Communications --- Input/Output Devices (B.4.2): {\bf
                 Channels and controllers}",
}

@Article{Yang:2000:ERC,
  author =       "Cheng-Hsing Yang and Sao-Jie Chen and Jan-Ming Ho and
                 Chia-Chun Tsai",
  title =        "Efficient routability check algorithms for segmented
                 channel routing",
  journal =      j-TODAES,
  volume =       "5",
  number =       "3",
  pages =        "735--747",
  month =        jan,
  year =         "2000",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p735-yang/p735-yang.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p735-yang/",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "field programmable gate arrays (FPGAs); routing;
                 segmented channel",
  subject =      "Hardware --- Input/Output and Data Communications ---
                 Input/Output Devices (B.4.2): {\bf Channels and
                 controllers}; Hardware --- Integrated Circuits ---
                 Types and Design Styles (B.7.1): {\bf Gate arrays};
                 Computer Applications --- Computer-Aided Engineering
                 (J.6); Hardware --- Integrated Circuits --- Design Aids
                 (B.7.2): {\bf Placement and routing}; Theory of
                 Computation --- Analysis of Algorithms and Problem
                 Complexity --- Nonnumerical Algorithms and Problems
                 (F.2.2): {\bf Routing and layout}",
}

@Article{Marwedel:2000:GE,
  author =       "Peter Marwedel",
  title =        "Guest {Editorial}",
  journal =      j-TODAES,
  volume =       "5",
  number =       "4",
  pages =        "749--751",
  month =        jan,
  year =         "2000",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2000-5-4/p749-marwedel/p749-marwedel.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2000-5-4/p749-marwedel/",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  subject =      "Computing Milieux --- Computers and Society ---
                 Organizational Impacts (K.4.3)",
}

@Article{Aditya:2000:CSM,
  author =       "Shail Aditya and Scott A. Mahlke and B. Ramakrishna
                 Rau",
  title =        "Code size minimization and retargetable assembly for
                 custom {EPIC} and {VLIW} instruction formats",
  journal =      j-TODAES,
  volume =       "5",
  number =       "4",
  pages =        "752--773",
  month =        jan,
  year =         "2000",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2000-5-4/p752-aditya/p752-aditya.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2000-5-4/p752-aditya/",
  abstract =     "PICO is a fully automated system for designing the
                 architecture and the microarchitecture of VLIW and EPIC
                 processors. A serious concern with this class of
                 processors, due to their very long instructions, is
                 their code size. One focus of this paper is to describe
                 a series of code size minimization techniques used
                 within PICO, some of which are applied during the
                 automatic design of the instruction format, while
                 others are applied during program assembly. The design
                 of a retargetable assembler to support these techniques
                 also poses certain novel challenges, which constitute
                 the second focus of this paper. Contrary to widely held
                 perceptions, we demonstrate that it is entirely
                 possible to design VLIW and EPIC processors that are
                 capable of issuing large numbers of operations per
                 cycle, but whose code size is only moderately larger
                 than that for a sequential CISC processor.",
  acknowledgement = ack-nhfb,
  generalterms = "Design; Experimentation; Measurement",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "code size minimization; custom templates; design
                 automation; EPIC; instruction format design; noop
                 compression; retargetable assembly; VLIW",
  subject =      "Computer Systems Organization --- Processor
                 Architectures --- Single Data Stream Architectures
                 (C.1.1): {\bf RISC/CISC, VLIW architectures}; Software
                 --- Programming Languages --- Processors (D.3.4): {\bf
                 Code generation}; Software --- Programming Languages
                 --- Processors (D.3.4): {\bf Retargetable compilers};
                 Hardware --- Control Structures and Microprogramming
                 --- Control Structure Performance Analysis and Design
                 Aids (B.1.2)",
}

@Article{VanEijk:2000:CAC,
  author =       "Koen {Van Eijk} and Bart Mesman and Carlos A. Alba
                 Pinto and Qin Zhao and Marco Bekooij and Jef {Van
                 Meerbergen} and Jochen Jess",
  title =        "Constraint analysis for code generation: basic
                 techniques and applications in {FACTS}",
  journal =      j-TODAES,
  volume =       "5",
  number =       "4",
  pages =        "774--793",
  month =        jan,
  year =         "2000",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 09:50:12 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2000-5-4/p774-van_eijk/p774-van_eijk.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2000-5-4/p774-van_eijk/",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Leupers:2000:GBC,
  author =       "Rainer Leupers and Steven Bashford",
  title =        "Graph-based code selection techniques for embedded
                 processors",
  journal =      j-TODAES,
  volume =       "5",
  number =       "4",
  pages =        "794--814",
  month =        jan,
  year =         "2000",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2000-5-4/p794-leupers/p794-leupers.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2000-5-4/p794-leupers/",
  abstract =     "Code selection is an important task in code generation
                 for programmable processors, where the goal is to find
                 an efficient mapping of machine-independent
                 intermediate code to processor-specific machine
                 instructions. Traditional approaches to code selection
                 are based on tree parsing which enables fast and
                 optimal code selection for intermediate code given as a
                 set of data-flow trees. While this approach is
                 generally useful in compilers for general-purpose
                 processors, it may lead to poor code quality in the
                 case of embedded processors. The reason is that the
                 special architectural features of embedded processors
                 require performing code selection on data-flow graphs,
                 which are a more general representation of intermediate
                 code. In this paper, we present data-flow graph-based
                 code selection techniques for two architectural
                 families of embedded processors: media processors with
                 support for SIMD instructions and fixed-point DSPs with
                 irregular data paths. Both techniques exploit the fact
                 that, in the area of embedded systems, high code
                 quality is a much more important goal than high
                 compilation speed. We demonstrate that certain
                 architectural features can only be utilized by
                 graph-based code selection, while in other cases this
                 approach leads to a significant increase in code
                 quality as compared to tree-based code selection.",
  acknowledgement = ack-nhfb,
  generalterms = "Algorithms; Experimentation",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "code selection; data-flow graphs; embedded processors;
                 irregular data paths; SIMD instructions",
  subject =      "Software --- Programming Languages --- Processors
                 (D.3.4): {\bf Code generation}",
}

@Article{Pees:2000:RCS,
  author =       "Stefan Pees and Andreas Hoffmann and Heinrich Meyr",
  title =        "Retargetable compiled simulation of embedded
                 processors using a machine description language",
  journal =      j-TODAES,
  volume =       "5",
  number =       "4",
  pages =        "815--834",
  month =        jan,
  year =         "2000",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2000-5-4/p815-pees/p815-pees.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2000-5-4/p815-pees/",
  abstract =     "Fast processor simulators are needed for the software
                 development of embedded processors, for HW/SW
                 cosimulation systems, and for profiling and design of
                 application-specific processors. Such fast simulators
                 can be generated based on the machine description
                 language LISA. Using this language to model processor
                 architectures enables the generation of compiled
                 simulators on various abstraction levels, assemblers,
                 and compiler back ends. The article discusses the
                 requirements of software development tools on processor
                 models and presents the approach based on the LISA
                 language. Furthermore, the implementation of a
                 retargetable environment consisting of compiled
                 simulator, debugger, and assembler is presented.
                 Measurements for a verified, cycle-based LISA model of
                 the TI TMS320C62x DSP show that this approach
                 achieves between 37$\times$ and 170$\times$ higher
                 simulation speed compared to a commercial simulator
                 using a standard technique and the same accuracy
                 level.",
  acknowledgement = ack-nhfb,
  generalterms = "Design; Languages; Performance; Verification",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "compiled simulation; DSP processors; HW/SW
                 cosimulation; instruction set simulators; machine
                 description languages; processor modeling and
                 simulation; system-on-chip",
  subject =      "Computing Methodologies --- Simulation and Modeling
                 --- Model Development (I.6.5): {\bf Modeling
                 methodologies}; Computer Systems Organization ---
                 Special-Purpose and Application-Based Systems (C.3):
                 {\bf Real-time and embedded systems}; Hardware ---
                 Control Structures and Microprogramming --- Control
                 Structure Performance Analysis and Design Aids (B.1.2):
                 {\bf Simulation**}",
}

@Article{Bakshi:2001:PCH,
  author =       "Smita Bakshi and Daniel D. Gajski",
  title =        "Performance-constrained hierarchical pipelining for
                 behaviors, loops, and operations",
  journal =      j-TODAES,
  volume =       "6",
  number =       "1",
  pages =        "1--25",
  month =        jan,
  year =         "2001",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 09:50:12 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/citations/journals/todaes/2001-6-1/p1-bakshi/",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Chakrabarty:2001:OTA,
  author =       "Krishnendu Chakrabarty",
  title =        "Optimal test access architectures for
                 system-on-a-chip",
  journal =      j-TODAES,
  volume =       "6",
  number =       "1",
  pages =        "26--49",
  month =        jan,
  year =         "2001",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2001-6-1/p26-chakrabarty/p26-chakrabarty.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2001-6-1/p26-chakrabarty/",
  abstract =     "Test access is a major problem for core-based
                 system-on-a-chip (SOC) designs. Since embedded cores in
                 an SOC are not directly accessible via chip inputs and
                 outputs, special access mechanisms are required to test
                 them at the system level. An efficient test access
                 architecture should also reduce test cost by minimizing
                 test application time. We address several issues
                 related to the design of optimal test access
                 architectures that minimize testing time, including
                 the assignment of cores to test buses, distribution of
                 test data width between multiple test buses, and
                 analysis of test data width required to satisfy an
                 upper bound on the testing time. Even though the
                 decision versions of all these problems are shown to be
                 NP-complete, they can be solved exactly for practical
                 instances using integer linear programming (ILP). As a
                 case study, the ILP models for two hypothetical but
                 nontrivial systems are solved using a public-domain ILP
                 software package.",
  acknowledgement = ack-nhfb,
  generalterms = "Design; Reliability",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  subject =      "Hardware --- Integrated Circuits --- Types and Design
                 Styles (B.7.1); Hardware --- Integrated Circuits ---
                 Design Aids (B.7.2); Hardware --- Integrated Circuits
                 --- Reliability and Testing** (B.7.3)",
}

@Article{Chen:2001:ALP,
  author =       "Rita Yu Chen and Mary Jane Irwin and Raminder S.
                 Bajwa",
  title =        "Architecture-level power estimation and design
                 experiments",
  journal =      j-TODAES,
  volume =       "6",
  number =       "1",
  pages =        "50--66",
  month =        jan,
  year =         "2001",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2001-6-1/p50-chen/p50-chen.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2001-6-1/p50-chen/",
  abstract =     "Architecture-level power estimation has received more
                 attention recently because of its efficiency. This
                 article presents a technique used to do power analysis
                 of processors at the architecture level. It provides
                 cycle-by-cycle power consumption data of the
                 architecture on the basis of the instruction/data flow
                 stream. To characterize the power dissipation of
                 control units, a novel hierarchical method has been
                 developed. Using this technique, a power estimator is
                 implemented for a commercial processor. The accuracy of
                 the estimator is validated by comparing the power
                 values it produces against measurements made by a
                 gate-level power simulator for the same benchmark set.
                 Our estimation approach is shown to provide very
                 efficient and accurate power analysis at the
                 architecture level. The energy models built for
                 first-pass estimation (such as ALU, MAC unit, register
                 files) are reusable for future architecture design
                 modification. In this article, we demonstrate the
                 application of the technique. Furthermore, this
                 technique can evaluate various kinds of software to
                 achieve hardware/software codesign for low power.",
  acknowledgement = ack-nhfb,
  generalterms = "Design; Experimentation; Performance",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "architecture tradeoff; architecture-level power
                 estimation; computer-aided design of VLSI; control
                 unit; energy model; energy table; functional unit;
                 hardware/software codesign; instruction format
                 transition; low power design; output signal transition;
                 power analysis and estimation; switch capacitance",
  subject =      "Computer Applications --- Computer-Aided Engineering
                 (J.6)",
}

@Article{Hsiung:2001:PPO,
  author =       "Pao-Ann Hsiung",
  title =        "{POSE}: a parallel object-oriented synthesis
                 environment",
  journal =      j-TODAES,
  volume =       "6",
  number =       "1",
  pages =        "67--92",
  month =        jan,
  year =         "2001",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2001-6-1/p67-hsiung/p67-hsiung.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2001-6-1/p67-hsiung/",
  abstract =     "Design automation tools and methodologies always
                 encounter a problem of how systems may be designed
                 efficiently, including issues such as static modeling
                 and dynamic manipulation of system parts. With the
                 rapid progress of design technology, the continuously
                 increasing number of different choices per system part
                 and the growing complexity of today's systems, the
                 efficiency of the design environment is not only a
                 major concern now, but will also be a demanding problem
                 in the near future. In contrast to heuristic methods, a
                 novel environment called POSE is proposed that
                 increases efficiency during design without losing
                 optimality in the final design results. System parts
                 are modeled using the popular object-oriented modeling
                 technique and are dynamically manipulated using the
                 parallel design technique. A complete integration of
                 object-oriented and parallel techniques is one of the
                 major features of POSE. Common problems related to
                 parallel design such as {\em emptiness\/} and {\em
                 deadlock\/} are also elegantly solved within POSE.
                 Experimental results and formal analysis based on POSE
                 all show its practical and theoretical usefulness. POSE
                 can be used at any level of synthesis as long as
                 off-the-shelf building-blocks manipulation is required.
                 POSE can be applied especially to {\em system-level\/}
                 synthesis, whose targets can be parallel computer
                 architectures, systems-on-chip, or embedded systems. We
                 will show how POSE has been applied to ICOS, a recently
                 proposed synthesis methodology. Furthermore, POSE can
                 be easily integrated with other heuristic design
                 methodologies to allow increased design efficiency.",
  acknowledgement = ack-nhfb,
  generalterms = "Design",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "design-completion check; hardware synthesis;
                 object-oriented technology; parallel design; synthesis
                 rollback",
  subject =      "Computer Applications --- Computer-Aided Engineering
                 (J.6): {\bf Computer-aided design (CAD)}; Hardware ---
                 Miscellaneous (B.m): {\bf Design management}",
}

@Article{Huang:2001:CSP,
  author =       "Ing-Jer Huang",
  title =        "Co-synthesis of pipelined structures and instruction
                 reordering constraints for instruction set processors",
  journal =      j-TODAES,
  volume =       "6",
  number =       "1",
  pages =        "93--121",
  month =        jan,
  year =         "2001",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2001-6-1/p93-huang/p93-huang.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2001-6-1/p93-huang/",
  abstract =     "This paper presents a hardware/software co-synthesis
                 approach to pipelined ISP (instruction set processor)
                 design. The approach synthesizes the pipeline structure
                 from a given instruction set architecture (behavioral)
                 specification. In addition, it generates a set of
                 reordering constraints that guides the compiler
                 back-end (reorderer) to properly schedule instructions
                 so that possible pipeline hazards are avoided and
                 throughput is improved. \par

                 Co-synthesis takes place while resolving pipeline
                 hazards, which can be attributed to inter-instruction
                 dependencies (IIDs). An extended taxonomy of IIDs has
                 been proposed for the systematic analysis of pipeline
                 hazards. Hardware/software methods are developed to
                 resolve IIDs. Algorithms based on taxonomy and
                 resolutions are constructed and integrated into the
                 pipeline synthesis process to explore hardware and
                 software design space. Application benchmarks are used
                 to evaluate possible designs and guide the design
                 decision. The power of the co-synthesis tool PIPER is
                 demonstrated through pipeline synthesis of one
                 illustrative example and two ISPs, including an
                 industrial one (TDY-43). In comparison with other
                 related approaches, our approach achieves higher
                 throughput and provides a systematic way to explore the
                 hardware/software trade-off.",
  acknowledgement = ack-nhfb,
  generalterms = "Design; Theory",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "compiler instruction optimization; instruction set
                 processor; pipeline hazards; pipeline taxonomy;
                 synthesis",
  subject =      "Hardware --- Control Structures and Microprogramming
                 --- Control Structure Performance Analysis and Design
                 Aids (B.1.2): {\bf Automatic synthesis**}",
}

@Article{Mariatos:2001:MAC,
  author =       "E. P. Mariatos and A. N. Birbas and M. K. Birbas",
  title =        "A mapping algorithm for computer-assisted exploration
                 in the design of embedded systems",
  journal =      j-TODAES,
  volume =       "6",
  number =       "1",
  pages =        "122--147",
  month =        jan,
  year =         "2001",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  note =         "See note \cite{Chen:2007:NMA}.",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2001-6-1/p122-mariatos/p122-mariatos.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2001-6-1/p122-mariatos/",
  abstract =     "We present a technique for automatic exploration of
                 architectural alternatives in the design of complex
                 electronic embedded systems and systems-on-a-chip. The
                 technique transforms the problem into a set of simple
                 model-to-model operations and a mapping algorithm that
                 becomes the core of the entire design process. The
                 mapping algorithm is formulated as an assignment-type
                 problem (ATP), which is, in turn, solved by a
                 straightforward optimization method. The result is a
                 design assistance tool, which is demonstrated through a
                 telecommunication systems example.",
  acknowledgement = ack-nhfb,
  generalterms = "Design; Experimentation",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "codesign; embedded system design space exploration;
                 specification mapping",
  subject =      "Computer Systems Organization --- Special-Purpose and
                 Application-Based Systems (C.3); Software --- Software
                 Engineering --- Design Tools and Techniques (D.2.2):
                 {\bf Computer-aided software engineering (CASE)}",
}

@Article{Panda:2001:DMO,
  author =       "P. R. Panda and F. Catthoor and N. D. Dutt and K.
                 Danckaert and E. Brockmeyer and C. Kulkarni and A.
                 Vandercappelle and P. G. Kjeldsberg",
  title =        "Data and memory optimization techniques for embedded
                 systems",
  journal =      j-TODAES,
  volume =       "6",
  number =       "2",
  pages =        "149--206",
  month =        apr,
  year =         "2001",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2001-6-2/p149-panda/p149-panda.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2001-6-2/p149-panda/",
  abstract =     "We present a survey of the state-of-the-art techniques
                 used in performing data and memory-related
                 optimizations in embedded systems. The optimizations
                 are targeted directly or indirectly at the memory
                 subsystem, and impact one or more out of three
                 important cost metrics: area, performance, and power
                 dissipation of the resulting implementation. \par

                 We first examine architecture-independent optimizations
                 in the form of code transformations. We next cover a
                 broad spectrum of optimization techniques that address
                 memory architectures at varying levels of granularity,
                 ranging from register files to on-chip memory, data
                 caches, and dynamic memory (DRAM). We end with memory
                 addressing related issues.",
  acknowledgement = ack-nhfb,
  generalterms = "Algorithms; Design; Experimentation; Performance",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "address generation; allocation; architecture
                 exploration; code transformation; data cache; data
                 optimization; DRAM; high-level synthesis; memory
                 architecture customization; memory power dissipation;
                 register file; size estimation; SRAM; survey",
  subject =      "Hardware --- Memory Structures --- General (B.3.0);
                 Hardware --- Register-Transfer-Level Implementation ---
                 Design (B.5.1): {\bf Memory design}; Hardware ---
                 Register-Transfer-Level Implementation --- Design Aids
                 (B.5.2): {\bf Optimization}; Hardware --- Integrated
                 Circuits --- Types and Design Styles (B.7.1): {\bf
                 Memory technologies}; Software --- Programming
                 Languages --- Processors (D.3.4): {\bf Optimization}",
}

@Article{Shenoy:2001:ASL,
  author =       "Nagaraj Shenoy and Alok Choudhary and Prithviraj
                 Banerjee",
  title =        "An algorithm for synthesis of large time-constrained
                 heterogeneous adaptive systems",
  journal =      j-TODAES,
  volume =       "6",
  number =       "2",
  pages =        "207--225",
  month =        apr,
  year =         "2001",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2001-6-2/p207-shenoy/p207-shenoy.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2001-6-2/p207-shenoy/",
  abstract =     "Large time-constrained applications are highly
                 computer-intensive and are often implemented as a
                 complex organization of pipelined data parallel tasks
                 on a pool of embedded processors, DSP processors, and
                 FPGAs. The large number of design alternatives
                 available at each task level, the application as a
                 whole, and the special needs of the reconfigurable
                 devices (such as the FPGA) make the manual synthesis of
                 such systems very tedious. \par

                 The automatic synthesis algorithm in this paper
                 combines exact (MILP-based) and heuristic techniques to
                 solve this problem, which basically involves (1)
                 propagation of timing constraints; (2) pipelining the
                 loops to meet throughput requirements; (3) resource
                 selection and scheduling, keeping the processing
                 requirements and the timing constraints in view; (4)
                 scheduling the resources across the tasks to ensure
                 maximum utilization; and (5) hiding the reconfiguration
                 delays of the FPGAs. \par

                 While the use of MILP techniques helps in getting
                 high-quality results, combining them with heuristics
                 ensures acceptable synthesis times, striking a good
                 balance between quality of results and synthesis time.
                 Our experimental evaluation of the algorithm shows an
                 average 40\% in resource cost reduction (compared to
                 manual synthesis) with synthesis times from minutes to
                 as low as a few seconds in some cases.",
  acknowledgement = ack-nhfb,
  generalterms = "Algorithms; Design; Experimentation",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "delay/cost table; hierarchical control data-flow
                 graph; list scheduling; mixed integer linear
                 programming; pipelining; reconfigurable computing;
                 time-constrained synthesis",
  subject =      "Computer Applications --- Computer-Aided Engineering
                 (J.6): {\bf Computer-aided design (CAD)}; Computer
                 Systems Organization --- Special-Purpose and
                 Application-Based Systems (C.3): {\bf Real-time and
                 embedded systems}",
}

@Article{Su:2001:IRA,
  author =       "Chauchin Su and Yue-Tsang Chen and Shyh-Jye Jou",
  title =        "Intrinsic response for analog module testing using an
                 analog testability bus",
  journal =      j-TODAES,
  volume =       "6",
  number =       "2",
  pages =        "226--243",
  month =        apr,
  year =         "2001",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2001-6-2/p226-su/p226-su.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2001-6-2/p226-su/",
  abstract =     "A parasitic effect removal methodology is proposed to
                 handle the large parasitic effects in analog
                 testability buses. The removal is done by an on-chip
                 test generation technique and an intrinsic response
                 extraction algorithm. On-chip test generation creates
                 test signals on-chip to avoid the parasitic effects of
                 the test application bus. The intrinsic response
                 extraction cross-checks and cancels the parasitic
                 effects of both test application and response
                 observation paths. The tests using both SPICE
                 simulation and MNABST-1 P1149.4 test chip reveal that
                 the proposed algorithm can not only remove the
                 parasitic effects of the test buses but also tolerate
                 test signal variations. Furthermore, it is robust
                 enough to handle loud environmental noise and the
                 nonlinearity of the switching devices.",
  acknowledgement = ack-nhfb,
  generalterms = "Experimentation; Theory",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "analog testability bus; analog testing; boundary scan;
                 design for testability; intrinsic response",
  subject =      "Hardware --- Performance and Reliability ---
                 Reliability, Testing, and Fault-Tolerance (B.8.1)",
}

@Article{Huang:2001:VSE,
  author =       "Shi-Yu Huang and Kwang-Ting Cheng and Kuang-Chien
                 Chen",
  title =        "Verifying sequential equivalence using {ATPG}
                 techniques",
  journal =      j-TODAES,
  volume =       "6",
  number =       "2",
  pages =        "244--275",
  month =        apr,
  year =         "2001",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2001-6-2/p244-huang/p244-huang.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2001-6-2/p244-huang/",
  abstract =     "In this paper we address the problem of verifying the
                 equivalence of two sequential circuits.
                 State-of-the-art sequential optimization techniques
                 such as retiming and sequential redundancy removal can
                 handle designs with up to hundreds or even thousands of
                 flip-flops. However, the BDD-based approaches for
                 verifying sequential equivalence can easily run into
                 memory explosion for such designs. In an attempt to
                 handle larger circuits, we modify test
                 pattern-generation techniques for verification. The
                 suggested approach utilizes the popular efficient
                 backward-justification technique used in most
                 sequential ATPG programs. We present several techniques
                 to enhance the efficiency of this approach by (1)
                 identifying equivalent flip-flop pairs using an
                 induction-based algorithm, and (2) generalizing the
                 idea of exploring the structural similarity between
                 circuits to perform verification in stages. This
                 ATPG-based framework is suitable for verifying circuits
                 either with or without a reset state. In order to
                 extend this approach to verify retimed circuits, we
                 introduce a delay-compensation-based algorithm for
                 preprocessing the circuits. The experimental results of
                 verifying the correctness of circuits after sequential
                 redundancy removal and retiming with up to several
                 hundred flip-flops are presented.",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  subject =      "Hardware --- Logic Design --- Design Aids (B.6.3):
                 {\bf Verification}; Hardware --- Logic Design ---
                 Design Styles (B.6.1): {\bf Sequential circuits}",
}

@Article{VanPraet:2001:PMC,
  author =       "J. {Van Praet} and D. Lanneer and W. Geurts and G.
                 Goossens",
  title =        "Processor modeling and code selection for retargetable
                 compilation",
  journal =      j-TODAES,
  volume =       "6",
  number =       "3",
  pages =        "277--307",
  month =        jul,
  year =         "2001",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Tue Feb 19 14:35:45 MST 2002",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Kagaris:2001:NHC,
  author =       "D. Kagaris and S. Tragoudas",
  title =        "{Von Neumann} hybrid cellular automata for generating
                 deterministic test sequences",
  journal =      j-TODAES,
  volume =       "6",
  number =       "3",
  pages =        "308--321",
  month =        jul,
  year =         "2001",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Tue Feb 19 14:35:45 MST 2002",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Liao:2001:CPT,
  author =       "Swanwa Liao and Mario A. Lopez and Dinesh Mehta",
  title =        "Constrained polygon transformations for incremental
                 floorplanning",
  journal =      j-TODAES,
  volume =       "6",
  number =       "3",
  pages =        "322--342",
  month =        jul,
  year =         "2001",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Tue Feb 19 14:35:45 MST 2002",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Chu:2001:CFS,
  author =       "Chris Chu and D. F. Wong",
  title =        "Closed form solutions to simultaneous buffer
                 insertion\slash sizing and wire sizing",
  journal =      j-TODAES,
  volume =       "6",
  number =       "3",
  pages =        "343--371",
  month =        jul,
  year =         "2001",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Tue Feb 19 14:35:45 MST 2002",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Hu:2001:ELA,
  author =       "Xiaobo Sharon Hu and Danny Z. Chen and Rajeshkumar
                 Sambandam",
  title =        "Efficient list-approximation techniques for floorplan
                 area minimization",
  journal =      j-TODAES,
  volume =       "6",
  number =       "3",
  pages =        "372--400",
  month =        jul,
  year =         "2001",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Tue Feb 19 14:35:45 MST 2002",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Nourani:2001:ITI,
  author =       "Mehrdad Nourani and Joan Carletta and Christos
                 Papachristou",
  title =        "Integrated test of interacting controllers and
                 datapaths",
  journal =      j-TODAES,
  volume =       "6",
  number =       "3",
  pages =        "401--422",
  month =        jul,
  year =         "2001",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Tue Feb 19 14:35:45 MST 2002",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Parulkar:2001:IRC,
  author =       "Ishwar Parulkar and Sandeep K. Gupta and Melvin A.
                 Breuer",
  title =        "Introducing redundant computations in {RTL} data paths
                 for reducing {BIST} resources",
  journal =      j-TODAES,
  volume =       "6",
  number =       "3",
  pages =        "423--445",
  month =        jul,
  year =         "2001",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Tue Feb 19 14:35:45 MST 2002",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Dasgupta:2001:SRG,
  author =       "Parthasarathi Dasgupta and Susmita Sur-Kolay",
  title =        "Slicible rectangular graphs and their optimal
                 floorplans",
  journal =      j-TODAES,
  volume =       "6",
  number =       "4",
  pages =        "447--470",
  month =        oct,
  year =         "2001",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Tue Feb 19 14:35:44 MST 2002",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Hartanto:2001:DSS,
  author =       "Ismed Hartanto and Srikanth Venkataraman and W. Kent
                 Fuchs and Elizabeth M. Rudnick and Janak H. Patel and
                 Sreejit Chakravarty",
  title =        "Diagnostic simulation of stuck-at faults in sequential
                 circuits using compact lists",
  journal =      j-TODAES,
  volume =       "6",
  number =       "4",
  pages =        "471--489",
  month =        oct,
  year =         "2001",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Tue Feb 19 14:35:44 MST 2002",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Narasimhan:2001:FAC,
  author =       "M. Narasimhan and J. Ramanujam",
  title =        "A fast approach to computing exact solutions to the
                 resource-constrained scheduling problem",
  journal =      j-TODAES,
  volume =       "6",
  number =       "4",
  pages =        "490--500",
  month =        oct,
  year =         "2001",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Tue Feb 19 14:35:44 MST 2002",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Karri:2001:IRT,
  author =       "Ramesh Karri and Balakrishnan Iyer",
  title =        "Introspection: a register transfer level technique for
                 concurrent error detection and diagnosis in data
                 dominated designs",
  journal =      j-TODAES,
  volume =       "6",
  number =       "4",
  pages =        "501--515",
  month =        oct,
  year =         "2001",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Tue Feb 19 14:35:44 MST 2002",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Boyer:2001:ODS,
  author =       "Fran{\c{c}}ois R. Boyer and El Mostapha Aboulhamid and
                 Yvon Savaria and Michel Boyer",
  title =        "Optimal design of synchronous circuits using software
                 pipelining techniques",
  journal =      j-TODAES,
  volume =       "6",
  number =       "4",
  pages =        "516--532",
  month =        oct,
  year =         "2001",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Tue Feb 19 14:35:44 MST 2002",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Voeten:2001:FLT,
  author =       "Jeroen Voeten",
  title =        "On the fundamental limitations of transformational
                 design",
  journal =      j-TODAES,
  volume =       "6",
  number =       "4",
  pages =        "533--552",
  month =        oct,
  year =         "2001",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Tue Feb 19 14:35:44 MST 2002",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Shiue:2001:DMD,
  author =       "Wen-Tsong Shiue and Sathishkumar Udayanarayanan and
                 Chaitali Chakrabarti",
  title =        "Data memory design and exploration for low-power
                 embedded systems",
  journal =      j-TODAES,
  volume =       "6",
  number =       "4",
  pages =        "553--568",
  month =        oct,
  year =         "2001",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Tue Feb 19 14:35:44 MST 2002",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Ashar:2001:UCD,
  author =       "Pranav Ashar and Aarti Gupta and Sharad Malik",
  title =        "Using complete-$1$-distinguishability for {FSM}
                 equivalence checking",
  journal =      j-TODAES,
  volume =       "6",
  number =       "4",
  pages =        "569--590",
  month =        oct,
  year =         "2001",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Tue Feb 19 14:35:44 MST 2002",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Liu:2001:ODC,
  author =       "Tai-Hung Liu and Adnan Aziz and Vigyan Singhal",
  title =        "Optimizing designs containing black boxes",
  journal =      j-TODAES,
  volume =       "6",
  number =       "4",
  pages =        "591--601",
  month =        oct,
  year =         "2001",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Tue Feb 19 14:35:44 MST 2002",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Roop:2001:FST,
  author =       "Partha S. Roop and A. Sowmya and S. Ramesh",
  title =        "Forced simulation: a technique for automating
                 component reuse in embedded systems",
  journal =      j-TODAES,
  volume =       "6",
  number =       "4",
  pages =        "602--628",
  month =        oct,
  year =         "2001",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Tue Feb 19 14:35:44 MST 2002",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Flores:2001:ESM,
  author =       "Paulo F. Flores and Hor{\'a}cio C. Neto and Jo{\~a}o
                 P. Marques-Silva",
  title =        "An exact solution to the minimum size test pattern
                 problem",
  journal =      j-TODAES,
  volume =       "6",
  number =       "4",
  pages =        "629--644",
  month =        oct,
  year =         "2001",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Tue Feb 19 14:35:44 MST 2002",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Chowdhary:2002:GTM,
  author =       "Amit Chowdhary and John P. Hayes",
  title =        "General technology mapping for field-programmable gate
                 arrays based on lookup tables",
  journal =      j-TODAES,
  volume =       "7",
  number =       "1",
  pages =        "1--32",
  month =        jan,
  year =         "2002",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Aug 7 11:12:03 MDT 2003",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Michael:2002:ATD,
  author =       "M. Michael and S. Tragoudas",
  title =        "{ATPG} tools for delay faults at the functional
                 level",
  journal =      j-TODAES,
  volume =       "7",
  number =       "1",
  pages =        "33--57",
  month =        jan,
  year =         "2002",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Aug 7 11:12:03 MDT 2003",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Lysecky:2002:PIB,
  author =       "Roman Lysecky and Frank Vahid",
  title =        "Prefetching for improved bus wrapper performance in
                 cores",
  journal =      j-TODAES,
  volume =       "7",
  number =       "1",
  pages =        "58--90",
  month =        jan,
  year =         "2002",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Aug 7 11:12:03 MDT 2003",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Dutt:2002:CAI,
  author =       {Shantanu Dutt and Wenyong Deng},
  title =        {Cluster-aware iterative improvement techniques for
                 partitioning large {VLSI} circuits},
  journal =      j-TODAES,
  year =         {2002},
  month =        jan,
  volume =       {7},
  number =       {1},
  pages =        {91--121},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  CODEN =        {ATASFO},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  bibdate =      {Thu Aug 7 11:12:03 MDT 2003},
  acknowledgement = ack-nhfb,
}

@Article{Goodby:2002:MSP,
  author =       {Laurence Goodby and Alex Orailo{\u{g}}lu and Paul M.
                 Chau},
  title =        {Microarchitectural synthesis of
                 performance-constrained, low-power {VLSI} designs},
  journal =      j-TODAES,
  year =         {2002},
  month =        jan,
  volume =       {7},
  number =       {1},
  pages =        {122--136},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  CODEN =        {ATASFO},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  bibdate =      {Thu Aug 7 11:12:03 MDT 2003},
  acknowledgement = ack-nhfb,
}

@Article{GuerraeSilva:2002:SMA,
  author =       {Lu{\'\i}s {Guerra e Silva} and Jo{\~a}o Marques-Silva
                 and L. Miguel Silveira and Karem A. Sakallah},
  title =        {Satisfiability models and algorithms for circuit delay
                 computation},
  journal =      j-TODAES,
  year =         {2002},
  month =        jan,
  volume =       {7},
  number =       {1},
  pages =        {137--158},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  CODEN =        {ATASFO},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  bibdate =      {Fri Oct 31 06:28:44 2003},
  acknowledgement = ack-nhfb,
}

@Article{Darte:2002:CEL,
  author =       {Alain Darte and Robert Schreiber and B. Ramakrishna
                 Rau and Fr{\'e}d{\'e}ric Vivien},
  title =        {Constructing and exploiting linear schedules with
                 prescribed parallelism},
  journal =      j-TODAES,
  year =         {2002},
  month =        jan,
  volume =       {7},
  number =       {1},
  pages =        {159--172},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  CODEN =        {ATASFO},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  bibdate =      {Thu Aug 7 11:12:03 MDT 2003},
  acknowledgement = ack-nhfb,
}

@Article{Jagannathan:2002:FAC,
  author =       {Ashok Jagannathan and Sung-Woo Hur and John Lillis},
  title =        {A fast algorithm for context-aware buffer insertion},
  journal =      j-TODAES,
  year =         {2002},
  month =        jan,
  volume =       {7},
  number =       {1},
  pages =        {173--188},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  CODEN =        {ATASFO},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  bibdate =      {Thu Aug 7 11:12:03 MDT 2003},
  acknowledgement = ack-nhfb,
}

@Article{Vemuri:2002:ERO,
  author =       {Ranga Vemuri and Srinivas Katkoori and Meenakshi Kaul
                 and Jay Roy},
  title =        {An efficient register optimization algorithm for
                 high-level synthesis from hierarchical behavioral
                 specifications},
  journal =      j-TODAES,
  year =         {2002},
  month =        jan,
  volume =       {7},
  number =       {1},
  pages =        {189--216},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  CODEN =        {ATASFO},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  bibdate =      {Thu Aug 7 11:12:03 MDT 2003},
  acknowledgement = ack-nhfb,
}

@Article{Lin:2002:OTB,
  author =       {Shi-Zheng Eric Lin and Chieh Changfan and Yu-Chin Hsu
                 and Fur-Shing Tsai},
  title =        {Optimal time borrowing analysis and timing budgeting
                 optimization for latch-based designs},
  journal =      j-TODAES,
  year =         {2002},
  month =        jan,
  volume =       {7},
  number =       {1},
  pages =        {217--230},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  CODEN =        {ATASFO},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  bibdate =      {Thu Aug 7 11:12:03 MDT 2003},
  acknowledgement = ack-nhfb,
}

@Article{Dasgupta:2002:MBP,
  author =       {Parthasarathi Dasgupta and Peichen Pan and Subhas C.
                 Nandy and Bhargab B. Bhattacharya},
  title =        {Monotone bipartitioning problem in a planar point set
                 with applications to {VLSI}},
  journal =      j-TODAES,
  year =         {2002},
  month =        apr,
  volume =       {7},
  number =       {2},
  pages =        {231--248},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  CODEN =        {ATASFO},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  bibdate =      {Thu Aug 7 11:12:04 MDT 2003},
  acknowledgement = ack-nhfb,
}

%%% The compound family name ``Sonza Reorda'' is braced so that BibTeX
%%% treats it as one surname instead of splitting off ``Sonza''.
@Article{Corno:2002:IAS,
  author =       "F. Corno and P. Prinetto and M. Rebaudengo and M.
                 {Sonza Reorda} and G. Squillero",
  title =        "Initializability analysis of synchronous sequential
                 circuits",
  journal =      j-TODAES,
  volume =       "7",
  number =       "2",
  pages =        "249--264",
  month =        apr,
  year =         "2002",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Aug 7 11:12:04 MDT 2003",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Kim:2002:LTL,
  author =       {Ki-Wook Kim and Taewhan Kim and Ting-Ting Hwang and
                 Sung-Mo Kang and C. L. Liu},
  title =        {Logic transformation for low-power synthesis},
  journal =      j-TODAES,
  year =         {2002},
  month =        apr,
  volume =       {7},
  number =       {2},
  pages =        {265--283},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  CODEN =        {ATASFO},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  bibdate =      {Thu Aug 7 11:12:04 MDT 2003},
  acknowledgement = ack-nhfb,
}

@Article{Tessier:2002:FPA,
  author =       {Russell Tessier},
  title =        {Fast placement approaches for {FPGAs}},
  journal =      j-TODAES,
  year =         {2002},
  month =        apr,
  volume =       {7},
  number =       {2},
  pages =        {284--305},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  CODEN =        {ATASFO},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  bibdate =      {Thu Aug 7 11:12:04 MDT 2003},
  acknowledgement = ack-nhfb,
}

@Article{Zhao:2002:TMA,
  author =       {Min Zhao and Sachin S. Sapatnekar},
  title =        {Technology mapping algorithms for domino logic},
  journal =      j-TODAES,
  year =         {2002},
  month =        apr,
  volume =       {7},
  number =       {2},
  pages =        {306--335},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  CODEN =        {ATASFO},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  bibdate =      {Thu Aug 7 11:12:04 MDT 2003},
  acknowledgement = ack-nhfb,
}

@Article{Araujo:2002:GAR,
  author =       {Guido Araujo and Guilherme Ottoni and Marcelo Cintra},
  title =        {Global array reference allocation},
  journal =      j-TODAES,
  year =         {2002},
  month =        apr,
  volume =       {7},
  number =       {2},
  pages =        {336--357},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  CODEN =        {ATASFO},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  bibdate =      {Thu Aug 7 11:12:04 MDT 2003},
  acknowledgement = ack-nhfb,
}

@Article{Tsao:2002:UDC,
  author =       {Chung-wen Albert Tsao and Cheng-kok Koh},
  title =        {{UST\slash DME}: a clock tree router for general skew
                 constraints},
  journal =      j-TODAES,
  year =         {2002},
  month =        jul,
  volume =       {7},
  number =       {3},
  pages =        {359--379},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  CODEN =        {ATASFO},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  bibdate =      {Thu Aug 7 11:12:04 MDT 2003},
  acknowledgement = ack-nhfb,
}

@Article{Kountouris:2002:ESC,
  author =       {Apostolos A. Kountouris and Christophe Wolinski},
  title =        {Efficient scheduling of conditional behaviors for
                 high-level synthesis},
  journal =      j-TODAES,
  year =         {2002},
  month =        jul,
  volume =       {7},
  number =       {3},
  pages =        {380--412},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  CODEN =        {ATASFO},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  bibdate =      {Thu Aug 7 11:12:04 MDT 2003},
  acknowledgement = ack-nhfb,
}

@Article{Vahid:2002:PSP,
  author =       {Frank Vahid},
  title =        {Partitioning sequential programs for {CAD} using a
                 three-step approach},
  journal =      j-TODAES,
  year =         {2002},
  month =        jul,
  volume =       {7},
  number =       {3},
  pages =        {413--429},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  CODEN =        {ATASFO},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  bibdate =      {Thu Aug 7 11:12:04 MDT 2003},
  acknowledgement = ack-nhfb,
}

%%% The particle-bearing family name ``De Veciana'' is braced so that
%%% BibTeX does not absorb the capitalised ``De'' into the given names.
@Article{Lapinskii:2002:CAH,
  author =       "Viktor S. Lapinskii and Margarida F. Jacome and
                 Gustavo A. {De Veciana}",
  title =        "Cluster assignment for high-performance embedded
                 {VLIW} processors",
  journal =      j-TODAES,
  volume =       "7",
  number =       "3",
  pages =        "430--454",
  month =        jul,
  year =         "2002",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Aug 7 11:12:04 MDT 2003",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Saxena:2002:ESL,
  author =       {Vikram Saxena and Farid N. Najm and Ibrahim N. Hajj},
  title =        {Estimation of state line statistics in sequential
                 circuits},
  journal =      j-TODAES,
  year =         {2002},
  month =        jul,
  volume =       {7},
  number =       {3},
  pages =        {455--473},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  CODEN =        {ATASFO},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  bibdate =      {Thu Aug 7 11:12:04 MDT 2003},
  acknowledgement = ack-nhfb,
}

@Article{Glebov:2002:FNA,
  author =       {A. Glebov and S. Gavrilov and D. Blaauw and V.
                 Zolotov},
  title =        {False-noise analysis using logic implications},
  journal =      j-TODAES,
  year =         {2002},
  month =        jul,
  volume =       {7},
  number =       {3},
  pages =        {474--498},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  CODEN =        {ATASFO},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  bibdate =      {Thu Aug 7 11:12:04 MDT 2003},
  acknowledgement = ack-nhfb,
}

@Article{Sarrafzadeh:2002:GE,
  author =       {Majid Sarrafzadeh and Rajeev Jayaraman},
  title =        {Guest editorial},
  journal =      j-TODAES,
  year =         {2002},
  month =        oct,
  volume =       {7},
  number =       {4},
  pages =        {499--500},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  CODEN =        {ATASFO},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  bibdate =      {Thu Aug 7 11:12:05 MDT 2003},
  acknowledgement = ack-nhfb,
}

@Article{Vemuri:2002:BBL,
  author =       {Navin Vemuri and Priyank Kalla and Russell Tessier},
  title =        {{BDD}-based logic synthesis for {LUT}-based {FPGAs}},
  journal =      j-TODAES,
  year =         {2002},
  month =        oct,
  volume =       {7},
  number =       {4},
  pages =        {501--525},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  CODEN =        {ATASFO},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  bibdate =      {Thu Aug 7 11:12:05 MDT 2003},
  acknowledgement = ack-nhfb,
}

@Article{Fan:2002:RDG,
  author =       {Hongbing Fan and Jiping Liu and Yu-Liang Wu and C. K.
                 Wong},
  title =        {Reduction design for generic universal switch blocks},
  journal =      j-TODAES,
  year =         {2002},
  month =        oct,
  volume =       {7},
  number =       {4},
  pages =        {526--546},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  CODEN =        {ATASFO},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  bibdate =      {Thu Aug 7 11:12:05 MDT 2003},
  acknowledgement = ack-nhfb,
}

@Article{Dandalis:2002:RTP,
  author =       {Andreas Dandalis and Viktor K. Prasanna},
  title =        {Run-time performance optimization of an {FPGA}-based
                 deduction engine for {SAT} solvers},
  journal =      j-TODAES,
  year =         {2002},
  month =        oct,
  volume =       {7},
  number =       {4},
  pages =        {547--562},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  CODEN =        {ATASFO},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  bibdate =      {Thu Aug 7 11:12:05 MDT 2003},
  acknowledgement = ack-nhfb,
}

@Article{Wang:2002:BSF,
  author =       {Haibo Wang and Sarma B. K. Vrudhula},
  title =        {Behavioral synthesis of field programmable analog
                 array circuits},
  journal =      j-TODAES,
  year =         {2002},
  month =        oct,
  volume =       {7},
  number =       {4},
  pages =        {563--604},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  CODEN =        {ATASFO},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  bibdate =      {Thu Aug 7 11:12:05 MDT 2003},
  acknowledgement = ack-nhfb,
}

@Article{Kastner:2002:IGH,
  author =       {R. Kastner and A. Kaplan and S. Ogrenci Memik and E.
                 Bozorgzadeh},
  title =        {Instruction generation for hybrid reconfigurable
                 systems},
  journal =      j-TODAES,
  year =         {2002},
  month =        oct,
  volume =       {7},
  number =       {4},
  pages =        {605--627},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  CODEN =        {ATASFO},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  bibdate =      {Thu Aug 7 11:12:05 MDT 2003},
  acknowledgement = ack-nhfb,
}

@Article{Wu:2002:PDP,
  author =       {Guang-Ming Wu and Jai-Ming Lin and Yao-Wen Chang},
  title =        {Performance-driven placement for dynamically
                 reconfigurable {FPGAs}},
  journal =      j-TODAES,
  year =         {2002},
  month =        oct,
  volume =       {7},
  number =       {4},
  pages =        {628--642},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  CODEN =        {ATASFO},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  bibdate =      {Thu Aug 7 11:12:05 MDT 2003},
  acknowledgement = ack-nhfb,
}

@Article{Singh:2002:ECC,
  author =       {Amit Singh and Ganapathy Parthasarathy and Malgorzata
                 Marek-Sadowska},
  title =        {Efficient circuit clustering for area and power
                 reduction in {FPGAs}},
  journal =      j-TODAES,
  year =         {2002},
  month =        oct,
  volume =       {7},
  number =       {4},
  pages =        {643--663},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  CODEN =        {ATASFO},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  bibdate =      {Thu Aug 7 11:12:05 MDT 2003},
  acknowledgement = ack-nhfb,
}

@Article{Dutt:2002:SBB,
  author =       {Shantanu Dutt and Vinay Verma and Hasan Arslan},
  title =        {A search-based bump-and-refit approach to incremental
                 routing for {ECO} applications in {FPGAs}},
  journal =      j-TODAES,
  year =         {2002},
  month =        oct,
  volume =       {7},
  number =       {4},
  pages =        {664--693},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  CODEN =        {ATASFO},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  bibdate =      {Thu Aug 7 11:12:05 MDT 2003},
  acknowledgement = ack-nhfb,
}

@Article{Tragoudas:2003:PDF,
  author =       {S. Tragoudas and N. Denny},
  title =        {Path delay fault testing using test points},
  journal =      j-TODAES,
  year =         {2003},
  month =        jan,
  volume =       {8},
  number =       {1},
  pages =        {1--10},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  CODEN =        {ATASFO},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  bibdate =      {Thu Aug 7 11:12:05 MDT 2003},
  acknowledgement = ack-nhfb,
}

@Article{Chang:2003:AFF,
  author =       {Yao-Wen Chang and Kai Zhu and Guang-Ming Wu and D. F.
                 Wong and C. K. Wong},
  title =        {Analysis of {FPGA\slash FPIC} switch modules},
  journal =      j-TODAES,
  year =         {2003},
  month =        jan,
  volume =       {8},
  number =       {1},
  pages =        {11--37},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  CODEN =        {ATASFO},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  bibdate =      {Thu Aug 7 11:12:05 MDT 2003},
  acknowledgement = ack-nhfb,
}

@Article{Jone:2003:DTI,
  author =       {W.-B. Jone and J. S. Wang and Hsueh-I Lu and I. P. Hsu
                 and J.-Y. Chen},
  title =        {Design theory and implementation for low-power
                 segmented bus systems},
  journal =      j-TODAES,
  year =         {2003},
  month =        jan,
  volume =       {8},
  number =       {1},
  pages =        {38--54},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  CODEN =        {ATASFO},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  bibdate =      {Thu Aug 7 11:12:05 MDT 2003},
  acknowledgement = ack-nhfb,
}

@Article{Yao:2003:FRC,
  author =       {Bo Yao and Hongyu Chen and Chung-Kuan Cheng and Ronald
                 Graham},
  title =        {Floorplan representations: {Complexity} and
                 connections},
  journal =      j-TODAES,
  year =         {2003},
  month =        jan,
  volume =       {8},
  number =       {1},
  pages =        {55--80},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  CODEN =        {ATASFO},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  bibdate =      {Thu Aug 7 11:12:05 MDT 2003},
  acknowledgement = ack-nhfb,
}

@Article{Riepe:2003:TPN,
  author =       {Michael A. Riepe and Karem A. Sakallah},
  title =        {Transistor placement for noncomplementary digital
                 {VLSI} cell synthesis},
  journal =      j-TODAES,
  year =         {2003},
  month =        jan,
  volume =       {8},
  number =       {1},
  pages =        {81--107},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  CODEN =        {ATASFO},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  bibdate =      {Thu Aug 7 11:12:05 MDT 2003},
  acknowledgement = ack-nhfb,
}

@Article{Blanton:2003:PIP,
  author =       {R. D. (Shawn) Blanton and John P. Hayes},
  title =        {On the properties of the input pattern fault model},
  journal =      j-TODAES,
  year =         {2003},
  month =        jan,
  volume =       {8},
  number =       {1},
  pages =        {108--124},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  CODEN =        {ATASFO},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  bibdate =      {Thu Aug 7 11:12:05 MDT 2003},
  acknowledgement = ack-nhfb,
}

@Article{VanAchteren:2003:SSD,
  author =       {Tanja {Van Achteren} and Francky Catthoor and Rudy
                 Lauwereins and Geert Deconinck},
  title =        {Search space definition and exploration for nonuniform
                 data reuse opportunities in data-dominant
                 applications},
  journal =      j-TODAES,
  year =         {2003},
  month =        jan,
  volume =       {8},
  number =       {1},
  pages =        {125--139},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  CODEN =        {ATASFO},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  bibdate =      {Thu Aug 7 11:12:05 MDT 2003},
  acknowledgement = ack-nhfb,
}

@Article{Edwards:2003:TCC,
  author =       {Stephen A. Edwards},
  title =        {Tutorial: {Compiling} concurrent languages for
                 sequential processors},
  journal =      j-TODAES,
  year =         {2003},
  month =        apr,
  volume =       {8},
  number =       {2},
  pages =        {141--187},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  CODEN =        {ATASFO},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  bibdate =      {Thu Aug 7 11:12:06 MDT 2003},
  acknowledgement = ack-nhfb,
}

@Article{Wu:2003:RBP,
  author =       {Guang-Ming Wu and Yun-Chih Chang and Yao-Wen Chang},
  title =        {Rectilinear block placement using {B*}-trees},
  journal =      j-TODAES,
  year =         {2003},
  month =        apr,
  volume =       {8},
  number =       {2},
  pages =        {188--202},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  CODEN =        {ATASFO},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  bibdate =      {Thu Aug 7 11:12:06 MDT 2003},
  acknowledgement = ack-nhfb,
}

@Article{Kim:2003:MDO,
  author =       {Ki-Wook Kim and Seong-Ook Jung and Taewhan Kim and
                 Sung-Mo Kang},
  title =        {Minimum delay optimization for domino logic
                 circuits---a coupling-aware approach},
  journal =      j-TODAES,
  year =         {2003},
  month =        apr,
  volume =       {8},
  number =       {2},
  pages =        {203--213},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  CODEN =        {ATASFO},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  bibdate =      {Thu Aug 7 11:12:06 MDT 2003},
  acknowledgement = ack-nhfb,
}

@Article{Pinar:2003:CSI,
  author =       {Ali Pinar and C. L. Liu},
  title =        {Compacting sequences with invariant transition
                 frequencies},
  journal =      j-TODAES,
  year =         {2003},
  month =        apr,
  volume =       {8},
  number =       {2},
  pages =        {214--221},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  CODEN =        {ATASFO},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  bibdate =      {Thu Aug 7 11:12:06 MDT 2003},
  acknowledgement = ack-nhfb,
}

@Article{Singhal:2003:SOA,
  author =       {Vigyan Singhal and Carl Pixley and Adnan Aziz and Shaz
                 Qadeer and Robert Brayton},
  title =        {Sequential optimization in the absence of global
                 reset},
  journal =      j-TODAES,
  year =         {2003},
  month =        apr,
  volume =       {8},
  number =       {2},
  pages =        {222--251},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  CODEN =        {ATASFO},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  bibdate =      {Thu Aug 7 11:12:06 MDT 2003},
  acknowledgement = ack-nhfb,
}

@Article{Lee:2003:COV,
  author =       {Chingren Lee and Jenq Kuen Lee and Tingting Hwang and
                 Shi-Chun Tsai},
  title =        {Compiler optimization on {VLIW} instruction scheduling
                 for low power},
  journal =      j-TODAES,
  year =         {2003},
  month =        apr,
  volume =       {8},
  number =       {2},
  pages =        {252--268},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  CODEN =        {ATASFO},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  bibdate =      {Thu Aug 7 11:12:06 MDT 2003},
  acknowledgement = ack-nhfb,
}

@Article{Lopez-Vallejo:2003:HSP,
  author =       {Marisa L{\'o}pez-Vallejo and Juan Carlos L{\'o}pez},
  title =        {On the hardware-software partitioning problem:
                 {System} modeling and partitioning techniques},
  journal =      j-TODAES,
  year =         {2003},
  month =        jul,
  volume =       {8},
  number =       {3},
  pages =        {269--297},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  CODEN =        {ATASFO},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  bibdate =      {Thu Aug 7 11:12:06 MDT 2003},
  acknowledgement = ack-nhfb,
}

@Article{Obenaus:2003:GFP,
  author =       {Stefan Thomas Obenaus and Ted H. Szymanski},
  title =        {{Gravity}: {Fast} placement for {$3$-D} {VLSI}},
  journal =      j-TODAES,
  year =         {2003},
  month =        jul,
  volume =       {8},
  number =       {3},
  pages =        {298--315},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  CODEN =        {ATASFO},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  bibdate =      {Thu Aug 7 11:12:06 MDT 2003},
  acknowledgement = ack-nhfb,
}

@Article{Yang:2003:CRD,
  author =       {X. Yang and M. Wang and R. Kastner and S. Ghiasi and
                 M. Sarrafzadeh},
  title =        {Congestion reduction during placement with provably
                 good approximation bound},
  journal =      j-TODAES,
  year =         {2003},
  month =        jul,
  volume =       {8},
  number =       {3},
  pages =        {316--333},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  CODEN =        {ATASFO},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  bibdate =      {Thu Aug 7 11:12:06 MDT 2003},
  acknowledgement = ack-nhfb,
}

@Article{Constantinides:2003:SSA,
  author          = {Constantinides, G. A. and Cheung, P. Y. K. and Luk, W.},
  title           = {Synthesis of saturation arithmetic architectures},
  journal         = j-TODAES,
  volume          = {8},
  number          = {3},
  pages           = {334--354},
  month           = jul,
  year            = {2003},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Thu Aug 7 11:12:06 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Kuchcinski:2003:CDS,
  author          = {Kuchcinski, Krzysztof},
  title           = {Constraints-driven scheduling and resource assignment},
  journal         = j-TODAES,
  volume          = {8},
  number          = {3},
  pages           = {355--383},
  month           = jul,
  year            = {2003},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Thu Aug 7 11:12:06 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Lee:2003:ACG,
  author          = {Lee, J.-Y. and Park, I.-C.},
  title           = {Address code generation for {DSP} instruction-set
                     architectures},
  journal         = j-TODAES,
  volume          = {8},
  number          = {3},
  pages           = {384--395},
  month           = jul,
  year            = {2003},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Thu Aug 7 11:12:06 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Rawat:2003:I,
  author          = {Rawat, Shishpal and Wunderlich, Hans-Joachim},
  title           = {Introduction},
  journal         = j-TODAES,
  volume          = {8},
  number          = {4},
  pages           = {397--398},
  month           = oct,
  year            = {2003},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Fri Oct 31 06:04:08 MST 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Goel:2003:STA,
  author          = {Goel, Sandeep Kumar and Marinissen, Erik Jan},
  title           = {{SOC} test architecture design for efficient
                     utilization of test bandwidth},
  journal         = j-TODAES,
  volume          = {8},
  number          = {4},
  pages           = {399--429},
  month           = oct,
  year            = {2003},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Fri Oct 31 06:04:08 MST 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{El-Maleh:2003:TVD,
  author          = {El-Maleh, Aiman H. and Osais, Yahya E.},
  title           = {Test vector decomposition-based static compaction
                     algorithms for combinational circuits},
  journal         = j-TODAES,
  volume          = {8},
  number          = {4},
  pages           = {430--459},
  month           = oct,
  year            = {2003},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Fri Oct 31 06:04:08 MST 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Reddy:2003:TDV,
  author          = {Reddy, Sudhakar M. and Miyase, Kohei and
                     Kajihara, Seiji and Pomeranz, Irith},
  title           = {On test data volume reduction for multiple scan
                     chain designs},
  journal         = j-TODAES,
  volume          = {8},
  number          = {4},
  pages           = {460--469},
  month           = oct,
  year            = {2003},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Fri Oct 31 06:04:08 MST 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Li:2003:TDC,
  author          = {Li, Lei and Chakrabarty, Krishnendu and Touba, Nur A.},
  title           = {Test data compression using dictionaries with
                     selective entries and fixed-length indices},
  journal         = j-TODAES,
  volume          = {8},
  number          = {4},
  pages           = {470--490},
  month           = oct,
  year            = {2003},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Fri Oct 31 06:04:08 MST 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Singh:2003:MST,
  author          = {Singh, Adit D. and Seuring, Markus and
                     G{\"o}ssel, Michael and Sogomonyan, Egor S.},
  title           = {Multimode scan: {Test} per clock {BIST} for {IP}
                     cores},
  journal         = j-TODAES,
  volume          = {8},
  number          = {4},
  pages           = {491--505},
  month           = oct,
  year            = {2003},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Fri Oct 31 06:04:08 MST 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Nummer:2003:THP,
  author          = {Nummer, Muhammad and Sachdev, Manoj},
  title           = {Testing high-performance pipelined circuits with
                     slow-speed testers},
  journal         = j-TODAES,
  volume          = {8},
  number          = {4},
  pages           = {506--521},
  month           = oct,
  year            = {2003},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Fri Oct 31 06:04:08 MST 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Parthasarathy:2003:PTA,
  author          = {Parthasarathy, Kumar and Kuyel, Turker and
                     Price, Dana and Jin, Le and Chen, Degang and
                     Geiger, Randall},
  title           = {{BIST} and production testing of {ADCs} using
                     imprecise stimulus},
  journal         = j-TODAES,
  volume          = {8},
  number          = {4},
  pages           = {522--545},
  month           = oct,
  year            = {2003},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Fri Oct 31 06:04:08 MST 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Li:2003:CLF,
  author          = {Li, Zhuo and Lu, Xiang and Qiu, Wangqi and
                     Shi, Weiping and Walker, D. M. H.},
  title           = {A circuit level fault model for resistive bridges},
  journal         = j-TODAES,
  volume          = {8},
  number          = {4},
  pages           = {546--559},
  month           = oct,
  year            = {2003},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Fri Oct 31 06:04:08 MST 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Niggemeyer:2003:DAM,
  author          = {Niggemeyer, Dirk and Rudnick, Elizabeth M.},
  title           = {A data acquisition methodology for on-chip repair
                     of embedded memories},
  journal         = j-TODAES,
  volume          = {8},
  number          = {4},
  pages           = {560--576},
  month           = oct,
  year            = {2003},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Fri Oct 31 06:04:08 MST 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Neuberger:2003:MBU,
  author          = {Neuberger, Gustavo and de Lima, Fernanda and
                     Carro, Luigi and Reis, Ricardo},
  title           = {A multiple bit upset tolerant {SRAM} memory},
  journal         = j-TODAES,
  volume          = {8},
  number          = {4},
  pages           = {577--590},
  month           = oct,
  year            = {2003},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Fri Oct 31 06:04:08 MST 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Bunker:2004:FHS,
  author =       "Annette Bunker and Ganesh Gopalakrishnan and Sally A.
                 McKee",
  title =        "Formal hardware specification languages for protocol
                 compliance verification",
  journal =      j-TODAES,
  volume =       "9",
  number =       "1",
  pages =        "1--32",
  month =        jan,
  year =         "2004",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Wed Jan 28 17:18:10 MST 2004",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Li:2004:PMA,
  author          = {Li, Hao and Katkoori, Srinivas and Mak, Wai-Kei},
  title           = {Power minimization algorithms for {LUT}-based
                     {FPGA} technology mapping},
  journal         = j-TODAES,
  volume          = {9},
  number          = {1},
  pages           = {33--51},
  month           = jan,
  year            = {2004},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Wed Jan 28 17:18:10 MST 2004},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Cho:2004:FMB,
  author          = {Cho, Jeonghun and Paek, Yunheung and Whalley, David},
  title           = {Fast memory bank assignment for fixed-point digital
                     signal processors},
  journal         = j-TODAES,
  volume          = {9},
  number          = {1},
  pages           = {52--74},
  month           = jan,
  year            = {2004},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Wed Jan 28 17:18:10 MST 2004},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Das:2004:MDR,
  author          = {Das, Sandip and Sur-Kolay, Susmita and
                     Bhattacharya, Bhargab B.},
  title           = {{Manhattan}-diagonal routing in channels and
                     switchboxes},
  journal         = j-TODAES,
  volume          = {9},
  number          = {1},
  pages           = {75--104},
  month           = jan,
  year            = {2004},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Wed Jan 28 17:18:10 MST 2004},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Wu:2004:BBA,
  author          = {Wu, Lieh-Ming and Wang, Kuochen and Chiu, Chuang-Yi},
  title           = {A {BNF}-based automatic test program generator for
                     compatible microprocessor verification},
  journal         = j-TODAES,
  volume          = {9},
  number          = {1},
  pages           = {105--132},
  month           = jan,
  year            = {2004},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Wed Jan 28 17:18:10 MST 2004},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Kjeldsberg:2004:SRE,
  author          = {Kjeldsberg, P. G. and Catthoor, F. and Aas, E. J.},
  title           = {Storage requirement estimation for optimized design
                     of data intensive applications},
  journal         = j-TODAES,
  volume          = {9},
  number          = {2},
  pages           = {133--158},
  month           = apr,
  year            = {2004},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Thu Nov 4 08:12:30 MST 2004},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Sabade:2004:BTM,
  author =       "Sagar S. Sabade and Duncan M. Walker",
  title =        "{I$_{\mbox{DDX}}$}-based test methods: {A} survey",
  journal =      j-TODAES,
  volume =       "9",
  number =       "2",
  pages =        "159--198",
  month =        apr,
  year =         "2004",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Nov 4 08:12:30 MST 2004",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Ma:2004:SCU,
  author          = {Ma, Yuchun and Hong, Xianlong and Dong, Sheqin and
                     Cai, Yici and Cheng, Chung-Kuan and Gu, Jun},
  title           = {Stairway compaction using corner block list and its
                     applications with rectilinear blocks},
  journal         = j-TODAES,
  volume          = {9},
  number          = {2},
  pages           = {199--211},
  month           = apr,
  year            = {2004},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Thu Nov 4 08:12:30 MST 2004},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Murthy:2004:BMP,
  author          = {Murthy, Praveen K. and Bhattacharyya, Shuvra S.},
  title           = {Buffer merging---a powerful technique for reducing
                     memory requirements of synchronous dataflow
                     specifications},
  journal         = j-TODAES,
  volume          = {9},
  number          = {2},
  pages           = {212--237},
  month           = apr,
  year            = {2004},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Thu Nov 4 08:12:30 MST 2004},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Doboli:2004:TLL,
  author          = {Doboli, Alex and Dhanwada, Nagu and
                     Nunez-Aldana, Adrian and Vemuri, Ranga},
  title           = {A two-layer library-based approach to synthesis of
                     analog systems from {VHDL-AMS} specifications},
  journal         = j-TODAES,
  volume          = {9},
  number          = {2},
  pages           = {238--271},
  month           = apr,
  year            = {2004},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Thu Nov 4 08:12:30 MST 2004},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Sundararajan:2004:NAI,
  author          = {Sundararajan, Vijay and Sapatnekar, Sachin S. and
                     Parhi, Keshab K.},
  title           = {A new approach for integration of min-area retiming
                     and min-delay padding for simultaneously addressing
                     short-path and long-path constraints},
  journal         = j-TODAES,
  volume          = {9},
  number          = {3},
  pages           = {273--289},
  month           = jul,
  year            = {2004},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Thu Nov 4 08:12:30 MST 2004},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Lepak:2004:SSI,
  author          = {Lepak, Kevin M. and Xu, Min and Chen, Jun and He, Lei},
  title           = {Simultaneous shield insertion and net ordering for
                     capacitive and inductive coupling minimization},
  journal         = j-TODAES,
  volume          = {9},
  number          = {3},
  pages           = {290--309},
  month           = jul,
  year            = {2004},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Thu Nov 4 08:12:30 MST 2004},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Vicente:2004:APT,
  author          = {Vicente, Juan D. and Lanchares, Juan and
                     Hermida, Rom{\'a}n},
  title           = {Annealing placement by thermodynamic combinatorial
                     optimization},
  journal         = j-TODAES,
  volume          = {9},
  number          = {3},
  pages           = {310--332},
  month           = jul,
  year            = {2004},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Thu Nov 4 08:12:30 MST 2004},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Dandalis:2004:ACE,
  author          = {Dandalis, Andreas and Prasanna, Viktor K.},
  title           = {An adaptive cryptographic engine for {Internet}
                     protocol security architectures},
  journal         = j-TODAES,
  volume          = {9},
  number          = {3},
  pages           = {333--353},
  month           = jul,
  year            = {2004},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Thu Nov 4 08:12:30 MST 2004},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Yang:2004:FVE,
  author          = {Yang, Jun and Gupta, Rajiv and Zhang, Chuanjun},
  title           = {Frequent value encoding for low power data buses},
  journal         = j-TODAES,
  volume          = {9},
  number          = {3},
  pages           = {354--384},
  month           = jul,
  year            = {2004},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Thu Nov 4 08:12:30 MST 2004},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Dasdan:2004:EAF,
  author          = {Dasdan, Ali},
  title           = {Experimental analysis of the fastest optimum cycle
                     ratio and mean algorithms},
  journal         = j-TODAES,
  volume          = {9},
  number          = {4},
  pages           = {385--418},
  month           = oct,
  year            = {2004},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Thu Nov 4 08:12:30 MST 2004},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Ghosh:2004:COE,
  author          = {Ghosh, Arijit and Givargis, Tony},
  title           = {Cache optimization for embedded processor cores:
                     {An} analytical approach},
  journal         = j-TODAES,
  volume          = {9},
  number          = {4},
  pages           = {419--440},
  month           = oct,
  year            = {2004},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Thu Nov 4 08:12:30 MST 2004},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Gupta:2004:CPC,
  author          = {Gupta, Sumit and Gupta, Rajesh Kumar and
                     Dutt, Nikil D. and Nicolau, Alexandru},
  title           = {Coordinated parallelizing compiler optimizations
                     and high-level synthesis},
  journal         = j-TODAES,
  volume          = {9},
  number          = {4},
  pages           = {441--470},
  month           = oct,
  year            = {2004},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Thu Nov 4 08:12:30 MST 2004},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Cota:2004:RCN,
  author          = {Cota, {\'E}rika and Carro, Luigi and
                     Lubaszewski, Marcelo},
  title           = {Reusing an on-chip network for the test of
                     core-based systems},
  journal         = j-TODAES,
  volume          = {9},
  number          = {4},
  pages           = {471--499},
  month           = oct,
  year            = {2004},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Thu Nov 4 08:12:30 MST 2004},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Krishna:2004:AHE,
  author          = {Krishna, C. V. and Jas, Abhijit and Touba, Nur A.},
  title           = {Achieving high encoding efficiency with partial
                     dynamic {LFSR} reseeding},
  journal         = j-TODAES,
  volume          = {9},
  number          = {4},
  pages           = {500--516},
  month           = oct,
  year            = {2004},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Thu Nov 4 08:12:30 MST 2004},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Hung:2004:SCR,
  author          = {Hung, William N. N. and Song, Xiaoyu and
                     Aboulhamid, El Mostapha and Kennings, Andrew and
                     Coppola, Alan},
  title           = {Segmented channel routability via satisfiability},
  journal         = j-TODAES,
  volume          = {9},
  number          = {4},
  pages           = {517--528},
  month           = oct,
  year            = {2004},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Thu Nov 4 08:12:30 MST 2004},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Dutt:2005:E,
  author          = {Dutt, Nikil},
  title           = {Editorial},
  journal         = j-TODAES,
  volume          = {10},
  number          = {1},
  pages           = {1--2},
  month           = jan,
  year            = {2005},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Thu Apr 14 10:34:36 MDT 2005},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Cong:2005:TMA,
  author =       "Jason Cong and Hui Huang and Xin Yuan",
  title =        "Technology mapping and architecture evaluation for
                 {$k/m$}-macrocell-based {FPGAs}",
  journal =      j-TODAES,
  volume =       "10",
  number =       "1",
  pages =        "3--23",
  month =        jan,
  year =         "2005",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Apr 14 10:34:36 MDT 2005",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Ruan:2005:BEL,
  author          = {Ruan, Shanq-Jang and Tsai, Kun-Lin and
                     Naroska, Edwin and Lai, Feipei},
  title           = {Bipartitioning and encoding in low-power pipelined
                     circuits},
  journal         = j-TODAES,
  volume          = {10},
  number          = {1},
  pages           = {24--32},
  month           = jan,
  year            = {2005},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Thu Apr 14 10:34:36 MDT 2005},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Memik:2005:SAO,
  author          = {Memik, Seda Ogrenci and Kastner, Ryan and
                     Bozorgzadeh, Elaheh and Sarrafzadeh, Majid},
  title           = {A scheduling algorithm for optimization and early
                     planning in high-level synthesis},
  journal         = j-TODAES,
  volume          = {10},
  number          = {1},
  pages           = {33--57},
  month           = jan,
  year            = {2005},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Thu Apr 14 10:34:36 MDT 2005},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Adya:2005:CTM,
  author          = {Adya, S. N. and Markov, I. L.},
  title           = {Combinatorial techniques for mixed-size placement},
  journal         = j-TODAES,
  volume          = {10},
  number          = {1},
  pages           = {58--90},
  month           = jan,
  year            = {2005},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Thu Apr 14 10:34:36 MDT 2005},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Nourani:2005:RHE,
  author          = {Nourani, Mehrdad and Tehranipour, Mohammad H.},
  title           = {{RL-Huffman} encoding for test compression and
                     power reduction in scan applications},
  journal         = j-TODAES,
  volume          = {10},
  number          = {1},
  pages           = {91--115},
  month           = jan,
  year            = {2005},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Thu Apr 14 10:34:36 MDT 2005},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Jan:2005:GMR,
  author          = {Jan, Gene Eu and Chang, Ki-Yin and Gao, Su and
                     Parberry, Ian},
  title           = {A $4$-geometry maze router and its application on
                     multiterminal nets},
  journal         = j-TODAES,
  volume          = {10},
  number          = {1},
  pages           = {116--135},
  month           = jan,
  year            = {2005},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Thu Apr 14 10:34:36 MDT 2005},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Arato:2005:AAH,
  author          = {Arat{\'o}, P{\'e}ter and
                     Mann, Zolt{\'a}n {\'A}d{\'a}m and
                     Orb{\'a}n, Andr{\'a}s},
  title           = {Algorithmic aspects of hardware\slash software
                     partitioning},
  journal         = j-TODAES,
  volume          = {10},
  number          = {1},
  pages           = {136--156},
  month           = jan,
  year            = {2005},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Thu Apr 14 10:34:36 MDT 2005},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Kagaris:2005:UMP,
  author          = {Kagaris, Dimitri},
  title           = {A unified method for phase shifter computation},
  journal         = j-TODAES,
  volume          = {10},
  number          = {1},
  pages           = {157--167},
  month           = jan,
  year            = {2005},
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Thu Apr 14 10:34:36 MDT 2005},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

%%% TODAES 10(1), January 2005, pp. 168--186.
@article{Kao:2005:EAF,
  author          = {Chi-Chou Kao and Yen-Tai Lai},
  title           = {An efficient algorithm for finding the minimal-area
                     {FPGA} technology mapping},
  journal         = j-TODAES,
  year            = 2005,
  month           = jan,
  volume          = 10,
  number          = 1,
  pages           = {168--186},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Thu Apr 14 10:34:36 MDT 2005},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

%%% TODAES 10(2), April 2005, pp. 187--204.
@article{Chabini:2005:SOR,
  author          = {Noureddine Chabini and El Mostapha Aboulhamid and
                     Isma{\"\i}l Chabini and Yvon Savaria},
  title           = {Scheduling and optimal register placement for
                     synchronous circuits derived using software pipelining
                     techniques},
  journal         = j-TODAES,
  year            = 2005,
  month           = apr,
  volume          = 10,
  number          = 2,
  pages           = {187--204},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Tue Apr 26 10:39:39 MDT 2005},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

%%% TODAES 10(2), April 2005, pp. 205--228.
@article{Cao:2005:SSL,
  author          = {Aiqun Cao and Naran Sirisantana and Cheng-Kok Koh and
                     Kaushik Roy},
  title           = {Synthesis of skewed logic circuits},
  journal         = j-TODAES,
  year            = 2005,
  month           = apr,
  volume          = 10,
  number          = 2,
  pages           = {205--228},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Tue Apr 26 10:39:39 MDT 2005},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

%%% TODAES 10(2), April 2005, pp. 229--257.
@article{Kadayif:2005:OIT,
  author          = {I. Kadayif and A. Sivasubramaniam and M. Kandemir and
                     G. Kandiraju and G. Chen},
  title           = {Optimizing instruction {TLB} energy using software and
                     hardware techniques},
  journal         = j-TODAES,
  year            = 2005,
  month           = apr,
  volume          = 10,
  number          = 2,
  pages           = {229--257},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Tue Apr 26 10:39:39 MDT 2005},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

%%% TODAES 10(2), April 2005, pp. 258--278.
@article{Liu:2005:ETT,
  author          = {Xiao Liu and Michael S. Hsiao and Sreejit Chakravarty
                     and Paul J. Thadikaran},
  title           = {Efficient techniques for transition testing},
  journal         = j-TODAES,
  year            = 2005,
  month           = apr,
  volume          = 10,
  number          = 2,
  pages           = {258--278},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Tue Apr 26 10:39:39 MDT 2005},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

%%% TODAES 10(2), April 2005, pp. 279--302.
@article{Poon:2005:DPM,
  author          = {Kara K. W. Poon and Steven J. E. Wilton and Andy Yan},
  title           = {A detailed power model for field-programmable gate
                     arrays},
  journal         = j-TODAES,
  year            = 2005,
  month           = apr,
  volume          = 10,
  number          = 2,
  pages           = {279--302},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Tue Apr 26 10:39:39 MDT 2005},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

%%% TODAES 10(2), April 2005, pp. 303--329.
@article{Bhattacharya:2005:OWP,
  author          = {Soumendu Bhattacharya and Abhijit Chatterjee},
  title           = {Optimized wafer-probe and assembled package test
                     design for analog circuits},
  journal         = j-TODAES,
  year            = 2005,
  month           = apr,
  volume          = 10,
  number          = 2,
  pages           = {303--329},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Tue Apr 26 10:39:39 MDT 2005},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

%%% TODAES 10(2), April 2005, pp. 330--353.
@article{Mohanty:2005:EED,
  author          = {Saraju P. Mohanty and N. Ranganathan},
  title           = {Energy-efficient datapath scheduling using multiple
                     voltages and dynamic clocking},
  journal         = j-TODAES,
  year            = 2005,
  month           = apr,
  volume          = 10,
  number          = 2,
  pages           = {330--353},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Tue Apr 26 10:39:39 MDT 2005},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

%%% TODAES 10(2), April 2005, pp. 354--368.
@article{Davoodi:2005:VSU,
  author          = {Azadeh Davoodi and Ankur Srivastava},
  title           = {Voltage scheduling under unpredictabilities: a risk
                     management paradigm},
  journal         = j-TODAES,
  year            = 2005,
  month           = apr,
  volume          = 10,
  number          = 2,
  pages           = {354--368},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Tue Apr 26 10:39:39 MDT 2005},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

%%% TODAES 10(2), April 2005, pp. 369--388.
@article{Wang:2005:EAV,
  author          = {Zhong Wang and Xiaobo Sharon Hu},
  title           = {Energy-aware variable partitioning and instruction
                     scheduling for multibank memory architectures},
  journal         = j-TODAES,
  year            = 2005,
  month           = apr,
  volume          = 10,
  number          = 2,
  pages           = {369--388},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Tue Apr 26 10:39:39 MDT 2005},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

%%% TODAES 10(2), April 2005, pp. 389--430.
@article{Cong:2005:LSC,
  author          = {Jason Cong and Joseph R. Shinnerl and Min Xie and Tim
                     Kong and Xin Yuan},
  title           = {Large-scale circuit placement},
  journal         = j-TODAES,
  year            = 2005,
  month           = apr,
  volume          = 10,
  number          = 2,
  pages           = {389--430},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Tue Apr 26 10:39:39 MDT 2005},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

%%% TODAES 10(3), July 2005, pp. 431--461.
@article{Paul:2005:HLM,
  author          = {Joann M. Paul and Donald E. Thomas and Andrew S.
                     Cassidy},
  title           = {High-level modeling and simulation of single-chip
                     programmable heterogeneous multiprocessors},
  journal         = j-TODAES,
  year            = 2005,
  month           = jul,
  volume          = 10,
  number          = 3,
  pages           = {431--461},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Thu Sep 22 11:16:52 MDT 2005},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

%%% TODAES 10(3), July 2005, pp. 462--491.
@article{Roy:2005:FSV,
  author          = {Arnab Roy and S. K. Panda and Rajeev Kumar and P. P.
                     Chakrabarti},
  title           = {A framework for systematic validation and debugging of
                     pipeline simulators},
  journal         = j-TODAES,
  year            = 2005,
  month           = jul,
  volume          = 10,
  number          = 3,
  pages           = {462--491},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Thu Sep 22 11:16:52 MDT 2005},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

%%% TODAES 10(3), July 2005, pp. 492--522.
@article{Banerjee:2005:OFT,
  author          = {Ansuman Banerjee and Pallab Dasgupta},
  title           = {The open family of temporal logics: {Annotating}
                     temporal operators with input constraints},
  journal         = j-TODAES,
  year            = 2005,
  month           = jul,
  volume          = 10,
  number          = 3,
  pages           = {492--522},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Thu Sep 22 11:16:52 MDT 2005},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

%%% TODAES 10(3), July 2005, pp. 523--545.
@article{Koushanfar:2005:BST,
  author          = {Farinaz Koushanfar and Inki Hong and Miodrag
                     Potkonjak},
  title           = {Behavioral synthesis techniques for intellectual
                     property protection},
  journal         = j-TODAES,
  year            = 2005,
  month           = jul,
  volume          = 10,
  number          = 3,
  pages           = {523--545},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Thu Sep 22 11:16:52 MDT 2005},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

%%% TODAES 10(3), July 2005, pp. 546--560.
@article{Gupta:2005:RAS,
  author          = {Puneet Gupta and Andrew B. Kahng and Stefanus Mantik},
  title           = {Routing-aware scan chain ordering},
  journal         = j-TODAES,
  year            = 2005,
  month           = jul,
  volume          = 10,
  number          = 3,
  pages           = {546--560},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Thu Sep 22 11:16:52 MDT 2005},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

%%% TODAES 10(3), July 2005, pp. 561--572.
@article{Xiang:2005:AIP,
  author          = {Hua Xiang and Xiaoping Tang and Martin D. F. Wong},
  title           = {An algorithm for integrated pin assignment and buffer
                     planning},
  journal         = j-TODAES,
  year            = 2005,
  month           = jul,
  volume          = 10,
  number          = 3,
  pages           = {561--572},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Thu Sep 22 11:16:52 MDT 2005},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

%%% TODAES 10(3), July 2005, pp. 573--586.
%%% The complexity bound in the title is big-O and is wrapped in braces
%%% so that bibliography styles which downcase titles cannot turn the
%%% capital O into little-o or otherwise alter the math.
@Article{Lee:2005:PDD,
  author =       "Jaehwan John Lee and Vincent John {Mooney III}",
  title =        "An {$O(\min(m,n))$} parallel deadlock detection
                 algorithm",
  journal =      j-TODAES,
  volume =       "10",
  number =       "3",
  pages =        "573--586",
  month =        jul,
  year =         "2005",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Sep 22 11:16:52 MDT 2005",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

%%% TODAES 10(4), October 2005, pp. 587--588.
@article{Harris:2005:I,
  author          = {Ian G. Harris},
  title           = {Introduction},
  journal         = j-TODAES,
  year            = 2005,
  month           = oct,
  volume          = 10,
  number          = 4,
  pages           = {587--588},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Fri Jan 13 07:41:02 MST 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

%%% TODAES 10(4), October 2005, pp. 589--609.
@article{Suhaib:2005:XIM,
  author          = {Syed M. Suhaib and Deepak A. Mathaikutty and Sandeep
                     K. Shukla and David Berner},
  title           = {{XFM}: {An} incremental methodology for developing
                     formal models},
  journal         = j-TODAES,
  year            = 2005,
  month           = oct,
  volume          = 10,
  number          = 4,
  pages           = {589--609},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Fri Jan 13 07:41:02 MST 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

%%% TODAES 10(4), October 2005, pp. 610--626.
@article{Fujita:2005:ECB,
  author          = {Masahiro Fujita},
  title           = {Equivalence checking between behavioral and {RTL}
                     descriptions with virtual controllers and datapaths},
  journal         = j-TODAES,
  year            = 2005,
  month           = oct,
  volume          = 10,
  number          = 4,
  pages           = {610--626},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Fri Jan 13 07:41:02 MST 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

%%% TODAES 10(4), October 2005, pp. 627--650.
%%% The parenthesised nicknames are kept as part of the given names.
@Article{Feng:2005:UDP,
  author =       "Tao Feng and Li-C. Wang and Kwang-Ting (Tim) Cheng and
                 Chih-Chang (Andy) Lin",
  title =        "Using $2$-domain partitioned {OBDD} data structure in
                 an enhanced symbolic simulator",
  journal =      j-TODAES,
  volume =       "10",
  number =       "4",
  pages =        "627--650",
  month =        oct,
  year =         "2005",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jan 13 07:41:02 MST 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

%%% TODAES 10(4), October 2005, pp. 651--672.
@article{Higgins:2005:SDA,
  author          = {Jason T. Higgins and Mark D. Aagaard},
  title           = {Simplifying the design and automating the verification
                     of pipelines with structural hazards},
  journal         = j-TODAES,
  year            = 2005,
  month           = oct,
  volume          = 10,
  number          = 4,
  pages           = {651--672},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Fri Jan 13 07:41:02 MST 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

%%% TODAES 10(4), October 2005, pp. 673--689.
@Article{Shamshiri:2005:ILT,
  author =       "Saeed Shamshiri and Hadi Esmaeilzadeh and Zainalabedin
                 Navabi",
  title =        "Instruction-level test methodology for {CPU} core
                 self-testing",
  journal =      j-TODAES,
  volume =       "10",
  number =       "4",
  pages =        "673--689",
  month =        oct,
  year =         "2005",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jan 13 07:41:02 MST 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

%%% TODAES 10(4), October 2005, pp. 690--701.
@article{Al-Yamani:2005:TCE,
  author          = {Ahmad A. Al-Yamani and Edward J. McCluskey},
  title           = {Test chip experimental results on high-level
                     structural test},
  journal         = j-TODAES,
  year            = 2005,
  month           = oct,
  volume          = 10,
  number          = 4,
  pages           = {690--701},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Fri Jan 13 07:41:02 MST 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

%%% TODAES 10(4), October 2005, pp. 702--723.
%%% The last author's compound surname is braced: a capitalised "Van" is
%%% otherwise parsed by BibTeX as part of the given names, which breaks
%%% sorting and abbreviation of the name.
@Article{Ciordas:2005:EBM,
  author =       "Calin Ciordas and Twan Basten and Andrei
                 R{\u{a}}dulescu and Kees Goossens and Jef {Van
                 Meerbergen}",
  title =        "An event-based monitoring service for networks on
                 chip",
  journal =      j-TODAES,
  volume =       "10",
  number =       "4",
  pages =        "702--723",
  month =        oct,
  year =         "2005",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Fri Jan 13 07:41:02 MST 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

%%% TODAES 11(1), January 2006, pp. 1--2.
@article{Dutt:2006:E,
  author          = {Nikil Dutt},
  title           = {Editorial},
  journal         = j-TODAES,
  year            = 2006,
  month           = jan,
  volume          = 11,
  number          = 1,
  pages           = {1--2},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Wed Apr 12 07:15:39 MDT 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

%%% TODAES 11(1), January 2006, pp. 3--25.
@article{Givargis:2006:ZCI,
  author          = {Tony Givargis},
  title           = {Zero cost indexing for improved processor cache
                     performance},
  journal         = j-TODAES,
  year            = 2006,
  month           = jan,
  volume          = 11,
  number          = 1,
  pages           = {3--25},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Wed Apr 12 07:15:39 MDT 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

%%% TODAES 11(1), January 2006, pp. 26--43.
@article{Constantinides:2006:WLO,
  author          = {George A. Constantinides},
  title           = {Word-length optimization for differentiable nonlinear
                     systems},
  journal         = j-TODAES,
  year            = 2006,
  month           = jan,
  volume          = 11,
  number          = 1,
  pages           = {26--43},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Wed Apr 12 07:15:39 MDT 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

%%% TODAES 11(1), January 2006, pp. 44--51.
@article{Su:2006:AMS,
  author          = {Qing Su and Jamil Kawa and Charles Chiang and Yehia
                     Massoud},
  title           = {Accurate modeling of substrate resistive coupling for
                     floating substrates},
  journal         = j-TODAES,
  year            = 2006,
  month           = jan,
  volume          = 11,
  number          = 1,
  pages           = {44--51},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Wed Apr 12 07:15:39 MDT 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

%%% TODAES 11(1), January 2006, pp. 52--69.
@article{Davoodi:2006:ETG,
  author          = {Azadeh Davoodi and Ankur Srivastava},
  title           = {Effective techniques for the generalized low-power
                     binding problem},
  journal         = j-TODAES,
  year            = 2006,
  month           = jan,
  volume          = 11,
  number          = 1,
  pages           = {52--69},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Wed Apr 12 07:15:39 MDT 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

%%% TODAES 11(1), January 2006, pp. 70--87.
@article{Schaumont:2006:ICE,
  author          = {Patrick Schaumont and Doris Ching and Ingrid
                     Verbauwhede},
  title           = {An interactive codesign environment for
                     domain-specific coprocessors},
  journal         = j-TODAES,
  year            = 2006,
  month           = jan,
  volume          = 11,
  number          = 1,
  pages           = {70--87},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Wed Apr 12 07:15:39 MDT 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

%%% TODAES 11(1), January 2006, pp. 88--103.
@article{Jiang:2006:RCD,
  author          = {Iris Hui-Ru Jiang and Song-Ra Pan and Yao-Wen Chang
                     and Jing-Yang Jou},
  title           = {Reliable crosstalk-driven interconnect optimization},
  journal         = j-TODAES,
  year            = 2006,
  month           = jan,
  volume          = 11,
  number          = 1,
  pages           = {88--103},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Wed Apr 12 07:15:39 MDT 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

%%% TODAES 11(1), January 2006, pp. 104--122.
@article{Kulkarni:2006:CTA,
  author          = {Dhananjay Kulkarni and Walid A. Najjar and Robert
                     Rinker and Fadi J. Kurdahi},
  title           = {Compile-time area estimation for {LUT}-based {FPGAs}},
  journal         = j-TODAES,
  year            = 2006,
  month           = jan,
  volume          = 11,
  number          = 1,
  pages           = {104--122},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Wed Apr 12 07:15:39 MDT 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

%%% TODAES 11(1), January 2006, pp. 123--146.
@article{Shrivastava:2006:CFC,
  author          = {Aviral Shrivastava and Partha Biswas and Ashok Halambi
                     and Nikil Dutt and Alex Nicolau},
  title           = {Compilation framework for code size reduction using
                     reduced bit-width {ISAs (rISAs)}},
  journal         = j-TODAES,
  year            = 2006,
  month           = jan,
  volume          = 11,
  number          = 1,
  pages           = {123--146},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Wed Apr 12 07:15:39 MDT 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

%%% TODAES 11(1), January 2006, pp. 147--164.
@article{You:2006:CLP,
  author          = {Yi-Ping You and Chingren Lee and Jenq Kuen Lee},
  title           = {Compilers for leakage power reduction},
  journal         = j-TODAES,
  year            = 2006,
  month           = jan,
  volume          = 11,
  number          = 1,
  pages           = {147--164},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Wed Apr 12 07:15:39 MDT 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

%%% TODAES 11(1), January 2006, pp. 165--185.
@article{Shao:2006:LST,
  author          = {Zili Shao and Bin Xiao and Chun Xue and Qingfeng Zhuge
                     and Edwin H.-M. Sha},
  title           = {Loop scheduling with timing and switching-activity
                     minimization for {VLIW DSP}},
  journal         = j-TODAES,
  year            = 2006,
  month           = jan,
  volume          = 11,
  number          = 1,
  pages           = {165--185},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Wed Apr 12 07:15:39 MDT 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

%%% TODAES 11(1), January 2006, pp. 186--212.
@article{Mohanty:2006:IMS,
  author          = {Saraju P. Mohanty and N. Ranganathan and Sunil K.
                     Chappidi},
  title           = {{ILP} models for simultaneous energy and transient
                     power minimization during behavioral synthesis},
  journal         = j-TODAES,
  year            = 2006,
  month           = jan,
  volume          = 11,
  number          = 1,
  pages           = {186--212},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Wed Apr 12 07:15:39 MDT 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

%%% TODAES 11(1), January 2006, pp. 213--227.
@article{Ozdal:2006:TLB,
  author          = {Muhammet Mustafa Ozdal and Martin D. F. Wong},
  title           = {Two-layer bus routing for high-speed printed circuit
                     boards},
  journal         = j-TODAES,
  year            = 2006,
  month           = jan,
  volume          = 11,
  number          = 1,
  pages           = {213--227},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Wed Apr 12 07:15:39 MDT 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

%%% TODAES 11(1), January 2006, pp. 228--250.
@article{Kandemir:2006:IEB,
  author          = {M. Kandemir and J. Ramanujam and U. Sezer},
  title           = {Improving the energy behavior of block buffering using
                     compiler optimizations},
  journal         = j-TODAES,
  year            = 2006,
  month           = jan,
  volume          = 11,
  number          = 1,
  pages           = {228--250},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Wed Apr 12 07:15:39 MDT 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

%%% TODAES 11(2), April 2006, pp. 251--281.
@article{Ayala-Rincon:2006:PTS,
  author          = {M. Ayala-Rinc{\'o}n and C. H. Llanos and R. P. Jacobi
                     and R. W. Hartenstein},
  title           = {Prototyping time- and space-efficient computations of
                     algebraic operations over dynamically reconfigurable
                     systems modeled by rewriting-logic},
  journal         = j-TODAES,
  year            = 2006,
  month           = apr,
  volume          = 11,
  number          = 2,
  pages           = {251--281},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Wed Aug 23 10:13:18 MDT 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

%%% TODAES 11(2), April 2006, pp. 282--305.
@article{Absar:2006:RAI,
  author          = {Javed Absar and Francky Catthoor},
  title           = {Reuse analysis of indirectly indexed arrays},
  journal         = j-TODAES,
  year            = 2006,
  month           = apr,
  volume          = 11,
  number          = 2,
  pages           = {282--305},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Wed Aug 23 10:13:18 MDT 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

%%% TODAES 11(2), April 2006, pp. 306--324.
@article{Dasdan:2006:HIT,
  author          = {Ali Dasdan and Ivan Hom},
  title           = {Handling inverted temperature dependence in static
                     timing analysis},
  journal         = j-TODAES,
  year            = 2006,
  month           = apr,
  volume          = 11,
  number          = 2,
  pages           = {306--324},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Wed Aug 23 10:13:18 MDT 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

%%% TODAES 11(2), April 2006, pp. 325--345.
@article{Li:2006:ETO,
  author          = {Zuoyuan Li and Xianlong Hong and Qiang Zhou and Jinian
                     Bian and Hannah H. Yang and Vijay Pitchumani},
  title           = {Efficient thermal-oriented {$3$D} floorplanning and
                     thermal via planning for two-stacked-die integration},
  journal         = j-TODAES,
  year            = 2006,
  month           = apr,
  volume          = 11,
  number          = 2,
  pages           = {325--345},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Wed Aug 23 10:13:18 MDT 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}

@Article{Padmanaban:2006:IGM,
  author =       "Saravanan Padmanaban and Spyros Tragoudas",
  title =        "Implicit grading of multiple path delay faults",
  journal =      j-TODAES,
  volume =       "11",
  number =       "2",
  pages =        "346--361",
  month =        apr,
  year =         "2006",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Wed Aug 23 10:13:18 MDT 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Chen:2006:OSM,
  author =       "Deming Chen and Jason Cong and Junjuan Xu",
  title =        "Optimal simultaneous module and multivoltage
                 assignment for low power",
  journal =      j-TODAES,
  volume =       "11",
  number =       "2",
  pages =        "362--386",
  month =        apr,
  year =         "2006",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Wed Aug 23 10:13:18 MDT 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Zhu:2006:CZD,
  author =       "Haikun Zhu and Chung-Kuan Cheng and Ronald Graham",
  title =        "On the construction of zero-deficiency parallel prefix
                 circuits with minimum depth",
  journal =      j-TODAES,
  volume =       "11",
  number =       "2",
  pages =        "387--409",
  month =        apr,
  year =         "2006",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Wed Aug 23 10:13:18 MDT 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Kandemir:2006:REC,
  author =       "Mahmut Taylan Kandemir",
  title =        "Reducing energy consumption of multiprocessor {SoC}
                 architectures by exploiting memory bank locality",
  journal =      j-TODAES,
  volume =       "11",
  number =       "2",
  pages =        "410--441",
  month =        apr,
  year =         "2006",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Wed Aug 23 10:13:18 MDT 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Su:2006:CTD,
  author =       "Fei Su and Sule Ozev and Krishnendu Chakrabarty",
  title =        "Concurrent testing of digital microfluidics-based
                 biochips",
  journal =      j-TODAES,
  volume =       "11",
  number =       "2",
  pages =        "442--464",
  month =        apr,
  year =         "2006",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Wed Aug 23 10:13:18 MDT 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Atienza:2006:SDM,
  author =       "David Atienza and Jose M. Mendias and Stylianos
                 Mamagkakis and Dimitrios Soudris and Francky Catthoor",
  title =        "Systematic dynamic memory management design
                 methodology for reduced memory footprint",
  journal =      j-TODAES,
  volume =       "11",
  number =       "2",
  pages =        "465--489",
  month =        apr,
  year =         "2006",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Wed Aug 23 10:13:18 MDT 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Li:2006:LVA,
  author =       "Wei Li and Daniel Blakely and Scott {Van Sooy} and
                 Keven Dunn and David Kidd and Robert Rogenmoser and
                 Dian Zhou",
  title =        "{LVS} verification across multiple power domains for a
                 quad-core microprocessor",
  journal =      j-TODAES,
  volume =       "11",
  number =       "2",
  pages =        "490--500",
  month =        apr,
  year =         "2006",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Wed Aug 23 10:13:18 MDT 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Cheatham:2006:SFT,
  author =       "Jason A. Cheatham and John M. Emmert and Stan
                 Baumgart",
  title =        "A survey of fault tolerant methodologies for {FPGAs}",
  journal =      j-TODAES,
  volume =       "11",
  number =       "2",
  pages =        "501--533",
  month =        apr,
  year =         "2006",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Wed Aug 23 10:13:18 MDT 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Pedram:2006:ISI,
  author =       "Massoud Pedram",
  title =        "Introduction to special issue: {Novel} paradigms in
                 system-level design",
  journal =      j-TODAES,
  volume =       "11",
  number =       "3",
  pages =        "535--536",
  month =        jul,
  year =         "2006",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Wed Aug 23 10:13:19 MDT 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Pinto:2006:SLD,
  author =       "Alessandro Pinto and Alvise Bonivento and Alberto L.
                 Sangiovanni-Vincentelli and Roberto Passerone and Marco
                 Sgroi",
  title =        "System level design paradigms: {Platform-based} design
                 and communication synthesis",
  journal =      j-TODAES,
  volume =       "11",
  number =       "3",
  pages =        "537--563",
  month =        jul,
  year =         "2006",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Wed Aug 23 10:13:19 MDT 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Marculescu:2006:CCR,
  author =       "Radu Marculescu and Umit Y. Ogras and Nicholas H.
                 Zamora",
  title =        "Computation and communication refinement for
                 multiprocessor {SoC} design: a system-level
                 perspective",
  journal =      j-TODAES,
  volume =       "11",
  number =       "3",
  pages =        "564--592",
  month =        jul,
  year =         "2006",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Wed Aug 23 10:13:19 MDT 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Pop:2006:AOD,
  author =       "Paul Pop and Petru Eles and Zebo Peng and Traian Pop",
  title =        "Analysis and optimization of distributed real-time
                 embedded systems",
  journal =      j-TODAES,
  volume =       "11",
  number =       "3",
  pages =        "593--625",
  month =        jul,
  year =         "2006",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Wed Aug 23 10:13:19 MDT 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Mishra:2006:ADL,
  author =       "Prabhat Mishra and Aviral Shrivastava and Nikil Dutt",
  title =        "Architecture description language {(ADL)-driven}
                 software toolkit generation for architectural
                 exploration of programmable {SOCs}",
  journal =      j-TODAES,
  volume =       "11",
  number =       "3",
  pages =        "626--658",
  month =        jul,
  year =         "2006",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Wed Aug 23 10:13:19 MDT 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Lysecky:2006:WP,
  author =       "Roman Lysecky and Greg Stitt and Frank Vahid",
  title =        "{Warp Processors}",
  journal =      j-TODAES,
  volume =       "11",
  number =       "3",
  pages =        "659--681",
  month =        jul,
  year =         "2006",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Wed Aug 23 10:13:19 MDT 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Su:2006:MPF,
  author =       "Fei Su and Krishnendu Chakrabarty",
  title =        "Module placement for fault-tolerant
                 microfluidics-based biochips",
  journal =      j-TODAES,
  volume =       "11",
  number =       "3",
  pages =        "682--710",
  month =        jul,
  year =         "2006",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Wed Aug 23 10:13:19 MDT 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Hanchate:2006:GTF,
  author =       "Narender Hanchate and Nagarajan Ranganathan",
  title =        "A game-theoretic framework for multimetric
                 optimization of interconnect delay, power, and
                 crosstalk noise during wire sizing",
  journal =      j-TODAES,
  volume =       "11",
  number =       "3",
  pages =        "711--739",
  month =        jul,
  year =         "2006",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Wed Aug 23 10:13:19 MDT 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Chen:2006:SPC,
  author =       "Gang Chen and Jason Cong",
  title =        "Simultaneous placement with clustering and
                 duplication",
  journal =      j-TODAES,
  volume =       "11",
  number =       "3",
  pages =        "740--772",
  month =        jul,
  year =         "2006",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Wed Aug 23 10:13:19 MDT 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Bhanja:2006:SFG,
  author =       "Sanjukta Bhanja and Karthikeyan Lingasubramanian and
                 Nagarajan Ranganathan",
  title =        "A stimulus-free graphical probabilistic switching
                 model for sequential circuits using dynamic {Bayesian}
                 networks",
  journal =      j-TODAES,
  volume =       "11",
  number =       "3",
  pages =        "773--796",
  month =        jul,
  year =         "2006",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Wed Aug 23 10:13:19 MDT 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Cao:2006:POS,
  author =       "Aiqun Cao and Ruibing Lu and Chen Li and Cheng-Kok
                 Koh",
  title =        "Postlayout optimization for synthesis of {Domino}
                 circuits",
  journal =      j-TODAES,
  volume =       "11",
  number =       "4",
  pages =        "797--821",
  month =        oct,
  year =         "2006",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Wed Nov 15 06:47:05 MST 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Nacul:2006:STC,
  author =       "Andr{\'e} C. N{\'a}cul and Tony Givargis",
  title =        "Synthesis of time-constrained multitasking embedded
                 software",
  journal =      j-TODAES,
  volume =       "11",
  number =       "4",
  pages =        "822--847",
  month =        oct,
  year =         "2006",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Wed Nov 15 06:47:05 MST 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Kang:2006:STA,
  author =       "Kunhyuk Kang and Bipul C. Paul and Kaushik Roy",
  title =        "Statistical timing analysis using levelized covariance
                 propagation considering systematic and random
                 variations of process parameters",
  journal =      j-TODAES,
  volume =       "11",
  number =       "4",
  pages =        "848--879",
  month =        oct,
  year =         "2006",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Wed Nov 15 06:47:05 MST 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Kuo:2006:DID,
  author =       "Wu-An Kuo and Tingting Hwang and Allen C.-H. Wu",
  title =        "Decomposition of instruction decoders for low-power
                 designs",
  journal =      j-TODAES,
  volume =       "11",
  number =       "4",
  pages =        "880--889",
  month =        oct,
  year =         "2006",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Wed Nov 15 06:47:05 MST 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Liu:2006:CML,
  author =       "Yi-Yu Liu and Kuo-Hua Wang and Tingting Hwang",
  title =        "Crosstalk minimization in logic synthesis for {PLAs}",
  journal =      j-TODAES,
  volume =       "11",
  number =       "4",
  pages =        "890--915",
  month =        oct,
  year =         "2006",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Wed Nov 15 06:47:05 MST 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Goren:2006:TSG,
  author =       "Sezer G{\"o}ren and F. Joel Ferguson",
  title =        "Test sequence generation for controller verification
                 and test with high coverage",
  journal =      j-TODAES,
  volume =       "11",
  number =       "4",
  pages =        "916--938",
  month =        oct,
  year =         "2006",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Wed Nov 15 06:47:05 MST 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Wu:2006:MWR,
  author =       "Zhong-Zhen Wu and Shih-Chieh Chang",
  title =        "Multiple wire reconnections based on implication flow
                 graph",
  journal =      j-TODAES,
  volume =       "11",
  number =       "4",
  pages =        "939--952",
  month =        oct,
  year =         "2006",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Wed Nov 15 06:47:05 MST 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Wang:2006:PDT,
  author =       "Chi-Shong Wang and Chingwei Yeh",
  title =        "Performance-driven technology mapping with {MSG}
                 partition and selective gate duplication",
  journal =      j-TODAES,
  volume =       "11",
  number =       "4",
  pages =        "953--973",
  month =        oct,
  year =         "2006",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Wed Nov 15 06:47:05 MST 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Gangwar:2007:IIC,
  author =       "Anup Gangwar and M. Balakrishnan and Anshul Kumar",
  title =        "Impact of intercluster communication mechanisms on
                 {ILP} in clustered {VLIW} architectures",
  journal =      j-TODAES,
  volume =       "12",
  number =       "1",
  pages =        "1:1--1:??",
  month =        jan,
  year =         "2007",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Jun 12 18:08:29 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "VLIW processors have started gaining acceptance in the
                 embedded systems domain. However, monolithic register
                 file VLIW processors with a large number of functional
                 units are not viable. This is because of the need for a
                 large number of ports to support FU requirements, which
                 makes them expensive and extremely slow. A simple
                 solution is to break the register file into a number of
                 smaller register files with a subset of FUs connected
                 to it. These architectures are termed clustered VLIW
                 processors. In this article, we first build a case for
                 clustered VLIW processors with four or more clusters by
                 showing that the achievable ILP in most of the media
                 applications for a 16 ALU and 8 LD/ST VLIW processor is
                 around 20. We then provide a classification of the
                 intercluster interconnection design space, and show
                 that a large part of this design space is currently
                 unexplored. Next, using our performance evaluation
                 methodology, we evaluate a subset of this design space
                 and show that the most commonly used type of
                 interconnection, RF-to-RF, fails to meet achievable
                 performance by a large factor, while certain other
                 types of interconnections can lower this gap
                 considerably. We also establish that this behavior is
                 heavily application dependent, emphasizing the
                 importance of application-specific architecture
                 exploration. We also present results about the
                 statistical behavior of these different architectures
                 by varying the number of clusters in our framework from
                 4 to 16. These results clearly show the advantages of
                 one specific architecture over others. Finally, based
                 on our results, we propose a new interconnection
                 network, which should lower this performance gap.",
  acknowledgement = ack-nhfb,
  articleno =    "1",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "ASIP; clustered VLIW processors; performance
                 evaluation; VLIW",
}

@Article{Zamora:2007:SLP,
  author =       "Nicholas H. Zamora and Xiaoping Hu and Radu
                 Marculescu",
  title =        "System-level performance\slash power analysis for
                 platform-based design of multimedia applications",
  journal =      j-TODAES,
  volume =       "12",
  number =       "1",
  pages =        "2:1--2:??",
  month =        jan,
  year =         "2007",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Jun 12 18:08:29 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "The objective of this article is to introduce the use
                 of Stochastic Automata Networks (SANs) as an effective
                 formalism for application-architecture modeling in
                 system-level average-case analysis for platform-based
                 design. By platform, we mean a family of heterogeneous
                 architectures that satisfy a set of architectural
                 constraints imposed to allow re-use of hardware and
                 software components. More precisely, we show how SANs
                 can be used early in the design cycle to identify the
                 best performance/power trade-offs among several
                 application-architecture combinations. Having this
                 information available not only helps avoid lengthy
                 simulations for predicting power and performance
                 figures, but also enables efficient mapping of
                 different applications onto a chosen platform. We
                 illustrate the benefits of our methodology by using the
                 ``Picture-in-Picture'' video decoder as a driver
                 application.",
  acknowledgement = ack-nhfb,
  articleno =    "2",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "average-case analysis; design space exploration;
                 hardware/software codesign; Markov chains; performance
                 models; platform-based design; stochastic automata
                 networks (SANs)",
}

@Article{Sham:2007:ARD,
  author =       "Chiu-Wing Sham and Evangeline F. Y. Young",
  title =        "Area reduction by deadspace utilization on
                 interconnect optimized floorplan",
  journal =      j-TODAES,
  volume =       "12",
  number =       "1",
  pages =        "3:1--3:??",
  month =        jan,
  year =         "2007",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Jun 12 18:08:29 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Interconnect optimization has become the major concern
                 in floorplanning. Many approaches would use simulated
                 annealing (SA) with a cost function composed of a
                 weighted sum of area, wirelength, and interconnect
                 cost. These approaches can reduce the interconnect cost
                 efficiently but the area penalty of the interconnect
                 optimized floorplan is usually quite large. In this
                 article, we propose an approach called deadspace
                 utilization (DSU) to reclaim the unused area of an
                 interconnect optimized floorplan by linear programming.
                 Since modules are not necessarily rectangular in shape
                 in floorplanning, some deadspace can be redistributed
                 to the modules to increase the area occupied by each
                 module. If the area of each module can be expanded by
                 the same ratio, the whole floorplan can be compacted by
                 that ratio to give a smaller floorplan. However, we
                 will limit the compaction ratio to prevent
                 overcongestion. Experiments show that we can apply this
                 deadspace utilization technique to reduce the area and
                 total wirelength of an interconnect optimized floorplan
                 further while the routability can be maintained at the
                 same time.",
  acknowledgement = ack-nhfb,
  articleno =    "3",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "area reduction; Floorplanning",
}

@Article{Li:2007:SBC,
  author =       "Lei Li and Zhanglei Wang and Krishnendu Chakrabarty",
  title =        "Scan-{BIST} based on cluster analysis and the encoding
                 of repeating sequences",
  journal =      j-TODAES,
  volume =       "12",
  number =       "1",
  pages =        "4:1--4:??",
  month =        jan,
  year =         "2007",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Jun 12 18:08:29 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "We present a built-in self-test (BIST) approach for
                 full-scan designs that extracts the most frequently
                 occurring sequences from deterministic test patterns.
                 The extracted sequences are stored on-chip, and are
                 used during test application. Three sets of test
                 patterns are applied to the circuit under test during a
                 BIST test session; these include pseudorandom patterns,
                 semirandom patterns, and deterministic patterns. The
                 semirandom patterns are generated based on the stored
                 sequences and they are more likely to detect
                 hard-to-detect faults than pseudorandom patterns. The
                 deterministic patterns are encoded using either the
                 stored sequences or the LFSR reseeding technique to
                 reduce test data volume. We use the cluster analysis
                 technique for sequence extraction to reduce the amount
                 of data to be stored. Experimental results for the
                 ISCAS-89 benchmark circuits show that the proposed
                 approach often requires less on-chip storage and test
                 data volume than other recent BIST methods.",
  acknowledgement = ack-nhfb,
  articleno =    "4",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Built-in self-test (BIST); clustering test data
                 volume; test compression",
}

@Article{Cai:2007:WAD,
  author =       "Yuan Cai and Marcus T. Schmitz and Bashir M.
                 Al-Hashimi and Sudhakar M. Reddy",
  title =        "Workload-ahead-driven online energy minimization
                 techniques for battery-powered embedded systems with
                 time-constraints",
  journal =      j-TODAES,
  volume =       "12",
  number =       "1",
  pages =        "5:1--5:??",
  month =        jan,
  year =         "2007",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Jun 12 18:08:29 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "This article proposes a new online voltage scaling
                 (VS) technique for battery-powered embedded systems
                 with real-time constraints. The VS technique takes into
                 account the execution times and discharge currents of
                 tasks to further reduce the battery charge consumption
                 when compared to the recently reported slack forwarding
                 technique [Ahmed and Chakrabarti 2004], while
                 maintaining low online complexity of $O(1)$.
                 Furthermore, we investigate the impact of online
                 rescheduling and remapping on the battery charge
                 consumption for tasks with data dependency which has
                 not been explicitly addressed in the literature and
                 propose a novel rescheduling/remapping technique.
                 Finally, we take leakage power into consideration and
                 extend the proposed online techniques to include
                 adaptive body biasing (ABB) which is used to reduce the
                 leakage power. We demonstrate and compare the
                 efficiency of the presented techniques using seven
                 real-life benchmarks and numerous automatically
                 generated examples.",
  acknowledgement = ack-nhfb,
  articleno =    "5",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "adaptive body biasing; battery; Dynamic voltage
                 scaling; embedded systems",
}

@Article{Zhu:2007:HMF,
  author =       "Xinping Zhu and Sharad Malik",
  title =        "A hierarchical modeling framework for on-chip
                 communication architectures of multiprocessing {SoCs}",
  journal =      j-TODAES,
  volume =       "12",
  number =       "1",
  pages =        "6:1--6:??",
  month =        jan,
  year =         "2007",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Jun 12 18:08:29 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "In multiprocessor-based SoCs, optimizing the
                 communication architecture is often as important, if
                 not more important, than optimizing the computation
                 architecture. While there are mature platforms and
                 techniques for the modeling and evaluation of
                 architectures of processing elements, the same is not
                 true for the communication architectures. This article
                 presents an application-driven retargetable prototyping
                 platform that fills this gap. This environment aims to
                 facilitate the design exploration of the communication
                 subsystem through application-level execution-driven
                 simulations and quantitative analysis. Based on an
                 analysis of a wide range of on-chip communication
                 architectures, we describe how a specific hierarchical
                 class library can be used to develop new on-chip
                 communication architectures, or variants of existing
                 ones with relatively little incremental effort. We
                 demonstrate this through three case studies including
                 two commercial on-chip bus systems and an on-chip
                 packet switching network. Here we show that, through
                 careful analysis and construction, it is possible for
                 the modeling environment to support the common features
                 of these architectures as part of the library and
                 permit instantiation of the individual architectures as
                 variants of the library design. Consequently,
                 system-level design choices regarding the communication
                 architecture can be made with high confidence in the
                 early stages of design. In addition to improving design
                 quality, this methodology also results in significantly
                 shortening design-time.",
  acknowledgement = ack-nhfb,
  articleno =    "6",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "bus; design exploration; multiprocessor system;
                 network-on-chip; object-oriented modeling; on-chip
                 communication architecture; packet-switching network;
                 Retargetable simulation",
}

@Article{Majumder:2007:HPV,
  author =       "Subhashis Majumder and Susmita Sur-Kolay and Bhargab
                 B. Bhattacharya and Swarup Kumar Das",
  title =        "Hierarchical partitioning of {VLSI} floorplans by
                 staircases",
  journal =      j-TODAES,
  volume =       "12",
  number =       "1",
  pages =        "7:1--7:??",
  month =        jan,
  year =         "2007",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Jun 12 18:08:29 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "This article addresses the problem of recursively
                 bipartitioning a given floorplan F using monotone
                 staircases. At each level of the hierarchy, a monotone
                 staircase from one corner of F to its opposite corner
                 is identified, such that (i) the two parts of the
                 bipartition are nearly equal in area (or in the number
                 of blocks), and (ii) the number of nets crossing the
                 staircase is minimal. The problem of area-balanced
                 bipartitioning is shown to be NP-hard, and a
                 maxflow-based heuristic is proposed. Such a hierarchy
                 may be useful to repeater placement in deep-submicron
                 physical design, and also to global routing.",
  acknowledgement = ack-nhfb,
  articleno =    "7",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "balanced bipartitioning; Floorplanning; global
                 routing; network flow; NP-completeness",
}

@Article{Lee:2007:ISS,
  author =       "Jong-Eun Lee and Kiyoung Choi and Nikil D. Dutt",
  title =        "Instruction set synthesis with efficient instruction
                 encoding for configurable processors",
  journal =      j-TODAES,
  volume =       "12",
  number =       "1",
  pages =        "8:1--8:??",
  month =        jan,
  year =         "2007",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Jun 12 18:08:29 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Application-specific instructions can significantly
                 improve the performance, energy-efficiency, and code
                 size of configurable processors. While generating new
                 instructions from application-specific operation
                 patterns has been a common way to improve the
                 instruction set (IS) of a configurable processor,
                 automating the design of ISs for given applications
                 poses new challenges---how to create as well as utilize
                 new instructions in a systematic manner, and how to
                 choose the best set of application-specific
                 instructions considering the various effects the new
                 instructions may have on the data path and the
                 compilation? To address these problems, we present a
                 novel IS synthesis framework that optimizes the IS
                 through an efficient instruction encoding for the given
                 application as well as for the given data path
                 architecture. We first build a library of new
                 instructions created with various encoding alternatives
                 taking into account the data path architecture
                 constraints, and then select the best set of
                 instructions while satisfying the instruction bitwidth
                 constraint. We formulate the problem using integer
                 linear programming and also present an effective
                 heuristic algorithm. Experimental results using our
                 technique generate ISs that show improvements of up to
                 about 40\% over the native IS for several application
                 benchmarks running on typical embedded RISC
                 processors.",
  acknowledgement = ack-nhfb,
  articleno =    "8",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Application-specific instruction set processor (ASIP);
                 bitwidth-economical; configurable processor;
                 instruction encoding; ISA customization and
                 specialization",
}

@Article{Dutt:2007:E,
  author =       "Nikil Dutt",
  title =        "Editorial",
  journal =      j-TODAES,
  volume =       "12",
  number =       "2",
  pages =        "9:1--9:??",
  month =        apr,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1230800.1230801",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Jun 12 18:08:48 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  articleno =    "9",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Wang:2007:DIC,
  author =       "Chao Wang and Zijiang Yang and Franjo
                 Ivan{\v{c}}i{\'c} and Aarti Gupta",
  title =        "Disjunctive image computation for software
                 verification",
  journal =      j-TODAES,
  volume =       "12",
  number =       "2",
  pages =        "10:1--10:??",
  month =        apr,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1230800.1230802",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Jun 12 18:08:48 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Existing BDD-based symbolic algorithms designed for
                 hardware designs do not perform well on software
                 programs. We propose novel techniques based on unique
                 characteristics of software programs. Our algorithm
                 divides an image computation step into a disjunctive
                 set of easier ones that can be performed in isolation.
                 We use hypergraph partitioning to minimize the number
                 of live variables in each disjunctive component, and
                 variable scopes to simplify transition relations and
                 reachable state subsets. Our experiments on nontrivial
                 C programs show that BDD-based symbolic algorithms can
                 directly handle software models with a much larger
                 number of state variables than for hardware designs.",
  acknowledgement = ack-nhfb,
  articleno =    "10",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "binary decision diagram; formal verification; image
                 computation; Model checking; reachability analysis",
}

@Article{Mochocki:2007:TOA,
  author =       "Bren Mochocki and Xiaobo Sharon Hu and Gang Quan",
  title =        "Transition-overhead-aware voltage scheduling for
                 fixed-priority real-time systems",
  journal =      j-TODAES,
  volume =       "12",
  number =       "2",
  pages =        "11:1--11:??",
  month =        apr,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1230800.1230803",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Jun 12 18:08:48 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Time transition overhead is a critical problem for
                 hard real-time systems that employ dynamic voltage
                 scaling (DVS) for power and energy management. While it
                 is a common practice of much previous work to ignore
                 transition overhead, these algorithms cannot guarantee
                 deadlines and/or are less effective in saving energy
                 when transition overhead is significant and not
                 appropriately dealt with. In this article we introduce
                 two techniques, one offline and one online, to
                 correctly account for transition overhead in preemptive
                 fixed-priority real-time systems. We present several
                 DVS scheduling algorithms that implement these methods
                 that can guarantee task deadlines under arbitrarily
                 large transition time overheads and reduce energy
                 consumption by as much as 40\% when compared to
                 previous methods.",
  acknowledgement = ack-nhfb,
  articleno =    "11",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Dynamic voltage scaling; fixed priority; low power;
                 scheduling; transition overhead",
}

@Article{Chang:2007:PLP,
  author =       "Hongliang Chang and Sachin S. Sapatnekar",
  title =        "Prediction of leakage power under process
                 uncertainties",
  journal =      j-TODAES,
  volume =       "12",
  number =       "2",
  pages =        "12:1--12:??",
  month =        apr,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1230800.1230804",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Jun 12 18:08:48 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "In this article, we present a method to analyze the
                 total leakage current of a circuit under process
                 variations, considering interdie and intradie
                 variations as well as the effect of the spatial
                 correlations of intradie variations. The approach
                 considers both the subthreshold and gate tunneling
                 leakage power, as well as their interactions. With
                 process variations, each leakage component is
                 approximated by a lognormal distribution, and the total
                 chip leakage is computed as a sum of the correlated
                 lognormals. Since the lognormals to be summed are large
                 in number and have complicated correlation structures
                 due to both spatial correlations and the correlation
                 among different leakage mechanisms, we propose an
                 efficient method to reduce the number of correlated
                 lognormals for summation to a manageable quantity. We
                 do so by identifying dominant states of leakage
                 currents and taking advantage of the spatial
                 correlation model and input states at the gates. An
                 improved approach utilizing the principal components
                 computed from spatially correlated process parameters
                 is also proposed to further improve runtime efficiency.
                 We show that the proposed methods are effective in
                 predicting the probability distribution of total chip
                 leakage, and that ignoring spatial correlations can
                 underestimate the standard deviation of full-chip
                 leakage power.",
  acknowledgement = ack-nhfb,
  articleno =    "12",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Circuit; leakage; process variation; yield",
}

@Article{Mohanty:2007:MBE,
  author =       "Sumit Mohanty and Viktor K. Prasanna",
  title =        "A model-based extensible framework for efficient
                 application design using {FPGA}",
  journal =      j-TODAES,
  volume =       "12",
  number =       "2",
  pages =        "13:1--13:??",
  month =        apr,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1230800.1230805",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Jun 12 18:08:48 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "For an FPGA designer, several choices are available in
                 terms of target FPGA devices, IP-cores, algorithms,
                 synthesis options, runtime reconfiguration, degrees of
                 parallelism, among others, while implementing a design.
                 Evaluation of design alternatives in the early stages
                 of the design cycle is important because the choices
                 made can have a critical impact on the performance of
                 the final design. However, a large number of
                 alternatives not only results in a large number of
                 designs, but also makes it a hard problem to
                 efficiently manage, simulate, and evaluate them. In
                 this article, we present a framework for FPGA-based
                 application design that addresses the aforementioned
                 issues. This framework supports a hierarchical modeling
                 approach that integrates application and device
                 modeling techniques and allows development of a library
                 of models for design reuse. The framework integrates a
                 high-level performance estimator for rapid estimation
                 of the latency, area, and energy of the designs. In
                 addition, a design space exploration tool allows
                 efficient evaluation of candidate designs against the
                 given performance requirements. The framework also
                 supports extension through integration of widely used
                 tools for FPGA-based design while presenting a unified
                 environment for different target FPGAs. We demonstrate
                 our framework through the modeling and performance
                 estimation of a signal processing kernel and the design
                 of end-to-end applications.",
  acknowledgement = ack-nhfb,
  articleno =    "13",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "design tool; extensible; Modeling; reuse",
}

@Article{Tang:2007:PDF,
  author =       "Weiyu Tang and Arun Kejariwal and Alexander V.
                 Veidenbaum and Alexandru Nicolau",
  title =        "A predictive decode filter cache for reducing power
                 consumption in embedded processors",
  journal =      j-TODAES,
  volume =       "12",
  number =       "2",
  pages =        "14:1--14:??",
  month =        apr,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1230800.1230806",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Jun 12 18:08:48 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "With advances in semiconductor technology, power
                 management has increasingly become a very important
                 design constraint in processor design. In embedded
                 processors, instruction fetch and decode consume more
                 than 40\% of processor power. This calls for
                 development of power minimization techniques for the
                 fetch and decode stages of the processor pipeline. For
                 this, filter cache has been proposed as an
                 architectural extension for reducing the power
                 consumption. A filter cache is placed between the CPU
                 and the instruction cache (I-cache) to provide the
                 instruction stream. A filter cache has the advantages
                 of shorter access time and lower power consumption.
                 However, the downside of a filter cache is a possible
                 performance loss in case of cache misses. \par

                 In this article, we present a novel technique---decode
                 filter cache (DFC)---for minimizing power consumption
                 with minimal performance impact. The DFC stores decoded
                 instructions. Thus, a hit in the DFC eliminates
                 instruction fetch and its subsequent decoding. The
                 bypassing of both instruction fetch and decode reduces
                 processor power. We present a runtime approach for
                 predicting whether the next fetch source is present in
                 the DFC. In case a miss is predicted, we reduce the
                 miss penalty by accessing the I-cache directly. We
                 propose to classify instructions as cacheable or
                 noncacheable, depending on the decode width. For
                 efficient use of the cache space, a sectored cache
                 design is used for the DFC so that both cacheable and
                 noncacheable instructions can coexist in the DFC
                 sector. Experimental results show that the DFC reduces
                 processor power by 34\% on an average and our next
                 fetch prediction mechanism reduces miss penalty by more
                 than 91\%.",
  acknowledgement = ack-nhfb,
  articleno =    "14",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Cache; embedded processors; power optimization",
}

@Article{Issenin:2007:DDR,
  author =       "Ilya Issenin and Erik Brockmeyer and Miguel Miranda
                 and Nikil Dutt",
  title =        "{DRDU}: a data reuse analysis technique for efficient
                 scratch-pad memory management",
  journal =      j-TODAES,
  volume =       "12",
  number =       "2",
  pages =        "15:1--15:??",
  month =        apr,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1230800.1230807",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Jun 12 18:08:48 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "In multimedia and other streaming applications, a
                 significant portion of energy is spent on data
                 transfers. Exploiting data reuse opportunities in the
                 application, we can reduce this energy by making copies
                 of frequently used data in a small local memory and
                 replacing speed- and power-inefficient transfers from
                 main off-chip memory by more efficient local data
                 transfers. In this article we present an automated
                 approach for analyzing these opportunities in a program
                 that allows modification of the program to use custom
                 scratch-pad memory configurations comprising a
                 hierarchical set of buffers for local storage of
                 frequently reused data. Using our approach we are able
                 to both reduce energy consumption of the memory
                 subsystem when using a scratch-pad memory by about a
                 factor of two, on average, and improve memory system
                 performance compared to a cache of the same size.",
  acknowledgement = ack-nhfb,
  articleno =    "15",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "compiler analysis; data reuse analysis; memory
                 hierarchy; Scratch-pad memory management",
}

@Article{Hosseinabady:2007:LTA,
  author =       "Mohammad Hosseinabady and Pejman Lotfi-Kamran and
                 Zainalabedin Navabi",
  title =        "Low test application time resource binding for
                 behavioral synthesis",
  journal =      j-TODAES,
  volume =       "12",
  number =       "2",
  pages =        "16:1--16:??",
  month =        apr,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1230800.1230808",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Jun 12 18:08:48 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Recent advances in process technology have led to a
                 rapid increase in the density of integrated circuits
                 (ICs). Increased density and the need to test for new
                 types of defects in nanometer technologies have
                 resulted in a tremendous increase in test application
                 time (TAT). This article presents a test synthesis
                 method to reduce test application time for testing the
                 datapath of a design. The test application time is
                 reduced by applying a test-time-aware resource sharing
                 algorithm on a scheduled control data flow graph (CDFG)
                 of a design.",
  acknowledgement = ack-nhfb,
  articleno =    "16",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "CDFG; high-level synthesis; test synthesis;
                 Testability",
}

@Article{Elshoukry:2007:CPA,
  author =       "Mohammed Elshoukry and Mohammad Tehranipoor and C. P.
                 Ravikumar",
  title =        "A critical-path-aware partial gating approach for test
                 power reduction",
  journal =      j-TODAES,
  volume =       "12",
  number =       "2",
  pages =        "17:1--17:??",
  month =        apr,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1230800.1230809",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Jun 12 18:08:48 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Power reduction during test application is important
                 from the viewpoint of chip reliability and for
                 obtaining correct test results. One of the ways to
                 reduce scan test power is to block transitions
                 propagating from the outputs of scan cells through
                 combinational logic. In order to accomplish this, some
                 researchers have proposed setting primary inputs to
                 appropriate values or adding extra gates at the outputs
                 of scan cells. In this article, we point out the
                 limitations of such full gating techniques in terms of
                 area overhead and performance degradation. We propose
                 an alternate solution where a partial set of scan cells
                 is gated. A subset of scan cells is selected to give
                 maximum reduction in test power within a given area
                 constraint. An alternate formulation of the problem is
                 to treat maximum permitted test power as a constraint
                 and achieve a test power that is within this limit
                 using the fewest number of gated scan cells, thereby
                 leading to the least impact in area overhead. Our
                 problem formulation also comprehends performance
                 constraints and prevents the inclusion of gating points
                 on critical paths. The area overhead is predictable and
                 closely corresponds to the average power reduction.",
  acknowledgement = ack-nhfb,
  articleno =    "17",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Low-power testing; partial gating; scan cell gating;
                 scan testing",
}

@Article{Pomeranz:2007:FDT,
  author =       "Irith Pomeranz and Sudhakar M. Reddy",
  title =        "Forming {N}-detection test sets without test
                 generation",
  journal =      j-TODAES,
  volume =       "12",
  number =       "2",
  pages =        "18:1--18:??",
  month =        apr,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1230800.1230810",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Jun 12 18:08:48 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "We describe a procedure for forming $n$-detection test
                 sets for $n > 1$ without applying a test generation
                 procedure to target faults. The proposed procedure
                 accepts a one-detection test set. It extracts test
                 cubes for target faults from the one-detection test
                 set, and merges the test cubes to obtain new test
                 vectors. By extracting and merging different test cubes
                 in different iterations of this process, an
                 $n$-detection test set is obtained. Merging of test
                 cubes does not require test generation or fault
                 simulation. Fault simulation is required for extracting
                 test cubes for target faults. We demonstrate that the
                 resulting test set is as effective in detecting
                 untargeted faults as an $n$-detection test set
                 generated by a deterministic test generation procedure.
                 We also discuss the application of the proposed
                 procedure starting from a random test set (instead of a
                 one-detection test set).",
  acknowledgement = ack-nhfb,
  articleno =    "18",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "$n$-detection test sets; Bridging faults; stuck-at
                 faults; test generation",
}

@Article{Fan:2007:ECD,
  author =       "Hongbing Fan and Jiping Liu and Yu-Liang Wu and
                 Chak-Chung Cheung",
  title =        "The exact channel density and compound design for
                 generic universal switch blocks",
  journal =      j-TODAES,
  volume =       "12",
  number =       "2",
  pages =        "19:1--19:??",
  month =        apr,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1230800.1230811",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Jun 12 18:08:48 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "A switch block of $k$ sides $W$ terminals on each side
                 is said to be universal (a $(k,W)$-USB) if it is
                 routable for every set of 2-pin nets of channel density
                 at most $W$. The generic optimum universal switch block
                 design problem is to design a $(k,W)$-USB with the
                 minimum number of switches for every pair of $(k,W)$.
                 This problem was first proposed and solved for $k = 4$
                 in Chang et al. [1996], and then solved for even $W$ or
                 for $k \leq 6$ in Shyu et al. [2000] and Fan et al.
                 [2002b]. No optimum $(k,W)$-USB is known for $k \geq 7$
                 and odd $W \geq 3$. But it is already known that when
                 $W$ is a large odd number, a near-optimum $(k,W)$-USB
                 can be obtained by a disjoint union of $(W - f_2(k))/2$
                 copies of the optimum $(k,2)$-USB and a noncompound
                 $(k,f_2(k))$-USB, where the value of $f_2(k)$ is
                 unknown for $k \geq 8$. In this article, we show that
                 $f_2(k) = (k + 3 - i)/3$, where $1 \leq i \leq 6$ and $i
                 \equiv k \pmod 6$, and present an explicit design for
                 the noncompound $(k, f_2(k))$-USB. Combining these two
                 results we obtain the exact designs of $(k,W)$-USBs for
                 all $k \geq 7$ and odd $W \geq 3$. The new $(k,W)$-USB
                 designs also yield an efficient detailed routing
                 algorithm.",
  acknowledgement = ack-nhfb,
  articleno =    "19",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "FPGA architecture; routing algorithm; universal switch
                 block",
}

@Article{Lim:2007:ISI,
  author =       {Sung Kyu Lim and Massoud Pedram},
  title =        {Introduction to special issue on demonstrable software
                 systems and hardware platforms},
  journal =      j-TODAES,
  volume =       {12},
  number =       {3},
  pages =        {20:1--20:??},
  month =        aug,
  year =         {2007},
  CODEN =        {ATASFO},
  DOI =          {http://doi.acm.org/10.1145/1255456.1255457},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  bibdate =      {Thu Jun 12 18:09:12 MDT 2008},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  articleno =    {20},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

%%% TODAES 12(3), article 21.  Abstract abbreviations and the IEEE
%%% standard number are written without internal spaces (i.e., e.g.,
%%% 802.16e).
@Article{Hsu:2007:ESC,
  author =       "Chia-Jui Hsu and Ming-Yung Ko and Shuvra S.
                 Bhattacharyya and Suren Ramasubbu and Jos{\'e} Luis
                 Pino",
  title =        "Efficient simulation of critical synchronous dataflow
                 graphs",
  journal =      j-TODAES,
  volume =       "12",
  number =       "3",
  pages =        "21:1--21:??",
  month =        aug,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1255456.1255458",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Jun 12 18:09:12 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "System-level modeling, simulation, and synthesis using
                 electronic design automation (EDA) tools are key steps
                 in the design process for communication and signal
                 processing systems, and the synchronous dataflow (SDF)
                 model of computation is widely used in EDA tools for
                 these purposes. Behavioral representations of modern
                 wireless communication systems typically result in
                 critical SDF graphs: These consist of hundreds of
                 components (or more) and involve complex intercomponent
                 connections with highly multirate relationships (i.e.,
                 with large variations in average rates of data transfer
                 or component execution across different subsystems).
                 Simulating such systems using conventional SDF
                 scheduling techniques generally leads to unacceptable
                 simulation time and memory requirements on modern
                 workstations and high-end PCs. In this article, we
                 present a novel simulation-oriented scheduler (SOS)
                 that strategically integrates several techniques for
                 graph decomposition and SDF scheduling to provide
                 effective, joint minimization of time and memory
                 requirements for simulating critical SDF graphs. We
                 have implemented SOS in the advanced design system
                 (ADS) from Agilent Technologies. Our results from this
                 implementation demonstrate large improvements in
                 simulating real-world, large-scale, and highly
                 multirate wireless communication systems (e.g., 3GPP,
                 Bluetooth, 802.16e, CDMA 2000, XM radio, EDGE, and
                 Digital TV).",
  acknowledgement = ack-nhfb,
  articleno =    "21",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Scheduling; simulation; synchronous dataflow",
}

@Article{Herrera:2007:FHS,
  author =       {Fernando Herrera and Eugenio Villar},
  title =        {A framework for heterogeneous specification and design
                 of electronic embedded systems in {SystemC}},
  journal =      j-TODAES,
  volume =       {12},
  number =       {3},
  pages =        {22:1--22:??},
  month =        aug,
  year =         {2007},
  CODEN =        {ATASFO},
  DOI =          {http://doi.acm.org/10.1145/1255456.1255459},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  bibdate =      {Thu Jun 12 18:09:12 MDT 2008},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract =     {This work proposes a methodology which enables
                 heterogeneous specification of complex, electronic
                 systems in SystemC supporting the integration of
                 components under different models of computation
                 (MoCs). This feature is necessary in order to deal with
                 the growing complexity, concurrency, and heterogeneity
                 of electronic embedded systems. The specification
                 methodology is based on the SystemC standard language.
                 Nevertheless, the use of SystemC for heterogeneous
                 system specification is not straightforward. The first
                 problem to be addressed is the efficient and
                 predictable mapping of untimed events required by
                 abstract MoCs over the discrete-event MoC on which the
                 SystemC simulation kernel is based. This mapping is
                 essential in order to understand the simulation results
                 provided by the SystemC model of those MoCs. The
                 specification methodology proposes the set of rules and
                 guidelines required by each specific MoC. Moreover, the
                 methodology supports a smooth integration of several
                 MoCs in the same system specification. A set of
                 facilities is provided covering the deficiencies of the
                 language. These facilities constitute the
                 methodology-specific library called HetSC. The
                 methodology and associated library have been
                 demonstrated to be useful for the specification of
                 complex, heterogeneous embedded systems supporting
                 essential design tasks such as performance analysis and
                 SW generation.},
  acknowledgement = ack-nhfb,
  articleno =    {22},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords =     {Heterogeneous specification; models of computation;
                 SystemC},
}

@Article{Lee:2007:CCA,
  author =       {Hyung Gyu Lee and Naehyuck Chang and Umit Y. Ogras and
                 Radu Marculescu},
  title =        {On-chip communication architecture exploration: {A}
                 quantitative evaluation of point-to-point, bus, and
                 network-on-chip approaches},
  journal =      j-TODAES,
  volume =       {12},
  number =       {3},
  pages =        {23:1--23:??},
  month =        aug,
  year =         {2007},
  CODEN =        {ATASFO},
  DOI =          {http://doi.acm.org/10.1145/1255456.1255460},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  bibdate =      {Thu Jun 12 18:09:12 MDT 2008},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract =     {Traditionally, design-space exploration for
                 systems-on-chip (SoCs) has focused on the computational
                 aspects of the problem at hand. However, as the number
                 of components on a single chip and their performance
                 continue to increase, a shift from computation-based to
                 communication-based design becomes mandatory. As a
                 result, the communication architecture plays a major
                 role in the area, performance, and energy consumption
                 of the overall system. This article presents a
                 comprehensive evaluation of three on-chip communication
                 architectures targeting multimedia applications.
                 Specifically, we compare and contrast the
                 network-on-chip (NoC) with point-to-point (P2P) and
                 bus-based communication architectures in terms of area,
                 performance, and energy consumption. As the main
                 contribution, we present complete P2P, bus-, and
                 NoC-based implementations of a real multimedia
                 application (i.e. the MPEG-2 encoder), and provide
                 direct measurements using an FPGA prototype and actual
                 video clips, rather than simulation and synthetic
                 workloads. We also support the experimental findings
                 through a theoretical analysis. Both experimental and
                 analysis results show that the NoC architecture scales
                 very well in terms of area, performance, energy, and
                 design effort, while the P2P and bus-based
                 architectures scale poorly on all accounts except for
                 performance and area, respectively.},
  acknowledgement = ack-nhfb,
  articleno =    {23},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords =     {FPGA prototype; MPEG-2 encoder; Networks-on-chip;
                 point-to-point; system-on-chip},
}

@Article{Ha:2007:PHS,
  author =       {Soonhoi Ha and Sungchan Kim and Choonseung Lee and
                 Youngmin Yi and Seongnam Kwon and Young-Pyo Joo},
  title =        {{PeaCE}: a hardware-software codesign environment for
                 multimedia embedded systems},
  journal =      j-TODAES,
  volume =       {12},
  number =       {3},
  pages =        {24:1--24:??},
  month =        aug,
  year =         {2007},
  CODEN =        {ATASFO},
  DOI =          {http://doi.acm.org/10.1145/1255456.1255461},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  bibdate =      {Thu Jun 12 18:09:12 MDT 2008},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract =     {Existent hardware-software (HW-SW) codesign tools
                 mainly focus on HW-SW cosimulation to build a virtual
                 prototyping environment that enables software design
                 and system verification without need of making a
                 hardware prototype. Not only HW-SW cosimulation, but
                 also HW-SW codesign methodology involves system
                 specification, functional simulation, design-space
                 exploration, and hardware-software cosynthesis. The
                 PeaCE codesign environment is the first full-fledged
                 HW-SW codesign environment that provides seamless
                 codesign flow from functional simulation to system
                 synthesis. Targeting for multimedia applications with
                 real-time constraints, PeaCE specifies the system
                 behavior with a heterogeneous composition of three
                 models of computation and utilizes features of the
                 formal models maximally during the whole design
                 process. It is also a reconfigurable framework in the
                 sense that third-party design tools can be integrated
                 to build a customized tool chain. Experiments with
                 industry-strength examples prove the viability of the
                 proposed technique.},
  acknowledgement = ack-nhfb,
  articleno =    {24},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords =     {design-space exploration; embedded systems;
                 Hardware-software codesign; hardware-software
                 cosimulation; model-based design},
}

@Article{Atienza:2007:HSE,
  author =       {David Atienza and Pablo G. {Del Valle} and Giacomo
                 Paci and Francesco Poletti and Luca Benini and Giovanni
                 {De Micheli} and Jose M. Mendias and Roman Hermida},
  title =        {{HW-SW} emulation framework for temperature-aware
                 design in {MPSoCs}},
  journal =      j-TODAES,
  volume =       {12},
  number =       {3},
  pages =        {26:1--26:??},
  month =        aug,
  year =         {2007},
  CODEN =        {ATASFO},
  DOI =          {http://doi.acm.org/10.1145/1255456.1255463},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  bibdate =      {Thu Jun 12 18:09:12 MDT 2008},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract =     {New tendencies envisage multiprocessor
                 systems-on-chips (MPSoCs) as a promising solution for
                 the consumer electronics market. MPSoCs are complex to
                 design, as they must execute multiple applications
                 (games, video) while meeting additional design
                 constraints (energy consumption, time-to-market).
                 Moreover, the rise of temperature in the die for MPSoCs
                 can seriously affect their final performance and
                 reliability. In this article, we present a new
                 hardware-software emulation framework that allows
                 designers a complete exploration of the thermal
                 behavior of final MPSoC designs early in the design
                 flow. The proposed framework uses FPGA emulation as the
                 key element to model hardware components of the
                 considered MPSoC platform at multimegahertz speeds. It
                 automatically extracts detailed system statistics that
                 are used as input to our software thermal library
                 running in a host computer. This library calculates at
                 runtime the temperature of on-chip components, based on
                 the collected statistics from the emulated system and
                 final floorplan of the MPSoC. This enables fast testing
                 of various thermal management techniques. Our results
                 show speedups of three orders of magnitude compared to
                 cycle-accurate MPSoC simulators.},
  acknowledgement = ack-nhfb,
  articleno =    {26},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords =     {emulation; FPGA; MPSoC; temperature; Thermal-aware
                 design},
}

%%% TODAES 12(3), article 25.  The article number follows the DOI
%%% sequence of this issue (suffix ...462 lies between article 24,
%%% ...461, and article 26, ...463); TODO confirm against the ACM
%%% table of contents.
@Article{Wu:2007:EPM,
  author =       "Wei Wu and Lingling Jin and Jun Yang and Pu Liu and
                 Sheldon X.-D. Tan",
  title =        "Efficient power modeling and software thermal sensing
                 for runtime temperature monitoring",
  journal =      j-TODAES,
  volume =       "12",
  number =       "3",
  pages =        "25:1--25:??",
  month =        aug,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1255456.1255462",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Jun 12 18:09:12 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "The evolution of microprocessors has been hindered by
                 increasing power consumption and heat dissipation on
                 die. An excessive amount of heat creates reliability
                 problems, reduces the lifetime of a processor, and
                 elevates the cost of cooling and packaging
                 considerably. It is therefore imperative to be able to
                 monitor the temperature variations across the die in a
                 timely and accurate manner. \par

                 Most current techniques rely on on-chip thermal sensors
                 to report the temperature of the processor.
                 Unfortunately, significant variation in chip
                 temperature both spatially and temporally exposes the
                 limitation of the sensors. We present a compensating
                 approach to tracking chip temperature through an OS
                 resident software module that generates live power and
                 thermal profiles of the processor. We developed such a
                 software thermal sensor (STS) in a Linux system with a
                 Pentium 4 Northwood core. We employed highly efficient
                 numerical methods in our model to minimize the overhead
                 of temperature calculation. We also developed an
                 efficient algorithm for functional unit power modeling.
                 Our power and thermal models are calibrated and
                 validated against on-chip sensor readings, thermal
                 images of the Northwood heat spreader, and the
                 thermometer measurements on the package. The resulting
                 STS offers detailed power and temperature breakdowns of
                 each functional unit at runtime, enabling more
                 efficient online power and thermal monitoring and
                 management at a higher level, such as the operating
                 system.",
  acknowledgement = ack-nhfb,
  articleno =    "25",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Power; thermal",
}

%%% TODAES 12(3), article 27.  Percentages in the abstract are decimal
%%% numbers (e.g., 33.20\%), written without a space after the decimal
%%% point.
@Article{Huang:2007:ESC,
  author =       "Po-Kuan Huang and Soheil Ghiasi",
  title =        "Efficient and scalable compiler-directed energy
                 optimization for realtime applications",
  journal =      j-TODAES,
  volume =       "12",
  number =       "3",
  pages =        "27:1--27:??",
  month =        aug,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1255456.1255464",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Jun 12 18:09:12 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "With continuing shrinkage of technology feature sizes,
                 the share of leakage in total energy consumption of
                 digital systems continues to grow. Coordinated supply
                 voltage and body bias throttling enables the compiler
                 to better optimize the total energy consumption of the
                 system in future technology nodes. We present a
                 compilation technique that targets realtime
                 applications running on embedded processors with
                 combined dynamic voltage scaling (DVS) and adaptive
                 body biasing (ABB) capabilities. Considering the delay
                 and energy penalty of switching between operating modes
                 of the processor, our compiler judiciously inserts
                 mode-switch instructions in selected locations of the
                 code and generates executable binary that is guaranteed
                 to meet the deadline constraint. More importantly, our
                 algorithm runs very fast and comes reasonably close to
                 the theoretical limit of energy optimization using DVS
                 + ABB. At 65nm technology, we improve the energy
                 dissipation of the generated code by an average of
                 33.20\% under deadline constraints. While our technique's
                 improvement in energy dissipation over conventional DVS
                 is marginal (6.91\%) at 130nm, the average improvement
                 continues to grow to 13.19\%, 22.97\%, and 33.21\%
                 for 90nm, 65nm, and 45nm technology nodes,
                 respectively. Compared to a recent ILP-based
                 competitor, we improve the runtime by more than three
                 orders of magnitude, while producing improved
                 results.",
  acknowledgement = ack-nhfb,
  articleno =    "27",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "energy-aware compiler; Leakage; technology scaling",
}

@Article{Shi:2007:CSO,
  author =       {Yiyu Shi and Paul Mesa and Hao Yu and Lei He},
  title =        {Circuit-simulated obstacle-aware {Steiner} routing},
  journal =      j-TODAES,
  volume =       {12},
  number =       {3},
  pages =        {28:1--28:??},
  month =        aug,
  year =         {2007},
  CODEN =        {ATASFO},
  DOI =          {http://doi.acm.org/10.1145/1255456.1255465},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  bibdate =      {Thu Jun 12 18:09:12 MDT 2008},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract =     {This article develops circuit-simulated routing
                 algorithms. We model the routing graph by an RC network
                 with terminals as inputs, and show that the faster an
                 output reaches its peak, the higher the possibility for
                 the corresponding Hanan or escape node to become a
                 Steiner point. This enables us to select Steiner points
                 and then apply any minimum spanning tree algorithm to
                 obtain obstacle-free or obstacle-aware Steiner routing.
                 Compared with existing algorithms, our algorithms have
                 significant gain on either wirelength or runtime for
                 obstacle-free routing, and on both wirelength and
                 runtime for obstacle-aware routing.},
  acknowledgement = ack-nhfb,
  articleno =    {28},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords =     {OARSMT; Routing; RSMT; simulation},
}

%%% TODAES 12(3), article 29.  The acronyms CMOS, PCMOS, and PSOC are
%%% written in capitals throughout the abstract, matching the
%%% "probabilistic CMOS or PCMOS" definition it contains.
@Article{Chakrapani:2007:PSC,
  author =       "Lakshmi N. Chakrapani and Pinar Korkmaz and Bilge E.
                 S. Akgul and Krishna V. Palem",
  title =        "Probabilistic system-on-a-chip architectures",
  journal =      j-TODAES,
  volume =       "12",
  number =       "3",
  pages =        "29:1--29:??",
  month =        aug,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1255456.1255466",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Jun 12 18:09:12 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Parameter variations, noise susceptibility, and
                 increasing energy dissipation of CMOS devices have been
                 recognized as major challenges in circuit and
                 microarchitecture design in the nanometer regime. Among
                 these, parameter variations and noise susceptibility
                 are increasingly causing CMOS devices to behave in an
                 ``unreliable'' or ``probabilistic'' manner. To address
                 these challenges, a shift in design paradigm from
                 current-day deterministic designs to ``statistical'' or
                 ``probabilistic'' designs is deemed inevitable. To
                 respond to this need, in this article, we introduce and
                 study an entirely novel family of probabilistic
                 architectures: the probabilistic system-on-a-chip
                 (PSOC). PSOC architectures are based on CMOS devices
                 rendered probabilistic due to noise, referred to as
                 probabilistic CMOS or PCMOS devices. We demonstrate
                 that in addition to harnessing the probabilistic
                 behavior of PCMOS devices, PSOC architectures yield
                 significant improvements, both in energy consumed as
                 well as performance in the context of probabilistic or
                 randomized applications with broad utility. All of our
                 application and architectural savings are quantified
                 using the product of the energy and performance,
                 denoted (energy $\times$ performance): The PCMOS-based
                 gains are as high as a substantial multiplicative
                 factor of over 560 when compared to a competing
                 energy-efficient CMOS-based realization. Our
                 architectural design is application specific and
                 involves navigating design space spanning the algorithm
                 (application), its architecture (PSOC), and the
                 probabilistic technology (PCMOS).",
  acknowledgement = ack-nhfb,
  articleno =    "29",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Embedded systems; probabilistic computing",
}

@Article{Hsieh:2007:FDC,
  author =       {Ang-Chih Hsieh and Tzu-Teng Lin and Tsuang-Wei Chang
                 and Tingting Hwang},
  title =        {A functionality-directed clustering technique for
                 low-power {MTCMOS} design---computation of
                 simultaneously discharging current},
  journal =      j-TODAES,
  volume =       {12},
  number =       {3},
  pages =        {30:1--30:??},
  month =        aug,
  year =         {2007},
  CODEN =        {ATASFO},
  DOI =          {http://doi.acm.org/10.1145/1255456.1255467},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  bibdate =      {Thu Jun 12 18:09:12 MDT 2008},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract =     {Multithreshold CMOS (MTCMOS) is a circuit style that
                 can effectively reduce leakage power consumption. Sleep
                 transistor sizing is the key issue when a MTCMOS
                 circuit is designed. If the size of sleep transistor is
                 large enough, the circuit performance can surely be
                 maintained but the area and dynamic power consumption
                 of the sleep transistor may increase. On the other
                 hand, if the sleep transistor size is too small, there
                 will be significant performance degradation because of
                 the increased resistance to ground. Previous approaches
                 [Kao et al. 1998; Anis et al. 2002] to designing sleep
                 transistor size are based mainly on mutually-exclusive
                 discharge patterns. However, these approaches
                 considered only the topology of a circuit (i.e.,
                 interconnections of nodes in the circuit-graph saving
                 the functionality of node). We observed that any two
                 possible simultaneously switching gates may not
                 discharge at the same time in terms of functionality.
                 Thus, we propose an algorithm to determine how to
                 cluster cells to share sleep transistors, while taking
                 both topology and functionality into consideration.
                 Moreover, one placement refinement algorithm that takes
                 clustering information into account will be presented.
                 At the logic level, the results show that the proposed
                 clustering method can achieve an average of 22\%
                 reduction in terms of the number of unit-size sleep
                 transistors as compared to a method that does not
                 consider functionality. At the physical level, two
                 placement results are discussed. The first is produced
                 by a traditional placement tool plus topology check
                 (functionality check) for insertion of sleep
                 transistors. It shows that the functionality check
                 algorithm produces 9\% less chip area as compared with
                 the topology check algorithm. The second result is
                 produced by a placement refinement algorithm where the
                 initial placement is done in the first placement
                 experiment. It shows that the placement refinement
                 algorithm achieves 5\% more reduction in area at the
                 expense of 4\% increase in wire length. Totally, around
                 14\% reduction is achieved by utilizing the clustering
                 information.},
  acknowledgement = ack-nhfb,
  articleno =    {30},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords =     {DSTN; low power; MTCMOS; sleep transistor},
}

@Article{Dastidar:2007:VST,
  author =       {Tathagato Rai Dastidar and P. P. Chakrabarti},
  title =        {A verification system for transient response of analog
                 circuits},
  journal =      j-TODAES,
  volume =       {12},
  number =       {3},
  pages =        {31:1--31:??},
  month =        aug,
  year =         {2007},
  CODEN =        {ATASFO},
  DOI =          {http://doi.acm.org/10.1145/1255456.1255468},
  ISSN =         {1084-4309 (print), 1557-7309 (electronic)},
  bibdate =      {Thu Jun 12 18:09:12 MDT 2008},
  bibsource =    {http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract =     {We present a method for application of formal
                 techniques like model checking and equivalence checking
                 for validation of the transient response of nonlinear
                 analog circuits. We propose a temporal logic called Ana
                 CTL (computational tree logic for analog circuit
                 verification) which is suitable for specifying
                 properties specific to analog circuits. The application
                 of Ana CTL for validation of transient behavior of
                 arbitrarily nonlinear analog circuits is presented. The
                 transient response of a circuit under all possible
                 input waveforms is represented as a finite state
                 machine (FSM), by bounding and discretizing the
                 continuous state space of an analog circuit. We have
                 developed algorithms to run Ana CTL queries on this
                 discretized model using search-based methods which
                 reduce the runtime considerably by avoiding creation of
                 the whole FSM. The application of these methods on
                 several real-life analog circuits is presented and we
                 show that this system is a useful aid for detecting and
                 debugging early design errors. \par

                 We also present methods for checking the equivalence of
                 transient response of two analog circuits. The behavior
                 of two different analog circuits can rarely be exactly
                 similar. Hence, we introduce a notion of approximate
                 equivalence. A query language for checking different
                 notions of user-definable approximate equivalence is
                 presented which extends the syntax of the Ana CTL model
                 checking language. In its extended form, Ana CTL can be
                 used combining model checking with equivalence
                 checking.},
  acknowledgement = ack-nhfb,
  articleno =    {31},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords =     {Ana CTL; Analog circuits; equivalence checking; model
                 checking; query language; transient response},
}

@Article{Chang:2007:PRE,
  author =       "Kai-Hui Chang and Igor L. Markov and Valeria
                 Bertacco",
  title =        "Postplacement rewiring by exhaustive search for
                 functional symmetries",
  journal =      j-TODAES,
  volume =       "12",
  number =       "3",
  pages =        "32:1--32:??",
  month =        aug,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1255456.1255469",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Jun 12 18:09:12 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "We propose two new algorithms for rewiring: a
                 postplacement optimization that reconnects pins of a
                 given netlist without changing the logic function and
                 gate locations. In the first algorithm, we extract
                 small subcircuits consisting of several gates from the
                 design and reconnect pins according to the symmetries
                 of the subcircuits. To enhance the power of symmetry
                 detection, we also propose a graph-based symmetry
                 detector that can identify permutational and
                 phase-shift symmetries on multiple input and output
                 wires, as well as hybrid symmetries, creating abundant
                 opportunities for rewiring. Our second algorithm,
                 called long-range rewiring, is based on reconnecting
                 equivalent pins and can augment the first approach for
                 further optimization. We apply our techniques for
                 wirelength optimization and observe that they provide
                 wirelength reduction comparable to that achieved by
                 detailed placement.",
  acknowledgement = ack-nhfb,
  articleno =    "32",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "placement; rewiring; VLSI",
}

@Article{Mathaikutty:2007:EMD,
  author =       "Deepak Mathaikutty and Hiren Patel and Sandeep Shukla
                 and Axel Jantsch",
  title =        "{EWD}: a metamodeling driven customizable multi-{MoC}
                 system modeling framework",
  journal =      j-TODAES,
  volume =       "12",
  number =       "3",
  pages =        "33:1--33:??",
  month =        aug,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1255456.1255470",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Jun 12 18:09:12 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "We present the EWD design environment and methodology,
                 a modeling and simulation framework suited for complex
                 and heterogeneous embedded systems with varying degrees
                 of expressibility and modeling fidelity. This
                 environment promotes the use of multiple models of
                 computation (MoCs) to support heterogeneity and
                 metamodeling for conformance tests of syntactic and
                 static semantics during the process of modeling.
                 Therefore, EWD is a multiple MoC modeling and
                 simulation framework that ensures conformance of the
                 MoC formalisms during model construction using a
                 metamodeling approach. In addition, EWD provides a
                 suite of translation tools that generate executable
                 models for two simulation frameworks to demonstrate its
                 language-independent modeling framework. The EWD
                 methodology uses the Generic Modeling Environment for
                 customization of the MoC-specific modeling syntax into
                 a visual representation. To embed the execution
                 semantics of the MoCs into the models, we have built
                 parsing and translation tools that leverage an
                 XML-based interoperability language. This
                 interoperability language is then translated into
                 executable Standard ML or Haskell models that can also
                 be analyzed by existing simulation frameworks such as
                 SML-Sys or ForSyDe. In summary, EWD is a metamodeling
                 driven multitarget design environment with multi-MoC
                 modeling capability.",
  acknowledgement = ack-nhfb,
  articleno =    "33",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "denotational semantics; ForSyDe; functional language;
                 heterogeneous system design; interoperable modeling
                 language; metamodel; Metamodeling; MoC; Ptolemy II;
                 SystemC",
}

@Article{Stitt:2007:BS,
  author =       "Greg Stitt and Frank Vahid",
  title =        "Binary synthesis",
  journal =      j-TODAES,
  volume =       "12",
  number =       "3",
  pages =        "34:1--34:??",
  month =        aug,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1255456.1255471",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Jun 12 18:09:12 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Recent high-level synthesis approaches and C-based
                 hardware description languages attempt to improve the
                 hardware design process by allowing developers to
                 capture desired hardware functionality in a well-known
                 high-level source language. However, these approaches
                 have yet to achieve wide commercial success due in part
                 to the difficulty of incorporating such approaches into
                 software tool flows. The requirement of using a
                 specific language, compiler, or development environment
                 may cause many software developers to resist such
                 approaches due to the difficulty and possible
                 instability of changing well-established robust tool
                 flows. Thus, in the past several years, synthesis from
                 binaries has been introduced, both in research and in
                 commercial tools, as a means of better integrating with
                 tool flows by supporting all high-level languages and
                 software compilers. Binary synthesis can be more easily
                 integrated into a software development tool-flow by
                 only requiring an additional backend tool, and it even
                 enables completely transparent dynamic translation of
                 executing binaries to configurable hardware circuits.
                 In this article, we survey the key technologies
                 underlying the important emerging field of binary
                 synthesis. We compare binary synthesis to several
                 related areas of research, and we then describe the key
                 technologies required for effective binary synthesis:
                 decompilation techniques necessary for binary synthesis
                 to achieve results competitive with source-level
                 synthesis, hardware/software partitioning methods
                 necessary to find critical binary regions suitable for
                 synthesis, synthesis methods for converting regions to
                 custom circuits, and binary update methods that enable
                 replacement of critical binary regions by circuits.",
  acknowledgement = ack-nhfb,
  articleno =    "34",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Binary synthesis; configurable logic; FPGA;
                 hardware/software codesign; hardware/software
                 partitioning; synthesis from software binaries; warp
                 processors",
}

@Article{Galanis:2007:SES,
  author =       "Michalis D. Galanis and Gregory Dimitroulakos and
                 Spyros Tragoudas and Costas E. Goutis",
  title =        "Speedups in embedded systems with a high-performance
                 coprocessor datapath",
  journal =      j-TODAES,
  volume =       "12",
  number =       "3",
  pages =        "35:1--35:??",
  month =        aug,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1255456.1255472",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Jun 12 18:09:12 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "This article presents the speedups achieved in a
                 generic single-chip microprocessor system by employing
                 a high-performance datapath. The datapath acts as a
                 coprocessor that accelerates computational-intensive
                 kernel sections thereby increasing the overall
                 performance. We have previously introduced the datapath
                 which is composed of Flexible Computational Components
                 (FCCs). These components can realize any two-level
                 template of primitive operations. The automated
                 coprocessor synthesis method from high-level software
                 description and its integration to a design flow for
                 executing applications on the system is presented. For
                 evaluating the effectiveness of our coprocessor
                 approach, analytical study in respect to the type of
                 the custom datapath and to the microprocessor
                 architecture is performed. The overall application
                 speedups of several real-life applications relative to
                 the software execution on the microprocessor are
                 estimated using the design flow. These speedups range
                 from 1.75 to 5.84, with an average value of 3.04,
                 while the overhead in circuit area is small. The design
                 flow achieved the acceleration of the applications near
                 to theoretical speedup bounds. A comparison with
                 another high-performance datapath showed that the
                 proposed coprocessor achieves smaller area-time
                 products by an average of 23\% for the generated
                 datapaths. Additionally, the FCC coprocessor achieves
                 better performance in accelerating kernels relative to
                 software-programmable DSP cores.",
  acknowledgement = ack-nhfb,
  articleno =    "35",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "chaining; coprocessor datapath; design flow; kernels;
                 Performance improvements; synthesis",
}

@Article{Roy:2007:EPA,
  author =       "Suchismita Roy and P. P. Chakrabarti and Pallab
                 Dasgupta",
  title =        "Event propagation for accurate circuit delay
                 calculation using {SAT}",
  journal =      j-TODAES,
  volume =       "12",
  number =       "3",
  pages =        "36:1--36:??",
  month =        aug,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1255456.1255473",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Jun 12 18:09:12 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "A SAT-based modeling for event propagation in
                 gate-level digital circuits, which is used for accurate
                 calculation of critical delay in combinational and
                 sequential circuits, is presented in this article. The
                 accuracy of the critical delay estimation process
                 depends on the accuracy with which the circuit in
                 operation is modeled. A high level of precision in the
                 modeling of the internal events in a circuit for the
                 sake of greater accuracy causes a combinatorial blowup
                 in the size of the problem, resulting in a scalability
                 bottleneck for which most existing techniques effect a
                 trade-off by restricting themselves to less precise
                 models. SAT based techniques have a good track record
                 in efficiency and scalability when the problem sizes
                 become too large for most other methods. This article
                 proposes a SAT-based technique for symbolic event
                 propagation within a circuit which facilitates the
                 estimation of the critical delay of circuits with a
                 greater degree of accuracy, while at the same time
                 scaling efficiently to large circuits. We report very
                 encouraging results on the ISCAS85 and ISCAS89
                 benchmark circuits using the proposed technique.",
  acknowledgement = ack-nhfb,
  articleno =    "36",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Critical delay; event propagation; SAT",
}

@Article{Yuh:2007:TFU,
  author =       "Ping-Hung Yuh and Chia-Lin Yang and Yao-Wen Chang",
  title =        "Temporal floorplanning using the three-dimensional
                 transitive closure {subGraph}",
  journal =      j-TODAES,
  volume =       "12",
  number =       "4",
  pages =        "37:1--37:??",
  month =        sep,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1278349.1278350",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Jun 12 18:09:35 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Improving logic capacity by time-sharing, dynamically
                 reconfigurable Field Programmable Gate Arrays (FPGAs)
                 are employed to handle designs of high complexity and
                 functionality. In this paper, we use a novel
                 graph-based topological floorplan representation, named
                 3D-subTCG (3-Dimensional Transitive Closure subGraph),
                 to deal with the 3-dimensional (temporal)
                 floorplanning/placement problem, arising from
                 dynamically reconfigurable FPGAs. The 3D-subTCG uses
                 three transitive closure graphs to model the temporal
                 and spatial relations between modules. We derive the
                 feasibility conditions for the precedence constraints
                 induced by the execution of the dynamically
                 reconfigurable FPGAs. Because the geometric
                 relationship is transparent to the 3D-subTCG and its
                 induced operations (i.e., we can directly detect the
                 relationship between any two tasks from the
                 representation), we can easily detect any violation of
                 the temporal precedence constraints on 3D-subTCG. We
                 also derive important properties of the 3D-subTCG to
                 reduce the solution space and shorten the running time
                 for 3D (temporal) floorplanning/placement. Experimental
                 results show that our 3D-subTCG-based algorithm is very
                 effective and efficient.",
  acknowledgement = ack-nhfb,
  articleno =    "37",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "partially dynamical reconfiguration; Reconfigurable
                 computing; temporal floorplanning",
}

@Article{Liu:2007:IEM,
  author =       "Jinfeng Liu and Pai H. Chou",
  title =        "Idle energy minimization by mode sequence
                 optimization",
  journal =      j-TODAES,
  volume =       "12",
  number =       "4",
  pages =        "38:1--38:??",
  month =        sep,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1278349.1278351",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Jun 12 18:09:35 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "This article presents techniques for reducing idle
                 energy by mode-sequence optimization (MSO) under timing
                 constraints. Our component-level CoMSO algorithm
                 computes energy-optimal mode-transition sequences for
                 different lengths of idle intervals. Our system-level
                 SyMSO algorithm shifts tasks within slack intervals
                 while satisfying all timing and resource constraints in
                 the given schedule. Experimental results on a
                 commercial software-defined radio show that these new
                 techniques can reduce idle energy by 50--70\%, or
                 30--50\% of total system energy over previous
                 offline-optimal but unsequenced techniques based on
                 localized break-even-time analysis, thanks to rich
                 options offered by mode sequencing.",
  acknowledgement = ack-nhfb,
  articleno =    "38",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "communication speed selection;
                 communication/computation trade-offs; embedded
                 multi-processor; Functional partitioning; low-power
                 design",
}

@Article{Gorjiara:2007:UFE,
  author =       "Bita Gorjiara and Nader Bagherzadeh and Pai H. Chou",
  title =        "Ultra-fast and efficient algorithm for energy
                 optimization by gradient-based stochastic voltage and
                 task scheduling",
  journal =      j-TODAES,
  volume =       "12",
  number =       "4",
  pages =        "39:1--39:??",
  month =        sep,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1278349.1278352",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Jun 12 18:09:35 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "This paper presents a new technique, called Adaptive
                 Stochastic Gradient Voltage-and-Task Scheduling
                 (ASG-VTS), for power optimization of multicore hard
                 realtime systems. ASG-VTS combines stochastic and
                 energy-gradient techniques to simultaneously solve the
                 slack distribution and task reordering problem. It
                 produces very efficient results with few mode
                 transitions. Our experiments show that ASG-VTS reduces
                 number of mode transitions by 4.8 times compared to
                 traditional energy-gradient-based approaches. Also, our
                 heuristic algorithm can quickly find a solution that is
                 as good as the optimal for a real-life GSM
                 encoder/decoder benchmark. The runtime of ASG-VTS is
                 150 times and 1034 times faster than energy-gradient
                 based and optimal ILP algorithms, respectively. Since
                 the runtime of ASG-VTS is very low, it is ideal for
                 design space exploration in system-level design tools.
                 We have also developed a web-based interface for
                 ASG-VTS algorithm.",
  acknowledgement = ack-nhfb,
  articleno =    "39",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Power management; slack distribution; voltage and task
                 scheduling",
}

@Article{Vanbroekhoven:2007:PDS,
  author =       "Peter Vanbroekhoven and Gerda Janssens and Maurice
                 Bruynooghe and Francky Catthoor",
  title =        "A practical dynamic single assignment transformation",
  journal =      j-TODAES,
  volume =       "12",
  number =       "4",
  pages =        "40:1--40:??",
  month =        sep,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1278349.1278353",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Jun 12 18:09:35 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "This paper presents a novel method to construct a
                 dynamic single assignment (DSA) form of array
                 intensive, pointer free C programs. A program in DSA
                 form does not perform any destructive update of scalars
                 and array elements; that is, each element is written at
                 most once. As DSA makes the dependencies between
                 variable references explicit, it facilitates complex
                 analyses and optimizations of programs. Existing
                 transformations into DSA perform a complex data flow
                 analysis with exponential analysis time, and they work
                 only for a limited class of input programs. Our method
                 removes irregularities from the data flow by adding
                 copy assignments to the program, so that it can use
                 simple data flow analyses. The presented DSA
                 transformation scales very well with growing program
                 sizes and overcomes a number of important limitations
                 of existing methods. We have implemented the method and
                 it is being used in the context of memory optimization
                 and verification of those optimizations. Experiments
                 show that in practice, the method scales well indeed,
                 and that added copy operations can be removed in case
                 they are unwanted.",
  acknowledgement = ack-nhfb,
  articleno =    "40",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "arrays; Data flow analysis; parallelization; reaching
                 definitions; single assignment",
}

@Article{Kobayashi:2007:MOS,
  author =       "Yuki Kobayashi and Murali Jayapala and Praveen
                 Raghavan and Francky Catthoor and Masaharu Imai",
  title =        "Methodology for operation shuffling and {L0} cluster
                 generation for low energy heterogeneous {VLIW}
                 processors",
  journal =      j-TODAES,
  volume =       "12",
  number =       "4",
  pages =        "41:1--41:??",
  month =        sep,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1278349.1278354",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Jun 12 18:09:35 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Clustering L0 buffers is effective for energy
                 reduction in the instruction memory hierarchy of
                 embedded VLIW processors. However, the efficiency of
                 the clustering depends on the schedule of the target
                 application. Especially in heterogeneous or data
                 clustered VLIW processors, determining energy efficient
                 scheduling is more constraining. \par

                 This article proposes a realistic technique supported
                 by a tool flow to explore operation shuffling for
                 improving generation of L0 clusters. The tool flow
                 explores assignment of operations for each cycle and
                 generates various schedules. This approach makes it
                 possible to reduce energy consumption for various
                 processor architectures. However, the computational
                 complexity is large because of the huge exploration
                 space. Therefore, some heuristics are also developed,
                 which reduce the size of the exploration space while
                 the solution quality remains reasonable. Furthermore,
                 we also propose a technique to support VLIW processors
                 with multiple data clusters, which is essential to
                 apply the methodology to real world processors.
                 \par

                 The experimental results indicate potential gains of up
                 to 27.6\% in energy in L0 buffers, through operation
                 shuffling for heterogeneous processor architectures as
                 well as a homogeneous architecture. Furthermore, the
                 proposed heuristics drastically reduce the exploration
                 search space by about 90\%, while the results are
                 comparable to full search, with average differences of
                 less than 1\%. The experimental results indicate that
                 energy efficiency can be improved in most of the media
                 benchmarks by the proposed methodology, where the
                 average gain is around 10\% in comparison with
                 generating clusters without operation shuffling.",
  acknowledgement = ack-nhfb,
  articleno =    "41",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Compilers for low energy; loop buffers; VLIW
                 processors",
}

@Article{Maslov:2007:TSR,
  author =       "D. Maslov and G. W. Dueck and D. M. Miller",
  title =        "Techniques for the synthesis of reversible {Toffoli}
                 networks",
  journal =      j-TODAES,
  volume =       "12",
  number =       "4",
  pages =        "42:1--42:??",
  month =        sep,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1278349.1278355",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Jun 12 18:09:35 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "We present certain new techniques for the synthesis of
                 reversible networks of Toffoli gates, as well as
                 improvements to previous methods. Gate count and
                 technology oriented cost metrics are used. Two new
                 synthesis procedures employing Reed--Muller spectra are
                 introduced and shown to complement earlier synthesis
                 approaches. The previously proposed template
                 simplification method is enhanced through the
                 introduction of a faster and more efficient template
                 application algorithm, an updated classification of the
                 templates, and the addition of new templates of sizes 7
                 and 9. A resynthesis approach is introduced wherein a
                 sequence of gates is chosen from a network, and the
                 reversible specification it realizes is resynthesized
                 as an independent problem in hopes of reducing the
                 network cost. Empirical results are presented to show
                 that the methods are efficient in terms of the
                 realization of reversible benchmark specifications.",
  acknowledgement = ack-nhfb,
  articleno =    "42",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "circuit optimization; quantum computing; reversible
                 logic synthesis",
}

@Article{Bouchebaba:2007:MMO,
  author =       "Youcef Bouchebaba and Bruno Girodias and Gabriela
                 Nicolescu and El Mostapha Aboulhamid and Bruno
                 Lavigueur and Pierre Paulin",
  title =        "{MPSoC} memory optimization using program
                 transformation",
  journal =      j-TODAES,
  volume =       "12",
  number =       "4",
  pages =        "43:1--43:??",
  month =        sep,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1278349.1278356",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Jun 12 18:09:35 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Multiprocessor system-on-a-chip (MPSoC) architectures
                 have received a lot of attention in the past years, but
                 few advances in compilation techniques target these
                 architectures. This is particularly true for the
                 exploitation of data locality. Most of the compilation
                 techniques for parallel architectures discussed in the
                 literature are based on a single loop nest. This
                 article presents new techniques that consist in
                 applying loop fusion and tiling to several loop nests
                 and to parallelize the resulting code across different
                 processors. These two techniques reduce the number of
                 memory accesses. However, they increase dependencies
                 and thereby reduce the exploitable parallelism in the
                 code. This article tries to address this contradiction.
                 To optimize the memory space used by temporary arrays,
                 smaller buffers are used as a replacement. Different
                 strategies are studied to optimize the processing time
                 spent accessing these buffers. The experiments show
                 that these techniques yield a significant reduction in
                 the number of data cache misses (30\%) and in
                 processing time (50\%).",
  acknowledgement = ack-nhfb,
  articleno =    "43",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "compiler transformations; data cache; Data locality;
                 embedded systems",
}

@Article{Das:2007:FVT,
  author =       "Dipankar Das and P. P. Chakrabarti and Rajeev Kumar",
  title =        "Functional verification of task partitioning for
                 multiprocessor embedded systems",
  journal =      j-TODAES,
  volume =       "12",
  number =       "4",
  pages =        "44:1--44:??",
  month =        sep,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1278349.1278357",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Jun 12 18:09:35 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "With the advent of multiprocessor embedded platforms,
                 application partitioning and mapping have gained
                 primacy as a design step. The output of this design
                 step is a multithreaded partitioned application where
                 each thread is mapped to a processing element
                 (processor or ASIC) in the multiprocessor platform.
                 This partitioned application must be verified to be
                 consistent with the native unpartitioned application.
                 This verification task is called application (or task)
                 partitioning verification. \par

                 This work proposes a code-block-level
                 containment-checking-based methodology for application
                 partitioning verification. We use a UML-based
                 code-block-level modeling language which is rich enough
                 to model most designs. We formulate the application
                 partitioning verification problem as a special case of
                 the containment checking problem, which we call the
                 complete containment checking problem. We propose a
                 state space reduction technique specific to the
                 containment checking, reachability analysis, and
                 deadlock detection problems. We propose novel data
                 structures and token propagation methodologies which
                 enhance the efficiency of containment checking. We
                 present an efficient containment checking algorithm for
                 the application partitioning verification problem. We
                 develop a containment checking tool called TraceMatch
                 and present experimental results. We present a
                 comparison of the state space reduction achieved by
                 TraceMatch with that achieved by formal analysis and
                 verification tools like Spin, PEP, PROD, and LoLA.",
  acknowledgement = ack-nhfb,
  articleno =    "44",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Containment checking; multiprocessor embedded systems;
                 state space reduction; UML activity diagrams",
}

@Article{Huang:2007:CSS,
  author =       "Shih-Hsu Huang and Yow-Tyng Nieh",
  title =        "Clock skew scheduling with race conditions
                 considered",
  journal =      j-TODAES,
  volume =       "12",
  number =       "4",
  pages =        "45:1--45:??",
  month =        sep,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1278349.1278358",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Jun 12 18:09:35 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "In this article, we provide a fresh viewpoint to the
                 interactions between clock skew scheduling and delay
                 insertion. A race-condition-aware (RCA) clock skew
                 scheduling is proposed to determine the clock skew
                 schedule by taking race conditions (i.e., hold
                 violations) into account. Our objective is not only to
                 optimize the clock period, but also to minimize
                 heuristically the required inserted delay. Compared
                 with previous work, our major contribution includes the
                 following two aspects. First, our approach achieves
                 exactly the same results, but has significant
                 improvement in time complexity. Second, our viewpoint
                 can be generalized to other sequential timing
                 optimization techniques.",
  acknowledgement = ack-nhfb,
  articleno =    "45",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "logic synthesis; performance optimization; Sequential
                 circuits; timing optimization",
}

@Article{Wang:2007:ETR,
  author =       "Gang Wang and Wenrui Gong and Brian Derenzi and Ryan
                 Kastner",
  title =        "Exploring time\slash resource trade-offs by solving
                 dual scheduling problems with the ant colony
                 optimization",
  journal =      j-TODAES,
  volume =       "12",
  number =       "4",
  pages =        "46:1--46:??",
  month =        sep,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1278349.1278359",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Jun 12 18:09:35 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Design space exploration during high-level synthesis
                 is often conducted through ad hoc probing of the
                 solution space using some scheduling algorithm. This is
                 not only time consuming but also very dependent on
                 designer's experience. We propose a novel design
                 exploration method that exploits the duality of time-
                 and resource-constrained scheduling problems. Our
                 exploration automatically constructs a time/area
                 tradeoff curve in a fast, effective manner. It is a
                 general approach and can be combined with any
                 high-quality scheduling algorithm. In our work, we use
                 the max-min ant colony optimization technique to solve
                 both time- and resource-constrained scheduling
                 problems. Our algorithm provides significant
                 solution-quality savings (average 17.3\% reduction of
                 resource counts) with similar runtime compared to using
                 force-directed scheduling exhaustively at every time
                 step. It also scales well across a comprehensive
                 benchmark suite constructed with classic and real-life
                 samples.",
  acknowledgement = ack-nhfb,
  articleno =    "46",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "ant colony optimization; Design space exploration;
                 instruction scheduling; max-min ant system",
}

@Article{Ghosh:2007:LPT,
  author =       "Swaroop Ghosh and Swarup Bhunia and Kaushik Roy",
  title =        "Low-power and testable circuit synthesis using
                 {Shannon} decomposition",
  journal =      j-TODAES,
  volume =       "12",
  number =       "4",
  pages =        "47:1--47:??",
  month =        sep,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1278349.1278360",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Jun 12 18:09:35 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Structural transformation of a design to enhance its
                 testability while satisfying design constraints on
                 power and performance can result in improved test cost
                 and test confidence. In this article, we analyze the
                 testability in a new style of logic design based on
                 Shannon's decomposition and supply gating. We observe
                 that the tree structure of a logic circuit due to
                 Shannon's decomposition makes it intrinsically more
                 testable than a conventionally synthesized circuit,
                 while at the same time providing an improvement in
                 active power. We have analyzed four different aspects
                 of the testability of a circuit: (a) IDDQ test
                 sensitivity, (b) test power during scan-based testing,
                 (c) test length (for both ATPG-generated deterministic
                 and random patterns), and (d) noise immunity.
                 Simulation results on a set of MCNC benchmarks show
                 promising results on all these aspects (an average
                 improvement of 94\% in IDDQ sensitivity, 50\% in test
                 power, 19\% (21\%) in test length for deterministic
                 (random) patterns, and 50\% in coupling noise
                 immunity). We have also demonstrated that the new logic
                 structure can improve parametric yield (6\% on average)
                 of a circuit under process variations when considering
                 a bound on circuit leakage.",
  acknowledgement = ack-nhfb,
  articleno =    "47",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Design-for-test; dynamic supply gating; IDDQ; noise
                 immunity; Shannon expansion; test coverage; test
                 power",
}

@Article{Ostler:2007:IHT,
  author =       "Chris Ostler and Karam S. Chatha and Vijay Ramamurthi
                 and Krishnan Srinivasan",
  title =        "{ILP} and heuristic techniques for system-level design
                 on network processor architectures",
  journal =      j-TODAES,
  volume =       "12",
  number =       "4",
  pages =        "48:1--48:??",
  month =        sep,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1278349.1278361",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Jun 12 18:09:35 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Network processors incorporate several architectural
                 features, including symmetric multiprocessing (SMP),
                 block multithreading, and multiple memory elements, to
                 support the high-performance requirements of current
                 day applications. This article presents automated
                 system-level design techniques for application
                 development on such architectures. We propose integer
                 linear programming formulations and heuristic
                 techniques for process allocation and data mapping on
                 SMP and block-multithreading-based network processors.
                 The techniques incorporate process transformations and
                 multithreading-aware data mapping to maximize the
                 throughput of the application. The article presents
                 experimental results that evaluate the techniques by
                 implementing network processing applications on the
                 Intel IXP 2400 architecture.",
  acknowledgement = ack-nhfb,
  articleno =    "48",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "block multithreading; multiprocessor",
}

@Article{Gopalakrishnan:2007:OPD,
  author =       "Sivaram Gopalakrishnan and Priyank Kalla",
  title =        "Optimization of polynomial datapaths using finite ring
                 algebra",
  journal =      j-TODAES,
  volume =       "12",
  number =       "4",
  pages =        "49:1--49:??",
  month =        sep,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1278349.1278362",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Jun 12 18:09:35 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "This article presents an approach to area optimization
                 of arithmetic datapaths at register-transfer level
                 (RTL). The focus is on those designs that perform
                 polynomial computations (add, mult) over finite
                 word-length operands (bit-vectors). We model such
                 polynomial computations over $m$-bit vectors as algebra
                 over finite integer rings of residue classes $Z_{2^m}$.
                 Subsequently, we use the number-theoretic and algebraic
                 properties of such rings to transform a given datapath
                 computation into another, bit-true equivalent
                 computation. We also derive a cost model to estimate,
                 at RTL, the area cost of the computation. Using the
                 transformation procedure along with the cost model, we
                 devise algorithmic procedures to search for a
                 lower-cost implementation. We show how these
                 theoretical concepts can be applied to RTL optimization
                 of arithmetic datapaths within practical CAD settings.
                 Experiments conducted over a variety of benchmarks
                 demonstrate substantial optimizations using our
                 approach.",
  acknowledgement = ack-nhfb,
  articleno =    "49",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "arithmetic datapaths; finite ring algebra; High-level
                 synthesis; modulo arithmetic; polynomial datapaths",
}

@Article{Hu:2007:IHM,
  author =       "Q. Hu and P. G. Kjeldsberg and A. Vandecappelle and M.
                 Palkovic and F. Catthoor",
  title =        "Incremental hierarchical memory size estimation for
                 steering of loop transformations",
  journal =      j-TODAES,
  volume =       "12",
  number =       "4",
  pages =        "50:1--50:??",
  month =        sep,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1278349.1278363",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Jun 12 18:09:35 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Modern embedded multimedia and telecommunications
                 systems need to store and access huge amounts of data.
                 This becomes a critical factor for the overall energy
                 consumption, area, and performance of the systems. Loop
                 transformations are essential to improve the data
                 access locality and regularity in order to optimally
                 design or utilize a memory hierarchy. However, due to
                 abstract high-level cost functions, current loop
                 transformation steering techniques do not take the
                 memory platform sufficiently into account. They usually
                 also result in only one final transformation solution.
                 On the other hand, the loop transformation search space
                 for real-life applications is huge, especially if the
                 memory platform is still not fully fixed. Use of
                 existing loop transformation techniques will therefore
                 typically lead to suboptimal end-products. It is
                 critical to find all interesting loop transformation
                 instances. This can only be achieved by performing an
                 evaluation of the effect of later design stages at the
                 early loop transformation stage. \par

                 This article presents a fast incremental hierarchical
                 memory-size requirement estimation technique. It
                 estimates the influence of any given sequence of loop
                 transformation instances on the mapping of application
                 data onto a hierarchical memory platform. As the exact
                 memory platform instantiation is often not yet defined
                 at this high-level design stage, a platform-independent
                 estimation is introduced with a Pareto curve output for
                 each loop transformation instance. Comparison among the
                 Pareto curves helps the designer, or a steering tool,
                 to find all interesting loop transformation instances
                 that might later lead to low-power data mapping for any
                 of the many possible memory hierarchy instances.
                 Initially, the source code is used as input for
                 estimation. However, performing the estimation
                 repeatedly from the source code is too slow for large
                 search space exploration. An incremental approach,
                 based on local updating of the previous result, is
                 therefore used to handle sequences of different loop
                 transformations. Experiments show that the initial
                 approach takes a few seconds, which is two orders of
                 magnitude faster than state-of-the-art solutions but
                 still too costly to be performed interactively many
                 times. The incremental approach typically takes just a
                 few milliseconds, which is another two orders of
                 magnitude faster than the initial approach. This huge
                 speedup allows us for the first time to handle
                 real-life industrial-size applications and get
                 realistic feedback during loop transformation
                 exploration.",
  acknowledgement = ack-nhfb,
  articleno =    "50",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "code transformation; Data optimization; high-level
                 synthesis; memory architecture exploration; memory size
                 estimation",
}

@Article{You:2007:CCP,
  author =       "Yi-Ping You and Chung-Wen Huang and Jenq Kuen Lee",
  title =        "Compilation for compact power-gating controls",
  journal =      j-TODAES,
  volume =       "12",
  number =       "4",
  pages =        "51:1--51:??",
  month =        sep,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1278349.1278364",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Jun 12 18:09:35 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Power leakage constitutes an increasing fraction of
                 the total power consumption in modern semiconductor
                 technologies due to the continuing size reductions and
                 increasing speeds of transistors. Recent studies have
                 attempted to reduce leakage power using integrated
                 architecture and compiler power-gating mechanisms. This
                 approach involves compilers inserting instructions into
                 programs to shut down and wake up components, as
                 appropriate. While early studies showed this approach
                 to be effective, there are concerns about the large
                 amount of power-control instructions being added to
                 programs due to the increasing amount of components
                 equipped with power-gating controls in SoC design
                 platforms. In this article we present a sink-n-hoist
                 framework for a compiler to generate balanced
                 scheduling of power-gating instructions. Our solution
                 attempts to merge several power-gating instructions
                 into a single compound instruction, thereby reducing
                 the amount of power-gating instructions issued. We
                 performed experiments by incorporating our compiler
                 analysis and scheduling policies into SUIF compiler
                 tools and by simulating the energy consumption using
                 Wattch toolkits. The experimental results demonstrate
                 that our mechanisms are effective in reducing the
                 amount of power-gating instructions while further
                 reducing leakage power compared to previous methods.",
  acknowledgement = ack-nhfb,
  articleno =    "51",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "balanced scheduling; Compilers for low power;
                 data-flow analysis; leakage-power reduction;
                 power-gating mechanisms",
}

@Article{Chen:2007:NMA,
  author =       "Gang Chen and Xiaoyu Song and Feng Liu and Qingping
                 Tan and Fei He",
  title =        "A note on {``A mapping algorithm for computer-assisted
                 exploration in the design of embedded systems''}",
  journal =      j-TODAES,
  volume =       "12",
  number =       "4",
  pages =        "52:1--52:??",
  month =        sep,
  year =         "2007",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1278349.1278365",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Jun 12 18:09:35 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  note =         "See \cite{Mariatos:2001:MAC}.",
  acknowledgement = ack-nhfb,
  articleno =    "52",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Dutt:2008:Ea,
  author =       "Nikil Dutt",
  title =        "Editorial",
  journal =      j-TODAES,
  volume =       "13",
  number =       "1",
  pages =        "1:1--1:??",
  month =        jan,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1297666.1297667",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Jun 12 18:10:00 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  articleno =    "1",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Hsiao:2008:ISS,
  author =       "Michael S. Hsiao and Robert B. Jones",
  title =        "Introduction to special section on high-level design,
                 validation, and test",
  journal =      j-TODAES,
  volume =       "13",
  number =       "1",
  pages =        "2:1--2:??",
  month =        jan,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1297666.1297668",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Jun 12 18:10:00 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  articleno =    "2",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Cabodi:2008:BID,
  author =       "Gianpiero Cabodi and Marco Murciano and Sergio Nocco
                 and Stefano Quer",
  title =        "Boosting interpolation with dynamic localized
                 abstraction and redundancy removal",
  journal =      j-TODAES,
  volume =       "13",
  number =       "1",
  pages =        "3:1--3:??",
  month =        jan,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1297666.1297669",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Jun 12 18:10:00 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "SAT-based Unbounded Model Checking based on Craig
                 Interpolants is often able to overcome BDDs and other
                 SAT-based techniques on large verification instances.
                 Based on refutation proofs generated by SAT solvers,
                 interpolants provide compact circuit representations of
                 state sets, as they abstract away several nonrelevant
                 details of the proofs. We propose three main
                 contributions, aimed at controlling interpolant size
                 and traversal depth. First of all, we introduce
                 interpolant-based dynamic abstraction to reduce the
                 support of computed interpolants. Subsequently, we
                 propose new advances in interpolant compaction by
                 redundancy removal. Finally, we introduce interpolant
                 computation exploiting circuit quantification, instead
                 of SAT refutation proofs. These techniques heavily rely
                 on an effective application of the incremental SAT
                 paradigm. The experimental results proposed in this
                 paper are specifically oriented to prove properties,
                 rather than disproving them, i.e., they target complete
                 verification instead of simply hunting bugs. They show
                 how this methodology is able to stretch the
                 applicability of interpolant-based Model Checking to
                 larger and deeper verification instances.",
  acknowledgement = ack-nhfb,
  articleno =    "3",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "abstraction; Interpolant; redundancy removal",
}

@Article{Boule:2008:ABA,
  author =       "Marc Boul{\'e} and Zeljko Zilic",
  title =        "Automata-based assertion-checker synthesis of {PSL}
                 properties",
  journal =      j-TODAES,
  volume =       "13",
  number =       "1",
  pages =        "4:1--4:??",
  month =        jan,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1297666.1297670",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Jun 12 18:10:00 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Assertion-based verification with languages such as
                 PSL is gaining in importance. From assertions, one can
                 generate hardware assertion checkers for use in
                 emulation, simulation acceleration and silicon debug.
                 We present techniques for checker generation of the
                 complete set of PSL properties, including all variants
                 of operators, both strong and weak. A full
                 automata-based approach allows an entire assertion to
                 be represented by a single automaton, hence allowing
                 optimizations that can not be done in a modular
                 approach where subcircuits are created only for
                 individual operators. For this purpose, automata
                 algorithms are developed for the base cases, and a
                 complete set of rewrite rules is derived for other
                 operators. Automata splitting is introduced for an
                 efficient implementation of the eventually! operator.",
  acknowledgement = ack-nhfb,
  articleno =    "4",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "assertion checkers; Assertion-Based Verification;
                 automata; emulation; hardware; PSL",
}

@Article{Rahaman:2008:CTB,
  author =       "H. Rahaman and J. Mathew and D. K. Pradhan and A. M.
                 Jabir",
  title =        "{C}-testable bit parallel multipliers over {${\rm
                 GF}(2^m)$}",
  journal =      j-TODAES,
  volume =       "13",
  number =       "1",
  pages =        "5:1--5:??",
  month =        jan,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1297666.1297671",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Jun 12 18:10:00 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "We present a C-testable design of polynomial basis
                 (PB) bit-parallel (BP) multipliers over ${\rm GF}(2^m)$ for
                 100\% coverage of stuck-at faults. Our design method
                 also includes the method for test vector generation,
                 which is simple and efficient. C-testability is
                 achieved with three control inputs and approximately
                 6\% additional hardware. Only 8 constant vectors are
                 required irrespective of the sizes of the fields and
                 primitive polynomial. We also present a Built-In
                 Self-Test (BIST) architecture for generating the test
                 vectors efficiently, which eliminates the need for the
                 extra control inputs. Since these circuits have
                 critical applications as parts of cryptography (e.g.,
                 Elliptic Curve Crypto (ECC) systems) hardware, the BIST
                 architecture may provide with added level of security,
                 as the tests would be done internally and without the
                 requirement of probing by external testing equipment.
                 Finally we present experimental results comprising the
                 area, delay and power of the testable multipliers of
                 various sizes with the help of the Synopsys{\reg} tools
                 using UMC 0.18 micron CMOS technology library.",
  acknowledgement = ack-nhfb,
  articleno =    "5",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "built-in self-test; C-testable; cryptography; digital
                 signal processing; error control code; fault; Galois
                 field; multiplier; polynomials; stuck-at fault;
                 testing; TPG; VLSI design",
}

@Article{Taktak:2008:TAD,
  author =       "Sami Taktak and Jean-Lou Desbarbieux and Emmanuelle
                 Encrenaz",
  title =        "A tool for automatic detection of deadlock in wormhole
                 networks on chip",
  journal =      j-TODAES,
  volume =       "13",
  number =       "1",
  pages =        "6:1--6:??",
  month =        jan,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1297666.1297672",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Jun 12 18:10:00 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "We present an extension of Duato's necessary and
                 sufficient condition a routing function must satisfy in
                 order to be deadlock-free, to support environment
                 constraints inducing extra-dependencies between
                 messages. We also present an original algorithm to
                 automatically check the deadlock-freeness of a network
                 with a given routing function. A prototype tool has
                 been developed and automatic deadlock checking of large
                 scale networks with various routing functions have been
                 successfully achieved. We provide comparative results
                 with standard approach, highlighting the benefits of
                 our method.",
  acknowledgement = ack-nhfb,
  articleno =    "6",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Deadlock; interconnection networks; networks on chip;
                 wormhole routing",
}

@Article{Zhou:2008:NER,
  author =       "Hai Zhou",
  title =        "A new efficient retiming algorithm derived by formal
                 manipulation",
  journal =      j-TODAES,
  volume =       "13",
  number =       "1",
  pages =        "7:1--7:??",
  month =        jan,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1297666.1297673",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Jun 12 18:10:00 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "A new efficient algorithm is derived for the minimal
                 period retiming by formal manipulation. Contrary to all
                 previous algorithms, which used fixed period
                 feasibility checking to binary-search a candidate
                 range, the derived algorithm checks the optimality of a
                 feasible period directly. It is much simpler and more
                 efficient than previous algorithms. Experimental
                 results showed that it is even faster than ASTRA, an
                 efficient heuristic algorithm. Since the derived
                 algorithm is incremental by nature, it also opens the
                 opportunity to be combined with other optimization
                 techniques.",
  acknowledgement = ack-nhfb,
  articleno =    "7",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "algorithm derivation; Clock period minimization;
                 retiming",
}

@Article{Krishnaswamy:2008:PTM,
  author =       "Smita Krishnaswamy and George F. Viamontes and Igor L.
                 Markov and John P. Hayes",
  title =        "Probabilistic transfer matrices in symbolic
                 reliability analysis of logic circuits",
  journal =      j-TODAES,
  volume =       "13",
  number =       "1",
  pages =        "8:1--8:??",
  month =        jan,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1297666.1297674",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Jun 12 18:10:00 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "We propose the probabilistic transfer matrix (PTM)
                 framework to capture nondeterministic behavior in logic
                 circuits. PTMs provide a concise description of both
                 normal and faulty behavior, and are well-suited to
                 reliability and error susceptibility calculations. A
                 few simple composition rules based on connectivity can
                 be used to recursively build larger PTMs (representing
                 entire logic circuits) from smaller gate PTMs. PTMs for
                 gates in series are combined using matrix
                 multiplication, and PTMs for gates in parallel are
                 combined using the tensor product operation. PTMs can
                 accurately calculate joint output probabilities in the
                 presence of reconvergent fanout and inseparable joint
                 input distributions. To improve computational
                 efficiency, we encode PTMs as algebraic decision
                 diagrams (ADDs). We also develop equivalent ADD
                 algorithms for newly defined matrix operations such as
                 {\tt eliminate\_variables} and {\tt
                 eliminate\_redundant\_variables}, which aid in the
                 numerical computation of circuit PTMs. We use PTMs to
                 evaluate circuit reliability and derive polynomial
                 approximations for circuit error probabilities in terms
                 of gate error probabilities. PTMs can also analyze the
                 effects of logic and electrical masking on error
                 mitigation. We show that ignoring logic masking can
                 overestimate errors by an order of magnitude. We
                 incorporate electrical masking by computing error
                 attenuation probabilities, based on analytical models,
                 into an extended PTM framework for reliability
                 computation. We further define a susceptibility measure
                 to identify gates whose errors are not well masked. We
                 show that hardening a few gates can significantly
                 improve circuit reliability.",
  acknowledgement = ack-nhfb,
  articleno =    "8",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "fault tolerance; Symbolic analysis",
}

@Article{Tzeng:2008:VPS,
  author =       "Chao-Wen Tzeng and Jheng-Syun Yang and Shi-Yu Huang",
  title =        "A versatile paradigm for scan chain diagnosis of
                 complex faults using signal processing techniques",
  journal =      j-TODAES,
  volume =       "13",
  number =       "1",
  pages =        "9:1--9:??",
  month =        jan,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1297666.1297675",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Jun 12 18:10:00 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Scan chains are popularly used as the channels for
                 silicon testing and debugging. However, they have also
                 been identified as one of the culprits of silicon
                 failure more recently. To cope with this problem,
                 several scan chain diagnosis approaches have been
                 proposed in the past. The existing methods, however,
                 suffer from one common drawback---that is, they rely on
                 fault models and matching heuristics to locate the
                 faults. Such a paradigm may run into difficulty when
                 the fault under diagnosis does not match the fault
                 model exactly, for example, when there is a bridging
                 between a flip-flop and a logic cell, or the fault is
                 temporal and only manifests itself intermittently. In
                 light of this, we propose in this article a more
                 versatile model-free paradigm for locating the faulty
                 flip-flops in a scan chain, incorporating a number of
                 signal processing techniques, such as filtering and
                 edge detection. These techniques performed on the test
                 responses of the failing chip under diagnosis directly
                 can effectively reveal the fault location(s) in a scan
                 chain. As compared to the previous works, our approach
                 is better capable of handling intermittent faults and
                 bridging faults, even under nonideal conditions, for
                 example, when the core logic is also faulty.
                 Experimental results on several real designs indicate
                 that this approach can indeed catch some nasty faults
                 that previous methods could not catch.",
  acknowledgement = ack-nhfb,
  articleno =    "9",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "design for testability; Diagnosis; fault; profiling;
                 scan chain",
}

@Article{Johnson:2008:IME,
  author =       "F. Ryan Johnson and Joann M. Paul",
  title =        "Interrupt modeling for efficient high-level scheduler
                 design space exploration",
  journal =      j-TODAES,
  volume =       "13",
  number =       "1",
  pages =        "10:1--10:??",
  month =        jan,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1297666.1297676",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Jun 12 18:10:00 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Single Chip Heterogeneous Multiprocessors executing a
                 wide variety of software are increasingly common in
                 consumer electronics. Because of the mix of real-time
                 and best effort software across the entire chip, a key
                 design element of these systems is the choice of
                 scheduling strategy. Without task migration, the
                 benefits of single chip processing cannot be fully
                 realized. Previously, high-level modeling environments
                 have not been capable of modeling asynchronous events
                 such as interrupts and preemptive scheduling while
                 preserving the performance benefits of high level
                 simulation. This paper shows how extensions to Modeling
                 Environment for Software and Hardware (MESH) enable
                 precise modeling of these asynchronous events while
                 running more than 1000 times faster than cycle-accurate
                 simulation. We discuss how we achieved this and
                 illustrate its use in modeling preemptive scheduling.
                 We evaluate the potential of migrating running tasks
                 between processors to improve performance in a
                 multimedia cell phone example. We show that by allowing
                 schedulers to rebalance processor loads as new tasks
                 arrive significant performance gains can be achieved
                 over statically partitioned and dynamic scheduling
                 approaches. In our example, we show that system
                 response time can be improved by as much as 1.96 times
                 when a preemptive migratory scheduler is used, despite
                 the overhead incurred by scheduling tasks across
                 multiple processors and transferring state during the
                 migration of running tasks. The contribution of this
                 work is to provide a framework for evaluating
                 preemptive scheduling policies and task migration in a
                 high level simulator, by combining the new ability to
                 model interrupts with dramatically increased efficiency
                 in the high-level modeling of scheduling and
                 communication MESH already provides.",
  acknowledgement = ack-nhfb,
  articleno =    "10",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Heterogeneous chip multiprocessors; MESH; scenario
                 oriented design",
}

@Article{Ogras:2008:AOP,
  author =       "Umit Y. Ogras and Radu Marculescu",
  title =        "Analysis and optimization of prediction-based flow
                 control in networks-on-chip",
  journal =      j-TODAES,
  volume =       "13",
  number =       "1",
  pages =        "11:1--11:??",
  month =        jan,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1297666.1297677",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Jun 12 18:10:00 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Networks-on-Chip (NoC) communication architectures
                 have emerged recently as a scalable solution to on-chip
                 communication problems. While the NoC architectures may
                 offer higher bandwidth compared to traditional
                 bus-based communication, their performance can degrade
                 significantly in the absence of effective flow control
                 algorithms. Unfortunately, flow control algorithms
                 developed for macronetworks, either rely on local
                 information, or suffer from large communication
                 overhead and unpredictable delays. Hence, using them in
                 the NoC context is problematic at best. For this
                 reason, we propose a predictive closed-loop flow
                 control mechanism and make the following contributions:
                 First, we develop traffic source and router models
                 specifically targeted to NoCs. Then, we utilize these
                 models to predict the possible congestion in the
                 network. Based on this information, the proposed scheme
                 controls the packet injection rate at traffic sources
                 in order to regulate the total number of packets in the
                 network. We also illustrate the proposed traffic source
                 model and the applicability of the proposed flow
                 controller to actual designs using real NoC
                 implementations. Finally, simulations and experimental
                 study using our FPGA prototype show that the proposed
                 controller delivers a better performance compared to
                 the traditional switch-to-switch flow control
                 algorithms under various real and synthetic traffic
                 patterns.",
  acknowledgement = ack-nhfb,
  articleno =    "11",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "congestion control; flow control; Multi-processor
                 systems; networks-on-chip",
}

@Article{Chang:2008:TCS,
  author =       "Kuei-Chung Chang and Jih-Sheng Shen and Tien-Fu Chen",
  title =        "Tailoring circuit-switched network-on-chip to
                 application-specific system-on-chip by two optimization
                 schemes",
  journal =      j-TODAES,
  volume =       "13",
  number =       "1",
  pages =        "12:1--12:??",
  month =        jan,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1297666.1297678",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Jun 12 18:10:00 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "As the number of cores on a chip increases, power
                 consumed by the communication structures takes a
                 significant portion of the overall power budget. In
                 this article, we first propose a circuit-switched
                 interconnection architecture which uses crossroad
                 switches to construct dedicated channels dynamically
                 between any pairs of cores for nonhuge
                 application-specific SoCs. The structure of the
                 crossroad switch is simple, which can be regarded as a
                 NoC-lite router, and we can easily construct a
                 low-power on-chip network with these switches by a
                 system-level design methodology. We also present the
                 design methodology to tailor the proposed
                 interconnection architecture to low-power structures by
                 two proposed optimization schemes with profiled
                 communication characteristics. The first scheme is
                 power-aware topology construction, which can build
                 low-power application-specific interconnection
                 topologies. To further reduce the power consumption, we
                 propose the second optimization scheme to predetermine
                 the operating mode of dual-mode switches in the NoC at
                 runtime. We evaluate several interconnection
                 techniques, and the results show that the proposed
                 architecture is more low-power and high-performance
                 than others under some constraints and scale
                 boundaries. We take multimedia applications as case
                 studies, and experimental results show the power
                 savings of power-aware topology approximate to 49\% of
                 the interconnection architecture. The power consumption
                 can be further reduced approximately 25\% by applying
                 partially dedicated path mechanism.",
  acknowledgement = ack-nhfb,
  articleno =    "12",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Application specific; interconnection; low power;
                 networks on chip; systems on chips",
}

@Article{Abbasian:2008:WBD,
  author =       "A. Abbasian and S. Hatami and A. Afzali-Kusha and M.
                 Pedram",
  title =        "Wavelet-based dynamic power management for
                 nonstationary service requests",
  journal =      j-TODAES,
  volume =       "13",
  number =       "1",
  pages =        "13:1--13:??",
  month =        jan,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1297666.1297679",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Jun 12 18:10:00 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "In this article, a wavelet-based dynamic power
                 management policy (WBDPM) is proposed. In this
                 approach, the workload source (service requester) is
                 modeled by a nonstationary time series which, in turn,
                 is represented by a nondecimated Haar wavelet as its
                 basis. The proposed approach is robust and has the
                 ability to minimize energy dissipation under different
                 performance constraints. To assess the accuracy of the
                 model, the algorithm was implemented for data extracted
                 from the hard disks of computers. Prediction results of
                 this approach for the case of a nonstationary service
                 requester exhibit accuracies of more than 95\%.",
  acknowledgement = ack-nhfb,
  articleno =    "13",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Dynamic power management; low-power system design;
                 nonstationary service request; wavelet-based
                 prediction",
}

@Article{Su:2008:SNT,
  author =       "Yu-Shih Su and Po-Hsien Chang and Shih-Chieh Chang and
                 Tingting Hwang",
  title =        "Synthesis of a novel timing-error detection
                 architecture",
  journal =      j-TODAES,
  volume =       "13",
  number =       "1",
  pages =        "14:1--14:??",
  month =        jan,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1297666.1297680",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Jun 12 18:10:00 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Delay variation can cause a design to fail its timing
                 specification. Ernst et al. [2003] observe that the
                 worst delay of a design is least probable to occur.
                 They propose a mechanism to detect and correct
                 occasional errors while the design can be optimized for
                 the common cases. Their experimental results show
                 significant performance (or power) gain as compared
                 with the worst-case design. However, the architecture
                 in Ernst et al. [2003] suffers the short path problem,
                 which is difficult to resolve. In this article, we
                 propose a novel error-detecting architecture to solve
                 the short path problem. Our experimental results show
                 considerable performance gain can be achieved with
                 reasonable area overhead.",
  acknowledgement = ack-nhfb,
  articleno =    "14",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "fault tolerance; Logic synthesis",
}

@Article{Raabe:2008:RDS,
  author =       "Andreas Raabe and Philipp A. Hartmann and Joachim K.
                 Anlauf",
  title =        "{ReChannel}: {Describing} and simulating
                 reconfigurable hardware in {SystemC}",
  journal =      j-TODAES,
  volume =       "13",
  number =       "1",
  pages =        "15:1--15:??",
  month =        jan,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1297666.1297681",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Jun 12 18:10:00 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "With the ongoing integration of (dynamic)
                 reconfiguration into current system models, new
                 methodologies and tools are needed to help the designer
                 during the development process. This article introduces
                 a language extension for SystemC along with a design
                 methodology for describing and simulating dynamically
                 reconfigurable systems at all levels of abstraction.
                 The presented library provides maximum freedom of
                 description of reconfiguration behavior and its
                 control, while featuring simulation of runtime
                 configuration, removal, and exchange of custom modules
                 as well as third-party IP-cores during the complete
                 architecture refinement process. When designing at
                 RT-level, the resulting hardware description can easily
                 be synthesized by standard synthesis tools.",
  acknowledgement = ack-nhfb,
  articleno =    "15",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "dynamic reconfiguration; hardware description;
                 Reconfigurable hardware; refinement; simulation;
                 SystemC",
}

@Article{Zhou:2008:AAS,
  author =       "Xiangrong Zhou and Chenjie Yu and Alokika Dash and
                 Peter Petrov",
  title =        "Application-aware snoop filtering for low-power cache
                 coherence in embedded multiprocessors",
  journal =      j-TODAES,
  volume =       "13",
  number =       "1",
  pages =        "16:1--16:??",
  month =        jan,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1297666.1297682",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Jun 12 18:10:00 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Maintaining local caches coherently in shared-memory
                 multiprocessors results in significant power
                 consumption. The customization methodology we propose
                 exploits the fact that in embedded systems, important
                 knowledge is available to the system designers
                 regarding memory sharing between tasks. We demonstrate
                 how the snoop-induced cache probings can be
                 significantly reduced by identifying and exploiting in
                 a deterministic way the shared memory regions between
                 the processors. Snoop activity is enabled only for the
                 accesses referring to known shared regions. The
                 hardware support is not only cost efficient, but also
                 software programmable, which allows for
                 reprogrammability and customization across different
                 tasks and applications.",
  acknowledgement = ack-nhfb,
  articleno =    "16",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Cache coherence; embedded multiprocessors; low-power
                 embedded systems; snoop filtering",
}

@Article{Ahn:2008:SSC,
  author =       "Yongjin Ahn and Keesung Han and Ganghee Lee and
                 Hyunjik Song and Junhee Yoo and Kiyoung Choi and
                 Xingguang Feng",
  title =        "{SoCDAL}: {System-on-chip design AcceLerator}",
  journal =      j-TODAES,
  volume =       "13",
  number =       "1",
  pages =        "17:1--17:??",
  month =        jan,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1297666.1297683",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Jun 12 18:10:00 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Time-to-market pressure and the ever-growing design
                 complexity of multiprocessor system-on-chips have
                 demanded an efficient design environment that enables
                 fast exploration of large design space. In this
                 article, we introduce a new design environment, called
                 SoCDAL, for accelerating multiprocessor system-on-chip
                 design through fast design-space exploration targeting
                 real-time multimedia systems. SoCDAL is a set of mostly
                 automated tools covering system specification,
                 hardware/software estimation,
                 application-to-architecture mapping, simulation model
                 generation, and system verification through simulation.
                 For system specification, the process network model has
                 been widely used for system specification because of
                 its modeling capability. However, it is hard to use for
                 real-time systems design, since its behavior cannot be
                 estimated statically. We introduce a new approach which
                 enables analyzing a process network model statically
                 with some restrictions. For the hardware/software
                 estimation, we analyze codes statically.
                 Application-to-architecture mapping process implements
                 a novel algorithm to support an arbitrary number of
                 processors, with performance evaluation by static
                 scheduling considering communication behavior. Mapping
                 results are used to generate simulation models
                 automatically at several transaction levels to be
                 pipelined to a commercial tool. We show the
                 effectiveness of our approaches by some experimental
                 results with multimedia applications such as JPEG,
                 H.263, and H.264 encoders, as well as an H.264
                 decoder.",
  acknowledgement = ack-nhfb,
  articleno =    "17",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "application-to-architecture mapping; Codesign;
                 design-space exploration; multiprocessor
                 system-on-chip; process networks; scheduling;
                 simulation; specification; static hardware/software
                 estimation; synchronous dataflow; transaction-level
                 model; worst-case execution time",
}

@Article{Zamora:2008:EMU,
  author =       "Nicholas H. Zamora and Xiaoping Hu and Umit Y. Ogras
                 and Radu Marculescu",
  title =        "Enabling multimedia using resource-constrained video
                 processing techniques: a node-centric perspective",
  journal =      j-TODAES,
  volume =       "13",
  number =       "1",
  pages =        "18:1--18:??",
  month =        jan,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1297666.1297684",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Jun 12 18:10:00 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Successful proliferation of multimedia-enabled devices
                 and advances in very large-scale integration (VLSI)
                 technology has spawned new research efforts in
                 migrating video processing applications onto ever
                 smaller and more inexpensive devices. This article
                 focuses on the technical challenges associated with
                 that migration. \par

                 Due to limitations in size, battery lifetime, and,
                 ultimately, cost, mapping complex video applications
                 onto resource-constrained systems is a very challenging
                 proposition. To this end, we first consider a
                 technique, region-of-interest (ROI) processing, of
                 defining a window within a video frame and only
                 operating on the data inside that window, ignoring the
                 rest of the frame. By using this lossy technique, the
                 processing requirements can be reduced by roughly 80\%
                 while the error introduced in the quality of the
                 results is roughly 10\%. The other technique is
                 adaptive data partitioning (ADP) combined with a
                 content-based power management algorithm. By
                 distributing video processing among multiple processors
                 and shutting them down when they are not needed, the
                 energy consumed per processor can be reduced by 60\%
                 without sacrificing the performance of the underlying
                 video-based application. \par

                 Taken together, these novel techniques enable ambient
                 multimedia systems and maintain the needed overall
                 efficiency in video processing.",
  acknowledgement = ack-nhfb,
  articleno =    "18",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "data partitioning; lossy and lossless video
                 processing; real-time video processing;
                 Region-of-interest (ROI)",
}

@Article{Lee:2008:FCB,
  author          = {Kyungsoo Lee and Naehyuck Chang and Jianli Zhuo and
                     Chaitali Chakrabarti and Sudheendra Kadri and Sarma
                     Vrudhula},
  title           = {A fuel-cell-battery hybrid for portable embedded
                     systems},
  journal         = j-TODAES,
  volume          = {13},
  number          = {1},
  pages           = {19:1--19:??},
  month           = jan,
  year            = {2008},
  CODEN           = {ATASFO},
  DOI             = {http://doi.acm.org/10.1145/1297666.1297685},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Thu Jun 12 18:10:00 MDT 2008},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {This article presents our work on the development of a
                     fuel cell (FC) and battery hybrid (FC-Bh) system for
                     use in portable microelectronic systems. We describe
                     the design and control of the hybrid system, as well as
                     a dynamic power management (DPM)-based energy
                     management policy that extends its operational
                     lifetime. The FC is of the proton exchange membrane
                     (PEM) type, operates at room temperature, and has an
                     energy density which is 4--6 times that of a Li-ion
                     battery. The FC cannot respond to sudden changes in the
                     load, and so a system powered solely by the FC is not
                     economical. An FC-Bh power source, on the other hand,
                     can provide the high energy density of the FC and the
                     high power density of a battery. \par

                     In this work we first describe the prototype FC-Bh
                     system that we have built. Such a prototype helps to
                     characterize the performance of a hybrid power source,
                     and also helps explore new energy management strategies
                     for embedded systems powered by hybrid sources. Next we
                     describe a Matlab/Simulink-based FC-Bh system simulator
                     which serves as an alternate experimental platform and
                     that enables quick evaluation of system-level control
                     policies. Finally, we present an optimization framework
                     that explicitly considers the characteristics of the
                     FC-Bh system and is aimed at minimizing the fuel
                     consumption. This optimization framework is applied on
                     top of a prediction-based DPM policy and is used to
                     derive a new fuel-efficient DPM scheme. The proposed
                     scheme demonstrates up to 32\% system lifetime
                     extension compared to a competing scheme when run on a
                     real trace-based MPEG encoding example.},
  acknowledgement = ack-nhfb,
  articleno       = {19},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {battery; DPM; fuel cell; hybrid systems; Simulation;
                     simulator},
}

@Article{Chao:2008:LPG,
  author          = {Wei-Chung Chao and Wai-Kei Mak},
  title           = {Low-power gated and buffered clock network
                     construction},
  journal         = j-TODAES,
  volume          = {13},
  number          = {1},
  pages           = {20:1--20:??},
  month           = jan,
  year            = {2008},
  CODEN           = {ATASFO},
  DOI             = {http://doi.acm.org/10.1145/1297666.1297686},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Thu Jun 12 18:10:00 MDT 2008},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {We propose an efficient algorithm to construct a
                     low-power zero-skew gated clock network, given the
                     module locations and activity information. Unlike
                     previous works, we consider masking logic insertion and
                     buffer insertion simultaneously, and guarantee to yield
                     a zero-skew clock tree. Both the logical and physical
                     information of the modules are carefully taken into
                     consideration when determining where masking logic
                     should be inserted. We also account for the power
                     overhead of the control signals so that the total
                     average power consumption of the constructed zero-skew
                     gated clock network can be minimized. To this end, we
                     present a recursive approach to compute the effective
                     switched capacitance of a general gated and buffered
                     clock network, accounting for both the clock tree's and
                     controller tree's switched capacitance. The power
                     consumptions of the gated clock networks constructed by
                     our algorithm are 20 to 36\% lower than those reported
                     in the best previous work in the literature.},
  acknowledgement = ack-nhfb,
  articleno       = {20},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {buffer; clock gating; Clock tree; low power;
                     zero-skew},
}

@Article{Sham:2008:OWR,
  author          = {Chiu-Wing Sham and Evangeline F. Y. Young and Hai
                     Zhou},
  title           = {Optimizing wirelength and routability by searching
                     alternative packings in floorplanning},
  journal         = j-TODAES,
  volume          = {13},
  number          = {1},
  pages           = {21:1--21:??},
  month           = jan,
  year            = {2008},
  CODEN           = {ATASFO},
  DOI             = {http://doi.acm.org/10.1145/1297666.1297687},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Thu Jun 12 18:10:00 MDT 2008},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {Recent advances in VLSI technology have made
                     optimization of the interconnect delay and routability
                     of a circuit more important. We should consider
                     interconnect planning as early as possible. We propose
                     a postfloorplanning step to reduce the interconnect
                     cost of a floorplan by searching alternative packings.
                     If a packing contains a rectangular bounding box of a
                     group of modules, we can rearrange the blocks in the
                     bounding box to obtain a new floorplan with the same
                     area, but possibly with a smaller interconnect cost.
                     Experimental results show that we can reduce the
                     interconnect cost of a packing without any penalty in
                     area.},
  acknowledgement = ack-nhfb,
  articleno       = {21},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {Floorplanning; wirelength reduction},
}

@Article{Wu:2008:CPR,
  author =       "Meng-Chiou Wu and Rung-Bin Lin and Shih-Cheng Tsai",
  title =        "Chip placement in a reticle for multiple-project wafer
                 fabrication",
  journal =      j-TODAES,
  volume =       "13",
  number =       "1",
  pages =        "22:1--22:??",
  month =        jan,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1297666.1297688",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Jun 12 18:10:00 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Chip placement in a reticle is crucial to the cost of
                 a multiproject wafer run. In this article we develop
                 several chip placement methods based on the
                 volume-driven compatibility optimization (VOCO)
                 concept, which maximizes dicing compatibility among
                 chips with large-volume requirements while minimizing
                 reticle dimensions. Our mixed-integer linear
                 programming models with VOCO are too complex to render
                 good solutions for large test cases. Our B*-tree with
                 VOCO and HQ with VOCO use $16\% \sim 29\%$ fewer wafers
                 and $8\% \sim 19\%$ less reticle area than the
                 hierarchical quadrisection (HQ) method proposed by
                 Kahng et al. [2005].",
  acknowledgement = ack-nhfb,
  articleno =    "22",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "compatibility graph; conflict graph; mixed-integer
                 linear programming (MILP); Multiple-project wafers
                 (MPW); reticle floorplanning; set cover; set partition;
                 shuttle mask; simulated annealing (SA); wafer dicing",
}

@Article{Dutt:2008:Eb,
  author          = {Nikil Dutt},
  title           = {Editorial},
  journal         = j-TODAES,
  volume          = {13},
  number          = {2},
  pages           = {23:1--23:??},
  month           = apr,
  year            = {2008},
  CODEN           = {ATASFO},
  DOI             = {http://doi.acm.org/10.1145/1344418.1344419},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Thu Jun 12 18:10:39 MDT 2008},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  articleno       = {23},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Saluja:2008:SBA,
  author =       "Nikhil Saluja and Kanupriya Gulati and Sunil P.
                 Khatri",
  title =        "{SAT}-based {ATPG} using multilevel compatible
                 don't-cares",
  journal =      j-TODAES,
  volume =       "13",
  number =       "2",
  pages =        "24:1--24:??",
  month =        apr,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1344418.1344420",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Jun 12 18:10:39 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "In a typical IC design flow, circuits are optimized
                 using multilevel don't cares. The computed don't cares
                 are discarded before Technology Mapping or Automatic
                 Test Pattern Generation (ATPG). In this paper, we
                 present two combinational ATPG algorithms for
                 combinational designs. These algorithms utilize the
                 multilevel don't cares that are computed for the design
                 during technology independent logic optimization. They
                 are based on Boolean Satisfiability (SAT), and utilize
                 the single stuck-at fault model. Both algorithms make
                 use of the Compatible Observability Don't Cares (CODCs)
                 associated with nodes of the circuit, to speed up the
                 ATPG process. For large circuits, both algorithms make
                 use of approximate CODCs (ACODCs), which we can compute
                 efficiently. Our first technique speeds up fault
                 propagation by modifying the active clauses in the
                 transitive fanout (TFO) of the fault site. In our
                 second technique, we define new j-active variables
                 for specific nodes in the transitive fanin (TFI) of the
                 fault site. Using these j-active variables we write
                 additional clauses to speed up fault justification.
                 Experimental results demonstrate that the combination
                 of these techniques (when using CODCs) results in an
                 average reduction of 45\% in ATPG runtimes. When ACODCs
                 are used, a speed-up of about 30\% is obtained in the
                 ATPG run-times for large designs. We compare our method
                 against a commercial structural ATPG tool as well. Our
                 method is slower for small designs, but for large
                 designs, we obtain a 31\% average speedup over the
                 commercial tool.",
  acknowledgement = ack-nhfb,
  articleno =    "24",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Automatic test pattern generation (ATPG); Boolean
                 satisfiability (SAT); don't cares; testing",
}

@Article{Muchherla:2008:NEW,
  author =       "Kishore Kumar Muchherla and Pinhong Chen and Dongsheng
                 Ma and Janet Meiling Wang",
  title =        "A noniterative equivalent waveform model for timing
                 analysis in presence of crosstalk",
  journal =      j-TODAES,
  volume =       "13",
  number =       "2",
  pages =        "25:1--25:??",
  month =        apr,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1344418.1344421",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Jun 12 18:10:39 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Due to the nonuniform interconnect scaling in the Deep
                 Sub Micron (DSM) region, the coupling capacitance
                 between wires becomes an increasingly dominant fraction
                 of the total wire capacitance. This coupling capacitance
                 introduces severe crosstalk which causes delay
                 variations on signal lines and raises signal integrity
                 problems. Therefore, including crosstalk in the timing
                 analysis methods has become imperative for current
                 technologies. And to correctly model the crosstalk,
                 output loading effects, waveform shape and gate driving
                 capability have to be considered. However, most
                 existing crosstalk models have not yet included these
                 factors and consequently suffer from the low accuracy
                 problem. In this article, we propose a noniterative
                 equivalent waveform model that addresses the above
                 mentioned issues. Our experimental results have shown
                 that the new model achieves 3 times speed up and 95\%
                 accuracy compared to the existing models.",
  acknowledgement = ack-nhfb,
  articleno =    "25",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Deep sub micron; delay; equivalent waveform; noise;
                 timing analysis",
}

@Article{Yan:2008:TDO,
  author          = {Jin-Tai Yan},
  title           = {Timing-driven octilinear {Steiner} tree construction
                     based on {Steiner-point} reassignment and path
                     reconstruction},
  journal         = j-TODAES,
  volume          = {13},
  number          = {2},
  pages           = {26:1--26:??},
  month           = apr,
  year            = {2008},
  CODEN           = {ATASFO},
  DOI             = {http://doi.acm.org/10.1145/1344418.1344422},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Thu Jun 12 18:10:39 MDT 2008},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {It is well known that the problem of constructing a
                     timing-driven rectilinear Steiner tree for any signal
                     net is important in performance-driven designs and has
                     been extensively studied. Until now, many efficient
                     approaches have been proposed for the construction of a
                     timing-driven rectilinear Steiner tree. As technology
                     process advances, $+45^\circ$ and $-45^\circ$ diagonal
                     segments can be permitted in an octilinear routing
                     model. To our knowledge, no approach is proposed to
                     construct a timing-driven octilinear Steiner tree for
                     any signal net. In this paper, given a rectilinear
                     Steiner tree for any signal net, we propose an
                     efficient transformation-based approach to construct a
                     timing-driven octilinear Steiner tree based on the
                     computation of the octilinear distance and the concept
                     of Steiner-point reassignment and path reconstruction
                     in an octilinear routing model. The experimental
                     results show that our proposed transformation-based
                     approach can use reasonable CPU time to construct a
                     TOST, and a 10\%--18\% improvement in timing delay and
                     a 5\%--14\% improvement in total wire length in the
                     original RSTs are obtained in the construction of TOSTs
                     for the tested signal nets.},
  acknowledgement = ack-nhfb,
  articleno       = {26},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {Elmore delay; Global routing; octilinear Steiner tree;
                     Steiner points},
}

@Article{Baldassin:2008:OSB,
  author          = {Alexandro Baldassin and Paulo Centoducatte and Sandro
                     Rigo and Daniel Casarotto and Luiz C. V. Santos and Max
                     Schultz and Olinto Furtado},
  title           = {An open-source binary utility generator},
  journal         = j-TODAES,
  volume          = {13},
  number          = {2},
  pages           = {27:1--27:??},
  month           = apr,
  year            = {2008},
  CODEN           = {ATASFO},
  DOI             = {http://doi.acm.org/10.1145/1344418.1344423},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Thu Jun 12 18:10:39 MDT 2008},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {Electronic system level (ESL) modeling allows early
                     hardware-dependent software (HDS) development. Due to
                     broad CPU diversity and shrinking time-to-market, HDS
                     development can neither rely on hand-retargeting binary
                     tools, nor can it rely on pre-existent tools within
                     standard packages. As a consequence, binary utilities
                     which can be easily adapted to new CPU targets are of
                     increasing interest. We present in this article a
                     framework for automatic generation of binary utilities.
                     It relies on two innovative ideas: platform-aware
                     modeling and more inclusive relocation handling.
                     Generated assemblers, linkers, disassemblers and
                     debuggers were validated for MIPS, SPARC, PowerPC,
                     i8051 and PIC16F84. An open-source prototype generator
                     is available for download.},
  acknowledgement = ack-nhfb,
  articleno       = {27},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {Platform debugging; retargetable tools; TLM},
}

@Article{Moscola:2008:RCB,
  author =       "James Moscola and John W. Lockwood and Young H. Cho",
  title =        "Reconfigurable content-based router using
                 hardware-accelerated language parser",
  journal =      j-TODAES,
  volume =       "13",
  number =       "2",
  pages =        "28:1--28:??",
  month =        apr,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1344418.1344424",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Jun 12 18:10:39 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "This article presents a dense logic design for
                 matching multiple regular expressions with a field
                 programmable gate array (FPGA) at 10+ Gbps. It
                 leverages on the design techniques that enforce the
                 shortest critical path on most FPGA architectures while
                 optimizing the circuit size. The architecture is
                 capable of supporting a maximum throughput of 12.90
                 Gbps on a Xilinx Virtex 4 LX200 and its performance is
                 linearly scalable with size. Additionally, this article
                 presents techniques for parsing data streams to provide
                 semantic information for patterns found within a data
                 stream. We illustrate how a content-based router can be
                 implemented with our parsing techniques using an XML
                 parser as an example. The content-based router
                 presented was designed, implemented, and tested in a
                 Xilinx Virtex XCV2000E FPGA on the FPX platform. It is
                 capable of processing 32-bits of data per clock cycle
                 and runs at 100 MHz. This allows the system to process
                 and route XML messages at 3.2 Gbps.",
  acknowledgement = ack-nhfb,
  articleno =    "28",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "content-based routing; parser hardware; Parsing;
                 pattern matching; regular expressions; XML",
}

@Article{Jones:2008:RFI,
  author =       "Alex K. Jones and Swapna Dontharaju and Shenchih Tung
                 and Leo Mats and Peter J. Hawrylak and Raymond R. Hoare
                 and James T. Cain and Marlin H. Mickle",
  title =        "Radio frequency identification prototyping",
  journal =      j-TODAES,
  volume =       "13",
  number =       "2",
  pages =        "29:1--29:??",
  month =        apr,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1344418.1344425",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Jun 12 18:10:39 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "While RFID is starting to become a ubiquitous
                 technology, the variation between different RFID
                 systems still remains high. This paper presents several
                 prototyping environments for different components of
                 radio frequency identification (RFID) tags to
                 demonstrate how many of these components can be
                 standardized for many different purposes. We include
                 two active tag prototypes, one based on a
                 microprocessor and the second based on custom hardware.
                 To program these devices we present a design automation
                 flow that allows RFID transactions to be described in
                 terms of primitives with behavior written in ANSI C
                 code. To save power with active RFID devices we
                 describe a passive transceiver switch called the
                 ``burst switch'' and demonstrate how this can be used
                 in a system with a microprocessor or custom hardware
                 controller. Finally, we present a full RFID system
                 prototyping environment based on real-time spectrum
                 analysis technology currently deployed at the
                 University of Pittsburgh RFID Center of Excellence.
                 Using our prototyping techniques we show how
                 transactions from multiple standards can be combined
                 and targeted to several microprocessors including the
                 Microchip PIC, Intel StrongARM and XScale, and AD Chips
                 EISC as well as several hardware targets including the
                 Altera Apex, Actel Fusion, Xilinx Coolrunner II,
                 Spartan 3 and Virtex 2, and cell-based ASICs.",
  acknowledgement = ack-nhfb,
  articleno =    "29",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Design automation; low-power; prototyping; RFID",
}

@Article{Hu:2008:PSF,
  author =       "Yu Hu and Yan Lin and Lei He and Tim Tuan",
  title =        "Physical synthesis for {FPGA} interconnect power
                 reduction by dual-{Vdd} budgeting and retiming",
  journal =      j-TODAES,
  volume =       "13",
  number =       "2",
  pages =        "30:1--30:??",
  month =        apr,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1344418.1344426",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Jun 12 18:10:39 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Field programmable dual-Vdd interconnects are
                 effective in reducing FPGA power. We formulate the
                 dual-Vdd-aware slack budgeting problem as a linear
                 program (LP) and a min-cost network flow problem,
                 respectively. Both algorithms reduce interconnect power
                 by 50\% on average compared to single-Vdd
                 interconnects, but the network-flow-based algorithm
                 runs 11x faster on MCNC benchmarks. Furthermore, we
                 develop simultaneous retiming and slack budgeting
                 (SRSB) with flip-flop layout constraints in dual-Vdd
                 FPGAs based on mixed integer linear programming, and
                 speed-up the algorithm by LP relaxation and local
                 legalization. Compared to retiming followed by slack
                 budgeting, SRSB reduces interconnect power by up to
                 28.8\%.",
  acknowledgement = ack-nhfb,
  articleno =    "30",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "FPGA; Low power; retiming",
}

@Article{AlKhatib:2008:MSC,
  author          = {Iyad {Al Khatib} and Francesco Poletti and Davide
                     Bertozzi and Luca Benini and Mohamed Bechara and Hasan
                     Khalifeh and Axel Jantsch and Rustam Nabiev},
  title           = {A multiprocessor system-on-chip for real-time
                     biomedical monitoring and analysis: {ECG} prototype
                     architectural design space exploration},
  journal         = j-TODAES,
  volume          = {13},
  number          = {2},
  pages           = {31:1--31:??},
  month           = apr,
  year            = {2008},
  CODEN           = {ATASFO},
  DOI             = {http://doi.acm.org/10.1145/1344418.1344427},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Thu Jun 12 18:10:39 MDT 2008},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {In this article we focus on multiprocessor
                     system-on-chip (MPSoC) architectures for human heart
                     electrocardiogram (ECG) real time analysis as a
                     hardware/software (HW/SW) platform offering an advance
                     relative to state-of-the-art solutions. This is a
                     relevant biomedical application with good potential
                     market, since heart diseases are responsible for the
                     largest number of yearly deaths. Hence, it is a good
                     target for an application-specific system-on-chip (SoC)
                     and HW/SW codesign. We investigate a symmetric
                     multiprocessor architecture based on STMicroelectronics
                     VLIW DSPs that process in real time 12-lead ECG
                     signals. This architecture improves upon
                     state-of-the-art SoC designs for ECG analysis in its
                     ability to analyze the full 12 leads in real time, even
                     with high sampling frequencies, and its ability to
                     detect heart malfunction for the whole ECG signal
                     interval. We explore the design space by considering a
                     number of hardware and software architectural options.
                     Comparing our design with present-day solutions from an
                     SoC and application point-of-view shows that our
                     platform can be used in real time and without
                     failures.},
  acknowledgement = ack-nhfb,
  articleno       = {31},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {electrocardiogram algorithms; embedded system design;
                     hardware space exploration; Multiprocessor
                     system-on-chip; real time analysis},
}

@Article{Zhou:2008:HTC,
  author          = {Xiangrong Zhou and Peter Petrov},
  title           = {Heterogeneously tagged caches for low-power embedded
                     systems with virtual memory support},
  journal         = j-TODAES,
  volume          = {13},
  number          = {2},
  pages           = {32:1--32:??},
  month           = apr,
  year            = {2008},
  CODEN           = {ATASFO},
  DOI             = {http://doi.acm.org/10.1145/1344418.1344428},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Thu Jun 12 18:10:39 MDT 2008},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {An energy-efficient data cache organization for
                     embedded processors with virtual memory is proposed.
                     Application knowledge regarding memory references is
                     used to eliminate most tag translations. A novel
                     tagging scheme is introduced, where both virtual and
                     physical tags coexist. Physical tags and special
                     handling of superset index bits are only used for
                     references to shared regions in order to avoid cache
                     inconsistency. By eliminating the need for most address
                     translations on cache access, a significant power
                     reduction is achieved. We outline an efficient hardware
                     architecture, where the application information is
                     captured in a reprogrammable way and the cache is
                     minimally modified.},
  acknowledgement = ack-nhfb,
  articleno       = {32},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {Embedded systems},
}

@Article{Liu:2008:PVA,
  author          = {Fang Liu and Sule Ozev and Plamen K. Nikolov},
  title           = {Parametric variability analysis for multistage analog
                     circuits using analytical sensitivity modeling},
  journal         = j-TODAES,
  volume          = {13},
  number          = {2},
  pages           = {33:1--33:??},
  month           = apr,
  year            = {2008},
  CODEN           = {ATASFO},
  DOI             = {http://doi.acm.org/10.1145/1344418.1344429},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Thu Jun 12 18:10:39 MDT 2008},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {Process variations play an increasingly important role
                     on the success of analog circuits. State-of-the-art
                     analog circuits are based on complex architectures and
                     contain many hierarchical layers and parameters.
                     Knowledge of the parameter variances and their
                     contribution patterns is crucial for a successful
                     design process. This information is valuable to find
                     solutions for many problems in design, design
                     automation, testing, and fault tolerance. In this
                     article, we present a hierarchical variance analysis
                     methodology for multistage analog circuits. Starting
                     from the process/layout level, we derive implicit
                     hierarchical relations and extract the sensitivity
                     information analytically. We make use of previously
                     computed values whenever possible so as to reduce
                     computational time. The proposed approach is
                     particularly geared for the domain of design and test
                     automation, where multiple runs on slightly different
                     circuits are necessary. Experimental results indicate
                     that the proposed method provides both accuracy and
                     computational efficiency when compared with prior
                     approaches.},
  acknowledgement = ack-nhfb,
  articleno       = {33},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {analog circuits; Hierarchical variance analysis;
                     parameter correlations; performance model; process
                     variations},
}

@Article{Cheng:2008:FSI,
  author          = {Lei Cheng and Deming Chen and Martin D. F. Wong},
  title           = {A fast simultaneous input vector generation and gate
                     replacement algorithm for leakage power reduction},
  journal         = j-TODAES,
  volume          = {13},
  number          = {2},
  pages           = {34:1--34:??},
  month           = apr,
  year            = {2008},
  CODEN           = {ATASFO},
  DOI             = {http://doi.acm.org/10.1145/1344418.1344430},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Thu Jun 12 18:10:39 MDT 2008},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {The Input vector control (IVC) technique is based on
                     the observation that the leakage current in a CMOS
                     logic gate depends on gate input state, and a good
                     input vector is able to minimize leakage when the
                     circuit is in sleep mode. The gate replacement
                     technique is a very effective method to further reduce
                     the leakage current. In this article, we propose a fast
                     heuristic algorithm to find a low-leakage input vector
                     with simultaneous gate replacement. Results on MCNC91
                     benchmark circuits show that our algorithm produces
                     14\% better leakage current reduction with several
                     orders of magnitude speedup in runtime for large
                     circuits compared to the previous state-of-the-art
                     algorithm. In particular, the average runtime for the
                     ten largest combinational circuits has been
                     dramatically reduced from 1879 seconds to 0.34
                     seconds.},
  acknowledgement = ack-nhfb,
  articleno       = {34},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {gate replacement; Input vector control; leakage
                     reduction},
}

@Article{Bernasconi:2008:OKS,
  author =       "Anna Bernasconi and Valentina Ciriani and Roberto
                 Cordone",
  title =        "The optimization of {kEP-SOPs}: {Computational}
                 complexity, approximability and experiments",
  journal =      j-TODAES,
  volume =       "13",
  number =       "2",
  pages =        "35:1--35:??",
  month =        apr,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1344418.1344431",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Jun 12 18:10:39 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "We propose a new algebraic four-level expression
                 called k-EXOR-projected sum of products (kEP-SOP). The
                 optimization of a kEP-SOP is NP$^{\mathrm{NP}}$-hard, but
                 can be approximated within a fixed performance guarantee
                 in polynomial time. Moreover, fully testable circuits
                 under the stuck-at-fault model can be derived from
                 kEP-SOPs by adding at most a constant number of
                 multiplexer gates. The experiments show that the
                 computational time is very short and the results are
                 most of the time optimal with respect to the number of
                 products involved. kEP-SOPs also prove experimentally a
                 good starting point for general multilevel logic
                 synthesis.",
  acknowledgement = ack-nhfb,
  articleno =    "35",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "approximation algorithm; Automatic synthesis;
                 multilevel logic synthesis; optimization; testing",
}

@Article{Bahar:2008:IJA,
  author          = {R. Iris Bahar and Krishnendu Chakrabarty},
  title           = {Introduction to joint {ACM JETC\slash TODAES} special
                     issue on new, emerging, and specialized technologies},
  journal         = j-TODAES,
  volume          = {13},
  number          = {2},
  pages           = {36:1--36:??},
  month           = apr,
  year            = {2008},
  CODEN           = {ATASFO},
  DOI             = {http://doi.acm.org/10.1145/1344418.1344432},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Thu Jun 12 18:10:39 MDT 2008},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  articleno       = {36},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Dutt:2008:E,
  author          = {Nikil Dutt},
  title           = {Editorial},
  journal         = j-TODAES,
  volume          = {13},
  number          = {3},
  pages           = {37:1--37:??},
  month           = jul,
  year            = {2008},
  CODEN           = {ATASFO},
  DOI             = {http://doi.acm.org/10.1145/1367045.1367046},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Tue Aug 5 18:41:27 MDT 2008},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  articleno       = {37},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Jones:2008:ISS,
  author          = {Alex K. Jones and Robert Walker},
  title           = {Introduction to the special section on demonstrable
                     software systems and hardware platforms {II}},
  journal         = j-TODAES,
  volume          = {13},
  number          = {3},
  pages           = {38:1--38:??},
  month           = jul,
  year            = {2008},
  CODEN           = {ATASFO},
  DOI             = {http://doi.acm.org/10.1145/1367045.1367047},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Tue Aug 5 18:41:27 MDT 2008},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  articleno       = {38},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}

@Article{Kwon:2008:RPP,
  author          = {Seongnam Kwon and Yongjoo Kim and Woo-Chul Jeun and
                     Soonhoi Ha and Yunheung Paek},
  title           = {A retargetable parallel-programming framework for
                     {MPSoC}},
  journal         = j-TODAES,
  volume          = {13},
  number          = {3},
  pages           = {39:1--39:??},
  month           = jul,
  year            = {2008},
  CODEN           = {ATASFO},
  DOI             = {http://doi.acm.org/10.1145/1367045.1367048},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Tue Aug 5 18:41:27 MDT 2008},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {As more processing elements are integrated in a single
                     chip, embedded software design becomes more
                     challenging: It becomes a parallel programming for
                     nontrivial heterogeneous multiprocessors with diverse
                     communication architectures, and design constraints
                     such as hardware cost, power, and timeliness. In the
                     current practice of parallel programming with MPI or
                     OpenMP, the programmer should manually optimize the
                     parallel code for each target architecture and for the
                     design constraints. Thus, the design-space exploration
                     of MPSoC (multiprocessor systems-on-chip) costs become
                     prohibitively large as software development overhead
                     increases drastically. To solve this problem, we
                     develop a parallel-programming framework based on a
                     novel programming model called common intermediate code
                     (CIC). In a CIC, functional parallelism and data
                     parallelism of application tasks are specified
                     independently of the target architecture and design
                     constraints. Then, the CIC translator translates the
                     CIC into the final parallel code, considering the
                     target architecture and design constraints to make the
                     CIC retargetable. Experiments with preliminary
                     examples, including the H.263 decoder, show that the
                     proposed parallel-programming framework increases the
                     design productivity of MPSoC software significantly.},
  acknowledgement = ack-nhfb,
  articleno       = {39},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {design-space exploration; embedded software;
                     multiprocessor system on chip; parallel-programming;
                     software generation},
}

@Article{Kumar:2008:MSS,
  author          = {Akash Kumar and Shakith Fernando and Yajun Ha and Bart
                     Mesman and Henk Corporaal},
  title           = {Multiprocessor systems synthesis for multiple
                     use-cases of multiple applications on {FPGA}},
  journal         = j-TODAES,
  volume          = {13},
  number          = {3},
  pages           = {40:1--40:??},
  month           = jul,
  year            = {2008},
  CODEN           = {ATASFO},
  DOI             = {http://doi.acm.org/10.1145/1367045.1367049},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Tue Aug 5 18:41:27 MDT 2008},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {Future applications for embedded systems demand chip
                     multiprocessor designs to meet real-time deadlines. The
                     large number of applications in these systems generates
                     an exponential number of use-cases. The key design
                     automation challenges are designing systems for these
                     use-cases and fast exploration of software and hardware
                     implementation alternatives with accurate performance
                     evaluation of these use-cases. These challenges cannot
                     be overcome by current design methodologies which are
                     semiautomated, time consuming, and error prone.\par

                     In this article, we present a design methodology to
                     generate multiprocessor systems in a systematic and
                     fully automated way for {\em multiple use-cases}.
                     Techniques are presented to merge multiple use-cases
                     into one hardware design to minimize cost and design
                     time, making it well suited for fast design-space
                     exploration (DSE) in MPSoC systems. Heuristics to
                     partition use-cases are also presented such that each
                     partition can fit in an FPGA, and all use-cases can be
                     catered for.\par

                     The proposed methodology is implemented into a tool for
                     Xilinx FPGAs for evaluation. The tool is also made
                     available online for the benefit of the research
                     community and is used to carry out a DSE case study
                     with multiple use-cases of real-life applications: H263
                     and JPEG decoders. The generation of the entire design
                     takes about 100 ms, and the whole DSE was completed in
                     45 minutes, including FPGA mapping and synthesis. The
                     heuristics used for use-case partitioning reduce the
                     design-exploration time elevenfold in a case study with
                     mobile-phone applications.},
  acknowledgement = ack-nhfb,
  articleno       = {40},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {design exploration; FPGA; multi-application;
                     multimedia systems; multiple use-cases; multiprocessor
                     systems; synchronous data-flow graphs},
}

@Article{Krashinsky:2008:ISV,
  author          = {Ronny Krashinsky and Christopher Batten and Krste
                     Asanovi{\'c}},
  title           = {Implementing the {Scale} vector-thread processor},
  journal         = j-TODAES,
  volume          = {13},
  number          = {3},
  pages           = {41:1--41:??},
  month           = jul,
  year            = {2008},
  CODEN           = {ATASFO},
  DOI             = {http://doi.acm.org/10.1145/1367045.1367050},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Tue Aug 5 18:41:27 MDT 2008},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {The Scale vector-thread processor is a
                     complexity-effective solution for embedded computing
                     which flexibly supports both vector and highly
                     multithreaded processing. The 7.1-million transistor
                     chip has 16 decoupled execution clusters, vector load
                     and store units, and a nonblocking 32KB cache. An
                     automated and iterative design and verification flow
                     enabled a performance-, power-, and area-efficient
                     implementation with two person-years of development
                     effort. Scale has a core area of 16.6 mm$^2$ in 180 nm
                     technology, and it consumes 400 mW--1.1 W while running
                     at 260 MHz.},
  acknowledgement = ack-nhfb,
  articleno       = {41},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {hybrid C++/Verilog simulation; iterative VLSI design
                     flow; multithreaded processors; procedural datapath
                     pre-placement; vector processors; vector-thread
                     processors},
}

@Article{Mishra:2008:SDD,
  author          = {Prabhat Mishra and Nikil Dutt},
  title           = {Specification-driven directed test generation for
                     validation of pipelined processors},
  journal         = j-TODAES,
  volume          = {13},
  number          = {3},
  pages           = {42:1--42:??},
  month           = jul,
  year            = {2008},
  CODEN           = {ATASFO},
  DOI             = {http://doi.acm.org/10.1145/1367045.1367051},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Tue Aug 5 18:41:27 MDT 2008},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {Functional validation is a major bottleneck in
                     pipelined processor design due to the combined effects
                     of increasing design complexity and lack of efficient
                     techniques for directed test generation. Directed test
                     vectors can reduce overall validation effort, since
                     shorter tests can obtain the same coverage goal
                     compared to the random tests. This article presents a
                     specification-driven directed test generation
                     methodology. The proposed methodology makes three
                     important contributions. First, a general graph model
                     is developed that can capture the structure and
                     behavior (instruction set) of a wide variety of
                     pipelined processors. The graph model is generated from
                     the processor specification. Next, we propose a
                     functional fault model that is used to define the
                     functional coverage for pipelined architectures.
                     Finally, we propose two complementary test generation
                     techniques: test generation using model checking, and
                     test generation using template-based procedures. These
                     test generation techniques accept the graph model of
                     the architecture as input and generate test programs to
                     detect all the faults in the functional fault model.
                     Our experimental results on two pipelined processor
                     models demonstrate several orders-of-magnitude
                     reduction in overall validation effort by drastically
                     reducing both test-generation time and number of test
                     programs required to achieve a coverage goal.},
  acknowledgement = ack-nhfb,
  articleno       = {42},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {functional validation; model checking; test
                     generation},
}

@Article{Joo:2008:ECP,
  author          = {Yongsoo Joo and Youngjin Cho and Donghwa Shin and
                     Jaehyun Park and Naehyuck Chang},
  title           = {An energy characterization platform for memory devices
                     and energy-aware data compression for multilevel-cell
                     flash memory},
  journal         = j-TODAES,
  volume          = {13},
  number          = {3},
  pages           = {43:1--43:??},
  month           = jul,
  year            = {2008},
  CODEN           = {ATASFO},
  DOI             = {http://doi.acm.org/10.1145/1367045.1367052},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Tue Aug 5 18:41:27 MDT 2008},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {Memory devices often consume more energy than
                     microprocessors in current portable embedded systems,
                     but their energy consumption changes significantly with
                     the type of transaction, data values, and access
                     timing, as well as depending on the total number of
                     transactions. These variabilities mean that an
                     innovative tool and framework are required to
                     characterize modern memory devices running in embedded
                     system architectures.\par

                     We introduce an energy measurement and characterization
                     platform for memory devices, and demonstrate an
                     application to multilevel-cell (MLC) flash memories, in
                     which we discover significant value-dependent
                     programming energy variations. We introduce an
                     energy-aware data compression method that minimizes the
                     flash programming energy, rather than the size of the
                     compressed data, which is formulated as an entropy
                     coding with unequal bit-pattern costs. Deploying a
                     probabilistic approach, we derive energy-optimal
                     bit-pattern probabilities and expected values of the
                     bit-pattern costs which are applicable to the large
                     amounts of compressed data typically found in
                     multimedia applications. Then we develop an
                     energy-optimal prefix coding that uses integer linear
                     programming, and construct a prefix-code table. From a
                     consideration of Pareto-optimal energy consumption, we
                     can make tradeoffs between data size and programming
                     energy, such as a 41\% energy savings for a 52\% area
                     overhead.},
  acknowledgement = ack-nhfb,
  articleno       = {43},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {compression; flash memory; MLC},
}

@Article{Huffmire:2008:DSS,
  author          = {Ted Huffmire and Brett Brotherton and Nick Callegari
                     and Jonathan Valamehr and Jeff White and Ryan Kastner
                     and Tim Sherwood},
  title           = {Designing secure systems on reconfigurable hardware},
  journal         = j-TODAES,
  volume          = {13},
  number          = {3},
  pages           = {44:1--44:??},
  month           = jul,
  year            = {2008},
  CODEN           = {ATASFO},
  DOI             = {http://doi.acm.org/10.1145/1367045.1367053},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Tue Aug 5 18:41:27 MDT 2008},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {The extremely high cost of custom ASIC fabrication
                     makes FPGAs an attractive alternative for deployment of
                     custom hardware. Embedded systems based on
                     reconfigurable hardware integrate many functions onto a
                     single device. Since embedded designers often have no
                     choice but to use soft IP cores obtained from third
                     parties, the cores operate at different trust levels,
                     resulting in mixed-trust designs. The goal of this
                     project is to evaluate recently proposed security
                     primitives for reconfigurable hardware by building a
                     real embedded system with several cores on a single
                     FPGA and implementing these primitives on the system.
                     Overcoming the practical problems of integrating
                     multiple cores together with security mechanisms will
                     help us to develop realistic security-policy
                     specifications that drive enforcement mechanisms on
                     embedded systems.},
  acknowledgement = ack-nhfb,
  articleno       = {44},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {Advanced Encryption Standard (AES); controlled
                     sharing; enforcement mechanisms; execution monitors;
                     Field programmable gate arrays (FPGAs); hardware
                     security; isolation; memory protection; reference
                     monitors; security policies; security primitives;
                     separation; static analysis; systems-on-a-chip (SoCs)},
}

@Article{Manolios:2008:AVS,
  author          = {Panagiotis Manolios and Sudarshan K. Srinivasan},
  title           = {Automatic verification of safety and liveness for
                     pipelined machines using {WEB} refinement},
  journal         = j-TODAES,
  volume          = {13},
  number          = {3},
  pages           = {45:1--45:??},
  month           = jul,
  year            = {2008},
  CODEN           = {ATASFO},
  DOI             = {http://doi.acm.org/10.1145/1367045.1367054},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Tue Aug 5 18:41:27 MDT 2008},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {We show how to automatically verify that complex
                     pipelined machine models satisfy the same safety and
                     liveness properties as their instruction-set
                     architecture (ISA) models by using well-founded
                     equivalence bisimulation (WEB) refinement. We show how
                     to reduce WEB-refinement proof obligations to formulas
                     expressible in the decidable logic of counter
                     arithmetic with lambda expressions and uninterpreted
                     functions (CLU). This allows us to automate the
                     verification of the pipelined machine models by using
                     the UCLID decision procedure to transform CLU formulas
                     to Boolean satisfiability problems. To relate pipelined
                     machine states to ISA states, we use the commitment and
                     flushing refinement maps. We evaluate our work using 17
                     pipelined machine models that contain various features,
                     including deep pipelines, precise exceptions, branch
                     prediction, interrupts, and instruction queues. Our
                     experimental results show that the overhead of proving
                     liveness, obtained by comparing the cost of proving
                     both safety and liveness with the cost of only proving
                     safety, is about 17\%, but depends on the refinement
                     map used; for example, the liveness overhead is 23\%
                     when flushing is used and is negligible when commitment
                     is used.},
  acknowledgement = ack-nhfb,
  articleno       = {45},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {bisimulation; commitment; flushing; liveness;
                     pipelined machines; refinement; refinement maps; SAT;
                     verification},
}

@Article{Wu:2008:PVA,
  author          = {Huaizhi Wu and Martin D. F. Wong and Wilsin Gosti},
  title           = {Postplacement voltage assignment under performance
                     constraints},
  journal         = j-TODAES,
  volume          = {13},
  number          = {3},
  pages           = {46:1--46:??},
  month           = jul,
  year            = {2008},
  CODEN           = {ATASFO},
  DOI             = {http://doi.acm.org/10.1145/1367045.1367055},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Tue Aug 5 18:41:27 MDT 2008},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {Multi-Vdd is an effective method to reduce both
                     leakage and dynamic power. A key challenge in a
                     multi-Vdd design is to control the complexity of the
                     power-supply system and limit the demand for level
                     shifters. This can be tackled by grouping cells of
                     different supply voltages into a small number of
                     voltage islands. Recently, an elegant algorithm was
                     proposed for generating voltage islands that balance
                     the power-versus-design-cost tradeoff under performance
                     requirement, according to the placement proximity of
                     the critical cells. One prerequisite of this algorithm
                     is an initial voltage assignment at the standard-cell
                     level that meets timing. In this article, we present a
                     novel method to produce quality voltage assignment
                     which not only meets timing but also forms good
                     proximity of the critical cells to provide a smooth
                     input to the aforementioned voltage island generation.
                     Our algorithm is based on effective delay budgeting and
                     efficient computation of physical proximity by Voronoi
                     diagram. Our extensive experiments on real industrial
                     designs show that our algorithm leads to 25\%--75\%
                     improvement in the voltage island generation in terms
                     of the number of voltage islands generated, with
                     computation time only linear to design size.},
  acknowledgement = ack-nhfb,
  articleno       = {46},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {low power; timing; voltage assignment; Voronoi
                     diagram},
}

@Article{Bombieri:2008:ROT,
  author          = {Nicola Bombieri and Franco Fummi and Graziano
                     Pravadelli},
  title           = {Reuse and optimization of testbenches and properties
                     in a {TLM-to-RTL} design flow},
  journal         = j-TODAES,
  volume          = {13},
  number          = {3},
  pages           = {47:1--47:??},
  month           = jul,
  year            = {2008},
  CODEN           = {ATASFO},
  DOI             = {http://doi.acm.org/10.1145/1367045.1367056},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Tue Aug 5 18:41:27 MDT 2008},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {In transaction-level modeling (TLM), verification
                     methodologies based on transactions allow testbenches,
                     properties, and IP cores in mixed TL-RTL designs to be
                     reused. However, no papers in the literature analyze
                     the effectiveness of transaction-based verification
                     (TBV) in comparison to the more traditional RTL
                     approach. The first contribution of this article is the
                     introduction of a functional-fault-model-based
                     methodology for demonstrating the effectiveness of
                     reuse through TBV. A second contribution is the
                     introduction of a similar methodology for efficient
                     property checking which identifies and removes
                     redundant properties prior to assertion-based
                     verification or model checking.},
  acknowledgement = ack-nhfb,
  articleno       = {47},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {fault models; functional verification; model checking;
                     TBV; TLM},
}

%%% TODAES 13(3), July 2008, article 48: the VIRTUS processor virtualization
%%% architecture for mobile terminals.
@Article{Inoue:2008:PVS,
  author =       "Hiroaki Inoue and Junji Sakai and Masato Edahiro",
  title =        "Processor virtualization for secure mobile terminals",
  journal =      j-TODAES,
  volume =       "13",
  number =       "3",
  pages =        "48:1--48:??",
  month =        jul,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1367045.1367057",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Tue Aug 5 18:41:27 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "We propose a processor virtualization architecture,
                 VIRTUS, to provide a dedicated domain for preinstalled
                 applications and virtualized domains for downloaded
                 native applications. With it, security-oriented
                 next-generation mobile terminals can provide any number
                 of domains for native applications. VIRTUS features
                 three new technologies, namely, VMM asymmetrization,
                 dynamic interdomain communication (IDC), and
                 virtualization-assist logic, and it is first in the
                 world to virtualize an ARM-based multiprocessor.
                 Evaluations have shown that VMM asymmetrization results
                 in significantly less performance degradation and LOC
                 increase than do other VMMs. Further, dynamic IDC
                 overhead is low enough, and virtualization-assist logic
                 can be implemented in a sufficiently small area.",
  acknowledgement = ack-nhfb,
  articleno =    "48",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "multiprocessor; processor virtualization",
}

%%% TODAES 13(3), July 2008, article 49: system scenarios plus configurable
%%% memories against process variability and application dynamism.
@Article{Sanz:2008:CSS,
  author =       "Concepci{\'o}n Sanz and Manuel Prieto and Jos{\'e}
                 Ignacio G{\'o}mez and Antonis Papanikolaou and Miguel
                 Miranda and Francky Catthoor",
  title =        "Combining system scenarios and configurable memories
                 to tolerate unpredictability",
  journal =      j-TODAES,
  volume =       "13",
  number =       "3",
  pages =        "49:1--49:??",
  month =        jul,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1367045.1367058",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Tue Aug 5 18:41:27 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Process variability and the dynamism of new
                 applications increase the uncertainty of embedded
                 systems and force designers to use pessimistic
                 assumptions, which have a tremendous impact on both the
                 performance and energy consumption of their memory
                 organizations. In this article we introduce an
                 experimental framework which tries to mitigate the
                 effects of both sources of unpredictability. At compile
                 time, an extensive profiling helps us to detect system
                 scenarios and bounds application dynamism. At the
                 organization level, we incorporate a heterogeneous
                 memory architecture composed by several configurable
                 memories. A calibration process and a runtime control
                 system adapt the platform to the current application
                 needs. Our approach manages to reduce significantly the
                 energy overhead associated to both variability and
                 application dynamism (up to 60\%, according to our
                 simulations) without compromising the timing
                 constraints existing in our target domain of dynamic
                 periodic multimedia applications.",
  acknowledgement = ack-nhfb,
  articleno =    "49",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "parametric yield; process variation; variability
                 compensation",
}

%%% TODAES 13(3), July 2008, article 50: ILP formulations of banked-memory
%%% energy techniques (nonuniform banks, migration, compression, replication).
@Article{Ozturk:2008:IBE,
  author =       "Ozcan Ozturk and Mahmut Kandemir",
  title =        "{ILP}-based energy minimization techniques for banked
                 memories",
  journal =      j-TODAES,
  volume =       "13",
  number =       "3",
  pages =        "50:1--50:??",
  month =        jul,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1367045.1367059",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Tue Aug 5 18:41:27 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Main memories can consume a significant portion of
                 overall energy in many data-intensive embedded
                 applications. One way of reducing this energy
                 consumption is banking, that is, dividing available
                 memory space into multiple banks and placing unused
                 (idle) memory banks into low-power operating modes.
                 Prior work investigated code-restructuring- and
                 data-layout-reorganization-based approaches for
                 increasing the energy benefits that could be obtained
                 from a banked memory architecture. This article
                 explores different techniques that can potentially
                 coexist within the same optimization framework for
                 maximizing benefits of low-power operating modes. These
                 techniques include employing nonuniform bank sizes,
                 data migration, data compression, and data replication.
                 By using these techniques, we try to increase the
                 chances for utilizing low-power operating modes in a
                 more effective manner, and achieve further energy
                 savings over what could be achieved by exploiting
                 low-power modes alone. Specifically, nonuniform banking
                 tries to match bank sizes with application-data access
                 patterns. The goal of data migration is to cluster data
                 with similar access patterns in the same set of banks.
                 Data compression reduces the size of the data used by
                 an application, and thus helps reduce the number of
                 memory banks occupied by data. Finally, data
                 replication increases bank idleness by duplicating
                 select read-only data blocks across banks. We formulate
                 each of these techniques as an ILP (integer linear
                 programming) problem, and solve them using a commercial
                 solver. Our experimental analysis using several
                 benchmarks indicates that all the techniques presented
                 in this framework are successful in reducing memory
                 energy consumption. Based on our experience with these
                 techniques, we recommend to compiler writers for banked
                 memories to consider data compression, replication, and
                 migration.",
  acknowledgement = ack-nhfb,
  articleno =    "50",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "compilers; data compression; DRAM; low-power operating
                 modes; memory banking; migration; replication",
}

%%% TODAES 13(3), July 2008, article 51: sharing mutually exclusive
%%% sum-of-product (SOP) blocks to save area.  No keywords field is recorded.
@Article{Das:2008:RSA,
  author =       "Sabyasachi Das and Sunil P. Khatri",
  title =        "Resource sharing among mutually exclusive
                 sum-of-product blocks for area reduction",
  journal =      j-TODAES,
  volume =       "13",
  number =       "3",
  pages =        "51:1--51:??",
  month =        jul,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1367045.1367060",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Tue Aug 5 18:41:27 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "In state-of-the-art digital designs, arithmetic blocks
                 consume a major portion of the total area of the IC.
                 The arithmetic sum-of-product (SOP) is the most widely
                 used arithmetic block. Some of the examples of SOP are
                 adder, subtractor, multiplier, multiply-accumulator
                 (MAC), squarer, chain-of-adders, incrementor,
                 decrementor, etc. In this article, we introduce a
                 novel, area-efficient architecture to share different
                 SOP blocks which are used in a mutually exclusive
                 manner. We implement the core functions of the largest
                 SOP only once and reuse different parts of the core
                 subblocks for all other SOP operations with the help of
                 multiplexers. This architecture can be used in the
                 nontiming-critical paths of the design, to save
                 significant amounts of area. Our experimental data
                 shows that the proposed sharing-based architecture
                 results in about 37\% area savings compared to the
                 results obtained from a commercially available
                 best-in-class datapath synthesis tool. In addition, our
                 proposed shared implementation consumes about 18\% less
                 power. These improvements were verified on
                 placed-and-routed designs as well.",
  acknowledgement = ack-nhfb,
  articleno =    "51",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

%%% TODAES 13(3), July 2008, article 52: plane-sweep partitioning of
%%% parameterized 45-degree polygons into trapezoids.
@Article{Tseng:2008:PPD,
  author =       "I-Lun Tseng and Adam Postula",
  title =        "Partitioning parameterized 45-degree polygons with
                 constraint programming",
  journal =      j-TODAES,
  volume =       "13",
  number =       "3",
  pages =        "52:1--52:??",
  month =        jul,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1367045.1367061",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Tue Aug 5 18:41:27 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "An algorithm for partitioning parameterized 45-degree
                 polygons into parameterized trapezoids is proposed in
                 this article. The algorithm is based on the plane-sweep
                 technique and can handle polygons with complicated
                 constraints. The input to the algorithm consists of the
                 contour of a parameterized polygon to be partitioned
                 and a set of constraints for parameters of the contour.
                 The algorithm uses horizontal cuts only and generates a
                 number of nonoverlapping trapezoids whose union is the
                 original parameterized polygon. Processing of
                 constraints and coordinates that contain first-order
                 multiple-variable polynomials has been made possible by
                 incorporating the JaCoP constraint programming library.
                 The proposed algorithm has been implemented in Java
                 programming language and can be used as the basis to
                 build the trapezoidal corner stitching data structure
                 for parameterized VLSI layout masks.",
  acknowledgement = ack-nhfb,
  articleno =    "52",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "analog and mixed-signal design; parameterized layouts;
                 parameterized polygons; polygon decomposition;
                 trapezoidal corner stitching",
}

%%% TODAES 13(3), July 2008, article 53: SoC test planning for port-scalable
%%% testers, including a power-aware variant.
@Article{Sehgal:2008:PAS,
  author =       "Anuja Sehgal and Sudarshan Bahukudumbi and Krishnendu
                 Chakrabarty",
  title =        "Power-aware {SoC} test planning for effective
                 utilization of port-scalable testers",
  journal =      j-TODAES,
  volume =       "13",
  number =       "3",
  pages =        "53:1--53:??",
  month =        jul,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1367045.1367062",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Tue Aug 5 18:41:27 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Many system-on-chip (SoC) integrated circuits contain
                 embedded cores with different scan frequencies. To
                 better meet the test requirements for such
                 heterogeneous SoCs, leading tester companies have
                 recently introduced port-scalable testers, which can
                 simultaneously drive groups of channels at different
                 data rates. However, the number of tester channels
                 available for scan testing is limited; therefore, a
                 higher shift frequency can increase the test time for a
                 core if the resulting test access architecture reduces
                 the bit-width used to access it. We present a scalable
                 test planning technique that exploits port scalability
                 of testers to reduce SoC test time. We compare the
                 proposed heuristic optimization method to two baseline
                 methods based on prior works that use a single scan
                 data rate for all embedded cores. We also propose a
                 power-aware test planning technique to effectively
                 utilize port-scalable testers under constraints of test
                 power consumption. Experimental results are presented
                 for power-aware test scheduling to illustrate the
                 impact of power constraints on overall test time.",
  acknowledgement = ack-nhfb,
  articleno =    "53",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "integer linear programming; port-scalable testers; SoC
                 test; test access architecture",
}

%%% TODAES 13(3), July 2008, article 54: evolving synthetic RTL benchmark
%%% circuits with predefined testability.
@Article{Pecenka:2008:ESR,
  author =       "Tomas Pecenka and Lukas Sekanina and Zdenek Kotasek",
  title =        "Evolution of synthetic {RTL} benchmark circuits with
                 predefined testability",
  journal =      j-TODAES,
  volume =       "13",
  number =       "3",
  pages =        "54:1--54:??",
  month =        jul,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1367045.1367063",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Tue Aug 5 18:41:27 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "This article presents a new real-world application of
                 evolutionary computing in the area of digital-circuits
                 testing. A method is described which enables to evolve
                 large synthetic RTL benchmark circuits with a
                 predefined structure and testability. Using the
                 proposed method, a new collection of synthetic
                 benchmark circuits was developed. These benchmark
                 circuits will be useful in a validation process of
                 novel algorithms and tools in the area of
                 digital-circuits testing. Evolved benchmark circuits
                 currently represent the most complex benchmark circuits
                 with a known level of testability. Furthermore, these
                 circuits are the largest that have ever been designed
                 by means of evolutionary algorithms. This work also
                 investigates suitable parameters of the evolutionary
                 algorithm for this problem and explores the limits in
                 the complexity of evolved circuits.",
  acknowledgement = ack-nhfb,
  articleno =    "54",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "benchmark circuit; evolvable hardware; testability
                 analysis",
}

%%% TODAES 13(4), September 2008, article 55: editorial; the entry carries no
%%% abstract or keywords field.
@Article{Pedram:2008:E,
  author =       "Massoud Pedram",
  title =        "Editorial",
  journal =      j-TODAES,
  volume =       "13",
  number =       "4",
  pages =        "55:1--55:??",
  month =        sep,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1391962.1391963",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Wed Oct 1 16:09:32 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  articleno =    "55",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

%%% TODAES 13(4), September 2008, article 56: utilization-bound tests for
%%% EDF scheduling of hardware tasks on PRTR FPGAs.
@Article{Guan:2008:SAP,
  author =       "Nan Guan and Qingxu Deng and Zonghua Gu and Wenyao Xu
                 and Ge Yu",
  title =        "Schedulability analysis of preemptive and
                 nonpreemptive {EDF} on partial runtime-reconfigurable
                 {FPGAs}",
  journal =      j-TODAES,
  volume =       "13",
  number =       "4",
  pages =        "56:1--56:??",
  month =        sep,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1391962.1391964",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Wed Oct 1 16:09:32 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Field Programmable Gate Arrays (FPGAs) are very
                 popular in today's embedded systems design, and Partial
                 Runtime-Reconfigurable (PRTR) FPGAs allow HW tasks to
                 be placed and removed dynamically at runtime. Hardware
                 task scheduling on PRTR FPGAs brings many challenging
                 issues to traditional real-time scheduling theory,
                 which have not been adequately addressed by the
                 research community compared to software task scheduling
                 on CPUs. In this article, we consider the
                 schedulability analysis problem of HW task scheduling
                 on PRTR FPGAs. We derive utilization bounds for several
                 variants of global preemptive/nonpreemptive EDF
                 scheduling, and compare the performance of different
                 utilization bound tests.",
  acknowledgement = ack-nhfb,
  articleno =    "56",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "FPGA; real-time scheduling; reconfigurable devices",
}

%%% TODAES 13(4), September 2008, article 57: slack-driven CLB clustering
%%% for FPGAs with dual supply voltages.
@Article{Mukherjee:2008:HLC,
  author =       "Rajarshi Mukherjee and Song Liu and Seda Ogrenci Memik
                 and Somsubhra Mondal",
  title =        "A high-level clustering algorithm targeting dual
                 {V$_{dd}$ FPGAs}",
  journal =      j-TODAES,
  volume =       "13",
  number =       "4",
  pages =        "57:1--57:??",
  month =        sep,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1391962.1391965",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Wed Oct 1 16:09:32 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Recent advanced power optimizations deployed in
                 commercial FPGAs, laid out a roadmap towards FPGA
                 devices that can be integrated into ultra low power
                 systems. In this article, we present a high-level
                 design tool to support the process of mapping an
                 application onto a FPGA device with dual supply
                 voltages. Our main contribution in this paper is an
                 algorithm, which creates voltage scaling ready clusters
                 by utilizing the timing slack available in the designs.
                 We propose to first create clusters of CLBs within a
                 given CLB-level netlist. This clustering algorithm
                 intends to group chains of CLBs possessing similar
                 amounts of timing slack along their critical path
                 together. Once these clusters are identified, they are
                 placed onto respective V$_{dd}$ partitions on the
                 device. We have evaluated different dual V$_{dd}$
                 fabrics and the potential gain in power consumption is
                 explored. When a subset of the logic blocks on the
                 device can be driven by low V$_{dd}$ levels (either
                 with a dedicated low V$_{dd}$ supply or with a
                 programmable selection between low and high V$_{dd}$
                 levels for these blocks) this affects placement and
                 routing. As a result the maximum frequency of the
                 designs may be affected. In order to evaluate the
                 overall impact of creating voltage islands, we measured
                 the Energy-Delay Product for our benchmark designs. We
                 observed that the Energy-Delay product can be decreased
                 by 26.9\% when the placement of the designs into
                 different voltage levels is guided by our clustering
                 algorithm.",
  acknowledgement = ack-nhfb,
  articleno =    "57",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "clustering; dynamic power; field programmable gate
                 arrays; partitioning; placement; voltage scaling",
}

%%% TODAES 13(4), September 2008, article 58: hybrid design-time/runtime
%%% scheduling of FPGA reconfigurations.
@Article{Resano:2008:ESR,
  author =       "Javier Resano and Juan Antonio Clemente and Carlos
                 Gonzalez and Daniel Mozos and Francky Catthoor",
  title =        "Efficiently scheduling runtime reconfigurations",
  journal =      j-TODAES,
  volume =       "13",
  number =       "4",
  pages =        "58:1--58:??",
  month =        sep,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1391962.1391966",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Wed Oct 1 16:09:32 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Due to the emergence of portable devices that must run
                 complex dynamic applications there is a need for
                 flexible platforms for embedded systems. Runtime
                 reconfigurable hardware can provide this flexibility
                 but the reconfiguration latency can significantly
                 decrease the performance. When dealing with task
                 graphs, runtime support that schedules the
                 reconfigurations in advance can drastically reduce this
                 overhead. However, executing complex scheduling
                 heuristics at runtime may generate an excessive
                 penalty. Hence, we have developed a hybrid
                 design-time/runtime reconfiguration scheduling
                 heuristic that generates its final schedule at runtime
                 but carries out most computations at design-time. We
                 have tested our approach in a PowerPC 405 processor
                 embedded on a FPGA demonstrating that it generates a
                 very small runtime penalty while providing almost as
                 good schedules as a full runtime approach.",
  acknowledgement = ack-nhfb,
  articleno =    "58",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "FPGAs; hardware multitasking; reconfigurable
                 architectures; runtime/design-time scheduling",
}

%%% TODAES 13(4), September 2008, article 59: throughput distribution of
%%% multiple voltage-frequency island (VFI) designs under process variation.
@Article{Garg:2008:SLT,
  author =       "Siddharth Garg and Diana Marculescu",
  title =        "System-level throughput analysis for process variation
                 aware multiple voltage-frequency island designs",
  journal =      j-TODAES,
  volume =       "13",
  number =       "4",
  pages =        "59:1--59:??",
  month =        sep,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1391962.1391967",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Wed Oct 1 16:09:32 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "The increasing variability in manufacturing process
                 parameters is expected to lead to significant
                 performance degradation in deep submicron technologies.
                 Multiple Voltage-Frequency Island (VFI) design styles
                 with fine-grained, process-variation aware clocking
                 have recently been shown to possess increased immunity
                 to manufacturing process variations. In this article,
                 we propose a theoretical framework that allows
                 designers to quantify the performance improvement that
                 is to be expected if they were to migrate from a fully
                 synchronous design to the proposed multiple VFI design
                 style. Specifically, we provide techniques to
                 efficiently and accurately estimate the probability
                 distribution of the execution rate (or throughput) of
                 both single and multiple VFI systems under the
                 influence of manufacturing process variations. Finally,
                 using an MPEG-2 encoder benchmark, we demonstrate how
                 the proposed analysis framework can be used by
                 designers to make architectural decisions such as the
                 granularity of VFI domain partitioning based on the
                 throughput constraints their systems are required to
                 satisfy.",
  acknowledgement = ack-nhfb,
  articleno =    "59",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "globally asynchronous locally synchronous;
                 manufacturing process variations; maximum cycle mean;
                 performance analysis; system-level design;
                 voltage-frequency islands",
}

%%% TODAES 13(4), September 2008, article 60: basic-block-level code
%%% compression guided by the control flow graph.
@Article{Ozturk:2008:APB,
  author =       "Ozcan Ozturk and Mahmut Kandemir and Guangyu Chen",
  title =        "Access pattern-based code compression for
                 memory-constrained systems",
  journal =      j-TODAES,
  volume =       "13",
  number =       "4",
  pages =        "60:1--60:??",
  month =        sep,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1391962.1391968",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Wed Oct 1 16:09:32 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "As compared to a large spectrum of performance
                 optimizations, relatively less effort has been
                 dedicated to optimize other aspects of embedded
                 applications such as memory space requirements, power,
                 real-time predictability, and reliability. In
                 particular, many modern embedded systems operate under
                 tight memory space constraints. One way of addressing
                 this constraint is to compress executable code and data
                 as much as possible. While researchers on code
                 compression have studied efficient hardware and
                 software based code compression strategies, many of
                 these techniques do not take application behavior into
                 account; that is, the same compression/decompression
                 strategy is used irrespective of the application being
                 optimized. This article presents an
                 application-sensitive code compression strategy based
                 on control flow graph (CFG) representation of the
                 embedded program. The idea is to start with a memory
                 image wherein all basic blocks of the application are
                 compressed, and decompress only the blocks that are
                 predicted to be needed in the near future. When the
                 current access to a basic block is over, our approach
                 also decides the point at which the block could be
                 compressed. We propose and evaluate several compression
                 and decompression strategies that try to reduce memory
                 requirements without excessively increasing the
                 original instruction cycle counts. Some of our
                 strategies make use of profile data, whereas others are
                 fully automatic. Our experimental evaluation using
                 seven applications from the MediaBench suite and three
                 large embedded applications reveals that the proposed
                 code compression strategy is very successful in
                 practice. Our results also indicate that working at a
                 basic block granularity, as opposed to a procedure
                 granularity, is important for maximizing memory space
                 savings.",
  acknowledgement = ack-nhfb,
  articleno =    "60",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "CFG; code access pattern; code compression; embedded
                 systems; memory optimization",
}

%%% TODAES 13(4), September 2008, article 61: compiler mapping of arrays to
%%% RAM blocks or registers on configurable architectures.
%%% NOTE(review): the abstract uses the \TM macro, presumably defined in a
%%% preamble elsewhere in this file -- confirm.
@Article{Baradaran:2008:CAM,
  author =       "Nastaran Baradaran and Pedro C. Diniz",
  title =        "A compiler approach to managing storage and memory
                 bandwidth in configurable architectures",
  journal =      j-TODAES,
  volume =       "13",
  number =       "4",
  pages =        "61:1--61:??",
  month =        sep,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1391962.1391969",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Wed Oct 1 16:09:32 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Configurable architectures offer the unique
                 opportunity of realizing hardware designs tailored to
                 the specific data and computational patterns of an
                 application code. Customizing the storage structures is
                 becoming increasingly important in mitigating the
                 continuing gap between memory latencies and internal
                 computing speeds. In this article we describe and
                 evaluate a compiler algorithm that maps the arrays of a
                 loop-based computation to internal storage structures,
                 either RAM blocks or discrete registers. Our objective
                 is to minimize the overall execution time while
                 considering the capacity and bandwidth constraints of
                 the storage resources. The novelty of our approach lies
                 in creating a single framework that combines high-level
                 compiler techniques with lower-level scheduling
                 information for mapping the data. We illustrate the
                 benefits of our approach for a set of image/signal
                 processing kernels using a Xilinx Virtex\TM{}
                 Field-Programmable Gate Array (FPGA). Our algorithm
                 leads to faster designs compared to the
                 state-of-the-art {\em custom data layout\/} mapping
                 technique, in some instances using less storage. When
                 compared to hand-coded designs, our results are
                 comparable in terms of execution time and resources,
                 but are derived in a minute fraction of the design
                 time.",
  acknowledgement = ack-nhfb,
  articleno =    "61",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "compiler analysis; configurable architectures;
                 high-level hardware synthesis; storage allocation and
                 management",
}

@Article{Banerjee:2008:ASM,
  author =       "Ansuman Banerjee and Pallab Dasgupta and P. P.
                 Chakrabarti",
  title =        "Auxiliary state machines + context-triggered
                 properties in verification",
  journal =      j-TODAES,
  volume =       "13",
  number =       "4",
  pages =        "62:1--62:??",
  month =        sep,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1391962.1391970",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Wed Oct 1 16:09:32 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Formal specifications of interface protocols between a
                 design-under-test and its environment mostly consist of
                 two types of correctness requirements, namely (a) a set
                 of invariants that applies throughout the protocol
                 execution and (b) a set of {\em context-triggered\/}
                 properties that applies only when the protocol state
                 belongs to a specific set of contexts. To model such
                 requirements, an increasingly popular design choice in
                 the assertion IP design community has been the use of
                 abstract {\em context state machines\/} and
                 state-oriented properties. In this paper, we formalize
                 this modeling style and present algorithms for
                 verifying such specifications. Specifically, we present
                 a purely formal approach and a semi-formal approach for
                 verifying such specifications. We demonstrate the use
                 of this design style in modeling some of the industry
                 standard protocol descriptions and present encouraging
                 results.",
  acknowledgement = ack-nhfb,
  articleno =    "62",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Panda:2008:SBV,
  author =       "S. K. Panda and Arnab Roy and P. P. Chakrabarti and
                 Rajeev Kumar",
  title =        "Simulation-based verification using {Temporally
                 Attributed Boolean Logic}",
  journal =      j-TODAES,
  volume =       "13",
  number =       "4",
  pages =        "63:1--63:??",
  month =        sep,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1391962.1391971",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Wed Oct 1 16:09:32 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "We propose a specification logic called Temporally
                 Attributed Boolean (TAB) Logic for Assertion Based
                 Verification, which allows us to: (i) represent
                 assertions succinctly, (ii) incorporate
                 data-orientation and (iii) associate timing to design
                 intentions. TAB Logic allows us to write specifications
                 functionally linking system variables from different
                 temporal contexts. We present examples to show the
                 motivation for this logic especially in the context of
                 high level modeling of complex real time systems. We
                 formally define TAB Logic, formulate the problem of
                 verification on a simulation trace and present
                 efficient algorithms to check TAB assertions, both
                 offline and online. We present results of application
                 of TAB Logic for Instruction Semantics and Bus
                 Transaction Verification of a bus integrated pipelined
                 processor core implementation. We also employ TAB Logic
                 to validate the Interrupt mode behavior of the
                 processor core implementation. Further, we show the
                 utility of TAB Logic in fault detection. Finally, we
                 demonstrate the applicability of TAB Logic in the
                 domain of simulation based verification of analog
                 circuits like Operational Amplifiers and DC-DC
                 Converters. We finally discuss the limitations of TAB
                 logic and conclude.",
  acknowledgement = ack-nhfb,
  articleno =    "63",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Bus verification; instruction semantics verification;
                 interrupt testing; offline-online verification
                 algorithm; simulation based verification; temporal
                 logic; timing verification",
}

@Article{Wang:2008:LAS,
  author =       "Sying-Jyan Wang and Kuo-Lin Peng and Kuang-Cyun Hsiao
                 and Katherine Shu-Min Li",
  title =        "Layout-aware scan chain reorder for launch-off-shift
                 transition test coverage",
  journal =      j-TODAES,
  volume =       "13",
  number =       "4",
  pages =        "64:1--64:??",
  month =        sep,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1391962.1391972",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Wed Oct 1 16:09:32 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Launch-off-shift (LOS) is a popular delay test
                 technique for scan-based designs. However, it is
                 usually not possible to achieve good delay fault
                 coverage in LOS test due to conflicts in test vectors.
                 In this article, we propose a layout-based scan chain
                 ordering method to improve fault coverage for LOS test
                 with limited routing overhead. A fast and effective
                 algorithm is used to eliminate conflicts in test
                 vectors while at the same time restrict the extra scan
                 chain routing. This approach provides many advantages.
                 (1) The proposed method can improve delay fault
                 coverage for LOS test. (2) With layout information
                 taken into account, the routing penalty is limited, and
                 thus the impact on circuit performance will not be
                 significant. Experimental results show that the
                 proposed LOS test method achieves about the same level
                 of delay fault coverage as enhanced scan does, while
                 the average scan chain wire length is about 2.2 times
                 of the shortest scan chain.",
  acknowledgement = ack-nhfb,
  articleno =    "64",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "scan chain ordering; Scan test; test generation;
                 transition faults",
}

@Article{Moiseev:2008:TAP,
  author =       "Konstantin Moiseev and Avinoam Kolodny and Shmuel
                 Wimer",
  title =        "Timing-aware power-optimal ordering of signals",
  journal =      j-TODAES,
  volume =       "13",
  number =       "4",
  pages =        "65:1--65:??",
  month =        sep,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1391962.1391973",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Wed Oct 1 16:09:32 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "A computationally efficient technique for reducing
                 interconnect active power in VLSI systems is presented.
                 Power reduction is accomplished by simultaneous wire
                 spacing and net ordering, such that cross-capacitances
                 between wires are optimally shared. The existence of a
                 unique power-optimal wire order within a bundle is
                 proven, and a method to construct this order is
                 derived. The optimal order of wires depends only on the
                 activity factors of the underlying signals; hence, it
                 can be performed prior to spacing optimization. By
                 using this order of wires, optimality of the combined
                 solution is guaranteed (as compared with any other
                 ordering and spacing of the wires). Timing-aware power
                 optimization is enabled by simultaneously considering
                 timing criticality weights and activity factors for the
                 signals. The proposed algorithm has been applied to
                 various interconnect layouts, including wire bundles
                 from high-end microprocessor circuits in 65 nm
                 technology. Interconnect power reduction of 17\% on
                 average has been observed in such bundles.",
  acknowledgement = ack-nhfb,
  articleno =    "65",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "interconnect optimization; power optimization; Wire
                 ordering; wire spacing",
}

@Article{Lu:2008:EDI,
  author =       "Chao-Hung Lu and Hung-Ming Chen and Chien-Nan Jimmy
                 Liu",
  title =        "Effective decap insertion in area-array {SoC}
                 floorplan design",
  journal =      j-TODAES,
  volume =       "13",
  number =       "4",
  pages =        "66:1--66:??",
  month =        sep,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1391962.1391974",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Wed Oct 1 16:09:32 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "As VLSI technology enters the nanometer era, supply
                 voltages continue to drop due to the reduction of power
                 dissipation, but it makes power integrity problems even
                 worse. Employing decoupling capacitances (decaps) in
                 floorplan stage is a common approach to alleviating
                 supply noise problems. Previous researches overestimate
                 the decap budget and do not fully utilize the empty
                 space of the floorplan. A floorplan usually has a lot
                 of available space that can be used to insert the decap
                 without increasing the floorplan area. Therefore, the
                 goal of this work is to develop a better model to
                 calculate the required decap to solve the power supply
                 noise problem in area-array based designs, and increase
                 the usage of available space in the floorplan to reduce
                 the area overhead caused by decap insertion. The
                 experimental results of this work are encouraging.
                 Compared with previous approaches, our methodology
                 reduces 38\% of the decap budget in average for MCNC
                 benchmarks but can still meet the power supply noise
                 requirements. The final floorplan areas with decap are
                 also smaller than the numbers reported in previous
                 works.",
  acknowledgement = ack-nhfb,
  articleno =    "66",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "decap insertion; floorplan; Power supply noise",
}

@Article{Moffitt:2008:CDF,
  author =       "Michael D. Moffitt and Jarrod A. Roy and Igor L.
                 Markov and Martha E. Pollack",
  title =        "Constraint-driven floorplan repair",
  journal =      j-TODAES,
  volume =       "13",
  number =       "4",
  pages =        "67:1--67:??",
  month =        sep,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1391962.1391975",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Wed Oct 1 16:09:32 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "In this work, we propose a new and efficient approach
                 to the {\em floorplan repair\/} problem, where violated
                 design constraints are satisfied by applying small
                 changes to an existing rough floorplan. Such a
                 floorplan can be produced by a human designer, a
                 scalable placement algorithm, or result from
                 engineering adjustments to an existing floorplan. In
                 such cases, overlapping modules must be separated, and
                 others may need to be repositioned to satisfy
                 additional requirements. Our algorithmic framework uses
                 an expressive graph-based encoding of constraints which
                 can reflect fixed-outline, region, proximity and
                 alignment constraints. By tracking the implications of
                 existing constraints, we resolve violations by imposing
                 gradual modifications to the floorplan, in an attempt
                 to preserve the characteristics of its initial design.
                 Empirically, our approach is effective at removing
                 overlaps and repairing violations that may occur when
                 design constraints are acquired and imposed
                 dynamically.",
  acknowledgement = ack-nhfb,
  articleno =    "67",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "constraints; Floorplanning; legalization",
}

@Article{Ozdal:2008:ORA,
  author =       "Muhammet Mustafa Ozdal and Martin D. F. Wong and
                 Philip S. Honsinger",
  title =        "Optimal routing algorithms for rectilinear pin
                 clusters in high-density multichip modules",
  journal =      j-TODAES,
  volume =       "13",
  number =       "4",
  pages =        "68:1--68:??",
  month =        sep,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1391962.1391976",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Wed Oct 1 16:09:32 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "As the circuit densities and transistor counts are
                 increasing, the package routing problem is becoming
                 more and more challenging. In this article, we study an
                 important routing problem encountered in typical
                 high-end MCM designs: routing within dense pin
                 clusters. Pin clusters are often formed by pins that
                 belong to the same functional unit or the same data
                 bus, and can become bottlenecks in terms of overall
                 routability. Typically, these clusters have irregular
                 shapes, which can be approximated with rectilinear
                 convex boundaries. Since such boundaries have often
                 irregular shapes, a traditional escape routing
                 algorithm may give unroutable solutions. In this
                 article, we study how the positions of escape terminals
                 on a convex boundary affect the overall routability.
                 For this purpose, we propose a set of necessary and
                 sufficient conditions to model routability outside a
                 rectilinear convex boundary. Given an escape routing
                 solution, we propose an optimal algorithm to select the
                 maximal subset of nets that are routable outside the
                 boundary. After that, we focus on an integrated
                 approach to consider routability constraints (outside
                 the boundary) during the actual escape routing
                 algorithm. Here, we propose an optimal algorithm to
                 find the best escape routing solution that satisfies
                 all routability constraints. Our experiments
                 demonstrate that we can reduce the number of layers by
                 17\% on the average, by using this integrated
                 methodology.",
  acknowledgement = ack-nhfb,
  articleno =    "68",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Escape routing; multi-chip modules; network flow",
}

@Article{Keinert:2009:SAE,
  author =       "Joachim Keinert and Martin Streub{\"u}hr and Thomas
                 Schlichter and Joachim Falk and Jens Gladigau and
                 Christian Haubelt and J{\"u}rgen Teich and Michael
                 Meredith",
  title =        "{SystemCoDesigner} --- an automatic {ESL} synthesis
                 approach by design space exploration and behavioral
                 synthesis for streaming applications",
  journal =      j-TODAES,
  volume =       "14",
  number =       "1",
  pages =        "1:1--1:??",
  month =        jan,
  year =         "2009",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1455229.1455230",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Mon Jan 26 18:12:50 MST 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "With increasing design complexity, the gap from ESL
                 (Electronic System Level) design to RTL synthesis
                 becomes more and more crucial to many industrial
                 projects. Although several behavioral synthesis tools
                 exist to automatically generate synthesizable RTL code
                 from C/C++/SystemC-based input descriptions and
                 software generation for embedded processors is
                 automated as well, an efficient ESL synthesis
                 methodology combining both is still missing. This
                 article presents SystemCoDesigner, a novel
                 SystemC-based ESL tool to automatically optimize a
                 hardware/software SoC (System on Chip) implementation
                 with respect to several objectives. Starting from a
                 SystemC behavioral model, SystemCoDesigner
                 automatically extracts the mathematical model, performs
                 a behavioral synthesis step, and explores the
                 multiobjective design space using state-of-the-art
                 multiobjective optimization algorithms. During design
                 space exploration, a single design point is evaluated
                 by simulating highly accurate performance models, which
                 are automatically generated from the SystemC behavioral
                 model and the behavioral synthesis results. Moreover,
                 SystemCoDesigner permits the automatic generation of
                 bit streams for FPGA targets from any previously
                 optimized SoC implementation. Thus SystemCoDesigner is
                 the first fully automated ESL synthesis tool providing
                 a correct-by-construction generation of
                 hardware/software SoC implementations. As a case study,
                 a model of a Motion-JPEG decoder was automatically
                 optimized and implemented using SystemCoDesigner.
                 Several synthesized SoC variants based on this model
                 show different tradeoffs between required hardware
                 costs and achieved system throughput, ranging from
                 software-only solutions to pure hardware
                 implementations that reach real-time performance for
                 QCIF streams on a 50 MHz FPGA.",
  acknowledgement = ack-nhfb,
  articleno =    "1",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "hardware/software codesign; System design",
}

@Article{Hansson:2009:CTC,
  author =       "Andreas Hansson and Kees Goossens and Marco Bekooij
                 and Jos Huisken",
  title =        "{CoMPSoC}: a template for composable and predictable
                 multi-processor system on chips",
  journal =      j-TODAES,
  volume =       "14",
  number =       "1",
  pages =        "2:1--2:??",
  month =        jan,
  year =         "2009",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1455229.1455231",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Mon Jan 26 18:12:50 MST 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "A growing number of applications, often with firm or
                 soft real-time requirements, are integrated on the same
                 System on Chip, in the form of either hardware or
                 software intellectual property. The applications are
                 started and stopped at run time, creating different
                 use-cases. Resources, such as interconnects and
                 memories, are shared between different applications,
                 both within and between use-cases, to reduce silicon
                 cost and power consumption.\par

                 The functional and temporal behaviour of the
                 applications is verified by simulation and formal
                 methods. Traditionally, designers resort to monolithic
                 verification of the system as whole, since the
                 applications interfere in shared resources, and thus
                 affect each other's behaviour. Due to interference
                 between applications, the integration and verification
                 complexity grows exponentially in the number of
                 applications, and the task to verify correct behaviour
                 of concurrent applications is on the system designer
                 rather than the application designers.\par

                 In this work, we propose a Composable and Predictable
                 Multi-Processor System on Chip (CoMPSoC) platform
                 template. This scalable hardware and software template
                 removes all interference between applications through
                 resource reservations. We demonstrate how this enables
                 a divide-and-conquer design strategy, where all
                 applications, potentially using different programming
                 models and communication paradigms, are developed and
                 verified independently of one another. Performance is
                 analyzed per application, using state-of-the-art
                 dataflow techniques or simulation, depending on the
                 requirements of the application. These results still
                 apply when the applications are integrated onto the
                 platform, thus separating system-level design and
                 application design.",
  acknowledgement = ack-nhfb,
  articleno =    "2",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Composable; model of computation; network on chip;
                 predictable; system on chip",
}

@Article{Gheorghita:2009:SSB,
  author =       "Stefan Valentin Gheorghita and Martin Palkovic and
                 Juan Hamers and Arnout Vandecappelle and Stelios
                 Mamagkakis and Twan Basten and Lieven Eeckhout and Henk
                 Corporaal and Francky Catthoor and Frederik Vandeputte
                 and Koen {De Bosschere}",
  title =        "System-scenario-based design of dynamic embedded
                 systems",
  journal =      j-TODAES,
  volume =       "14",
  number =       "1",
  pages =        "3:1--3:??",
  month =        jan,
  year =         "2009",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1455229.1455232",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Mon Jan 26 18:12:50 MST 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "In the past decade, real-time embedded systems have
                 become much more complex due to the introduction of a
                 lot of new functionality in one application, and due to
                 running multiple applications concurrently. This
                 increases the dynamic nature of today's applications
                 and systems, and tightens the requirements for their
                 constraints in terms of deadlines and energy
                 consumption. State-of-the-art design methodologies try
                 to cope with these novel issues by identifying several
                 most used cases and dealing with them separately,
                 reducing the newly introduced complexity. This article
                 presents a generic and systematic design-time/run-time
                 methodology for handling the dynamic nature of modern
                 embedded systems, which can be utilized by existing
                 design methodologies to increase their efficiency. It
                 is based on the concept of {\em system scenarios},
                 which group system behaviors that are similar from a
                 multidimensional cost perspective --- such as resource
                 requirements, delay, and energy consumption --- in such
                 a way that the system can be configured to exploit this
                 cost similarity. At design-time, these scenarios are
                 individually optimized. Mechanisms for predicting the
                 current scenario at run-time, and for switching between
                 scenarios, are also derived. This design trajectory is
                 augmented with a run-time calibration mechanism, which
                 allows the system to learn on-the-fly during its
                 execution, and to adapt itself to the current input
                 stimuli, by extending the scenario set, changing the
                 scenario definitions, and both the prediction and
                 switching mechanisms. To show the generality of our
                 methodology, we show how it has been applied on four
                 very different real-life design problems. In all
                 presented case studies, substantial energy reductions
                 were obtained by exploiting scenarios.",
  acknowledgement = ack-nhfb,
  articleno =    "3",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Design methodology; dynamic nature; embedded systems;
                 energy reduction; real-time systems; system scenarios",
}

@Article{Xu:2009:STA,
  author =       "Qiang Xu and Yubin Zhang and Krishnendu Chakrabarty",
  title =        "{SOC} test-architecture optimization for the testing
                 of embedded cores and signal-integrity faults on
                 core-external interconnects",
  journal =      j-TODAES,
  volume =       "14",
  number =       "1",
  pages =        "4:1--4:??",
  month =        jan,
  year =         "2009",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1455229.1455233",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Mon Jan 26 18:12:50 MST 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "The test time for core-external interconnect shorts
                 and opens is typically much less than that for
                 core-internal logic. Therefore, prior work on
                 test-infrastructure design for core-based
                 system-on-a-chip (SOC) has mainly focused on minimizing
                 the test time for core-internal logic. However, as
                 feature sizes shrink for newer process technologies,
                 the test time for signal integrity (SI) faults on
                 interconnects cannot be neglected. The test time for SI
                 faults can be comparable to, or even larger than, the
                 test time for the embedded cores. We investigate the
                 impact of interconnect SI tests on SOC
                 test-architecture design and optimization. A compaction
                 method for SI faults and algorithms for
                 test-architecture optimization are also presented.
                 Experimental results for the ITC'02 benchmarks show
                 that the proposed approach can significantly reduce the
                 overall testing time for core-internal logic and
                 core-external interconnects.",
  acknowledgement = ack-nhfb,
  articleno =    "4",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Core-based system-on-chip; interconnect testing; test
                 scheduling; test-access mechanism (TAM)",
}

@Article{Jin:2009:GND,
  author =       "Zhong-Yi Jin and Curt Schurgers and Rajesh K. Gupta",
  title =        "A gateway node with duty-cycled radio and processing
                 subsystems for wireless sensor networks",
  journal =      j-TODAES,
  volume =       "14",
  number =       "1",
  pages =        "5:1--5:??",
  month =        jan,
  year =         "2009",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1455229.1455234",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Mon Jan 26 18:12:50 MST 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Wireless sensor nodes are increasingly being tasked
                 with computation and communication intensive functions
                 while still subject to constraints related to energy
                 availability. On these embedded platforms, once all low
                 power design techniques have been explored,
                 duty-cycling the various subsystems remains the primary
                 option to meet the energy and power constraints. This
                 requires the ability to provide spurts of high MIPS and
                 high bandwidth connections. However, due to the large
                 overheads associated with duty-cycling the computation
                 and communication subsystems, existing high performance
                 sensor platforms are not efficient in supporting such
                 an option. In this article, we present the design and
                 optimizations taken in a wireless gateway node (WGN)
                 that bridges data from wireless sensor networks to
                 Wi-Fi networks in an on-demand basis. We discuss our
                 strategies to reduce duty-cycling related costs by
                 partitioning the system and by reducing the amount of
                 time required to activate or deactivate the
                 high-powered components. We compare the design choices
                 and performance parameters with those made in the Intel
                 {\em Stargate\/} platform to show the effectiveness of
                 duty-cycling on our platform. We have built a working
                 prototype, and the experimental results with two
                 different power management schemes show significant
                 reductions in latency and average power consumption
                 compared to the {\em Stargate}. The WGN running our
                 power-gating scheme performs about six times better in
                 terms of average system power consumption than the {\em
                 Stargate\/} running the suspend-system scheme for large
                 working-periods where the active power dominates. For
                 short working-periods where the transition
                 (enable/disable) power becomes dominant, we perform up
                 to seven times better. The comparative performance of
                 our system is even greater when the sleep power
                 dominates.",
  acknowledgement = ack-nhfb,
  articleno =    "5",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Embedded systems; gateway; power savings; sensor
                 nodes",
}

@Article{Wu:2009:EER,
  author          = {Chin-Hsien Wu},
  title           = {An energy-efficient {I/O} request mechanism for
                     multi-bank flash-memory storage systems},
  journal         = j-TODAES,
  volume          = {14},
  number          = {1},
  pages           = {6:1--6:??},
  month           = jan,
  year            = {2009},
  CODEN           = {ATASFO},
  DOI             = {http://doi.acm.org/10.1145/1455229.1455235},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Mon Jan 26 18:12:50 MST 2009},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {Emerging critical issues for flash-memory storage
                     systems, especially with regard to implementation
                     within many embedded systems, are the programmed I/O
                     nature of data transfers and their energy-efficient
                     nature. We propose an I/O request mechanism in the
                     Memory-Technology-Device (MTD) layer to exploit the
                     programmed I/O-based data transfers for flash-memory
                     storage systems. We propose to revise the waiting
                     function in the Memory-Technology-Device (MTD) layer to
                     relieve the microprocessor from busy-waiting, in order
                     to make more CPU cycles available for other tasks. An
                     energy-efficient mechanism based on the I/O request
                     mechanism is also presented for multi-bank flash-memory
                     storage systems, which particularly focuses on
                     switching the power state of each flash-memory bank. We
                     demonstrate that the energy-efficient I/O request
                     mechanism not only saves more CPU cycles to execute
                     other tasks, but also reduces the energy consumption of
                     flash-memory, based on experiments incorporating
                     realistic system workloads.},
  acknowledgement = ack-nhfb,
  articleno       = {6},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {embedded systems; energy-efficient; Flash Memory;
                     programmed I/O; storage systems},
}

@Article{Dontharaju:2009:DAP,
  author          = {Swapna Dontharaju and Shenchih Tung and James T. Cain
                     and Leonid Mats and Marlin H. Mickle and Alex K.
                     Jones},
  title           = {A design automation and power estimation flow for
                     {RFID} systems},
  journal         = j-TODAES,
  volume          = {14},
  number          = {1},
  pages           = {7:1--7:??},
  month           = jan,
  year            = {2009},
  CODEN           = {ATASFO},
  DOI             = {http://doi.acm.org/10.1145/1455229.1455236},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Mon Jan 26 18:12:50 MST 2009},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {While RFID has become a ubiquitous technology, there
                     is still a need for RFID systems with different
                     capabilities, protocols, and features depending on the
                     application. This article describes a design automation
                     flow and power estimation technique for fast
                     implementation and design feedback of new RFID systems.
                     Physical layer features are described using {\em
                     waveform features}, which are used to automatically
                     generate physical layer encoding and decoding hardware
                     blocks. {\em RFID primitives\/} to be supported by the
                     tag are enumerated with {\em RFID macros\/} and the
                     behavior of each primitive is specified using ANSI-C
                     within the template to automatically generate the tag
                     controller. Case studies implementing widely used
                     standards such as ISO 18000 Part 7 and ISO 18000 Part
                     6C using this automation technique are presented. The
                     power macromodeling flow demonstrated here is shown to
                     be within 5\% to 10\% accuracy, while providing results
                     100 times faster than traditional methods. When
                     eliminating the need for certain features of ISO 18000
                     Part 6C, the design flow shows that the power required
                     by the implementation is reduced by nearly 50\%.},
  acknowledgement = ack-nhfb,
  articleno       = {7},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {design automation; low-power; prototyping; RFID},
}

@Article{Dasdan:2009:PEA,
  author          = {Ali Dasdan},
  title           = {Provably efficient algorithms for resolving temporal
                     and spatial difference constraint violations},
  journal         = j-TODAES,
  volume          = {14},
  number          = {1},
  pages           = {8:1--8:??},
  month           = jan,
  year            = {2009},
  CODEN           = {ATASFO},
  DOI             = {http://doi.acm.org/10.1145/1455229.1455237},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Mon Jan 26 18:12:50 MST 2009},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {A system of difference constraints is a formal model
                     of temporal and spatial constraints in many areas such
                     as scheduling, constraint satisfaction, and layout
                     compaction. During construction of such a system,
                     constraint violations often arise, and they need to be
                     resolved. Previous algorithms for this task fall into
                     two groups: those algorithms that are fast but cannot
                     resolve all violations, and those algorithms that can
                     resolve all violations but are exponentially slow. We
                     propose the first algorithms that are fast as well as
                     able to resolve all violations. Moreover, unlike the
                     previous algorithms, our algorithms support the
                     ordering of violations using their inherent criticality
                     or user-defined priority. We provably and
                     experimentally justify the efficiency and efficacy of
                     our algorithms.},
  acknowledgement = ack-nhfb,
  articleno       = {8},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {Behavioral synthesis; constraint satisfaction;
                     interface timing; layout compaction; multimedia
                     synchronization; rate analysis; real-time systems;
                     scheduling; timing constraints},
}

@Article{Sinha:2009:DIC,
  author          = {Arnab Sinha and Pallab Dasgupta and Bhaskar Pal and
                     Sayantan Das and Prasenjit Basu and P. P. Chakrabarti},
  title           = {Design intent coverage revisited},
  journal         = j-TODAES,
  volume          = {14},
  number          = {1},
  pages           = {9:1--9:??},
  month           = jan,
  year            = {2009},
  CODEN           = {ATASFO},
  DOI             = {http://doi.acm.org/10.1145/1455229.1455238},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Mon Jan 26 18:12:50 MST 2009},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {{\em Design intent coverage\/} is a formal methodology
                     for analyzing the gap between a formal architectural
                     specification of a design and the formal functional
                     specifications of the component RTL blocks of the
                     design. In this article we extend the design intent
                     coverage methodology to hybrid specifications
                     containing both state-machines and formal properties.
                     We demonstrate the benefits of this extension in two
                     domains of considerable recent interest, namely (a) the
                     use of auxiliary state-machines in formal
                     specifications, and (b) the use of modest sized RTL
                     blocks in the design intent coverage analysis.},
  acknowledgement = ack-nhfb,
  articleno       = {9},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {Design Intent Coverage},
}

%%% The fourth author's surname is Ivan{\v{c}}i{\'c}: each accented
%%% letter is its own brace group so that BibTeX treats it as a single
%%% character when sorting and building labels.
@Article{Yang:2009:MCS,
  author =       "Zijiang Yang and Chao Wang and Aarti Gupta and Franjo
                 Ivan{\v{c}}i{\'c}",
  title =        "Model checking sequential software programs via mixed
                 symbolic analysis",
  journal =      j-TODAES,
  volume =       "14",
  number =       "1",
  pages =        "10:1--10:??",
  month =        jan,
  year =         "2009",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1455229.1455239",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Mon Jan 26 18:12:50 MST 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "We present an efficient symbolic search algorithm for
                 software model checking. Our algorithms perform
                 word-level reasoning by using a combination of decision
                 procedures in Boolean and integer and real domains, and
                 use novel symbolic search strategies optimized
                 specifically for sequential programs to improve
                 scalability. Experiments on real-world C programs show
                 that the new symbolic search algorithms can achieve
                 several orders-of-magnitude improvements over existing
                 methods based on bit-level (Boolean) reasoning.",
  acknowledgement = ack-nhfb,
  articleno =    "10",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "binary decision diagram; composite symbolic formula;
                 image computation; Model checking; Presburger
                 arithmetic; reachability analysis",
}

@Article{Mehta:2009:ICH,
  author          = {Gayatri Mehta and Justin Stander and Mustafa Baz and
                     Brady Hunsaker and Alex K. Jones},
  title           = {Interconnect customization for a hardware fabric},
  journal         = j-TODAES,
  volume          = {14},
  number          = {1},
  pages           = {11:1--11:??},
  month           = jan,
  year            = {2009},
  CODEN           = {ATASFO},
  DOI             = {http://doi.acm.org/10.1145/1455229.1455240},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Mon Jan 26 18:12:50 MST 2009},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {This article describes several multiplexer-based
                     interconnection strategies designed to improve energy
                     consumption of stripe-based coarse-grain reconfigurable
                     fabrics. Application requirements for the architecture
                     as well as two dense subgraphs are extracted from a
                     suite of signal and image processing benchmarks. These
                     statistics are used to drive the strategy of the
                     composition of multiplexer-based interconnect. The
                     article compares interconnects that are fully connected
                     between stripes, those with a cardinality of 8:1 to
                     4:1, and extensions that provide a 5:1 cardinality,
                     limited 6:1 cardinality, and hybrids between 5:1 and
                     3:1 cardinalities. Additionally, dedicated vertical
                     routes are considered replacing some computational
                     units with dedicated pass-gates. Using a fabric
                     interconnect model (FIM) written in XML, we demonstrate
                     that fabric instances and mappers can be automatically
                     generated using a Web-based design flow. Upon testing
                     these instances, we found that using an 8:1 cardinality
                     interconnect with 33\% of the computational units
                     replaced with dedicated pass-gates provided the best
                     energy versus mappability tradeoff, resulting in a 50\%
                     energy improvement over fully connected rows and 20\%
                     energy improvement over an 8:1 cardinality interconnect
                     without dedicated vertical routes.},
  acknowledgement = ack-nhfb,
  articleno       = {11},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {architecture; computer-aided design; demonstrable;
                     hardware fabric; low-energy; Reconfigurable},
}

@Article{Sham:2009:CPE,
  author          = {Chiu-Wing Sham and Evangeline F. Y. Young and Jingwei
                     Lu},
  title           = {Congestion prediction in early stages of physical
                     design},
  journal         = j-TODAES,
  volume          = {14},
  number          = {1},
  pages           = {12:1--12:??},
  month           = jan,
  year            = {2009},
  CODEN           = {ATASFO},
  DOI             = {http://doi.acm.org/10.1145/1455229.1455241},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Mon Jan 26 18:12:50 MST 2009},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {Routability optimization has become a major concern in
                     physical design of VLSI circuits. Due to the recent
                     advances in VLSI technology, interconnect has become a
                     dominant factor of the overall performance of a
                     circuit. In order to optimize interconnect cost, we
                     need a good congestion estimation method to predict
                     routability in the early designing stages. Many
                     congestion models have been proposed but there's still
                     a lot of room for improvement. Besides, routers will
                     perform rip-up and reroute operations to prevent
                     overflow, but most models do not consider this case.
                     The outcome is that the existing models will usually
                     underestimate the routability. In this paper, we have a
                     comprehensive study on our proposed congestion models.
                     Results show that the estimation results of our
                     approaches are always more accurate than the previous
                     congestion models.},
  acknowledgement = ack-nhfb,
  articleno       = {12},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {Estimation; floorplanning; placement},
}

@Article{Zhu:2009:ESA,
  author          = {Yi Zhu and Yuanfang Hu and Michael B. Taylor and
                     Chung-Kuan Cheng},
  title           = {Energy and switch area optimizations for {FPGA} global
                     routing architectures},
  journal         = j-TODAES,
  volume          = {14},
  number          = {1},
  pages           = {13:1--13:??},
  month           = jan,
  year            = {2009},
  CODEN           = {ATASFO},
  DOI             = {http://doi.acm.org/10.1145/1455229.1455242},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Mon Jan 26 18:12:50 MST 2009},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {Low energy and small switch area usage are two
                     important design objectives in FPGA global routing
                     architecture design. This article presents an improved
                     MCF model based CAD flow that performs aggressive
                     optimizations, such as topology and wire style
                     optimization, to reduce the energy and switch area of
                     FPGA global routing architectures. The experiments show
                     that when compared to traditional mesh architecture,
                     the optimized FPGA routing architectures achieve up to
                     10\% to 15\% energy savings and up to 20\% switch area
                     savings in average for a set of seven benchmark
                     circuits.},
  acknowledgement = ack-nhfb,
  articleno       = {13},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {FPGA; global routing; low power},
}

@Article{Huang:2009:OPR,
  author          = {Shih-Hsu Huang and Chia-Ming Chang and Yow-Tyng Nieh},
  title           = {Opposite-phase register switching for peak current
                     minimization},
  journal         = j-TODAES,
  volume          = {14},
  number          = {1},
  pages           = {14:1--14:??},
  month           = jan,
  year            = {2009},
  CODEN           = {ATASFO},
  DOI             = {http://doi.acm.org/10.1145/1455229.1455243},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Mon Jan 26 18:12:50 MST 2009},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {In a synchronous sequential circuit, huge current
                     peaks are often observed at the moment of clock
                     transition (since all registers are clocked). Previous
                     works focus on reducing the number of switching
                     registers. However, even though the switching registers
                     are the same, different combinations of switching
                     directions still result in different peak currents.
                     Based on that observation, in this article, we propose
                     an ECO (engineering change order) approach to minimize
                     the peak current by considering the switching
                     directions of registers. Our approach is well suitable
                     for reducing the peak current in IC testing.
                     Experimental data consistently show that our approach
                     works well in practice.},
  acknowledgement = ack-nhfb,
  articleno       = {14},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {IC testing; Logic synthesis; peak current; sequential
                     circuit synthesis},
}

@Article{Lin:2009:SCD,
  author          = {Yen-Chun Lin and Li-Ling Hung},
  title           = {Straightforward construction of depth-size optimal,
                     parallel prefix circuits with fan-out 2},
  journal         = j-TODAES,
  volume          = {14},
  number          = {1},
  pages           = {15:1--15:??},
  month           = jan,
  year            = {2009},
  CODEN           = {ATASFO},
  DOI             = {http://doi.acm.org/10.1145/1455229.1455244},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Mon Jan 26 18:12:50 MST 2009},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {Prefix computation is used in various areas and is
                     considered as a primitive operation. Parallel prefix
                     circuits are parallel prefix algorithms on the
                     combinational circuit model. The depth of a prefix
                     circuit is a measure of its processing time; smaller
                     depth implies faster computation. The size of a prefix
                     circuit is the number of operation nodes in it. Smaller
                     size implies less power consumption, less VLSI area,
                     and less cost. A prefix circuit with $n$ inputs is
                     depth-size optimal if its depth plus size equals $2 n -
                     2$. A circuit with a smaller fan-out is in general
                     faster and occupies less VLSI area. To be of practical
                     use, the depth and fan-out of a prefix circuit should
                     be small. In this paper, a family of depth-size
                     optimal, parallel prefix circuits with fan-out 2 is
                     presented. This family of prefix circuits is easier to
                     construct and more amenable to automatic synthesis than
                     two other families of the same type, although the three
                     families have the same minimum depth among all
                     depth-size optimal prefix circuits with fan-out 2. The
                     balanced structure of the new family is also a merit.},
  acknowledgement = ack-nhfb,
  articleno       = {15},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {Depth-size optimal; fan-out; parallel prefix
                     circuits},
}

@Article{Kahng:2009:LAA,
  author          = {Andrew B. Kahng and Chul-Hong Park and Puneet Sharma
                     and Qinke Wang},
  title           = {Lens aberration aware placement for timing yield},
  journal         = j-TODAES,
  volume          = {14},
  number          = {1},
  pages           = {16:1--16:??},
  month           = jan,
  year            = {2009},
  CODEN           = {ATASFO},
  DOI             = {http://doi.acm.org/10.1145/1455229.1455245},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Mon Jan 26 18:12:50 MST 2009},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {Process variations due to lens aberrations are to a
                     large extent systematic, and can be modeled for
                     purposes of analyses and optimizations in the design
                     phase. Traditionally, variations induced by lens
                     aberrations have been considered random due to their
                     small extent. However, as process margins reduce, and
                     as improvements in reticle enhancement techniques
                     control variations due to other sources with increased
                     efficacy, lens aberration-induced variations gain
                     importance. For example, our experiments indicate that
                     delays of most cells in the Artisan TSMC 90nm library
                     are affected by 2--8\% due to lens aberration.
                     Aberration-induced variations are systematic and depend
                     on the location in the lens field. In this article, we
                     first propose an aberration-aware timing analysis flow
                     that accounts for aberration-induced cell delay
                     variations. We then propose an aberration-aware
                     timing-driven analytical placement approach that
                     utilizes the predictable slow and fast regions created
                     on the chip due to aberration to improve cycle time. We
                     study the dependence of our improvement on chip size,
                     as well as use of the technique along with field
                     blading which allows partial reticle exposure. We
                     evaluate our technique on two testcases, {\em AES\/}
                     and {\em JPEG\/} implemented in 90nm technology. The
                     proposed technique reduces cycle time by 4.322\% (80ps)
                     at the cost of 1.587\% increase in trial-routed
                     wirelength for AES. On JPEG, we observe a cycle time
                     reduction of 5.182\% (132ps) at the cost of 1.095\%
                     increase in trial-routed wirelength.},
  acknowledgement = ack-nhfb,
  articleno       = {16},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {design for manufacturing; Layout; lithography; timing
                     yield},
}

@Article{Chien:2009:SMV,
  author          = {Chih-Da Chien and Cheng-An Chien and Jui-Chin Chu and
                     Jiun-In Guo and Ching-Hwa Cheng},
  title           = {A {252Kgates\slash 4.9Kbytes SRAM\slash 71mW}
                     multistandard video decoder for high definition video
                     applications},
  journal         = j-TODAES,
  volume          = {14},
  number          = {1},
  pages           = {17:1--17:??},
  month           = jan,
  year            = {2009},
  CODEN           = {ATASFO},
  DOI             = {http://doi.acm.org/10.1145/1455229.1455246},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Mon Jan 26 18:12:50 MST 2009},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {This article proposes a low-cost, low-power
                     multistandard video decoder for high definition (HD)
                     video applications. The proposed design supports
                     multiple-standard (JPEG baseline, MPEG-1/2/4 Simple
                     Profile (SP), and H.264 Baseline Profile (BP)) video
                     decoding through interactive parsing control and common
                     parameter bus interface. In order to reduce hardware
                     cost, the shared adder-based structure and reusable
                     data management are proposed to achieve hardware
                     sharing and reduce internal memory size, respectively.
                     In addition, the proposed design is optimized through
                     reducing memory bandwidth by increasing both data reuse
                     amount and burst length of memory access as well as
                     eliminating cycle overhead in data access for
                     supporting HD video decoding with single AHB-based SDR
                     memory. The proposed 252Kgates/4.9kB/71mW/0.13$\mu$m
                     multi-standard video decoder reduces 72\% in gate count
                     and 87\% in power consumption as compared to the
                     state-of-the-art design, when operating at 120MHz for
                     real-time HD1080 video decoding with single AHB-based
                     SDR memory.},
  acknowledgement = ack-nhfb,
  articleno       = {17},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {H.264; MPEG; Video decoder},
}

@Article{Reviriego:2009:EED,
  author          = {Pedro Reviriego and Juan Antonio Maestro},
  title           = {Efficient error detection codes for multiple-bit upset
                     correction in {SRAMs} with {BICS}},
  journal         = j-TODAES,
  volume          = {14},
  number          = {1},
  pages           = {18:1--18:??},
  month           = jan,
  year            = {2009},
  CODEN           = {ATASFO},
  DOI             = {http://doi.acm.org/10.1145/1455229.1455247},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  bibdate         = {Mon Jan 26 18:12:50 MST 2009},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     http://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {Memories are one of the most widely used elements in
                     electronic systems, and their reliability when exposed
                     to Single Events Upsets (SEUs) has been studied
                     extensively. As transistor sizes shrink, Multiple Bits
                     Upsets (MBUs) are becoming an increasingly important
                     factor in the reliability of memories exposed to
                     radiation effects. To address this issue, Built-in
                     Current Sensors (BICS) have recently been applied in
                     conjunction with Single Error Correction/Double Error
                     Detection (SEC-DED) codes to protect memories from
                     MBUs. In this article, this approach is taken one step
                     further, proposing specific codes optimized to be
                     combined with BICS to provide protection against MBUs
                     in memories. By exploiting the locality of errors
                     within an MBU and the error detection and location
                     capabilities of BICS, the proposed codes result in both
                     a better protection level and a reduced cost compared
                     with the existing SEC-DED approach.},
  acknowledgement = ack-nhfb,
  articleno       = {18},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords        = {error correcting codes; Fault tolerant memory;
                     high-level protection technique; protection against
                     radiation},
}

%%% The second author's surname is spelled D'Silva (capital S).
@Article{Avnit:2009:PCC,
  author =       "K. Avnit and V. D'Silva and A. Sowmya and S. Ramesh
                 and S. Parameswaran",
  title =        "Provably correct on-chip communication: a formal
                 approach to automatic protocol converter synthesis",
  journal =      j-TODAES,
  volume =       "14",
  number =       "2",
  pages =        "19:1--19:??",
  month =        mar,
  year =         "2009",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1497561.1497562",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Apr 2 15:06:01 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Hardware module reuse is a standard solution to the
                 problems of increasing complexity of chip architectures
                 and pressure to reduce time to market. In the absence
                 of a single module interface standard, predesigned
                 modules for ``plug-and-play'' usually require a
                 converter between incompatible interface protocols.
                 Current approaches to automatic synthesis of protocol
                 converters mostly lack formal foundations and either
                 employ abstractions far removed from the HDL
                 implementation level or grossly simplify the structure
                 of the protocols considered. This work presents a
                 state-machine-based formalism for modeling bus-based
                 communication protocols and a notion of protocol
                 compatibility and of correct conversion between
                 incompatible protocols. This formalism is used to
                 derive algorithms for checking protocol compatibility
                 and for provably correct, automatic converter
                 synthesis. Experiments with automatic converter
                 synthesis between different configurations of widely
                 used commercial bus protocols, such as AMBA AHB, ASB
                 APB, and the Open Core Protocol (OCP) are discussed.
                 The work here is unique in its combination of a
                 completely formal approach and the use of a low
                 abstraction level that enables precise modeling of
                 protocol characteristics that is also close to HDL.",
  acknowledgement = ack-nhfb,
  articleno =    "19",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "automatic design; converter synthesis; protocol
                 compatibility; System-on-chip",
}

@Article{Pasricha:2009:SLP,
  author =       "Sudeep Pasricha and Young-Hwan Park and Nikil Dutt and
                 Fadi J. Kurdahi",
  title =        "System-level {PVT} variation-aware power exploration
                 of on-chip communication architectures",
  journal =      j-TODAES,
  volume =       "14",
  number =       "2",
  pages =        "20:1--20:??",
  month =        mar,
  year =         "2009",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1497561.1497563",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Apr 2 15:06:01 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "With the shift towards deep submicron (DSM)
                 technologies, the increase in leakage power and the
                 adoption of power-aware design methodologies have
                 resulted in potentially significant variations in power
                 consumption under different process, voltage, and
                 temperature (PVT) corners. In this article, we first
                 investigate the impact of PVT corners on power
                 consumption at the system-on-chip (SoC) level,
                 especially for the on-chip communication
                 infrastructure. Given a target technology library, we
                 then show how it is possible to ``scale up'' and
                 abstract the PVT variability at the system level,
                 allowing characterization of the PVT-aware design space
                 early in the design flow. We conducted several
                 experiments to estimate power for PVT corner cases, at
                 the gate level, as well as at the higher system level.
                 Our preliminary results are very interesting, and
                 indicate that (i) there are significant variations in
                 power consumption across PVT corners; and (ii) the
                 PVT-aware power estimation problem may be amenable to a
                 reasonably simple abstraction at the system level.",
  acknowledgement = ack-nhfb,
  articleno =    "20",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "digital systems; high-level synthesis; on-chip
                 communication architectures; performance exploration;
                 power estimation; PVT variation",
}

@Article{Mukhopadhyay:2009:IAA,
  author =       "Rajdeep Mukhopadhyay and S. K. Panda and Pallab
                 Dasgupta and John Gough",
  title =        "Instrumenting {AMS} assertion verification on
                 commercial platforms",
  journal =      j-TODAES,
  volume =       "14",
  number =       "2",
  pages =        "21:1--21:??",
  month =        mar,
  year =         "2009",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1497561.1497564",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Apr 2 15:06:01 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "The industry trend appears to be moving towards
                 designs that integrate large digital circuits with
                 multiple analog/RF (radio frequency) interfaces. In the
                 verification of these large integrated circuits, the
                 number of nets that need to be monitored has been
                 growing rapidly. Consequently, the mixed-signal design
                 community has been feeling the need for AMS (Analog and
                 Mixed Signal) assertions that can automatically monitor
                 conformance with expected time-domain behavior and help
                 in debugging deviations from the design intent. The
                 main challenges in providing this support are (a)
                 developing AMS assertion languages or AMS verification
                 libraries, and (b) instrumenting existing commercial
                 simulators to support assertion verification during
                 simulation. In this article, we report two approaches:
                 the first extends the {\em Open Verification Library\/}
                 (OVL) to the AMS domain by integrating a new collection
                 of AMS verification libraries; while the second extends
                 {\em SystemVerilog Assertions\/} (SVA) by augmenting
                 analog predicates into SVA. We demonstrate the use of
                 AMS-OVL on the Cadence Virtuoso environment while
                 emphasizing that our libraries can work in any
                 environment that supports Verilog and Verilog-A. We
                 also report the development of tool support for AMS-SVA
                 using a combination of Cadence NCSIM and Synopsys VCS.
                 We demonstrate the utility of both approaches on the
                 verification of LP3918, an integrated power management
                 unit (PMU) from National Semiconductors. We believe
                 that in the absence of existing EDA (Electronic Design
                 Automation) tools for AMS assertion verification, the
                 proposed approaches of integrating our libraries and
                 our tool sets with existing commercial simulators will
                 be of considerable and immediate practical value.",
  acknowledgement = ack-nhfb,
  articleno =    "21",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Assertion; integrated mixed signal design; OVL;
                 simulation; SVA; verification library",
}

@Article{Palkovic:2009:TOL,
  author =       "Martin Palkovic and Francky Catthoor and Henk
                 Corporaal",
  title =        "Trade-offs in loop transformations",
  journal =      j-TODAES,
  volume =       "14",
  number =       "2",
  pages =        "22:1--22:??",
  month =        mar,
  year =         "2009",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1497561.1497565",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Apr 2 15:06:01 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Nowadays, multimedia systems deal with huge amounts of
                 memory accesses and large memory footprints. To
                 alleviate the impact of these accesses and reduce the
                 memory footprint, high-level memory exploration and
                 optimization techniques have been proposed. These
                 techniques try to more efficiently utilize the memory
                 hierarchy. An important step in these optimization
                 techniques are loop transformations (LT). They have a
                 crucial effect on later data memory footprint
                 optimization steps and code generation. However, the
                 state-of-the-art work has focused only on individual
                 objectives. The main one in literature involves
                 improving the locality of data accesses, and thus
                 reducing the data memory footprint. It does not
                 consider the trade-offs in the LT step in relation to
                 successive optimization steps. Therefore, it is not
                 globally efficient in mapping the application on the
                 target platform.\par

                 In this article we will discuss several trade-offs
                 during the loop transformations. To our knowledge, we
                 are the first ones considering these global trade-offs.
                 Previous work always gave mostly one solution, having
                 the best locality and thus the optimized memory
                 footprint, even though some research in two-dimensional
                 trade-offs in this area exists as well. We start from
                 this state-of-the-art solution with minimal footprint.
                 We show that by sacrificing the footprint, we can
                 obtain gains in data reuse (crucial for energy
                 reduction) and reduce the control-flow complexity. We
                 demonstrate our approach on a real-life application,
                 namely the QSDPCM video coder. At the end, we show that
                 considering trade-offs for this application leads to
                 16\% energy reduction in a two-layer memory subsystem
                 and 10\% cycle reduction on the ARM platform.",
  acknowledgement = ack-nhfb,
  articleno =    "22",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "cost components; Data transfer and storage
                 exploration; loop transformations; optimization;
                 trade-offs",
}

@Article{Fummi:2009:CMH,
  author =       "Franco Fummi and Mirko Loghi and Massimo Poncino and
                 Graziano Pravadelli",
  title =        "A cosimulation methodology for {HW\slash SW}
                 validation and performance estimation",
  journal =      j-TODAES,
  volume =       "14",
  number =       "2",
  pages =        "23:1--23:??",
  month =        mar,
  year =         "2009",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1497561.1497566",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Apr 2 15:06:01 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Cosimulation strategies allow us to simulate and
                 verify HW/SW embedded systems before the real platform
                 is available. In this field, there is a large variety
                 of approaches that rely on different communication
                 mechanisms to implement an efficient interface between
                 the SW and the HW simulators. However, the literature
                 lacks a comprehensive methodology which addresses the
                 need for integrating and synchronizing heterogeneous
                 simulators, like, for example, the SystemC simulation
                 kernel for HW modules and an instruction set simulator
                 for SW applications, without being intrusive for the HW
                 and SW descriptions involved in the simulation. In this
                 context, this article presents, compares, and
                 integrates in a system-level framework two different
                 co-simulation strategies for modeling, analyzing, and
                 validating the performance of a HW/SW embedded system.
                 Moreover, for both of them, a mechanism is proposed to
                 provide an accurate time synchronization of the HW/SW
                 communication. The first strategy is intended to
                 provide an early cosimulation environment where HW/SW
                 interaction can be validated without involving the
                 operating system. The communication is implemented
                 between a single SW task and a SystemC description of
                 an HW module by exploiting the features of the remote
                 debugging interface of a debugger (the GNU GDB), and by
                 modifying the SystemC simulation kernel. On the other
                 hand, the second strategy is intended to be used in
                 further development steps, when the operating system is
                 introduced to validate the cosimulation between HW
                 modules and multitasking SW applications. In this
                 approach, the communication is implemented via
                 interrupts by using the features offered by the
                 operating system.\par

                 Experimental results are reported on two different case
                 studies to analyze and compare the effectiveness of
                 both the approaches.",
  acknowledgement = ack-nhfb,
  articleno =    "23",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Embedded Systems; HW/SW co-simulation; HW/SW
                 validation",
}

@Article{Inoue:2009:DSD,
  author =       "Hiroaki Inoue and Tsuyoshi Abe and Kazuhisa Ishizaka
                 and Junji Sakai and Masato Edahiro",
  title =        "Dynamic security domain scaling on embedded symmetric
                 multiprocessors",
  journal =      j-TODAES,
  volume =       "14",
  number =       "2",
  pages =        "24:1--24:??",
  month =        mar,
  year =         "2009",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1497561.1497567",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Apr 2 15:06:01 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "We propose a method for dynamic security-domain
                 scaling on SMPs that offers both highly scalable
                 performance and high security for future high-end
                 embedded systems. Its most important feature is its
                 highly efficient use of processor resources,
                 accomplished by dynamically changing the number of
                 processors within a security-domain (i.e., dynamically
                 yielding processors to other security-domains) in
                 response to application load requirements. Two new
                 technologies make this scaling possible without any
                 virtualization software: (1) self-transition management
                 and (2) unified virtual address mapping. Evaluations
                 show that this domain control provides highly scalable
                 performance and incurs almost no performance overhead
                 in security-domains. The increase in OSs in binary code
                 size is less than 1.5\%, and the time required for
                 individual state transitions is on the order of a
                 single millisecond. This scaling is the first in the
                 world to make possible the dynamic changing of the
                 number of processors within a security-domain on an ARM
                 SMP.",
  acknowledgement = ack-nhfb,
  articleno =    "24",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "AMP; dynamic security-domain scaling; SMP",
}

@Article{Qiu:2009:CMW,
  author =       "Meikang Qiu and Edwin H.-M. Sha",
  title =        "Cost minimization while satisfying hard\slash soft
                 timing constraints for heterogeneous embedded systems",
  journal =      j-TODAES,
  volume =       "14",
  number =       "2",
  pages =        "25:1--25:??",
  month =        mar,
  year =         "2009",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1497561.1497568",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Apr 2 15:06:01 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "In high-level synthesis for real-time embedded systems
                 using heterogeneous functional units (FUs), it is
                 critical to select the best FU type for each task.
                 However, some tasks may not have fixed execution times.
                 This article models each varied execution time as a
                 probabilistic random variable and solves {\em
                 heterogeneous assignment with probability\/} (HAP)
                 problem. The solution of the HAP problem assigns a
                 proper FU type to each task such that the total cost is
                 minimized while the timing constraint is satisfied with
                 a guaranteed confidence probability. The solutions to
                 the HAP problem are useful for both hard real-time and
                 soft real-time systems. Optimal algorithms are proposed
                 to find the optimal solutions for the HAP problem when
                 the input is a tree or a simple path. Two other
                 algorithms, one is optimal and the other is
                 near-optimal heuristic, are proposed to solve the
                 general problem. The experiments show that our
                 algorithms can effectively reduce the total cost while
                 satisfying timing constraints with guaranteed
                 confidence probabilities. For example, our algorithms
                 achieve an average reduction of 33.0\% on total cost
                 with 0.90 confidence probability satisfying timing
                 constraints compared with the previous work using
                 worst-case scenario.",
  acknowledgement = ack-nhfb,
  articleno =    "25",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Embedded Systems; heterogeneous; high-level synthesis;
                 real-time",
}

@Article{Zhou:2009:TAR,
  author =       "Xiangrong Zhou and Chenjie Yu and Peter Petrov",
  title =        "Temperature-aware register reallocation for register
                 file power-density minimization",
  journal =      j-TODAES,
  volume =       "14",
  number =       "2",
  pages =        "26:1--26:??",
  month =        mar,
  year =         "2009",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1497561.1497569",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Apr 2 15:06:01 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Increased chip temperature has been known to cause
                 severe reliability problems and to significantly
                 increase leakage power. The register file has been
                 previously shown to exhibit the highest temperature
                 compared to all other hardware components in a modern
                 high-end embedded processor, which makes it
                 particularly susceptible to faults and elevated leakage
                 power. We show that this is mostly due to the highly
                 clustered register file accesses where a set of few
                 registers physically placed close to each other are
                 accessed with very high frequency. We propose
                 compile-time temperature-aware register reallocation
                 methodologies for breaking such groups of registers and
                 to uniformly distribute the accesses to the register
                 file. This is achieved with {\em no performance\/} and
                 {\em no hardware overheads}. We show that the
                 underlying problem is NP-hard, and subsequently
                 introduce and evaluate two efficient algorithmic
                 heuristics. Our extensive experimental study
                 demonstrates the efficiency of the proposed
                 methodology.",
  acknowledgement = ack-nhfb,
  articleno =    "26",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}

@Article{Hong:2009:RFD,
  author =       "Yu-Ru Hong and Juinn-Dar Huang",
  title =        "Reducing fault dictionary size for million-gate large
                 circuits",
  journal =      j-TODAES,
  volume =       "14",
  number =       "2",
  pages =        "27:1--27:??",
  month =        mar,
  year =         "2009",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1497561.1497570",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Apr 2 15:06:01 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "In general, fault dictionary is prevented from
                 practical applications in fault diagnosis due to its
                 extremely large size. Several previous works are
                 proposed for the fault dictionary size reduction.
                 However, some of them fail to bring down the size to an
                 acceptable level, and others might not be able to
                 handle today's million-gate circuits due to their high
                 time and space complexity. In this article, an
                 algorithm is presented to reduce the size of pass-fail
                 dictionary while still preserving high diagnostic
                 resolution. The proposed algorithm possesses low time
                 and space complexity by avoiding constructing the huge
                 distinguishability table, which inevitably boosts up
                 the required computation complexity. Experimental
                 results demonstrate that the proposed algorithm is
                 capable of handling industrial million-gate large
                 circuits in a reasonable amount of runtime and
                 memory.",
  acknowledgement = ack-nhfb,
  articleno =    "27",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "diagnostic resolution; fault diagnosis; Fault
                 dictionary",
}

@Article{Kavousianos:2009:EPS,
  author =       "Xrysovalantis Kavousianos and Dimitris Bakalis and
                 Dimitris Nikolos",
  title =        "Efficient partial scan cell gating for low-power
                 scan-based testing",
  journal =      j-TODAES,
  volume =       "14",
  number =       "2",
  pages =        "28:1--28:??",
  month =        mar,
  year =         "2009",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1497561.1497571",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Apr 2 15:06:01 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Gating of the outputs of a portion of the scan cells
                 (partial gating) has been recently proposed as a method
                 for reducing the dynamic power dissipation during
                 scan-based testing. We present a new systematic method
                 for selecting, under area and performance design
                 constraints, the most suitable for gating subset of
                 scan cells as well as the proper gating value for each
                 one of them, aiming at the reduction of the average
                 switching activity during testing. We show that the
                 proposed method outperforms the corresponding already
                 known methods, with respect to average dynamic power
                 dissipation reduction.",
  acknowledgement = ack-nhfb,
  articleno =    "28",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Low-power testing; partial gating; scan cell gating;
                 scan-based testing",
}

@Article{Rakhmatov:2009:BVM,
  author =       "Daler Rakhmatov",
  title =        "Battery voltage modeling for portable systems",
  journal =      j-TODAES,
  volume =       "14",
  number =       "2",
  pages =        "29:1--29:??",
  month =        mar,
  year =         "2009",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1497561.1497572",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Apr 2 15:06:01 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Limited battery life imposes stringent constraints on
                 the operation of battery-powered portable systems.
                 During battery discharge, the battery voltage
                 decreases, until a certain cutoff value is reached,
                 marking the end of battery life. The amount of
                 discharge capacity and energy delivered by the battery
                 during its life depends not only on the battery
                 characteristics, but also on the load conditions. A
                 different system design may result in a different
                 battery current (load) profile over time, leading to a
                 different battery voltage profile over time. This
                 article presents an analytical model that relates the
                 battery voltage to the battery current, thus
                 facilitating system design optimizations with respect
                 to the battery performance. It captures well-known
                 nonlinear phenomena of capacity loss at high discharge
                 rates, charge recovery, and capacity fading. The
                 proposed model has been validated against measurements
                 taken on Li-ion batteries. We also describe techniques
                 for efficient calculations of model's estimates, which
                 lets a user exploit accuracy-complexity tradeoffs.",
  acknowledgement = ack-nhfb,
  articleno =    "29",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "accuracy-complexity tradeoff; analytical modeling;
                 battery performance; battery-powered systems; Low-power
                 design",
}

@Article{Kumar:2009:EML,
  author =       "Yokesh Kumar and Prosenjit Gupta",
  title =        "External memory layout vs. schematic",
  journal =      j-TODAES,
  volume =       "14",
  number =       "2",
  pages =        "30:1--30:??",
  month =        mar,
  year =         "2009",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1497561.1497573",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Apr 2 15:06:01 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "The circuit represented by a VLSI layout must be
                 verified by checking it against the schematic circuit
                 as an important part of the functional verification
                 step. This involves two central problems of matching
                 the circuit graphs with each other (graph isomorphism)
                 and extracting a higher level of circuit from a given
                 level by finding subcircuits in the circuit graph
                 (subgraph isomorphism). Modern day VLSI layouts contain
                 millions of devices. Hence the memory requirements of
                 the data structures required by tools for verifying
                 them become huge and can easily exceed the amount of
                 internal memory available on a computer. In such a
                 scenario, a program not aware of the memory hierarchy
                 performs badly because of its unorganized input/output
                 operations (I/Os) as the speed of a disk access is
                 about a million times slower than accessing a main
                 memory location. In this article, we present
                 I/O-efficient algorithms for the graph isomorphism and
                 subgraph isomorphism problems in the context of
                 verification of VLSI layouts. Experimental results show
                 the need and utility of I/O-efficient algorithms for
                 handling problems with large memory requirements.",
  acknowledgement = ack-nhfb,
  articleno =    "30",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "design automation; external memory algorithms; Graph;
                 subgraph isomorphism; verification of layouts",
}

@Article{Chen:2009:SAP,
  author =       "Po-Yuan Chen and Kuan-Hsien Ho and Tingting Hwang",
  title =        "Skew-aware polarity assignment in clock tree",
  journal =      j-TODAES,
  volume =       "14",
  number =       "2",
  pages =        "31:1--31:??",
  month =        mar,
  year =         "2009",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1497561.1497574",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Apr 2 15:06:01 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "In modern sequential VLSI designs, clock tree plays an
                 important role in synchronizing different components in
                 a chip. To reduce peak current and power/ground noises
                 caused by clock network, assigning different signal
                 polarities to clock buffers is proposed in previous
                 work. Although peak current and power/ground noises are
                 minimized by signal polarities assignment, an
                 assignment without timing information may increase the
                 clock skew significantly. As a result, a timing-aware
                 signal polarities assigning technique is necessary. In
                 this article, we propose a novel signal polarities
                 assigning technique which can not only reduce peak
                 current and power/ground noises simultaneously but also
                 render the clock skew in control. The experimental
                 result shows that the clock skew produced by our
                 algorithm is 94\% of original clock skew in average
                 while the clock skews produced by three algorithms
                 (Partition, MST, Matching) in the absence of post clock
                 tuning steps in the previous work are 235\%, 272\%, and
                 283\%, respectively. Moreover, our algorithm is as
                 efficient as the three algorithms of the previous work
                 in reducing peak current and power/ground noises.",
  acknowledgement = ack-nhfb,
  articleno =    "31",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Clock skew; clock tree; peak current; polarity
                 assignment; power/ground noise",
}

@Article{Cho:2009:BHR,
  author =       "Minsik Cho and Katrina Lu and Kun Yuan and David Z.
                 Pan",
  title =        "{BoxRouter 2.0}: a hybrid and robust global router
                 with layer assignment for routability",
  journal =      j-TODAES,
  volume =       "14",
  number =       "2",
  pages =        "32:1--32:??",
  month =        mar,
  year =         "2009",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1497561.1497575",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Apr 2 15:06:01 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "In this article, we present BoxRouter 2.0, and discuss
                 its architecture and implementation. As
                 high-performance VLSI design becomes more
                 interconnect-dominant, efficient congestion elimination
                 in global routing is in greater demand. Hence, we
                 propose a global router which has a strong ability to
                 improve routability and minimize the number of vias
                 with blockages, while minimizing wirelength. BoxRouter
                 2.0 is extended from BoxRouter 1.0, but can perform
                 multi-layer routing with 2D global routing and layer
                 assignment. Our 2D global routing is equipped with two
                 ideas: node shifting for congestion-aware Steiner tree
                 and robust negotiation-based A* search for routing
                 stability. After 2D global routing, 2D-to-3D mapping is
                 done by the layer assignment which is powered by
                 progressive via/blockage-aware integer linear
                 programming. Experimental results show that BoxRouter
                 2.0 has better routability with comparable wirelength
                 than other routers on ISPD07 benchmark, and it can
                 complete (no overflow) the widely used ISPD98 benchmark
                 for the first time in the literature with the shortest
                 wirelength. We further generate a set of harder ISPD98
                 benchmarks to push the limit of BoxRouter 2.0, and
                 propose the hardened ISPD98 benchmarks to map
                 state-of-the-art solutions for future routing
                 research.",
  acknowledgement = ack-nhfb,
  articleno =    "32",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "congestion; global routing; integer linear
                 programming; layer assignment; physical design;
                 routability; VLSI",
}

@Article{Gulati:2009:FBH,
  author =       "Kanupriya Gulati and Suganth Paul and Sunil P. Khatri
                 and Srinivas Patil and Abhijit Jas",
  title =        "{FPGA}-based hardware acceleration for {Boolean}
                 satisfiability",
  journal =      j-TODAES,
  volume =       "14",
  number =       "2",
  pages =        "33:1--33:??",
  month =        mar,
  year =         "2009",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1497561.1497576",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Thu Apr 2 15:06:01 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "We present an FPGA-based hardware solution to the
                 Boolean satisfiability (SAT) problem, with the main
                 goals of scalability and speedup. In our approach the
                 traversal of the implication graph as well as conflict
                 clause generation are performed in hardware, in
                 parallel. The experimental results and their analysis,
                 along with the performance models are discussed. We
                 show that an order of magnitude improvement in runtime
                 can be obtained over MiniSAT (the best-in-class
                 software based approach) by using a Virtex-4
                 (XC4VFX140) FPGA device. The resulting system can
                 handle instances with as many as 10K variables and 280K
                 clauses.",
  acknowledgement = ack-nhfb,
  articleno =    "33",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Boolean Constant Propagation (BCP); Boolean
                 satisfiability (SAT); conflict induced clauses; FPGA;
                 non-chronological backtrack",
}

@Article{Malik:2009:SCU,
  author =       "Avinash Malik and Zoran Salcic and Partha S. Roop",
  title =        "{SystemJ} compilation using the {Tandem Virtual
                 Machine} approach",
  journal =      j-TODAES,
  volume =       "14",
  number =       "3",
  pages =        "34:1--34:??",
  month =        may,
  year =         "2009",
  CODEN =        "ATASFO",
  DOI =          "http://doi.acm.org/10.1145/1529255.1529256",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  bibdate =      "Wed Jun 3 16:12:53 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "SystemJ is a language based on the Globally
                 Asynchronous Locally Synchronous (GALS) paradigm. A
                 SystemJ program is a collection of GALS nodes, also
                 called clock domains, and each clock domain is a
                 synchronous program that extends the Java language.
                 Initial compilation of SystemJ has been to standard
                 Java executing on a Java Virtual Machine (JVM), which
                 is both inefficient and bulky for small embedded
                 systems. This article proposes a new approach for
                 compiling and executing SystemJ using a new type of
                 virtual machine, called a Tandem Virtual Machine (TVM).
                 The TVM approach provides an efficient implementation
                 of SystemJ on both standard processors and
                 resource-constrained embedded processors. The new
                 approach is based on separating the control-d