%%% -*-BibTeX-*-
%%% ====================================================================
%%%  BibTeX-file{
%%%     author          = "Nelson H. F. Beebe",
%%%     version         = "2.16",
%%%     date            = "12 December 2019",
%%%     time            = "08:08:17 MDT",
%%%     filename        = "visual-instruction-set.bib",
%%%     address         = "University of Utah
%%%                        Department of Mathematics, 110 LCB
%%%                        155 S 1400 E RM 233
%%%                        Salt Lake City, UT 84112-0090
%%%                        USA",
%%%     telephone       = "+1 801 581 5254",
%%%     FAX             = "+1 801 581 4148",
%%%     URL             = "http://www.math.utah.edu/~beebe",
%%%     checksum        = "11030 2126 8979 97202",
%%%     email           = "beebe at math.utah.edu, beebe at acm.org,
%%%                        beebe at computer.org (Internet)",
%%%     codetable       = "ISO/ASCII",
%%%     keywords        = "3DNow!; AltiVec; bibliography; BibTeX; M2;
%%%                        MAX-2; MIPS Digital Media Extensions (MDMX);
%%%                        MMX; SSE; SSE2; VIS; visual instruction set",
%%%     license         = "public domain",
%%%     supported       = "yes",
%%%     docstring       = "This file contains a bibliography of
%%%                        publications on visual instruction sets, a
%%%                        feature added to several RISC architectures
%%%                        starting in 1995 to improve performance
%%%                        significantly for key graphics and signal
%%%                        processing operations.
%%%
%%%                        At version 2.16, the year coverage looked like
%%%                        this:
%%%
%%%                             1994 (   2)    1999 (  10)    2004 (   0)
%%%                             1995 (   7)    2000 (   8)    2005 (   0)
%%%                             1996 (  15)    2001 (   5)    2006 (   1)
%%%                             1997 (  14)    2002 (   1)
%%%                             1998 (  10)    2003 (   0)
%%%                             19xx (   1)
%%%
%%%                             Article:         48
%%%                             Book:             1
%%%                             InProceedings:    8
%%%                             Manual:           5
%%%                             Misc:             2
%%%                             Proceedings:      3
%%%                             TechReport:       7
%%%
%%%                             Total entries:   74
%%%
%%%                        This bibliography has been derived from data
%%%                        in the TeX User Group and BibNet Project
%%%                        bibliography archives.
%%%
%%%                        Spelling has been verified with the UNIX
%%%                        spell and GNU ispell programs using the
%%%                        exception dictionary stored in the companion
%%%                        file with extension .sok.
%%%
%%%                        BibTeX citation tags are uniformly chosen
%%%                        as name:year:abbrev, where name is the
%%%                        family name of the first author or editor,
%%%                        year is a 4-digit number, and abbrev is a
%%%                        3-letter condensation of important title
%%%                        words. Citation tags were automatically
%%%                        generated by software developed for the
%%%                        BibNet Project.
%%%
%%%                        This bibliography is sorted into sections
%%%                        devoted to each different instruction set.
%%%                        Within each section, entries are sorted by
%%%                        ascending year, and within each year in a
%%%                        section, by citation label, using `bibsort
%%%                        -byyear'.
%%%
%%%                        The checksum field above contains a CRC-16
%%%                        checksum as the first value, followed by the
%%%                        equivalent of the standard UNIX wc (word
%%%                        count) utility output of lines, words, and
%%%                        characters.  This is produced by Robert
%%%                        Solovay's checksum utility.",
%%%  }
%%% ====================================================================
%%% ====================================================================
%%% Acknowledgement abbreviations:
%%% ack-nhfb: contact details of Nelson H. F. Beebe.  Entries in this
%%% file refer to this macro from their `acknowledgement' field.
@String{ack-nhfb = "Nelson H. F. Beebe,
                    University of Utah,
                    Department of Mathematics, 110 LCB,
                    155 S 1400 E RM 233,
                    Salt Lake City, UT 84112-0090, USA,
                    Tel: +1 801 581 5254,
                    FAX: +1 801 581 4148,
                    e-mail: \path|beebe@math.utah.edu|,
                            \path|beebe@acm.org|,
                            \path|beebe@computer.org| (Internet),
                    URL: \path|http://www.math.utah.edu/~beebe/|"}

%%% ====================================================================
%%% Journal abbreviations:
%%% Each j-* macro expands to the full journal title.  Keep every
%%% expansion in step with the `fjournal' value recorded in the entries
%%% that reference the macro.
@String{j-BYTE                  = "BYTE Magazine"}

@String{j-CACM                  = "Communications of the ACM"}

@String{j-CCPE                  = "Concurrency and Computation: Prac\-tice and
                                   Experience"}

@String{j-COMPUTER              = "Computer"}

@String{j-DATAMATION            = "Datamation"}

@String{j-DDJ                   = "Dr. Dobb's Journal of Software Tools"}

@String{j-ELECT-NOTES-THEOR-COMP-SCI = "Electronic Notes in Theoretical Computer
                                  Science"}

@String{j-HEWLETT-PACKARD-J     = "Hew\-lett-Pack\-ard Journal: technical
                                   information from the laboratories of
                                   Hew\-lett-Pack\-ard Company"}

@String{j-IEEE-CGA              = "IEEE Computer Graphics and Applications"}

@String{j-IEEE-CONCURR          = "IEEE Concurrency"}

@String{j-IEEE-MICRO            = "IEEE Micro"}

@String{j-IEEE-MULTIMEDIA       = "IEEE MultiMedia"}

@String{j-INTEL-TECH-J          = "Intel Technology Journal"}

@String{j-LECT-NOTES-COMP-SCI   = "Lecture Notes in Computer Science"}

@String{j-LINUX-J               = "Linux Journal"}

@String{j-SIGPLAN               = "ACM SIG{\-}PLAN Notices"}

@String{j-TOCS                  = "ACM Transactions on Computer Systems"}

%%% ====================================================================
%%% Publisher abbreviations:
@String{pub-AMD = {Advanced Micro Devices, Inc.}}
@String{pub-AMD:adr = {One AMD Place, P.O. Box 3453, Sunnyvale,
    California, USA}}

@String{pub-CRC = {CRC Press}}
@String{pub-CRC:adr = {2000 N.W. Corporate Blvd., Boca Raton,
    FL 33431-9868, USA}}

@String{pub-IEEE = {IEEE Computer Society Press}}

@String{pub-IEEE:adr = {1109 Spring Street, Suite 300, Silver
    Spring, MD 20910, USA}}

@String{pub-MOTOROLA = {Motorola Corporation}}
@String{pub-MOTOROLA:adr = {Phoenix, AZ, USA}}

@String{pub-SUN = {Sun Microsystems}}
@String{pub-SUN:adr = {901 San Antonio Road, Palo Alto,
    CA 94303-4900, USA}}

%%% ====================================================================
%%% Bibliography entries, by architecture:
%%% ====================================================================
%%% General
%%% Computer-architecture textbook; listed here for its coverage of
%%% multimedia instructions (see the keywords field).
@Book{Hsu:2001:CAS,
  author =       "John Y. Hsu",
  title =        "Computer Architecture: Software Aspects, Coding, and
                 Hardware",
  publisher =    pub-CRC,
  address =      pub-CRC:adr,
  pages =        "427",
  year =         "2001",
  ISBN =         "0-8493-1026-1, 1-351-83604-8, 1-4200-4110-X (e-book)",
  ISBN-13 =      "978-0-8493-1026-3, 978-1-351-83604-3,
                 978-1-4200-4110-1 (e-book)",
  LCCN =         "QA76.9.A73 H758 2001",
  bibdate =      "Fri Jan 19 15:47:59 2001",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/fparith.bib;
                 http://www.math.utah.edu/pub/tex/bib/java2000.bib;
                 http://www.math.utah.edu/pub/tex/bib/master.bib;
                 http://www.math.utah.edu/pub/tex/bib/microchip.bib;
                 http://www.math.utah.edu/pub/tex/bib/virtual-machines.bib;
                 http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib",
  price =        "US\$89.95, UK\pounds 59.99",
  abstract =     "With the new developments in computer architecture,
                 fairly recent publications can quickly become outdated.
                 Computer Architecture: Software Aspects, Coding, and
                 Hardware takes a modern approach. This comprehensive,
                 practical text provides that critical understanding of
                 a central processor by clearly detailing fundamentals,
                 and cutting edge design features. With its balanced
                 software/hardware perspective and its description of
                 Pentium processors, the book allows readers to acquire
                 practical PC software experience. The text presents a
                 foundation-level set of ideas, design concepts, and
                 applications that fully meet the requirements of
                 computer organization and architecture courses. The
                 book features a ``bottom up'' computer design approach,
                 based upon the author's thirty years experience in both
                 academe and industry. By combining computer engineering
                 with electrical engineering, the author describes how
                 logic circuits are designed in a CPU. The extensive
                 coverage of a microprogrammed CPU and new processor
                 design features gives the insight of current computer
                 development. Computer Architecture: Software Aspects,
                 Coding, and Hardware presents a comprehensive review of
                 the subject, from beginner to advanced levels. Topics
                 include: * Two's complement numbers * Integer overflow
                 * Exponent overflow and underflow * Looping *
                 Addressing modes * Indexing * Subroutine linking * I/O
                 structures * Memory mapped I/O * Cycle stealing *
                 Interrupts * Multitasking * Microprogrammed CPU *
                 Multiplication tree * Instruction queue * Multimedia
                 instructions * Instruction cache * Virtual memory *
                 Data cache * Alpha chip * Interprocessor communications
                 * Branch prediction * Speculative loading * Register
                 stack * JAVA virtual machine * Stack machine
                 principles.",
  acknowledgement = ack-nhfb,
  keywords =     "Compaq/DEC Alpha; floating-point arithmetic; Intel
                 x86; Java Virtual Machine; multimedia instructions;
                 Pentium",
  libnote =      "Not yet in my library.",
  tableofcontents = "Number Systems \\
                 Basic Computer Principles \\
                 Assembly Language Principles \\
                 Computer Architecture--General Features \\
                 Microprogrammed CPU Design \\
                 Superscalar Machine Principles \\
                 Vector and Multiple-Processor Machines \\
                 Processor Design Case Studies \\
                 Stack Machine Principles",
}

%%% ====================================================================
%%% AMD 3DNow!
@Article{Jennings:1998:MCS,
  author = {Matthew D. Jennings and Thomas M. Conte},
  title = {Mobile Computing: Subword extensions for video
    processing on mobile systems},
  journal = j-IEEE-CONCURR,
  volume = {6},
  number = {3},
  pages = {13--16},
  month = jul # "\slash " # sep,
  year = {1998},
  CODEN = {IECMFX},
  DOI = {https://doi.org/10.1109/4434.708250},
  ISSN = {1092-3063 (print), 1558-0849 (electronic)},
  ISSN-L = {1092-3063},
  bibdate = {Mon Jun 7 07:52:29 MDT 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/ieeeconcurrency.bib;
    http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib},
  URL = {http://dlib.computer.org/pd/books/pd1998/pdf/p3013.pdf},
  acknowledgement = ack-nhfb,
  fjournal = {IEEE Concurrency},
  keywords = {3DNow!; AltiVec; MAX-2; MIPS Digital Media Extensions
    (MDMX); MMX; VIS},
}

@InProceedings{Oberman:1998:ATK,
  author = {Stuart Oberman and Fred Weber and Norbert Juffa and
    Greg Favor},
  title = {{AMD 3DNow!} Technology and the {K6-2}
    Microprocessor},
  crossref = {IEEE:1998:HCC},
  pages = {245--254},
  year = {1998},
  bibdate = {Mon Jan 08 17:02:55 2001},
  bibsource = {ftp://www.hotchips.org/pub/hotc7to11cd/hc98/pdf_1up/hc98_10c_oberman_1up.txt;
    http://www.math.utah.edu/pub/tex/bib/fparith.bib;
    http://www.math.utah.edu/pub/tex/bib/hot-chips.bib;
    http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib},
  acknowledgement = ack-nhfb,
}

@Article{Bush:1999:CEE,
  author = {Jonathan Bush and Timothy S. Newman},
  title = {The Cutting Edge: Effectively Utilizing {3DNow!} in
    {Linux}},
  journal = j-LINUX-J,
  volume = {68},
  pages = {??--??},
  month = dec,
  year = {1999},
  CODEN = {LIJOFX},
  ISSN = {1075-3583 (print), 1938-3827 (electronic)},
  ISSN-L = {1075-3583},
  bibdate = {Thu Sep 21 14:31:45 MDT 2000},
  bibsource = {http://noframes.linuxjournal.com/lj-issues/issue68/index.html;
    http://www.math.utah.edu/pub/tex/bib/linux-journal.bib;
    http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib},
  URL = {http://noframes.linuxjournal.com/lj-issues/issue68/3685.html},
  acknowledgement = ack-nhfb,
  fjournal = {Linux Journal},
  journal-URL = {http://portal.acm.org/citation.cfm?id=J508},
}

@Article{Iordache:1999:ARS,
  author = {Cristina Iordache and David W. Matula},
  title = {Analysis of Reciprocal and Square Root Reciprocal
    Instructions in the {AMD K6-2} Implementation of
    {3DNow!}},
  journal = j-ELECT-NOTES-THEOR-COMP-SCI,
  volume = {24},
  pages = {34--62},
  year = {1999},
  CODEN = {????},
  DOI = {https://doi.org/10.1016/S1571-0661(05)80621-8},
  ISSN = {1571-0661},
  bibdate = {Fri Jun 24 20:23:13 2005},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib},
  abstract = {Reciprocal and root reciprocal functions at ``half''
    and IEEE single precision formats are specified in the
    AMD 3DNow! instruction set. Implementations in the
    recently released AMD K6-2 microprocessor are analyzed
    herein by exhaustive computation and timing loops to
    ascertain the accuracy and monotonicity properties of
    the output and throughput\slash latency cycle counts.
    Periodicities in stepwise function output were observed
    and employed to construct an underlying bipartite table
    that can serve as the core of the respective reciprocal
    function outputs. The recommended RISC instruction
    macros generated single precision reciprocals and root
    reciprocals accurate to a unit in the last place.
    However, the root reciprocal functions failed to
    satisfy the desirable monotonicity property typically
    implemented as an industry standard for elementary
    functions on x86 floating point units. Reasons for the
    failure are provided and an adjusted table is shown to
    satisfy the monotonicity standard. Results are
    summarized in Table 1 and described in the body of this
    report.},
  acknowledgement = ack-nhfb,
  fjournal = {Electronic Notes in Theoretical Computer Science},
}

@Article{Oberman:1999:ATA,
  author = {Stuart Oberman and Greg Favor and Fred Weber},
  title = {{AMD 3DNow!} Technology: Architecture and
    Implementations},
  journal = j-IEEE-MICRO,
  volume = {19},
  number = {2},
  pages = {37--48},
  month = mar # "\slash " # apr,
  year = {1999},
  CODEN = {IEMIDZ},
  DOI = {https://doi.org/10.1109/40.755466},
  ISSN = {0272-1732 (print), 1937-4143 (electronic)},
  ISSN-L = {0272-1732},
  bibdate = {Thu Dec 14 06:08:58 MST 2000},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/ieeemicro.bib;
    http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib;
    Science Citation Index database (1980--2000)},
  URL = {http://dlib.computer.org/mi/books/mi1999/pdf/m2037.pdf;
    http://www.computer.org/micro/mi1999/m2037abs.htm},
  acknowledgement = ack-nhfb,
  fjournal = {IEEE Micro},
  journal-URL = {http://www.computer.org/csdl/mags/mi/index.html},
}

@InProceedings{Scherer:1999:OTW,
  author = {Alisa Scherer and Michael Golden and Norbert Juffa and
    Stephan Meier and Stuart Oberman and Hamid Partovi and
    Fred Weber},
  title = {An Out-of-Order Three-Way Superscalar Multimedia
    Floating-Point Unit},
  booktitle = {1999 IEEE International Solid-State Circuits
    Conference},
  publisher = pub-IEEE,
  address = pub-IEEE:adr,
  pages = {??--??},
  year = {1999},
  ISBN = {0-7803-5129-0},
  ISBN-13 = {978-0-7803-5129-5},
  LCCN = {????},
  bibdate = {Mon Jan 08 08:35:15 2001},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/fparith.bib;
    http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib;
    http://www.sscs.org/digests/1999/DATA/05_5.pdf},
  acknowledgement = ack-nhfb,
  annote = {The AMD-K7 floating-point unit is implemented as an
    out-of-order coprocessor responsible for executing all
    x86 FPU, MMX, and AMD 3DNow! instructions.},
  keywords = {AMD-K7 floating-point unit},
  pagecount = {10},
}

%%% AMD data sheet for the embedded K6-2E+ processor.
@Manual{AMD:2000:AKE,
  title =        "{AMD-K6-2E+} Embedded Processor Data Sheet",
  organization = pub-AMD,
  address =      pub-AMD:adr,
  pages =        "xxii + 346",
  month =        sep,
  year =         "2000",
  bibdate =      "Tue Jan 16 16:55:41 2001",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib",
  note =         "Order number 23542A/0.",
  URL =          "http://www.amd.com/products/epd/processors/6.32bitproc/8.amdk6fami/28.amdk62e/23542/23542a.pdf",
  acknowledgement = ack-nhfb,
}

@Manual{AMD:2000:TM,
  title = {{3DNow!} Technology Manual},
  organization = pub-AMD,
  address = pub-AMD:adr,
  pages = {x + 62},
  month = mar,
  year = {2000},
  bibdate = {Tue Jan 16 16:55:41 2001},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib},
  note = {Order number 21928G/0.},
  URL = {http://www.amd.com/products/epd/processors/6.32bitproc/8.amdk6fami/26.amdk62e/21928/21928.pdf},
  acknowledgement = ack-nhfb,
}

%%% Dr. Dobb's Journal article on optimizing 3DNow! graphics code.
@Article{Fomitchev:2000:ORT,
  author =       "Max I. Fomitchev",
  title =        "Optimizing {3DNow!} Real-Time Graphics",
  journal =      j-DDJ,
  volume =       "25",
  number =       "8",
  pages =        "40, 42--46",
  month =        aug,
  year =         "2000",
  CODEN =        "DDJOEB",
  ISSN =         "1044-789X",
  bibdate =      "Thu Nov 9 08:25:16 MST 2000",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/dr-dobbs-2000.bib;
                 http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib",
  URL =          "http://www.ddj.com/ftp/2000/2000_08/3dnow.txt;
                 http://www.ddj.com/ftp/2000/2000_08/3dnow.zip",
  abstract =     "One approach to pumping up real-time 3D graphics
                 performance is AMD's 3DNow! technology, which adds
                 floating-point SIMD extensions to the original x86
                 instruction set. Max examines 3DNow! technology, then
                 provides guidelines for optimizing its performance even
                 more. Additional resources include 3dnow.rtf (listings)
                 and 3dnow.zip (source code).",
  acknowledgement = ack-nhfb,
  fjournal =     "Dr. Dobb's Journal of Software Tools",
}

@TechReport{Hunter:2000:EPG,
  author = {Craig A. Hunter},
  title = {An Evaluation of {PowerMac G4} Systems for
    {FORTRAN}-based Scientific Computing with Application
    to Computational Fluid Dynamics Simulation},
  type = {Technical report},
  institution = {NASA Langley Research Center, Configuration
    Aerodynamics Branch},
  address = {Hampton, VA, USA},
  month = jul,
  year = {2000},
  bibdate = {Wed Oct 17 11:08:37 2001},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib},
  URL = {http://ad-www.larc.nasa.gov/~cah/NASA_G4_Study.pdf},
  acknowledgement = ack-nhfb,
}

%%% ====================================================================
%%% Compaq/DEC Alpha Motion Video Instruction (MVI)
@TechReport{Rubinfeld:1996:MVI,
  author = {Paul Rubinfeld and Bob Rose and Michael McCallig},
  title = {Motion Video Instruction Extensions for Alpha},
  type = {Technical report},
  institution = {Semiconductor Engineering Group},
  address = {77 Reed Road, HLO2-3/D11 Hudson, MA 01749, USA},
  day = {18},
  month = oct,
  year = {1996},
  bibdate = {Fri May 18 12:02:58 2001},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib},
  URL = {http://www.digital.com/semiconductor/alpha/papers/pmvi.pdf},
  acknowledgement = ack-nhfb,
}

%%% ====================================================================
%%% Cyrix M2
@InProceedings{Maher:1996:MIS,
  author = {Robert Maher},
  title = {Multimedia Instruction Set Extensions for a
    Sixth-Generation x86 Processor},
  crossref = {IEEE:1996:HCV},
  pages = {163--170},
  year = {1996},
  bibdate = {Sat Jan 6 19:21:13 MST 2001},
  bibsource = {ftp://www.hotchips.org/pub/hotc7to11cd/hc96/hc8_pdf/5.3.pdf;
    http://www.math.utah.edu/pub/tex/bib/hot-chips.bib;
    http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib;
    OCLC Proceedings database},
  acknowledgement = ack-nhfb,
  keywords = {Cyrix M2 processor},
}

%%% ====================================================================
%%% HP PA-7100LC and MAX-2
%%% See also Jennings:1998:MCS
@Article{Anonymous:1994:NPd,
  author = {Anonymous},
  title = {{New Products}},
  journal = j-DATAMATION,
  volume = {40},
  number = {4},
  pages = {85--??},
  day = {15},
  month = feb,
  year = {1994},
  CODEN = {DTMNAT},
  ISSN = {0011-6963},
  bibdate = {Sat Jan 27 07:35:21 MST 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/datamation.bib;
    http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib},
  abstract = {Hewlett--Packard has released entry-level HP 3000
    servers based on its new PA-7100LC PA-RISC chip.},
  acknowledgement = ack-nhfb,
  fjournal = {Datamation},
}

@Article{Smith:1994:PWA,
  author = {Ben Smith},
  title = {Power Workstation at a {Pentium} Price: The {HP} 9000
    {Series} 700 {Model} 712/60 uses innovative hardware
    packaging and the superscalar {PA-7100LC CPU} to
    achieve breakthroughs in low cost and high performance
    for a {Unix} workstation},
  journal = j-BYTE,
  volume = {19},
  number = {7},
  pages = {161--??},
  month = jul,
  year = {1994},
  CODEN = {BYTEDJ},
  ISSN = {0360-5280 (print), 1082-7838 (electronic)},
  ISSN-L = {0360-5280},
  bibdate = {Tue Jan 2 10:01:41 MST 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/byte1990.bib;
    http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib},
  acknowledgement = ack-nhfb,
  fjournal = {BYTE Magazine},
}

@Article{Anonymous:1995:OIP,
  author = {Anonymous},
  title = {Overview of the Implementation of the {PA 7100LC}
    Multimedia Enhancements},
  journal = j-HEWLETT-PACKARD-J,
  volume = {46},
  number = {2},
  pages = {66--66},
  month = apr,
  year = {1995},
  CODEN = {HPJOAX},
  ISSN = {0018-1153},
  bibdate = {Mon May 27 08:32:08 MDT 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/hpj.bib;
    http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib},
  URL = {http://www.hp.com/hpj/95apr/apr95_66.pdf;
    http://www.hp.com/hpj/toc-04-95.html},
  acknowledgement = ack-nhfb,
  fjournal = {Hewlett-Packard Journal: technical information from
    the laboratories of Hewlett-Packard Company},
}

%%% From the April 1995 Hewlett-Packard Journal issue (46(2)) that
%%% carries the PA 7100LC articles listed in this section.
@Article{Bass:1995:DMP,
  author =       "Mick Bass and Terry W. Blanchard and D. Douglas
                 Josephson and Duncan Weir and Daniel L. Halperin",
  title =        "Design methodologies for the {PA 7100LC}
                 microprocessor",
  journal =      j-HEWLETT-PACKARD-J,
  volume =       "46",
  number =       "2",
  pages =        "23--35",
  month =        apr,
  year =         "1995",
  CODEN =        "HPJOAX",
  ISSN =         "0018-1153",
  bibdate =      "Tue Mar 25 14:12:15 MST 1997",
  bibsource =    "Compendex database;
                 http://www.math.utah.edu/pub/tex/bib/hpj.bib;
                 http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib",
  URL =          "http://www.hp.com/hpj/95apr/apr95_23.pdf;
                 http://www.hp.com/hpj/toc-04-95.html",
  abstract =     "During the development of the PA 7100LC
                 microprocessor, engineers created several methodologies
                 that provided the framework for implementing the design
                 decisions that were made throughout the project. This
                 article explores several of these methodologies, which
                 were crucial to the successful product implementation.
                 For each methodology, the design decisions that
                 impacted the methodology are discussed as well as the
                 alternatives that were considered and the course that
                 was chosen. The areas in which these methodologies
                 were developed include control synthesis, place and
                 route, production test, processor diagnosability,
                 presilicon verification, and postsilicon
                 verification.",
  acknowledgement = ack-nhfb,
  classcodes =   "B1265F (Microprocessors and microcomputers); C5130
                 (Microprocessor chips)",
  classification = "714.2; 721.2; 722; 722.1; 723.5; 912.2; B1265F
                 (Microprocessors and microcomputers); C5130
                 (Microprocessor chips)",
  fjournal =     "Hewlett-Packard Journal: technical information from
                 the laboratories of Hewlett-Packard Company",
  journalabr =   "Hewlett Packard J",
  keywords =     "Algorithms; Buffer storage; Computer hardware;
                 Computer simulation; Computer simulation languages;
                 Control circuitry; Decision making; Design decision;
                 Design methodologies; design methodologies; features;
                 In circuit emulation; Integrated circuit layout;
                 Integrated circuit testing; integrated circuit testing;
                 Logic devices; Microprocessor chips; microprocessor
                 chips; PA 7100LC microprocessor; Postsilicon electrical
                 verification; Processor chip testing; processor chip
                 testing; product; Product features; Programmable logic
                 array; Sample on the fly testing; Simulators; Verilog
                 modeling language",
  thesaurus =    "Integrated circuit testing; Microprocessor chips",
  treatment =    "P Practical",
}

%%% From the April 1995 Hewlett-Packard Journal issue (46(2)) that
%%% carries the PA 7100LC articles listed in this section.
@Article{Bass:1995:PMC,
  author =       "Mick Bass and Patrick Knebel and David W. Quint and
                 William L. Walker",
  title =        "The {PA 7100LC} Microprocessor: {A} Case Study of {IC}
                 Design Decisions in a Competitive Environment",
  journal =      j-HEWLETT-PACKARD-J,
  volume =       "46",
  number =       "2",
  pages =        "12--22",
  month =        apr,
  year =         "1995",
  CODEN =        "HPJOAX",
  ISSN =         "0018-1153",
  bibdate =      "Tue Mar 25 14:12:15 MST 1997",
  bibsource =    "Compendex database;
                 http://www.math.utah.edu/pub/tex/bib/hpj.bib;
                 http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib",
  URL =          "http://www.hp.com/hpj/95apr/apr95_12.pdf;
                 http://www.hp.com/hpj/toc-04-95.html",
  abstract =     "Engineering design decisions made during the early
                 stages of a product's development are crucial to a
                 product's success. In the case of Hewlett--Packard, the
                 careful design decisions made led to the creation of a
                 low-cost, single-chip processor core --- the PA 7100LC
                 --- that includes a built-in memory controller, a
                 combined variable-size off-chip primary instruction and
                 data cache, a 1K-byte on-chip instruction buffer, and a
                 superscalar execution unit with two integer units and
                 one floating-point unit with reduced size and
                 performance. I${}_{DDQ}$, sample-on-the-fly, and debug
                 modes were added to enhance testability, reduce test
                 cost and accelerate the post-silicon schedule. With the
                 PA 7100LC processor, Hewlett--Packard has demonstrated
                 an ability to make design decisions in a manner that
                 leads to products having a strong competitive position
                 in the areas of cost, performance, quality and time to
                 market.",
  acknowledgement = ack-nhfb,
  classcodes =   "B1265F (Microprocessors and microcomputers); C5130
                 (Microprocessor chips)",
  classification = "714.2; 722; 722.4; 911; 912.2; 912.4; B1265F
                 (Microprocessors and microcomputers); C5130
                 (Microprocessor chips)",
  fjournal =     "Hewlett-Packard Journal: technical information from
                 the laboratories of Hewlett-Packard Company",
  journalabr =   "Hewlett Packard J",
  keywords =     "Buffer storage; Central processing unit; Computer
                 hardware; Computer systems; Computer workstations;
                 Costs; Decision making; Design decision; design
                 decisions; Electronics packaging; engineering;
                 Engineering design decisions; Engineers; Floating point
                 unit; I/O controller; IC design decisions; Integrated
                 circuit layout; Memory controller; Microprocessor
                 chips; microprocessor chips; PA 7100LC microprocessor;
                 Performance; performance; Random access storage;
                 Reliability; reliability; Time to market; time to
                 market",
  thesaurus =    "Microprocessor chips",
  treatment =    "P Practical",
}

%%% Entry Benzel:1995:BBW: HP Journal 46(2), April 1995.  Voltage-contrast
%%% imaging with a scanning electron microscope, applied to dynamic
%%% circuits on the HP PA 7100LC processor chip.  The abstract wording
%%% ``coprocessor'' follows this entry's own keywords field.
@Article{Benzel:1995:BBW,
  author =       "Jack D. Benzel",
  title =        "Bugs in Black and White: Imaging {IC} Logic Levels
                 with Voltage Contrast",
  journal =      j-HEWLETT-PACKARD-J,
  volume =       "46",
  number =       "2",
  pages =        "102--106",
  month =        apr,
  year =         "1995",
  CODEN =        "HPJOAX",
  ISSN =         "0018-1153",
  bibdate =      "Tue Mar 25 14:12:15 MST 1997",
  bibsource =    "Compendex database;
                 http://www.math.utah.edu/pub/tex/bib/hpj.bib;
                 http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib",
  URL =          "http://www.hp.com/hpj/95apr/apr95_1b.pdf;
                 http://www.hp.com/hpj/toc-04-95.html",
  abstract =     "Voltage contrast imaging allows tracking down of
                 functional bugs and electrical margin issues and other
                 logical level problems on operating integrated circuits
                 (IC) design using a scanning electron microscope (SEM).
                 This paper shows an overview of voltage contrast and
                 the methods developed to image the failure of dynamic
                 circuits in the floating-point coprocessor circuitry of
                 the HP PA 7100LC processor chip.",
  acknowledgement = ack-nhfb,
  classcodes =   "B1265F (Microprocessors and microcomputers); B0170E
                 (Production facilities and engineering); B2570
                 (Semiconductor integrated circuits); C5130
                 (Microprocessor chips); C5230 (Digital arithmetic
                 methods)",
  classification = "714.2; 721.3; 741.3; B0170E (Production facilities
                 and engineering); B1265F (Microprocessors and
                 microcomputers); B2570 (Semiconductor integrated
                 circuits); C5130 (Microprocessor chips); C5230 (Digital
                 arithmetic methods)",
  fjournal =     "Hewlett-Packard Journal: technical information from
                 the laboratories of Hewlett-Packard Company",
  journalabr =   "Hewlett Packard J",
  keywords =     "Bugs; bugs; circuitry; Device under test; Dynamic
                 circuits; dynamic circuits; Electronic circuit
                 tracking; Failure analysis; floating point arithmetic;
                 Floating point arithmetic logic unit; floating-point
                 coprocessor; Floating-point coprocessor circuitry; HP
                 PA 7100LC processor chip; Imaging IC logic levels;
                 imaging IC logic levels; Imaging techniques; integrated
                 circuit testing; Integrated circuits; Logic circuits;
                 Logic design; Logical level problems; logical level
                 problems; Microprocessor chips; microprocessor chips;
                 microscope; scanning electron; Scanning electron
                 microscope; Scanning electron microscopy;
                 Synchronization; tracking; visual; Visual tracking;
                 Voltage contrast; voltage contrast; Voltage contrast
                 imaging",
  thesaurus =    "Floating point arithmetic; Integrated circuit testing;
                 Microprocessor chips",
  treatment =    "A Application; P Practical",
}

%%% Entry Bowers:1995:DLC: HP Journal 46(2), April 1995.  Development of
%%% the HP 9000 Series 800 Ex5 and HP 3000 Series 9x8 business servers,
%%% built around the PA-RISC PA 7100LC processor and the LASI I/O chip.
%%% The entry carries two abstracts (abstract and abstract-2).
@Article{Bowers:1995:DLC,
  author =       "Dennis A. Bowers and Gerard M. Enkerlin and Karen L.
                 Murillo",
  title =        "Development of a Low-Cost, High-Performance, Multiuser
                 Business Server System",
  journal =      j-HEWLETT-PACKARD-J,
  volume =       "46",
  number =       "2",
  pages =        "79--84",
  month =        apr,
  year =         "1995",
  CODEN =        "HPJOAX",
  ISSN =         "0018-1153",
  bibdate =      "Tue Mar 25 14:12:15 MST 1997",
  bibsource =    "Compendex database;
                 http://www.math.utah.edu/pub/tex/bib/hpj.bib;
                 http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib",
  URL =          "http://www.hp.com/hpj/95apr/apr95_79.pdf;
                 http://www.hp.com/hpj/toc-04-95.html",
  abstract =     "The HP 9000 Series 800 Models E25, E35, E45, and E55
                 (Ex5) and the HP 3000 Series 908, 918, 928, and 938
                 (9x8) business servers were developed as low-cost,
                 performance enhanced replacements for the HP 9000 F
                 Series and low end G Series and the HP 3000 Series 917,
                 927, 937, and 947. The development of the PA-RISC PA
                 7100LC processor chip and the LASI (LAN/SCSI) I/O
                 interface and the evolution of DRAMs for main memory
                 enabled the development of these low-end servers. The
                 priorities for the Models Ex5 and Series 9x8 server
                 project were short time to market, low cost, and
                 improved performance. The functionality and quality of
                 the new servers were to be as good as the products they
                 were replacing, if not better. The challenge was to get
                 these new servers to market as soon as possible so that
                 HP could continue to be competitive in the business
                 server market and their customers could benefit from
                 better performance at a lower price.",
  abstract-2 =   "Using leveraged technology, an aggressive system team,
                 and clearly emphasized priorities, several versions of
                 low-end multiuser systems were developed in record time
                 while dramatically improving the product's availability
                 to customers.",
  acknowledgement = ack-nhfb,
  classcodes =   "C5630 (Networking equipment); C5430 (Microcomputers);
                 C5690 (Other data communication equipment and
                 techniques)",
  classification = "714.2; 722.1; 722.2; 722.4; 911.4; C5430
                 (Microcomputers); C5630 (Networking equipment); C5690
                 (Other data communication equipment and techniques)",
  fjournal =     "Hewlett-Packard Journal: technical information from
                 the laboratories of Hewlett-Packard Company",
  journalabr =   "Hewlett Packard J",
  keywords =     "3000 Series; Buffer storage; chip; Computer systems;
                 Costs; Data storage equipment; DRAMs; Hewlett Packard
                 computers; HP; HP 3000 Series; HP 9000 F Series; HP
                 9000 Series 800; I/O interface; LAN/SCSI; LASI;
                 Marketing; Microprocessor chips; Models Ex5; multiuser
                 business server system; Multiuser business server
                 system; network servers; PA-RISC PA 7100LC processor;
                 PA-RISC PA 7100LC processor chip; Performance; Random
                 access storage; Series 9x8; Server system;
                 Specifications; User interfaces; VLSI circuits",
  thesaurus =    "Hewlett Packard computers; Network servers",
  treatment =    "A Application; P Practical",
}

%%% Entry Lee:1995:RTS: HP Journal 46(2), April 1995.  Software MPEG
%%% video playback at up to 30 frames per second using the PA-RISC
%%% multimedia instructions of the PA 7100LC.  The title is stored in
%%% Title Case, like the other entries from this journal issue, so that
%%% bibliography styles remain free to downcase it.
%%% NOTE(review): pages (60--68), pagecount (8), and xxpages (63--68)
%%% do not agree with one another; verify against the printed issue.
@Article{Lee:1995:RTS,
  author =       "Ruby B. Lee and John P. Beck and Joel Lamb and Kenneth
                 E. Severson",
  title =        "Real-Time Software {MPEG} Video Decoder on
                 Multimedia-Enhanced {PA 7100LC} Processors",
  journal =      j-HEWLETT-PACKARD-J,
  volume =       "46",
  number =       "2",
  pages =        "60--68",
  month =        apr,
  year =         "1995",
  CODEN =        "HPJOAX",
  ISSN =         "0018-1153",
  bibdate =      "Tue Mar 25 14:12:15 MST 1997",
  bibsource =    "Compendex database;
                 http://www.math.utah.edu/pub/tex/bib/hpj.bib;
                 http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib",
  URL =          "http://www.hp.com/hpj/95apr/apr95_60.pdf;
                 http://www.hp.com/hpj/toc-04-95.html",
  abstract =     "With a combination of software and hardware
                 optimizations, including the availability of PA-RISC
                 multimedia instructions, a software video player
                 running on a low-end workstation is able to play MPEG
                 compressed video at 30 frames/s. In this paper, we
                 discuss the support of MPEG-compressed video as a new
                 (video) data type. In particular, we discuss the
                 technology that enables the video player integrated
                 into the HP MPower 2.0 product to play back
                 MPEG-compressed video at real-time rates of up to 30
                 frames per second.",
  acknowledgement = ack-nhfb,
  classcodes =   "B6140C (Optical information, image and video signal
                 processing); C5260B (Computer vision and image
                 processing techniques); C5540 (Terminals and graphic
                 displays); C5220 (Computer architecture)",
  classification = "714.2; 722; 723; 723.2; 902.2; 911; B6140C (Optical
                 information, image and video signal processing); C5220
                 (Computer architecture); C5260B (Computer vision and
                 image processing techniques); C5540 (Terminals and
                 graphic displays)",
  fjournal =     "Hewlett-Packard Journal: technical information from
                 the laboratories of Hewlett-Packard Company",
  journalabr =   "Hewlett Packard J",
  keywords =     "Algorithms; Computer hardware; Computer software;
                 Computer workstations; computing; Costs; Decoding;
                 decoding; Hardware optimizations; hardware
                 optimizations; Image compression; Image decoding; Image
                 processing; low-end; Low-end workstation;
                 Microprocessor chips; MPEG decoding; MPEG-compressed
                 video; Multimedia; multimedia computing; multimedia
                 instructions; multimedia-enhanced; Multimedia-enhanced
                 PA 7100LC processors; PA 7100LC processors; PA-RISC;
                 PA-RISC multimedia instructions; Real-time software
                 MPEG video decoder; real-time software MPEG video
                 decoder; reduced instruction set; Software video
                 decoder; Software video player; software video player;
                 Standards; video coding; workstation; workstations",
  pagecount =    "8",
  thesaurus =    "Decoding; Multimedia computing; Reduced instruction
                 set computing; Video coding; Workstations",
  treatment =    "A Application; P Practical",
  xxpages =      "63--68",
}

%%% Entry Pearson:1995:LCH: HP Journal 46(2), April 1995.  System design
%%% of the HP 9000 Model 712 workstation around three VLSI chips: the
%%% PA 7100LC CPU, the LASI (LAN/SCSI) I/O chip, and the graphics chip,
%%% connected through the GSC bus.
%%% NOTE(review): the keywords ``graphics subsystem c'' and ``Graphics
%%% subsystem c'' look truncated; left as found pending confirmation.
@Article{Pearson:1995:LCH,
  author =       "Roger A. Pearson",
  title =        "A Low-Cost, High-Performance {PA-RISC} Workstation
                 with Built-In Graphics, Multimedia, and Networking
                 Capabilities",
  journal =      j-HEWLETT-PACKARD-J,
  volume =       "46",
  number =       "2",
  pages =        "6--11",
  month =        apr,
  year =         "1995",
  CODEN =        "HPJOAX",
  ISSN =         "0018-1153",
  bibdate =      "Tue Mar 25 14:12:15 MST 1997",
  bibsource =    "Compendex database;
                 http://www.math.utah.edu/pub/tex/bib/hpj.bib;
                 http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib",
  URL =          "http://www.hp.com/hpj/95apr/apr95_6t.pdf;
                 http://www.hp.com/hpj/toc-04-95.html",
  abstract =     "Designing as a set the three VLSI components that
                 provide the core functions of CPU, I/O, and graphics
                 for the HP 9000 Model 712 workstation balanced
                 performance and cost and simplified the interfaces
                 between components, allowing designers to create a
                 system with high performance at a low cost. The three
                 new VLSI chips provide most of the functionality of the
                 Model 712 workstation. The PA 7100LC CPU chip
                 interfaces directly to the cache and main memory. The
                 LASI (LAN/SCSI) chip does most of the core I/O needed
                 for entry-level workstations. The graphics subsystem
                 consists of the graphics chip and the frame buffer
                 VRAMs. All three chips communicate through the GSC
                 (general system connect) bus. The Models 712/60 and
                 712/80 are very similar and differ only in their cache
                 sizes and cache speeds and in the main system clock
                 speeds.",
  acknowledgement = ack-nhfb,
  classcodes =   "C5540 (Terminals and graphic displays); C5430
                 (Microcomputers); C5220 (Computer architecture); C6130M
                 (Multimedia)",
  classification = "711.1; 714.2; 722.1; 722.3; 722.4; 723.5; C5220
                 (Computer architecture); C5430 (Microcomputers); C5540
                 (Terminals and graphic displays); C6130M (Multimedia)",
  fjournal =     "Hewlett-Packard Journal: technical information from
                 the laboratories of Hewlett-Packard Company",
  journalabr =   "Hewlett Packard J",
  keywords =     "Buffer storage; built-in graphics; Built-in graphics;
                 cache; Cache; cache storage; chip; Computer graphics;
                 Computer workstations; computing; CPU; Data
                 communication equipment; Electromagnetic compatibility;
                 Electromagnetic wave interference; Electromagnetic wave
                 interference control; Expansion cards; general system
                 connect bus; General system connect bus; graphics;
                 Graphics; graphics subsystem c; Graphics subsystem c;
                 high-performance PA-RISC workstation; High-performance
                 PA-RISC workstation; HP 9000 Model 712 workstation;
                 I/O; Integrated circuit layout; Interfaces (computer);
                 LAN/SCSI chip; LASI; Local area networks; multimedia;
                 Multimedia; multimedia systems; networking
                 capabilities; Networking capabilities; PA 7100LC CPU;
                 PA 7100LC CPU chip; Printed circuit design; Processor;
                 Random access storage; reduced instruction set; Single
                 inline memory modules; Telephony; VLSI; VLSI circuits;
                 VLSI components; workstations",
  thesaurus =    "Cache storage; Multimedia systems; Reduced instruction
                 set computing; VLSI; Workstations",
  treatment =    "P Practical",
}

%%% Entry Lee:1996:SPM: IEEE Micro 16(4), July/August 1996.  The MAX-2
%%% instruction extensions for subword parallelism in media processing.
%%% The month is built by string concatenation of the jul and aug
%%% macros around a literal \slash.
@Article{Lee:1996:SPM,
  author =       "Ruby B. Lee",
  title =        "Subword Parallelism with {MAX-2}: Accelerating media
                 processing with a minimal set of instruction extensions
                 supporting efficient subword parallelism",
  journal =      j-IEEE-MICRO,
  volume =       "16",
  number =       "4",
  pages =        "51--59",
  month =        jul # "\slash " # aug,
  year =         "1996",
  CODEN =        "IEMIDZ",
  DOI =          "https://doi.org/10.1109/40.526925",
  ISSN =         "0272-1732 (print), 1937-4143 (electronic)",
  ISSN-L =       "0272-1732",
  bibdate =      "Thu Dec 14 06:08:58 MST 2000",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib;
                 Science Citation Index database (1980--2000)",
  acknowledgement = ack-nhfb,
  classcodes =   "C6130M (Multimedia); C6150N (Distributed systems
                 software); C6140B (Machine-oriented languages)",
  fjournal =     "IEEE Micro",
  journal-URL =  "http://www.computer.org/csdl/mags/mi/index.html",
  keywords =     "computation; instruction extensions; instruction sets;
                 MAX-2; media processing; multimedia computing;
                 parallel; parallel processing; subword parallelism;
                 word-oriented general-purpose processor",
  treatment =    "P Practical",
}

%%% Entry Blanchard:1997:PMH: HP Journal 48(3), June 1997.  Guiding
%%% design principles and a brief overview of the PA 7300LC, the
%%% successor to the PA 7100LC.  The title is stored in Title Case, like
%%% the other Hewlett-Packard Journal entries, so that bibliography
%%% styles remain free to downcase it.
@Article{Blanchard:1997:PMH,
  author =       "T. W. Blanchard and P. G. Tobin",
  title =        "The {PA 7300LC} Microprocessor: A Highly Integrated
                 System on a Chip",
  journal =      j-HEWLETT-PACKARD-J,
  volume =       "48",
  number =       "3",
  pages =        "43--47",
  month =        jun,
  year =         "1997",
  CODEN =        "HPJOAX",
  ISSN =         "0018-1153",
  bibdate =      "Wed Mar 25 15:17:10 MST 1998",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/hpj.bib;
                 http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib",
  URL =          "http://www.hp.com/hpj/97jun/ju97a6.htm",
  abstract =     "In the process of developing a microprocessor, key
                 decisions or guiding principles must be established to
                 set the boundaries for all design decisions. These
                 guiding principles are developed through analysis of
                 marketing, business, and technical requirements. We
                 determined that we could best meet the needs of higher
                 volume and more cost-sensitive products by developing a
                 different set of CPUs tuned to the special requirements
                 of these low-end, midrange systems. The PA 7100LC was
                 the first processor in this line, which continues with
                 the PA 7300LC. This article reviews the guiding
                 principles used during the development of the PA 7300LC
                 microprocessor. A brief overview of the chip is
                 given.",
  acknowledgement = ack-nhfb,
  classification = "B1265F (Microprocessors and microcomputers); C5130
                 (Microprocessor chips)",
  fjournal =     "Hewlett-Packard Journal: technical information from
                 the laboratories of Hewlett-Packard Company",
  keywords =     "business; cost-sensitive products; CPU; design
                 decisions; highly integrated system; low-end midrange
                 systems; marketing; microprocessor chips; PA 7100LC; PA
                 7300LC microprocessor; performance; performance
                 evaluation; technical requirements",
  treatment =    "P Practical",
}

%%% Entry Lee:19xx:BME: Hewlett--Packard technical report on the 64-bit
%%% and multimedia extensions of PA-RISC 2.0, located by URL only.
%%% The year field is the non-numeric placeholder 19xx, and the same
%%% placeholder is embedded in the citation key, so correcting the year
%%% later would also mean renaming the key.
%%% NOTE(review): a paper with this title and these authors reportedly
%%% appeared in the IEEE COMPCON 1996 proceedings -- TODO confirm, and
%%% if so consider recording the year in a note field.
@TechReport{Lee:19xx:BME,
  author =       "Ruby Lee and Jerry Huck",
  title =        "64-bit and Multimedia Extensions in the {PA-RISC 2.0}
                 Architecture",
  type =         "Technical report",
  institution =  "Hewlett--Packard Company",
  address =      "19410 Homestead Road, Cupertino, CA 95014, USA",
  year =         "19xx",
  bibdate =      "Mon Jul 23 08:23:09 2001",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib",
  URL =          "http://www.hp.com/computing/framed/technology/micropro/architecture/docs/pa2go3.html",
  acknowledgement = ack-nhfb,
}

%%% ====================================================================
%%% Intel MMX
%%% See also Jennings:1998:MCS
%%% Entry Anonymous:1996:BRW: BYTE 21(10), October 1996, unsigned
%%% readers' letters column that touches on MMX programming.
%%% NOTE(review): pages ends in ??, presumably because the final page
%%% was not known when the entry was made -- confirm and complete.
@Article{Anonymous:1996:BRW,
  author =       "Anonymous",
  title =        "{BYTE} readers write about new server architectures,
                 {Windows NT} issues, {MMX} programming, and ``free
                 {PCs}''",
  journal =      j-BYTE,
  volume =       "21",
  number =       "10",
  pages =        "19--??",
  month =        oct,
  year =         "1996",
  CODEN =        "BYTEDJ",
  ISSN =         "0360-5280 (print), 1082-7838 (electronic)",
  ISSN-L =       "0360-5280",
  bibdate =      "Fri Oct 4 18:38:04 MDT 1996",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/byte1995.bib;
                 http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "BYTE Magazine",
}

%%% Entry Anonymous:1996:IBRa: BYTE 21(9), September 1996, unsigned
%%% INBOX letters column that touches on programming for Intel's MMX.
%%% NOTE(review): pages ends in ??, presumably because the final page
%%% was not known when the entry was made -- confirm and complete.
@Article{Anonymous:1996:IBRa,
  author =       "Anonymous",
  title =        "{INBOX} --- {BYTE} readers sound off on electric
                 money, {Internet} commerce and privacy, programming for
                 {Intel}'s {MMX}, and praise the core of {BYTE}",
  journal =      j-BYTE,
  volume =       "21",
  number =       "9",
  pages =        "19--??",
  month =        sep,
  year =         "1996",
  CODEN =        "BYTEDJ",
  ISSN =         "0360-5280 (print), 1082-7838 (electronic)",
  ISSN-L =       "0360-5280",
  bibdate =      "Tue Sep 17 17:33:13 MDT 1996",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/byte1995.bib;
                 http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "BYTE Magazine",
}

%%% Entry Atkins:1996:PSP: IEEE Computer 29(8), August 1996.  Software
%%% performance tuning on PCs, by authors affiliated with Intel; the
%%% keywords list MMX technology and Pentium processors.  No abstract
%%% is recorded.
@Article{Atkins:1996:PSP,
  author =       "Mark Atkins and Ramesh Subramaniam",
  title =        "{PC} Software Performance Tuning",
  journal =      j-COMPUTER,
  volume =       "29",
  number =       "8",
  pages =        "47--54",
  month =        aug,
  year =         "1996",
  CODEN =        "CPTRB4",
  ISSN =         "0018-9162 (print), 1558-0814 (electronic)",
  ISSN-L =       "0018-9162",
  bibdate =      "Mon Feb 3 07:40:15 MST 1997",
  bibsource =    "Compendex database;
                 http://www.math.utah.edu/pub/tex/bib/computer1990.bib;
                 http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "Intel Corp., Santa Clara, CA, USA",
  classification = "714.2; 722.1; 722.4; 723; 723.1; 723.1.1; 723.5;
                 921.5; 922.2",
  fjournal =     "Computer",
  journal-URL =  "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=2",
  journalabr =   "Computer",
  keywords =     "Algorithms; Buffer storage; Code analysis; Code
                 sequencing; Code tuning; Codes (symbols); Computer
                 architecture; Computer simulation; Computer software;
                 Computer software selection and evaluation; Computer
                 systems programming; High level languages; Internal
                 cache; Microprocessor chips; MMX technology; On chip
                 cache; Optimization; Pentium processors; Performance;
                 Personal computers; Pipeline processing systems;
                 Program compilers; Program processors; Runtime
                 libraries; Software engineering; Software performance
                 tuning; Statistical methods; Statistical sampling",
}

%%% Entry Bik:2001:AEI: Dr. Dobb's Journal 26(7), July 2001.  Compiler
%%% exploitation of implicit parallelism; keywords tie it to MMX, SSE,
%%% and SSE2.  The pages field is a non-contiguous list, and the URL
%%% points at the listings file mentioned in the abstract.
@Article{Bik:2001:AEI,
  author =       "Aart J. C. Bik and Milind Girkar and Paul M. Grey and
                 Xinmin Tian",
  title =        "Automatically Exploiting Implicit Parallelism",
  journal =      j-DDJ,
  volume =       "26",
  number =       "7",
  pages =        "28, 30, 32--34",
  month =        jul,
  year =         "2001",
  CODEN =        "DDJOEB",
  ISSN =         "1044-789X",
  bibdate =      "Thu Jun 7 06:07:17 MDT 2001",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib",
  URL =          "http://www.ddj.com/ftp/2001/2001_07/parallel.txt",
  abstract =     "Recent trends in processor design have introduced new
                 ways for programmers to exploit parallelism:
                 ``smarter'' compilers do much of the work for you.
                 Additional resources include parallel.txt (listings).",
  acknowledgement = ack-nhfb,
  fjournal =     "Dr. Dobb's Journal of Software Tools",
  keywords =     "MMX; SSE; SSE2",
}

%%% Entry Kagan:1996:PMF: conference paper on the P55C, the first
%%% implementation of MMX technology.  Proceedings details come from
%%% the parent entry IEEE:1996:HCV through crossref; that parent is
%%% expected later in the file (classic BibTeX requires it to follow
%%% its children).  The link to the paper itself is kept in URL, so
%%% bibsource lists only where the bibliographic data came from.
@InProceedings{Kagan:1996:PMF,
  author =       "Michael Kagan",
  title =        "The {P55C} Microarchitecture --- The First
                 Implementation of {MMX} Technology",
  crossref =     "IEEE:1996:HCV",
  pages =        "157--162",
  year =         "1996",
  bibdate =      "Sat Jan 6 19:21:13 MST 2001",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/hot-chips.bib;
                 http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib;
                 OCLC Proceedings database",
  URL =          "ftp://www.hotchips.org/pub/hotc7to11cd/hc96/hc8_pdf/5.2.pdf",
  acknowledgement = ack-nhfb,
}

%%% Entry Khazam:1996:PSI: BYTE 21(8), August 1996, a two-page article
%%% on programming strategies for Intel's MMX.
@Article{Khazam:1996:PSI,
  author =       "Jonathan Khazam and Bev Bachmayer",
  title =        "Programming strategies for {Intel}'s {MMX}",
  journal =      j-BYTE,
  volume =       "21",
  number =       "8",
  pages =        "63--64",
  month =        aug,
  year =         "1996",
  CODEN =        "BYTEDJ",
  ISSN =         "0360-5280 (print), 1082-7838 (electronic)",
  ISSN-L =       "0360-5280",
  bibdate =      "Fri Jan 3 07:09:24 MST 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/byte1995.bib;
                 http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "BYTE Magazine",
}

%%% Entry Peleg:1996:MTE: IEEE Micro 16(4), July/August 1996.  The MMX
%%% extension to the Intel architecture, in the same issue as
%%% Lee:1996:SPM (same volume, number, and month).  The month is built
%%% by string concatenation of the jul and aug macros around a literal
%%% \slash.
@Article{Peleg:1996:MTE,
  author =       "Alex Peleg and Uri Weiser",
  title =        "{MMX} Technology Extension to the {Intel} Architecture
                 --- Improving multimedia and communications application
                 performance by 1.5 to 2 times",
  journal =      j-IEEE-MICRO,
  volume =       "16",
  number =       "4",
  pages =        "42--50",
  month =        jul # "\slash " # aug,
  year =         "1996",
  CODEN =        "IEMIDZ",
  DOI =          "https://doi.org/10.1109/40.526924",
  ISSN =         "0272-1732 (print), 1937-4143 (electronic)",
  ISSN-L =       "0272-1732",
  bibdate =      "Thu Dec 14 06:08:58 MST 2000",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ieeemicro.bib;
                 http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib;
                 Science Citation Index database (1980--2000)",
  acknowledgement = ack-nhfb,
  classcodes =   "C5220P (Parallel architecture); C5130 (Microprocessor
                 chips)",
  fjournal =     "IEEE Micro",
  journal-URL =  "http://www.computer.org/csdl/mags/mi/index.html",
  keywords =     "communications; compatibility; Intel architecture;
                 microprocessor chips; MMX; multimedia; operating
                 systems; parallel architectures; SIMD",
  treatment =    "P Practical",
}

%%% Entry Weiser:1996:TCP: conference paper on design trade-offs and
%%% performance of Intel's MMX technology; alttitle records a second
%%% title for the same paper.  Proceedings details come from the parent
%%% entry IEEE:1996:HCV through crossref; that parent is expected later
%%% in the file (classic BibTeX requires it to follow its children).
%%% The link to the paper itself is kept in URL, so bibsource lists
%%% only where the bibliographic data came from.
@InProceedings{Weiser:1996:TCP,
  author =       "Uri Weiser",
  title =        "Trade-off Considerations and Performance of {Intel}'s
                 {MMX} Technology",
  crossref =     "IEEE:1996:HCV",
  pages =        "147--156",
  year =         "1996",
  bibdate =      "Sat Jan 6 19:21:13 MST 2001",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/hot-chips.bib;
                 http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib;
                 OCLC Proceedings database",
  URL =          "ftp://www.hotchips.org/pub/hotc7to11cd/hc96/hc8_pdf/5.1.pdf",
  acknowledgement = ack-nhfb,
  alttitle =     "{Intel MMX} Technology --- an Overview",
}

%%% Entry Abel:1997:IHQ: Intel Technology Journal, issue Q3 of 1997.
%%% A Dolby Digital decoder implemented with MMX technology.  There is
%%% no volume field, and number holds the quarter label.
%%% NOTE(review): pages holds the single value 11; presumably a page
%%% count rather than a page number, since Anonymous:1997:SIM says this
%%% journal has no page numbers -- confirm.
@Article{Abel:1997:IHQ,
  author =       "James C. Abel and Michael A. Julier",
  title =        "Implementation of a High Quality {Dolby} Digital
                 Decoder using {MMX} Technology",
  journal =      j-INTEL-TECH-J,
  number =       "Q3",
  pages =        "11",
  year =         "1997",
  bibdate =      "Fri Jun 01 05:35:05 2001",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib",
  URL =          "http://developer.intel.com/technology/itj/q31997/articles/art_3.htm;
                 http://developer.intel.com/technology/itj/q31997/pdf/decoder.pdf",
  acknowledgement = ack-nhfb,
}

%%% Entry Anonymous:1997:BFS: BYTE 22(2), February 1997, unsigned test
%%% report on a Pentium with MMX.  xxtitle holds a second form of the
%%% title.
%%% NOTE(review): volume, number, and pages (26--??) are identical to
%%% those of Anonymous:1997:JHF; confirm that these are two distinct
%%% items, and complete the page range.
@Article{Anonymous:1997:BFS,
  author =       "Anonymous",
  title =        "Better in Fits and Starts --- We test a new {Pentium}
                 hopped up with {Intel}'s {MMX}",
  journal =      j-BYTE,
  volume =       "22",
  number =       "2",
  pages =        "26--??",
  month =        feb,
  year =         "1997",
  CODEN =        "BYTEDJ",
  ISSN =         "0360-5280 (print), 1082-7838 (electronic)",
  ISSN-L =       "0360-5280",
  bibdate =      "Sat Feb 15 16:36:48 MST 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/byte1995.bib;
                 http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "BYTE Magazine",
  xxtitle =      "Better in Fits and Starts --- {MMX Pentium} vs. {Power
                 Mac}: See Bits for race results",
}

%%% Entry Anonymous:1997:JHF: BYTE 22(2), February 1997, unsigned item
%%% on the speed of MMX chips.
%%% NOTE(review): volume, number, and pages (26--??) are identical to
%%% those of Anonymous:1997:BFS; confirm that these are two distinct
%%% items, and complete the page range.
@Article{Anonymous:1997:JHF,
  author =       "Anonymous",
  title =        "Just How Fast Are Those New {MMX} Chips?",
  journal =      j-BYTE,
  volume =       "22",
  number =       "2",
  pages =        "26--??",
  month =        feb,
  year =         "1997",
  CODEN =        "BYTEDJ",
  ISSN =         "0360-5280 (print), 1082-7838 (electronic)",
  ISSN-L =       "0360-5280",
  bibdate =      "Sat Feb 15 16:36:48 MST 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/byte1995.bib;
                 http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "BYTE Magazine",
}

%%% Entry Anonymous:1997:MPD: BYTE 22(7), July 1997, unsigned roundup
%%% of ten Pentium desktop systems with MMX technology.
%%% NOTE(review): pages ends in ??, presumably because the final page
%%% was not known when the entry was made -- confirm and complete.
@Article{Anonymous:1997:MPD,
  author =       "Anonymous",
  title =        "{MMX} Power for Desktop {PCs}: We look at 10 {Pentium}
                 systems souped-up with {MMX} technology",
  journal =      j-BYTE,
  volume =       "22",
  number =       "7",
  pages =        "106--??",
  month =        jul,
  year =         "1997",
  CODEN =        "BYTEDJ",
  ISSN =         "0360-5280 (print), 1082-7838 (electronic)",
  ISSN-L =       "0360-5280",
  bibdate =      "Mon Jul 21 10:12:10 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/byte1995.bib;
                 http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "BYTE Magazine",
}

%%% Entry Anonymous:1997:SIM: the Intel Technology Journal special
%%% issue on MMX technology (Q3 of 1997), cited as a whole.  The annote
%%% field explains why this entry, and the other entries for this
%%% journal, lack volume, page, CODEN, and ISSN data.
@Article{Anonymous:1997:SIM,
  author =       "Anonymous",
  title =        "Special issue on {MMX} Technology",
  journal =      j-INTEL-TECH-J,
  number =       "Q3",
  year =         "1997",
  bibdate =      "Thu May 31 08:02:53 2001",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib",
  URL =          "http://developer.intel.com/technology/itj/q31997.htm",
  acknowledgement = ack-nhfb,
  annote =       "This quarterly journal appears in electronic form
                 only, with papers in HTML and PDF. There are no volume,
                 issue, or page numbers, nor CODEN nor ISSN
                 assignments.",
}

%%% Entry Blinn:1997:JBC: IEEE Computer Graphics and Applications
%%% 17(2), March/April 1997.  Column critiquing the Intel MMX
%%% instruction set for image compositing, as the note field says.
%%% NOTE(review): the note field cites the key Peleg:1997:IMM, which is
%%% not defined in this part of the file; confirm that the entry exists
%%% elsewhere, since the citation only resolves in the user's document
%%% if that key is available.
@Article{Blinn:1997:JBC,
  author =       "James F. Blinn",
  title =        "{Jim Blinn}'s Corner: Fugue for {MMX}",
  journal =      j-IEEE-CGA,
  volume =       "17",
  number =       "2",
  pages =        "88--93",
  month =        mar # "\slash " # apr,
  year =         "1997",
  CODEN =        "ICGADZ",
  DOI =          "https://doi.org/10.1109/38.574688",
  ISSN =         "0272-1716 (print), 1558-1756 (electronic)",
  ISSN-L =       "0272-1716",
  bibdate =      "Mon Mar 03 09:18:04 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/cacm1990.bib;
                 http://www.math.utah.edu/pub/tex/bib/ieeecga.bib;
                 http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib",
  note =         "Makes several cogent comments about deficiencies in
                 the Intel MMX pixel-processing instruction set
                 \cite{Peleg:1997:IMM} for use in image compositing.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Computer Graphics and Applications",
  journal-URL =  "http://www.computer.org/portal/web/csdl/magazines/cga",
}

%%% Entry Grosky:1997:NSV: IEEE MultiMedia 4(1), January--March 1997.
%%% News column whose title concatenates several item headings, one of
%%% which concerns Intel's MMX.  The month is a range built from the
%%% jan and mar macros joined by an en-dash, unlike the \slash form
%%% used for two-month issues elsewhere in this file.
@Article{Grosky:1997:NSV,
  author =       "William Grosky and Anne C. Lear",
  title =        "In the News: Smart But Vulnerable; Transitioning to
                 Digital {TV}; Gaining Voices; Business Briefs;
                 Technology Notes; Phoning the {Web}; Converging
                 Competition; {Intel} Presents {MMX} --- and Another
                 Upgrade Dilemma; Want More Information?",
  journal =      j-IEEE-MULTIMEDIA,
  volume =       "4",
  number =       "1",
  pages =        "12--16",
  month =        jan # "--" # mar,
  year =         "1997",
  CODEN =        "IEMUE4",
  ISSN =         "1070-986X (print), 1941-0166 (electronic)",
  ISSN-L =       "1070-986X",
  bibdate =      "Mon Jan 29 16:05:13 MST 2001",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib",
  URL =          "http://dlib.computer.org/mu/books/mu1997/pdf/u1012.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE MultiMedia",
}

@article{Halfhill:1997:BMN,
  author = {Tom R. Halfhill},
  title = {Beyond {MMX} --- New {3-D} power of {AMD}, {Centaur},
    and {Cyrix} chips will challenge {Intel}},
  journal = j-BYTE,
  fjournal = {BYTE Magazine},
  year = {1997},
  month = dec,
  volume = {22},
  number = {12},
  pages = {87--??},
  issn = {0360-5280 (print), 1082-7838 (electronic)},
  issn-l = {0360-5280},
  coden = {BYTEDJ},
  acknowledgement = ack-nhfb,
  bibdate = {Wed Dec 24 17:26:39 MST 1997},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/byte1995.bib;
    http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib},
}

@article{Kagan:1997:MMP,
  author = {Michael Kagan and Simcha Gochman and Doron Orenstien
    and Derrick Lin},
  title = {{MMX} Microarchitecture of {Pentium} Processors With
    {MMX} Technology and {Pentium II} Microprocessors},
  journal = j-INTEL-TECH-J,
  year = {1997},
  number = {Q3},
  pages = {8},
  url = {http://developer.intel.com/technology/itj/q31997/articles/art_4.htm;
    http://developer.intel.com/technology/itj/q31997/pdf/micro.pdf},
  acknowledgement = ack-nhfb,
  bibdate = {Fri Jun 01 05:35:55 2001},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib},
}

@article{Mittal:1997:MTA,
  author = {Millind Mittal and Alex Peleg and Uri Weiser},
  title = {{MMX} Technology Architecture Overview},
  journal = j-INTEL-TECH-J,
  year = {1997},
  number = {Q3},
  pages = {12},
  url = {http://developer.intel.com/technology/itj/q31997/articles/art_2.htm;
    http://developer.intel.com/technology/itj/q31997/pdf/archite.pdf},
  acknowledgement = ack-nhfb,
  bibdate = {Fri Jun 01 05:36:00 2001},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib},
}

%%% Hot Chips IX presentation (parent entry IEEE:1997:HCI); page numbers
%%% are unknown, hence the "??--??" placeholder.  The text listing named
%%% in bibsource appears to render the registered-trademark sign after
%%% "Pentium" as the letters "AE" (ISO 8859-1 0xAE); the product name in
%%% the title is therefore recorded as plain "Pentium II".
@InProceedings{Modi:1997:PIC,
  author =       "Nimish Modi",
  title =        "The {Pentium II CPU}: {A} High Performance Dynamic
                 Execution Processor with {MMX} Technology",
  crossref =     "IEEE:1997:HCI",
  pages =        "??--??",
  year =         "1997",
  bibdate =      "Mon Jan 08 16:33:30 2001",
  bibsource =    "ftp://www.hotchips.org/pub/hotc7to11cd/hc97/pdf_images/hc97_11c_modi_none.txt;
                 http://www.math.utah.edu/pub/tex/bib/hot-chips.bib;
                 http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib",
  acknowledgement = ack-nhfb,
}

@article{Peleg:1997:IMM,
  author = {Alex Peleg and Sam Wilkie and Uri Weiser},
  title = {{Intel MMX} for Multimedia {PCs}},
  journal = j-CACM,
  fjournal = {Communications of the ACM},
  year = {1997},
  month = jan,
  volume = {40},
  number = {1},
  pages = {24--38},
  url = {http://www.acm.org/pubs/citations/journals/cacm/1997-40-1/p25-peleg/},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J79},
  issn = {0001-0782 (print), 1557-7317 (electronic)},
  issn-l = {0001-0782},
  coden = {CACMA2},
  note = {See also Blinn's comments \cite{Blinn:1997:JBC} about
    MMX instruction set deficiencies},
  keywords = {algorithms; compatibility; design; Intel Pentium;
    management; measurement; microprocessor chips; MMX
    technology; multimedia computing; multimedia
    extensions; parallel architectures; parallel
    processing; performance; performance evaluation;
    theory},
  subject = {{\bf C.2.0} Computer Systems Organization,
    COMPUTER-COMMUNICATION NETWORKS, General. {\bf C.5.3}
    Computer Systems Organization, COMPUTER SYSTEM
    IMPLEMENTATION, Microcomputers. {\bf C.3} Computer
    Systems Organization, SPECIAL-PURPOSE AND
    APPLICATION-BASED SYSTEMS. {\bf K.1} Computing Milieux,
    THE COMPUTER INDUSTRY, Suppliers},
  classification = {C5130 (Microprocessor chips); C5220P (Parallel
    architecture); C5470 (Performance evaluation and
    testing)},
  treatment = {P Practical},
  acknowledgement = ack-nhfb,
  bibdate = {Fri Oct 10 18:17:54 MDT 1997},
  bibsource = {http://www.acm.org/pubs/toc/;
    http://www.math.utah.edu/pub/tex/bib/cacm1990.bib;
    http://www.math.utah.edu/pub/tex/bib/ieeecomputscieng.bib;
    http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib},
}

@article{Anonymous:1998:MA,
  author = {Anonymous},
  title = {{MMX} Alternatives},
  journal = j-BYTE,
  fjournal = {BYTE Magazine},
  year = {1998},
  month = jul,
  volume = {23},
  number = {7},
  pages = {26--??},
  issn = {0360-5280 (print), 1082-7838 (electronic)},
  issn-l = {0360-5280},
  coden = {BYTEDJ},
  acknowledgement = ack-nhfb,
  bibdate = {Thu Dec 10 19:10:09 1998},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/byte1995.bib;
    http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib},
}

@article{Pournelle:1998:WJB,
  author = {Jerry Pournelle},
  title = {From the Workbench --- {Jerry} builds a new system
    that uses {AMD's K6 MMX} Enhanced chip},
  journal = j-BYTE,
  fjournal = {BYTE Magazine},
  year = {1998},
  month = jan,
  volume = {23},
  number = {1},
  pages = {123--??},
  issn = {0360-5280 (print), 1082-7838 (electronic)},
  issn-l = {0360-5280},
  coden = {BYTEDJ},
  acknowledgement = ack-nhfb,
  bibdate = {Wed Mar 11 09:04:35 MST 1998},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/byte1995.bib;
    http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib},
}

@article{Fomitchev:1999:MTC,
  author = {Max I. Fomitchev},
  title = {{MMX} Technology Code Optimization},
  journal = j-DDJ,
  fjournal = {Dr. Dobb's Journal of Software Tools},
  year = {1999},
  month = sep,
  volume = {24},
  number = {9},
  pages = {38, 40, 42--46, 48},
  url = {http://www.ddj.com/ftp/1999/1999_09/mmx.txt;
    http://www.ddj.com/ftp/1999/1999_09/mmx.zip},
  issn = {1044-789X},
  coden = {DDJOEB},
  abstract = {Max examines MMX-code optimization techniques and
    shows how you can achieve maximum speed on the Intel
    Pentium II and AMD K6-2 processors. Additional
    resources include mmx.txt (listings) and mmx.zip
    (source code).},
  acknowledgement = ack-nhfb,
  bibdate = {Thu Nov 9 06:25:04 MST 2000},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/dr-dobbs-1990.bib;
    http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib},
}

@article{Ramirez:1999:OIM,
  author = {Ariel Ortiz Ramirez},
  title = {An Overview of {Intel}'s {MMX} Technology},
  journal = j-LINUX-J,
  fjournal = {Linux Journal},
  year = {1999},
  month = may,
  volume = {61},
  pages = {??--??},
  journal-url = {http://portal.acm.org/citation.cfm?id=J508},
  issn = {1075-3583 (print), 1938-3827 (electronic)},
  issn-l = {1075-3583},
  coden = {LIJOFX},
  acknowledgement = ack-nhfb,
  bibdate = {Thu Jun 3 06:34:02 MDT 1999},
  bibsource = {http://noframes.linuxjournal.com/lj-issues/issue61/index.html;
    http://www.math.utah.edu/pub/tex/bib/linux-journal.bib;
    http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib},
}

%%% See also Scherer:1999:OTW above
@article{Thakkar:1999:ISS,
  author = {Shreekant (Ticky) Thakkar and Tom Huff},
  title = {The {Internet Streaming SIMD Extensions}},
  journal = j-INTEL-TECH-J,
  year = {1999},
  month = may,
  day = {17},
  number = {Q2},
  pages = {8},
  url = {http://developer.intel.com/technology/itj/q21999/articles/art_1.htm;
    http://developer.intel.com/technology/itj/q21999/pdf/simd_ext.pdf},
  keywords = {Streaming SIMD Extensions (SSE)},
  acknowledgement = ack-nhfb,
  bibdate = {Fri Jun 01 06:02:08 2001},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib},
}

@article{Brooks:2000:VBC,
  author = {David Brooks and Margaret Martonosi},
  title = {Value-based clock gating and operation packing:
    dynamic strategies for improving processor power and
    performance},
  journal = j-TOCS,
  fjournal = {ACM Transactions on Computer Systems},
  year = {2000},
  month = may,
  volume = {18},
  number = {2},
  pages = {89--126},
  url = {http://www.acm.org/pubs/citations/journals/tocs/2000-18-2/p89-brooks/},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J774},
  issn = {0734-2071 (print), 1557-7333 (electronic)},
  issn-l = {0734-2071},
  coden = {ACSYEC},
  abstract = {The large address space needs of many current
    applications have pushed processor designs toward
    64-bit word widths. Although full 64-bit addresses and
    operations are indeed sometimes needed, arithmetic
    operations on much smaller quantities are still more
    common. In fact, another instruction set trend has been
    the introduction of instructions geared toward subword
    operations on 16-bit quantities. For examples, most
    major processors now include instruction set support
    for multimedia operations allowing parallel execution
    of several subword operations in the same ALU. This
    article presents our observations demonstrating that
    operations on ``narrow-width'' quantities are common
    not only in multimedia codes, but also in more general
    workloads. In fact, across the SPECint95 benchmarks,
    over half the integer operation executions require 16
    bits or less. Based on this data, we propose two
    hardware mechanisms that dynamically recognize and
    capitalize on these narrow-width operations. The first,
    power-oriented optimization reduces processor power
    consumption by using operand-value-based clock gating
    to turn off portions of arithmetic units that will be
    unused by narrow-width operations. This optimization
    results in a 45\%--60\% reduction in the integer unit's
    power consumption for the SPECint95 and MediaBench
    benchmark suites. Applying this optimization to
    SPECfp95 benchmarks results in slightly smaller power
    reductions, but still seems warranted. These reductions
    in integer unit power consumption equate to a 5\%--10\%
    full-chip power savings. Our second,
    performance-oriented optimization improves processor
    performance by packing together narrow-width operations
    so that they share a single arithmetic unit.
    Conceptually similar to a dynamic form of MMX, this
    optimization offers speedups of 4.3\%--6.2\% for
    SPECint95 and 8.0\%--10.4\% for MediaBench. \par
    Overall, these optimizations highlight an increasing
    opportunity for value-based optimizations to improve
    both power and performance in current
    microprocessors.},
  subject = {Hardware --- Arithmetic and Logic Structures (B.2);
    Computer Systems Organization --- Processor
    Architectures --- Single Data Stream Architectures
    (C.1.1): {\bf RISC/CISC, VLIW architectures}},
  acknowledgement = ack-nhfb,
  bibdate = {Tue Sep 26 07:54:31 MDT 2000},
  bibsource = {http://www.acm.org/pubs/contents/journals/tocs/;
    http://www.math.utah.edu/pub/tex/bib/fparith.bib;
    http://www.math.utah.edu/pub/tex/bib/tocs.bib;
    http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib},
}

@article{Drongowski:2000:TM,
  author = {Paul J. Drongowski},
  title = {Translating {MMX}},
  journal = j-COMPUTER,
  fjournal = {Computer},
  year = {2000},
  month = mar,
  volume = {33},
  number = {3},
  pages = {43--43},
  url = {http://dlib.computer.org/co/books/co2000/pdf/r3040.pdf},
  journal-url = {http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=2},
  issn = {0018-9162 (print), 1558-0814 (electronic)},
  issn-l = {0018-9162},
  coden = {CPTRB4},
  keywords = {Alpha 21164; Alpha 21264; Digital FX!32; Intel MMX},
  acknowledgement = ack-nhfb,
  bibdate = {Mon Oct 30 19:18:20 MST 2000},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/computer2000.bib;
    http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib},
}

%%% bibsource is a semicolon-separated list of source URLs only; the
%%% print/electronic ISSN annotations are carried by the ISSN field.
@Article{Aberdeen:2001:EFM,
  author =       "Douglas Aberdeen and Jonathan Baxter",
  title =        "{Emmerald}: a fast matrix-matrix multiply using
                 {Intel}'s {SSE} instructions",
  journal =      j-CCPE,
  volume =       "13",
  number =       "2",
  pages =        "103--119",
  month =        feb,
  year =         "2001",
  CODEN =        "CCPEBO",
  DOI =          "https://doi.org/10.1002/cpe.549",
  ISSN =         "1532-0626 (print), 1532-0634 (electronic)",
  ISSN-L =       "1532-0626",
  bibdate =      "Wed Jul 25 10:55:46 MDT 2001",
  bibsource =    "http://www.interscience.wiley.com/jpages/1532-0626;
                 http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib;
                 http://www3.interscience.wiley.com/journalfinder.html",
  URL =          "http://www3.interscience.wiley.com/cgi-bin/abstract/77004416/START;
                 http://www3.interscience.wiley.com/cgi-bin/fulltext?ID=77004416&PLACEBO=IE.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "Concurrency and Computation: Prac\-tice and
                 Experience",
  journal-URL =  "http://www.interscience.wiley.com/jpages/1532-0626",
}

@article{Strey:2001:PAI,
  author = {Alfred Strey and Martin Bange},
  title = {Performance Analysis of {Intel}'s {MMX} and {SSE}: {A}
    Case Study},
  journal = j-LECT-NOTES-COMP-SCI,
  fjournal = {Lecture Notes in Computer Science},
  year = {2001},
  volume = {2150},
  pages = {142--??},
  url = {http://link.springer-ny.com/link/service/series/0558/bibs/2150/21500142.htm;
    http://link.springer-ny.com/link/service/series/0558/papers/2150/21500142.pdf},
  issn = {0302-9743 (print), 1611-3349 (electronic)},
  issn-l = {0302-9743},
  coden = {LNCSD9},
  acknowledgement = ack-nhfb,
  bibdate = {Sat Feb 2 13:05:53 MST 2002},
  bibsource = {http://link.springer-ny.com/link/service/series/0558/tocs/t2150.htm;
    http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib},
}

%%% ====================================================================
%%% Motorola AltiVec
%%% See also Jennings:1998:MCS
%%% Hot Chips 10 presentation (parent entry IEEE:1998:HCC); page numbers
%%% are unknown.  The author list is abbreviated with "and others"; the
%%% surname spellings follow the IEEE Micro article Diefendorff:2000:AEP,
%%% which lists Diefendorff, Dubey, and Hochsprung among its authors.
@InProceedings{Diefendorff:1998:ATA,
  author =       "K. Diefendorff and P. Dubey and R. Hochsprung and
                 others",
  title =        "{AltiVec} Technology: Accelerating Media Processing
                 Across the Spectrum",
  crossref =     "IEEE:1998:HCC",
  pages =        "??--??",
  year =         "1998",
  bibdate =      "Fri Jun 24 14:28:18 2005",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib",
  acknowledgement = ack-nhfb,
  xxnote =       "Cited in \cite{Mueller:2005:VFP} in fparith.bib.",
}

@techreport{Fuller:1998:MAT,
  author = {Sam Fuller},
  title = {{Motorola}'s {AltiVec} Technology},
  institution = pub-MOTOROLA,
  address = pub-MOTOROLA:adr,
  type = {Technical Report},
  number = {ALTIVECWP/D},
  year = {1998},
  pages = {4},
  url = {http://a1888.g.akamai.net/7/1888/787/83ade987b85512/www.motorola.com/SPS/PowerPC/teksupport/teklibrary/papers/altivec_wp.pdf},
  acknowledgement = ack-nhfb,
  bibdate = {Tue Jan 09 11:25:58 2001},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib},
}

@manual{Motorola:1998:ATP,
  title = {{AltiVec} Technology Programming Environments Manual},
  organization = pub-MOTOROLA,
  address = pub-MOTOROLA:adr,
  year = {1998},
  month = nov,
  pages = {350},
  note = {Order number ALTIVECPEM/D 11/1998 Rev. 0.1.},
  acknowledgement = ack-nhfb,
  bibdate = {Tue Jan 09 11:20:43 2001},
  bibsource = {http://a2016.g.akamai.net/7/2016/787/5087c1b5def3b1/www.motorola.com/SPS/PowerPC/teksupport/teklibrary/manuals/altivec_pem.pdf;
    http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib},
}

@inproceedings{Phillip:1998:ATS,
  author = {Mike Phillip},
  title = {{AltiVec} Technology: {A} Second Generation {SIMD}
    Microprocessor Architecture},
  crossref = {IEEE:1998:HCC},
  year = {1998},
  pages = {??--??},
  acknowledgement = ack-nhfb,
  bibdate = {Mon Jan 08 17:02:55 2001},
  bibsource = {ftp://www.hotchips.org/pub/hotc7to11cd/hc98/pdf_1up/hc98_5c_phillip_1up.txt;
    http://www.math.utah.edu/pub/tex/bib/hot-chips.bib;
    http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib},
}

@article{Thompson:1998:CPD,
  author = {Tom Thompson},
  title = {{CPUs}: {PowerPC} Does Blazing Vector},
  journal = j-BYTE,
  fjournal = {BYTE Magazine},
  year = {1998},
  month = jul,
  volume = {23},
  number = {7},
  pages = {43--??},
  issn = {0360-5280 (print), 1082-7838 (electronic)},
  issn-l = {0360-5280},
  coden = {BYTEDJ},
  abstract = {Motorola's AltiVec technology provides vector and SIMD
    operations, has its own dedicated execution unit, and
    operates on 128-bit quantities.},
  acknowledgement = ack-nhfb,
  bibdate = {Thu Dec 10 19:10:09 1998},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/byte1995.bib;
    http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib},
}

%%% Web page with a corporate author.  howpublished carries no trailing
%%% period: standard styles append their own punctuation before the
%%% date, so a hard-coded period would typeset as "document., ".
@Misc{Motorola:1999:APE,
  author =       "{Motorola, Inc.}",
  title =        "{AltiVec} Programming Examples",
  howpublished = "World-Wide Web document",
  day =          "9",
  month =        dec,
  year =         "1999",
  bibdate =      "Tue Jan 09 08:07:17 2001",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib",
  URL =          "http://www.motorola.com/SPS/PowerPC/AltiVec/CodeMain.html",
  acknowledgement = ack-nhfb,
  keywords =     "complex finite impulse response (FIR) filter; encoding
                 a signal to improve its redundancy for transmission;
                 real delayed least mean squared (LMS) FIR filter; real
                 finite impulse response (FIR) filter; RGB to YCbCr
                 color space conversion; scaling values in the range
                 -2040..2040 into the range -127..127; Soft-Decision
                 Viterbi decoder for GSM CC(2,1,5) TCH frames; Sum of
                 Absolute Differences (SOAD or SAD) kernel; transforming
                 values from the range -127..127 to -2048..2047;
                 two-dimensional Discrete Cosine Transform;
                 two-dimensional Inverse Discrete Cosine Transform",
}

@manual{Motorola:1999:ATP,
  title = {{AltiVec} Technology Programming Interface Manual},
  organization = pub-MOTOROLA,
  address = pub-MOTOROLA:adr,
  year = {1999},
  month = jun,
  pages = {262},
  note = {Order number ALTIVECPIM/D 6/1999 Rev. 0},
  acknowledgement = ack-nhfb,
  bibdate = {Tue Jan 09 11:20:43 2001},
  bibsource = {http://a1008.g.akamai.net/7/1008/787/66cefa0933a341/www.motorola.com/SPS/PowerPC/teksupport/teklibrary/manuals/altivecpim.pdf;
    http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib},
}

%%% Web page with a corporate author.  howpublished carries no trailing
%%% period: standard styles append their own punctuation before the
%%% date, so a hard-coded period would typeset as "document., ".
@Misc{Motorola:1999:ATT,
  author =       "{Motorola, Inc.}",
  title =        "{AltiVec} Technology Tour",
  howpublished = "World-Wide Web document",
  day =          "9",
  month =        dec,
  year =         "1999",
  bibdate =      "Tue Jan 09 08:07:17 2001",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib",
  URL =          "http://www.motorola.com/SPS/PowerPC/AltiVec/technology.html",
  acknowledgement = ack-nhfb,
}

@article{Diefendorff:2000:AEP,
  author = {Keith Diefendorff and Pradeep K. Dubey and Ron
    Hochsprung and Hunter Scales},
  title = {{AltiVec} Extension to {PowerPC} Accelerates Media
    Processing},
  journal = j-IEEE-MICRO,
  fjournal = {IEEE Micro},
  year = {2000},
  month = mar # {\slash } # apr,
  volume = {20},
  number = {2},
  pages = {85--95},
  doi = {https://doi.org/10.1109/40.848475},
  url = {http://dlib.computer.org/mi/books/mi2000/pdf/m2085.pdf},
  journal-url = {http://www.computer.org/csdl/mags/mi/index.html},
  issn = {0272-1732 (print), 1937-4143 (electronic)},
  issn-l = {0272-1732},
  coden = {IEMIDZ},
  acknowledgement = ack-nhfb,
  bibdate = {Thu Dec 14 06:08:58 MST 2000},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/ieeemicro.bib;
    http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib;
    Science Citation Index database (1980--2000)},
}

@article{Fite:2001:FLA,
  author = {Matthew Fite},
  title = {First Look at an {Apple G4} with the {AltiVec}
    Processor},
  journal = j-LINUX-J,
  fjournal = {Linux Journal},
  year = {2001},
  month = jun,
  volume = {86},
  pages = {108, 110--111, 114, 116, 118},
  url = {http://noframes.linuxjournal.com/lj-issues/issue86/4584.html},
  journal-url = {http://portal.acm.org/citation.cfm?id=J508},
  issn = {1075-3583 (print), 1938-3827 (electronic)},
  issn-l = {1075-3583},
  coden = {LIJOFX},
  abstract = {What can the AltiVec processor do for Linux
    programmers?},
  annote = {Includes information on downloading Motorola's
    extension of {\tt gcc} to support the AltiVec
    instruction set directly from C code.},
  acknowledgement = ack-nhfb,
  bibdate = {Wed May 23 15:20:33 MDT 2001},
  bibsource = {http://noframes.linuxjournal.com/lj-issues/issue86/index.html;
    http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib},
}

%%% ====================================================================
%%% SPARC VIS
%%% See also Jennings:1998:MCS
@techreport{Sun:1996:ACN,
  author = {{Sun Microsystems}},
  title = {Accelerating Core Networking Functions Using the
    {UltraSPARC VIS} Instruction Set},
  institution = pub-SUN,
  address = pub-SUN:adr,
  type = {Technical Report},
  number = {{WPR-0013}},
  year = {1996},
  month = aug,
  url = {http://www.sun.com/microelectronics/whitepapers/wpr-0013/index.html},
  note = {Demonstrates speedups of 2x to 5x on key networking
    kernels from use of the VIS instruction set.},
  acknowledgement = ack-nhfb,
  bibdate = {Tue Jan 09 14:17:53 2001},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib},
}

@techreport{Sun:1996:UVI,
  author = {{Sun Microsystems}},
  title = {{UltraSPARC}: The {Visual Instruction Set} ({VIS}): On
    Chip Support for New-Media Processing},
  institution = pub-SUN,
  address = pub-SUN:adr,
  type = {Technical Report},
  number = {{WPR-0004}},
  year = {1996},
  url = {http://www.sun.com/microelectronics/whitepapers/wpr-0004;
    http://www.sun.com/microelectronics/whitepapers/wpr-0004/wpr-0004.pdf},
  acknowledgement = ack-nhfb,
  bibdate = {Tue Jan 09 14:20:20 2001},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib},
}

%%% The keywords field merges index terms from two databases, which is
%%% why several terms appear in both capitalized and lowercase forms.
%%% NOTE(review): three phrases had been split in two and their halves
%%% sorted apart ("image" / "processing", "reduced" / "instruction set
%%% computing", "SPARC Version 9 64-bit RISC" / "architecture"); they
%%% are rejoined here -- confirm against the original database records.
@Article{Tremblay:1996:UFI,
  author =       "Marc Tremblay and J. Michael O'Connor",
  title =        "{UltraSparc I}: {A} four-issue processor supporting
                 multimedia: Combining on-chip multimedia instructions
                 with a high-performance, four-issue architecture",
  journal =      j-IEEE-MICRO,
  volume =       "16",
  number =       "2",
  pages =        "42--50",
  month =        mar # "\slash " # apr,
  year =         "1996",
  CODEN =        "IEMIDZ",
  DOI =          "https://doi.org/10.1109/40.491461",
  ISSN =         "0272-1732 (print), 1937-4143 (electronic)",
  ISSN-L =       "0272-1732",
  bibdate =      "Sat Jan 13 09:04:51 2001",
  bibsource =    "Compendex database;
                 http://www.math.utah.edu/pub/tex/bib/hot-chips.bib;
                 http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib;
                 Science Citation Index database (1980--2000)",
  note =         "Presented at Hot Chips VII, Stanford University,
                 Stanford, California, August 1995.",
  acknowledgement = ack-nhfb,
  affiliation =  "Sun Microelectronics",
  affiliationaddress = "Mountain View, CA, USA",
  classcodes =   "B1265F (Microprocessors and microcomputers); C5130
                 (Microprocessor chips); C5220 (Computer architecture);
                 C6130M (Multimedia)",
  classification = "714.2; 721.3; 722.4; 723.2; 723.5; 741",
  corpsource =   "Sun Microsyst. Inc., Mountain View, CA, USA",
  fjournal =     "IEEE Micro",
  journal-URL =  "http://www.computer.org/csdl/mags/mi/index.html",
  keywords =     "Buffer storage; Computer graphics; Computer hardware;
                 Computer simulation; Computer software; four-issue
                 processor supporting multimedia; graphics
                 instructions; image compression; Image processing;
                 image processing; instruction set; memory access
                 instructions; Microarchitecture; Microcomputers;
                 Microprocessor chips; microprocessor chips;
                 Multimedia; multimedia; multimedia systems; Parallel
                 processing systems; Performance; Pipeline processing
                 systems; Program compilers; Reduced instruction set
                 computing; reduced instruction set computing; SPARC
                 Version 9 64-bit RISC architecture; Superscalar
                 processor; superscalar processor; UltraSparc I;
                 Visual instruction set",
  treatment =    "A Application; P Practical",
}

%%% NOTE(review): two keyword phrases had been split at a line break
%%% and the halves sorted as separate terms ("RISC-" / "style
%%% instructions", "reduced instruction set" / "computing"); they are
%%% rejoined here -- confirm against the original database record.
@Article{Tremblay:1996:VSN,
  author =       "Marc Tremblay and J. Michael O'Connor and Venkatesh
                 Narayanan and Liang He",
  title =        "{VIS} Speeds New Media Processing --- Enhancing
                 conventional {RISC} instruction sets to significantly
                 accelerate media-processing algorithms",
  journal =      j-IEEE-MICRO,
  volume =       "16",
  number =       "4",
  pages =        "10--20",
  month =        jul # "\slash " # aug,
  year =         "1996",
  CODEN =        "IEMIDZ",
  DOI =          "https://doi.org/10.1109/40.526921",
  ISSN =         "0272-1732 (print), 1937-4143 (electronic)",
  ISSN-L =       "0272-1732",
  bibdate =      "Thu Dec 14 06:08:58 MST 2000",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib;
                 Science Citation Index database (1980--2000)",
  acknowledgement = ack-nhfb,
  classcodes =   "C6130B (Graphics techniques); C6140B (Machine-oriented
                 languages); C5220 (Computer architecture)",
  fjournal =     "IEEE Micro",
  journal-URL =  "http://www.computer.org/csdl/mags/mi/index.html",
  keywords =     "3D graphics environments; computer graphics;
                 instruction sets; media processing; media-processing
                 algorithms; reduced instruction set computing;
                 RISC-style instructions; UltraSparc; VIS; Visual
                 Instruction Set",
  treatment =    "P Practical",
}

@manual{Sun:2000:SAL,
  title = {{SPARC} Assembly Language Reference Manual},
  organization = pub-SUN,
  address = pub-SUN:adr,
  year = {2000},
  month = feb,
  url = {ftp://192.18.99.138/806-3774/806-3774.pdf},
  note = {Part Number 806-3774. See Appendix E.6 for UltraSPARC
    and VIS Instruction Set Extensions.},
  acknowledgement = ack-nhfb,
  bibdate = {Tue Jan 09 07:38:42 2001},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib},
}

%%% White paper with a corporate author.  The institution and address
%%% are given literally here rather than through the pub-SUN and
%%% pub-SUN:adr abbreviations used by the other Sun entries.
@TechReport{Sun:2002:VIS,
  author =       "{Sun Microsystems}",
  title =        "The {VIS} Instruction Set, Version 1.0",
  type =         "White paper",
  institution =  "Sun Microsystems",
  address =      "Network Circle, Santa Clara, CA 95054, USA",
  month =        jun,
  year =         "2002",
  bibdate =      "Mon Sep 16 06:35:33 2002",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib",
  URL =          "http://www.sun.com/processors/vis/download/vis/vis_whitepaper.pdf;
                 http://www.sun.com/processors/whitepapers/vis_wp_external.pdf",
  acknowledgement = ack-nhfb,
}

@article{Gove:2006:SSU,
  author = {Darryl Gove and Geetha Vallabhanen},
  title = {{Sun Studio}: Using {VIS} Instructions To Speed Up Key
    Routines},
  journal = {Sun Developer Network},
  year = {2006},
  month = jan,
  day = {5},
  url = {http://developers.sun.com/prodtech/cc/articles/vis.html},
  acknowledgement = ack-nhfb,
  bibdate = {Thu Jan 26 07:30:30 2006},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib},
}

%%% ====================================================================
%%% Cross-referenced entries must come last:
%%% Parent proceedings entry for crossref use.  booktitle duplicates
%%% title so that child entries inherit it under classic BibTeX; the
%%% "????" values are placeholders for data not yet known.  The
%%% corporate editor is braced so that it is parsed as a single name,
%%% matching the other Hot Chips proceedings entries.
@Proceedings{IEEE:1996:HCV,
  editor =       "{IEEE}",
  booktitle =    "Hot chips VIII: symposium record: Stanford University,
                 Stanford, California, August 18--20, 1996",
  title =        "Hot chips {VIII}: symposium record: Stanford
                 University, Stanford, California, August 18--20, 1996",
  publisher =    pub-IEEE,
  address =      pub-IEEE:adr,
  pages =        "????",
  year =         "1996",
  ISBN =         "????",
  ISBN-13 =      "????",
  LCCN =         "????",
  bibdate =      "Sat Jan 6 19:21:13 MST 2001",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib;
                 OCLC Proceedings database",
  acknowledgement = ack-nhfb,
  keywords =     "hot chips",
}

@proceedings{IEEE:1997:HCI,
  editor = {{IEEE}},
  title = {Hot Chips {IX}: Stanford University, Stanford,
    California, August 24--26, 1997},
  booktitle = {Hot Chips IX: Stanford University, Stanford,
    California, August 24--26, 1997},
  publisher = pub-IEEE,
  address = pub-IEEE:adr,
  year = {1997},
  pages = {????},
  isbn = {????},
  isbn-13 = {????},
  lccn = {????},
  acknowledgement = ack-nhfb,
  bibdate = {Mon Jan 08 05:05:12 2001},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib},
}

@proceedings{IEEE:1998:HCC,
  editor = {{IEEE}},
  title = {Hot chips 10: conference record: August 16--18, 1998,
    Memorial Auditorium, Stanford University, Palo Alto,
    California},
  booktitle = {Hot chips 10: conference record: August 16--18, 1998,
    Memorial Auditorium, Stanford University, Palo Alto,
    California},
  publisher = pub-IEEE,
  address = pub-IEEE:adr,
  year = {1998},
  pages = {????},
  isbn = {????},
  isbn-13 = {????},
  lccn = {????},
  acknowledgement = ack-nhfb,
  bibdate = {Mon Jan 08 05:06:55 2001},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/visual-instruction-set.bib},
}