%%% Fallback definition: if the document has not already defined \pkg,
%%% define it to typeset its single argument in typewriter type.
@Preamble{
    "\ifx \undefined \pkg \def \pkg #1{{{\tt #1}}} \fi"
}
%%% Acknowledgement macro; entries reference it through their
%%% acknowledgement field.
@String{ack-nhfb = "Nelson H. F. Beebe,
                    University of Utah,
                    Department of Mathematics, 110 LCB,
                    155 S 1400 E RM 233,
                    Salt Lake City, UT 84112-0090, USA,
                    Tel: +1 801 581 5254,
                    FAX: +1 801 581 4148,
                    e-mail: \path|beebe@math.utah.edu|,
                            \path|beebe@acm.org|,
                            \path|beebe@computer.org| (Internet),
                    URL: \path|http://www.math.utah.edu/~beebe/|"}
%%% Journal-name macros, for use as the value of journal fields.
@String{j-CACM                    = "Communications of the Association
                                     for Computing Machinery"}
@String{j-COMP-ARCH-NEWS          = "ACM SIGARCH Computer Architecture News"}
@String{j-DTRAP                   = "Digital Threats: Research and Practice
                                     (DTRAP)"}
@String{j-FUT-GEN-COMP-SYS        = "Future Generation Computer Systems"}
@String{j-IEEE-COMPUT-ARCHIT-LETT = "IEEE Computer Architecture Letters"}
@String{j-IEEE-MICRO              = "IEEE Micro"}
@String{j-IEEE-SPECTRUM           = "IEEE Spectrum"}
@String{j-IEEE-TRANS-COMPUT       = "IEEE Transactions on Computers"}
@String{j-J-CRYPTO-ENG            = "Journal of Cryptographic Engineering"}
@String{j-JETC                    = "ACM Journal on Emerging Technologies
                                     in Computing Systems (JETC)"}
@String{j-OPER-SYS-REV            = "Operating Systems Review"}
@String{j-SIGPLAN                 = "ACM SIG{\-}PLAN Notices"}
@String{j-SOFTWAREX               = "SoftwareX"}
@String{j-TACO                    = "ACM Transactions on Architecture and
                                     Code Optimization"}
@String{j-TECS                    = "ACM Transactions on Embedded Computing
                                     Systems"}
@String{j-TODAES                  = "ACM Transactions on Design Automation of
                                     Electronic Systems"}
@String{j-TRETS                   = "ACM Transactions on Reconfigurable Technology
                                     and Systems (TRETS)"}
%%% Publisher-name macros, each paired with an :adr macro holding the
%%% publisher's address.
@String{pub-ACM      = "ACM Press"}
@String{pub-ACM:adr  = "New York, NY 10036, USA"}
@String{pub-IEEE     = "IEEE Computer Society Press"}
@String{pub-IEEE:adr = "1109 Spring Street, Suite 300, Silver
                        Spring, MD 20910, USA"}
@Article{Kim:2016:SCD,
  author =       "Channoh Kim and Sungmin Kim and Hyeon Gyu Cho and
                 Dooyoung Kim and Jaehyeok Kim and Young H. Oh and
                 Hakbeom Jang and Jae W. Lee",
  title =        "Short-circuit dispatch: accelerating virtual machine
                 interpreters on embedded processors",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "44",
  number =       "3",
  pages =        "291--303",
  month =        jun,
  year =         "2016",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/3007787.3001168",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Thu Jan 12 18:43:43 MST 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/risc-v.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigarch.bib;
                 http://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
  abstract =     "Interpreters are widely used to implement high-level
                 language virtual machines (VMs), especially on
                 resource-constrained embedded platforms. Many scripting
                 languages employ interpreter-based VMs for their
                 advantages over native code compilers, such as
                 portability, smaller resource footprint, and compact
                 codes. For efficient interpretation a script (program)
                 is first compiled into an intermediate representation,
                 or bytecodes. The canonical interpreter then runs an
                 infinite loop that fetches, decodes, and executes one
                 bytecode at a time. This bytecode dispatch loop is a
                 well-known source of inefficiency, typically featuring
                 a large jump table with a hard-to-predict indirect
                 jump. Most existing techniques to optimize this loop
                 focus on reducing the misprediction rate of this
                 indirect jump in both hardware and software. However,
                 these techniques are much less effective on embedded
                 processors with shallow pipelines and low IPCs.
                 Instead, we tackle another source of inefficiency more
                 prominent on embedded platforms---redundant computation
                 in the dispatch loop. To this end, we propose
                 Short-Circuit Dispatch (SCD), a low-cost architectural
                 extension that enables fast, hardware-based bytecode
                 dispatch with fewer instructions. The key idea of SCD
                 is to overlay the software-created bytecode jump table
                 on a branch target buffer (BTB). Once a bytecode is
                 fetched, the BTB is looked up using the bytecode,
                 instead of PC, as key. If it hits, the interpreter
                 directly jumps to the target address retrieved from the
                 BTB; otherwise, it goes through the original dispatch
                 path. This effectively eliminates redundant computation
                 in the dispatcher code for decode, bound check, and
                 target address calculation, thus significantly reducing
                 total instruction count. Our simulation results
                 demonstrate that SCD achieves geomean speedups of
                 19.9\% and 14.1\% for two production-grade script
                 interpreters for Lua and JavaScript, respectively.
                 Moreover, our fully synthesizable RTL design based on a
                 RISC-V embedded processor shows that SCD improves the
                 EDP of the Lua interpreter by 24.2\%, while increasing
                 the chip area by only 0.72\% at a 40nm technology
                 node.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J89",
  remark =       "ISCA '16 conference proceedings.",
}
@Article{Lee:2016:AAB,
  author =       "Yunsup Lee and Andrew Waterman and Henry Cook and
                 Brian Zimmer and Ben Keller and Alberto Puggelli and
                 Jaehwa Kwak and Ruzica Jevtic and Stevo Bailey and
                 Milovan Blagojevic and Pi-Feng Chiu and Rimas Avizienis
                 and Brian Richards and Jonathan Bachrach and David
                 Patterson and Elad Alon and Bora Nikolic and Krste
                 Asanovic",
  title =        "An Agile Approach to Building {RISC-V}
                 Microprocessors",
  journal =      j-IEEE-MICRO,
  volume =       "36",
  number =       "2",
  pages =        "8--20",
  month =        mar # "\slash " # apr,
  year =         "2016",
  CODEN =        "IEMIDZ",
  DOI =          "https://doi.org/10.1109/MM.2016.11",
  ISSN =         "0272-1732 (print), 1937-4143 (electronic)",
  ISSN-L =       "0272-1732",
  bibdate =      "Tue Apr 19 06:31:19 MDT 2016",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/hot-chips.bib;
                 http://www.math.utah.edu/pub/tex/bib/ieeemicro.bib;
                 http://www.math.utah.edu/pub/tex/bib/risc-v.bib",
  URL =          "http://www.computer.org/csdl/mags/mi/2016/02/mmi2016020008-abs.html",
  abstract-URL = "http://www.computer.org/csdl/mags/mi/2016/02/mmi2016020008-abs.html",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://www.computer.org/csdl/mags/mi/index.html",
}
@Article{Tan:2016:NVC,
  author =       "Yong Kiam Tan and Magnus O. Myreen and Ramana Kumar
                 and Anthony Fox and Scott Owens and Michael Norrish",
  title =        "A new verified compiler backend for {CakeML}",
  journal =      j-SIGPLAN,
  volume =       "51",
  number =       "9",
  pages =        "60--73",
  month =        sep,
  year =         "2016",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/3022670.2951924",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Sat Sep 16 10:18:13 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/risc-v.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  abstract =     "We have developed and mechanically verified a new
                 compiler backend for CakeML. Our new compiler features
                 a sequence of intermediate languages that allows it to
                 incrementally compile away high-level features and
                 enables verification at the right levels of semantic
                 detail. In this way, it resembles mainstream
                 (unverified) compilers for strict functional languages.
                 The compiler supports efficient curried multi-argument
                 functions, configurable data representations,
                 exceptions that unwind the call stack, register
                 allocation, and more. The compiler targets several
                 architectures: x86-64, ARMv6, ARMv8, MIPS-64, and
                 RISC-V. In this paper, we present the overall structure
                 of the compiler, including its 12 intermediate
                 languages, and explain how everything fits together. We
                 focus particularly on the interaction between the
                 verification of the register allocator and the garbage
                 collector, and memory representations. The entire
                 development has been carried out within the HOL4
                 theorem prover.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  remark =       "ICFP '16 conference proceedings.",
}
@Article{Dietrich:2017:OVA,
  author =       "Christian Dietrich and Daniel Lohmann",
  title =        "{OSEK-V}: application-specific {RTOS} instantiation in
                 hardware",
  journal =      j-SIGPLAN,
  volume =       "52",
  number =       "4",
  pages =        "111--120",
  month =        may,
  year =         "2017",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/3140582.3081030",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Sat Sep 16 10:18:15 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/risc-v.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  abstract =     "The employment of a real-time operating system (RTOS)
                 in an embedded control systems is often an
                 all-or-nothing decision: While the RTOS-abstractions
                 provide for easier software composition and
                 development, the price in terms of event latencies and
                 memory costs are high. Especially in HW/SW codesign
                 settings, system developers try to avoid the employment
                 of a full-blown RTOS as far as possible. In OSEK-V, we
                 mitigate this trade-off by a very aggressive tailoring
                 of the concrete RTOS instance into the hardware.
                 Instead of implementing generic OS components as custom
                 hardware devices, we capture the actually possible
                 application-kernel interactions as a finite-state
                 machine and integrate the tailored RTOS semantics
                 directly into the processor pipeline. In our
                 experimental results with an OSEK-based implementation
                 of a quadrotor flight controller into the Rocket/RISC-V
                 softcore, we thereby can significantly reduce event
                 latencies, interrupt lock times, and memory footprint
                 at moderate costs in terms of FPGA resources.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  remark =       "LCTES '17 conference proceedings.",
}
@Article{Kapre:2017:HDR,
  author =       "Nachiket Kapre and Jan Gray",
  title =        "{Hoplite}: a Deflection-Routed Directional Torus {NoC}
                 for {FPGAs}",
  journal =      j-TRETS,
  volume =       "10",
  number =       "2",
  pages =        "14:1--14:??",
  month =        apr,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3027486",
  ISSN =         "1936-7406 (print), 1936-7414 (electronic)",
  ISSN-L =       "1936-7406",
  bibdate =      "Sat Dec 23 10:23:01 MST 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/risc-v.bib;
                 http://www.math.utah.edu/pub/tex/bib/trets.bib",
  abstract =     "We can design an FPGA-optimized lightweight
                 network-on-chip (NoC) router for flit-oriented
                 packet-switched communication that is an order of
                 magnitude smaller (in terms of LUTs and FFs) than
                 state-of-the-art FPGA overlay routers available today.
                 We present Hoplite, an efficient, lightweight, and fast
                 FPGA overlay NoC that is designed to be small and
                 compact by (1) using deflection routing instead of
                 buffered switching to eliminate expensive FIFO buffers
                 and (2) using a torus topology to reduce the cost of
                 switch crossbar. Buffering and crossbar implementation
                 complexities have traditionally limited speeds and
                 imposed heavy resource costs in conventional FPGA
                 overlay NoCs. We take care to exploit the fracturable
                 lookup tables (LUT) organization of the FPGA to further
                 improve the resource efficiency of mapping the
                 expensive crossbar multiplexers. Hoplite can outperform
                 classic, bidirectional, buffered mesh networks for
                 single-flit-oriented FPGA applications by as much as $
                 1.5 \times $ (best achievable throughputs for a $ 10
                 \times 10 $ system) or $ 2.5 \times $ (allocating same
                 amount of FPGA resources to both NoCs) for uniform
                 random traffic. When compared to buffered mesh
                 switches, FPGA-based deflection routers are $ \approx
                 3.5 \times $ smaller (HLS-generated switch) and $ 2.5
                 \times $ faster (clock period) for 32b payloads. In a
                 separate experiment, we hand-crafted an RTL version of
                 our switch with location constraints that requires only
                 60 LUTs and 100 FFs per router and runs at 2.9ns. We
                 conduct additional layout experiments on modern Xilinx
                 and Altera FPGAs and demonstrate wide-channel
                 chip-spanning layouts that run in excess of 300MHz
                 while consuming 10--15\% of overall chip resources. We
                 also demonstrate a clustered RISC-V multiprocessor
                 organization that uses Hoplite to help deliver the high
                 processing throughputs of the FPGA architecture to user
                 applications.",
  acknowledgement = ack-nhfb,
  articleno =    "14",
  fjournal =     "ACM Transactions on Reconfigurable Technology and
                 Systems (TRETS)",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J1151",
}
@Article{Kim:2017:TAAa,
  author =       "Channoh Kim and Jaehyeok Kim and Sungmin Kim and
                 Dooyoung Kim and Namho Kim and Gitae Na and Young H. Oh
                 and Hyeon Gyu Cho and Jae W. Lee",
  title =        "Typed Architectures: Architectural Support for
                 Lightweight Scripting",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "45",
  number =       "1",
  pages =        "77--90",
  month =        mar,
  year =         "2017",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/3093337.3037726",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Mon Jun 5 18:01:58 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/java2010.bib;
                 http://www.math.utah.edu/pub/tex/bib/risc-v.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Dynamic scripting languages are becoming more and more
                 widely adopted not only for fast prototyping but also
                 for developing production-grade applications. They
                 provide high-productivity programming environments
                 featuring high levels of abstraction with powerful
                 built-in functions, automatic memory management,
                 object-oriented programming paradigm and dynamic
                 typing. However, their flexible, dynamic type systems
                 easily become the source of inefficiency in terms of
                 instruction count, memory footprint, and energy
                 consumption. This overhead makes it challenging to
                 deploy these high-productivity programming technologies
                 on emerging single-board computers for IoT
                 applications. Addressing this challenge, this paper
                 introduces Typed Architectures, a high-efficiency,
                 low-cost execution substrate for dynamic scripting
                 languages, where each data variable retains high-level
                 type information at an ISA level. Typed Architectures
                 calculate and check the dynamic type of each variable
                 implicitly in hardware, rather than explicitly in
                 software, hence significantly reducing instruction
                 count for dynamic type checking. Besides, Typed
                 Architectures introduce polymorphic instructions (e.g.,
                 xadd), which are bound to the correct native
                 instruction at runtime within the pipeline (e.g., add
                 or fadd) to efficiently implement polymorphic
                 operators. Finally, Typed Architectures provide
                 hardware support for flexible yet efficient type tag
                 extraction and insertion, capturing common data layout
                 patterns of tag-value pairs. Our evaluation using a
                 fully synthesizable RISC-V RTL design on FPGA shows
                 that Typed Architectures achieve geomean speedups of
                 11.2\% and 9.9\% with maximum speedups of 32.6\% and
                 43.5\% for two production-grade scripting engines for
                 JavaScript and Lua, respectively. Moreover, Typed
                 Architectures improve the energy-delay product (EDP) by
                 19.3\% for JavaScript and 16.5\% for Lua with an area
                 overhead of 1.6\% at a 40nm technology node.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J89",
  remark =       "ASPLOS '17 conference proceedings.",
}
@Article{Kim:2017:TAAb,
  author =       "Channoh Kim and Jaehyeok Kim and Sungmin Kim and
                 Dooyoung Kim and Namho Kim and Gitae Na and Young H. Oh
                 and Hyeon Gyu Cho and Jae W. Lee",
  title =        "Typed Architectures: Architectural Support for
                 Lightweight Scripting",
  journal =      j-OPER-SYS-REV,
  volume =       "51",
  number =       "2",
  pages =        "77--90",
  month =        jun,
  year =         "2017",
  CODEN =        "OSRED8",
  DOI =          "https://doi.org/10.1145/3093315.3037726",
  ISSN =         "0163-5980 (print), 1943-586X (electronic)",
  ISSN-L =       "0163-5980",
  bibdate =      "Mon Jul 24 18:36:23 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/opersysrev.bib;
                 http://www.math.utah.edu/pub/tex/bib/risc-v.bib",
  abstract =     "Dynamic scripting languages are becoming more and more
                 widely adopted not only for fast prototyping but also
                 for developing production-grade applications. They
                 provide high-productivity programming environments
                 featuring high levels of abstraction with powerful
                 built-in functions, automatic memory management,
                 object-oriented programming paradigm and dynamic
                 typing. However, their flexible, dynamic type systems
                 easily become the source of inefficiency in terms of
                 instruction count, memory footprint, and energy
                 consumption. This overhead makes it challenging to
                 deploy these high-productivity programming technologies
                 on emerging single-board computers for IoT
                 applications. Addressing this challenge, this paper
                 introduces Typed Architectures, a high-efficiency,
                 low-cost execution substrate for dynamic scripting
                 languages, where each data variable retains high-level
                 type information at an ISA level. Typed Architectures
                 calculate and check the dynamic type of each variable
                 implicitly in hardware, rather than explicitly in
                 software, hence significantly reducing instruction
                 count for dynamic type checking. Besides, Typed
                 Architectures introduce polymorphic instructions (e.g.,
                 xadd), which are bound to the correct native
                 instruction at runtime within the pipeline (e.g., add
                 or fadd) to efficiently implement polymorphic
                 operators. Finally, Typed Architectures provide
                 hardware support for flexible yet efficient type tag
                 extraction and insertion, capturing common data layout
                 patterns of tag-value pairs. Our evaluation using a
                 fully synthesizable RISC-V RTL design on FPGA shows
                 that Typed Architectures achieve geomean speedups of
                 11.2\% and 9.9\% with maximum speedups of 32.6\% and
                 43.5\% for two production-grade scripting engines for
                 JavaScript and Lua, respectively. Moreover, Typed
                 Architectures improve the energy-delay product (EDP) by
                 19.3\% for JavaScript and 16.5\% for Lua with an area
                 overhead of 1.6\% at a 40nm technology node.",
  acknowledgement = ack-nhfb,
  fjournal =     "Operating Systems Review",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J597",
}
@Article{Kim:2017:TAAc,
  author =       "Channoh Kim and Jaehyeok Kim and Sungmin Kim and
                 Dooyoung Kim and Namho Kim and Gitae Na and Young H. Oh
                 and Hyeon Gyu Cho and Jae W. Lee",
  title =        "Typed Architectures: Architectural Support for
                 Lightweight Scripting",
  journal =      j-SIGPLAN,
  volume =       "52",
  number =       "4",
  pages =        "77--90",
  month =        apr,
  year =         "2017",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/3093336.3037726",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Sat Sep 16 10:18:16 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/risc-v.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  abstract =     "Dynamic scripting languages are becoming more and more
                 widely adopted not only for fast prototyping but also
                 for developing production-grade applications. They
                 provide high-productivity programming environments
                 featuring high levels of abstraction with powerful
                 built-in functions, automatic memory management,
                 object-oriented programming paradigm and dynamic
                 typing. However, their flexible, dynamic type systems
                 easily become the source of inefficiency in terms of
                 instruction count, memory footprint, and energy
                 consumption. This overhead makes it challenging to
                 deploy these high-productivity programming technologies
                 on emerging single-board computers for IoT
                 applications. Addressing this challenge, this paper
                 introduces Typed Architectures, a high-efficiency,
                 low-cost execution substrate for dynamic scripting
                 languages, where each data variable retains high-level
                 type information at an ISA level. Typed Architectures
                 calculate and check the dynamic type of each variable
                 implicitly in hardware, rather than explicitly in
                 software, hence significantly reducing instruction
                 count for dynamic type checking. Besides, Typed
                 Architectures introduce polymorphic instructions (e.g.,
                 xadd), which are bound to the correct native
                 instruction at runtime within the pipeline (e.g., add
                 or fadd) to efficiently implement polymorphic
                 operators. Finally, Typed Architectures provide
                 hardware support for flexible yet efficient type tag
                 extraction and insertion, capturing common data layout
                 patterns of tag-value pairs. Our evaluation using a
                 fully synthesizable RISC-V RTL design on FPGA shows
                 that Typed Architectures achieve geomean speedups of
                 11.2\% and 9.9\% with maximum speedups of 32.6\% and
                 43.5\% for two production-grade scripting engines for
                 JavaScript and Lua, respectively. Moreover, Typed
                 Architectures improve the energy-delay product (EDP) by
                 19.3\% for JavaScript and 16.5\% for Lua with an area
                 overhead of 1.6\% at a 40nm technology node.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  remark =       "ASPLOS '17 conference proceedings.",
}
@InProceedings{Koenig:2017:HAC,
  author =       "Jack Koenig and David Biancolin and Jonathan Bachrach
                 and Krste Asanovic",
  title =        "A Hardware Accelerator for Computing an Exact Dot
                 Product",
  crossref =     "Burgess:2017:ISC",
  pages =        "114--121",
  month =        jul,
  year =         "2017",
  DOI =          "https://doi.org/10.1109/ARITH.2017.38",
  ISSN =         "1063-6889",
  bibdate =      "Fri Nov 17 09:10:14 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/fparith.bib;
                 http://www.math.utah.edu/pub/tex/bib/risc-v.bib",
  abstract =     "We study the implementation of a hardware accelerator
                 that computes a dot product of IEEE-754 floating-point
                 numbers exactly. The accelerator uses a wide (640 or
                 4288 bits for single or double-precision respectively)
                 fixed-point representation into which intermediate
                 floating-point products are accumulated. We designed
                 the accelerator as a generator in Chisel, which can
                 synthesize various configurations of the accelerator
                 that make different area-performance trade-offs. We
                 integrated eight different configurations into an SoC
                 comprised of RISC-V in-order scalar core, split L1
                 instruction and data caches, and unified L2 cache. In a
                 TSMC 45 nm technology, the accelerator area ranges from
                 0.05 mm$^2$ to 0.32 mm$^2$, and all configurations could
                 be clocked at frequencies in excess of 900MHz. The
                 accelerator successfully saturates the SoC's memory
                 system, achieving the same per-element efficiency (1
                 cycle-per-element) as Intel MKL running on an x86
                 machine with a similar cache configuration.",
  acknowledgement = ack-nhfb,
  keywords =     "accurate floating-point dot product; accurate
                 floating-point summation; area-performance trade-offs;
                 Bandwidth; cache configuration; cache storage; Chisel;
                 Coprocessors; data caches; exact dot product; fixed
                 point arithmetic; fixed-point representation; floating
                 point arithmetic; Generators; Hardware; hardware
                 accelerator; IEEE-754 floating-point numbers; Intel
                 MKL; intermediate floating-point products;
                 Microarchitecture; Registers; RISC-V in-order scalar
                 core; Rockets; size 45 nm; SoC memory system; split L1
                 instruction; system-on-chip; TSMC technology; unified
                 L2 cache",
}
@Book{Patterson:2017:RVR,
  author =       "David Patterson and Andrew Waterman",
  title =        "The {RISC-V} Reader: An Open Architecture Atlas",
  publisher =    "Strawberry Canyon",
  address =      "San Francisco, CA, USA",
  pages =        "xiv + 180",
  year =         "2017",
  ISBN =         "0-9992491-1-8",
  ISBN-13 =      "978-0-9992491-1-6",
  LCCN =         "QA76.9.A73 P388 2017",
  bibdate =      "Mon Nov 18 18:47:27 MST 2019",
  bibsource =    "fsz3950.oclc.org:210/WorldCat;
                 http://www.math.utah.edu/pub/tex/bib/master.bib;
                 http://www.math.utah.edu/pub/tex/bib/risc-v.bib",
  acknowledgement = ack-nhfb,
  shorttableofcontents = "RISC-V Reference Card / i \\
                 List of Figures / ix \\
                 Preface / xii \\
                 1 Why RISC-V? / 2 \\
                 2 RV32I: RISC-V Base Integer ISA / 14 \\
                 3 RISC-V Assembly Language / 32 \\
                 4 RV32M: Multiply and Divide / 44 \\
                 5 RV32FD: Single/Double Floating Point / 48 \\
                 6 RV32A: Atomic / 60 \\
                 7 RV32C: Compressed Instructions / 64 \\
                 8 RV32V: Vector / 72 \\
                 9 RV64: 64-bit Address Instructions / 86 \\
                 10 RV32/64 Privileged Architecture / 100 \\
                 11 Future RISC-V Optional Extensions / 118 \\
                 Appendix A: RISC-V Instruction Listings / 120 \\
                 Appendix B: Transliteration from RISC-V / 168 \\
                 Index / 174",
  tableofcontents = "List of Figures / x \\
                 Preface / xii \\
                 1 Why RISC-V? / 2 \\
                 1.1 Introduction / 2 \\
                 1.2 Modular vs. Incremental ISAs / 4 \\
                 1.3 ISA Design 101 / 5 \\
                 1.4 An Overview of this Book / 10 \\
                 1.5 Concluding Remarks / 11 \\
                 1.6 To Learn More / 12 \\
                 2 RV32I: RISC-V Base Integer ISA / 14 \\
                 2.1 Introduction / 14 \\
                 2.2 RV32I Instruction formats / 14 \\
                 2.3 RV32I Registers / 18 \\
                 2.4 RV32I Integer Computation / 18 \\
                 2.5 RV32I Loads and Stores / 20 \\
                 2.6 RV32I Conditional Branch / 21 \\
                 2.7 RV32I Unconditional Jump / 22 \\
                 2.8 RV32I Miscellaneous / 23 \\
                 2.9 Comparing RV32I, ARM-32, MIPS-32, and x86-32 / 23
                 \\
                 2.10 Concluding Remarks / 24 \\
                 2.11 To Learn More / 26 \\
                 3 RISC-V Assembly Language / 32 \\
                 3.1 Introduction / 32 \\
                 3.2 Calling convention / 32 \\
                 3.3 Assembly / 35 \\
                 3.4 Linker / 40 \\
                 3.5 Static vs. Dynamic Linking / 41 \\
                 3.6 Loader / 42 \\
                 3.7 Concluding Remarks / 42 \\
                 3.8 To Learn More / 42 \\
                 4 RV32M: Multiply and Divide / 44 \\
                 4.1 Introduction / 44 \\
                 4.2 Concluding Remarks / 46 \\
                 4.3 To Learn More / 46 \\
                 5 RV32FD: Single/Double Floating Point / 48 \\
                 5.1 Introduction / 48 \\
                 5.2 Floating-Point Registers / 48 \\
                 5.3 Floating-Point Loads, Stores, and Arithmetic / 49
                 \\
                 5.4 Floating-Point Moves and Converts / 53 \\
                 5.5 Miscellaneous Floating-Point Instructions / 53 \\
                 5.6 Comparing RV32FD, ARM-32, MIPS-32, and x86-32 using
                 DAXPY / 55 \\
                 5.7 Concluding Remarks / 55 \\
                 5.8 To Learn More / 56 \\
                 6 RV32A: Atomic / 60 \\
                 6.1 Introduction / 60 \\
                 6.2 Concluding Remarks / 62 \\
                 6.3 To Learn More / 62 \\
                 7 RV32C: Compressed Instructions / 64 \\
                 7.1 Introduction / 64 \\
                 7.2 Comparing RV32GC, Thumb-2, microMIPS, and x86-32 /
                 66 \\
                 7.3 Concluding Remarks / 66 \\
                 7.4 To Learn More / 67 \\
                 8 RV32V: Vector / 72 \\
                 8.1 Introduction / 72 \\
                 8.2 Vector Computation Instructions / 73 \\
                 8.3 Vector Registers and Dynamic Typing / 74 \\
                 8.4 Vector Loads and Stores / 75 \\
                 8.5 Parallelism During Vector Execution / 76 \\
                 8.6 Conditional Execution of Vector Operations / 76 \\
                 8.7 Miscellaneous Vector Instructions / 77 \\
                 8.8 Vector Example: DAXPY in RV32V / 78 \\
                 8.9 Comparing RV32V, MIPS-32 MSA SIMD, and x86-32 AVX
                 SIMD / 79 \\
                 8.10 Concluding Remarks / 81 \\
                 8.11 To Learn More / 82 \\
                 9 RV64: 64-bit Address Instructions / 86 \\
                 9.1 Introduction / 86 \\
                 9.2 Comparison to Other 64-bit ISAs using Insertion
                 Sort / 90 \\
                 9.3 Program size / 92 \\
                 9.4 Concluding Remarks / 93 \\
                 9.5 To Learn More / 93 \\
                 10 RV32/64 Privileged Architecture / 100 \\
                 10.1 Introduction / 100 \\
                 10.2 Machine Mode for Simple Embedded Systems / 101
                 \\
                 10.3 Machine-Mode Exception Handling / 103 \\
                 10.4 User Mode and Process Isolation in Embedded
                 Systems / 106 \\
                 10.5 Supervisor Mode for Modern Operating Systems / 108
                 \\
                 10.6 Page-Based Virtual Memory / 111 \\
                 10.7 Identification and Performance CSRs / 114 \\
                 10.8 Concluding Remarks / 115 \\
                 10.9 To Learn More / 117 \\
                 11 Future RISC-V Optional Extensions / 118 \\
                 11.1 ``B'' Standard Extension for Bit Manipulation /
                 118 \\
                 11.2 ``E'' Standard Extension for Embedded / 118 \\
                 11.3 ``H'' Privileged Architecture Extension for
                 Hypervisor Support / 118 \\
                 11.4 ``J'' Standard Extension for Dynamically
                 Translated Languages / 118 \\
                 11.5 ``L'' Standard Extension for Decimal
                 Floating-Point / 118 \\
                 11.6 ``N'' Standard Extension for User-Level Interrupts
                 / 119 \\
                 11.7 ``P'' Standard Extension for Packed-SIMD
                 Instructions / 119 \\
                 11.8 ``Q'' Standard Extension for Quad-Precision
                 Floating-Point / 119 \\
                 11.9 Concluding Remarks / 119 \\
                 A RISC-V Instruction Listings / 120 \\
                 B Transliteration from RISC-V / 168 \\
                 B.1 Introduction / 168 \\
                 B.2 Comparing RV32I, ARM-32, and x86-32 using Tree Sum
                 / 170 \\
                 B.3 Conclusion / 171 \\
                 Index / 174",
}
@Article{Trippel:2017:TMMa,
  author =       "Caroline Trippel and Yatin A. Manerkar and Daniel
                 Lustig and Michael Pellauer and Margaret Martonosi",
  title =        "{TriCheck}: Memory Model Verification at the
                 Trisection of Software, Hardware, and {ISA}",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "45",
  number =       "1",
  pages =        "119--133",
  month =        mar,
  year =         "2017",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/3093337.3037719",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Mon Jun 5 18:01:58 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/risc-v.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Memory consistency models (MCMs) which govern
                 inter-module interactions in a shared memory system,
                 are a significant, yet often under-appreciated, aspect
                 of system design. MCMs are defined at the various
                 layers of the hardware-software stack, requiring
                 thoroughly verified specifications, compilers, and
                 implementations at the interfaces between layers.
                 Current verification techniques evaluate segments of
                 the system stack in isolation, such as proving compiler
                 mappings from a high-level language (HLL) to an ISA or
                 proving validity of a microarchitectural implementation
                 of an ISA. This paper makes a case for full-stack MCM
                 verification and provides a toolflow, TriCheck, capable
                 of verifying that the HLL, compiler, ISA, and
                 implementation collectively uphold MCM requirements.
                 The work showcases TriCheck's ability to evaluate a
                 proposed ISA MCM in order to ensure that each layer and
                 each mapping is correct and complete. Specifically, we
                 apply TriCheck to the open source RISC-V ISA [55],
                 seeking to verify accurate, efficient, and legal
                 compilations from C11. We uncover under-specifications
                 and potential inefficiencies in the current RISC-V ISA
                 documentation and identify possible solutions for each.
                 As an example, we find that a RISC-V-compliant
                 microarchitecture allows 144 outcomes forbidden by C11
                 to be observed out of 1,701 litmus tests examined.
                 Overall, this paper demonstrates the necessity of
                 full-stack verification for detecting MCM-related bugs
                 in the hardware-software stack.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J89",
  remark =       "ASPLOS '17 conference proceedings.",
}
@Article{Trippel:2017:TMMb,
  author =       "Caroline Trippel and Yatin A. Manerkar and Daniel
                 Lustig and Michael Pellauer and Margaret Martonosi",
  title =        "{TriCheck}: Memory Model Verification at the
                 Trisection of Software, Hardware, and {ISA}",
  journal =      j-OPER-SYS-REV,
  volume =       "51",
  number =       "2",
  pages =        "119--133",
  month =        jun,
  year =         "2017",
  CODEN =        "OSRED8",
  DOI =          "https://doi.org/10.1145/3093315.3037719",
  ISSN =         "0163-5980 (print), 1943-586X (electronic)",
  ISSN-L =       "0163-5980",
  bibdate =      "Mon Jul 24 18:36:23 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/opersysrev.bib;
                 http://www.math.utah.edu/pub/tex/bib/risc-v.bib",
  abstract =     "Memory consistency models (MCMs) which govern
                 inter-module interactions in a shared memory system,
                 are a significant, yet often under-appreciated, aspect
                 of system design. MCMs are defined at the various
                 layers of the hardware-software stack, requiring
                 thoroughly verified specifications, compilers, and
                 implementations at the interfaces between layers.
                 Current verification techniques evaluate segments of
                 the system stack in isolation, such as proving compiler
                 mappings from a high-level language (HLL) to an ISA or
                 proving validity of a microarchitectural implementation
                 of an ISA. This paper makes a case for full-stack MCM
                 verification and provides a toolflow, TriCheck, capable
                 of verifying that the HLL, compiler, ISA, and
                 implementation collectively uphold MCM requirements.
                 The work showcases TriCheck's ability to evaluate a
                 proposed ISA MCM in order to ensure that each layer and
                 each mapping is correct and complete. Specifically, we
                 apply TriCheck to the open source RISC-V ISA [55],
                 seeking to verify accurate, efficient, and legal
                 compilations from C11. We uncover under-specifications
                 and potential inefficiencies in the current RISC-V ISA
                 documentation and identify possible solutions for each.
                 As an example, we find that a RISC-V-compliant
                 microarchitecture allows 144 outcomes forbidden by C11
                 to be observed out of 1,701 litmus tests examined.
                 Overall, this paper demonstrates the necessity of
                 full-stack verification for detecting MCM-related bugs
                 in the hardware-software stack.",
  acknowledgement = ack-nhfb,
  fjournal =     "Operating Systems Review",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J597",
}
@Article{Trippel:2017:TMMc,
  author =       "Caroline Trippel and Yatin A. Manerkar and Daniel
                 Lustig and Michael Pellauer and Margaret Martonosi",
  title =        "{TriCheck}: Memory Model Verification at the
                 Trisection of Software, Hardware, and {ISA}",
  journal =      j-SIGPLAN,
  volume =       "52",
  number =       "4",
  pages =        "119--133",
  month =        apr,
  year =         "2017",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/3093336.3037719",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Sat Sep 16 10:18:16 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/risc-v.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  abstract =     "Memory consistency models (MCMs) which govern
                 inter-module interactions in a shared memory system,
                 are a significant, yet often under-appreciated, aspect
                 of system design. MCMs are defined at the various
                 layers of the hardware-software stack, requiring
                 thoroughly verified specifications, compilers, and
                 implementations at the interfaces between layers.
                 Current verification techniques evaluate segments of
                 the system stack in isolation, such as proving compiler
                 mappings from a high-level language (HLL) to an ISA or
                 proving validity of a microarchitectural implementation
                 of an ISA. This paper makes a case for full-stack MCM
                 verification and provides a toolflow, TriCheck, capable
                 of verifying that the HLL, compiler, ISA, and
                 implementation collectively uphold MCM requirements.
                 The work showcases TriCheck's ability to evaluate a
                 proposed ISA MCM in order to ensure that each layer and
                 each mapping is correct and complete. Specifically, we
                 apply TriCheck to the open source RISC-V ISA [55],
                 seeking to verify accurate, efficient, and legal
                 compilations from C11. We uncover under-specifications
                 and potential inefficiencies in the current RISC-V ISA
                 documentation and identify possible solutions for each.
                 As an example, we find that a RISC-V-compliant
                 microarchitecture allows 144 outcomes forbidden by C11
                 to be observed out of 1,701 litmus tests examined.
                 Overall, this paper demonstrates the necessity of
                 full-stack verification for detecting MCM-related bugs
                 in the hardware-software stack.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  remark =       "ASPLOS '17 conference proceedings.",
}
@Article{Davidson:2018:COS,
  author =       "Scott Davidson and Shaolin Xie and Christopher Torng
                 and Khalid Al-Hawaj and Austin Rovinski and Tutu Ajayi
                 and Luis Vega and Chun Zhao and Ritchie Zhao and Steve
                 Dai and Aporva Amarnath and Bandhav Veluri and Paul Gao
                 and Anuj Rao and Gai Liu and Rajesh K. Gupta and Zhiru
                 Zhang and Ronald Dreslinski and Christopher Batten and
                 Michael Bedford Taylor",
  title =        "The {Celerity} Open-Source 511-Core {RISC-V} Tiered
                 Accelerator Fabric: Fast Architectures and Design
                 Methodologies for Fast Chips",
  journal =      j-IEEE-MICRO,
  volume =       "38",
  number =       "2",
  pages =        "30--41",
  month =        mar # "\slash " # apr,
  year =         "2018",
  CODEN =        "IEMIDZ",
  DOI =          "https://doi.org/10.1109/MM.2018.022071133",
  ISSN =         "0272-1732 (print), 1937-4143 (electronic)",
  ISSN-L =       "0272-1732",
  bibdate =      "Sat Apr 28 13:18:45 MDT 2018",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/gnu.bib;
                 http://www.math.utah.edu/pub/tex/bib/hot-chips.bib;
                 http://www.math.utah.edu/pub/tex/bib/ieeemicro.bib;
                 http://www.math.utah.edu/pub/tex/bib/risc-v.bib",
  URL =          "https://www.computer.org/csdl/mags/mi/2018/02/mmi2018020030-abs.html",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Micro",
  journal-URL =  "http://www.computer.org/csdl/mags/mi/index.html",
}
@Article{Delshadtehrani:2018:NPM,
  author =       "Leila Delshadtehrani and Schuyler Eldridge and
                 Sadullah Canakci and Manuel Egele and Ajay Joshi",
  title =        "{Nile}: a Programmable Monitoring Coprocessor",
  journal =      j-IEEE-COMPUT-ARCHIT-LETT,
  volume =       "17",
  number =       "1",
  pages =        "92--95",
  month =        jan # "\slash " # jun,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1109/LCA.2017.2784416",
  ISSN =         "1556-6056 (print), 1556-6064 (electronic)",
  ISSN-L =       "1556-6056",
  bibdate =      "Tue Jun 25 07:41:05 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ieeecomputarchitlett.bib;
                 http://www.math.utah.edu/pub/tex/bib/risc-v.bib",
  abstract =     "Researchers widely employ hardware performance
                 counters (HPCs) as well as debugging and profiling
                 tools in processors for monitoring different events
                 such as cache hits, cache misses, and branch prediction
                 statistics during the execution of programs. The
                 collected information can be used for power,
                 performance, and thermal management of the system as
                 well as detecting anomalies or malicious behavior in
                 the software. However, monitoring new or complex events
                 using HPCs and existing tools is a challenging task
                 because HPCs only provide a fixed pool of raw events to
                 monitor. To address this challenge, we propose the
                 implementation of a programmable hardware monitor in a
                 complete system framework including the hardware
                 monitor architecture and its interface with an in-order
                 single-issue RISC-V processor as well as an operating
                 system. As a proof of concept, we demonstrate how to
                 programmatically implement a shadow stack using our
                 hardware monitor and how the programmed shadow stack
                 detects stack buffer overflow attacks. Our hardware
                 monitor design incurs a 26 percent power overhead and a
                 15 percent area overhead over an unmodified RISC-V
                 processor. Our programmed shadow stack has less than 3
                 percent performance overhead in the worst case.",
  acknowledgement = ack-nhfb,
  affiliation =  "Delshadtehrani, L (Reprint Author), Boston Univ, Dept
                 Elect \& Comp Engn, Boston, MA 02215 USA.
                 Delshadtehrani, Leila; Eldridge, Schuyler; Canakci,
                 Sadullah; Egele, Manuel; Joshi, Ajay, Boston Univ, Dept
                 Elect \& Comp Engn, Boston, MA 02215 USA.",
  author-email = "delshad@bu.edu schuye@bu.edu scanakci@bu.edu
                 megele@bu.edu joshi@bu.edu",
  da =           "2019-06-20",
  doc-delivery-number = "FZ6EO",
  eissn =        "1556-6064",
  fjournal =     "IEEE Computer Architecture Letters",
  funding-acknowledgement = "NSF [CCF-1533663]",
  funding-text = "We thank Prof. Jonathan Appavoo for providing
                 invaluable help in designing the OS support and the
                 software interface for Nile. This work was supported in
                 part by NSF grant CCF-1533663.",
  journal-iso =  "IEEE Comput. Archit. Lett.",
  journal-URL =  "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=10208",
  keywords =     "branch prediction statistics; cache hits; cache
                 misses; cache storage; complete system framework;
                 complex events; coprocessors; Coprocessors; debugging;
                 fixed pool; Hardware; Hardware coprocessor; hardware
                 monitor architecture; hardware monitor design; hardware
                 performance counters; HPCs; Linux; malicious behavior;
                 Monitoring; Nile; operating system; operating systems
                 (computers); Pattern matching; performance evaluation;
                 performance overhead; power overhead; profiling tools;
                 Program processors; programmable hardware; programmable
                 hardware monitor; programmable monitoring coprocessor;
                 programmed shadow stack; raw events; reduced
                 instruction set computing; Rockets; security; shadow
                 stack; single-issue RISC-V processor; stack buffer
                 overflow attack; stack buffer overflow attacks; thermal
                 management; unmodified RISC-V processor",
  number-of-cited-references = "17",
  ORCID-numbers = "Joshi, Ajay/0000-0002-3256-9942",
  research-areas = "Computer Science",
  times-cited =  "0",
  unique-id =    "Delshadtehrani:2018:NPM",
  web-of-science-categories = "Computer Science, Hardware \&
                 Architecture",
}
@InProceedings{Bocco:2019:DPN,
  author =       "Andrea Bocco and Yves Durand and Florent de Dinechin",
  title =        "Dynamic Precision Numerics Using a Variable-Precision
                 {UNUM Type I HW} Coprocessor",
  crossref =     "Takagi:2019:ISC",
  pages =        "104--107",
  month =        jun,
  year =         "2019",
  DOI =          "https://doi.org/10.1109/ARITH.2019.00028",
  bibdate =      "Fri Jan 31 08:18:07 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/fparith.bib;
                 http://www.math.utah.edu/pub/tex/bib/risc-v.bib",
  abstract =     "A very large internal accumulation register has been
                 proposed to increase the accuracy of scientific code.
                 However, there is a general class of iterative kernels
                 where a vector of high-precision data must be saved
                 from one iteration to the next. Saving the large
                 internal accumulator to memory is impractical in such
                 cases. This work proposes a Variable Precision (VP)
                 Floating Point (FP) arithmetic co-processor
                 architecture based on RISC-V, which 1/ supports legacy
                 IEEE formats for input and output variables, 2/ uses
                 variable length internal registers (up to 512 bits of
                 mantissa) for inner loop multiply-add and 3/ supports
                 loads and stores of intermediate results to cache
                 memory with a dynamically adjustable precision (up to
                 256 bits of mantissa). It exploits the UNUM type I
                 floating point format, proposing solutions to address
                 some of its pitfalls such as the variable latency of
                 the internal operation, and the variable memory
                 footprint of the intermediate variables. This work is
                 integrated on FPGA and demonstrated on a representative
                 example.",
  acknowledgement = ack-nhfb,
  keywords =     "ARITH-26; Arrays; ASIC; cache storage; Computational
                 modeling; Coprocessor; coprocessors; Coprocessors;
                 dynamic Precision numerics; field programmable gate
                 arrays; floating point arithmetic; floating point
                 arithmetic co-processor architecture; Floating-point;
                 FPGA; Hardware architecture; Instruction set design;
                 internal accumulation register; iterative kernels;
                 iterative methods; Kernel; Multiple precision;
                 Programming; reduced instruction set computing;
                 Registers; RISC-V; Scientific computing; UNUM; Variable
                 Precision; Variable precision; variable-precision UNUM
                 Type I HW coprocessor",
}
@InProceedings{Bocco:2019:SSM,
  author =       "Andrea Bocco and Yves Durand and Florent de Dinechin",
  title =        "{SMURF}: {Scalar Multiple-precision Unum Risc-V
                 Floating-point} Accelerator for Scientific Computing",
  crossref =     "Gustafson:2019:PCN",
  pages =        "1:1--1:8",
  year =         "2019",
  DOI =          "https://doi.org/10.1145/3316279.3316280",
  bibdate =      "Mon Feb 10 09:31:49 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/fparith.bib;
                 http://www.math.utah.edu/pub/tex/bib/risc-v.bib",
  URL =          "https://hal.inria.fr/hal-02087098",
  abstract =     "This paper proposes an innovative Floating Point (FP)
                 architecture for Variable Precision (VP) computation
                 suitable for high precision FP computing, based on a
                 refined version of the UNUM type I format. This
                 architecture supports VP FP intervals where each
                 interval endpoint can have up to 512 bits of mantissa.
                 The proposed hardware architecture is pipelined and has
                 an internal word-size of 64 bits. Computations on
                 longer mantissas are performed iteratively on the
                 existing hardware. The prototype is integrated in a
                 RISC-V environment, it is exposed to the user through
                 an instruction set extension. In the paper we provide
                 an example of software usage. The system has been
                 prototyped on a FPGA (Field-Programmable Gate Array)
                 platform and also synthesized for a 28nm FDSOI process
                 technology. The respective working frequency of FPGA
                 and ASIC implementations are 50MHz and 600MHz. The
                 estimated chip area is 1.5mm$^2$ and the estimated
                 power consumption is 95mW. The flops performance of
                 this architecture remains within the range of a regular
                 fixed-precision IEEE FPU while enabling arbitrary
                 precision computation at reasonable cost.",
  acknowledgement = ack-nhfb,
  articleno =    "1",
  keywords =     "ASIC; Coprocessor; Floating-point; FPGA; Hardware
                 architecture; Instruction set design; Multiple
                 precision; RISC-V; Scientific computing; UNUM; Variable
                 precision",
}
@Article{Dogan:2019:ASU,
  author =       "Halit Dogan and Masab Ahmad and Brian Kahne and Omer
                 Khan",
  title =        "Accelerating Synchronization Using Moving Compute to
                 Data Model at 1,000-core Multicore Scale",
  journal =      j-TACO,
  volume =       "16",
  number =       "1",
  pages =        "4:1--4:??",
  month =        mar,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3300208",
  ISSN =         "1544-3566 (print), 1544-3973 (electronic)",
  ISSN-L =       "1544-3566",
  bibdate =      "Mon Mar 11 19:00:20 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/risc-v.bib;
                 http://www.math.utah.edu/pub/tex/bib/taco.bib",
  abstract =     "Thread synchronization using shared memory hardware
                 cache coherence paradigm is prevalent in multicore
                 processors. However, as the number of cores increase on
                 a chip, cache line ping-pong prevents performance
                 scaling for algorithms that deploy fine-grain
                 synchronization. This article proposes an in-hardware
                 moving computation to data model (MC) that pins shared
                 data at dedicated cores. The critical code sections are
                 serialized and executed at these cores in a spatial
                 setting to enable data locality optimizations.
                 In-hardware messages enable non-blocking and blocking
                 communication between cores, without involving the
                 cache coherence protocol. The in-hardware MC model is
                 implemented on Tilera Tile-Gx72 multicore platform to
                 evaluate 8- to 64-core count scale. A simulated RISC-V
                 multicore environment is built to further evaluate the
                 performance scaling advantages of the MC model at
                 1,024-cores scale. The evaluation using graph and
                 machine-learning benchmarks illustrates that atomic
                 instructions based synchronization scales up to 512
                 cores, and the MC model at the same core count
                 outperforms by 27\% in completion time and 39\% in
                 dynamic energy consumption.",
  acknowledgement = ack-nhfb,
  articleno =    "4",
  fjournal =     "ACM Transactions on Architecture and Code Optimization
                 (TACO)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J924",
}
@Article{Huang:2019:ILA,
  author =       "Bo-Yuan Huang and Hongce Zhang and Pramod Subramanyan
                 and Yakir Vizel and Aarti Gupta and Sharad Malik",
  title =        "Instruction-Level Abstraction {(ILA)}: a Uniform
                 Specification for System-on-Chip {(SoC)} Verification",
  journal =      j-TODAES,
  volume =       "24",
  number =       "1",
  pages =        "10:1--10:??",
  month =        jan,
  year =         "2019",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3282444",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:40 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                 http://www.math.utah.edu/pub/tex/bib/risc-v.bib;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Modern Systems-on-Chip (SoC) designs are increasingly
                 heterogeneous and contain specialized semi-programmable
                 accelerators in addition to programmable processors. In
                 contrast to the pre-accelerator era, when the ISA
                 played an important role in verification by enabling a
                 clean separation of concerns between software and
                 hardware, verification of these ``accelerator-rich''
                 SoCs presents new challenges. From the perspective of
                 hardware designers, there is a lack of a common
                 framework for formal functional specification of
                 accelerator behavior. From the perspective of software
                 developers, there exists no unified framework for
                 reasoning about software/hardware interactions of
                 programs that interact with accelerators. This article
                 addresses these challenges by providing a formal
                 specification and high-level abstraction for
                 accelerator functional behavior. It formalizes the
                 concept of an Instruction Level Abstraction (ILA),
                 developed informally in our previous work, and shows
                 its application in modeling and verification of
                 accelerators. This formal ILA extends the familiar
                 notion of instructions to accelerators and provides a
                 uniform, modular, and hierarchical abstraction for
                 modeling software-visible behavior of both accelerators
                 and programmable processors. We demonstrate the
                 applicability of the ILA through several case studies
                 of accelerators (for image processing, machine
                 learning, and cryptography), and a general-purpose
                 processor (RISC-V). We show how the ILA model
                 facilitates equivalence checking between two ILAs, and
                 between an ILA and its hardware finite-state machine
                 (FSM) implementation. Further, this equivalence
                 checking supports accelerator upgrades using the notion
                 of ILA compatibility, similar to processor upgrades
                 using ISA compatibility.",
  acknowledgement = ack-nhfb,
  articleno =    "10",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Ramos:2019:APM,
  author =       "A. Ramos and R. G. Toral and P. Reviriego and J. A.
                 Maestro",
  title =        "An {ALU} Protection Methodology for Soft Processors on
                 {SRAM}-Based {FPGAs}",
  journal =      j-IEEE-TRANS-COMPUT,
  volume =       "68",
  number =       "9",
  pages =        "1404--1410",
  month =        sep,
  year =         "2019",
  CODEN =        "ITCOB4",
  DOI =          "https://doi.org/10.1109/TC.2019.2907238",
  ISSN =         "0018-9340 (print), 1557-9956 (electronic)",
  ISSN-L =       "0018-9340",
  bibdate =      "Fri Aug 30 05:58:40 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ieeetranscomput2010.bib;
                 http://www.math.utah.edu/pub/tex/bib/risc-v.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Transactions on Computers",
  journal-URL =  "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=12",
  keywords =     "adaptive protection; ALU; ALU protection methodology;
                 application-based methodology; arithmetic logic unit;
                 cosmic background radiation; cosmic radiation; digital
                 arithmetic; fault tolerance; Fault tolerant systems;
                 Field programmable gate arrays; field programmable gate
                 arrays; FPGA; hardware configuration library;
                 integrated circuit reliability; logic testing;
                 Microprocessors; modular redundancy techniques; Program
                 processors; radiation hardening (electronics);
                 redundancy; Redundancy; RISC-V; soft core; soft error;
                 soft errors; soft processor; space missions; SRAM
                 chips; SRAM-based FPGA; TMR configurations",
}
@Article{Rogers:2019:SLB,
  author =       "Samuel Rogers and Joshua Slycord and Ronak Raheja and
                 Hamed Tabkhi",
  title =        "Scalable {LLVM}-Based Accelerator Modeling in gem5",
  journal =      j-IEEE-COMPUT-ARCHIT-LETT,
  volume =       "18",
  number =       "1",
  pages =        "18--21",
  month =        jan # "\slash " # jun,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1109/LCA.2019.2893932",
  ISSN =         "1556-6056 (print), 1556-6064 (electronic)",
  ISSN-L =       "1556-6056",
  bibdate =      "Tue Jun 25 07:41:05 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ieeecomputarchitlett.bib;
                 http://www.math.utah.edu/pub/tex/bib/python.bib;
                 http://www.math.utah.edu/pub/tex/bib/risc-v.bib",
  abstract =     "This article proposes a scalable integrated system
                 architecture modeling for hardware accelerator based in
                 gem5 simulation framework. The core of proposed
                 modeling is a LLVM-based simulation engine for modeling
                 any customized data-path with respect to inherent
                 data/instruction-level parallelism (derived by
                 algorithms) and available compute units (defined by the
                 user). The simulation framework also offers a
                 general-purpose communication interface that allows a
                 scalable and flexible connection into the gem5
                 ecosystem. Python API of gem5, enabling modifications
                 to the system hierarchy without the need to rebuild the
                 underlying simulator. Our simulation framework
                 currently supports full-system simulation (both
                 bare-metal and a full Linux kernel) for ARM-based
                 systems, with future plans to add support for RISC-V.
                 The LLVM-based modeling and modular integration to gem5
                 allow long-term simulation expansion and sustainable
                 design modeling for emerging applications with demands
                 for acceleration.",
  acknowledgement = ack-nhfb,
  affiliation =  "Rogers, S (Reprint Author), Univ North Carolina, Dept
                 Elect \& Comp Engn, Charlotte, NC 28223 USA. Rogers,
                 Samuel; Slycord, Joshua; Raheja, Ronak; Tabkhi, Hamed,
                 Univ North Carolina, Dept Elect \& Comp Engn,
                 Charlotte, NC 28223 USA.",
  author-email = "sroger48@uncc.edu jslycord@uncc.edu rraheja@uncc.edu
                 htabkhiv@uncc.edu",
  da =           "2019-06-20",
  doc-delivery-number = "HL5MF",
  eissn =        "1556-6064",
  fjournal =     "IEEE Computer Architecture Letters",
  journal-iso =  "IEEE Comput. Archit. Lett.",
  journal-URL =  "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=10208",
  keywords =     "application program interfaces; ARM-based systems;
                 Computational modeling; Computer architecture
                 simulation; customized data-path; Engines; field
                 programmable gate arrays; flexible connection;
                 full-system simulation; gem5 ecosystem; gem5 simulation
                 framework; general-purpose communication interface;
                 Hardware; hardware accelerator; hardware accelerators;
                 heterogeneous systems; inherent data; instruction-level
                 parallelism; Linux; LLVM-based modeling; LLVM-based
                 simulation engine; logic design; long-term simulation
                 expansion; microprocessor chips; multiprocessing
                 systems; parallel architectures; parallel programming;
                 program compilers; reduced instruction set computing;
                 Registers; RISC-V; Runtime; scalable connection;
                 scalable integrated system architecture modeling;
                 scalable LLVM-based accelerator modeling; Space
                 exploration; sustainable design modeling;
                 Synchronization; system hierarchy",
  number-of-cited-references = "11",
  ORCID-numbers = "Slycord, Joshua/0000-0002-0569-4094 Rogers,
                 Samuel/0000-0002-9697-2933",
  research-areas = "Computer Science",
  times-cited =  "0",
  unique-id =    "Rogers:2019:SLB",
  web-of-science-categories = "Computer Science, Hardware \&
                 Architecture",
}
@Article{Tiwari:2019:PPE,
  author =       "Sugandha Tiwari and Neel Gala and Chester Rebeiro and
                 V. Kamakoti",
  title =        "{PERI}: A Posit Enabled {RISC-V} Core",
  journal =      "arXiv.org",
  volume =       "??",
  number =       "??",
  pages =        "1--14",
  month =        nov,
  year =         "2019",
  bibdate =      "Thu Apr 09 15:06:39 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/fparith.bib;
                 http://www.math.utah.edu/pub/tex/bib/risc-v.bib",
  URL =          "https://arxiv.org/pdf/1908.01466.pdf",
  abstract =     "Owing to the failure of Dennard's scaling the last
                 decade has seen a steep growth of prominent new
                 paradigms leveraging opportunities in computer
                 architecture. Two technologies of interest are Posit
                 and RISC-V. Posit was introduced in mid-2017 as a
                 viable alternative to IEEE 754-2008. Posit promises
                 more accuracy, higher dynamic range and fewer unused
                 states along with simpler hardware designs as compared
                 to IEEE 754-2008. RISC-V, on the other hand, provides
                 a commercial-grade open-source ISA. It is not only
                 elegant and simple but also highly extensible and
                 customizable, thereby facilitating novel
                 micro-architectural research and exploration. In this
                 paper, we bring these two technologies together and
                 propose the first Posit Enabled RISC-V core. The paper
                 provides insights on how the current `F' extension and
                 the custom op-code space of RISCV can be
                 leveraged/modified to support Posit arithmetic. We also
                 present implementation details of a parameterized and
                 feature-complete Posit FPU which is integrated with the
                 RISC-V compliant SHAKTI C-class core either as an
                 execution unit or as an accelerator. To fully leverage
                 the potential of Posit, we further enhance our Posit
                 FPU, with minimal overheads, to support two different
                 exponent sizes (with posit-size being 32-bits). This
                 allows applications to switch from high-accuracy
                 computation mode to a mode with higher dynamic-range at
                 run-time. In the absence of viable software tool-chain
                 to enable porting of applications in the Posit domain,
                 we present a workaround on how certain applications can
                 be modified minimally to exploit the existing RISC-V
                 tool-chain. We also provide examples of applications
                 which can perform better with Posit as compared to IEEE
                 754-2008. The proposed Posit FPU consumes 3507 slice
                 LUTs and 1294 slice registers on an Artix-7-100T Xilinx
                 FPGA while capable of operating at 100 MHz.",
  acknowledgement = ack-nhfb,
  keywords =     "floating-point; IEEE-754; Posit; processor; RISC-V",
}
@Article{Zhang:2019:CBB,
  author =       "Sizhuo Zhang and Andrew Wright and Thomas Bourgeat and
                 {Arvind}",
  title =        "Composable Building Blocks to Open Up Processor
                 Design",
  journal =      j-IEEE-MICRO,
  volume =       "39",
  number =       "3",
  pages =        "47--55",
  month =        may # "\slash " # jun,
  year =         "2019",
  CODEN =        "IEMIDZ",
  DOI =          "https://doi.org/10.1109/MM.2019.2910012",
  ISSN =         "0272-1732 (print), 1937-4143 (electronic)",
  ISSN-L =       "0272-1732",
  bibdate =      "Thu Jul 25 15:33:44 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ieeemicro.bib;
                 http://www.math.utah.edu/pub/tex/bib/risc-v.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Micro",
  journal-URL =  "http://www.computer.org/csdl/mags/mi/index.html",
  keywords =     "atomic rules; atomic updates; CMD ensure
                 composability; composable building blocks; composable
                 modular design; Concurrent computing; instantaneous
                 access; interface method; Linux; Microarchitecture;
                 microprocessor chips; Out of order; out-of-order
                 processors; out-of-order RISC-V processor; processor
                 design; reduced instruction set computing; Registers;
                 software architecture; state elements; System recovery;
                 Timing; Wires",
}
@Misc{Anonymous:2020:RVE,
  author =       "Anonymous",
  title =        "{RISC-V} embedded variant {RV32E} now fully supported
                 by {SEGGER}'s Floating-Point library",
  howpublished = "Web site",
  day =          "21",
  month =        sep,
  year =         "2020",
  bibdate =      "Thu Jan 28 18:02:53 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/fparith.bib;
                 http://www.math.utah.edu/pub/tex/bib/risc-v.bib",
  URL =          "https://www.design-reuse.com/news/48672/segger-s-floating-point-library-risc-v-rv32e.html",
  acknowledgement = ack-nhfb,
  remark =       "The story reports a significant code size reduction,
                 and speedup, over the GNU floating-point library.",
}
@Article{Greengard:2020:NWR,
  author =       "Samuel Greengard",
  title =        "News: Will {RISC-V} revolutionize computing?",
  journal =      j-CACM,
  volume =       "63",
  number =       "5",
  pages =        "30--32",
  month =        may,
  year =         "2020",
  CODEN =        "CACMA2",
  DOI =          "https://doi.org/10.1145/3386377",
  ISSN =         "0001-0782 (print), 1557-7317 (electronic)",
  ISSN-L =       "0001-0782",
  bibdate =      "Tue Apr 21 15:30:10 MDT 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/cacm2020.bib;
                 http://www.math.utah.edu/pub/tex/bib/risc-v.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3386377",
  abstract =     "The open instruction set for microprocessors promises
                 to reshape computing and introduce new, more powerful
                 capabilities.",
  acknowledgement = ack-nhfb,
  fjournal =     "Communications of the ACM",
  journal-URL =  "https://dl.acm.org/loi/cacm",
}
@Article{Horne:2020:GSF,
  author =       "Mitchell Horne",
  title =        "Getting Started with {FreeBSD\slash RISC-V}",
  journal =      "FreeBSD Journal",
  volume =       "??",
  number =       "??",
  pages =        "12--17",
  month =        jan # "\slash " # feb,
  year =         "2020",
  bibdate =      "Fri Dec 23 11:24:52 2022",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/risc-v.bib",
  URL =          "https://freebsdfoundation.org/wp-content/uploads/2020/03/Getting-Started-With-FreeBSD-RISC-V.pdf",
  acknowledgement = ack-nhfb,
  journal-URL =  "https://freebsdfoundation.org/our-work/journal/",
  remark =       "This article contains a clear description of
                 cross-compiling a kernel for FreeBSD on RISC-V on
                 another (non-RISC-V) FreeBSD system, creating a virtual
                 image, booting the new image with qemu-system-riscv64,
                 enabling networking, and later doing cross-compiled
                 kernel updates.",
}
@Article{Petrisko:2020:BAO,
  author =       "Daniel Petrisko and Farzam Gilani and Mark Wyse and Dai
                 Cheol Jung and Scott Davidson and Paul Gao and Chun
                 Zhao and Zahra Azad and Sadullah Canakci and Bandhav
                 Veluri and Tavio Guarino and Ajay Joshi and Mark Oskin
                 and Michael Bedford Taylor",
  title =        "{BlackParrot}: An Agile Open-Source {RISC-V} Multicore
                 for Accelerator {SoCs}",
  journal =      j-IEEE-MICRO,
  volume =       "40",
  number =       "4",
  pages =        "93--102",
  month =        jul # "\slash " # aug,
  year =         "2020",
  CODEN =        "IEMIDZ",
  DOI =          "https://doi.org/10.1109/MM.2020.2996145",
  ISSN =         "0272-1732 (print), 1937-4143 (electronic)",
  ISSN-L =       "0272-1732",
  bibdate =      "Wed Jul 29 07:59:51 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/gnu.bib;
                 http://www.math.utah.edu/pub/tex/bib/ieeemicro.bib;
                 http://www.math.utah.edu/pub/tex/bib/linux.bib;
                 http://www.math.utah.edu/pub/tex/bib/risc-v.bib;
                 http://www.math.utah.edu/pub/tex/bib/unix.bib",
  abstract =     "This article introduces BlackParrot, which aims to be
                 the default open-source, Linux-capable, cache-coherent,
                 64-bit RISC-V multicore used by the world. In executing
                 this goal, our research aims to advance the world's
                 knowledge about the software engineering of hardware.
                 Although originally bootstrapped by the University of
                 Washington and Boston University via DARPA funding,
                 BlackParrot strives to be community driven and
                 infrastructure agnostic; a multicore which is Pareto
                 optimal in terms of power, performance, area, and
                 complexity. In order to ensure BlackParrot is easy to
                 use, extend, and, most importantly, trust, development
                 is guided by three core principles: Be Tiny, Be
                 Modular, and Be Friendly. Development efforts have
                 prioritized the use of intentional interfaces and
                 modularity and silicon validation as first-order design
                 metrics, so that users can quickly get started and
                 trust that their design will perform as expected when
                 deployed. BlackParrot has been validated in a
                 GlobalFoundries 12-nm FinFET tapeout. BlackParrot is
                 ideal as a standalone Linux processor or as a malleable
                 fabric for an agile accelerator SoC design flow.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Micro",
  journal-URL =  "http://www.computer.org/csdl/mags/mi/index.html",
}
@Misc{SEGGER:2020:SFP,
  author =       "{SEGGER Microcontroller}",
  title =        "{SEGGER} Floating-Point Library",
  howpublished = "Web site",
  month =        jan,
  year =         "2020",
  bibdate =      "Fri Feb 07 06:02:26 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/fparith.bib;
                 http://www.math.utah.edu/pub/tex/bib/risc-v.bib",
  URL =          "https://www.segger.com/products/development-tools/runtime-library/technology/floating-point-library/",
  abstract =     "The floating-point library contains complete, fully
                 optimized and verified floating point functionality,
                 which is required for devices without an FPU. The
                 floating-point emulator, a crucial part of the
                 floating-point library, of the Arm and RISC-V variants
                 are written in assembly language, optimized either for
                 small code size or increased execution speed. For other
                 processor architectures the library has a portable C
                 implementation. \ldots{} The SEGGER Floating-Point
                 Library is delivered in source code, with optional
                 rights for redistribution in object code form. All
                 source files, a mix of C code and assembly, are fully
                 commented. The floating-point emulator, providing the
                 low-level functions, is entirely written in assembly.
                 Higher level functions are implemented as a mix of
                 primarily C code with some assembly routines. The code
                 can be compiled with any ANSI-compliant C compiler,
                 such as GCC, Clang, or IAR.",
  acknowledgement = ack-nhfb,
}
@Article{Zhang:2020:MRB,
  author =       "Jialiang Zhang and Yue Zha and Nicholas Beckwith and
                 Bangya Liu and Jing Li",
  title =        "{MEG}: a {RISCV}-based System Emulation Infrastructure
                 for Near-data Processing Using {FPGAs} and
                 High-bandwidth Memory",
  journal =      j-TRETS,
  volume =       "13",
  number =       "4",
  pages =        "19:1--19:24",
  month =        oct,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3409114",
  ISSN =         "1936-7406 (print), 1936-7414 (electronic)",
  ISSN-L =       "1936-7406",
  bibdate =      "Fri Oct 2 07:58:13 MDT 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/risc-v.bib;
                 http://www.math.utah.edu/pub/tex/bib/trets.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3409114",
  abstract =     "Emerging three-dimensional (3D) memory technologies,
                 such as the Hybrid Memory Cube (HMC) and High Bandwidth
                 Memory (HBM), provide high-bandwidth and massive
                 memory-level parallelism. With the growing
                 heterogeneity and complexity of computer systems
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "19",
  fjournal =     "ACM Transactions on Reconfigurable Technology and
                 Systems (TRETS)",
  journal-URL =  "https://dl.acm.org/loi/trets",
}
@Article{Zhu:2020:HIR,
  author =       "Lingjun Zhu and Lennart Bamberg and Anthony Agnesina
                 and Francky Catthoor and Dragomir Milojevic and Manu
                 Komalan and Julien Ryckaert and Alberto Garcia-Ortiz
                 and Sung Kyu Lim",
  title =        "Heterogeneous {$3$D} Integration for a {RISC-V} System
                 With {STT-MRAM}",
  journal =      j-IEEE-COMPUT-ARCHIT-LETT,
  volume =       "19",
  number =       "1",
  pages =        "51--54",
  month =        jan # "\slash " # jun,
  year =         "2020",
  DOI =          "https://doi.org/10.1109/LCA.2020.2992644",
  ISSN =         "1556-6056 (print), 1556-6064 (electronic)",
  ISSN-L =       "1556-6056",
  bibdate =      "Thu May 27 16:19:32 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ieeecomputarchitlett.bib;
                 http://www.math.utah.edu/pub/tex/bib/risc-v.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Computer Architecture Letters",
  journal-URL =  "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=10208",
}
@Article{Biswas:2021:CSI,
  author =       "Arnab Kumar Biswas",
  title =        "Cryptographic Software {IP} Protection without
                 Compromising Performance or Timing Side-channel
                 Leakage",
  journal =      j-TACO,
  volume =       "18",
  number =       "2",
  pages =        "20:1--20:20",
  month =        mar,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3443707",
  ISSN =         "1544-3566 (print), 1544-3973 (electronic)",
  ISSN-L =       "1544-3566",
  bibdate =      "Sat Mar 20 17:25:10 MDT 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
                 http://www.math.utah.edu/pub/tex/bib/risc-v.bib;
                 http://www.math.utah.edu/pub/tex/bib/taco.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3443707",
  abstract =     "Program obfuscation is a widely used cryptographic
                 software intellectual property (IP) protection
                 technique against reverse engineering attacks in
                 embedded systems. However, very few works have studied
                 the impact of combining various obfuscation techniques
                 on the obscurity (difficulty of reverse engineering)
                 and performance (execution time) of obfuscated
                 programs. In this article, we propose a Genetic
                 Algorithm (GA)-based framework that not only optimizes
                 obscurity and performance of obfuscated cryptographic
                 programs, but it also ensures very low timing
                 side-channel leakage. Our proposed Timing Side Channel
                 Sensitive Program Obfuscation Optimization Framework
                 (TSC-SPOOF) determines the combination of obfuscation
                 transformation functions that produce optimized
                 obfuscated programs with preferred optimization
                 parameters. In particular, TSC-SPOOF employs normalized
                 compression distance (NCD) and channel capacity to
                 measure obscurity and timing side-channel leakage,
                 respectively. We also use RISC-V rocket core running on
                 a Xilinx Zynq FPGA device as part of our framework to
                 obtain realistic results. The experimental results
                 clearly show that our proposed solution leads to
                 cryptographic programs with lower execution time,
                 higher obscurity, and lower timing side-channel leakage
                 than unguided obfuscation.",
  acknowledgement = ack-nhfb,
  articleno =    "20",
  fjournal =     "ACM Transactions on Architecture and Code Optimization
                 (TACO)",
  journal-URL =  "https://dl.acm.org/loi/taco",
}
@Article{Eliahu:2021:MME,
  author =       "Adi Eliahu and Ronny Ronen and Pierre-Emmanuel
                 Gaillardon and Shahar Kvatinsky",
  title =        "{multiPULPly}: a Multiplication Engine for
                 Accelerating Neural Networks on Ultra-low-power
                 Architectures",
  journal =      j-JETC,
  volume =       "17",
  number =       "2",
  pages =        "24:1--24:27",
  month =        apr,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3432815",
  ISSN =         "1550-4832",
  ISSN-L =       "1550-4832",
  bibdate =      "Fri Apr 30 06:39:29 MDT 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/fparith.bib;
                 http://www.math.utah.edu/pub/tex/bib/jetc.bib;
                 http://www.math.utah.edu/pub/tex/bib/risc-v.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3432815",
  abstract =     "Computationally intensive neural network applications
                 often need to run on resource-limited low-power
                 devices. Numerous hardware accelerators have been
                 developed to speed up the performance of neural network
                 applications and reduce power consumption; however,
                 most focus on data centers and full-fledged systems.
                 Acceleration in ultra-low-power systems has been only
                 partially addressed. In this article, we present
                 multiPULPly, an accelerator that integrates memristive
                 technologies within standard low-power CMOS technology,
                 to accelerate multiplication in neural network
                 inference on ultra-low-power systems. This accelerator
                 was designated for PULP, an open-source microcontroller
                 system that uses low-power RISC-V processors.
                 Memristors were integrated into the accelerator to
                 enable power consumption only when the memory is
                 active, to continue the task with no context-restoring
                 overhead, and to enable highly parallel analog
                 multiplication. To reduce the energy consumption, we
                 propose novel dataflows that handle common
                 multiplication scenarios and are tailored for our
                 architecture. The accelerator was tested on FPGA and
                 achieved a peak energy efficiency of 19.5 TOPS/W,
                 outperforming state-of-the-art accelerators by $ 1.5
                 \times $ to $ 4.5 \times $.",
  acknowledgement = ack-nhfb,
  articleno =    "24",
  fjournal =     "ACM Journal on Emerging Technologies in Computing
                 Systems (JETC)",
  journal-URL =  "https://dl.acm.org/loi/jetc",
}
@Misc{Horne:2021:RQ,
  author =       "Mitchell Horne",
  title =        "{riscv\slash QEMU}",
  howpublished = "Web site",
  day =          "8",
  month =        jun,
  year =         "2021",
  bibdate =      "Fri Dec 23 12:03:37 2022",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/risc-v.bib",
  note =         "See also \cite{Horne:2020:GSF}.",
  URL =          "https://wiki.freebsd.org/riscv/QEMU",
  acknowledgement = ack-nhfb,
}
@Misc{Horne:2021:S,
  author =       "Mitchell Horne",
  title =        "Spike",
  howpublished = "Web site",
  day =          "8",
  month =        jun,
  year =         "2021",
  bibdate =      "Fri Dec 23 12:03:37 2022",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/risc-v.bib",
  URL =          "https://wiki.freebsd.org/riscv/Spike",
  abstract =     "Spike is the canonical RISC-V ISA simulator. It
                 supports several ISA extensions, including some that
                 are not yet ratified. See the README on GitHub for more
                 information.",
  acknowledgement = ack-nhfb,
}
@Article{Schuiki:2021:SSR,
  author =       "F. Schuiki and F. Zaruba and T. Hoefler and L.
                 Benini",
  title =        "Stream Semantic Registers: A Lightweight {RISC-V ISA}
                 Extension Achieving Full Compute Utilization in
                 Single-Issue Cores",
  journal =      j-IEEE-TRANS-COMPUT,
  volume =       "70",
  number =       "2",
  pages =        "212--227",
  month =        feb,
  year =         "2021",
  CODEN =        "ITCOB4",
  DOI =          "https://doi.org/10.1109/TC.2020.2987314",
  ISSN =         "0018-9340 (print), 1557-9956 (electronic)",
  ISSN-L =       "0018-9340",
  bibdate =      "Fri Jan 29 17:51:47 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ieeetranscomput2020.bib;
                 http://www.math.utah.edu/pub/tex/bib/risc-v.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Transactions on Computers",
  journal-URL =  "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=12",
}
@Article{Szkandera:2021:BYO,
  author =       "Filip Szkandera",
  title =        "Build Your Own {RISC-V CPU}: Even Home-Brew Processors
                 Can Use Hot New Tech",
  journal =      j-IEEE-SPECTRUM,
  volume =       "58",
  number =       "6",
  pages =        "16--18",
  month =        jun,
  year =         "2021",
  CODEN =        "IEESAM",
  DOI =          "https://doi.org/10.1109/MSPEC.2021.9444942",
  ISSN =         "0018-9235 (print), 1939-9340 (electronic)",
  ISSN-L =       "0018-9235",
  bibdate =      "Fri Jun 4 12:04:57 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ieeespectrum2020.bib;
                 http://www.math.utah.edu/pub/tex/bib/risc-v.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Spectrum",
  journal-URL =  "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=6",
}
@Article{Tiwari:2021:PCP,
  author =       "Sugandha Tiwari and Neel Gala and Chester Rebeiro and
                 V. Kamakoti",
  title =        "{PERI}: a Configurable Posit Enabled {RISC-V} Core",
  journal =      j-TACO,
  volume =       "18",
  number =       "3",
  pages =        "25:1--25:26",
  month =        jun,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3446210",
  ISSN =         "1544-3566 (print), 1544-3973 (electronic)",
  ISSN-L =       "1544-3566",
  bibdate =      "Tue Jun 29 08:21:11 MDT 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/risc-v.bib;
                 http://www.math.utah.edu/pub/tex/bib/taco.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3446210",
  abstract =     "Owing to the failure of Dennard's scaling, the past
                 decade has seen a steep growth of prominent new
                 paradigms leveraging opportunities in computer
                 architecture. Two technologies of interest are Posit
                 and RISC-V. Posit was introduced in mid-2017 as a
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "25",
  fjournal =     "ACM Transactions on Architecture and Code Optimization
                 (TACO)",
  journal-URL =  "https://dl.acm.org/loi/taco",
}
@Article{Zaruba:2021:MCR,
  author =       "F. Zaruba and F. Schuiki and L. Benini",
  title =        "{Manticore}: A 4096-Core {RISC-V} Chiplet Architecture
                 for Ultraefficient Floating-Point Computing",
  journal =      j-IEEE-MICRO,
  volume =       "41",
  number =       "2",
  pages =        "36--42",
  month =        mar # "\slash " # apr,
  year =         "2021",
  CODEN =        "IEMIDZ",
  DOI =          "https://doi.org/10.1109/MM.2020.3045564",
  ISSN =         "0272-1732 (print), 1937-4143 (electronic)",
  ISSN-L =       "0272-1732",
  bibdate =      "Thu Apr 1 10:32:23 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/fparith.bib;
                 http://www.math.utah.edu/pub/tex/bib/hot-chips.bib;
                 http://www.math.utah.edu/pub/tex/bib/ieeemicro.bib;
                 http://www.math.utah.edu/pub/tex/bib/risc-v.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Micro",
  journal-URL =  "http://www.computer.org/csdl/mags/mi/index.html",
}
@Article{Adit:2022:PLT,
  author =       "Neil Adit and Adrian Sampson",
  title =        "Performance Left on the Table: An Evaluation of
                 Compiler Autovectorization for {RISC-V}",
  journal =      j-IEEE-MICRO,
  volume =       "42",
  number =       "5",
  pages =        "41--48",
  month =        sep # "\slash " # oct,
  year =         "2022",
  CODEN =        "IEMIDZ",
  DOI =          "https://doi.org/10.1109/MM.2022.3184867",
  ISSN =         "0272-1732 (print), 1937-4143 (electronic)",
  ISSN-L =       "0272-1732",
  bibdate =      "Thu Nov 03 05:37:10 2022",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ieeemicro.bib;
                 http://www.math.utah.edu/pub/tex/bib/risc-v.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Micro",
  journal-URL =  "http://www.computer.org/csdl/mags/mi/index.html",
}
@Article{Alder:2022:FPU,
  author =       "Fritz Alder and Jo {Van Bulck} and Jesse Spielman and
                 David Oswald and Frank Piessens",
  title =        "Faulty Point Unit: {ABI} Poisoning Attacks on Trusted
                 Execution Environments",
  journal =      j-DTRAP,
  volume =       "3",
  number =       "2",
  pages =        "13:1--13:26",
  month =        jun,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3491264",
  ISSN =         "2692-1626 (print), 2576-5337 (electronic)",
  ISSN-L =       "2576-5337",
  bibdate =      "Sat Jul 30 07:34:14 MDT 2022",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/dtrap.bib;
                 http://www.math.utah.edu/pub/tex/bib/fparith.bib;
                 http://www.math.utah.edu/pub/tex/bib/risc-v.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3491264",
  abstract =     "This article analyzes a previously overlooked attack
                 surface that allows unprivileged adversaries to impact
                 floating-point computations in enclaves through the
                 Application Binary Interface (ABI). In a comprehensive
                 study across 7 industry-standard and research enclave
                 shielding runtimes for Intel Software Guard Extensions
                 (SGX), we show that control and state registers of the
                 x87 Floating-Point Unit (FPU) and Intel Streaming SIMD
                 Extensions are not always properly sanitized on enclave
                 entry. We furthermore show that this attack goes beyond
                 the x86 architecture and can also affect RISC-V
                 enclaves. Focusing on SGX, we abuse the adversary's
                 control over precision and rounding modes as an ABI
                 fault injection primitive to corrupt enclaved
                 floating-point operations. Our analysis reveals that
                 this is especially relevant for applications that use
                 the older x87 FPU, which is still under certain
                 conditions used by modern compilers. We exemplify the
                 potential impact of ABI quality-degradation attacks for
                 enclaved machine learning and for the SPEC benchmarks.
                 We then explore the impact on confidentiality, showing
                 that control over exception masks can be abused as a
                 controlled channel to recover enclaved multiplication
                 operands. Our findings, affecting 5 of 7 studied SGX
                 runtimes and one RISC-V runtime, demonstrate the
                 challenges of implementing high-assurance trusted
                 execution across computing architectures.",
  acknowledgement = ack-nhfb,
  articleno =    "13",
  fjournal =     "Digital Threats: Research and Practice (DTRAP)",
  journal-URL =  "https://dl.acm.org/loi/dtrap",
}
@Article{Amor:2022:RVI,
  author =       "Hela Belhadj Amor and Carolynn Bernier and
                 Zden{\v{e}}k P{\v{r}}ikryl",
  title =        "A {RISC-V ISA} Extension for Ultra-Low Power {IoT}
                 Wireless Signal Processing",
  journal =      j-IEEE-TRANS-COMPUT,
  volume =       "71",
  number =       "4",
  pages =        "766--778",
  month =        apr,
  year =         "2022",
  CODEN =        "ITCOB4",
  DOI =          "https://doi.org/10.1109/TC.2021.3063027",
  ISSN =         "0018-9340 (print), 1557-9956 (electronic)",
  ISSN-L =       "0018-9340",
  bibdate =      "Thu Mar 17 06:38:17 2022",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ieeetranscomput2020.bib;
                 http://www.math.utah.edu/pub/tex/bib/risc-v.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Transactions on Computers",
  journal-URL =  "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=12",
}
%%% The team name in the author field is braced so that it is treated as
%%% one indivisible corporate author instead of being parsed into
%%% von/Last name parts (a leading lowercase "the" would become the von part).
@Article{Ditzel:2022:AMR,
  author =       "David R. Ditzel and {the Esperanto team}",
  title =        "Accelerating {ML} Recommendation With Over 1,000
                 {RISC-V\slash Tensor} Processors on {Esperanto}'s
                 {ET-SoC-1} Chip",
  journal =      j-IEEE-MICRO,
  volume =       "42",
  number =       "3",
  pages =        "31--38",
  month =        may # "\slash " # jun,
  year =         "2022",
  CODEN =        "IEMIDZ",
  DOI =          "https://doi.org/10.1109/MM.2022.3140674",
  ISSN =         "0272-1732 (print), 1937-4143 (electronic)",
  ISSN-L =       "0272-1732",
  bibdate =      "Fri May 27 06:13:54 2022",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ieeemicro.bib;
                 http://www.math.utah.edu/pub/tex/bib/risc-v.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Micro",
  journal-URL =  "http://www.computer.org/csdl/mags/mi/index.html",
}
@Article{Feng:2022:RRV,
  author =       "Lang Feng and Jiayi Huang and Luyi Li and Haochen
                 Zhang and Zhongfeng Wang",
  title =        "{RvDfi}: a {RISC-V} Architecture With Security
                 Enforcement by High Performance Complete Data-Flow
                 Integrity",
  journal =      j-IEEE-TRANS-COMPUT,
  volume =       "71",
  number =       "10",
  pages =        "2499--2512",
  month =        oct,
  year =         "2022",
  CODEN =        "ITCOB4",
  DOI =          "https://doi.org/10.1109/TC.2021.3133701",
  ISSN =         "0018-9340 (print), 1557-9956 (electronic)",
  ISSN-L =       "0018-9340",
  bibdate =      "Thu Sep 8 07:59:47 2022",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ieeetranscomput2020.bib;
                 http://www.math.utah.edu/pub/tex/bib/risc-v.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Transactions on Computers",
  journal-URL =  "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=12",
}
@Article{Mariotti:2022:WVB,
  author =       "Gianfranco Mariotti and Roberto Giorgi",
  title =        "\pkg{WebRISC-V}: a 32\slash 64-bit {RISC-V} pipeline
                 simulation tool",
  journal =      j-SOFTWAREX,
  volume =       "18",
  number =       "??",
  pages =        "??--??",
  month =        jun,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1016/j.softx.2022.101105",
  ISSN =         "2352-7110",
  ISSN-L =       "2352-7110",
  bibdate =      "Thu Jun 2 09:45:22 MDT 2022",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/risc-v.bib;
                 http://www.math.utah.edu/pub/tex/bib/softwarex.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S235271102200070X",
  acknowledgement = ack-nhfb,
  articleno =    "101105",
  fjournal =     "SoftwareX",
  journal-URL =  "https://www.sciencedirect.com/journal/softwarex/issues",
}
@Article{Sa:2022:FLR,
  author =       "Bruno S{\'a} and Jos{\'e} Martins and Sandro Pinto",
  title =        "A First Look at {RISC-V} Virtualization From an
                 Embedded Systems Perspective",
  journal =      j-IEEE-TRANS-COMPUT,
  volume =       "71",
  number =       "9",
  pages =        "2177--2190",
  month =        sep,
  year =         "2022",
  CODEN =        "ITCOB4",
  DOI =          "https://doi.org/10.1109/TC.2021.3124320",
  ISSN =         "0018-9340 (print), 1557-9956 (electronic)",
  ISSN-L =       "0018-9340",
  bibdate =      "Thu Aug 11 09:05:14 2022",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ieeetranscomput2020.bib;
                 http://www.math.utah.edu/pub/tex/bib/risc-v.bib;
                 http://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Transactions on Computers",
  journal-URL =  "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=12",
}
@Article{Saarinen:2022:DRV,
  author =       "Markku-Juhani O. Saarinen and G. Richard Newell and
                 Ben Marshall",
  title =        "Development of the {RISC-V} entropy source interface",
  journal =      j-J-CRYPTO-ENG,
  volume =       "12",
  number =       "4",
  pages =        "371--386",
  month =        nov,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1007/s13389-021-00275-6",
  ISSN =         "2190-8508 (print), 2190-8516 (electronic)",
  ISSN-L =       "2190-8508",
  bibdate =      "Fri Jun 2 12:32:09 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/jcryptoeng.bib;
                 http://www.math.utah.edu/pub/tex/bib/linux.bib;
                 http://www.math.utah.edu/pub/tex/bib/prng.bib;
                 http://www.math.utah.edu/pub/tex/bib/risc-v.bib;
                 http://www.math.utah.edu/pub/tex/bib/unix.bib",
  URL =          "https://link.springer.com/article/10.1007/s13389-021-00275-6",
  abstract =     "The RISC-V true random number generator (TRNG)
                 architecture breaks with previous ISA TRNG practice by
                 splitting the entropy source (ES) component away from
                 cryptographic DRBGs into a separate privileged
                 interface, and in its use of polling. The modular
                 approach is suitable for the RISC-V hardware IP
                 ecosystem, allows a significantly smaller
                 implementation footprint on platforms that need it,
                 while directly supporting current standards compliance
                 testing methods. We describe the interface, its use in
                 cryptography, and offer additional discussion,
                 background, and rationale for various aspects of it.
                 The design was informed by lessons learned from earlier
                 mainstream ISAs, recently introduced SP 800-90B and
                 FIPS 140-3 entropy audit requirements, AIS 31 and
                 common criteria, current and emerging cryptographic
                 needs such as post-quantum cryptography, and the goal
                 of supporting a wide variety of RISC-V implementations
                 and applications. Many of the architectural choices
                 result from quantitative observations about random
                 number generators in secure microcontrollers, the Linux
                 kernel, and cryptographic libraries.",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Crypto. Eng.",
  fjournal =     "Journal of Cryptographic Engineering",
  journal-URL =  "http://link.springer.com/journal/13389",
}
@Article{Vijaykumar:2022:MPO,
  author =       "Nandita Vijaykumar and Ataberk Olgun and Konstantinos
                 Kanellopoulos and F. Nisa Bostanci and Hasan Hassan and
                 Mehrshad Lotfi and Phillip B. Gibbons and Onur Mutlu",
  title =        "\pkg{MetaSys}: a Practical Open-source Metadata
                 Management System to Implement and Evaluate Cross-layer
                 Optimizations",
  journal =      j-TACO,
  volume =       "19",
  number =       "2",
  pages =        "26:1--26:29",
  month =        jun,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3505250",
  ISSN =         "1544-3566 (print), 1544-3973 (electronic)",
  ISSN-L =       "1544-3566",
  bibdate =      "Fri Mar 25 07:03:00 MDT 2022",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/gnu.bib;
                 http://www.math.utah.edu/pub/tex/bib/risc-v.bib;
                 http://www.math.utah.edu/pub/tex/bib/taco.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3505250",
  abstract =     "This article introduces the first open-source
                 FPGA-based infrastructure, MetaSys, with a prototype in
                 a RISC-V system, to enable the rapid implementation and
                 evaluation of a wide range of cross-layer techniques in
                 real hardware. Hardware-software \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "26",
  fjournal =     "ACM Transactions on Architecture and Code Optimization
                 (TACO)",
  journal-URL =  "https://dl.acm.org/loi/taco",
}
@Article{Zhang:2022:TMT,
  author =       "Jipeng Zhang and Junhao Huang and Zhe Liu and Sujoy
                 Sinha Roy",
  title =        "Time-Memory Trade-Offs for {Saber+} on
                 Memory-Constrained {RISC-V} Platform",
  journal =      j-IEEE-TRANS-COMPUT,
  volume =       "71",
  number =       "11",
  pages =        "2996--3007",
  month =        nov,
  year =         "2022",
  CODEN =        "ITCOB4",
  DOI =          "https://doi.org/10.1109/TC.2022.3143441",
  ISSN =         "0018-9340 (print), 1557-9956 (electronic)",
  ISSN-L =       "0018-9340",
  bibdate =      "Thu Oct 27 15:52:25 2022",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ieeetranscomput2020.bib;
                 http://www.math.utah.edu/pub/tex/bib/risc-v.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Transactions on Computers",
  journal-URL =  "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=12",
}
@Misc{Zeeb:2022:RV,
  author =       "Bjoern Zeeb",
  title =        "{RISC-V}",
  howpublished = "Web site",
  day =          "26",
  month =        jun,
  year =         "2022",
  bibdate =      "Fri Dec 23 12:05:46 2022",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/risc-v.bib",
  URL =          "https://wiki.freebsd.org/riscv",
  abstract =     "FreeBSD/RISC-V is an architecture port for FreeBSD to
                 run on the RISC-V Instruction-Set Architecture (ISA),
                 able to boot to multi-user mode on the QEMU emulator,
                 Spike simulator and real hardware.",
  acknowledgement = ack-nhfb,
}
@Article{Gomez:2023:HLV,
  author =       "Constantino G{\'o}mez and Filippo Mantovani and Erich
                 Focht and Marc Casas",
  title =        "{HPCG} on long-vector architectures: Evaluation and
                 optimization on {NEC SX-Aurora} and {RISC-V}",
  journal =      j-FUT-GEN-COMP-SYS,
  volume =       "143",
  number =       "??",
  pages =        "152--162",
  month =        jun,
  year =         "2023",
  CODEN =        "FGSEVI",
  DOI =          "https://doi.org/10.1016/j.future.2023.01.015",
  ISSN =         "0167-739X (print), 1872-7115 (electronic)",
  ISSN-L =       "0167-739X",
  bibdate =      "Mon Mar 13 08:24:01 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/futgencompsys2020.bib;
                 http://www.math.utah.edu/pub/tex/bib/risc-v.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0167739X23000225",
  abstract =     "Accelerators are becoming a key component to improve
                 efficiency in High-Performance Computing systems (HPC).
                 While GPU based systems are widely used to accelerate
                 HPC workloads, new systems based on long-vector
                 architectures are rapidly gaining popularity. The
                 development of optimized math libraries becomes
                 fundamental to achieve high performance in those
                 emerging vector architectures. This paper focuses on
                 the optimization of the HPCG benchmark, which comprises
                 four fundamental kernels found in many numerical
                 applications. We target two relevant long-vector
                 architectures like the NEC Vector Engine and the RISC-V
                 `V' vector extension. Compared to the well-tuned
                 proprietary solution, our open HPCG implementation
                 achieves a 1.6\% improvement in performance on the NEC
                 Vector Engine and achieves near maximum memory
                 bandwidth utilization in the two evaluated RISC-V
                 vector accelerator designs.",
  acknowledgement = ack-nhfb,
  fjournal =     "Future Generation Computer Systems",
  journal-URL =  "http://www.sciencedirect.com/science/journal/0167739X",
}
@Article{Gruin:2023:MTP,
  author =       "Alban Gruin and Thomas Carle and Christine Rochange
                 and Hugues Cass{\'e} and Pascal Sainrat",
  title =        "{MINOTAuR}: A Timing Predictable {RISC-V} Core
                 Featuring Speculative Execution",
  journal =      j-IEEE-TRANS-COMPUT,
  volume =       "72",
  number =       "1",
  pages =        "183--195",
  month =        jan,
  year =         "2023",
  CODEN =        "ITCOB4",
  DOI =          "https://doi.org/10.1109/TC.2022.3200000",
  ISSN =         "0018-9340 (print), 1557-9956 (electronic)",
  ISSN-L =       "0018-9340",
  bibdate =      "Mon Dec 19 08:41:53 2022",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ieeetranscomput2020.bib;
                 http://www.math.utah.edu/pub/tex/bib/risc-v.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Transactions on Computers",
  journal-URL =  "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=12",
}
@Article{Jin:2023:SBS,
  author =       "Hai Jin and Zhuo He and Weizhong Qiang",
  title =        "{SpecTerminator}: Blocking Speculative Side Channels
                 Based on Instruction Classes on {RISC-V}",
  journal =      j-TACO,
  volume =       "20",
  number =       "1",
  pages =        "15:1--15:??",
  month =        mar,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3566053",
  ISSN =         "1544-3566 (print), 1544-3973 (electronic)",
  ISSN-L =       "1544-3566",
  bibdate =      "Fri Feb 17 06:54:21 MST 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/risc-v.bib;
                 http://www.math.utah.edu/pub/tex/bib/taco.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3566053",
  abstract =     "In modern processors, speculative execution has
                 significantly improved the performance of processors,
                 but it has also introduced speculative execution
                 vulnerabilities. Recent defenses are based on the
                 delayed execution to block various speculative side
                 channels, but we show that several of the current
                 state-of-the-art defenses fail to block some of the
                 available speculative side channels, and the current
                 most secure defense introduces a performance overhead
                 of up to 24.5\%.\par
                 We propose SpecTerminator, the first defense framework
                 based on instruction classes that can comprehensively
                 and precisely block all existing speculative side
                 channels. In SpecTerminator, a novel speculative side
                 channel classification scheme based on the features of
                 secret transmission is proposed, and the sensitive
                 instructions in the speculative window are classified
                 and identified using optimized hardware taint tracking
                 and instruction masking techniques to accurately
                 determine the scope of leakage. Then, according to the
                 execution characteristics of these instructions,
                 dedicated delayed execution strategies, such as TLB
                 request ignoring, selective issue, and extended
                 delay-on-miss, are designed for each type of sensitive
                 instruction to precisely control that these
                 instructions are delayed only in pipeline stages that
                 are at risk of leakage. In contrast to previous
                 defenses based on the Gem5 simulator, we have
                 innovatively implemented defenses against Spectre
                 attacks based on the open-source instruction set RISC-V
                 on an FPGA-accelerated simulation platform that is more
                 similar to real hardware. To evaluate the security of
                 SpecTerminator, we have replicated various existing
                 x86-based Spectre variants on RISC-V. On SPEC 2006,
                 SpecTerminator defends against Spectre attacks based on
                 memory hierarchy side channels with a performance
                 overhead of 2.6\% and against all existing Spectre
                 attacks with a performance overhead of 6.0\%.",
  acknowledgement = ack-nhfb,
  articleno =    "15",
  fjournal =     "ACM Transactions on Architecture and Code Optimization
                 (TACO)",
  journal-URL =  "https://dl.acm.org/loi/taco",
}
@Article{Kuo:2023:RVG,
  author =       "Yao-Ming Kuo and Francisco Garc{\'\i}a-Herrero and
                 Oscar Ruano and Juan Antonio Maestro",
  title =        "{RISC-V} {Galois Field} {ISA} Extension for Non-Binary
                 Error-Correction Codes and Classical and Post-Quantum
                 Cryptography",
  journal =      j-IEEE-TRANS-COMPUT,
  volume =       "72",
  number =       "3",
  pages =        "682--692",
  month =        mar,
  year =         "2023",
  CODEN =        "ITCOB4",
  DOI =          "https://doi.org/10.1109/TC.2022.3174587",
  ISSN =         "0018-9340 (print), 1557-9956 (electronic)",
  ISSN-L =       "0018-9340",
  bibdate =      "Sat Feb 18 16:18:34 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
                 http://www.math.utah.edu/pub/tex/bib/ieeetranscomput2020.bib;
                 http://www.math.utah.edu/pub/tex/bib/risc-v.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Transactions on Computers",
  journal-URL =  "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=12",
}
@Article{Snelgrove:2023:SPT,
  author =       "Martin Snelgrove and Robert Beachler",
  title =        "{speedAI240}: a 2-Petaflop, {30-Teraflops\slash W}
                 At-Memory Inference Acceleration Device With 1456
                 {RISC-V} Cores",
  journal =      j-IEEE-MICRO,
  volume =       "43",
  number =       "3",
  pages =        "58--63",
  month =        may # "\slash " # jun,
  year =         "2023",
  CODEN =        "IEMIDZ",
  DOI =          "https://doi.org/10.1109/MM.2023.3255864",
  ISSN =         "0272-1732 (print), 1937-4143 (electronic)",
  ISSN-L =       "0272-1732",
  bibdate =      "Thu May 18 07:38:12 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/hot-chips.bib;
                 http://www.math.utah.edu/pub/tex/bib/ieeemicro.bib;
                 http://www.math.utah.edu/pub/tex/bib/risc-v.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Micro",
  journal-URL =  "https://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=40",
}
@Article{Talpes:2023:MDT,
  author =       "Emil Talpes and Debjit Das Sarma and Doug Williams and
                 Sahil Arora and Thomas Kunjan and Benjamin Floering and
                 Ankit Jalote and Christopher Hsiong and Chandrasekhar
                 Poorna and Vaidehi Samant and John Sicilia and Anantha
                 Kumar Nivarti and Raghuvir Ramachandran and Tim Fischer
                 and Ben Herzberg and Bill McGee and Ganesh
                 Venkataramanan and Pete Banon",
  title =        "The Microarchitecture of {DOJO}, {Tesla}'s Exa-Scale
                 Computer",
  journal =      j-IEEE-MICRO,
  volume =       "43",
  number =       "3",
  pages =        "31--39",
  month =        may # "\slash " # jun,
  year =         "2023",
  CODEN =        "IEMIDZ",
  DOI =          "https://doi.org/10.1109/MM.2023.3258906",
  ISSN =         "0272-1732 (print), 1937-4143 (electronic)",
  ISSN-L =       "0272-1732",
  bibdate =      "Thu May 18 07:38:12 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/fparith.bib;
                 http://www.math.utah.edu/pub/tex/bib/hot-chips.bib;
                 http://www.math.utah.edu/pub/tex/bib/ieeemicro.bib;
                 http://www.math.utah.edu/pub/tex/bib/risc-v.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Micro",
  journal-URL =  "https://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=40",
  remark =       "DOJO is based on RISC-V64 with instruction set
                 extensions. Its arithmetic supports 8-, 16-, 32-, and
                 64-bit integers, and IEEE 754 FP32 (1/8/23), plus FP16
                 (1/5/10), BFP16 (1/8/7), CFP8 (1/4/3), CFP8 (1/5/2),
                 and CFP16 (1/5/10) floating-point formats. The latter
                 is unusual having an external register that records the
                 exponent bias (0, 31, or 63), so that it supports three
                 different ranges of numbers. There is no support for
                 FP64 or longer formats. There is support for stochastic
                 rounding.",
}
@Article{Wen:2023:WCP,
  author =       "Elliott Wen and Gerald Weber and Suranga Nanayakkara",
  title =        "{WasmAndroid}: a Cross-Platform Runtime for Native
                 Programming Languages on {Android}",
  journal =      j-TECS,
  volume =       "22",
  number =       "1",
  pages =        "4:1--4:??",
  month =        jan,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3530286",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Mar 11 08:39:25 MST 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/risc-v.bib;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3530286",
  abstract =     "Open source hardware such as RISC-V has been gaining
                 substantial momentum. Recently, they have begun to
                 embrace Google's Android operating system to leverage
                 its software ecosystem. Despite the encouraging
                 progress, a challenging issue arises: a majority
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "4",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Yang:2023:ATF,
  author =       "Chun-Chieh Yang and Yi-Ru Chen and Hui-Hsin Liao and
                 Yuan-Ming Chang and Jenq-Kuen Lee",
  title =        "Auto-tuning Fixed-point Precision with {TVM} on
                 {RISC-V} Packed {SIMD} Extension",
  journal =      j-TODAES,
  volume =       "28",
  number =       "3",
  pages =        "33:1--33:??",
  month =        may,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3569939",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed May 17 08:06:20 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/fparith.bib;
                 http://www.math.utah.edu/pub/tex/bib/risc-v.bib;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3569939",
  abstract =     "Today, as deep learning (DL) is applied more often in
                 daily life, dedicated processors such as CPUs and GPUs
                 have become very important for accelerating model
                 executions. With the growth of technology, people are
                 becoming accustomed to using edge devices, such as
                 mobile phones, smart watches, and VR devices in their
                 daily lives. A variety of technologies using DL are
                 gradually being applied to these edge devices. However,
                 there is a large number of computations in DL. It faces
                 a challenging problem how to provide solutions in the
                 edge devices. In this article, the proposed method
                 enables a flow with the RISC-V Packed extension (P
                 extension) in TVM. TVM, an open deep learning compiler
                 for neural network models, is growing as a key
                 infrastructure for DL computing. RISC-V is an open
                 instruction set architecture (ISA) with customized and
                 flexible features. The Packed-SIMD extension is a
                 RISC-V extension that enables subword
                 single-instruction multiple-data (SIMD) computations in
                 RISC-V architectures to support fallback engines in AI
                 computing. In the proposed flow, a fixed-point type
                 that is supported by an integer of 16-bit type and
                 saturation instructions is added to replace the
                 original 32-bit float type. In addition, an auto-tuning
                 method is proposed to use a uniform selector mechanism
                 (USM) to find the binary point position for fixed-point
                 type use. The tensorization feature of TVM can be used
                 to optimize specific hardware such as subword SIMD
                 instructions with RISC-V P extension. With our
                 experiment on the Spike simulator, the proposed method
                 with the USM can improve performance by approximately
                 2.54 to 6.15$ \times $ in terms of instruction counts
                 with little accuracy loss.",
  acknowledgement = ack-nhfb,
  articleno =    "33",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
%%% Whole-volume record for the 24th IEEE Symposium on Computer
%%% Arithmetic (ARITH 24), London, UK, 24--26 July 2017.  The month
%%% value is taken from the conference dates given in the title.
@Proceedings{Burgess:2017:ISC,
  editor =       "Neil Burgess and Javier Bruguera and Florent de
                 Dinechin",
  booktitle =    "{24th IEEE Symposium on Computer Arithmetic (ARITH
                 24), London, UK, 24--26 July 2017}",
  title =        "{2017 IEEE 24th Symposium on Computer Arithmetic
                 (ARITH 24), London, UK, 24--26 July 2017}",
  publisher =    pub-IEEE,
  address =      pub-IEEE:adr,
  pages =        "xii + 198",
  month =        jul,
  year =         "2017",
  ISBN =         "1-5386-1966-0 (print), 1-5386-1965-2, 1-5386-1964-4",
  ISBN-13 =      "978-1-5386-1966-7 (print), 978-1-5386-1965-0,
                 978-1-5386-1964-3",
  ISSN =         "1063-6889",
  ISSN-L =       "1063-6889",
  LCCN =         "QA76.9.C62 S95 2017",
  bibdate =      "Fri Nov 17 10:14:11 2017",
  bibsource =    "http://www.math.utah.edu/pub/bibnet/authors/h/higham-nicholas-john.bib;
                 http://www.math.utah.edu/pub/tex/bib/elefunt.bib;
                 http://www.math.utah.edu/pub/tex/bib/fparith.bib;
                 http://www.math.utah.edu/pub/tex/bib/gnu.bib;
                 http://www.math.utah.edu/pub/tex/bib/risc-v.bib",
  URL =          "http://ieeexplore.ieee.org/servlet/opac?punumber=8019911",
  acknowledgement = ack-nhfb,
  keywords =     "ARITH-24; computer arithmetic units; correctness
                 proofs; cryptography; domain specific designs; error
                 analysis; exascale computing; floating point
                 arithmetic; floating-point error analysis; formal
                 verification; function approximation; modular
                 arithmetic; theorem proving; verification",
}
%%% Whole-volume record for the Conference for Next Generation
%%% Arithmetic 2019 (Singapore, March 2019).  The month value is taken
%%% from the date given in the title; the subject list holds each
%%% catalog term once.
@Proceedings{Gustafson:2019:PCN,
  editor =       "John Gustafson and Vassil Dimitrov",
  booktitle =    "{Proceedings of the Conference for Next Generation
                 Arithmetic 2019, Singapore, March 2019}",
  title =        "{Proceedings of the Conference for Next Generation
                 Arithmetic 2019, Singapore, March 2019}",
  publisher =    pub-ACM,
  address =      pub-ACM:adr,
  pages =        "66",
  month =        mar,
  year =         "2019",
  ISBN =         "1-4503-7139-6",
  ISBN-13 =      "978-1-4503-7139-1",
  LCCN =         "????",
  bibdate =      "Mon Feb 10 12:06:51 MST 2020",
  bibsource =    "fsz3950.oclc.org:210/WorldCat;
                 http://www.math.utah.edu/pub/tex/bib/fparith.bib;
                 http://www.math.utah.edu/pub/tex/bib/risc-v.bib",
  series =       "ICPS",
  acknowledgement = ack-nhfb,
  meetingname =  "Conference for Next Generation Arithmetic (2019:
                 Singapore)",
  subject =      "Computer arithmetic; Congresses; Computer algorithms",
}
%%% Whole-volume record for the 26th IEEE Symposium on Computer
%%% Arithmetic (ARITH-26), Kyoto, Japan, 10--12 June 2019.
%%% NOTE(review): the DOI and abstract appear to describe the
%%% title-page record rather than the whole volume -- confirm.
@Proceedings{Takagi:2019:ISC,
  editor =       "Naofumi Takagi and Sylvie Boldo and Martin
                 Langhammer",
  booktitle =    "{2019 IEEE 26th Symposium on Computer Arithmetic
                 ARITH-26 (2019), Kyoto, Japan, 10--12 June 2019}",
  title =        "{2019 IEEE 26th Symposium on Computer Arithmetic
                 ARITH-26 (2019), Kyoto, Japan, 10--12 June 2019}",
  publisher =    pub-IEEE,
  address =      pub-IEEE:adr,
  pages =        "15 + 220",
  month =        jun,
  year =         "2019",
  DOI =          "https://doi.org/10.1109/ARITH.2019.00001",
  ISBN =         "1-72813-366-1",
  ISBN-13 =      "978-1-72813-366-9",
  ISSN =         "1063-6889",
  ISSN-L =       "1063-6889",
  bibdate =      "Fri Jan 31 08:18:07 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/cryptography2000.bib;
                 http://www.math.utah.edu/pub/tex/bib/elefunt.bib;
                 http://www.math.utah.edu/pub/tex/bib/fparith.bib;
                 http://www.math.utah.edu/pub/tex/bib/risc-v.bib",
  abstract =     "Presents the title page of the proceedings record.",
  acknowledgement = ack-nhfb,
  keywords =     "ARITH-26",
}