%%% -*-BibTeX-*- %%% ==================================================================== %%% BibTeX-file{ %%% author = "Nelson H. F. Beebe", %%% version = "2.19", %%% date = "15 March 2014", %%% time = "07:24:41 MST", %%% filename = "lawn.bib", %%% address = "University of Utah %%% Department of Mathematics, 110 LCB %%% 155 S 1400 E RM 233 %%% Salt Lake City, UT 84112-0090 %%% USA", %%% telephone = "+1 801 581 5254", %%% FAX = "+1 801 581 4148", %%% URL = "http://www.math.utah.edu/~beebe", %%% checksum = "18064 9495 41396 421852", %%% email = "beebe at math.utah.edu, beebe at acm.org, %%% beebe at computer.org (Internet)", %%% codetable = "ISO/ASCII", %%% keywords = "bibliography; BibTeX; LAPACK Working Note", %%% license = "public domain", %%% supported = "yes", %%% docstring = "This is a COMPLETE bibliography of the LAPACK %%% Working Note reports, which document the %%% research behind the LAPACK linear algebra %%% software package. %%% %%% The official Web site for this project is %%% %%% http://www.netlib.org/lapack/lawns/downloads/ %%% %%% and the data in this bibliography was %%% originally derived entirely from that %%% resource. However, almost half of the LAPACK %%% Working Notes have subsequently been %%% published in journal articles or conference %%% proceedings, and in a few cases, in both, so %%% at version 2.00 of this bibliography, %%% cross-referenced entries have been provided %%% for them. 
%%% %%% At version 2.19, the year coverage looked %%% like this: %%% %%% 1987 ( 2) 1997 ( 22) 2007 ( 16) %%% 1988 ( 6) 1998 ( 19) 2008 ( 17) %%% 1989 ( 12) 1999 ( 11) 2009 ( 13) %%% 1990 ( 17) 2000 ( 8) 2010 ( 13) %%% 1991 ( 13) 2001 ( 6) 2011 ( 25) %%% 1992 ( 29) 2002 ( 8) 2012 ( 12) %%% 1993 ( 23) 2003 ( 4) 2013 ( 10) %%% 1994 ( 32) 2004 ( 3) 2014 ( 1) %%% 1995 ( 25) 2005 ( 9) %%% 1996 ( 21) 2006 ( 8) %%% %%% Article: 66 %%% InCollection: 2 %%% InProceedings: 17 %%% Proceedings: 16 %%% TechReport: 284 %%% %%% Total entries: 385 %%% %%% The checksum field above contains a CRC-16 %%% checksum as the first value, followed by the %%% equivalent of the standard UNIX wc (word %%% count) utility output of lines, words, and %%% characters. This is produced by Robert %%% Solovay's checksum utility.", %%% } %%% ====================================================================

%%% Raw LaTeX emitted ahead of the bibliography.  The hyphenation
%%% exception list is currently empty; add words inside the braces of
%%% \hyphenation{} as needed.
@Preamble{
    "\hyphenation{}"
}

%%% ==================================================================== %%% Acknowledgement abbreviations:

%%% Contact details of the bibliography maintainer; entries refer to
%%% this macro from their `acknowledgement' field.
@String{ack-nhfb = "Nelson H. F. Beebe,
                    University of Utah,
                    Department of Mathematics, 110 LCB,
                    155 S 1400 E RM 233,
                    Salt Lake City, UT 84112-0090, USA,
                    Tel: +1 801 581 5254,
                    FAX: +1 801 581 4148,
                    e-mail: \path|beebe@math.utah.edu|,
                            \path|beebe@acm.org|,
                            \path|beebe@computer.org| (Internet),
                    URL: \path|http://www.math.utah.edu/~beebe/|"}

%%% ==================================================================== %%% Institutions and their addresses:

%%% Each institution macro `inst-XXX' is paired with an address macro
%%% `inst-XXX:adr'.
@String{inst-ANL-MCS            = "Mathematics and Computer Science Division,
                                  Argonne National Laboratory"}
@String{inst-ANL-MCS:adr        = "9700 South Cass Avenue, Argonne, IL
                                  60439-4801, USA"}

@String{inst-INRIA              = "INRIA (Institut National de Recherche en
                                  Informatique et en Automatique)"}
@String{inst-INRIA:adr          = "Rocquencourt, France"}

@String{inst-UTK-CS             = "Department of Computer Science, University
                                  of Tennessee, Knoxville"}
@String{inst-UTK-CS:adr         = "Knoxville, TN 37996, USA"}

@String{inst-UCB-EECS           = "Department of Electrical Engineering and
                                  Computer Science, University of California,
                                  Berkeley"}
@String{inst-UCB-EECS:adr       = "Berkeley, CA, USA"}

%%% ==================================================================== %%% Journal abbreviations:

%%% Journal macros, in alphabetical order of macro name.  Every macro
%%% holds the full journal title, so that abbreviation is a decision
%%% made in one place rather than per entry.  The `\-' in the two
%%% Concurrency titles is a discretionary hyphen for `Practice'.
@String{j-BIT                   = "BIT"}
@String{j-BIT-NUM-MATH          = "BIT Numerical Mathematics"}
@String{j-CCPE                  = "Concurrency and Computation: Prac\-tice and
                                  Experience"}
@String{j-CPE                   = "Concurrency: Prac\-tice and Experience"}
@String{j-ETNA                  = "Electronic Transactions on Numerical
                                  Analysis"}
@String{j-IEEE-TRANS-COMPUT     = "IEEE Transactions on Computers"}
@String{j-IEEE-TRANS-PAR-DIST-SYS = "IEEE Transactions on Parallel and
                                  Distributed Systems"}
@String{j-IJHPCA                = "The International Journal of High
                                  Performance Computing Applications"}
@String{j-IMA-J-NUMER-ANAL      = "IMA Journal of Numerical Analysis"}
@String{j-INT-J-HIGH-SPEED-COMPUTING = "International Journal of High Speed
                                  Computing (IJHSC)"}
@String{j-J-COMP-APPL-MATH      = "Journal of Computational and Applied
                                  Mathematics"}
@String{j-J-NUM-LIN-ALG-APPL    = "Journal of Numerical Linear Algebra with
                                  Applications"}
@String{j-J-PAR-DIST-COMP       = "Journal of Parallel and Distributed
                                  Computing"}
@String{j-LECT-NOTES-COMP-SCI   = "Lecture Notes in Computer Science"}
@String{j-LINEAR-ALGEBRA-APPL   = "Linear Algebra and its Applications"}
@String{j-NUM-MATH              = "Numerische Mathematik"}
@String{j-NUMER-ALGORITHMS      = "Numerical Algorithms"}
@String{j-PARALLEL-COMPUTING    = "Parallel Computing"}
@String{j-PARALLEL-DIST-COMP-PRACT = "Parallel and Distributed Computing
                                  Practices"}
@String{j-PROC-IEEE             = "Proceedings of the IEEE"}
@String{j-SCI-PROG              = "Scientific Programming"}
@String{j-SIAM-J-MAT-ANA-APPL   = "SIAM Journal on Matrix Analysis and
                                  Applications"}
@String{j-SIAM-J-NUMER-ANAL     = "SIAM Journal on Numerical Analysis"}
@String{j-SIAM-J-SCI-COMP       = "SIAM Journal on Scientific Computing"}
@String{j-SUPERCOMPUTER         = "Supercomputer"}
@String{j-TOMS                  = "ACM Transactions on Mathematical
                                  Software"}

%%% ==================================================================== %%% Publishers and their addresses:

%%% Each publisher macro `pub-XXX' is paired with an address macro
%%% `pub-XXX:adr'.
@String{pub-ACM                 = "ACM Press"}
@String{pub-ACM:adr             = "New York, NY 10036, USA"}

@String{pub-CAMBRIDGE           = "Cambridge University Press"}
@String{pub-CAMBRIDGE:adr       = "Cambridge, UK"}

@String{pub-ELSEVIER            = "Elsevier"}
@String{pub-ELSEVIER:adr        = "Amsterdam, The Netherlands"}

@String{pub-IEEE                = "IEEE Computer Society Press"}
@String{pub-IEEE:adr            = "1109 Spring Street, Suite 300, Silver
                                  Spring, MD 20910, USA"}

@String{pub-KLUWER              = "Kluwer Academic Publishers"}
@String{pub-KLUWER:adr          = "Dordrecht, The Netherlands"}

@String{pub-LONGMAN             = "Longman Scientific and Technical"}
@String{pub-LONGMAN:adr         = "Harlow, Essex, UK"}

@String{pub-SIAM                = "Society for Industrial and Applied
                                  Mathematics"}
@String{pub-SIAM:adr            = "Philadelphia, PA, USA"}

@String{pub-SV                  = "Spring{\-}er-Ver{\-}lag"}
@String{pub-SV:adr              = "Berlin, Germany~/ Heidelberg, Germany~/
                                  London, UK~/ etc."}

%%% ==================================================================== %%% Series abbreviations:

%%% Book-series macro, for use in `series' fields.
@String{ser-LNCS                = "Lecture Notes in Computer Science"}

%%% ==================================================================== %%% Some statistics about the LAPACK Working Notes: %%% In April 2005, when there were 165 working notes, there were 117 %%% different author family names, but the distribution is very skewed, %%% with 55 family names appearing only once. Here are the frequencies %%% of family names: %%% %%% 73 Dongarra 2 Casanova 1 Gustavson %%% 46 Demmel 2 Day 1 Harrod %%% 15 Eijkhout 2 Desprez 1 Iskandar %%% 13 Anderson 2 Eisenstat 1 Kalhan %%% 13 Walker 2 Fahey 1 Kapur %%% 12 Hammarling 2 Gilbert 1 Lam %%% 12 Petitet 2 Hegland 1 Lehoucq %%% 12 Whaley 2 Hida 1 Liu %%% 11 Li 2 Kim 1 Lucas %%% 10 Bai 2 Kolatis 1 Luszczek %%% 9 Choi 2 Ling 1 Martin %%% 9 Du Croz 2 Loan 1 Mukherjee %%% 8 Higham 2 Lumsdaine 1 Niu %%% 7 Dhillon 2 Marques 1 Owczarz %%% 7 Ostrouchov 2 Poromaa 1 Papadopoulos %%% 6 Bischof 2 Remington 1 Plank %%% 6 Blackford 2 Robert 1 Randriamaro %%% 6 Kahan 2 Romine 1 Riedy %%% 6 Pozo 2 Tang 1 Robinson %%% 6 Sorensen 2 Tisseur 1 Roche %%% 6 van de Geijn 2 V{\"o}mel 1 Ruhe %%% 5 Stanley 2 van der Vorst 1 Rutter %%% 5 Wa{\'s}niewski 1 Ahues 1 Schreiber %%% 4 Cleary 1 Andersen 1 Sidani %%% 4 Greenbaum 1 Asanovic 1 Slapni{\v{c}}ar %%% 4 Heath 1 Bailey 1 Sun %%% 4 K{\aa}gstr{\"o}m 1 Barlow 1 Tomei %%% 4 Parlett 1 Barrett 1 Tourancheau %%% 4 Raghavan 1 Bilmes 1 Tung %%% 3 D'Azevedo 1 Bindel 1 Veselic %%% 3 Gu 1 Brewer 1 Veseli{\'c} %%% 3 Henry 1 Burkhart 1 Watkins %%% 3 Mayes 1 Chakrabarti 1 Watts %%% 3 McKenney 1 Chen 1 Wilkinson %%% 3 Quintana-Orti 1 Cheng 1 Yalamov %%% 3 Radicati 1 Chin 1 Yelick %%% 3 Ren 1 Deift 1 Yoo %%% 2 Arbenz 1 Drma{\v{c}} 1 Zemla %%% 2 Berry 1 Gragg

%%% ==================================================================== %%% Bibliography entries, sorted by ascending LAWN numbers:

@TechReport{Demmel:1987:PDL, author = "J. Demmel and J. Dongarra and J. {Du Croz} and A. Greenbaum and S. Hammarling and D. Sorensen", title = "Prospectus for the Development of a Linear Algebra Library for High-Performance Computers", type = "LAPACK Working Note", number = "01", institution = inst-ANL-MCS, address = inst-ANL-MCS:adr, month = sep, year = "1987", bibdate = "Fri Apr 22 17:06:37 2005", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "ANL, MCS-TM-97, September 1987.", URL = "http://www.netlib.org/lapack/lawns/lawn01.ps; http://www.netlib.org/lapack/lawnspdf/lawn01.pdf", acknowledgement = ack-nhfb, } @TechReport{Dongarra:1987:BRM, author = "J. Dongarra and S. Hammarling and D. Sorensen", title = "Block Reduction of Matrices to Condensed Forms for Eigenvalue Computations", type = "LAPACK Working Note", number = "02", institution = inst-ANL-MCS, address = inst-ANL-MCS:adr, month = sep, year = "1987", bibdate = "Fri Apr 22 17:06:37 2005", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "ANL, MCS-TM-99, September 1987. Published in \cite{Dongarra:1989:BRM}.", URL = "http://www.netlib.org/lapack/lawns/lawn02.ps; http://www.netlib.org/lapack/lawnspdf/lawn02.pdf", acknowledgement = ack-nhfb, } @TechReport{Demmel:1988:CSS, author = "J. Demmel and W. Kahan", title = "Computing Small Singular Values of Bidiagonal Matrices with Guaranteed High Relative Accuracy", type = "LAPACK Working Note", number = "03", institution = inst-ANL-MCS, address = inst-ANL-MCS:adr, month = feb, year = "1988", bibdate = "Fri Apr 22 17:06:37 2005", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "ANL, MCS-TM-110, February 1988.", URL = "http://www.netlib.org/lapack/lawns/lawn03.ps; http://www.netlib.org/lapack/lawnspdf/lawn03.pdf", acknowledgement = ack-nhfb, } @TechReport{Demmel:1988:GDS, author = "J. Demmel and J. {Du Croz} and S. Hammarling and D. 
Sorensen", title = "Guidelines for the Design of Symmetric Eigenroutines, {SVD}, and Iterative Refinement and Condition Estimation for Linear Systems", type = "LAPACK Working Note", number = "04", institution = inst-ANL-MCS, address = inst-ANL-MCS:adr, month = mar, year = "1988", bibdate = "Fri Apr 22 17:06:37 2005", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "ANL, MCS-TM-111, March 1988.", URL = "http://www.netlib.org/lapack/lawns/lawn04.ps; http://www.netlib.org/lapack/lawnspdf/lawn04.pdf", acknowledgement = ack-nhfb, } @TechReport{Bischof:1988:PC, author = "C. Bischof and J. Demmel and J. Dongarra and J. {Du Croz} and A. Greenbaum and S. Hammarling and D. Sorensen", title = "Provisional Contents", type = "LAPACK Working Note", number = "05", institution = inst-ANL-MCS, address = inst-ANL-MCS:adr, month = sep, year = "1988", bibdate = "Fri Apr 22 17:06:37 2005", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "ANL, MCS-TM-38, September 1988.", URL = "http://www.netlib.org/lapack/lawns/lawn05.ps; http://www.netlib.org/lapack/lawnspdf/lawn05.pdf", acknowledgement = ack-nhfb, } @TechReport{Brewer:1988:TAAa, author = "O. Brewer and J. Dongarra and D. Sorensen", title = "Tools to Aid in the Analysis of Memory Access Patterns for {FORTRAN} Programs", type = "LAPACK Working Note", number = "06", institution = inst-ANL-MCS, address = inst-ANL-MCS:adr, month = jun, year = "1988", bibdate = "Fri Apr 22 17:06:37 2005", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "ANL, MCS-TM-120, June 1988. Published in \cite{Brewer:1988:TAAb}.", URL = "http://www.netlib.org/lapack/lawns/lawn06.ps; http://www.netlib.org/lapack/lawnspdf/lawn06.pdf", acknowledgement = ack-nhfb, } @TechReport{Barlow:1988:CAE, author = "J. Barlow and J. 
Demmel", title = "Computing Accurate Eigensystems of Scaled Diagonally Dominant Matrices", type = "LAPACK Working Note", number = "07", institution = inst-ANL-MCS, address = inst-ANL-MCS:adr, month = dec, year = "1988", bibdate = "Fri Apr 22 17:06:37 2005", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "ANL, MCS-TM-126, December 1988. Published in \cite{Barlow:1990:CAE}.", URL = "http://www.netlib.org/lapack/lawns/lawn07.ps; http://www.netlib.org/lapack/lawnspdf/lawn07.pdf", acknowledgement = ack-nhfb, } @TechReport{Bai:1989:BIHa, author = "Z. Bai and J. Demmel", title = "On a Block Implementation of {Hessenberg} Multishift {$ Q R $} Iteration", type = "LAPACK Working Note", number = "08", institution = inst-ANL-MCS, address = inst-ANL-MCS:adr, month = jan, year = "1989", bibdate = "Fri Apr 22 17:06:37 2005", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "ANL, MCS-TM-127, January 1989. Published in \cite{Bai:1989:BIHb}.", URL = "http://www.netlib.org/lapack/lawns/lawn08.ps; http://www.netlib.org/lapack/lawnspdf/lawn08.pdf", acknowledgement = ack-nhfb, } @TechReport{Demmel:1989:TMG, author = "J. Demmel and A. McKenney", title = "A Test Matrix Generation Suite", type = "LAPACK Working Note", number = "09", institution = inst-ANL-MCS, address = inst-ANL-MCS:adr, month = mar, year = "1989", bibdate = "Fri Apr 22 17:06:37 2005", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "ANL, MCS-P69-0389, March 1989.", URL = "http://www.netlib.org/lapack/lawns/lawn09.ps; http://www.netlib.org/lapack/lawnspdf/lawn09.pdf", acknowledgement = ack-nhfb, } @TechReport{Anderson:1989:ITI, author = "E. Anderson and J. 
Dongarra", title = "Installing and Testing the Initial Release of {LAPACK} --- {Unix} and Non-{Unix} Versions", type = "LAPACK Working Note", number = "10", institution = inst-ANL-MCS, address = inst-ANL-MCS:adr, month = may, year = "1989", bibdate = "Fri Apr 22 17:06:37 2005", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "ANL, MCS-TM-130, May 1989.", URL = "http://www.netlib.org/lapack/lawns/lawn10.ps; http://www.netlib.org/lapack/lawnspdf/lawn10.pdf", acknowledgement = ack-nhfb, } @TechReport{Deift:1989:BSV, author = "P. Deift and J. Demmel and L.-C. Li and C. Tomei", title = "The Bidiagonal Singular Value Decomposition and {Hamiltonian} Mechanics", type = "LAPACK Working Note", number = "11", institution = inst-ANL-MCS, address = inst-ANL-MCS:adr, month = aug, year = "1989", bibdate = "Fri Apr 22 17:06:37 2005", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "ANL, MCS-TM-133, August 1989. Published in \cite{Deift:1991:BSV}.", URL = "http://www.netlib.org/lapack/lawns/lawn11.ps; http://www.netlib.org/lapack/lawnspdf/lawn11.pdf", acknowledgement = ack-nhfb, } @TechReport{Mayes:1989:BCF, author = "P. Mayes and G. Radicati", title = "Banded {Cholesky} factorization using level 3 {BLAS}", type = "LAPACK Working Note", number = "12", institution = inst-ANL-MCS, address = inst-ANL-MCS:adr, month = aug, year = "1989", bibdate = "Sat Apr 23 06:29:27 2005", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "ANL, MCS-TM-134, August 1989", URL = "http://www.netlib.org/lapack/lawns/lawn12.ps; http://www.netlib.org/lapack/lawnspdf/lawn12.pdf", acknowledgement = ack-nhfb, xxnote = "Not available at Web site.", } @TechReport{Bai:1989:CNE, author = "Z. Bai and J. Demmel and A. 
McKenney", title = "On the Conditioning of the Nonsymmetric Eigenproblem: Theory and Software", type = "LAPACK Working Note", number = "13", institution = inst-UTK-CS, address = inst-UTK-CS:adr, month = oct, year = "1989", bibdate = "Fri Apr 22 17:06:37 2005", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "UT-CS-89-86, October 1989.", URL = "http://www.netlib.org/lapack/lawns/lawn13.ps; http://www.netlib.org/lapack/lawnspdf/lawn13.pdf", acknowledgement = ack-nhfb, } @TechReport{Demmel:1989:FPE, author = "J. Demmel", title = "On Floating Point Errors in {Cholesky}", type = "LAPACK Working Note", number = "14", institution = inst-UTK-CS, address = inst-UTK-CS:adr, month = oct, year = "1989", bibdate = "Fri Apr 22 17:06:37 2005", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "UT-CS-89-87, October 1989.", URL = "http://www.netlib.org/lapack/lawns/lawn14.ps; http://www.netlib.org/lapack/lawnspdf/lawn14.pdf", acknowledgement = ack-nhfb, } @TechReport{Demmel:1989:JMM, author = "J. Demmel and K. Veselic", title = "{Jacobi}'s Method is More Accurate than {$ Q R $}", type = "LAPACK Working Note", number = "15", institution = inst-UTK-CS, address = inst-UTK-CS:adr, month = oct, year = "1989", bibdate = "Fri Apr 22 17:06:37 2005", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "UT-CS-89-88, October 1989. Published in \cite{Demmel:1992:JMM}.", URL = "http://www.netlib.org/lapack/lawns/lawn15.ps; http://www.netlib.org/lapack/lawnspdf/lawn15.pdf", acknowledgement = ack-nhfb, } @TechReport{Anderson:1989:RIR, author = "E. Anderson and J. Dongarra", title = "Results from the Initial Release of {LAPACK}", type = "LAPACK Working Note", number = "16", institution = inst-UTK-CS, address = inst-UTK-CS:adr, month = nov, year = "1989", bibdate = "Fri Apr 22 17:06:37 2005", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "UT-CS-89-89, November 1989. 
(Replaced by LAWN 41 or 81!!)", URL = "http://www.netlib.org/lapack/lawns/lawn16.ps; http://www.netlib.org/lapack/lawnspdf/lawn16.pdf", acknowledgement = ack-nhfb, } @TechReport{Greenbaum:1989:EQQ, author = "A. Greenbaum and J. Dongarra", title = "Experiments with {QR\slash QL} Methods for the Symmetric Tridiagonal Eigenproblem", type = "LAPACK Working Note", number = "17", institution = inst-UTK-CS, address = inst-UTK-CS:adr, month = nov, year = "1989", bibdate = "Fri Apr 22 17:06:37 2005", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "UT-CS-89-92, November 1989.", URL = "http://www.netlib.org/lapack/lawns/lawn17.ps; http://www.netlib.org/lapack/lawnspdf/lawn17.pdf", acknowledgement = ack-nhfb, } @TechReport{Anderson:1990:IGL, author = "E. Anderson and J. Dongarra", title = "Implementation Guide for {LAPACK}", type = "LAPACK Working Note", number = "18", institution = inst-UTK-CS, address = inst-UTK-CS:adr, month = apr, year = "1990", bibdate = "Fri Apr 22 17:06:37 2005", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "UT-CS-90-101, April 1990.", URL = "http://www.netlib.org/lapack/lawns/lawn18.ps; http://www.netlib.org/lapack/lawnspdf/lawn18.pdf", acknowledgement = ack-nhfb, } @TechReport{Anderson:1990:EBA, author = "E. Anderson and J. Dongarra", title = "Evaluating Block Algorithm Variants in {LAPACK}", type = "LAPACK Working Note", number = "19", institution = inst-UTK-CS, address = inst-UTK-CS:adr, month = apr, year = "1990", bibdate = "Fri Apr 22 17:06:37 2005", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "UT-CS-90-103, April 1990.", URL = "http://www.netlib.org/lapack/lawns/lawn19.ps; http://www.netlib.org/lapack/lawnspdf/lawn19.pdf", acknowledgement = ack-nhfb, } @TechReport{Anderson:1990:LPLa, author = "E. Anderson and Z. Bai and C. Bischof and J. Demmel and J. Dongarra and J. {Du Croz} and A. Greenbaum and S. Hammarling and A. McKenney and D. 
Sorensen", title = "{LAPACK}: {A} Portable Linear Algebra Library for High-Performance Computers", type = "LAPACK Working Note", number = "20", institution = inst-UTK-CS, address = inst-UTK-CS:adr, month = may, year = "1990", bibdate = "Fri Apr 22 17:06:37 2005", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "UT-CS-90-105, May 1990. Published in \cite{Anderson:1990:LPLb}.", URL = "http://www.netlib.org/lapack/lawns/lawn20.ps; http://www.netlib.org/lapack/lawnspdf/lawn20.pdf", acknowledgement = ack-nhfb, } @TechReport{Croz:1990:FBM, author = "Jeremy {Du Croz} and Peter Mayes and Giuseppe Radicati", title = "Factorizations of Band Matrices Using Level 3 {BLAS}", type = "LAPACK Working Note", number = "21", institution = inst-UTK-CS, address = inst-UTK-CS:adr, month = jul, year = "1990", bibdate = "Sat Apr 23 06:32:16 2005", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "UT, CS-90-109, July 1990.", URL = "http://www.netlib.org/lapack/lawns/lawn21.ps; http://www.netlib.org/lapack/lawnspdf/lawn21.pdf", acknowledgement = ack-nhfb, remark = "Published in \cite[pp.~222--231]{Burkhart:1990:CVI}.", xxnote = "Not available at Web site.", } @TechReport{Demmel:1990:SBA, author = "J. Demmel and N. Higham", title = "Stability of Block Algorithms with Fast Level 3 {BLAS}", type = "LAPACK Working Note", number = "22", institution = inst-UTK-CS, address = inst-UTK-CS:adr, month = jul, year = "1990", bibdate = "Fri Apr 22 17:06:37 2005", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "UT-CS-90-110, July 1990. Published in \cite{Demmel:1992:SBA}.", URL = "http://www.netlib.org/lapack/lawns/lawn22.ps; http://www.netlib.org/lapack/lawnspdf/lawn22.pdf", acknowledgement = ack-nhfb, } @TechReport{Demmel:1990:IEB, author = "J. Demmel and N. 
Higham", title = "Improved Error Bounds for Underdetermined System Solvers", type = "LAPACK Working Note", number = "23", institution = inst-UTK-CS, address = inst-UTK-CS:adr, month = aug, year = "1990", bibdate = "Fri Apr 22 17:06:37 2005", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "UT-CS-90-113, August 1990. Published in \cite{Demmel:1993:IEB}.", URL = "http://www.netlib.org/lapack/lawns/lawn23.ps; http://www.netlib.org/lapack/lawnspdf/lawn23.pdf", acknowledgement = ack-nhfb, } @TechReport{Dongarra:1990:LBF, author = "J. Dongarra and S. Ostrouchov", title = "{LAPACK} Block Factorization Algorithms on the {Intel iPSC\slash 860}", type = "LAPACK Working Note", number = "24", institution = inst-UTK-CS, address = inst-UTK-CS:adr, month = oct, year = "1990", bibdate = "Fri Apr 22 17:06:37 2005", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "UT-CS-90-115, October, 1990.", URL = "http://www.netlib.org/lapack/lawns/lawn24.ps; http://www.netlib.org/lapack/lawnspdf/lawn24.pdf", acknowledgement = ack-nhfb, } @TechReport{Dongarra:1990:NCC, author = "J. Dongarra and S. Hammarling and J. Wilkinson", title = "Numerical Considerations in Computing Invariant Subspaces", type = "LAPACK Working Note", number = "25", institution = inst-UTK-CS, address = inst-UTK-CS:adr, month = oct, year = "1990", bibdate = "Fri Apr 22 17:06:37 2005", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "UT-CS-90-117, October, 1990. Published in \cite{Dongarra:1992:NCC}.", URL = "http://www.netlib.org/lapack/lawns/lawn25.ps; http://www.netlib.org/lapack/lawnspdf/lawn25.pdf", acknowledgement = ack-nhfb, } @TechReport{Anderson:1990:PEL, author = "E. Anderson and C. Bischof and J. Demmel and J. Dongarra and J. {Du Croz} and S. Hammarling and W. 
Kahan", title = "Prospectus for an Extension to {LAPACK}: {A} Portable Linear Algebra Library for High-Performance Computers", type = "LAPACK Working Note", number = "26", institution = inst-UTK-CS, address = inst-UTK-CS:adr, pages = "10", month = nov, year = "1990", bibdate = "Fri Apr 22 17:06:37 2005", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "UT-CS-90-118, November 1990.", URL = "http://www.netlib.org/lapack/lawns/lawn26.ps; http://www.netlib.org/lapack/lawnspdf/lawn26.pdf", acknowledgement = ack-nhfb, } @TechReport{DuCroz:1990:SMM, author = "J. {Du Croz} and N. Higham", title = "Stability of Methods for Matrix Inversion", type = "LAPACK Working Note", number = "27", institution = inst-UTK-CS, address = inst-UTK-CS:adr, month = oct, year = "1990", bibdate = "Fri Apr 22 17:06:37 2005", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "UT-CS-90-119, October, 1990. Published in \cite{Croz:1992:SMM}.", URL = "http://www.netlib.org/lapack/lawns/lawn27.ps; http://www.netlib.org/lapack/lawnspdf/lawn27.pdf", acknowledgement = ack-nhfb, } @TechReport{Dongarra:1990:IRS, author = "J. Dongarra and P. Mayes and G. Radicati", title = "The {IBM RISC System\slash 6000} and Linear Algebra Operations", type = "LAPACK Working Note", number = "28", institution = inst-UTK-CS, address = inst-UTK-CS:adr, month = dec, year = "1990", bibdate = "Fri Apr 22 17:06:37 2005", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "UT-CS-90-122, December 1990. Published in \cite{Dongarra:1991:IRS}.", URL = "http://www.netlib.org/lapack/lawns/lawn28.ps; http://www.netlib.org/lapack/lawnspdf/lawn28.pdf", acknowledgement = ack-nhfb, } @TechReport{vandeGeijn:1991:GCO, author = "R. 
van de Geijn", title = "On Global Combine Operations", type = "LAPACK Working Note", number = "29", institution = inst-UTK-CS, address = inst-UTK-CS:adr, month = apr, year = "1991", bibdate = "Fri Apr 22 17:06:37 2005", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "UT-CS-91-129, April 1991. Published in \cite{vandeGeijn:1994:GCO}.", URL = "http://www.netlib.org/lapack/lawns/lawn29.ps; http://www.netlib.org/lapack/lawnspdf/lawn29.pdf", acknowledgement = ack-nhfb, } @TechReport{Dongarra:1991:RCF, author = "J. Dongarra and R. van de Geijn", title = "Reduction to Condensed Form for the Eigenvalue Problem on Distributed Memory Architectures", type = "LAPACK Working Note", number = "30", institution = inst-UTK-CS, address = inst-UTK-CS:adr, month = apr, year = "1991", bibdate = "Fri Apr 22 17:06:37 2005", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "UT-CS-91-130, April 1991. Published in \cite{Dongarra:1992:RCFb}.", URL = "http://www.netlib.org/lapack/lawns/lawn30.ps; http://www.netlib.org/lapack/lawnspdf/lawn30.pdf", acknowledgement = ack-nhfb, } @TechReport{Anderson:1991:GQF, author = "E. Anderson and Z. Bai and J. Dongarra", title = "Generalized {$ Q R $} Factorization and its Applications", type = "LAPACK Working Note", number = "31", institution = inst-UTK-CS, address = inst-UTK-CS:adr, month = apr, year = "1991", bibdate = "Fri Apr 22 17:06:37 2005", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "UT-CS-91-131, April 1991. Published in \cite{Anderson:1992:GFA}.", URL = "http://www.netlib.org/lapack/lawns/lawn31.ps; http://www.netlib.org/lapack/lawnspdf/lawn31.pdf", acknowledgement = ack-nhfb, } @TechReport{Bischof:1991:GIC, author = "C. Bischof and P. T. P. 
Tang", title = "Generalized Incremental Condition Estimation", type = "LAPACK Working Note", number = "32", institution = inst-UTK-CS, address = inst-UTK-CS:adr, month = may, year = "1991", bibdate = "Fri Apr 22 17:06:37 2005", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "UT-CS-91-132, May 1991. Published in \cite{Bischof:1992:GIC}.", URL = "http://www.netlib.org/lapack/lawns/lawn32.ps; http://www.netlib.org/lapack/lawnspdf/lawn32.pdf", acknowledgement = ack-nhfb, } @TechReport{Bischof:1991:RIC, author = "C. Bischof and P. T. P. Tang", title = "Robust Incremental Condition Estimation", type = "LAPACK Working Note", number = "33", institution = inst-UTK-CS, address = inst-UTK-CS:adr, month = may, year = "1991", bibdate = "Fri Apr 22 17:06:37 2005", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "UT-CS-91-133, May 1991.", URL = "http://www.netlib.org/lapack/lawns/lawn33.ps; http://www.netlib.org/lapack/lawnspdf/lawn33.pdf", acknowledgement = ack-nhfb, } @TechReport{Dongarra:1991:WB, author = "J. J. Dongarra", title = "Workshop on the {BLACS}", type = "LAPACK Working Note", number = "34", institution = inst-UTK-CS, address = inst-UTK-CS:adr, month = may, year = "1991", bibdate = "Fri Apr 22 17:06:37 2005", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "UT-CS-91-134, May 1991.", URL = "http://www.netlib.org/lapack/lawns/lawn34.ps; http://www.netlib.org/lapack/lawnspdf/lawn34.pdf", acknowledgement = ack-nhfb, } @TechReport{Anderson:1991:IGL, author = "E. Anderson and J. Dongarra and S. 
Ostrouchov", title = "Implementation guide for {LAPACK}", type = "LAPACK Working Note", number = "35", institution = inst-UTK-CS, address = inst-UTK-CS:adr, month = aug, year = "1991", bibdate = "Fri Apr 22 17:06:37 2005", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "UT-CS-91-138, August 1991.", URL = "http://www.netlib.org/lapack/lawns/lawn35.ps; http://www.netlib.org/lapack/lawnspdf/lawn35.pdf", acknowledgement = ack-nhfb, } @TechReport{Anderson:1991:RTS, author = "E. Anderson", title = "Robust Triangular solvers", type = "LAPACK Working Note", number = "36", institution = inst-UTK-CS, address = inst-UTK-CS:adr, month = aug, year = "1991", bibdate = "Fri Apr 22 17:06:37 2005", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "UT-CS-91-142, August, 1991.", URL = "http://www.netlib.org/lapack/lawns/lawn36.ps; http://www.netlib.org/lapack/lawnspdf/lawn36.pdf", acknowledgement = ack-nhfb, } @TechReport{Dongarra:1991:TDB, author = "Jack J. Dongarra and Robert A. van de Geijn", title = "Two Dimensional Basic Linear Algebra Communication Subprograms", type = "LAPACK Working Note", number = "37", institution = inst-UTK-CS, address = inst-UTK-CS:adr, month = oct, year = "1991", bibdate = "Fri Apr 22 17:06:37 2005", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "UT-CS-91-138, October, 1991. Published in \cite{Dongarra:1993:TDB}.", URL = "http://www.netlib.org/lapack/lawns/lawn37.ps; http://www.netlib.org/lapack/lawnspdf/lawn37.pdf", acknowledgement = ack-nhfb, } @TechReport{Bai:1991:DAC, author = "Zhaojun Bai and James W. 
Demmel", title = "On a Direct Algorithm for Computing Invariant Subspaces with Specified Eigenvalues", type = "LAPACK Working Note", number = "38", institution = inst-UTK-CS, address = inst-UTK-CS:adr, month = nov, year = "1991", bibdate = "Fri Apr 22 17:06:37 2005", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "UT-CS-91-139, November, 1991.", URL = "http://www.netlib.org/lapack/lawns/lawn38.ps; http://www.netlib.org/lapack/lawnspdf/lawn38.pdf", acknowledgement = ack-nhfb, } @TechReport{Demmel:1991:DPH, author = "James Demmel and Jack Dongarra and W. Kahan", title = "On Designing Portable High Performance Numerical Libraries", type = "LAPACK Working Note", number = "39", institution = inst-UTK-CS, address = inst-UTK-CS:adr, month = jul, year = "1991", bibdate = "Fri Apr 22 17:06:37 2005", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "UT-CS-91-141, July, 1991. Published in \cite{Demmel:1992:DPH}.", URL = "http://www.netlib.org/lapack/lawns/lawn39.ps; http://www.netlib.org/lapack/lawnspdf/lawn39.pdf", acknowledgement = ack-nhfb, } @TechReport{Demmel:1992:BLF, author = "James Demmel and Nick Higham and Rob Schreiber", title = "Block {$ L U $} Factorization", type = "LAPACK Working Note", number = "40", institution = inst-UTK-CS, address = inst-UTK-CS:adr, month = feb, year = "1992", bibdate = "Fri Apr 22 17:06:37 2005", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "UT-CS-92-149, February 1992.", URL = "http://www.netlib.org/lapack/lawns/lawn40.ps; http://www.netlib.org/lapack/lawnspdf/lawn40.pdf", acknowledgement = ack-nhfb, } @TechReport{Blackford:1992:IGL, author = "Susan Blackford and Jack Dongarra", title = "Installation Guide for {LAPACK}", type = "LAPACK Working Note", number = "41", institution = inst-UTK-CS, address = inst-UTK-CS:adr, month = mar, year = "1992", bibdate = "Fri Apr 22 17:06:37 2005", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "UT-CS-92-151, March, 
1992.", URL = "http://www.netlib.org/lapack/lawns/lawn41.ps; http://www.netlib.org/lapack/lawnspdf/lawn41.pdf", acknowledgement = ack-nhfb, } @TechReport{Higham:1992:PTB, author = "Nick Higham", title = "Perturbation Theory and Backward Error for {$ A X - X B = C $}", type = "LAPACK Working Note", number = "42", institution = inst-UTK-CS, address = inst-UTK-CS:adr, month = apr, year = "1992", bibdate = "Fri Apr 22 17:06:37 2005", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "UT-CS-92-153, April, 1992. Published in \cite{Higham:1993:PTB}.", URL = "http://www.netlib.org/lapack/lawns/lawn42.ps; http://www.netlib.org/lapack/lawnspdf/lawn42.pdf", acknowledgement = ack-nhfb, } @TechReport{Dongarra:1992:LSD, author = "Jack Dongarra and Robert van de Geijn and David Walker", title = "A Look at Scalable Dense Linear Algebra Libraries", type = "LAPACK Working Note", number = "43", institution = inst-UTK-CS, address = inst-UTK-CS:adr, month = apr, year = "1992", bibdate = "Fri Apr 22 17:06:37 2005", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "UT-CS-92-155, April, 1992. Published in \cite{Dongarra:1992:LASb}.", URL = "http://www.netlib.org/lapack/lawns/lawn43.ps; http://www.netlib.org/lapack/lawnspdf/lawn43.pdf", acknowledgement = ack-nhfb, } @TechReport{Anderson:1992:PLP, author = "Edward Anderson and Jack Dongarra", title = "Performance of {LAPACK}: {A} Portable Library of Numerical Linear Algebra Routines", type = "LAPACK Working Note", number = "44", institution = inst-UTK-CS, address = inst-UTK-CS:adr, month = may, year = "1992", bibdate = "Fri Apr 22 17:06:37 2005", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "UT-CS-92-156, May 1992. Published in \cite{Anderson:1993:PLP}.", URL = "http://www.netlib.org/lapack/lawns/lawn44.ps; http://www.netlib.org/lapack/lawnspdf/lawn44.pdf", acknowledgement = ack-nhfb, } @TechReport{Demmel:1992:III, author = "J. 
Demmel",
  title = "The Inherent Inaccuracy of Implicit Tridiagonal {$ Q R $}",
  type = "LAPACK Working Note",
  number = "45",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = may,
  year = "1992",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-92-162, May 1992.",
  URL = "http://www.netlib.org/lapack/lawns/lawn45.ps; http://www.netlib.org/lapack/lawnspdf/lawn45.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Note 46
@TechReport{Bai:1992:CGS,
  author = "Z. Bai and J. Demmel",
  title = "Computing the Generalized Singular Value Decomposition",
  type = "LAPACK Working Note",
  number = "46",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = may,
  year = "1992",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-92-163, May 1992. Published in \cite{Bai:1993:CGS}.",
  URL = "http://www.netlib.org/lapack/lawns/lawn46.ps; http://www.netlib.org/lapack/lawnspdf/lawn46.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Note 47
@TechReport{Demmel:1992:OPN,
  author = "J. Demmel",
  title = "Open Problems in Numerical Linear Algebra",
  type = "LAPACK Working Note",
  number = "47",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = may,
  year = "1992",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-92-164, May 1992.",
  URL = "http://www.netlib.org/lapack/lawns/lawn47.ps; http://www.netlib.org/lapack/lawnspdf/lawn47.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Note 48
@TechReport{Demmel:1992:CAS,
  author = "J. Demmel and W. Gragg",
  title = "On Computing Accurate Singular Values and Eigenvalues of Matrices with Acyclic Graphs",
  type = "LAPACK Working Note",
  number = "48",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = may,
  year = "1992",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-92-166, May 1992.
Published in \cite{Demmel:1993:CAS}.",
  URL = "http://www.netlib.org/lapack/lawns/lawn48.ps; http://www.netlib.org/lapack/lawnspdf/lawn48.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Note 49
@TechReport{Demmel:1992:SFP,
  author = "J. Demmel",
  title = "A Specification for Floating Point Parallel Prefix",
  type = "LAPACK Working Note",
  number = "49",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = may,
  year = "1992",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-92-167, May 1992.",
  URL = "http://www.netlib.org/lapack/lawns/lawn49.ps; http://www.netlib.org/lapack/lawnspdf/lawn49.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Note 50
@TechReport{Eijkhout:1992:DSD,
  author = "Victor Eijkhout",
  title = "Distributed Sparse Data Structures for Linear Algebra Operations",
  type = "LAPACK Working Note",
  number = "50",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = may,
  year = "1992",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-92-169, May 1992.",
  URL = "http://www.netlib.org/lapack/lawns/lawn50.ps; http://www.netlib.org/lapack/lawnspdf/lawn50.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Note 51
@TechReport{Eijkhout:1992:QPC,
  author = "Victor Eijkhout",
  title = "Qualitative Properties of the Conjugate Gradient and {Lanczos} Methods in a Matrix Framework",
  type = "LAPACK Working Note",
  number = "51",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = may,
  year = "1992",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-92-170, May 1992.",
  URL = "http://www.netlib.org/lapack/lawns/lawn51.ps; http://www.netlib.org/lapack/lawnspdf/lawn51.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Note 52
@TechReport{Heath:1992:CPN,
  author = "Michael T.
Heath and Padma Raghavan",
  title = "A {Cartesian} Parallel Nested Dissection Algorithm",
  type = "LAPACK Working Note",
  number = "52",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = jun,
  year = "1992",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-92-178, June 1992. Published in \cite{Heath:1995:CPN}.",
  URL = "http://www.netlib.org/lapack/lawns/lawn52.ps; http://www.netlib.org/lapack/lawnspdf/lawn52.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Note 53
@TechReport{Demmel:1992:TPN,
  author = "J. W. Demmel",
  title = "Trading Off Parallelism and Numerical Stability",
  type = "LAPACK Working Note",
  number = "53",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = jun,
  year = "1992",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-92-179, June 1992. Published in \cite{Demmel:1993:TPN}.",
  URL = "http://www.netlib.org/lapack/lawns/lawn53.ps; http://www.netlib.org/lapack/lawnspdf/lawn53.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Note 54
@TechReport{Bai:1992:SDB,
  author = "Z. Bai and J. W. Demmel",
  title = "On Swapping Diagonal Blocks in Real {Schur} Form",
  type = "LAPACK Working Note",
  number = "54",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = oct,
  year = "1992",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-92-182, October 1992. Published in \cite{Bai:1993:SDB}.",
  URL = "http://www.netlib.org/lapack/lawns/lawn54.ps; http://www.netlib.org/lapack/lawnspdf/lawn54.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Note 55
@TechReport{Choi:1992:SSLa,
  author = "J. Choi and J. Dongarra and R. Pozo and D.
Walker",
  title = "{ScaLAPACK}: {A} Scalable Linear Algebra Library for Distributed Memory Concurrent Computers",
  type = "LAPACK Working Note",
  number = "55",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = nov,
  year = "1992",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-92-181, November 1992. Published in \cite{Choi:1992:SSLb}.",
  URL = "http://www.netlib.org/lapack/lawns/lawn55.ps; http://www.netlib.org/lapack/lawnspdf/lawn55.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Note 56
@TechReport{DAzevedo:1993:RCC,
  author = "E. F. D'Azevedo and V. L. Eijkhout and C. H. Romine",
  title = "Reducing Communication Costs in the Conjugate Gradient Algorithm on Distributed Memory Multiprocessors",
  type = "LAPACK Working Note",
  number = "56",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = jan,
  year = "1993",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-93-185, January 1993.",
  URL = "http://www.netlib.org/lapack/lawns/lawn56.ps; http://www.netlib.org/lapack/lawnspdf/lawn56.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Note 57
@TechReport{Choi:1993:PPU,
  author = "Jaeyoung Choi and Jack J. Dongarra and David W. Walker",
  title = "{PUMMA}: {Parallel Universal Matrix Multiplication Algorithms} on Distributed Memory Concurrent Computers",
  type = "LAPACK Working Note",
  number = "57",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = may,
  year = "1993",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-93-187, May 1993.
Published in \cite{Choi:1994:PPU}.",
  URL = "http://www.netlib.org/lapack/lawns/lawn57.ps; http://www.netlib.org/lapack/lawnspdf/lawn57.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Note 58
@TechReport{Dongarra:1993:DLA,
  author = "Jack Dongarra and David Walker",
  title = "The Design of Linear Algebra Libraries for High Performance Computers",
  type = "LAPACK Working Note",
  number = "58",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = jun,
  year = "1993",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-93-188, June 1993.",
  URL = "http://www.netlib.org/lapack/lawns/lawn58.ps; http://www.netlib.org/lapack/lawnspdf/lawn58.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Note 59
@TechReport{Demmel:1993:FNA,
  author = "James W. Demmel and Xiaoye Li",
  title = "Faster Numerical Algorithms via Exception Handling",
  type = "LAPACK Working Note",
  number = "59",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = mar,
  year = "1993",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-93-192, March 1993. Published in \cite{Demmel:1994:FNA}.",
  URL = "http://www.netlib.org/lapack/lawns/lawn59.ps; http://www.netlib.org/lapack/lawnspdf/lawn59.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Note 60
@TechReport{Demmel:1993:PNLa,
  author = "James W. Demmel and Michael T. Heath and Henk A. van der Vorst",
  title = "Parallel Numerical Linear Algebra",
  type = "LAPACK Working Note",
  number = "60",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = mar,
  year = "1993",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-93-192, March 1993. Published in \cite{Demmel:1993:PNLb}.",
  URL = "http://www.netlib.org/lapack/lawns/lawn60.ps; http://www.netlib.org/lapack/lawnspdf/lawn60.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Note 61
@TechReport{Dongarra:1993:OOD,
  author = "J. Dongarra and R. Pozo and D.
Walker",
  title = "An Object Oriented Design for High Performance Linear Algebra on Distributed Memory Architectures",
  type = "LAPACK Working Note",
  number = "61",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = aug,
  year = "1993",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-93-200, August 1993.",
  URL = "http://www.netlib.org/lapack/lawns/lawn61.ps; http://www.netlib.org/lapack/lawnspdf/lawn61.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Note 62
@TechReport{Heath:1993:DSS,
  author = "Michael T. Heath and Padma Raghavan",
  title = "Distributed Solution of Sparse Linear Systems",
  type = "LAPACK Working Note",
  number = "62",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = aug,
  year = "1993",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-93-201, August 1993.",
  URL = "http://www.netlib.org/lapack/lawns/lawn62.ps; http://www.netlib.org/lapack/lawnspdf/lawn62.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Note 63
@TechReport{Heath:1993:LPS,
  author = "Michael T. Heath and Padma Raghavan",
  title = "Line and Plane Separators",
  type = "LAPACK Working Note",
  number = "63",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = aug,
  year = "1993",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-93-202, August 1993.",
  URL = "http://www.netlib.org/lapack/lawns/lawn63.ps; http://www.netlib.org/lapack/lawnspdf/lawn63.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Note 64
@TechReport{Raghavan:1993:DSG,
  author = "Padma Raghavan",
  title = "Distributed Sparse {Gaussian} Elimination and Orthogonal Factorization",
  type = "LAPACK Working Note",
  number = "64",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = aug,
  year = "1993",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-93-203, August 1993.
Published in \cite{Raghavan:1995:DSG}.",
  URL = "http://www.netlib.org/lapack/lawns/lawn64.ps; http://www.netlib.org/lapack/lawnspdf/lawn64.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Note 65
@TechReport{Choi:1993:PMT,
  author = "Jaeyoung Choi and Jack J. Dongarra and David W. Walker",
  title = "Parallel Matrix Transpose Algorithms on Distributed Memory Concurrent Computers",
  type = "LAPACK Working Note",
  number = "65",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = nov,
  year = "1993",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-93-215, November, 1993. Published in \cite{Choi:1994:PMT}.",
  URL = "http://www.netlib.org/lapack/lawns/lawn65.ps; http://www.netlib.org/lapack/lawnspdf/lawn65.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Note 66
@TechReport{Eijkhout:1993:CPI,
  author = "Victor Eijkhout",
  title = "A Characterization of Polynomial Iterative Methods",
  type = "LAPACK Working Note",
  number = "66",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = nov,
  year = "1993",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-93-216, November 1993.",
  URL = "http://www.netlib.org/lapack/lawns/lawn66.ps; http://www.netlib.org/lapack/lawnspdf/lawn66.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Note 67
@TechReport{Desprez:1993:PCF,
  author = "F. Desprez and J. Dongarra and B. Tourancheau",
  title = "Performance Complexity of {$ L U $} Factorization with Efficient Pipelining and Overlap on a Multiprocessor",
  type = "LAPACK Working Note",
  number = "67",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = dec,
  year = "1993",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-93-218, December, 1993.",
  URL = "http://www.netlib.org/lapack/lawns/lawn67.ps; http://www.netlib.org/lapack/lawnspdf/lawn67.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Note 68
@TechReport{Berry:1994:HPA,
  author = "Michael W. Berry and Jack J.
Dongarra and Youngbae Kim",
  title = "A Highly Parallel Algorithm for the Reduction of a Nonsymmetric Matrix to Block Upper-{Hessenberg} Form",
  type = "LAPACK Working Note",
  number = "68",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = feb,
  year = "1994",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-94-221, February 1994.",
  URL = "http://www.netlib.org/lapack/lawns/lawn68.ps; http://www.netlib.org/lapack/lawnspdf/lawn68.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Note 69
@TechReport{Rutter:1994:SIC,
  author = "J. Rutter",
  title = "A Serial Implementation of {Cuppen}'s Divide and Conquer Algorithm for the Symmetric Eigenvalue Problem",
  type = "LAPACK Working Note",
  number = "69",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = mar,
  year = "1994",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-94-225, March 1994.",
  URL = "http://www.netlib.org/lapack/lawns/lawn69.ps; http://www.netlib.org/lapack/lawnspdf/lawn69.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Note 70
@TechReport{Demmel:1994:CPB,
  author = "James Demmel and Inderjit Dhillon and Huan Ren",
  title = "On the Correctness of Parallel Bisection in Floating Point",
  type = "LAPACK Working Note",
  number = "70",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = mar,
  year = "1994",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-94-228, March 1994.
Published in \cite{Demmel:1995:CSB}.",
  URL = "http://www.netlib.org/lapack/lawns/lawn70.ps; http://www.netlib.org/lapack/lawnspdf/lawn70.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Note 71
@TechReport{Dongarra:1994:IRP,
  author = "Jack Dongarra and Michael Kolatis",
  title = "{IBM RS\slash 6000-550 \& -590} Performance for Selected Routines in {ESSL}",
  type = "LAPACK Working Note",
  number = "71",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = apr,
  year = "1994",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-94-231, April 1994.",
  URL = "http://www.netlib.org/lapack/lawns/lawn71.ps; http://www.netlib.org/lapack/lawnspdf/lawn71.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Note 72
@TechReport{Lehoucq:1995:CEU,
  author = "R. Lehoucq",
  title = "The Computation of Elementary Unitary Matrices",
  type = "LAPACK Working Note",
  number = "72",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = oct,
  year = "1995",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-94-233, October 1995. Published in \cite{Lehoucq:1996:CEU}.",
  URL = "http://www.netlib.org/lapack/lawns/lawn72.ps; http://www.netlib.org/lapack/lawnspdf/lawn72.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Note 73
@TechReport{Whaley:1994:BLA,
  author = "R. Clint Whaley",
  title = "Basic Linear Algebra Communication Subprograms: Analysis and Implementation Across Multiple Parallel Architectures",
  type = "LAPACK Working Note",
  number = "73",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = may,
  year = "1994",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-94-234, May 1994.",
  URL = "http://www.netlib.org/lapack/lawns/lawn73.ps; http://www.netlib.org/lapack/lawnspdf/lawn73.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Note 74
@TechReport{Dongarra:1994:SMLa,
  author = "J. Dongarra and A. Lumsdaine and X. Niu and R. Pozo and K.
Remington",
  title = "A Sparse Matrix Library in {C++} for High Performance Architectures",
  type = "LAPACK Working Note",
  number = "74",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = jul,
  year = "1994",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-94-236, July 1994. Published in \cite{Dongarra:1994:SMLb}.",
  URL = "http://www.netlib.org/lapack/lawns/lawn74.ps; http://www.netlib.org/lapack/lawnspdf/lawn74.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Note 75
@TechReport{Kaagstrom:1994:LSA,
  author = "Bo K{\aa}gstr{\"o}m and Peter Poromaa",
  title = "{LAPACK}-Style Algorithms and Software for Solving the Generalized {Sylvester} Equation and Estimating the Separation Between Regular Matrix Pairs",
  type = "LAPACK Working Note",
  number = "75",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = jul,
  year = "1994",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-94-237, July 1994. Published in \cite{Kaagstrom:1996:LSA}.",
  URL = "http://www.netlib.org/lapack/lawns/lawn75.ps; http://www.netlib.org/lapack/lawnspdf/lawn75.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Note 76
@TechReport{Barrett:1994:ABI,
  author = "Richard Barrett and Michael Berry and Jack Dongarra and Victor Eijkhout and Charles Romine",
  title = "Algorithmic Bombardment for the Iterative Solution of Linear Systems: {A} Poly-Iterative Approach",
  type = "LAPACK Working Note",
  number = "76",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = aug,
  year = "1994",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-94-239, August, 1994.
Published in \cite{Barrett:1996:ABI}.",
  URL = "http://www.netlib.org/lapack/lawns/lawn76.ps; http://www.netlib.org/lapack/lawnspdf/lawn76.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Note 77
@TechReport{Eijkhout:1994:BCD,
  author = "Victor Eijkhout and Roldan Pozo",
  title = "Basic Concepts for Distributed Sparse Linear Algebra Operations",
  type = "LAPACK Working Note",
  number = "77",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = aug,
  year = "1994",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-94-240, August, 1994.",
  URL = "http://www.netlib.org/lapack/lawns/lawn77.ps; http://www.netlib.org/lapack/lawnspdf/lawn77.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Note 78
@TechReport{Eijkhout:1994:CVC,
  author = "Victor Eijkhout",
  title = "Computational Variants of the {CGS} and {BiCGstab} Methods",
  type = "LAPACK Working Note",
  number = "78",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = aug,
  year = "1994",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-94-241, August, 1994.",
  URL = "http://www.netlib.org/lapack/lawns/lawn78.ps; http://www.netlib.org/lapack/lawnspdf/lawn78.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Note 79
@TechReport{Henry:1994:PQA,
  author = "Greg Henry and Robert van de Geijn",
  title = "Parallelizing the {$ Q R $} Algorithm for the Unsymmetric Algebraic Eigenvalue Problem: Myths and Reality",
  type = "LAPACK Working Note",
  number = "79",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = aug,
  year = "1994",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-94-244, August, 1994. Published in \cite{Henry:1996:PAU}.",
  URL = "http://www.netlib.org/lapack/lawns/lawn79.ps; http://www.netlib.org/lapack/lawnspdf/lawn79.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Note 80
@TechReport{Choi:1994:DIS,
  author = "J. Choi and J. J. Dongarra and S. Ostrouchov and A. P. Petitet and D. W. Walker and R.
C. Whaley",
  title = "The Design and Implementation of the {ScaLAPACK} {$ L U $}, {$ Q R $}, and {Cholesky} Factorization Routines",
  type = "LAPACK Working Note",
  number = "80",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = sep,
  year = "1994",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-94-246, September, 1994. Published in \cite{Choi:1996:DIS}.",
  URL = "http://www.netlib.org/lapack/lawns/lawn80.ps; http://www.netlib.org/lapack/lawnspdf/lawn80.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Note 81
@TechReport{Blackford:1994:QIG,
  author = "S. Blackford and J. Dongarra",
  title = "Quick Installation Guide for {LAPACK} on {Unix} Systems",
  type = "LAPACK Working Note",
  number = "81",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = sep,
  year = "1994",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-94-249, September, 1994.",
  URL = "http://www.netlib.org/lapack/lawns/lawn81.ps; http://www.netlib.org/lapack/lawnspdf/lawn81.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Note 82
@TechReport{Dongarra:1994:CCI,
  author = "J. Dongarra and M.
Kolatis",
  title = "Call Conversion Interface ({CCI}) for {LAPACK\slash ESSL}",
  type = "LAPACK Working Note",
  number = "82",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = aug,
  year = "1994",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-94-250, August, 1994.",
  URL = "http://www.netlib.org/lapack/lawns/lawn82.ps; http://www.netlib.org/lapack/lawnspdf/lawn82.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Note 83
@TechReport{Li:1994:RPB,
  author = "Ren-Cang Li",
  title = "Relative Perturbation Bounds for the Unitary Polar Factor",
  type = "LAPACK Working Note",
  number = "83",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = sep,
  year = "1994",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-94-251, September, 1994. Published in \cite{Li:1997:RPB}.",
  URL = "http://www.netlib.org/lapack/lawns/lawn83.ps; http://www.netlib.org/lapack/lawnspdf/lawn83.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Note 84
@TechReport{Li:1994:RPTa,
  author = "Ren-Cang Li",
  title = "Relative Perturbation Theory: ({I}) Eigenvalue Variations",
  type = "LAPACK Working Note",
  number = "84",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = sep,
  year = "1994",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-94-252, September, 1994. Published in \cite{Li:1998:RPT}.",
  URL = "http://www.netlib.org/lapack/lawns/lawn84.ps; http://www.netlib.org/lapack/lawnspdf/lawn84.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Note 85
@TechReport{Li:1994:RPTb,
  author = "Ren-Cang Li",
  title = "Relative Perturbation Theory: ({II}) Eigenspace Variations",
  type = "LAPACK Working Note",
  number = "85",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = sep,
  year = "1994",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-94-253, September, 1994.
Published in \cite{Li:1999:RPT}.",
  URL = "http://www.netlib.org/lapack/lawns/lawn85.ps; http://www.netlib.org/lapack/lawnspdf/lawn85.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Note 86
@TechReport{Demmel:1994:PFE,
  author = "J. Demmel and K. Stanley",
  title = "The Performance of Finding Eigenvalues and Eigenvectors of Dense Symmetric Matrices on Distributed Memory Computers",
  type = "LAPACK Working Note",
  number = "86",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = sep,
  year = "1994",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-94-254, September, 1994.",
  URL = "http://www.netlib.org/lapack/lawns/lawn86.ps; http://www.netlib.org/lapack/lawnspdf/lawn86.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Note 87
@TechReport{Kaagstrom:1994:CES,
  author = "B. K{\aa}gstr{\"o}m and P. Poromaa",
  title = "Computing Eigenspaces with Specified Eigenvalues of a Regular Matrix Pair ({A},{B}) and Condition Estimation: Theory, Algorithms and Software",
  type = "LAPACK Working Note",
  number = "87",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = sep,
  year = "1994",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-94-255, September, 1994.
Published in \cite{Kaagstrom:1996:CES}.",
  URL = "http://www.netlib.org/lapack/lawns/lawn87.ps; http://www.netlib.org/lapack/lawnspdf/lawn87.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Note 88
@TechReport{Gu:1994:ECS,
  author = "Ming Gu and James Demmel and Inderjit Dhillon",
  title = "Efficient Computation of the Singular Value Decomposition with Applications to Least Squares Problems",
  type = "LAPACK Working Note",
  number = "88",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = oct,
  year = "1994",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-94-257, October, 1994.",
  URL = "http://www.netlib.org/lapack/lawns/lawn88.ps; http://www.netlib.org/lapack/lawnspdf/lawn88.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Note 89
@TechReport{Li:1994:SSE,
  author = "Ren-Cang Li",
  title = "Solving Secular Equations Stably and Efficiently",
  type = "LAPACK Working Note",
  number = "89",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = nov,
  year = "1994",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-94-260, November, 1994.",
  URL = "http://www.netlib.org/lapack/lawns/lawn89.ps; http://www.netlib.org/lapack/lawnspdf/lawn89.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Note 90
@TechReport{Plank:1994:ABD,
  author = "J. S. Plank and Y. Kim and J. J. Dongarra",
  title = "Algorithm-Based Diskless Checkpointing for Fault Tolerant Matrix Operations",
  type = "LAPACK Working Note",
  number = "90",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = dec,
  year = "1994",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-94-268, December 1994. Published in \cite{Plank:1995:ADC}.",
  URL = "http://www.netlib.org/lapack/lawns/lawn90.ps; http://www.netlib.org/lapack/lawnspdf/lawn90.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Note 91
@TechReport{Bai:1995:SDN,
  author = "Z. Bai and J. Demmel and J. Dongarra and A. Petitet and H. Robinson and K.
Stanley",
  title = "The Spectral Decomposition of Nonsymmetric Matrices on Distributed Memory Parallel Computers",
  type = "LAPACK Working Note",
  number = "91",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = jan,
  year = "1995",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-95-273, January 1995. Published in \cite{Bai:1997:SDN}.",
  URL = "http://www.netlib.org/lapack/lawns/lawn91.ps; http://www.netlib.org/lapack/lawnspdf/lawn91.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Note 92
@TechReport{Choi:1995:DPDa,
  author = "J. Choi and J. Dongarra and D. Walker",
  title = "The Design of a Parallel Dense Linear Algebra Software Library: Reduction to {Hessenberg}, Tridiagonal, and Bidiagonal Form",
  type = "LAPACK Working Note",
  number = "92",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = feb,
  year = "1995",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-95-275, February 1995. Published in \cite{Choi:1994:DPD,Choi:1995:DPDb}.",
  URL = "http://www.netlib.org/lapack/lawns/lawn92.ps; http://www.netlib.org/lapack/lawnspdf/lawn92.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Note 93
@TechReport{Choi:2001:IGS,
  author = "J. Choi and J. Demmel and I. Dhillon and J. Dongarra and S. Ostrouchov and A. Petitet and K. Stanley and D. Walker and R. C. Whaley",
  title = "Installation Guide for {ScaLAPACK}",
  type = "LAPACK Working Note",
  number = "93",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  day = "31",
  month = aug,
  year = "2001",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "Updated August 31, 2001 (Version 1.7).",
  URL = "http://www.netlib.org/lapack/lawns/lawn93.ps; http://www.netlib.org/lapack/lawnspdf/lawn93.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Note 94
@TechReport{Dongarra:1997:UGB,
  author = "J. Dongarra and R. C.
Whaley",
  title = "A User's Guide to the {BLACS v1.1}",
  type = "LAPACK Working Note",
  number = "94",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  day = "5",
  month = may,
  year = "1997",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "Updated May 5, 1997 (Version 1.1).",
  URL = "http://www.netlib.org/lapack/lawns/lawn94.ps; http://www.netlib.org/lapack/lawnspdf/lawn94.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Note 95
@TechReport{Choi:1995:SPL,
  author = "J. Choi and J. Demmel and I. Dhillon and J. Dongarra and S. Ostrouchov and A. Petitet and K. Stanley and D. Walker and R. C. Whaley",
  title = "{ScaLAPACK}: {A} Portable Linear Algebra Library for Distributed Memory Computers --- Design Issues and Performance",
  type = "LAPACK Working Note",
  number = "95",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = mar,
  year = "1995",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-95-283, March 1995. Published in \cite{Blackford:1996:SPL}.",
  URL = "http://www.netlib.org/lapack/lawns/lawn95.ps; http://www.netlib.org/lapack/lawnspdf/lawn95.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Note 96
@TechReport{vandeGeijn:1995:SSU,
  author = "R. A. van de Geijn and J. Watts",
  title = "{SUMMA}: {Scalable Universal Matrix Multiplication Algorithm}",
  type = "LAPACK Working Note",
  number = "96",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = apr,
  year = "1995",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-95-286, April 1995. Published in \cite{vandeGeijn:1997:SSU}.",
  URL = "http://www.netlib.org/lapack/lawns/lawn96.ps; http://www.netlib.org/lapack/lawnspdf/lawn96.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Note 97
%%% NOTE(review): third author's initial changed from "D." to "K."
%%% (Katherine Yelick); confirm against the report's title page.
@TechReport{Chakrabarti:1995:MBM,
  author = "S. Chakrabarti and J. Demmel and K.
Yelick",
  title = "Modeling the Benefits of Mixed Data and Task Parallelism",
  type = "LAPACK Working Note",
  number = "97",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = may,
  year = "1995",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-95-289, May 1995.",
  URL = "http://www.netlib.org/lapack/lawns/lawn97.ps; http://www.netlib.org/lapack/lawnspdf/lawn97.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Note 98
@TechReport{Dongarra:1995:LVH,
  author = "J. Dongarra and R. Pozo and D. Walker",
  title = "{LAPACK++ V. 1.0}: High Performance Linear Algebra Users' Guide",
  type = "LAPACK Working Note",
  number = "98",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = may,
  year = "1995",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-95-290, May 1995.",
  URL = "http://www.netlib.org/lapack/lawns/lawn98.ps; http://www.netlib.org/lapack/lawnspdf/lawn98.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Note 99
@TechReport{Dongarra:1995:RCI,
  author = "J. Dongarra and V. Eijkhout and A. Kalhan",
  title = "Reverse Communication Interface for Linear Algebra Templates for Iterative Methods",
  type = "LAPACK Working Note",
  number = "99",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = may,
  year = "1995",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-95-291, May 1995.",
  URL = "http://www.netlib.org/lapack/lawns/lawn99.ps; http://www.netlib.org/lapack/lawnspdf/lawn99.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Note 100
@TechReport{Choi:1995:PSP,
  author = "J. Choi and J. Dongarra and S. Ostrouchov and A. Petitet and D. Walker and R. C.
Whaley",
  title = "A Proposal for a Set of Parallel Basic Linear Algebra Subprograms",
  type = "LAPACK Working Note",
  number = "100",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = may,
  year = "1995",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-95-292, May 1995. Published in \cite{Choi:1996:PSP}.",
  URL = "http://www.netlib.org/lapack/lawns/lawn100.ps; http://www.netlib.org/lapack/lawnspdf/lawn100.pdf",
  acknowledgement = ack-nhfb,
}

%%% In each note, the Published in citation must name the entry for the
%%% published version of the report, never the report's own key.
%%% LAPACK Working Notes 101 and 102.

@TechReport{Dongarra:1995:PFI,
  author = "J. J. Dongarra and J. {Du Croz} and S. Hammarling and J. Wa{\'s}niewski and A. Zemla",
  title = "A Proposal for a {Fortran 90} Interface for {LAPACK}",
  type = "LAPACK Working Note",
  number = "101",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = jul,
  year = "1995",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-95-295, July 1995. Published in \cite{Dongarra:1996:PFI}.",
  URL = "http://www.netlib.org/lapack/lawns/lawn101.ps; http://www.netlib.org/lapack/lawnspdf/lawn101.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Dongarra:1995:IVI,
  author = "J. Dongarra and A. Lumsdaine and R. Pozo and K. Remington",
  title = "{IML++ v. 1.2}: Iterative Methods Library Reference Guide",
  type = "LAPACK Working Note",
  number = "102",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = aug,
  year = "1995",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-95-303, August 1995.",
  URL = "http://www.netlib.org/lapack/lawns/lawn102.ps; http://www.netlib.org/lapack/lawnspdf/lawn102.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Demmel:1995:SAS,
  author = "J. W. Demmel and S. C. Eisenstat and J. R. Gilbert and X. S. Li and J. W. H.
Liu",
  title = "A Supernodal Approach to Sparse Partial Pivoting",
  type = "LAPACK Working Note",
  number = "103",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = sep,
  year = "1995",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-95-304, September 1995. Published in \cite{Demmel:1999:SAS}.",
  URL = "http://www.netlib.org/lapack/lawns/lawn103.ps; http://www.netlib.org/lapack/lawnspdf/lawn103.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Notes 104 and 105.

@TechReport{Higham:1995:IRL,
  author = "N. J. Higham",
  title = "Iterative Refinement and {LAPACK}",
  type = "LAPACK Working Note",
  number = "104",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = oct,
  year = "1995",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-95-308, October 1995. Published in \cite{Higham:1997:IRL}.",
  URL = "http://www.netlib.org/lapack/lawns/lawn104.ps; http://www.netlib.org/lapack/lawnspdf/lawn104.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Higham:1995:SDP,
  author = "N. J. Higham",
  title = "Stability of the Diagonal Pivoting Method with Partial Pivoting",
  type = "LAPACK Working Note",
  number = "105",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = oct,
  year = "1995",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-95-309, October 1995. Published in \cite{Higham:1997:SDP}.",
  URL = "http://www.netlib.org/lapack/lawns/lawn105.ps; http://www.netlib.org/lapack/lawnspdf/lawn105.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Bai:1995:TLAa,
  author = "Z. Bai and D. Day and J. Demmel and J. Dongarra and M. Gu and A. Ruhe and H.
van der Vorst",
  title = "Templates for Linear Algebra Problems",
  type = "LAPACK Working Note",
  number = "106",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = oct,
  year = "1995",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-95-311, October 1995. Published in \cite{Bai:1995:TLAb}.",
  URL = "http://www.netlib.org/lapack/lawns/lawn106.ps; http://www.netlib.org/lapack/lawnspdf/lawn106.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Notes 107 and 108.

@TechReport{Kaagstrom:1995:GBLa,
  author = "B. K{\aa}gstr{\"o}m and P. Ling and C. {Van Loan}",
  title = "{GEMM}-Based Level 3 {BLAS}: High-Performance Model Implementations and Performance Evaluation Benchmark",
  type = "LAPACK Working Note",
  number = "107",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = nov,
  year = "1995",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-95-315, November 1995. Published in \cite{Kaagstrom:1998:GBL}.",
  URL = "http://www.netlib.org/lapack/lawns/lawn107.ps; http://www.netlib.org/lapack/lawnspdf/lawn107.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Kaagstrom:1995:GBLb,
  author = "B. K{\aa}gstr{\"o}m and P. Ling and C. {Van Loan}",
  title = "{GEMM}-Based Level 3 {BLAS}: Installation, Tuning and Use of the Model Implementations and the Performance Evaluation Benchmark",
  type = "LAPACK Working Note",
  number = "108",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = nov,
  year = "1995",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-95-316, November 1995.",
  URL = "http://www.netlib.org/lapack/lawns/lawn108.ps; http://www.netlib.org/lapack/lawnspdf/lawn108.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Dongarra:1995:BTW,
  author = "J. Dongarra and S. Hammarling and S.
Ostrouchov",
  title = "{BLAS} Technical Workshop",
  type = "LAPACK Working Note",
  number = "109",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = nov,
  year = "1995",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-95-317, November 1995.",
  URL = "http://www.netlib.org/lapack/lawns/lawn109.ps; http://www.netlib.org/lapack/lawnspdf/lawn109.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Notes 110 and 111.

@TechReport{Dongarra:1996:KCP,
  author = "J. J. Dongarra and S. Hammarling and D. W. Walker",
  title = "Key Concepts For Parallel Out-Of-Core {$ L U $} Factorization",
  type = "LAPACK Working Note",
  number = "110",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = apr,
  year = "1996",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-96-324, April 1996. Published in \cite{Dongarra:1997:KCPb}.",
  URL = "http://www.netlib.org/lapack/lawns/lawn110.ps; http://www.netlib.org/lapack/lawnspdf/lawn110.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Bilmes:1996:OMM,
  author = "J. Bilmes and K. Asanovic and J. Demmel and D. Lam and C.-W. Chin",
  title = "Optimizing Matrix Multiply using {PHiPAC}: a Portable, High-Performance, {ANSI C} Coding Methodology",
  type = "LAPACK Working Note",
  number = "111",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = may,
  year = "1996",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-96-326, May 1996.",
  URL = "http://www.netlib.org/lapack/lawns/lawn111.ps; http://www.netlib.org/lapack/lawnspdf/lawn111.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Blackford:1996:PEDa,
  author = "L. S. Blackford and A. Cleary and J. Demmel and I. Dhillon and J. Dongarra and S. Hammarling and A. Petitet and H. Ren and K. Stanley and R. C.
Whaley",
  title = "Practical Experience in the Dangers of Heterogeneous Computing",
  type = "LAPACK Working Note",
  number = "112",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = jul,
  year = "1996",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-96-330, July 1996. Published in \cite{Blackford:1996:PEDb,Blackford:1997:PEN}.",
  URL = "http://www.netlib.org/lapack/lawns/lawn112.ps; http://www.netlib.org/lapack/lawnspdf/lawn112.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Notes 113 and 114.

@TechReport{Quintana-Orti:1996:BPA,
  author = "G. Quintana-Orti and E. S. Quintana-Orti and A. Petitet",
  title = "Block-Partitioned Algorithms for Solving the Linear Least Squares Problem",
  type = "LAPACK Working Note",
  number = "113",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = jul,
  year = "1996",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-96-333, July 1996.",
  URL = "http://www.netlib.org/lapack/lawns/lawn113.ps; http://www.netlib.org/lapack/lawnspdf/lawn113.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Quintana-Orti:1996:BVQ,
  author = "G. Quintana-Orti and X. Sun and C. Bischof",
  title = "A {BLAS-3} Version of the {$ Q R $} Factorization with Column Pivoting",
  type = "LAPACK Working Note",
  number = "114",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = aug,
  year = "1996",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-96-334, August 1996.",
  URL = "http://www.netlib.org/lapack/lawns/lawn114.ps; http://www.netlib.org/lapack/lawnspdf/lawn114.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Ren:1996:EAI,
  author = "H.
Ren",
  title = "On the Error Analysis and Implementation of Some Eigenvalue Decomposition and Singular Value Decomposition Algorithms",
  type = "LAPACK Working Note",
  number = "115",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = sep,
  year = "1996",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-96-336, September 1996.",
  URL = "http://www.netlib.org/lapack/lawns/lawn115.ps; http://www.netlib.org/lapack/lawnspdf/lawn115.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Notes 116 and 117.

@TechReport{Sidani:1996:PMD,
  author = "M. Sidani and B. Harrod",
  title = "Parallel Matrix Distributions: Have we been doing it all right?",
  type = "LAPACK Working Note",
  number = "116",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = nov,
  year = "1996",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-96-340, November 1996.",
  URL = "http://www.netlib.org/lapack/lawns/lawn116.ps; http://www.netlib.org/lapack/lawnspdf/lawn116.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Blackford:1996:FIL,
  author = "L. Susan Blackford and Jack J. Dongarra and Jeremy {Du Croz} and Sven Hammarling and Jerzy Wa{\'s}niewski",
  title = "A {Fortran 90} Interface for {LAPACK}",
  type = "LAPACK Working Note",
  number = "117",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = dec,
  year = "1996",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-96-341, December 1996.",
  URL = "http://www.netlib.org/lapack/lawns/lawn117.ps; http://www.netlib.org/lapack/lawnspdf/lawn117.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Dongarra:1997:DIP,
  author = "J. J. Dongarra and E. F.
D'Azevedo",
  title = "The Design and Implementation of the Parallel Out-of-core {ScaLAPACK} {$ L U $}, {$ Q R $}, and {Cholesky} Factorization Routines",
  type = "LAPACK Working Note",
  number = "118",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = jan,
  year = "1997",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-97-347, January 1997. Published in \cite{DAzevedo:2000:DIP}.",
  URL = "http://www.netlib.org/lapack/lawns/lawn118.ps; http://www.netlib.org/lapack/lawnspdf/lawn118.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Notes 119 and 120.

@TechReport{Demmel:1997:CSV,
  author = "James Demmel and Ming Gu and Stanley Eisenstat and Ivan Slapni{\v{c}}ar and Kre{\v{s}}imir Veseli{\'c} and Zlatko Drma{\v{c}}",
  title = "Computing the Singular Value Decomposition with High Relative Accuracy",
  type = "LAPACK Working Note",
  number = "119",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = feb,
  year = "1997",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-97-348, February 1997. Published in \cite{Demmel:1999:CSV}.",
  URL = "http://www.netlib.org/lapack/lawns/lawn119.ps; http://www.netlib.org/lapack/lawnspdf/lawn119.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Desprez:1997:SBC,
  author = "F. Desprez and J. Dongarra and A. Petitet and C. Randriamaro and Y. Robert",
  title = "Scheduling Block-Cyclic Array Redistribution",
  type = "LAPACK Working Note",
  number = "120",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = feb,
  year = "1997",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-97-349, February 1997. Published in \cite{Desprez:1998:SBA,Desprez:1998:SBC}.",
  URL = "http://www.netlib.org/lapack/lawns/lawn120.ps; http://www.netlib.org/lapack/lawnspdf/lawn120.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Henry:1997:PIN,
  author = "G. Henry and D. Watkins and J.
Dongarra",
  title = "A Parallel Implementation of the Nonsymmetric {$ Q R $} Algorithm for Distributed Memory Architectures",
  type = "LAPACK Working Note",
  number = "121",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = mar,
  year = "1997",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-97-352, March 1997. Published in \cite{Henry:2002:PIN}.",
  URL = "http://www.netlib.org/lapack/lawns/lawn121.ps; http://www.netlib.org/lapack/lawnspdf/lawn121.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Notes 122 and 123.

@TechReport{Ahues:1997:NDC,
  author = "M. Ahues and F. Tisseur",
  title = "A New Deflation Criterion for the {$ Q R $} Algorithm",
  type = "LAPACK Working Note",
  number = "122",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = mar,
  year = "1997",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-97-353, March 1997.",
  URL = "http://www.netlib.org/lapack/lawns/lawn122.ps; http://www.netlib.org/lapack/lawnspdf/lawn122.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Bai:1997:TMC,
  author = "Z. Bai and D. Day and J. Demmel and J. Dongarra",
  title = "A Test Matrix Collection for Non-{Hermitian} Eigenvalue Problems",
  type = "LAPACK Working Note",
  number = "123",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = mar,
  year = "1997",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-97-355, March 1997.",
  URL = "http://www.netlib.org/lapack/lawns/lawn123.ps; http://www.netlib.org/lapack/lawnspdf/lawn123.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Demmel:1997:APS,
  author = "J. Demmel and J. Gilbert and X.
Li",
  title = "An Asynchronous Parallel Supernodal Algorithm for Sparse {Gaussian} Elimination",
  type = "LAPACK Working Note",
  number = "124",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = apr,
  year = "1997",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-97-357, April 1997. Published in \cite{Demmel:1999:APS}.",
  URL = "http://www.netlib.org/lapack/lawns/lawn124.ps; http://www.netlib.org/lapack/lawnspdf/lawn124.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Notes 125 and 126.
%%% The UT-CS report number and date belong in the note field; bibdate
%%% holds only the entry timestamp.

@TechReport{Cleary:1997:ISD,
  author = "A. Cleary and J. Dongarra",
  title = "Implementation in {ScaLAPACK} of Divide-and-Conquer Algorithms for Banded and Tridiagonal Linear Systems",
  type = "LAPACK Working Note",
  number = "125",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = apr,
  year = "1997",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-97-358, April 1997.",
  URL = "http://www.netlib.org/lapack/lawns/lawn125.ps; http://www.netlib.org/lapack/lawnspdf/lawn125.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Anderson:1997:PIL,
  author = "E. Anderson and M. Fahey",
  title = "Performance Improvements to {LAPACK} for the {Cray Scientific Library}",
  type = "LAPACK Working Note",
  number = "126",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = apr,
  year = "1997",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-97-359, April 1997.",
  URL = "http://www.netlib.org/lapack/lawns/lawn126.ps; http://www.netlib.org/lapack/lawnspdf/lawn126.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Li:1997:SGE,
  author = "X.
Li",
  title = "Sparse {Gaussian} Elimination on High Performance Computers",
  type = "LAPACK Working Note",
  number = "127",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = jun,
  year = "1997",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-97-368, June 1997.",
  URL = "http://www.netlib.org/lapack/lawns/lawn127.ps; http://www.netlib.org/lapack/lawnspdf/lawn127.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Notes 128 and 129.

@TechReport{Petitet:1997:ARM,
  author = "A. Petitet",
  title = "Algorithmic Redistribution Methods for Block Cyclic Decompositions",
  type = "LAPACK Working Note",
  number = "128",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = jul,
  year = "1997",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-97-371, July 1997. Published in \cite{Petitet:1999:ARM}.",
  URL = "http://www.netlib.org/lapack/lawns/lawn128.ps; http://www.netlib.org/lapack/lawnspdf/lawn128.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Choi:1997:NPM,
  author = "J. Choi",
  title = "A New Parallel Matrix Multiplication Algorithm on Distributed-Memory Concurrent Computers",
  type = "LAPACK Working Note",
  number = "129",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = sep,
  year = "1997",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-97-369, September 1997. Published in \cite{Choi:1998:NPM}.",
  URL = "http://www.netlib.org/lapack/lawns/lawn129.ps; http://www.netlib.org/lapack/lawnspdf/lawn129.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Demmel:1997:ASS,
  author = "J.
Demmel",
  title = "Accurate {SVDs} of Structured Matrices",
  type = "LAPACK Working Note",
  number = "130",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = oct,
  year = "1997",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-97-375, October 1997.",
  URL = "http://www.netlib.org/lapack/lawns/lawn130.ps; http://www.netlib.org/lapack/lawnspdf/lawn130.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Notes 131 and 132.

@TechReport{Whaley:1997:ATL,
  author = "R. Whaley and J. Dongarra",
  title = "Automatically Tuned Linear Algebra Software",
  type = "LAPACK Working Note",
  number = "131",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = dec,
  year = "1997",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-97-366, December 1997. Published in \cite{Whaley:1998:ATL}.",
  URL = "http://www.netlib.org/lapack/lawns/lawn131.ps; http://www.netlib.org/lapack/lawnspdf/lawn131.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Tisseur:1998:PDC,
  author = "F. Tisseur and J. Dongarra",
  title = "Parallelizing the Divide and Conquer Algorithm for the Symmetric Tridiagonal Eigenvalue Problem on Distributed Memory Architectures",
  type = "LAPACK Working Note",
  number = "132",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = mar,
  year = "1998",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-98-382, March 1998.",
  URL = "http://www.netlib.org/lapack/lawns/lawn132.ps; http://www.netlib.org/lapack/lawnspdf/lawn132.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Petitet:1998:ARM,
  author = "A. Petitet and J.
Dongarra",
  title = "Algorithmic Redistribution Methods for Block Cyclic Distributions",
  type = "LAPACK Working Note",
  number = "133",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = mar,
  year = "1998",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-98-383, March 1998. Published in \cite{Petitet:1999:ARM}.",
  URL = "http://www.netlib.org/lapack/lawns/lawn133.ps; http://www.netlib.org/lapack/lawnspdf/lawn133.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Notes 134 and 135.

@TechReport{Wasniewski:1998:HPL,
  author = "J. Wa{\'s}niewski and J. Dongarra",
  title = "High Performance Linear Algebra Package --- {LAPACK90}",
  type = "LAPACK Working Note",
  number = "134",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = apr,
  year = "1998",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-98-384, April 1998. Published in \cite{Dongarra:1998:HPL}.",
  URL = "http://www.netlib.org/lapack/lawns/lawn134.ps; http://www.netlib.org/lapack/lawnspdf/lawn134.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{DAzevedo:1998:PSE,
  author = "E. D'Azevedo and J. Dongarra",
  title = "Packed Storage Extensions for {ScaLAPACK}",
  type = "LAPACK Working Note",
  number = "135",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = apr,
  year = "1998",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-98-385, April 1998.",
  URL = "http://www.netlib.org/lapack/lawns/lawn135.ps; http://www.netlib.org/lapack/lawnspdf/lawn135.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Blackford:1998:SEP,
  author = "L. S. Blackford and R. C.
Whaley",
  title = "{ScaLAPACK} Evaluation and Performance at the {DoD} {MSRCs}",
  type = "LAPACK Working Note",
  number = "136",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = apr,
  year = "1998",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-98-388, April 1998.",
  URL = "http://www.netlib.org/lapack/lawns/lawn136.ps; http://www.netlib.org/lapack/lawnspdf/lawn136.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Notes 137 and 138.

@TechReport{Blackford:1998:IGD,
  author = "L. S. Blackford and J. J. Dongarra and C. A. Papadopoulos and R. C. Whaley",
  title = "Installation Guide and Design of the {HPF 1.1} interface to {ScaLAPACK}, {SLHPF}",
  type = "LAPACK Working Note",
  number = "137",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = aug,
  year = "1998",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-98-396, August 1998.",
  URL = "http://www.netlib.org/lapack/lawns/lawn137.ps; http://www.netlib.org/lapack/lawnspdf/lawn137.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Dongarra:1998:TSL,
  author = "J. Dongarra and W. Owczarz and J. Wa{\'s}niewski and P. Yalamov",
  title = "Testing Software for {LAPACK90}",
  type = "LAPACK Working Note",
  number = "138",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = sep,
  year = "1998",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-98-401, Sept 1998.",
  URL = "http://www.netlib.org/lapack/lawns/lawn138.ps; http://www.netlib.org/lapack/lawnspdf/lawn138.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Petitet:1998:NLA,
  author = "A. Petitet and H. Casanova and J. Dongarra and Y. Robert and R. C.
Whaley",
  title = "A Numerical Linear Algebra Problem Solving Environment Designer's Perspective",
  type = "LAPACK Working Note",
  number = "139",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = oct,
  year = "1998",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-98-405, Oct 1998. Published in \cite{Petitet:1999:NLA,Petitet:2000:PDS}.",
  URL = "http://www.netlib.org/lapack/lawns/lawn139.ps; http://www.netlib.org/lapack/lawnspdf/lawn139.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Notes 140 and 141.

@TechReport{Casanova:1998:NVD,
  author = "H. Casanova and J. Dongarra",
  title = "{NetSolve version 1.2}: Design and Implementation",
  type = "LAPACK Working Note",
  number = "140",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = nov,
  year = "1998",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-98-406, Nov 1998.",
  URL = "http://www.netlib.org/lapack/lawns/lawn140.ps; http://www.netlib.org/lapack/lawnspdf/lawn140.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Eijkhout:1998:OIL,
  author = "Victor Eijkhout",
  title = "Overview of Iterative Linear System Solver Packages",
  type = "LAPACK Working Note",
  number = "141",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = dec,
  year = "1998",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-98-411, Dec 1998.",
  URL = "http://www.netlib.org/lapack/lawns/lawn141.ps; http://www.netlib.org/lapack/lawnspdf/lawn141.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Arbenz:1999:CPSa,
  author = "P. Arbenz and A. Cleary and J. Dongarra and M.
Hegland",
  title = "A Comparison of Parallel Solvers for Diagonally Dominant and General Narrow-Banded Linear Systems",
  type = "LAPACK Working Note",
  number = "142",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = feb,
  year = "1999",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-99-414, Feb 1999. Published in \cite{Arbenz:1999:CPSc}.",
  URL = "http://www.netlib.org/lapack/lawns/lawn142.ps; http://www.netlib.org/lapack/lawnspdf/lawn142.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Notes 143 and 144.

@TechReport{Arbenz:1999:CPSb,
  author = "P. Arbenz and A. Cleary and J. Dongarra and M. Hegland",
  title = "A Comparison of Parallel Solvers for Diagonally Dominant and General Narrow-Banded Linear Systems {II}",
  type = "LAPACK Working Note",
  number = "143",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = may,
  year = "1999",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-99-415, May 1999.",
  URL = "http://www.netlib.org/lapack/lawns/lawn143.ps; http://www.netlib.org/lapack/lawnspdf/lawn143.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Eijkhout:1999:EPI,
  author = "V. Eijkhout",
  title = "On the Existence Problem of Incomplete Factorisation Methods",
  type = "LAPACK Working Note",
  number = "144",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = dec,
  year = "1999",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-99-435, Dec 1999.",
  URL = "http://www.netlib.org/lapack/lawns/lawn144.ps; http://www.netlib.org/lapack/lawnspdf/lawn144.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Eijkhout:1999:WMI,
  author = "V.
Eijkhout",
  title = "The `weighted modification' incomplete factorisation method",
  type = "LAPACK Working Note",
  number = "145",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = dec,
  year = "1999",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-99-436, Dec 1999.",
  URL = "http://www.netlib.org/lapack/lawns/lawn145.ps; http://www.netlib.org/lapack/lawnspdf/lawn145.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Notes 146 and 147.

@TechReport{Andersen:2000:RFC,
  author = "B. Andersen and F. Gustavson and J. Wa{\'s}niewski",
  title = "A recursive formulation of {Cholesky} factorization of a matrix in packed storage",
  type = "LAPACK Working Note",
  number = "146",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = may,
  year = "2000",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-00-441, May 2000. Published in \cite{Andersen:2001:RFC}.",
  URL = "http://www.netlib.org/lapack/lawns/lawn146.ps; http://www.netlib.org/lapack/lawnspdf/lawn146.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Whaley:2000:AEO,
  author = "R. C. Whaley and A. Petitet and J. Dongarra",
  title = "Automated Empirical Optimization of Software and the {ATLAS Project}",
  type = "LAPACK Working Note",
  number = "147",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = sep,
  year = "2000",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-00-448, September 2000. Published in \cite{Whaley:2001:AEO}.",
  URL = "http://www.netlib.org/lapack/lawns/lawn147.ps; http://www.netlib.org/lapack/lawnspdf/lawn147.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Bindel:2000:CGR,
  author = "D. Bindel and J. Demmel and W. Kahan and O.
Marques",
  title = "On Computing {Givens} rotations reliably and efficiently",
  type = "LAPACK Working Note",
  number = "148",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = oct,
  year = "2000",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-00-449, October 2000. Published in \cite{Bindel:2002:CGR}.",
  URL = "http://www.netlib.org/lapack/lawns/lawn148.ps; http://www.netlib.org/lapack/lawnspdf/lawn148.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Notes 149 and 150.

@TechReport{Li:2000:DIT,
  author = "X. Li and J. Demmel and D. Bailey and G. Henry and Y. Hida and J. Iskandar and W. Kahan and A. Kapur and M. Martin and T. Tung and D. J. Yoo",
  title = "Design, Implementation and Testing of Extended and Mixed Precision {BLAS}",
  type = "LAPACK Working Note",
  number = "149",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = oct,
  year = "2000",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-00-451, October 2000. Published in \cite{Li:2002:DIT}.",
  URL = "http://www.netlib.org/lapack/lawns/lawn149.ps; http://www.netlib.org/lapack/lawnspdf/lawn149.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Anderson:2000:DPR,
  author = "E. Anderson",
  title = "Discontinuous Plane Rotations and the Symmetric Eigenvalue Problem",
  type = "LAPACK Working Note",
  number = "150",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = dec,
  year = "2000",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-00-454, December 2000.",
  URL = "http://www.netlib.org/lapack/lawns/lawn150.ps; http://www.netlib.org/lapack/lawnspdf/lawn150.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Eijkhout:2001:ADM,
  author = "V.
Eijkhout",
  title = "Automatic Determination of Matrix-Blocks",
  type = "LAPACK Working Note",
  number = "151",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = apr,
  year = "2001",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-01-458, April 2001.",
  URL = "http://www.netlib.org/lapack/lawns/lawn151.ps; http://www.netlib.org/lapack/lawnspdf/lawn151.pdf",
  acknowledgement = ack-nhfb,
}

%%% LAPACK Working Notes 152 and 153.
%%% Math in titles is brace-protected so that style case conversion
%%% leaves it untouched.

@TechReport{Cheng:2001:ILB,
  author = "S. Cheng and N. Higham",
  title = "Implementation for {LAPACK} of a Block Algorithm for Matrix {$1$}-Norm Estimation",
  type = "LAPACK Working Note",
  number = "152",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = aug,
  year = "2001",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-01-470, August 2001.",
  URL = "http://www.netlib.org/lapack/lawns/lawn152.ps; http://www.netlib.org/lapack/lawnspdf/lawn152.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Fahey:2001:NCP,
  author = "M. Fahey",
  title = "New Complex Parallel Eigenvalue and Eigenvector Routines",
  type = "LAPACK Working Note",
  number = "153",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = aug,
  year = "2001",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-01-471, August 2001.",
  URL = "http://www.netlib.org/lapack/lawns/lawn153.ps; http://www.netlib.org/lapack/lawnspdf/lawn153.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Dhillon:2002:OER,
  author = "Inderjit S. Dhillon and Beresford N.
Parlett",
  title = "Orthogonal Eigenvectors and Relative Gaps",
  type = "LAPACK Working Note",
  number = "154",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = aug,
  year = "2002",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-02-474, August 2002. Published in \cite{Dhillon:2004:OER}.",
  URL = "http://www.netlib.org/lapack/lawns/lawn154.ps; http://www.netlib.org/lapack/lawnspdf/lawn154.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Parlett:2002:IDA,
  author = "Beresford N. Parlett and Osni A. Marques",
  title = "An implementation of the {$ d q d s $} algorithm (positive case)",
  type = "LAPACK Working Note",
  number = "155",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = aug,
  year = "2002",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "LBNL-43726, UT-CS-02-475, August 2002. Published in \cite{Parlett:2000:IAP}.",
  URL = "http://www.netlib.org/lapack/lawns/lawn155.ps; http://www.netlib.org/lapack/lawnspdf/lawn155.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Eijkhout:2002:PAO,
  author = "Victor Eijkhout",
  title = "Polynomial acceleration of optimised multi-grid smoothers; basic theory",
  type = "LAPACK Working Note",
  number = "156",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = aug,
  year = "2002",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-02-477, August 2002",
  URL = "http://www.netlib.org/lapack/lawns/lawn156.ps; http://www.netlib.org/lapack/lawnspdf/lawn156.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Dongarra:2002:SAN,
  author = "Jack Dongarra and Victor Eijkhout",
  title = "Self-adapting Numerical Software for Next Generation Applications",
  type = "LAPACK Working Note",
  number = "157",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = aug,
  year = "2002",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-02-484, August 2002. Published in \cite{Dongarra:2003:SANb}.",
  URL = "http://www.netlib.org/lapack/lawns/lawn157.ps; http://www.netlib.org/lapack/lawnspdf/lawn157.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Anderson:2002:LFE,
  author = "Edward Anderson",
  title = "{LAPACK3E} --- {A} {Fortran 90}-enhanced version of {LAPACK}",
  type = "LAPACK Working Note",
  number = "158",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = dec,
  year = "2002",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-02-497, December 2002",
  URL = "http://www.netlib.org/lapack/lawns/lawn158.ps; http://www.netlib.org/lapack/lawnspdf/lawn158.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Dongarra:2003:FCA,
  author = "Jack Dongarra and Victor Eijkhout",
  title = "Finite-choice algorithm optimization in {Conjugate Gradients}",
  type = "LAPACK Working Note",
  number = "159",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = jan,
  year = "2003",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-03-502, January 2003",
  URL = "http://www.netlib.org/lapack/lawns/lawn159.ps; http://www.netlib.org/lapack/lawnspdf/lawn159.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Chen:2003:SAS,
  author = "Zizhong Chen and Jack Dongarra and Piotr Luszczek and Kenneth Roche",
  title = "Self Adapting Software for Numerical Linear Algebra and {LAPACK} for Clusters",
  type = "LAPACK Working Note",
  number = "160",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = jan,
  year = "2003",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-03-499, January 2003",
  URL = "http://www.netlib.org/lapack/lawns/lawn160.ps; http://www.netlib.org/lapack/lawnspdf/lawn160.pdf",
  acknowledgement = ack-nhfb,
}

%%% NOTE: the citation key below keeps "2003" so that existing \cite
%%% commands continue to resolve; the month and year fields follow the
%%% report number and date recorded in the note field (UT-CS-04-522,
%%% February 2004).
@TechReport{Lucas:2003:LSC,
  author = "Craig Lucas",
  title = "{LAPACK}-Style Codes for Level 2 and 3 Pivoted {Cholesky} Factorizations",
  type = "LAPACK Working Note",
  number = "161",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = feb,
  year = "2004",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-04-522, February 2004",
  URL = "http://www.netlib.org/lapack/lawns/lawn161.ps; http://www.netlib.org/lapack/lawnspdf/lawn161.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Dhillon:2004:DIM,
  author = "Inderjit S. Dhillon and Beresford N. Parlett and Christof V{\"o}mel",
  title = "The Design and Implementation of the {MRRR} Algorithm",
  type = "LAPACK Working Note",
  number = "162",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = dec,
  year = "2004",
  bibdate = "Fri Apr 22 17:06:37 2005",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-04-541, December, 2004.",
  URL = "http://www.netlib.org/lapack/lawns/lawn162.ps; http://www.netlib.org/lapack/lawnspdf/lawn162.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Parlett:2004:HMA,
  author = "Beresford N.
Parlett and Christof V{\"o}mel", title = "How the {MRRR} Algorithm Can Fail on Tight Eigenvalue Clusters", type = "LAPACK Working Note", number = "163", institution = inst-UTK-CS, address = inst-UTK-CS:adr, pages = "15", month = dec, year = "2004", bibdate = "Fri Apr 22 17:06:37 2005", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "UT-CS-04-542, December, 2004.", URL = "http://www.eecs.berkeley.edu/Pubs/TechRpts/2004/CSD-04-1367.pdf; http://www.netlib.org/lapack/lawns/lawn163.ps; http://www.netlib.org/lapack/lawnspdf/lawn163.pdf", acknowledgement = ack-nhfb, } @TechReport{Demmel:2005:LPR, author = "Jim Demmel and Jack Dongarra", title = "{LAPACK 2005} Prospectus: Reliable and Scalable Software for Linear Algebra Computations on High End Computers", type = "LAPACK Working Note", number = "164", institution = inst-UTK-CS, address = inst-UTK-CS:adr, month = feb, year = "2005", bibdate = "Fri Apr 22 17:06:37 2005", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "UT-CS-05-546, February 2005.", URL = "http://www.netlib.org/lapack/lawns/lawn164.ps; http://www.netlib.org/lapack/lawnspdf/lawn164.pdf", acknowledgement = ack-nhfb, } @TechReport{Demmel:2005:EBE, author = "James Demmel and Yozo Hida and W. Kahan and Xiaoye S. Li and Soni Mukherjee and E. Jason Riedy", title = "Error Bounds from Extra Precise Iterative Refinement", type = "LAPACK Working Note", number = "165", institution = inst-UTK-CS, address = inst-UTK-CS:adr, month = feb, year = "2005", bibdate = "Fri Apr 22 17:06:37 2005", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "UT-CS-05-547, February 2005.", URL = "http://www.netlib.org/lapack/lawns/lawn165.ps; http://www.netlib.org/lapack/lawnspdf/lawn165.pdf", acknowledgement = ack-nhfb, } @TechReport{Willems:2005:CBS, author = "Paul R. 
Willems and Bruno Lang and Christof V{\"o}mel", title = "Computing the Bidiagonal {SVD} Using Multiple Relatively Robust Representations", type = "LAPACK Working Note", number = "166", institution = "Computer Science Division, University of California, Berkeley", address = "Berkeley, CA, USA", pages = "20", day = "29", month = aug, year = "2005", MRclass = "15A18, 65-04, 65F15", bibdate = "Mon Mar 20 12:30:00 2006", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "Also issued as Technical Report UCB//CSD-05-1376", URL = "http://www.netlib.org/lapack/lawnspdf/lawn166.pdf", abstract = "We describe the design and implementation of a new algorithm for computing the singular value decomposition of a real bidiagonal matrix. This algorithm uses ideas developed by Gro{\ss}er and Lang that extend Parlett's and Dhillon's MRRR algorithm for the tridiagonal symmetric eigenproblem. One key feature of our new implementation is, that $k$ singular triplets can be computed using only {$ O(n k) $} storage units and floating point operations, where $n$ is the dimension of the matrix. The algorithm will be made available as routine xBDSCR in the upcoming new release of the LAPACK library.", acknowledgement = ack-nhfb, keywords = "Bidiagonal Singular Value Decomposition; Coupling Relations; LAPACK library; MRRR algorithm; Tridiagonal Symmetric Eigenproblem", } @TechReport{Marques:2005:SCM, author = "Osni A. Marques and Beresford N. 
Parlett and Christof V{\"o}mel", title = "Subset Computations with the {MRRR} Algorithm", type = "LAPACK Working Note", number = "167", institution = "Computer Science Division, University of California, Berkeley", address = "Berkeley, CA, USA", pages = "9", day = "26", month = sep, year = "2005", bibdate = "Mon Mar 20 12:30:00 2006", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "Also issued as Technical Report UCB//CSD-05-1392", URL = "http://www.netlib.org/lapack/lawnspdf/lawn167.pdf", abstract = "The main advantage of inverse iteration over the QR algorithm and Divide \& Conquer for the symmetric tridiagonal eigenproblem is that subsets of eigenpairs can be computed at reduced cost. The MRRR algorithm (MRRR = Multiple Relatively Robust Representations) is a clever variant of inverse iteration without the need for reorthogonalization. {\tt stegr}, the current version of MRRR in LAPACK 3.0, does not allow for subset computations. The next release of {\tt stegr} is designed to compute a (sub-)set of $k$ eigenpairs with {$ O(k n) $} operations. Because of the special way in which eigenvectors are computed, MRRR subset computations are more complicated than when using inverse iteration. Unlike the latter, MRRR sometimes cannot ignore the unwanted part of the spectrum. We describe the problems with what we call false singletons. These are eigenvalues that appear to be isolated with respect to the wanted eigenvalues but in fact belong to a tight cluster of unwanted eigenvalues. This paper analyzes these complications and ways to deal with them.", acknowledgement = ack-nhfb, keywords = "false singleton; Multiple relatively robust representations; numerically orthogonal eigenvectors; subset computation; symmetric tridiagonal matrix", } @TechReport{Antonelli:2005:PSP, author = "Dominic Antonelli and Christof V{\"o}mel", title = "{PDSYEVR}. 
{ScaLAPACK}'s Parallel {MRRR} Algorithm for the Symmetric Eigenvalue Problem", type = "LAPACK Working Note", number = "168", institution = "Computer Science Division, University of California, Berkeley", address = "Berkeley, CA, USA", pages = "18", day = "29", month = aug, year = "2005", MRclass = "65F15, 65Y15.", bibdate = "Mon Mar 20 12:30:00 2006", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "Also issued as Technical Report UCB//CSD-05-1399.", URL = "http://www.netlib.org/lapack/lawnspdf/lawn168.pdf", abstract = "In the 90s, Dhillon and Parlett devised a new algorithm (Multiple Relatively Robust Representations, MRRR) for computing numerically orthogonal eigenvectors of a symmetric tridiagonal matrix {$T$} with {$ O(n^2) $} cost. In this paper, we describe the design of PDSYEVR, a ScaLAPACK implementation of the MRRR algorithm to compute the eigenpairs in parallel. It represents a substantial improvement over the symmetric eigensolver PDSYEVX that is currently in ScaLAPACK and is going to be part of the next ScaLAPACK release.", acknowledgement = ack-nhfb, keywords = "design; implementation; Multiple relatively robust representations; numerical software; parallel computation; ScaLAPACK; symmetric eigenvalue problem", } @TechReport{Drmac:2005:NFA, author = "Zlatko Drma{\v{c}} and Kre{\v{s}}imir Veseli{\'c}", title = "New Fast and Accurate {Jacobi} {SVD} Algorithm: {I}", type = "LAPACK Working Note", number = "169", institution = "Department of Mathematics, University of Zagreb", address = "Bijeni{\v{c}}ka 30, 10000 Zagreb, Croatia.", pages = "39", day = "30", month = aug, year = "2005", MRclass = "15A09, 15A12, 15A18, 15A23, 65F15, 65F22, 65F35", bibdate = "Mon Mar 20 12:30:00 2006", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", URL = "http://www.netlib.org/lapack/lawnspdf/lawn169.pdf", abstract = "This paper is the result of contrived efforts to break the barrier between numerical accuracy and run time efficiency in 
computing the fundamental decomposition of numerical linear algebra --- the singular value decomposition (SVD) of a general dense matrix. It is an unfortunate fact that the numerically most accurate one sided Jacobi SVD algorithm is several times slower than generally less accurate bidiagonalization based methods such as the QR or the divide and conquer algorithm. Despite its sound numerical qualities, the Jacobi SVD is not included in the state of the art matrix computation libraries and it is even considered obsolete by some leading researches. Our quest for a highly accurate and efficient SVD algorithm has led us to a new, superior variant of the Jacobi algorithm. The new algorithm has inherited all good high accuracy properties, and it outperforms not only the best implementations of the one sided Jacobi algorithm but also the QR algorithm. Moreover, it seems that the potential of the new approach is yet to be fully exploited.", acknowledgement = ack-nhfb, keywords = "eigenvalues; Jacobi method; singular value decomposition", } @TechReport{Drmac:2005:NFAb, author = "Zlatko Drma{\v{c}} and Kre{\v{s}}imir Veseli{\'c}", title = "New Fast and Accurate {Jacobi} {SVD} Algorithm: {II}", type = "LAPACK Working Note", number = "170", institution = "Department of Mathematics, University of Zagreb", address = "Bijeni{\v{c}}ka 30, 10000 Zagreb, Croatia.", pages = "25", day = "30", month = aug, year = "2005", bibdate = "Mon Mar 20 12:30:00 2006", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", URL = "http://www.netlib.org/lapack/lawnspdf/lawn170.pdf", abstract = "This paper presents new implementation of one sided Jacobi SVD for triangular matrices and its use as the core routine in a new preconditioned Jacobi SVD algorithm, recently proposed by the authors. New pivot strategy exploits the triangular form and uses the fact that the input triangular matrix is the result of rank revealing QR factorization. 
If used in the preconditioned Jacobi SVD algorithm, described in the first part of this report, it delivers superior performance leading to the currently fastest method for computing SVD decomposition with high relative accuracy. Furthermore, the efficiency of the new algorithm is comparable to the less accurate bidiagonalization based methods. The paper also discusses underflow issues in floating point implementation, and shows how to use perturbation theory to fix the imperfectness of machine arithmetic on some systems.", acknowledgement = ack-nhfb, keywords = "eigenvalues; Jacobi method; singular value decomposition; underflow", } @TechReport{Kressner:2006:BAR, author = "Daniel Kressner", title = "Block Algorithms for Reordering Standard and Generalized {Schur} Forms", type = "LAPACK Working Note", number = "171", institution = "Department of Mathematics, University of Zagreb", address = "Bijeni{\v{c}}ka 30, 10000 Zagreb, Croatia.", pages = "11", day = "17", month = feb, year = "2006", MRclass = "65F15, 65Y20.", bibdate = "Mon Mar 20 12:30:00 2006", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", URL = "http://www.netlib.org/lapack/lawnspdf/lawn171.pdf", abstract = "Block algorithms for reordering a selected set of eigenvalues in a standard or generalized Schur form are proposed. Efficiency is achieved by delaying orthogonal transformations and (optionally) making use of level 3 BLAS operations. Numerical experiments demonstrate that existing algorithms, as currently implemented in LAPACK, are outperformed by up to a factor of four.", acknowledgement = ack-nhfb, keywords = "deflating subspace; invariant subspace; reordering; Schur form", } @TechReport{Marques:2005:BIF, author = "Osni A. Marques and E. 
Jason Riedy and Christof V{\"o}mel", title = "Benefits of {IEEE-754} Features in Modern Symmetric Tridiagonal Eigensolvers", type = "LAPACK Working Note", number = "172", institution = "Computer Science Division, University of California, Berkeley", address = "Berkeley, CA, USA", pages = "22", day = "30", month = sep, year = "2005", MRclass = "15A18, 15A23.", bibdate = "Mon Mar 20 12:18:56 2006", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "Also issued as Technical Report UCB//CSD-05-1414.", URL = "http://www.netlib.org/lapack/lawnspdf/lawn172.pdf", abstract = "Bisection is one of the most common methods used to compute the eigenvalues of symmetric tridiagonal matrices. Bisection relies on the Sturm count: for a given shift $ \sigma $, the number of negative pivots in the factorization {$ T - \sigma I = L D L^T $} equals the number of eigenvalues of {$T$} that are smaller than $ \sigma $. In IEEE-754 arithmetic, the value $ \infty $ permits the computation to continue past a zero pivot, producing a correct Sturm count when {$T$} is unreduced. Demmel and Li showed in the 90s that using $ \infty $ rather than testing for zero pivots within the loop could improve performance significantly on certain architectures. When eigenvalues are to be computed to high relative accuracy, it is often preferable to work with {$ L D L^T $} factorizations instead of the original tridiagonal {$T$}, see for example the MRRR algorithm. In these cases, the Sturm count has to be computed from {$ L D L^T $}. The differential stationary and progressive qds algorithms are the methods of choice. While it seems trivial to replace {$T$} by {$ L D L^T $}, in reality these algorithms are more complicated: in IEEE-754 arithmetic, a zero pivot produces an overflow, followed by an invalid exception (NaN), that renders the Sturm count incorrect. We present alternative, safe formulations that are guaranteed to produce the correct result. 
Benchmarking these algorithms on a variety of platforms shows that the original formulation without tests is always faster provided no exception occurs. The transforms see speed-ups of up to $ 2.6 \times $ over the careful formulations. Tests on industrial matrices show that encountering exceptions in practice is rare. This leads to the following design: First, compute the Sturm count by the fast but unsafe algorithm. Then, if an exception occurred, recompute the count by a safe, slower alternative. The new Sturm count algorithms improve the speed of bisection by up to $ 2 \times $ on our test matrices. Furthermore, unlike the traditional tiny-pivot substitution, proper use of IEEE-754 features provides a careful formulation that imposes no input range restrictions.", acknowledgement = ack-nhfb, keywords = "differential qds algorithms; IEEE-754 arithmetic; IEEE-754 performance; LAPACK; MRRR algorithm; NaN arithmetic", } @TechReport{Kaagstrom:2006:MVQ, author = "Bo K{\aa}gstr{\"o}m and Daniel Kressner", title = "Multishift Variants of the {$ Q Z $} Algorithm with Aggressive Early Deflation", type = "LAPACK Working Note", number = "173", institution = "Department of Computing Science, Ume{\aa} University", address = "Ume{\aa}, Sweden", pages = "42", day = "20", month = feb, year = "2006", MRclass = "65F15, 15A18, 15A22, 47A75", bibdate = "Mon Mar 20 12:30:00 2006", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "Also appeared as technical report UMINF-05.11", URL = "http://www.netlib.org/lapack/lawnspdf/lawn173.pdf", abstract = "New variants of the QZ algorithm for solving the generalized eigenvalue problem are proposed. An extension of the small-bulge multishift QR algorithm is developed, which chases chains of many small bulges instead of only one bulge in each QZ iteration. 
This allows the effective use of level 3 BLAS operations, which in turn can provide efficient utilization of high performance computing systems with deep memory hierarchies. Moreover, an extension of the aggressive early deflation strategy is proposed, which can identify and deflate converged eigenvalues long before classic deflation strategies would. Consequently, the number of overall QZ iterations needed until convergence is considerably reduced. As a third ingredient, we reconsider the deflation of infinite eigenvalues and present a new deflation algorithm, which is particularly effective in the presence of a large number of infinite eigenvalues. Combining all these developments, our implementation significantly improves existing implementations of the QZ algorithm. This is demonstrated by numerical experiments with random matrix pairs as well as with matrix pairs arising from various applications.", acknowledgement = ack-nhfb, keywords = "aggressive early deflation; blocked algorithms; Generalized eigenvalue problem; generalized Schur form; multishifts; QZ algorithm", } @TechReport{Howell:2005:CEB, author = "G. W. Howell and J. W. Demmel and C. T. Fulton and S. Hammarling and K. 
Marmol", title = "Cache Efficient Bidiagonalization Using {BLAS 2.5} Operators", type = "LAPACK Working Note", number = "174", institution = "North Carolina State University; University of California, Berkeley; Florida Institute of Technology; Numerical Algorithms Group; Harris Corporation", address = "Raleigh, NC 27697, USA; Berkeley, CA 94720, USA; Melbourne, FL 32901, USA; Oxford, UK; Melbourne, FL 32901", pages = "39", day = "1", month = nov, year = "2005", bibdate = "Mon Mar 20 12:30:00 2006", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", URL = "http://www.netlib.org/lapack/lawnspdf/lawn174.pdf", abstract = "In this paper we reorganize the sequence of operations for Householder bidiagonalization of a general $ m \times n $ matrix, so that two (\_GEMV) vector-matrix multiplications can be done with one pass of the unreduced trailing part of the matrix through cache. Two new BLAS 2.5 operations approximately cut in half the transfer of data from main memory to cache. We give detailed algorithm descriptions and compare timings with the current LAPACK bidiagonalization algorithm.", acknowledgement = ack-nhfb, } @TechReport{Langou:2006:EPB, author = "Julie Langou and Julien Langou and Piotr Luszczek and Jakub Kurzak and Alfredo Buttari and Jack Dongarra", title = "Exploiting the Performance of 32 bit Floating Point Arithmetic in Obtaining 64 bit Accuracy (Revisiting Iterative Refinement for Linear Systems)", type = "LAPACK Working Note", number = "175", institution = inst-UTK-CS, address = inst-UTK-CS:adr, pages = "17", month = jun, year = "2006", bibdate = "Mon Oct 09 12:05:43 2006", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", URL = "http://www.netlib.org/lapack/lawnspdf/lawn175.pdf; http://www.netlib.org/lapack/lawnspdf/lawn175.ps", abstract = "Recent versions of microprocessors exhibit performance characteristics for 32 bit floating point 
arithmetic (double precision). Examples include the Intel's Pentium IV and M processors, AMD's Opteron architectures and the IBM's Cell Broad Engine processor. When working in single precision, floating point operations can be performed up to two times faster on the Pentium and up to ten times faster on the Cell over double precision. The performance enhancements in these architectures are derived by accessing extensions to the basic architecture, such as SSE2 in the case of the Pentium and the vector functions on the IBM Cell. The motivation for this paper is to exploit single precision operations whenever possible and resort to double precision at critical stages while attempting to provide the full double precision results. The results described here are fairly general and can be applied to various problems in linear algebra such as solving large sparse systems, using direct or iterative methods and some eigenvalue problems. There are limitations to the success of this process, such as when the conditioning of the problem exceeds the reciprocal of the accuracy of the single precision computations. In that case the double precision algorithm should be used.", acknowledgement = ack-nhfb, } @TechReport{Drmac:2006:FRR, author = "Zlatko Drma{\v{c}} and Zvonimir Bujanovi{\'c}", title = "On the failure of rank revealing {$ Q R $} factorization software --- a case study", type = "LAPACK Working Note", number = "176", institution = "Department of Mathematics, University of Zagreb", address = "Bijeni{\v{c}}ka 30, 10000 Zagreb, Croatia", pages = "27", day = "2", month = jun, year = "2006", bibdate = "Mon Oct 09 12:05:43 2006", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", URL = "http://www.netlib.org/lapack/lawnspdf/lawn176.pdf; http://www.netlib.org/lapack/lawnspdf/lawn176.ps", abstract = "This note reports an unexpected and rather erratic behavior of the LAPACK software implementation of the QR factorization with Businger--Golub column pivoting. 
It is shown that, due to finite precision arithmetic, software implementation of the factorization can catastrophically fail to produce triangular factor with the structure characteristic to the Businger--Golub pivot strategy. The failure of current state of the art software, and a proposed alternative implementations are analyzed in detail.", acknowledgement = ack-nhfb, } @TechReport{Kurzak:2006:IMP, author = "Jakub Kurzak and Jack Dongarra", title = "Implementation of the Mixed-Precision High Performance {LINPACK} Benchmark on the {CELL Processor}", type = "LAPACK Working Note", number = "177", institution = inst-UTK-CS, address = inst-UTK-CS:adr, pages = "12", month = sep, year = "2006", bibdate = "Mon Oct 09 12:05:43 2006", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "Also available as UT-CS-06-580.", URL = "http://www.netlib.org/lapack/lawnspdf/lawn177.pdf; http://www.netlib.org/lapack/lawnspdf/lawn177.ps", abstract = "This paper describes the design concepts behind implementations of mixed-precision linear algebra routines targeted for the Cell processor. It describes in detail the implementation of code to solve linear system of equations using Gaussian elimination in single precision with iterative refinement of the solution to the full double precision accuracy. By utilizing this approach the algorithm achieves close to an order of magnitude higher performance on the Cell processor than the performance offered by the standard double precision algorithm. 
Effectively the code is an implementation of the high performance LINPACK benchmark, since it meets all the requirements concerning the problem being solved and the numerical properties of the solution.", acknowledgement = ack-nhfb, } @TechReport{Kurzak:2006:ILA, author = "Jakub Kurzak and Jack Dongarra", title = "Implementing Linear Algebra Routines on Multi-Core Processors with Pipelining and a Look Ahead", type = "LAPACK Working Note", number = "178", institution = inst-UTK-CS, address = inst-UTK-CS:adr, pages = "11", month = sep, year = "2006", bibdate = "Mon Oct 09 12:05:43 2006", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "Also available as UT-CS-06-581.", URL = "http://www.netlib.org/lapack/lawnspdf/lawn178.pdf; http://www.netlib.org/lapack/lawnspdf/lawn178.ps", abstract = "Linear algebra algorithms commonly encapsulate parallelism in Basic Linear Algebra Subroutines (BLAS). This solution relies on the fork-join model of parallel execution, which may result in suboptimal performance on current and future generations of multi-core processors. To overcome the shortcomings of this approach a pipelined model of parallel execution is presented, and the idea of the look ahead is utilized in order to suppress the negative effects of sequential formulation of the algorithms. Application to one-sided matrix factorizations, LU, Cholesky and QR, is described. Shared memory implementation using POSIX threads is presented.", acknowledgement = ack-nhfb, keywords = "linear algebra; look ahead; multi-core processors; pipelining", } @TechReport{Baboulin:2006:PTS, author = "Marc Baboulin and Luc Giraud and Serge Gratton and Julien Langou", title = "Parallel tools for solving incremental dense least squares problems. 
Application to space geodesy", type = "LAPACK Working Note", number = "179", institution = "CERFACS", address = "42 avenue Gaspard Coriolis, 31057 Toulouse Cedex, France", month = sep, year = "2006", bibdate = "Mon Oct 09 12:05:43 2006", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "Also available as UT-CS-06-582.", URL = "http://www.netlib.org/lapack/lawnspdf/lawn179.pdf; http://www.netlib.org/lapack/lawnspdf/lawn179.ps", abstract = "We present a parallel distributed solver that enables us to solve incremental dense least squares arising in some parameter estimation problems. This solver is based on ScaLAPACK [8] and PBLAS [9] kernel routines. In the incremental process, the observations are collected periodically and the solver updates the solution with new observations using a QR factorization algorithm. It uses a recently defined distributed packed format [3] that handles symmetric or triangular matrices in ScaLAPACK-based implementations. We provide performance analysis on IBM pSeries 690. We also present an example of application in the area of space geodesy for gravity field computations with some experimental results.", acknowledgement = ack-nhfb, keywords = "dense linear algebra; gravity field computation; parallel distributed algorithms; QR factorization; ScaLAPACK; scientific computing", } @TechReport{Buttari:2006:UMP, author = "Alfredo Buttari and Jack J. 
Dongarra and Jakub Kurzak and Piotr Luszczek and Stanimire Tomov", title = "Using Mixed Precision for Sparse Matrix Computations to Enhance the Performance while Achieving 64-bit Accuracy", type = "LAPACK Working Note", number = "180", institution = inst-UTK-CS, address = inst-UTK-CS:adr, day = "22", month = oct, year = "2006", bibdate = "Fri Apr 24 12:25:43 2009", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", URL = "http://www.netlib.org/lapack/lawnspdf/lawn180.pdf", abstract = "By using a combination of 32-bit and 64-bit floating point arithmetic the performance of many sparse linear algebra algorithms can be significantly enhanced while maintaining the 64-bit accuracy of the resulting solution. These ideas can be applied to sparse multifrontal and supernodal direct techniques, and sparse iterative techniques such as Krylov subspace methods. The approach presented here can apply not only to conventional processors but also to exotic technologies such as Field Programmable Gate Arrays (FPGA), Graphical Processing Units (GPU), and the Cell BE processor.", acknowledgement = ack-nhfb, note = "UT-CS-06-584", } @TechReport{Demmel:2007:PNL, author = "James W. Demmel and Jack J. Dongarra and Beresford N. Parlett and William Kahan and Ming Gu and David S. Bindel and Yozo Hida and Xiaoye S. Li and Osni A. Marques and E. Jason Riedy and Christof V{\"o}mel and Julien Langou and Piotr Luszczek and Jakub Kurzak and Alfredo Buttari and Julie Langou and Stanimire Tomov", title = "Prospectus for the Next {LAPACK} and {ScaLAPACK} Libraries", type = "LAPACK Working Note", number = "181", institution = inst-UTK-CS, address = inst-UTK-CS:adr, day = "11", month = mar, year = "2007", bibdate = "Fri Apr 24 12:25:43 2009", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", URL = "http://www.netlib.org/lapack/lawnspdf/lawn181.pdf", acknowledgement = ack-nhfb, note = "UT-CS-07-592", } @TechReport{Demmel:2007:TIL, author = "James W. Demmel and Osni A. 
Marques and Beresford N. Parlett and Christof V{\"o}mel", title = "A Testing Infrastructure for {LAPACK}'s Symmetric Eigensolvers", type = "LAPACK Working Note", number = "182", institution = inst-UCB-EECS, address = inst-UCB-EECS:adr, month = apr, year = "2007", MRclass = "15A18, 15A23", bibdate = "Fri Apr 24 12:25:43 2009", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", URL = "http://www.netlib.org/lapack/lawnspdf/lawn182.pdf", abstract = "LAPACK is often mentioned as a positive example of a software library that encapsulates complex, robust, and widely used numerical algorithms for a wide range of applications. At installation time, the user has the option of running a (limited) number of test cases to verify the integrity of the installation process. On the algorithm developer's side, however, more exhaustive tests are usually performed to study algorithm behavior on a variety of problem settings and also computer architectures. In this process, difficult test cases need to be found that reflect particular challenges of an application or push algorithms to extreme behavior. These tests are then assembled into a comprehensive collection, therefore making it possible for any new or competing algorithm to be stressed in a similar way. This note describes such an infrastructure for exhaustively testing the symmetric tridiagonal eigensolvers implemented in LAPACK. It consists of two parts: a selection of carefully chosen test matrices with particular idiosyncrasies and a portable testing framework that allows easy testing and data processing. The tester facilitates experiments with algorithmic choices, parameter and threshold studies, and performance comparisons on different architectures.", acknowledgement = ack-nhfb, } @TechReport{Demmel:2007:PAL, author = "James W. Demmel and Osni A. Marques and Beresford N. 
Parlett and Christof V{\"o}mel", title = "Performance and Accuracy of {LAPACK}'s Symmetric Tridiagonal Eigensolvers", type = "LAPACK Working Note", number = "183", institution = inst-UCB-EECS, address = inst-UCB-EECS:adr, month = apr, year = "2007", MRclass = "15A18, 15A23", bibdate = "Fri Apr 24 12:25:43 2009", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", URL = "http://www.netlib.org/lapack/lawnspdf/lawn183.pdf", abstract = "We compare four algorithms from the latest LAPACK 3.1 release for computing eigenpairs of a symmetric tridiagonal matrix. These include QR iteration, bisection and inverse iteration (BI), the Divide-and-Conquer method (DC), and the method of Multiple Relatively Robust Representations (MR).\par Our evaluation considers speed and accuracy when computing all eigenpairs, and additionally subset computations. Using a variety of carefully selected test problems, our study includes a variety of today's computer architectures.\par Our conclusions can be summarized as follows. (1) DC and MR are generally much faster than QR and BI on large matrices. (2) MR almost always does the fewest floating point operations, but at a lower MFlop rate than all the other algorithms. (3) The exact performance of MR and DC strongly depends on the matrix at hand. (4) DC and QR are the most accurate algorithms with observed accuracy {$ O(\sqrt {n} \epsilon) $}. The accuracy of BI and MR is generally {$ O(n \epsilon) $}. (5) MR is preferable to BI for subset computations.", acknowledgement = ack-nhfb, } @TechReport{Kurzak:2007:SSL, author = "Jakub Kurzak and Alfredo Buttari and Jack J. 
Dongarra", title = "Solving Systems of Linear Equations on the {CELL} Processor Using {Cholesky} Factorization", type = "LAPACK Working Note", number = "184", institution = inst-UTK-CS, address = inst-UTK-CS:adr, month = may, year = "2007", bibdate = "Fri Apr 24 12:25:43 2009", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", URL = "http://www.netlib.org/lapack/lawnspdf/lawn184.pdf", abstract = "The STI CELL processor introduces pioneering solutions in processor architecture. At the same time it presents new challenges for the development of numerical algorithms. One is effective exploitation of the differential between the speed of single and double precision arithmetic; the other is efficient parallelization between the short vector SIMD cores. In this work, the first challenge is addressed by utilizing a mixed-precision algorithm for the solution of a dense symmetric positive definite system of linear equations, which delivers double precision accuracy, while performing the bulk of the work in single precision. The second challenge is approached by introducing much finer granularity of parallelization than has been used for other architectures and using a lightweight decentralized synchronization. The implementation of the computationally intensive sections gets within 90 percent of peak floating point performance, while the implementation of the memory intensive sections reaches within 90 percent of peak memory bandwidth. On a single CELL processor, the algorithm achieves over 170 Gflop/s when solving a symmetric positive definite system of linear equation in single precision and over 150 Gflop/s when delivering the result in double precision accuracy.", acknowledgement = ack-nhfb, keywords = "CELL BE; Cholesky factorization; iterative refinement; mixed-precision algorithms", note = "UT-CS-07-596", } @TechReport{Buttari:2007:LPH, author = "Alfredo Buttari and Jack J. 
Dongarra and Jakub Kurzak", title = "Limitations of the {PlayStation 3} for High Performance Cluster Computing", type = "LAPACK Working Note", number = "185", institution = inst-UTK-CS, address = inst-UTK-CS:adr, month = may, year = "2007", bibdate = "Fri Apr 24 12:25:43 2009", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", URL = "http://www.netlib.org/lapack/lawnspdf/lawn185.pdf", acknowledgement = ack-nhfb, note = "UT-CS-07-597", } @TechReport{Demmel:2007:FLAa, author = "James W. Demmel and Ioana Dumitriu and Olga Holtz", title = "Fast Linear Algebra is Stable", type = "LAPACK Working Note", number = "186", institution = inst-UCB-EECS, address = inst-UCB-EECS:adr, day = "18", month = may, year = "2007", bibdate = "Fri Apr 24 12:25:43 2009", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "Published in \cite{Demmel:2007:FLAb}.", URL = "http://www.netlib.org/lapack/lawnspdf/lawn186.pdf", abstract = "In [23] we showed that a large class of fast recursive matrix multiplication algorithms is stable in a normwise sense, and that in fact if multiplication of $n$-by-$n$ matrices can be done by any algorithm in {$ O(n^{\omega + \eta }) $} operations for any $ \eta > 0 $, then it can be done stably in {$ O(n^{\omega + \eta }) $} operations for any $ \eta > 0 $. 
Here we extend this result to show that essentially all standard linear algebra operations, including LU decomposition, QR decomposition, linear equation solving, matrix inversion, solving least squares problems, (generalized) eigenvalue problems and the singular value decomposition can also be done stably (in a normwise sense) in {$ O(n^{\omega + \eta }) $} operations.", acknowledgement = ack-nhfb, }

@TechReport{Byers:2007:LXT,
  author =       "Ralph Byers",
  title =        "{LAPACK 3.1 xHSEQR}: Tuning and Implementation Notes on the Small Bulge Multi-shift {$ Q R $} Algorithm with Aggressive Early Deflation",
  type =         "LAPACK Working Note",
  number =       "187",
  institution =  "Department of Mathematics, University of Kansas",
  address =      "Lawrence, Kansas 66045, USA",
  month =        may,
  year =         "2007",
  bibdate =      "Fri Apr 24 12:25:43 2009",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn187.pdf",
  abstract =     "This note documents implementation details of the small bulge, multi-shift QR algorithm with aggressive early deflation that appears as LAPACK version 3.1 programs CHSEQR, DHSEQR, SHSEQR and ZHSEQR and the subroutines they call. These codes calculate eigenvalues and optionally a Schur factorization of a Hessenberg matrix. They do the bulk of the work required to calculate eigenvalues and optionally eigenvectors of a general non-symmetric matrix. This report is intended to provide some guidance for setting the machine dependent tuning parameters, to help maintainers to identify and correct problems, and to help developers improve upon this implementation.",
  acknowledgement = ack-nhfb,
}

@TechReport{Demmel:2007:EPI, author = "James W. Demmel and Yozo Hida and Xiaoye S. Li and E.
Jason Riedy", title = "Extra-precise Iterative Refinement for Overdetermined Least Squares Problems", type = "LAPACK Working Note", number = "188", institution = inst-UCB-EECS, address = inst-UCB-EECS:adr, day = "30", month = may, year = "2007", bibdate = "Fri Apr 24 12:25:43 2009", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "Published in \cite{Demmel:2009:EPI}.", URL = "http://www.netlib.org/lapack/lawnspdf/lawn188.pdf", abstract = "We present the algorithm, error bounds, and numerical results for extra-precise iterative refinement applied to overdetermined linear least squares (LLS) problems. We apply our linear system refinement algorithm to Bj{\"o}rck's augmented linear system formulation of an LLS problem. Our algorithm reduces the forward normwise and componentwise errors to {$ O(\epsilon) $} unless the system is too ill conditioned. In contrast to linear systems, we provide two separate error bounds for the solution $x$ and the residual $r$. The refinement algorithm requires only limited use of extra precision and adds only {$ O(m n) $} work to the {$ O(m n^2) $} cost of QR factorization for problems of size $m$-by-$n$. The extra precision calculation is facilitated by the new extended-precision BLAS standard in a portable way, and the refinement algorithm will be included in a future release of LAPACK and can be extended to the other types of least squares problems.", acknowledgement = ack-nhfb, }

@TechReport{Alvaro:2008:FSS,
  author =       "Wesley Alvaro and Jakub Kurzak and Jack J. Dongarra",
  title =        "Fast and Small Short Vector {SIMD} Matrix Multiplication Kernels for the {CELL} Processor",
  type =         "LAPACK Working Note",
  number =       "189",
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  month =        jan,
  year =         "2008",
  bibdate =      "Fri Apr 24 12:25:43 2009",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn189.pdf",
  abstract =     "Matrix multiplication is one of the most common numerical operations, especially in the area of dense linear algebra, where it forms the core of many important algorithms, including solvers of linear systems of equations, least square problems, and singular and eigenvalue computations. The STI CELL processor exceeds the capabilities of any other processor available today in terms of peak single precision, floating point performance. In order to fully exploit the potential of the CELL processor for a wide range of numerical algorithms, fast implementation of the matrix multiplication operation is essential. The crucial component is the matrix multiplication kernel crafted for the short vector Single Instruction Multiple Data architecture of the Synergistic Processing Element of the CELL processor. In this paper, single precision matrix multiplication kernels are presented implementing the {$ C = C - A \times B^T $} operation and the {$ C = C - A \times B $} operation for matrices of size $ 64 \times 64 $ elements. For the latter case, the performance of 25.55 Gflop/s is reported, or 99.80 percent of the peak, using as little as 5.9 KB of storage for code and auxiliary data structures.",
  acknowledgement = ack-nhfb,
  keywords =     "CELL BE; matrix multiplication; SGEMM; short vector SIMD; SPE",
  note =         "UT-CS-08-609",
}

@TechReport{Buttari:2007:PTQ, author = "Alfredo Buttari and Julien Langou and Jakub Kurzak and Jack J.
Dongarra", title = "Parallel Tiled {$ Q R $} Factorization for Multicore Architectures", type = "LAPACK Working Note", number = "190", institution = inst-UTK-CS, address = inst-UTK-CS:adr, month = jul, year = "2007", bibdate = "Fri Apr 24 12:25:43 2009", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "UT-CS-07-598. Published in \cite{Buttari:2008:PTF}.", URL = "http://www.netlib.org/lapack/lawnspdf/lawn190.pdf", abstract = "As multicore systems continue to gain ground in the High Performance Computing world, linear algebra algorithms have to be reformulated or new algorithms have to be developed in order to take advantage of the architectural features on these new processors. Fine grain parallelism becomes a major requirement and introduces the necessity of loose synchronization in the parallel execution of an operation. This paper presents an algorithm for the QR factorization where the operations can be represented as a sequence of small tasks that operate on square blocks of data. These tasks can be dynamically scheduled for execution based on the dependencies among them and on the availability of computational resources. This may result in an out of order execution of the tasks which will completely hide the presence of intrinsically sequential tasks in the factorization. Performance comparisons are presented with the LAPACK algorithm for QR factorization where parallelism can only be exploited at the level of the BLAS operations.", acknowledgement = ack-nhfb, } @TechReport{Buttari:2007:CPT, author = "Alfredo Buttari and Julien Langou and Jakub Kurzak and Jack J. 
Dongarra", title = "A Class of Parallel Tiled Linear Algebra Algorithms for Multicore Architectures", type = "LAPACK Working Note", number = "191", institution = inst-UTK-CS, address = inst-UTK-CS:adr, month = sep, year = "2007", bibdate = "Fri Apr 24 12:25:43 2009", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", URL = "http://www.netlib.org/lapack/lawnspdf/lawn191.pdf", abstract = "As multicore systems continue to gain ground in the High Performance Computing world, linear algebra algorithms have to be reformulated or new algorithms have to be developed in order to take advantage of the architectural features on these new processors. Fine grain parallelism becomes a major requirement and introduces the necessity of loose synchronization in the parallel execution of an operation. This paper presents an algorithm for the Cholesky, LU and QR factorization where the operations can be represented as a sequence of small tasks that operate on square blocks of data. These tasks can be dynamically scheduled for execution based on the dependencies among them and on the availability of computational resources. This may result in an out of order execution of the tasks which will completely hide the presence of intrinsically sequential tasks in the factorization. 
Performance comparisons are presented with the LAPACK algorithms where parallelism can only be exploited at the level of the BLAS operations and vendor implementations.", acknowledgement = ack-nhfb, note = "UT-CS-07-600", } @TechReport{Granat:2007:PER, author = "Robert Granat and Bo K{\aa}gstr{\"o}m and Daniel Kressner", title = "Parallel eigenvalue reordering in real {Schur} forms", type = "LAPACK Working Note", number = "192", institution = inst-UTK-CS, address = inst-UTK-CS:adr, month = sep, year = "2007", bibdate = "Fri Apr 24 12:25:43 2009", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", URL = "http://www.netlib.org/lapack/lawnspdf/lawn192.pdf", acknowledgement = ack-nhfb, } @TechReport{Baboulin:2007:CCC, author = "Marc Baboulin and Jack J. Dongarra and Serge Gratton and Julien Langou", title = "Computing the Conditioning of the Components of a Linear Least Squares Solution", type = "LAPACK Working Note", number = "193", institution = inst-UTK-CS, address = inst-UTK-CS:adr, month = sep, year = "2007", bibdate = "Fri Apr 24 12:25:43 2009", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", URL = "http://www.netlib.org/lapack/lawnspdf/lawn193.pdf", abstract = "In this paper, we address the accuracy of the results for the overdetermined full rank linear least squares problem. We recall theoretical results obtained in [2] on conditioning of the least squares solution and the components of the solution when the matrix perturbations are measured in Frobenius or spectral norms. Then we define computable estimates for these condition numbers and we interpret them in terms of statistical quantities. In particular, we show that, in the classical linear statistical model, the ratio of the variance of one component of the solution by the variance of the right-hand side is exactly the condition number of this solution component when perturbations on the right-hand side are considered. 
We also provide fragment codes using LAPACK [1] routines to compute the variance-covariance matrix and the least squares conditioning and we give the corresponding computational cost. Finally we present a small historical numerical example that was used by Laplace [19] for computing the mass of Jupiter and experiments from the space industry with real physical data.", acknowledgement = ack-nhfb, keywords = "condition number; LAPACK; Linear least squares; parameter estimation; ScaLAPACK; statistical linear least squares; variance-covariance matrix", note = "UT-CS-07-604", }

@TechReport{Vomel:2007:RRT,
  author =       "Christof V{\"o}mel",
  title =        "A Refined Representation Tree for {MRRR}",
  type =         "LAPACK Working Note",
  number =       "194",
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  month =        nov,
  year =         "2007",
  MRclass =      "65F15, 65Y15",
  bibdate =      "Fri Apr 24 12:25:43 2009",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn194.pdf",
  abstract =     "In order to compute orthogonal eigenvectors of a symmetric tridiagonal matrix without Gram--Schmidt orthogonalization, the MRRR algorithm finds a shifted {$ L D L^T $} factorization (representation) for each eigenvalue such that the local eigenvalue is a singleton, that is defined to high relative accuracy and has a large relative gap.\par MRRR's representation tree describes how, by successive shifting and refinement, each eigenvalue becomes relatively isolated. Its shape plays a crucial role for complexity: deeper trees are associated with more eigenvalue refinement to resolve clustering of eigenvalues.\par Motivated by recently observed deteriorating complexity of the LAPACK 3.1 MRRR kernels for certain matrices of large dimension, we here re-examine and refine the representation tree concept.\par We first describe the discovery of what we call a spectrum peeling problem: even though the matrix at hand might not have a spectrum with clusters within clusters, the representation tree might still contain a long chain of large nodes.\par We then formulate a refined proposal for the representation tree that aims at avoiding the unwarranted work while preserving tight accuracy bounds where possible. The trade-off between performance and accuracy in our solution is discussed by practical examples.",
  acknowledgement = ack-nhfb,
  keywords =     "complexity; LAPACK; MRRR; Multiple relatively robust representations; representation tree; ScaLAPACK; spectrum peeling",
}

@TechReport{Vomel:2007:SMA,
  author =       "Christof V{\"o}mel",
  title =        "{ScaLAPACK}'s {MRRR} Algorithm",
  type =         "LAPACK Working Note",
  number =       "195",
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  month =        nov,
  year =         "2007",
  MRclass =      "65F15, 65Y15",
  bibdate =      "Fri Apr 24 12:25:43 2009",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn195.pdf",
  abstract =     "The sequential algorithm of Multiple Relatively Robust Representations, MRRR, can compute numerically orthogonal eigenvectors of an unreduced symmetric tridiagonal matrix {$ T \in R^{n \times n} $} with {$ O(n^2) $} cost.\par This paper describes the design of ScaLAPACK's parallel MRRR algorithm. One emphasis is on the critical role of the representation tree in achieving both numerical accuracy and parallel scalability. A second point concerns the favorable properties of this code: subset computation, the use of static memory, and scalability.\par Unlike ScaLAPACK's Divide \& Conquer and QR, MRRR can compute subsets of eigenpairs at reduced cost. And in contrast to inverse iteration which can fail, it is guaranteed to produce a numerically satisfactory answer while maintaining memory scalability.\par ParEig, the parallel MRRR algorithm for PLAPACK, uses dynamic memory allocation. This is avoided by our code at marginal additional cost. We also use a different representation tree criterion that allows for more accurate computation of the eigenvectors but can make parallelization more difficult.",
  acknowledgement = ack-nhfb,
  keywords =     "multiple relatively robust representations; numerical software; ScaLAPACK; Symmetric eigenproblem",
}

@TechReport{Drmac:2007:GCP,
  author =       "Zlatko Drma{\v{c}}",
  title =        "A global convergence proof of cyclic {Jacobi} methods with block rotations",
  type =         "LAPACK Working Note",
  number =       "196",
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  day =          "10",
  month =        dec,
  year =         "2007",
  bibdate =      "Fri Apr 24 12:25:43 2009",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn196.pdf",
  abstract =     "This paper introduces a globally convergent block (column- and row-) cyclic Jacobi method for diagonalization of Hermitian matrices and for computation of the singular value decomposition of general matrices. It is shown that a block rotation (generalization of the Jacobi's $ 2 \times 2 $ rotation) must be computed and implemented in a particular way to guarantee global convergence. This solves a long standing open problem of convergence of block cyclic Jacobi methods. The proof includes the convergence of the eigenspaces in the general case of multiple eigenvalues.",
  acknowledgement = ack-nhfb,
}

@TechReport{Volkov:2008:UGA,
  author =       "Vasily Volkov and James W. Demmel",
  title =        "Using {GPUs} to Accelerate the Bisection Algorithm for Finding Eigenvalues of Symmetric Tridiagonal Matrices",
  type =         "LAPACK Working Note",
  number =       "197",
  institution =  inst-UCB-EECS,
  address =      inst-UCB-EECS:adr,
  month =        jan,
  year =         "2008",
  bibdate =      "Fri Apr 24 12:25:43 2009",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn197.pdf",
  abstract =     "Graphical Processing Units (GPUs) potentially promise widespread and inexpensive high performance computation. However, architectural limitations (only some operations and memory access patterns can be performed quickly, partial support for IEEE floating point arithmetic) make it necessary to change existing algorithms to attain high performance and correctness. Here we show how to make the bisection algorithm for eigenvalues of symmetric tridiagonal matrices (sstebz from LAPACK) run both fast and correctly on an ATI Radeon X1900 GPU. Our fastest algorithm takes up to $ 156 \times $ less time than Intel's Math Kernel Library version of sstebz running on the CPU, but does so by doing many redundant floating point operations compared to the CPU version. We use an automatic tuning procedure analogous to ATLAS or PHiPAC to decide the optimal redundancy. Correctness despite partial IEEE floating point semantics required explicitly adding 0 in the inner loop. The problems and solutions discussed here are of interest on other GPU architectures.",
  acknowledgement = ack-nhfb,
  ucbnumber =    "UCB/EECS-2007-179",
}

@TechReport{Kaagstrom:2008:BAR,
  author =       "Bo K{\aa}gstr{\"o}m and Daniel Kressner and Enrique S. Quintana-Orti and Gregorio Quintana-Orti",
  title =        "Blocked Algorithms for the Reduction to {Hessenberg}-Triangular Form Revisited",
  type =         "LAPACK Working Note",
  number =       "198",
  institution =  "Department of Computing Science and HPC2N",
  address =      "Ume{\aa} University, S-901 Ume{\aa}, Sweden",
  month =        feb,
  year =         "2008",
  bibdate =      "Fri Apr 24 12:25:43 2009",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn198.pdf",
  abstract =     "We present two variants of Moler and Stewart's algorithm for reducing a matrix pair to Hessenberg-triangular (HT) form with increased data locality in the access to the matrices. In one of these variants, a careful reorganization and accumulation of Givens rotations enables the use of efficient level 3 BLAS. Experimental results on four different architectures, representative of current high performance processors, compare the performances of the new variants with those of the implementation of Moler and Stewart's algorithm in subroutine DGGHRD from LAPACK, Dackland and K{\aa}gstr{\"o}m's two-stage algorithm for the HT form, and a modified version of the latter which requires considerably less flops.",
  acknowledgement = ack-nhfb,
  keywords =     "blocked algorithms; Generalized eigenvalue problems; Hessenberg-triangular form; high-performance computing; level 3 BLAS; orthogonal transformations; QZ algorithm",
}

@TechReport{Gustavson:2008:RFP, author = "Fred G. Gustavson and Jerzy Wasniewski and Jack J.
Dongarra and Julien Langou", title = "Rectangular Full Packed Format for {Cholesky}'s Algorithm: Factorization, Solution and Inversion", type = "LAPACK Working Note", number = "199", institution = inst-UTK-CS, address = inst-UTK-CS:adr, month = apr, year = "2008", bibdate = "Fri Apr 24 12:25:43 2009", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", URL = "http://www.netlib.org/lapack/lawnspdf/lawn199.pdf", abstract = "We describe a new data format for storing triangular, symmetric, and Hermitian matrices called RFPF (Rectangular Full Packed Format). The standard two dimensional arrays of Fortran and C (also known as full format) that are used to represent triangular and symmetric matrices waste nearly half of the storage space but provide high performance via the use of Level 3 BLAS. Standard packed format arrays fully utilize storage (array space) but provide low performance as there is no Level 3 packed BLAS. We combine the good features of packed and full storage using RFPF to obtain high performance via using Level 3 BLAS as RFPF is a standard full format representation. Also, RFPF requires exactly the same minimal storage as packed format. Each LAPACK full and/or packed triangular, symmetric, and Hermitian routine becomes a single new RFPF routine based on eight possible data layouts of RFPF. This new RFPF routine usually consists of two calls to the corresponding LAPACK full format routine and two calls to Level 3 BLAS routines. This means no new software is required. As examples, we present LAPACK routines for Cholesky factorization, Cholesky solution and Cholesky inverse computation in RFPF to illustrate this new work and to describe its performance on several commonly used computer platforms. Performance of LAPACK full routines using RFPF versus LAPACK full routines using standard format for both serial and SMP parallel processing is about the same while using half the storage. 
Performance gains are roughly one to a factor of 43 for serial and one to a factor of 97 for SMP parallel times faster using vendor LAPACK full routines with RFPF than with using vendor and/or reference packed routines.", acknowledgement = ack-nhfb, keywords = "Algorithms; BLAS; Linear Algebra Libraries; Performance", subject = "G.1.3 [Numerical Analysis]: Numerical Linear Algebra - Linear Systems (symmetric and Hermitian); G.4 [Mathematics of Computing]: Mathematical Software", note = "UT-CS-08-614", }

@TechReport{Baboulin:2008:SID,
  author =       "Marc Baboulin and Jack J. Dongarra and Stanimire Tomov",
  title =        "Some Issues in Dense Linear Algebra for Multicore and Special Purpose Architectures",
  type =         "LAPACK Working Note",
  number =       "200",
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  month =        may,
  year =         "2008",
  bibdate =      "Fri Apr 24 12:25:43 2009",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn200.pdf",
  abstract =     "We address some key issues in designing dense linear algebra (DLA) algorithms that are common for both multi/many-cores and special purpose architectures (in particular GPUs). We present them in the context of an LU factorization algorithm, where randomization techniques are used as an alternative to pivoting. This approach yields an algorithm based entirely on a collection of small Level 3 BLAS type computational tasks, which has emerged as a common goal in designing DLA algorithms for new architectures. Other common trends, also considered here, are block asynchronous task execution and ``Block'' layouts for the data associated with the separate tasks. We present numerical results and other specific experiments with DLA algorithms on NVIDIA GPUs using CUDA. The GPU results are also of interest themselves as we show a performance of up to 160 Gflop/s on a single Quadro FX 5600 card.",
  acknowledgement = ack-nhfb,
  keywords =     "dense linear algebra; graphic process units; LU factorization; multicore processors; parallel algorithms",
  note =         "UT-CS-08-615",
}

@TechReport{Kurzak:2008:QFC,
  author =       "Jakub Kurzak and Jack J. Dongarra",
  title =        "{$ Q R $} Factorization for the {CELL} Processor",
  type =         "LAPACK Working Note",
  number =       "201",
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  month =        may,
  year =         "2008",
  bibdate =      "Fri Apr 24 12:25:43 2009",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn201.pdf",
  abstract =     "The QR factorization is one of the most important operations in dense linear algebra, offering a numerically stable method for solving linear systems of equations including overdetermined and underdetermined systems. Classic implementation of the QR factorization suffers from performance limitations due to the use of matrix-vector type operations in the phase of panel factorization. These limitations can be remedied by using the idea of updating of QR factorization, rendering an algorithm, which is much more scalable and much more suitable for implementation on a multi-core processor. It is demonstrated how the potential of the CELL processor can be utilized to the fullest by employing the new algorithmic approach and successfully exploiting the capabilities of the CELL processor in terms of Instruction Level Parallelism and Thread-Level Parallelism.",
  acknowledgement = ack-nhfb,
  keywords =     "CELL processor; linear algebra; matrix factorization; multi-core; numerical algorithms",
  note =         "UT-CS-08-616",
}

@TechReport{Volkov:2008:LQC,
  author =       "Vasily Volkov and James W. Demmel",
  title =        "{$ L U $}, {$ Q R $} and {Cholesky} Factorizations using Vector Capabilities of {GPUs}",
  type =         "LAPACK Working Note",
  number =       "202",
  institution =  inst-UCB-EECS,
  address =      inst-UCB-EECS:adr,
  month =        may,
  year =         "2008",
  bibdate =      "Fri Apr 24 12:25:43 2009",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn202.pdf",
  abstract =     "We present performance results for dense linear algebra using the 8-series NVIDIA GPUs. Our matrix-matrix multiply routine (GEMM) runs 60\% faster than the vendor implementation in CUBLAS 1.1 and approaches the peak of hardware capabilities. Our LU, QR and Cholesky factorizations achieve up to 80--90\% of the peak GEMM rate. Our parallel LU running on two GPUs achieves up to $ \approx $300 Gflop/s. These results are accomplished by challenging the accepted view of the GPU architecture and programming guidelines. We argue that modern GPUs should be viewed as multithreaded multicore vector units. We exploit blocking similarly to vector computers and heterogeneity of the system by computing both on GPU and CPU. This study includes detailed benchmarking of the GPU memory system that reveals sizes and latencies of caches and TLB. We present a couple of algorithmic optimizations aimed at increasing parallelism and regularity in the problem that provide us with slightly higher performance.",
  acknowledgement = ack-nhfb,
  ucbnumber =    "UCB/EECS-2008-49",
}

@TechReport{Demmel:2008:NND, author = "James W. Demmel and Yozo Hida and Mark F. Hoemmen and E.
Jason Riedy", title = "Non-Negative Diagonals and High Performance on Low-Profile Matrices from Householder {$ Q R $}", type = "LAPACK Working Note", number = "203", institution = inst-UCB-EECS, address = inst-UCB-EECS:adr, day = "30", month = may, year = "2008", bibdate = "Fri Apr 24 12:25:43 2009", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", URL = "http://www.netlib.org/lapack/lawnspdf/lawn203.pdf", abstract = "The Householder reflections used in LAPACK's {$ Q R $} factorization leave positive and negative real entries along {$R$}'s diagonal. This is sufficient for most applications of {$ Q R $} factorizations, but a few require that {$R$} have a non-negative diagonal. This note provides a new Householder generation routine to produce a non-negative diagonal. Additionally, we find that scanning for trailing zeros in the generated reflections leads to large performance improvements when applying reflections with many trailing zeros. Factoring low-profile matrices, those with non-zero entries mostly near the diagonal (e.g. band matrices), now requires far fewer operations. For example, {$ Q R $} factorization of matrices with profile width $b$ that are stored densely in an $ n \times n $ matrix improves from {$ O(n^3) $} to {$ O(n^2 + n b^2) $}.", acknowledgement = ack-nhfb, ucbnumber = "UCB/EECS-2008-76", }

@TechReport{Demmel:2008:COP,
  author =       "James W. Demmel and Laura Grigori and Mark F. Hoemmen and Julien Langou",
  title =        "Communication-optimal parallel and sequential {$ Q R $} and {$ L U $} factorizations",
  type =         "LAPACK Working Note",
  number =       "204",
  institution =  inst-UCB-EECS,
  address =      inst-UCB-EECS:adr,
  month =        aug,
  year =         "2008",
  bibdate =      "Fri Apr 24 12:25:43 2009",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  URL =          "http://www.eecs.berkeley.edu/Pubs/TechRpts/2008/EECS-2008-89.html; http://www.netlib.org/lapack/lawnspdf/lawn204.pdf",
  abstract =     "We present parallel and sequential dense QR factorization algorithms that are both optimal (up to polylogarithmic factors) in the amount of communication they perform, and just as stable as Householder QR. Our first algorithm, Tall Skinny QR (TSQR), factors $ m \times n $ matrices in a one-dimensional (1-D) block cyclic row layout, and is optimized for $ m \gg n $. Our second algorithm, CAQR (Communication-Avoiding QR), factors general rectangular matrices distributed in a two-dimensional block cyclic layout. It invokes TSQR for each block column factorization.\par The new algorithms are superior in both theory and practice. We have extended known lower bounds on communication for sequential and parallel matrix multiplication to provide latency lower bounds, and show these bounds apply to the LU and QR decompositions. We not only show that our QR algorithms attain these lower bounds (up to polylogarithmic factors), but that existing LAPACK and ScaLAPACK algorithms perform asymptotically more communication. We also point out recent LU algorithms in the literature that attain at least some of these lower bounds.\par Both TSQR and CAQR have asymptotically lower latency cost in the parallel case, and asymptotically lower latency and bandwidth costs in the sequential case. In practice, we have implemented parallel TSQR on several machines, with speedups of up to $ 6.7 \times $ on 16 processors of a Pentium III cluster, and up to $ 4 \times $ on 32 processors of a BlueGene/L. We have also implemented sequential TSQR on a laptop for matrices that do not fit in DRAM, so that slow memory is disk. Our out-of-DRAM implementation was as little as $ 2 \times $ slower than the predicted runtime as though DRAM were infinite.\par We have also modeled the performance of our parallel CAQR algorithm, yielding predicted speedups over ScaLAPACK's PDGEQRF of up to $ 9.7 \times $ on an IBM Power5, up to $ 22.9 \times $ on a model Petascale machine, and up to $ 5.3 \times $ on a model of the Grid.",
  acknowledgement = ack-nhfb,
  ucbnumber =    "UCB/EECS-2008-89",
}

@TechReport{Bosilca:2008:ABF, author = "George Bosilca and Remi Delmas and Jack J. Dongarra and Julien Langou", title = "Algorithmic Based Fault Tolerance Applied to High Performance Computing", type = "LAPACK Working Note", number = "205", institution = inst-UTK-CS, address = inst-UTK-CS:adr, day = "23", month = may, year = "2008", bibdate = "Fri Apr 24 12:25:43 2009", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", URL = "http://www.netlib.org/lapack/lawnspdf/lawn205.pdf", abstract = "We present a new approach to fault tolerance for High Performance Computing system. Our approach is based on a careful adaptation of the Algorithmic Based Fault Tolerance technique (Huang and Abraham, 1984) to the need of parallel distributed computation. We obtain a strongly scalable mechanism for fault tolerance. We can also detect and correct errors (bit-flip) on the fly of a computation. To assess the viability of our approach, we have developed a fault tolerant matrix-matrix multiplication subroutine and we propose some models to predict its running time. Our parallel fault-tolerant matrix-matrix multiplication scores 1.4 TFLOPS on 484 processors (cluster {\tt jacquard.nersc.gov}) and returns a correct result while one process failure has happened. This represents 65\% of the machine peak efficiency and less than 12\% overhead with respect to the fastest failure-free implementation.
We predict (and have observed) that, as we increase the processor
                 count, the overhead of the fault tolerance drops
                 significantly.",
  acknowledgement = ack-nhfb,
  note =         "UT-CS-08-620",
}

@TechReport{Dongarra:2008:PLB,
  author =       "Jack J. Dongarra and Julien Langou",
  title =        "The Problem with the {Linpack} Benchmark Matrix
                 Generator",
  type =         "LAPACK Working Note",
  number =       "206",
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  day =          "12",
  month =        jun,
  year =         "2008",
  bibdate =      "Fri Apr 24 12:25:43 2009",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "Version 1; version 2 is dated 18 September 2008.",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn206.pdf",
  abstract =     "We characterize the matrix sizes for which the
                 Linpack Benchmark 1.0 matrix generator constructs a
                 matrix with identical columns.",
  acknowledgement = ack-nhfb,
  ucdenvernumber = "UCD-CCM-271",
}

@TechReport{Baboulin:2008:UDT,
  author =       "Marc Baboulin and Serge Gratton",
  title =        "Using dual techniques to derive componentwise and
                 mixed condition numbers for a linear functional of a
                 linear least squares solution",
  type =         "LAPACK Working Note",
  number =       "207",
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  month =        aug,
  year =         "2008",
  MRclass =      "65F35",
  bibdate =      "Fri Apr 24 12:25:43 2009",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn207.pdf",
  abstract =     "We prove duality results for adjoint operators and
                 product norms in the framework of Euclidean spaces. We
                 show how these results can be used to derive condition
                 numbers especially when perturbations on data are
                 measured componentwise relatively to the original
                 data. We apply this technique to obtain formulas for
                 componentwise and mixed condition numbers for a linear
                 functional of a linear least squares solution.
These expressions are closed when perturbations of the solution are
                 measured using a componentwise norm or the infinity
                 norm and we get an upper bound for the Euclidean
                 norm.",
  acknowledgement = ack-nhfb,
  keywords =     "adjoint operator; componentwise perturbations;
                 condition number; Dual norm; linear least squares",
  note =         "UT-CS-08-622",
}

@TechReport{Ltaief:2008:PBH,
  author =       "Hatem Ltaief and Jakub Kurzak and Jack Dongarra",
  title =        "Parallel Block {Hessenberg} Reduction using
                 Algorithms-By-Tiles for Multicore Architectures
                 Revisited",
  type =         "LAPACK Working Note",
  number =       "208",
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  month =        aug,
  year =         "2008",
  bibdate =      "Fri Apr 24 12:25:43 2009",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn208.pdf",
  abstract =     "The objective of this paper is to extend and redesign
                 the block matrix reduction applied for the family of
                 two-sided factorizations, introduced by Dongarra et
                 al. [9], to the context of multicore architectures
                 using algorithms-by-tiles. In particular, the Block
                 Hessenberg Reduction is very often used as a
                 pre-processing step in solving dense linear algebra
                 problems, such as the standard eigenvalue problem.
                 Although expensive, orthogonal transformations are
                 commonly used for this reduction because they
                 guarantee stability, as opposed to Gaussian
                 Elimination. Two versions of the Block Hessenberg
                 Reduction are presented in this paper, the first one
                 with Householder reflectors and the second one with
                 Givens rotations. A short investigation on variants of
                 Fast Givens Rotations is also mentioned. Furthermore,
                 in the last Top500 list from June 2008, 98\% of the
                 fastest parallel systems in the world are based on
                 multicores. The emerging petascale systems consisting
                 of hundreds of thousands of cores have exacerbated the
                 problem even more and it becomes judicious to
                 efficiently integrate existing or new numerical linear
                 algebra algorithms suitable for such hardware. By
                 exploiting the concepts of algorithms-by-tiles in the
                 multicore environment (i.e., high level of parallelism
                 with fine granularity and high performance data
                 representation combined with a dynamic data driven
                 execution), the Block Hessenberg Reduction presented
                 here achieves 72\% of the DGEMM peak on a
                 $ 12000 \times 12000 $ matrix with 16 Intel Tigerton
                 2.4 GHz processors.",
  acknowledgement = ack-nhfb,
  note =         "UT-CS-08-624",
}

@TechReport{Ltaief:2008:PBT,
  author =       "Hatem Ltaief and Jakub Kurzak and Jack Dongarra",
  title =        "Parallel Band Two-Sided Matrix Bidiagonalization for
                 Multicore Architectures",
  type =         "LAPACK Working Note",
  number =       "209",
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  month =        oct,
  year =         "2008",
  bibdate =      "Fri Apr 24 12:25:43 2009",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn209.pdf",
  abstract =     "The objective of this paper is to extend, in the
                 context of multicore architectures, the concepts of
                 algorithms-by-tiles [Buttari et al., 2007] for
                 Cholesky, LU, QR factorizations to the family of
                 two-sided factorizations. In particular, the
                 bidiagonal reduction of a general, dense matrix is
                 very often used as a pre-processing step for
                 calculating the singular value decomposition.
                 Furthermore, in the last Top500 list from June 2008,
                 98\% of the fastest parallel systems in the world were
                 based on multicores. The manycore trend has
                 increasingly exacerbated the problem, and it becomes
                 critical to efficiently integrate existing or new
                 numerical linear algebra algorithms suitable for such
                 hardware. By exploiting the concept of
                 algorithms-by-tiles in the multicore environment
                 (i.e., high level of parallelism with fine granularity
                 and high performance data representation combined with
                 a dynamic data driven execution), the band bidiagonal
                 reduction presented here achieves 94 Gflop/s on a
                 $ 12000 \times 12000 $ matrix with 16 Intel Tigerton
                 2.4 GHz processors.",
  acknowledgement = ack-nhfb,
  note =         "UT-CS-08-631",
}

@TechReport{Tomov:2008:TDL,
  author =       "Stanimire Tomov and Jack Dongarra and Marc Baboulin",
  title =        "Towards Dense Linear Algebra for Hybrid {GPU}
                 Accelerated Manycore Systems",
  type =         "LAPACK Working Note",
  number =       "210",
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  month =        oct,
  year =         "2008",
  bibdate =      "Fri Apr 24 12:25:43 2009",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn210.pdf",
  abstract =     "If multicore is a disruptive technology, try to
                 imagine hybrid multicore systems enhanced with
                 accelerators! This is happening today as accelerators,
                 in particular Graphics Processing Units (GPUs), are
                 steadily making their way into the high performance
                 computing (HPC) world. We highlight the trends leading
                 to the idea of hybrid manycore/GPU systems, and we
                 present a set of techniques that can be used to
                 efficiently program them. The presentation is in the
                 context of Dense Linear Algebra (DLA), a major
                 building block for many scientific computing
                 applications. We motivate the need for new algorithms
                 that would split the computation in a way that would
                 fully exploit the power that each of the hybrid
                 components offers. As the area of hybrid
                 multicore/GPU computing is still in its infancy, we
                 also argue for its importance in view of what future
                 architectures may look like. We therefore envision the
                 need for a DLA library similar to LAPACK but for
                 hybrid manycore/GPU systems.
We illustrate the main ideas with an LU-factorization algorithm where
                 particular techniques are used to reduce the amount of
                 pivoting, resulting in an algorithm achieving up to
                 388 GFlop/s for single and up to 99.4 GFlop/s for
                 double precision factorization on a hybrid Intel Xeon
                 (2x4 cores @ 2.33 GHz) --- NVIDIA GeForce GTX 280
                 (240 cores @ 1.30 GHz) system.",
  acknowledgement = ack-nhfb,
  keywords =     "dense linear algebra; graphics processing units;
                 hybrid computing; LU factorization; multicore
                 processors; parallel algorithms",
  note =         "UT-CS-08-632",
}

@TechReport{Gustavson:2008:LCK,
  author =       "Fred G. Gustavson and Jerzy Wasniewski and Jack
                 Dongarra",
  title =        "Level-3 {Cholesky} kernel subroutine of a fully
                 portable High Performance minimal storage hybrid
                 format {Cholesky} algorithm",
  type =         "LAPACK Working Note",
  number =       "211",
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  month =        dec,
  year =         "2008",
  bibdate =      "Fri Apr 24 12:25:43 2009",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn211.pdf",
  acknowledgement = ack-nhfb,
  note =         "UT-CS-08-634",
}

@TechReport{Li:2009:NAT,
  author =       "Yinan Li and Jack Dongarra and Stanimire Tomov",
  title =        "A Note on Auto-tuning {GEMM} for {GPUs}",
  type =         "LAPACK Working Note",
  number =       "212",
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  month =        jan,
  year =         "2009",
  bibdate =      "Fri Apr 24 12:25:43 2009",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn212.pdf",
  abstract =     "The development of high performance dense linear
                 algebra (DLA) critically depends on highly optimized
                 BLAS, and especially on the matrix multiplication
                 routine (GEMM). This is especially true for Graphics
                 Processing Units (GPUs), as evidenced by recently
                 published results on DLA for GPUs that rely on highly
                 optimized GEMM [13, 11]. However, the current best
                 GEMM performance, e.g.
of up to 375 GFlop/s in single precision and of up to 75 GFlop/s in
                 double precision arithmetic on NVIDIA's GTX 280, is
                 difficult to achieve. The development involves
                 extensive GPU knowledge and even backward engineering
                 to understand some undocumented insides about the
                 architecture that have been of key importance in the
                 development [12]. In this paper, we describe some GPU
                 GEMM auto-tuning optimization techniques that allow us
                 to keep up with changing hardware by rapidly reusing,
                 rather than reinventing, the existing ideas.
                 Auto-tuning, as we show in this paper, is a very
                 practical solution where in addition to getting an
                 easy portability, we can often get substantial
                 speedups even on current GPUs (e.g. up to 27\% in
                 certain cases for both single and double precision
                 GEMMs on the GTX 280).",
  acknowledgement = ack-nhfb,
  keywords =     "Auto-tuning; dense linear algebra; GPUs; matrix
                 multiply",
  note =         "UT-CS-09-635",
}

%%% NOTE(review): the abstract formerly stored in the next entry described
%%% GPU/CUBLAS benchmarking and contradicted the entry's title and
%%% keywords.  The replacement text below should be confirmed against
%%% lawn213.pdf.

@TechReport{Kurzak:2009:SLA,
  author =       "Jakub Kurzak and Hatem Ltaief and Jack Dongarra and
                 Rosa M. Badia",
  title =        "Scheduling Linear Algebra Operations on Multicore
                 Processors",
  type =         "LAPACK Working Note",
  number =       "213",
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  month =        feb,
  year =         "2009",
  bibdate =      "Fri Apr 24 12:25:43 2009",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn213.pdf",
  abstract =     "State-of-the-art dense linear algebra software, such
                 as the LAPACK and ScaLAPACK libraries, suffer
                 performance losses on multicore processors due to
                 their inability to fully exploit thread-level
                 parallelism. At the same time the coarse-grain
                 dataflow model gains popularity as a paradigm for
                 programming multicore architectures. This work looks
                 at implementing classic dense linear algebra
                 workloads, Cholesky factorization, QR factorization
                 and LU factorization, using dynamic data-driven
                 execution. Two emerging approaches to implementing
                 coarse-grain dataflow are examined, the model of
                 nested parallelism, represented by the Cilk framework,
                 and the model of parallelism expressed through an
                 arbitrary Direct Acyclic Graph, represented by the SMP
                 Superscalar framework. Performance and coding effort
                 are analyzed and compared against code manually
                 parallelized at the thread level.",
  acknowledgement = ack-nhfb,
  keywords =     "Cholesky; factorization; linear algebra; LU;
                 multicore; QR; scheduling; task graph",
  note =         "UT-CS-09-636",
}

@TechReport{Kurzak:2009:STS,
  author =       "Jakub Kurzak and Hatem Ltaief and Jack Dongarra",
  title =        "Scheduling Two-sided Transformations using
                 Algorithms-by-Tiles on Multicore Architectures",
  type =         "LAPACK Working Note",
  number =       "214",
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  month =        feb,
  year =         "2009",
  bibdate =      "Fri Apr 24 12:25:43 2009",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn214.pdf",
  abstract =     "The objective of this paper is to describe, in the
                 context of multicore architectures, different
                 scheduler implementations for the two-sided linear
                 algebra transformations, in particular the Hessenberg
                 and Bidiagonal reductions which are the first steps
                 for the standard eigenvalue problems and the singular
                 value decompositions respectively. State-of-the-art
                 dense linear algebra software, such as the LAPACK and
                 ScaLAPACK libraries, suffer performance losses on
                 multicore processors due to their inability to fully
                 exploit thread-level parallelism. At the same time the
                 coarse-grain dataflow model gains popularity as a
                 paradigm for programming multicore architectures. By
                 using the concepts of algorithms-by-tiles [Buttari et
                 al., 2007] along with efficient mechanisms for
                 data-driven execution, these two-sided reductions
                 achieve high performance computing.
The main drawback of the algorithms-by-tiles approach for two-sided
                 transformations is that the full reduction can not be
                 obtained in one stage. Other methods have to be
                 considered to further reduce the band matrices to the
                 required forms.",
  acknowledgement = ack-nhfb,
  note =         "UT-CS-09-637",
}

@TechReport{Ballard:2009:COP,
  author =       "Grey Ballard and James Demmel and Olga Holtz and Oded
                 Schwartz",
  title =        "Communication-optimal Parallel and Sequential
                 {Cholesky} decomposition",
  type =         "LAPACK Working Note",
  number =       "215",
  institution =  inst-UCB-EECS,
  address =      inst-UCB-EECS:adr,
  day =          "13",
  month =        feb,
  year =         "2009",
  bibdate =      "Fri Apr 24 12:25:43 2009",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn215.pdf",
  abstract =     "Numerical algorithms have two kinds of costs:
                 arithmetic and communication, by which we mean either
                 moving data between levels of a memory hierarchy (in
                 the sequential case) or over a network connecting
                 processors (in the parallel case). Communication costs
                 often dominate arithmetic costs, so it is of interest
                 to design algorithms minimizing communication. In this
                 paper we first extend known lower bounds on the
                 communication cost (both for bandwidth and for
                 latency) of conventional ({$ O(n^3) $}) matrix
                 multiplication to Cholesky factorization, which is
                 used for solving dense symmetric positive definite
                 linear systems. Second, we compare the cost of various
                 Cholesky decomposition implementations to this lower
                 bound, and draw the following conclusions:\par
                 \begin{itemize}
                 \item ``Naive'' sequential algorithms for Cholesky
                 attain neither the bandwidth nor latency lower bounds.
                 \item The sequential blocked algorithm in LAPACK (with
                 the right block size), as well as various recursive
                 algorithms [AP00, GJ01, AGW01, ST04], and one based on
                 work of Toledo [Tol97], can attain the bandwidth lower
                 bound.
                 \item The LAPACK algorithm can also attain the latency
                 bound if used with blocked data structures rather than
                 column-wise or row-wise matrix data structures, though
                 the Toledo algorithm cannot.
                 \item The recursive sequential algorithm due to [AP00]
                 attains the bandwidth and latency lower bounds at
                 every level of a multi-level memory hierarchy, in a
                 `cache-oblivious' way.
                 \item The parallel implementation of Cholesky in the
                 ScaLAPACK library (again with the right block-size)
                 attains both the bandwidth and latency lower bounds to
                 within a polylogarithmic factor.
                 \end{itemize}
                 Combined with prior results in [DGHL08a, DGHL08b,
                 DGX08] this gives a complete set of
                 communication-optimal algorithms for {$ O(n^3) $}
                 implementations of three basic factorizations of dense
                 linear algebra: LU with pivoting, QR and Cholesky. But
                 it goes beyond this prior work on sequential LU and QR
                 by optimizing communication for any number of levels
                 of memory hierarchy.",
  acknowledgement = ack-nhfb,
  ucbnumber =    "UCB/EECS-2009-29",
}

@TechReport{Granat:2009:NPQ,
  author =       "Robert Granat and Bo K{\aa}gstr{\"o}m and Daniel
                 Kressner",
  title =        "A novel parallel {$ Q R $} algorithm for hybrid
                 distributed memory {HPC} systems",
  type =         "LAPACK Working Note",
  number =       "216",
  institution =  "Department of Computing Science and HPC2N",
  address =      "Ume{\aa} University, S-901 Ume{\aa}, Sweden",
  month =        apr,
  year =         "2009",
  bibdate =      "Fri Apr 24 12:25:43 2009",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn216.pdf",
  abstract =     "A novel variant of the parallel QR algorithm for
                 solving dense nonsymmetric eigenvalue problems on
                 hybrid distributed high performance computing (HPC)
                 systems is presented. For this purpose, we introduce
                 the concept of multi-window bulge chain chasing and
                 parallelize aggressive early deflation.
The multi-window approach ensures that most computations when chasing
                 chains of bulges are performed in level 3 BLAS
                 operations, while the aim of aggressive early
                 deflation is to speed up the convergence of the QR
                 algorithm. Mixed MPI-OpenMP coding techniques are
                 utilized for porting the codes to distributed memory
                 platforms with multithreaded nodes, such as multicore
                 processors. Numerous numerical experiments confirm the
                 superior performance of our parallel QR algorithm in
                 comparison with the existing ScaLAPACK code, leading
                 to an implementation that is one to two orders of
                 magnitude faster for sufficiently large problems,
                 including a number of examples from applications.",
  acknowledgement = ack-nhfb,
  keywords =     "aggressive early deflation; bulge chasing; Eigenvalue
                 problem; hybrid distributed memory systems; level 3
                 performance; multishift; nonsymmetric QR algorithm;
                 parallel algorithms; parallel computations",
  note =         "UMINF-09.06",
}

@TechReport{Agullo:2009:CSO,
  author =       "Emmanuel Agullo and Bilel Hadri and Hatem Ltaief and
                 Jack Dongarra",
  title =        "Comparative Study of One-Sided Factorizations with
                 Multiple Software Packages on Multi-Core Hardware",
  type =         "LAPACK Working Note",
  number =       "217",
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  day =          "28",
  month =        apr,
  year =         "2009",
  bibdate =      "Wed Aug 24 12:36:41 MDT 2011",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "UT-CS-09-640.",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn217.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Ballard:2009:MCL,
  author =       "Grey Ballard and James Demmel and Olga Holtz and Oded
                 Schwartz",
  title =        "Minimizing Communication in Linear Algebra",
  type =         "LAPACK Working Note",
  number =       "218",
  institution =  inst-UCB-EECS,
  address =      inst-UCB-EECS:adr,
  day =          "15",
  month =        may,
  year =         "2009",
  bibdate =      "Wed Aug 24 12:36:41 MDT 2011",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "UCB/EECS-2009-62",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn218.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Tomov:2009:ARU,
  author =       "Stanimire Tomov and Jack Dongarra",
  title =        "Accelerating the reduction to upper {Hessenberg} form
                 through hybrid {GPU}-based computing",
  type =         "LAPACK Working Note",
  number =       "219",
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  day =          "24",
  month =        may,
  year =         "2009",
  bibdate =      "Wed Aug 24 12:36:41 MDT 2011",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "UT-CS-09-642.",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn219.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Kurzak:2009:FDS,
  author =       "Jakub Kurzak and Jack Dongarra",
  title =        "Fully Dynamic Scheduler for Numerical Computing on
                 Multicore Processors",
  type =         "LAPACK Working Note",
  number =       "220",
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  day =          "4",
  month =        jun,
  year =         "2009",
  bibdate =      "Wed Aug 24 12:36:41 MDT 2011",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "UT-CS-09-643.",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn220.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Song:2009:DTS,
  author =       "Fengguang Song and Asim YarKhan and Jack Dongarra",
  title =        "Dynamic Task Scheduling for Linear Algebra Algorithms
                 on Distributed-Memory Multicore Systems",
  type =         "LAPACK Working Note",
  number =       "221",
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  day =          "13",
  month =        apr,
  year =         "2009",
  bibdate =      "Wed Aug 24 12:36:41 MDT 2011",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "UT-CS-09-638.",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn221.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Hadri:2009:EPT,
  author =       "Bilel Hadri and Hatem Ltaief and Emmanuel Agullo and
                 Jack Dongarra",
  title =        "Enhancing Parallelism of Tile {$ Q R $} Factorization
                 for Multicore Architectures",
  type =         "LAPACK Working Note",
  number =       "222",
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  day =          "4",
  month =        sep,
year =         "2009",
  bibdate =      "Wed Aug 24 12:36:41 MDT 2011",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "UT-CS-09-645.",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn222.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Ltaief:2009:SHP,
  author =       "Hatem Ltaief and Stanimire Tomov and Rajib Nath and
                 Peng Du and Jack Dongarra",
  title =        "A Scalable High Performant {Cholesky} Factorization
                 for Multicore with {GPU} Accelerators",
  type =         "LAPACK Working Note",
  number =       "223",
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  day =          "25",
  month =        nov,
  year =         "2009",
  bibdate =      "Wed Aug 24 12:36:41 MDT 2011",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "UT-CS-09-646.",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn223.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Agullo:2010:QFT,
  author =       "Emmanuel Agullo and Camille Coti and Jack Dongarra
                 and Thomas Herault and Julien Langou",
  title =        "{$ Q R $} Factorization of Tall and Skinny Matrices
                 in a Grid Computing Environment",
  type =         "LAPACK Working Note",
  number =       "224",
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  day =          "6",
  month =        apr,
  year =         "2010",
  bibdate =      "Wed Aug 24 12:36:41 MDT 2011",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "UT-CS-10-651. Published in the Proceedings of IPDPS
                 2010: 24th IEEE International Parallel and Distributed
                 Processing Symposium Atlanta GA April 2010.",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn224.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Tomov:2010:DLA,
  author =       "Stanimire Tomov and Rajib Nath and Hatem Ltaief and
                 Jack Dongarra",
  title =        "Dense Linear Algebra Solvers for Multicore with {GPU}
                 Accelerators",
  type =         "LAPACK Working Note",
  number =       "225",
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  day =          "18",
  month =        apr,
  year =         "2010",
  bibdate =      "Wed Aug 24 12:36:41 MDT 2011",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "UT-CS-09-649.
Published in the Proceedings of IPDPS 2010: 24th IEEE International
                 Parallel and Distributed Processing Symposium Atlanta
                 GA April 2010.",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn225.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Grigori:2010:CCO,
  author =       "Laura Grigori and James W. Demmel and Hua Xiang",
  title =        "{CALU}: a communication optimal {$ L U $}
                 factorization algorithm",
  type =         "LAPACK Working Note",
  number =       "226",
  institution =  inst-UCB-EECS,
  address =      inst-UCB-EECS:adr,
  day =          "15",
  month =        mar,
  year =         "2010",
  bibdate =      "Wed Aug 24 12:36:41 MDT 2011",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "UCB/EECS-2010-29. Submitted to SIAM Journal on Matrix
                 Analysis and Applications (SIMAX).",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn226.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Nath:2010:IMG,
  author =       "Rajib Nath and Stanimire Tomov and Jack Dongarra",
  title =        "An Improved {MAGMA GEMM} for {Fermi GPUs}",
  type =         "LAPACK Working Note",
  number =       "227",
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  day =          "29",
  month =        jul,
  year =         "2010",
  bibdate =      "Wed Aug 24 12:36:41 MDT 2011",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "UT-CS-10-655.",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn227.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Du:2010:COT,
  author =       "Peng Du and Rick Weber and Piotr Luszczek and
                 Stanimire Tomov and Gregory Peterson and Jack
                 Dongarra",
  title =        "From {CUDA} to {OpenCL}: Towards a
                 Performance-portable Solution for Multi-platform {GPU}
                 Programming",
  type =         "LAPACK Working Note",
  number =       "228",
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  day =          "6",
  month =        sep,
  year =         "2010",
  bibdate =      "Wed Aug 24 12:36:41 MDT 2011",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "UT-CS-10-656.",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn228.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Kurzak:2010:ITF,
  author =       "Jakub Kurzak and Rajib Nath
and Peng Du and Jack Dongarra",
  title =        "An Implementation of the Tile {$ Q R $} Factorization
                 for a {GPU} and Multiple {CPUs}",
  type =         "LAPACK Working Note",
  number =       "229",
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  day =          "15",
  month =        sep,
  year =         "2010",
  bibdate =      "Wed Aug 24 12:36:41 MDT 2011",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "UT-CS-10-657. Submitted to PARA'10",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn229.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Agullo:2010:FCB,
  author =       "Emmanuel Agullo and Cedric Augonnet and Jack Dongarra
                 and Hatem Ltaief and Raymond Namyst and Samuel
                 Thibault and Stanimire Tomov",
  title =        "Faster, Cheaper, Better --- a Hybridization
                 Methodology to Develop Linear Algebra Software for
                 {GPUs}",
  type =         "LAPACK Working Note",
  number =       "230",
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  day =          "15",
  month =        sep,
  year =         "2010",
  bibdate =      "Wed Aug 24 12:36:41 MDT 2011",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "UT-CS-10-658. To appear in GPU Computing GEMs, vol.
                 2",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn230.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Bosilca:2010:DGD,
  author =       "G. Bosilca and A. Bouteiller and A. Danalis and T.
                 Herault and P. Lemarinier and J. Dongarra",
  title =        "{DAGuE}: {A} generic distributed {DAG} engine for
                 high performance computing",
  type =         "LAPACK Working Note",
  number =       "231",
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  day =          "15",
  month =        sep,
  year =         "2010",
  bibdate =      "Wed Aug 24 12:36:41 MDT 2011",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "UT-CS-10-659.",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn231.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Bosilca:2010:DMT,
  author =       "G. Bosilca and A. Bouteiller and A. Danalis and M.
                 Faverge and H. Haidar and T. Herault and J. Kurzak and
                 J. Langou and P. Lemarinier and H. Ltaief and P.
                 Luszczek and A. YarKhan and J.
Dongarra",
  title =        "Distributed-Memory Task Execution and Dependence
                 Tracking within {DAGuE} and the {DPLASMA Project}",
  type =         "LAPACK Working Note",
  number =       "232",
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  day =          "15",
  month =        sep,
  year =         "2010",
  bibdate =      "Wed Aug 24 12:36:41 MDT 2011",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "UT-CS-10-660.",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn232.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Agullo:2010:FMN,
  author =       "E. Agullo and C. Augonnet and J. Dongarra and M.
                 Faverge and H. Ltaief and S. Thibault and S. Tomov",
  title =        "{$ Q R $} Factorization on a Multicore Node Enhanced
                 with Multiple {GPU} Accelerators",
  type =         "LAPACK Working Note",
  number =       "233",
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  month =        oct,
  year =         "2010",
  bibdate =      "Wed Aug 24 12:36:41 MDT 2011",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "UT-CS-10-XXX, published in Proceedings of IPDPS
                 2011.",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn233.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Dongarra:2010:RTT,
  author =       "Jack Dongarra and Piotr Luszczek",
  title =        "Reducing the time to tune parallel dense linear
                 algebra routines with partial execution and
                 performance modelling",
  type =         "LAPACK Working Note",
  number =       "235",
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  day =          "8",
  month =        oct,
  year =         "2010",
  bibdate =      "Wed Aug 24 12:36:41 MDT 2011",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "UT-CS-10-661.",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn235.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Baboulin:2010:CCT,
  author =       "Marc Baboulin and Serge Gratton",
  title =        "A contribution to the conditioning of the total least
                 squares problem",
  type =         "LAPACK Working Note",
  number =       "236",
  institution =  inst-INRIA,
  address =      inst-INRIA:adr,
  day =          "5",
  month =        nov,
  year =         "2010",
  bibdate =      "Wed Aug 24 12:36:41 MDT 2011",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "INRIA report.",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn236.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Ballard:2010:MCE,
  author =       "Grey Ballard and James Demmel and Ioana Dumitriu",
  title =        "Minimizing Communication for Eigenproblems and the
                 Singular Value Decomposition",
  type =         "LAPACK Working Note",
  number =       "237",
  institution =  inst-UCB-EECS,
  address =      inst-UCB-EECS:adr,
  day =          "13",
  month =        nov,
  year =         "2010",
  bibdate =      "Wed Aug 24 12:36:41 MDT 2011",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "UCB/EECS-2010-136.",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn237.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Solomonik:2011:COPa,
  author =       "Edgar Solomonik and James Demmel",
  title =        "Communication-optimal parallel {$ 2.5 $D} matrix
                 multiplication and {$ L U $} factorization
                 algorithms",
  type =         "LAPACK Working Note",
  number =       "238",
  institution =  inst-UCB-EECS,
  address =      inst-UCB-EECS:adr,
  day =          "9",
  month =        feb,
  year =         "2011",
  bibdate =      "Wed Aug 24 12:36:41 MDT 2011",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "UCB/EECS-2011-10.",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn238.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Ballard:2011:CBH,
  author =       "Grey Ballard and James Demmel and Andrew Gearhart",
  title =        "Communication bounds for heterogeneous
                 architectures",
  type =         "LAPACK Working Note",
  number =       "239",
  institution =  inst-UCB-EECS,
  address =      inst-UCB-EECS:adr,
  day =          "11",
  month =        feb,
  year =         "2011",
  bibdate =      "Wed Aug 24 12:36:41 MDT 2011",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "UCB/EECS-2011-13.",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn239.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Anderson:2011:CAD,
  author =       "Michael Anderson and Grey Ballard and James Demmel
                 and Kurt Keutzer",
  title =        "Communication-Avoiding {$ Q R $} Decomposition for
                 {GPUs}",
  type =         "LAPACK Working Note",
number = "240",
  institution = inst-UCB-EECS,
  address = inst-UCB-EECS:adr,
  day = "18",
  month = feb,
  year = "2011",
  bibdate = "Wed Aug 24 12:36:41 MDT 2011",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "Update of UCB/EECS-2010-131. To appear in IPDPS'11.",
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn240.pdf",
  acknowledgement = ack-nhfb,
}

%%% The note field of every entry in this file ends with a period;
%%% the entry below follows that convention.
@TechReport{Song:2011:STC,
  author = "Fengguang Song and Hatem Ltaief and Bilel Hadri and Jack Dongarra",
  title = "Scalable Tile Communication-Avoiding {$ Q R $} Factorization on Multicore Cluster Systems",
  type = "LAPACK Working Note",
  number = "241",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  day = "4",
  month = mar,
  year = "2011",
  bibdate = "Wed Aug 24 12:36:41 MDT 2011",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-10-653. Published at SC'10.",
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn241.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Agullo:2011:FEA,
  author = "Emmanuel Agullo and Jack Dongarra and Rajib Nath and Stanimire Tomov",
  title = "A Fully Empirical Autotuned Dense {$ Q R $} Factorization For Multicore Architectures",
  type = "LAPACK Working Note",
  number = "242",
  institution = inst-INRIA,
  address = inst-INRIA:adr,
  day = "9",
  month = mar,
  year = "2011",
  bibdate = "Wed Aug 24 12:36:41 MDT 2011",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "INRIA-7526.",
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn242.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Haidar:2011:ADS,
  author = "Azzam Haidar and Hatem Ltaief and Asim YarKhan and Jack Dongarra",
  title = "Analysis of Dynamically Scheduled Tile Algorithms for Dense Linear Algebra on Multicore Architectures",
  type = "LAPACK Working Note",
  number = "243",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  day = "10",
  month = mar,
  year = "2011",
  bibdate = "Wed Aug 24 12:36:41 MDT 2011",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-11-666.
Submitted at Concurrency and Computations.",
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn243.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Luszczek:2011:TST,
  author = "Piotr Luszczek and Hatem Ltaief and Jack Dongarra",
  title = "Two-Stage Tridiagonal Reduction for Dense Symmetric Matrices using Tile Algorithms on Multicore Architectures",
  type = "LAPACK Working Note",
  number = "244",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  day = "18",
  month = apr,
  year = "2011",
  bibdate = "Wed Aug 24 12:36:41 MDT 2011",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-11-670.",
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn244.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Kurzak:2011:AGF,
  author = "Jakub Kurzak and Stanimire Tomov and Jack Dongarra",
  title = "Autotuning {GEMMs} for {Fermi}",
  type = "LAPACK Working Note",
  number = "245",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  day = "18",
  month = apr,
  year = "2011",
  bibdate = "Wed Aug 24 12:36:41 MDT 2011",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-11-671.
Submitted at SC11 November 12-18, 2011, Seattle, Washington, USA.",
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn245.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Baboulin:2011:ALS,
  author = "Marc Baboulin and Jack Dongarra and Julien Herrmann and Stanimire Tomov",
  title = "Accelerating linear system solutions using randomization techniques",
  type = "LAPACK Working Note",
  number = "246",
  institution = inst-INRIA,
  address = inst-INRIA:adr,
  day = "15",
  month = may,
  year = "2011",
  bibdate = "Wed Aug 24 12:36:41 MDT 2011",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "INRIA RR-7616.",
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn246.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Ltaief:2011:HPB,
  author = "Hatem Ltaief and Piotr Luszczek and Jack Dongarra",
  title = "High Performance Bidiagonal Reduction using Tile Algorithms on Homogeneous Multicore Architectures",
  type = "LAPACK Working Note",
  number = "247",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  day = "18",
  month = may,
  year = "2011",
  bibdate = "Wed Aug 24 12:36:41 MDT 2011",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-11-673. Submitted at TOMS.",
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn247.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Solomonik:2011:COPb,
  author = "Edgar Solomonik and James Demmel",
  title = "Communication-optimal parallel {$ 2.5 $D} matrix multiplication and {$ L U $} factorization algorithms",
  type = "LAPACK Working Note",
  number = "248",
  institution = inst-UCB-EECS,
  address = inst-UCB-EECS:adr,
  day = "7",
  month = jun,
  year = "2011",
  bibdate = "Wed Aug 24 12:36:41 MDT 2011",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UCB/EECS-2011-72.",
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn248.pdf",
  acknowledgement = ack-nhfb,
}

%%% The second author's surname is Wasniewski with an acute accent on
%%% the "s", entered as a BibTeX special character.
@TechReport{Gustavson:2011:LCF,
  author = "Fred G. Gustavson and Jerzy Wa{\'s}niewski and Jack J. Dongarra and Jos{\'e} R.
Herrero and Julien Langou",
  title = "Level-3 {Cholesky} Factorization Routines as Part of Many {Cholesky} Algorithms",
  type = "LAPACK Working Note",
  number = "249",
  institution = "Department of Informatics and Mathematical Modelling, Technical University of Denmark",
  address = "Kongens Lyngby, Denmark",
  year = "2011",
  bibdate = "Wed Aug 24 12:36:41 MDT 2011",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "DTU/IMM-Technical-Report-2011-11, submitted at TOMS.",
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn249.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Song:2011:ESM,
  author = "Fengguang Song and Stanimire Tomov and Jack Dongarra",
  title = "Efficient Support for Matrix Computations on Heterogeneous Multi-core and Multi-{GPU} Architectures",
  type = "LAPACK Working Note",
  number = "250",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  day = "16",
  month = jun,
  year = "2011",
  bibdate = "Wed Aug 24 12:36:41 MDT 2011",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-11-668.",
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn250.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Ltaief:2011:PHP,
  author = "Hatem Ltaief and Piotr Luszczek and Jack Dongarra",
  title = "Profiling High Performance Dense Linear Algebra Algorithms on Multicore Architectures for Power and Energy Efficiency",
  type = "LAPACK Working Note",
  number = "251",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  day = "21",
  month = jun,
  year = "2011",
  bibdate = "Wed Aug 24 12:36:41 MDT 2011",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-11-674.",
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn251.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Du:2011:SER,
  author = "Peng Du and Piotr Luszczek and Stanimire Tomov and Jack Dongarra",
  title = "Soft Error Resilient {$ Q R $} Factorization for Hybrid System",
  type = "LAPACK Working Note",
  number = "252",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  day = "1",
  month = jul,
  year = "2011",
  bibdate = "Wed Aug 24 12:36:41 MDT 2011",
  bibsource =
"http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-11-675.",
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn252.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Du:2011:ABF,
  author = "Peng Du and Aurelien Bouteiller and George Bosilca and Thomas Herault and Jack Dongarra",
  title = "Algorithm-based Fault Tolerance for Dense Matrix Factorizations",
  type = "LAPACK Working Note",
  number = "253",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  day = "5",
  month = aug,
  year = "2011",
  bibdate = "Wed Aug 24 12:36:41 MDT 2011",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-11-676.",
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn253.pdf",
  acknowledgement = ack-nhfb,
}

%%% The issue date of the report below is carried by the day, month,
%%% and year fields; the note holds only the report number.
@TechReport{Haidar:2011:PRC,
  author = "Azzam Haidar and Hatem Ltaief and Jack Dongarra",
  title = "Parallel Reduction to Condensed Forms for Symmetric Eigenvalue Problems using Aggregated Fine-Grained and Memory-Aware Kernels",
  type = "LAPACK Working Note",
  number = "254",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  day = "5",
  month = aug,
  year = "2011",
  bibdate = "Wed Aug 24 12:36:41 MDT 2011",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-11-677.",
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn254.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Solomonik:2011:ICP,
  author = "Edgar Solomonik and Abhinav Bhatele and James Demmel",
  title = "Improving communication performance in dense linear algebra via topology aware collectives",
  type = "LAPACK Working Note",
  number = "255",
  institution = inst-UCB-EECS,
  address = inst-UCB-EECS:adr,
  day = "15",
  month = aug,
  year = "2011",
  bibdate = "Wed Aug 24 12:36:41 MDT 2011",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UCB/EECS-2011-92.",
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn255.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Du:2011:HPL,
  author = "Peng Du and Piotr Luszczek and Jack Dongarra",
  title = "High Performance Linear System
Solver with Resilience to Multiple Soft Errors",
  type = "LAPACK Working Note",
  number = "256",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = oct,
  year = "2011",
  bibdate = "Sun May 5 11:20:19 2013",
  bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-11-683.",
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn256.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Dongarra:2011:HFA,
  author = "Jack Dongarra and Mathieu Faverge and Thomas Herault and Julien Langou and Yves Robert",
  title = "Hierarchical {$ Q R $} factorization algorithms for multi-core cluster systems",
  type = "LAPACK Working Note",
  number = "257",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = oct,
  year = "2011",
  bibdate = "Sun May 5 11:20:19 2013",
  bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-11-684.",
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn257.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Anzt:2011:BAR,
  author = "Hartwig Anzt and Stanimire Tomov and Jack Dongarra and Vincent Heuveline",
  title = "A Block-Asynchronous Relaxation Method for Graphics Processing Units",
  type = "LAPACK Working Note",
  number = "258",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = dec,
  year = "2011",
  bibdate = "Sun May 5 11:20:19 2013",
  bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-11-687.",
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn258.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Dongarra:2011:ANA,
  author = "Jack Dongarra and Mathieu Faverge and Hatem Ltaief and Piotr Luszczek",
  title = "Achieving Numerical Accuracy and High Performance using Recursive Tile {$ L U $} Factorization",
  type = "LAPACK Working Note",
  number = "259",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
month = dec,
  year = "2011",
  bibdate = "Sun May 5 11:20:19 2013",
  bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-11-688.",
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn259.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Anzt:2011:GAA,
  author = "Hartwig Anzt and Piotr Luszczek and Jack Dongarra and Vincent Heuveline",
  title = "{GPU}-Accelerated Asynchronous Error Correction for Mixed Precision Iterative Refinement",
  type = "LAPACK Working Note",
  number = "260",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = dec,
  year = "2011",
  bibdate = "Sun May 5 11:20:19 2013",
  bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-11-690.",
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn260.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Baboulin:2011:PTS,
  author = "Marc Baboulin and Dulceneia Becker and Jack Dongarra",
  title = "A parallel tiled solver for dense symmetric indefinite systems on multicore architectures",
  type = "LAPACK Working Note",
  number = "261",
  institution = inst-INRIA,
  address = inst-INRIA:adr,
  month = dec,
  year = "2011",
  bibdate = "Sun May 5 11:20:19 2013",
  bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "INRIA-7762.",
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn261.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Bougeret:2011:URR,
  author = "Marin Bougeret and Henri Casanova and Yves Robert and Fr{\'e}d{\'e}ric Vivien and Dounia Zaidouni",
  title = "Using replication for resilience on exascale systems",
  type = "LAPACK Working Note",
  number = "262",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = dec,
  year = "2011",
  bibdate = "Sun May 5 11:20:19 2013",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-11-691.",
  URL =
"http://www.netlib.org/lapack/lawnspdf/lawn262.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Khabou:2012:FPR,
  author = "Amal Khabou and James W. Demmel and Laura Grigori and Ming Gu",
  title = "{$ L U $} factorization with panel rank revealing pivoting and its communication avoiding version",
  type = "LAPACK Working Note",
  number = "263",
  institution = inst-UCB-EECS,
  address = inst-UCB-EECS:adr,
  month = jan,
  year = "2012",
  bibdate = "Sun May 5 11:20:19 2013",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UCB/EECS-2012-XX.",
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn263.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Bosilca:2012:DLA,
  author = "George Bosilca and Aurelien Bouteiller and Anthony Danalis and Thomas Herault and Piotr Luszczek and Jack J. Dongarra",
  title = "Dense Linear Algebra on Distributed Heterogeneous Hardware with a Symbolic {DAG} Approach",
  type = "LAPACK Working Note",
  number = "264",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = jan,
  year = "2012",
  bibdate = "Sun May 5 11:20:19 2013",
  bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn264.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Bougeret:2012:UGR,
  author = "Marin Bougeret and Henri Casanova and Yves Robert and Fr{\'e}d{\'e}ric Vivien and Dounia Zaidouni",
  title = "Using group replication for resilience on exascale systems",
  type = "LAPACK Working Note",
  number = "265",
  institution = inst-INRIA,
  address = inst-INRIA:adr,
  month = mar,
  year = "2012",
  bibdate = "Sun May 5 11:20:19 2013",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn265.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Kurzak:2012:FPP,
  author = "Jakub Kurzak and Piotr Luszczek and Mathieu Faverge and Jack Dongarra",
  title = "{$ L U $} Factorization with Partial Pivoting for a Multi-{CPU},
Multi-{GPU} Shared Memory System",
  type = "LAPACK Working Note",
  number = "266",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = apr,
  year = "2012",
  bibdate = "Sun May 5 11:20:19 2013",
  bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn266.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Kurzak:2012:PRA,
  author = "Jakub Kurzak and Piotr Luszczek and Stanimire Tomov and Jack Dongarra",
  title = "Preliminary Results of Autotuning {GEMM} Kernels for the {NVIDIA Kepler Architecture GeForce GTX 680}",
  type = "LAPACK Working Note",
  number = "267",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = apr,
  year = "2012",
  bibdate = "Sun May 5 11:20:19 2013",
  bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn267.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Robert:2012:CPR,
  author = "Yves Robert and Fr{\'e}d{\'e}ric Vivien and Dounia Zaidouni",
  title = "Combining Process Replication and Checkpointing for Resilience on Exascale Systems",
  type = "LAPACK Working Note",
  number = "268",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = jun,
  year = "2012",
  bibdate = "Sun May 5 11:20:19 2013",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-12-696.",
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn268.pdf",
  acknowledgement = ack-nhfb,
}

%%% The name of Fr{\'e}d{\'e}ric Vivien is spelled with accents here,
%%% matching the Robert:2012:CPR entry above.
@TechReport{Bosilca:2012:UMA,
  author = "George Bosilca and Aurelien Bouteiller and Elisabeth Brunet and Franck Cappello and Jack Dongarra and Amina Guermouche and Thomas Herault and Yves Robert and Fr{\'e}d{\'e}ric Vivien and Dounia Zaidouni",
  title = "Unified Model for Assessing Checkpointing Protocols at Extreme-Scale",
  type = "LAPACK Working Note",
  number = "269",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = jun,
  year =
"2012",
  bibdate = "Sun May 5 11:20:19 2013",
  bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-12-697.",
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn269.pdf",
  acknowledgement = ack-nhfb,
}

%%% NOTE(review): second author spelled "Hoffman" (two f's); confirm
%%% against the title page of lawn270.pdf.
@TechReport{Langou:2012:HLL,
  author = "Julie Langou and Bill Hoffman and Brad King",
  title = "How {LAPACK} library enables {Microsoft Visual Studio} support with {CMake} and {LAPACKE}",
  type = "LAPACK Working Note",
  number = "270",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = jul,
  year = "2012",
  bibdate = "Sun May 5 11:20:19 2013",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-12-698.",
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn270.pdf",
  acknowledgement = ack-nhfb,
}

%%% NOTE(review): the Lausanne institution is given by its usual
%%% acronym EPFL; confirm against lawn271.pdf.
@TechReport{Karlsson:2012:OPC,
  author = "Lars Karlsson and Daniel Kressner",
  title = "Optimally packed chains of bulges in multishift {$ Q R $} algorithms",
  type = "LAPACK Working Note",
  number = "271",
  institution = "Department of Computing Science, Ume{\aa} University and EPFL",
  address = "Ume{\aa}, Sweden and Lausanne, Switzerland",
  month = aug,
  year = "2012",
  bibdate = "Sun May 5 11:20:19 2013",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn271.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Du:2012:PGC,
  author = "Peng Du and Stanimire Tomov and Jack Dongarra",
  title = "Providing {GPU} Capability to {$ L U $} and {$ Q R $} within the {ScaLAPACK} Framework",
  type = "LAPACK Working Note",
  number = "272",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = sep,
  year = "2012",
  bibdate = "Sun May 5 11:20:19 2013",
  bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-12-699.",
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn272.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Baboulin:2012:ECC,
  author = "Marc
Baboulin and Serge Gratton and Remi Lacroix and Alan Laub",
  title = "Efficient computation of condition estimates for linear least squares problems",
  type = "LAPACK Working Note",
  number = "273",
  institution = inst-INRIA,
  address = inst-INRIA:adr,
  month = sep,
  year = "2012",
  bibdate = "Sun May 5 11:20:19 2013",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "INRIA-8065.",
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn273.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Dongarra:2012:RDC,
  author = "Jack Dongarra and Thomas Herault and Yves Robert",
  title = "Revisiting the double checkpointing algorithm",
  type = "LAPACK Working Note",
  number = "274",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = dec,
  year = "2012",
  bibdate = "Sun May 5 11:20:19 2013",
  bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-13-705.",
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn274.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Cao:2013:CHP,
  author = "Chongxiao Cao and Jack Dongarra and Peng Du and Mark Gates and Piotr Luszczek and Stanimire Tomov",
  title = "{clMAGMA}: High Performance Dense Linear Algebra with {OpenCL}",
  type = "LAPACK Working Note",
  number = "275",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = mar,
  year = "2013",
  bibdate = "Sun May 5 11:20:19 2013",
  bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-13-706.",
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn275.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Demmel:2013:CAR,
  author = "James W.
Demmel and Laura Grigori and Ming Gu and Hua Xiang",
  title = "Communication Avoiding Rank Revealing {$ Q R $} Factorization With Column Pivoting",
  type = "LAPACK Working Note",
  number = "276",
  institution = inst-UCB-EECS,
  address = inst-UCB-EECS:adr,
  month = may,
  year = "2013",
  bibdate = "Sat Mar 15 07:08:58 2014",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UCB/EECS-2013-46.",
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn276.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Aupy:2013:ISA,
  author = "Guillaume Aupy and Mathieu Faverge and Yves Robert and Jakub Kurzak and Piotr Luszczek and Jack Dongarra",
  title = "Implementing a systolic algorithm for {$ Q R $} factorization on multicore clusters with {PaRSEC}",
  type = "LAPACK Working Note",
  number = "277",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = may,
  year = "2013",
  bibdate = "Sat Mar 15 07:08:58 2014",
  bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-13-709.",
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn277.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Aupy:2013:CSE,
  author = "Guillaume Aupy and Anne Benoit and Thomas H{\'e}rault and Yves Robert and Fr{\'e}d{\'e}ric Vivien and Dounia Zaidouni",
  title = "On the Combination of Silent Error Detection and Checkpointing",
  type = "LAPACK Working Note",
  number = "278",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = jun,
  year = "2013",
  bibdate = "Sat Mar 15 07:08:58 2014",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-13-710.",
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn278.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Jia:2013:TER,
  author = "Yulu Jia and Piotr Luszczek and Jack Dongarra",
  title = "Transient Error Resilient {Hessenberg} Reduction on {GPU}-based Hybrid Architectures",
  type = "LAPACK Working Note",
  number = "279",
  institution = inst-UTK-CS,
  address =
inst-UTK-CS:adr,
  month = jun,
  year = "2013",
  bibdate = "Sat Mar 15 07:08:58 2014",
  bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-13-712.",
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn279.pdf",
  acknowledgement = ack-nhfb,
}

%%% The note field of every entry in this file ends with a period;
%%% the entry below follows that convention.
@TechReport{Donfack:2013:AVP,
  author = "Simplice Donfack and Jack Dongarra and Mathieu Faverge and Mark Gates and Jakub Kurzak and Piotr Luszczek and Ichitaro Yamazaki",
  title = "On Algorithmic Variants of Parallel {Gaussian} Elimination: Comparison of Implementations in Terms of Performance and Numerical Properties",
  type = "LAPACK Working Note",
  number = "280",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = jul,
  year = "2013",
  bibdate = "Sat Mar 15 07:08:58 2014",
  bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-CS-13-715.",
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn280.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Aupy:2013:OCP,
  author = "Guillaume Aupy and Anne Benoit and Thomas Herault and Yves Robert and Jack Dongarra",
  title = "Optimal Checkpointing Period: Time vs.
Energy",
  type = "LAPACK Working Note",
  number = "281",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = oct,
  year = "2013",
  bibdate = "Sat Mar 15 07:08:58 2014",
  bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-EECS-13-718.",
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn281.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Faverge:2013:DHS,
  author = "Mathieu Faverge and Julien Herrmann and Julien Langou and Bradley Lowery and Yves Robert and Jack Dongarra",
  title = "Designing {$ L U $--$ Q R $} hybrid solvers for performance and stability",
  type = "LAPACK Working Note",
  number = "282",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = oct,
  year = "2013",
  bibdate = "Sat Mar 15 07:08:58 2014",
  bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-EECS-13-719.",
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn282.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Haidar:2013:IPS,
  author = "Azzam Haidar and Piotr Luszczek and Jakub Kurzak and Jack Dongarra",
  title = "An Improved Parallel Singular Value Algorithm and Its Implementation for Multicore Hardware",
  type = "LAPACK Working Note",
  number = "283",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = oct,
  year = "2013",
  bibdate = "Sat Mar 15 07:08:58 2014",
  bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "UT-EECS-13-720.",
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn283.pdf",
  acknowledgement = ack-nhfb,
}

@TechReport{Kohler:2013:FFB,
  author = "Martin K{\"o}hler and Jens Saak",
  title = "{FlexiBLAS} --- A flexible {BLAS} library with runtime exchangeable backends",
  type = "LAPACK Working Note",
  number = "284",
  institution = inst-UTK-CS,
  address = inst-UTK-CS:adr,
  month = "????",
  year = "2013",
  bibdate = "Sat Mar 15
07:08:58 2014",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn284.pdf",
  acknowledgement = ack-nhfb,
}

%%% NOTE(review): the note field identifies this as an Inria research
%%% report, so the issuing institution is recorded as INRIA; confirm
%%% against the title page of lawn285.pdf.
@TechReport{Baboulin:2014:URB,
  author = "Marc Baboulin and Xiaoye S. Li and Fran{\c{c}}ois-Henry Rouet",
  title = "Using Random Butterfly Transformations to Avoid Pivoting in Sparse Direct Methods",
  type = "LAPACK Working Note",
  number = "285",
  institution = inst-INRIA,
  address = inst-INRIA:adr,
  month = feb,
  year = "2014",
  bibdate = "Sat Mar 15 07:08:58 2014",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "Inria Research Report RR-8481.",
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn285.pdf",
  acknowledgement = ack-nhfb,
}

%%% ==================================================================== %%% Entries for journal and conference proceedings publication of %%% LAPACK Working Notes:

@Article{Brewer:1988:TAAb, author = "Orlie Brewer and Jack Dongarra and Danny Sorensen", title = "Tools to aid in the analysis of memory access patterns for {FORTRAN} programs", journal = j-PARALLEL-COMPUTING, volume = "9", number = "1", pages = "25--35", month = dec, year = "1988", CODEN = "PACOEJ", ISSN = "0167-8191", bibdate = "Sat Mar 22 15:39:54 MST 1997", bibsource = "Compendex database; http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "See original LAPACK Working note in \cite{Brewer:1988:TAAa}.", URL = "http://www.netlib.org/utk/people/JackDongarra/PAPERS/Tools-to-Aid-Analysis-of-Memory-Access-Patterns-for-FORTRAN-Programs.pdf", abstract = "In order to improve the performance of algorithms implemented on high-performance computers, we must consider not only the total number of memory references, but also the pattern of memory references. We would like our algorithms to observe the principle of locality of reference, so that the data can be effectively utilized. This paper describes a set of tools that can be used as an aid in the analysis of memory access patterns of FORTRAN programs.", acknowledgement = ack-nhfb, affiliation = "Argonne", affiliationaddress = "Argonne, IL, USA", classcodes = "C6115 (Programming support); C6110 (Systems analysis and programming)", classification = "723", corpsource = "Div. of Math. and Comput. Sci., Argonne Nat. Lab., IL, USA", journalabr = "Parallel Comput", keywords = "Computer Programming Languages--FORTRAN; Computer Software; Data Storage, Digital; FORTRAN Programs; FORTRAN programs; Linear Algebra; Memory Access Patterns; memory access patterns analysis; Parallel Processing Computers; parallel programming; Software Engineering; software tools; Visualization Tools", treatment = "P Practical", } @Article{Bai:1989:BIHb, author = "Z. Bai and J. 
Demmel", title = "On a Block Implementation of {Hessenberg} Multishift {$ Q R $} Iteration", journal = j-INT-J-HIGH-SPEED-COMPUTING, volume = "1", number = "1", pages = "97--112", year = "1989", CODEN = "IHSCEZ", ISSN = "0129-0533", bibsource = "ftp://ftp.ira.uka.de/bibliography/Parallel/par.lin.alg.bib; http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "See original LAPACK Working note in \cite{Bai:1989:BIHa}.", } @Article{Dongarra:1989:BRM, author = "J. J. Dongarra and D. C. Sorensen and S. J. Hammarling", title = "Block reduction of matrices to condensed forms for eigenvalue computations", journal = j-J-COMP-APPL-MATH, volume = "27", number = "1--2", pages = "215--227", month = sep, year = "1989", CODEN = "JCAMDI", ISSN = "0377-0427 (print), 1879-1778 (electronic)", bibdate = "Sat Mar 22 15:39:54 MST 1997", bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "See original LAPACK Working note in \cite{Dongarra:1987:BRM}.", acknowledgement = ack-nhfb, classcodes = "C4140 (Linear algebra); C4240 (Programming and algorithm theory)", corpsource = "Math. and Comput. Sci. Div., Argonne Nat. Lab., IL, USA", keywords = "algorithms; bidiagonal; block algorithms; block reduction of matrices; condensed; divide and conquer technique; eigenvalue computations; eigenvalues and eigenfunctions; forms; Hessenberg form; Householder transformations; linear algebra; matrix-matrix operations; parallel", treatment = "T Theoretical or Mathematical", } @InProceedings{Anderson:1990:LPLb, author = "E. Anderson and Z. Bai and C. Bischof and J. Demmel and J. Dongarra and J. DuCroz and A. Greenbaum and S. Hammarling and A. McKenney and D. 
Sorensen", title = "{LAPACK}: {A} Portable Linear Algebra Library for High-Performance Computers", crossref = "IEEE:1990:PSN", pages = "2--11", year = "1990", bibdate = "Mon Sep 9 14:47:18 1996", bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "See original LAPACK Working note in \cite{Anderson:1990:LPLa}.", abstract = "The goal of the LAPACK project is to design and implement a portable linear algebra library for efficient use on a variety of high-performance computers. The library is based on the widely used LINPACK and EISPACK packages for solving linear equations, eigenvalue problems, and linear least-squares problems, but extends their functionality in a number of ways. The major methodology for making the algorithms run faster is to restructure them to perform block matrix operations (e.g., matrix-matrix multiplication) in their inner loops. These block operations may be optimized to exploit the memory hierarchy of a specific architecture. 
The LAPACK project is also working on new algorithms that yield higher relative accuracy for a variety of linear algebra problems.", acknowledgement = ack-nhfb, affiliation = "Tennessee Univ., Knoxville, TN, USA", classification = "C4140 (Linear algebra); C7310 (Mathematics)", keywords = "Block matrix operations; Block operations; Eigenvalue problems; Functionality; Inner loops; LAPACK; Linear equations; Linear least-squares problems; Matrix-matrix multiplication; Memory hierarchy; Portable linear algebra library; Relative accuracy", page = "1--10", thesaurus = "Eigenvalues and eigenfunctions; Matrix algebra; Software portability; Subroutines", } @Article{Barlow:1990:CAE, author = "Jesse Barlow and James Demmel", title = "Computing Accurate Eigensystems of Scaled Diagonally Dominant Matrices", journal = j-SIAM-J-NUMER-ANAL, volume = "27", number = "3", pages = "762--791", month = jun, year = "1990", CODEN = "SJNAAM", ISSN = "0036-1429 (print), 1095-7170 (electronic)", MRclass = "65F15", MRnumber = "91g:65071", MRreviewer = "Alan L. Andrew", bibdate = "Fri Oct 16 06:57:22 MDT 1998", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib; JSTOR database; Parallel/par.lin.alg.bib", note = "See original LAPACK Working note in \cite{Barlow:1988:CAE}.", acknowledgement = ack-nhfb, } @Article{Dongarra:1990:ASL, author = "Jack J. 
Dongarra and Jeremy {Du Croz} and Sven Hammarling and Iain Duff",
  title = "{Algorithm 679}: {A} Set of Level 3 {Basic Linear Algebra Subprograms}: Model Implementation and Test Programs",
  journal = j-TOMS,
  volume = "16",
  number = "1",
  pages = "18--28",
  month = mar,
  year = "1990",
  CODEN = "ACMSCU",
  ISSN = "0098-3500 (print), 1557-7295 (electronic)",
  bibdate = "Sat Aug 27 17:29:49 1994",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "See also \cite{Higham:1990:EFM,Demmel:1992:SBA,Dayde:1994:PBI}.",
  URL = "http://doi.acm.org/10.1145/77626.77627; http://www.acm.org/pubs/citations/journals/toms/1990-16-1/p18-dongarra/",
  acknowledgement = ack-nhfb,
  keywords = "algorithms; measurement; performance; reliability; verification",
  subject = "{\bf D.3.2}: Software, PROGRAMMING LANGUAGES, Language Classifications, FORTRAN 8X. {\bf F.2.1}: Theory of Computation, ANALYSIS OF ALGORITHMS AND PROBLEM COMPLEXITY, Numerical Algorithms and Problems, Computations on matrices. {\bf G.1.3}: Mathematics of Computing, NUMERICAL ANALYSIS, Numerical Linear Algebra, Linear systems (direct and iterative methods). {\bf G.4}: Mathematics of Computing, MATHEMATICAL SOFTWARE.",
}

%%% In Dongarra:1990:ASL the two-word family name Du Croz is braced:
%%% unbraced, BibTeX takes only the last capitalized word as the
%%% surname and would treat Du as a given name.

@Article{Higham:1990:EFM,
  author = "Nicholas J. Higham",
  title = "Exploiting Fast Matrix Multiplication Within the Level 3 {BLAS}",
  journal = j-TOMS,
  volume = "16",
  number = "4",
  pages = "352--368",
  month = dec,
  year = "1990",
  CODEN = "ACMSCU",
  ISSN = "0098-3500 (print), 1557-7295 (electronic)",
  MRclass = "65-04 (65F99)",
  MRnumber = "1 095 133",
  bibdate = "Sun Sep 04 23:21:57 1994",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "Describes algorithms based on Strassen's method which are asymptotically faster than the standard {$ {N}^3 $} algorithm, and in practice, faster for {$ {N} \approx 100 $}, and examines their numerical stability.
See \cite{Dongarra:1990:ASL,Demmel:1992:SBA,Dayde:1994:PBI}.", URL = "http://doi.acm.org/10.1145/98267.98290; http://www.acm.org/pubs/citations/journals/toms/1990-16-4/p352-higham/", abstract = "The Level 3 BLAS (BLAS3) are a set of specifications of FORTRAN 77 subprograms for carrying out matrix multiplications and the solution of triangular systems with multiple right-hand sides. They are intended to provide efficient and portable building blocks for linear algebra algorithms on high-performance computers. We describe algorithms for the BLAS3 operations that are asymptotically faster than the conventional ones. These algorithms are based on Strassen's method for fast matrix multiplication, which is now recognized to be a practically useful technique once matrix dimensions exceed about 100. We pay particular attention to the numerical stability of these ``fast BLAS3.'' Error bounds are given and their significance is explained and illustrated with the aid of numerical experiments. Our conclusion is that the fast BLAS3, although not as strongly stable as conventional implementations, are stable enough to merit careful consideration in many applications.", acknowledgement = ack-nhfb, keywords = "algorithms", subject = "{\bf G.1.3}: Mathematics of Computing, NUMERICAL ANALYSIS, Numerical Linear Algebra. {\bf D.3.2}: Software, PROGRAMMING LANGUAGES, Language Classifications, FORTRAN 77.", } @Article{Deift:1991:BSV, author = "Percy Deift and James Demmel and Luen Chau Li and Carlos Tomei", title = "The Bidiagonal Singular Value Decomposition and {Hamiltonian} Mechanics", journal = j-SIAM-J-NUMER-ANAL, volume = "28", number = "5", pages = "1463--1516", month = oct, year = "1991", CODEN = "SJNAAM", ISSN = "0036-1429 (print), 1095-7170 (electronic)", MRclass = "65F15 (58F05)", MRnumber = "92i:65071", MRreviewer = "T. Y. 
Li", bibdate = "Fri Oct 16 06:57:22 MDT 1998", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib; JSTOR database", note = "See original LAPACK Working note in \cite{Deift:1989:BSV}.", acknowledgement = ack-nhfb, } @Article{Dongarra:1991:IRS, author = "J. J. Dongarra and P. Mayes and G. {Radicati di Brozolo}", title = "The {IBM RISC System\slash 6000} and Linear Algebra Operations", journal = j-SUPERCOMPUTER, volume = "8", number = "4", pages = "15--30", month = jul, year = "1991", CODEN = "SPCOEL", ISSN = "0168-7875", bibdate = "Sat Mar 22 15:39:54 MST 1997", bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "See original LAPACK Working note in \cite{Dongarra:1990:IRS}.", URL = "http://www.netlib.org/utk/people/JackDongarra/PAPERS/The-IBM-RISC-System-6000-and-Linear-Algebra-Operations.pdf", abstract = "The paper discusses the IBM RISC System/6000 workstation and a set of experiments with blocked algorithms commonly used in solving problems in numerical linear algebra. The authors describe the performance of these algorithms and discuss the techniques used in achieving high performance on such an architecture.", acknowledgement = ack-nhfb, affiliation = "Math. Sci. Section, Oak Ridge Nat. Lab., TN, USA", classcodes = "C5420 (Mainframes and minicomputers); C5470 (Performance evaluation and testing); C4140 (Linear algebra)", classification = "C4140 (Linear algebra); C5420 (Mainframes and minicomputers); C5470 (Performance evaluation and testing)", corpsource = "Math. Sci. Section, Oak Ridge Nat. 
Lab., TN, USA", keywords = "blocked algorithms; Blocked algorithms; Floating point performance; floating point performance; IBM computers; IBM RISC System/6000; IBM RISC System/6000 workstation; linear algebra; numerical linear algebra; Numerical linear algebra; performance evaluation; reduced instruction set computing; workstation", pubcountry = "Netherlands", thesaurus = "IBM computers; Linear algebra; Performance evaluation; Reduced instruction set computing", treatment = "P Practical", } @Article{Anderson:1992:GFA, author = "E. Anderson and Z. Bai and J. Dongarra", title = "Generalized {$ Q R $} factorization and its applications", journal = j-LINEAR-ALGEBRA-APPL, volume = "162/164", pages = "243--271", year = "1992", CODEN = "LAAPAW", ISSN = "0024-3795 (print), 1873-1856 (electronic)", MRclass = "65F15 15A23", MRnumber = "92j:65050", bibdate = "Thu Dec 19 14:07:22 1996", bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "Directions in matrix theory (Auburn, AL, 1990). See original LAPACK Working note in \cite{Anderson:1991:GQF}.", URL = "http://www.netlib.org/utk/people/JackDongarra/PAPERS/Generalized-QR-Factorization-and-Its-Applications.pdf", acknowledgement = ack-nhfb, } @Article{Bischof:1992:GIC, author = "Christian H. Bischof and Ping Tak Peter Tang", title = "Generalizing incremental condition estimation", journal = j-J-NUM-LIN-ALG-APPL, volume = "1", number = "2", pages = "149--163", year = "1992", CODEN = "NLAAEM", ISSN = "0129-3281", MRclass = "65F30", MRnumber = "93e:65068", bibdate = "Thu Jan 23 19:03:25 MST 1997", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "See original LAPACK Working note in \cite{Bischof:1991:GIC}.", acknowledgement = ack-nhfb, } @InProceedings{Choi:1992:SSLb, author = "J. Choi and J. J. Dongarra and R. Pozo and D. W. 
Walker",
  title = "{ScaLAPACK}: a scalable linear algebra library for distributed memory concurrent computers",
  crossref = "Siegel:1992:FSF",
  pages = "120--127",
  year = "1992",
  bibdate = "Sat Mar 22 15:39:54 MST 1997",
  bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "IEEE catalog number 92CH3185-6. See original LAPACK Working note in \cite{Choi:1992:SSLa}.",
  acknowledgement = ack-nhfb,
  classcodes = "C7310 (Mathematics); C4140 (Linear algebra); C6110J (Object-oriented programming); C5470 (Performance evaluation and testing); C5440 (Multiprocessor systems and techniques)",
  conflocation = "McLean, VA, USA; 19-21 Oct. 1992",
  corpsource = "Oak Ridge Nat. Lab., TN, USA",
  keywords = "algorithm; computations; computing; distributed; distributed memory systems; distributed memory version; evaluation; Intel Delta multicomputer; Level 3 BLAS; library routines; linear algebra; mathematics; matrix; memory concurrent computers; object-oriented interface; object-oriented programming; performance; right-looking LU factorization; scalable linear algebra library; ScaLAPACK; software package; software packages; square block scattered decomposition",
  sponsororg = "IEEE; NASA",
  treatment = "A Application; P Practical",
}

%%% The citation key Croz:1992:SMM is kept unchanged so that existing
%%% citations still resolve, but the author's family name is Du Croz;
%%% it is braced so BibTeX does not parse Du as a given name.

@Article{Croz:1992:SMM,
  author = "Jeremy J. {Du Croz} and Nicholas J. Higham",
  title = "Stability of Methods for Matrix Inversion",
  journal = j-IMA-J-NUMER-ANAL,
  volume = "12",
  pages = "1--19",
  year = "1992",
  CODEN = "IJNADH",
  ISSN = "0272-4979 (print), 1464-3642 (electronic)",
  bibdate = "Sat Dec 23 14:54:28 2000",
  bibsource = "http://www.math.utah.edu/pub/bibnet/authors/h/higham-nicholas-john.bib; http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "See original LAPACK Working note in \cite{DuCroz:1990:SMM}.",
  acknowledgement = ack-njh,
}

@InProceedings{Demmel:1992:DPH,
  author = "J. Demmel and J. Dongarra and W.
Kahan", title = "On Designing Portable High Performance Numerical Libraries", crossref = "Griffiths:1992:NAP", pages = "??--??", month = jun, year = "1991", bibdate = "Tue Feb 26 10:10:44 2002", bibsource = "ftp://ftp.ira.uka.de/pub/bibliography/Parallel/par.lin.alg.bib; http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "See original LAPACK Working note in \cite{Demmel:1991:DPH}.", acknowledgement = ack-nhfb, } @Article{Demmel:1992:JMM, author = "James Demmel and Kre{\v{s}}imir Veseli{\'c}", title = "{Jacobi}'s Method is More Accurate than {$ Q R $}", journal = j-SIAM-J-MAT-ANA-APPL, volume = "13", number = "4", pages = "1204--1245", month = oct, year = "1992", CODEN = "SJMAEL", ISSN = "0895-4798 (print), 1095-7162 (electronic)", MRclass = "65F15 (65G05)", MRnumber = "93e:65057", bibdate = "Tue Jan 21 08:54:30 MST 1997", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "See original LAPACK Working note in \cite{Demmel:1989:JMM}.", acknowledgement = ack-nhfb, } @Article{Demmel:1992:SBA, author = "James W. Demmel and Nicholas J. Higham", title = "Stability of Block Algorithms with Fast Level-3 {BLAS}", journal = j-TOMS, volume = "18", number = "3", pages = "274--291", month = sep, year = "1992", CODEN = "ACMSCU", ISSN = "0098-3500 (print), 1557-7295 (electronic)", bibdate = "Fri Sep 30 01:27:16 1994", bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/duff-iain-s.bib; http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "See \cite{Dongarra:1990:ASL,Higham:1990:EFM,Dayde:1994:PBI}. See original LAPACK Working note in \cite{Demmel:1990:SBA}.", URL = "http://www.acm.org/pubs/toc/Abstracts/0098-3500/131769.html", abstract = "Block algorithms are becoming increasingly popular in matrix computations. 
Since their basic unit of data is a submatrix rather than a scalar, they have a higher level of granularity than point algorithms, and this makes them well suited to high-performance computers. The numerical stability of the block algorithms in the new linear algebra program library LAPACK is investigated here. It is shown that these algorithms have backward error analyses in which the backward error bounds are commensurate with the error bounds for the underlying level-3 BLAS (BLAS3). One implication is that the block algorithms are as stable as the corresponding point algorithms when conventional BLAS3 are used. A second implication is that the use of BLAS3 based on fast matrix multiplication techniques affects the stability only insofar as it increases the constant terms in the normwise backward error bounds. For linear equation solvers employing {\em LU} factorization, it is shown that fixed precision iterative refinement helps to mitigate the effect of the larger error constants. Despite the positive results presented here, not all plausible block algorithms are stable; we illustrate this with the example of {\em LU} factorization with block triangular factors and describe how to check a block algorithm for stability without doing a full error analysis.", acknowledgement = ack-nhfb, keywords = "algorithms; performance", subject = "{\bf G.1.3}: Mathematics of Computing, NUMERICAL ANALYSIS, Numerical Linear Algebra. {\bf F.2.1}: Theory of Computation, ANALYSIS OF ALGORITHMS AND PROBLEM COMPLEXITY, Numerical Algorithms and Problems, Computations on matrices.", } @InProceedings{Dongarra:1992:LASb, author = "J. Dongarra and R. {van de Geijn} and D. 
Walker",
  title = "A look at scalable dense linear algebra libraries",
  crossref = "IEEE:1992:SHP",
  pages = "??--??",
  year = "1992",
  bibdate = "Sat Mar 22 15:39:54 MST 1997",
  bibsource = "ftp://ftp.ira.uka.de/pub/bibliography/Parallel/par.lin.alg.bib; http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "IEEE catalog number 92TH0432-5. See original LAPACK Working note in \cite{Dongarra:1992:LASa}.",
  acknowledgement = ack-nhfb,
  classcodes = "C4140 (Linear algebra); C7310 (Mathematics); C6110P (Parallel programming)",
  conflocation = "Williamsburg, VA, USA; 26-29 April 1992",
  corpsource = "Dept. of Comput. Sci., Tennessee Univ., TN, USA",
  keywords = "14 GFLOPS; applications; concurrent computers; Delta system; dense matrix problems; distributed memory; double precision; Intel Touchstone; linear algebra; LU factorization; mathematics computing; object-oriented; object-oriented interface; parallel implementation; parallel programming; portable; programming; scalable dense linear algebra libraries; software portability; square block scattered decomposition; subroutines; user interfaces",
  sponsororg = "IEEE",
  treatment = "P Practical",
}

%%% NOTE(review): Dongarra:1992:LASb previously cited Anderson:1992:PLP
%%% as its working note, but that key is the working note cited by
%%% Anderson:1993:PLP (a different paper).  The a/b key convention used
%%% in this file implies the companion key Dongarra:1992:LASa; confirm
%%% that key against the TechReport section.

@Article{Dongarra:1992:NCC,
  author = "Jack J. Dongarra and Sven Hammarling and James H. Wilkinson",
  title = "Numerical Considerations in Computing Invariant Subspaces",
  journal = j-SIAM-J-MAT-ANA-APPL,
  volume = "13",
  number = "1",
  pages = "145--161",
  month = jan,
  year = "1992",
  CODEN = "SJMAEL",
  ISSN = "0895-4798 (print), 1095-7162 (electronic)",
  MRclass = "65F15",
  MRnumber = "93a:65049",
  MRreviewer = "Colette Lebaud",
  bibdate = "Tue Jan 21 08:54:30 MST 1997",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "See original LAPACK Working note in \cite{Dongarra:1990:NCC}.",
  acknowledgement = ack-nhfb,
}

@Article{Dongarra:1992:RCFb,
  author = "Jack J. Dongarra and Robert A.
{van de Geijn}", title = "Reduction to condensed form for the eigenvalue problem on distributed memory architectures", journal = j-PARALLEL-COMPUTING, volume = "18", number = "9", pages = "973--982", month = sep, year = "1992", CODEN = "PACOEJ", ISSN = "0167-8191", MRclass = "65Y05 (65F15)", MRnumber = "1 190 458", bibdate = "Thu Sep 16 09:30:12 1999", bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "See original LAPACK Working note in \cite{Dongarra:1991:RCF}.", URL = "http://www.netlib.org/utk/people/JackDongarra/PAPERS/Reduction-to-Condensed-Form-for-the-Eigenvalue-Problem-on-Distributed-Memory.pdf", abstract = "The authors describe a parallel implementation for the reduction of general and symmetric matrices to Hessenberg and tridiagonal form, respectively. The methods are based on LAPACK sequential codes and use a panel-wrapped mapping of matrices to nodes. Results from experiments on the Intel Touchstone Delta are given.", acknowledgement = ack-nhfb, affiliation = "Dept. of Comput. Sci., Tennessee Univ., Knoxville, TN, USA", classcodes = "C7310 (Mathematics); C5220P (Parallel architecture); C4140 (Linear algebra)", classification = "C4140 (Linear algebra); C5220P (Parallel architecture); C7310 (Mathematics)", corpsource = "Dept. of Comput. 
Sci., Tennessee Univ., Knoxville, TN, USA", keywords = "architectures; distributed memory; Distributed memory architectures; distributed memory systems; Eigenvalue problem; eigenvalue problem; eigenvalues and eigenfunctions; Hessenberg form; Intel Touchstone Delta; LAPACK sequential codes; linear algebra; mapping; mathematics computing; panel-wrapped; Panel-wrapped mapping; parallel; Parallel implementation; parallel implementation; Symmetric matrices; symmetric matrices; Tridiagonal form; tridiagonal form", pubcountry = "Netherlands", thesaurus = "Distributed memory systems; Eigenvalues and eigenfunctions; Linear algebra; Mathematics computing; Parallel architectures", treatment = "P Practical", } @Article{Anderson:1993:PLP, author = "E. C. Anderson and J. Dongarra", title = "Performance of {LAPACK}: a portable library of numerical linear algebra routines", journal = j-PROC-IEEE, volume = "81", number = "8", pages = "1094--1102", month = aug, year = "1993", CODEN = "IEEPAD", ISSN = "0018-9219", bibdate = "Sat Mar 22 15:39:54 MST 1997", bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "See original LAPACK Working note in \cite{Anderson:1992:PLP}.", URL = "http://www.netlib.org/utk/people/JackDongarra/PAPERS/Performance-of-LAPACK-A-Portable-Library.pdf", acknowledgement = ack-nhfb, classcodes = "C7310 (Mathematics); C4140 (Linear algebra); C5440 (Multiprocessor systems and techniques); C6150G (Diagnostic, testing, debugging and evaluating systems)", corpsource = "Cray Res. 
Center, Eagan, MN, USA", keywords = "algebra routines; computers; evaluation; LAPACK project; library; linear algebra; mathematics computing; numerical linear; numerical linear algebra; parallel; parallel processors; performance; performance tuning; portability; portable library; program testing; shared memory systems; shared-memory vector; software", treatment = "P Practical", } @Article{Bai:1993:CGS, author = "Zhao Jun Bai and James W. Demmel", title = "Computing the generalized singular value decomposition", journal = j-SIAM-J-SCI-COMP, volume = "14", number = "6", pages = "1464--1486", month = nov, year = "1993", CODEN = "SJOCE3", ISSN = "1064-8275 (print), 1095-7197 (electronic)", ISSN-L = "1064-8275", MRclass = "65F30", MRnumber = "94h:65043", bibdate = "Tue Apr 29 18:15:07 MDT 1997", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "See original LAPACK Working note in \cite{Bai:1992:CGS}.", acknowledgement = ack-nhfb, } @Article{Bai:1993:SDB, author = "Zhaojun Bai and James W. Demmel", title = "On Swapping Diagonal Blocks in Real {Schur} Form", journal = j-LINEAR-ALGEBRA-APPL, volume = "186", pages = "73--95", year = "1993", CODEN = "LAAPAW", ISSN = "0024-3795 (print), 1873-1856 (electronic)", MRclass = "15A18", MRnumber = "94d:15006", bibdate = "Wed Jan 22 17:57:24 MST 1997", bibsource = "/usr/local/src/bib/bibliography/Parallel/par.lin.alg.bib; http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "See original LAPACK Working note in \cite{Bai:1992:SDB}.", acknowledgement = ack-nhfb, } @Article{Demmel:1993:CAS, author = "James W. 
Demmel and William Gragg", title = "On Computing Accurate Singular Values and Eigenvalues of Matrices With Acyclic Graphs", journal = j-LINEAR-ALGEBRA-APPL, volume = "185", pages = "203--217", month = may, year = "1993", CODEN = "LAAPAW", ISSN = "0024-3795 (print), 1873-1856 (electronic)", MRclass = "65F30 (15A18)", MRnumber = "94h:65044", bibdate = "Wed Jan 22 17:57:24 MST 1997", bibsource = "/usr/local/src/bib/bibliography/Parallel/par.lin.alg.bib; /usr/local/src/bib/bibliography/Theory/Matrix.bib; http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "See original LAPACK Working note in \cite{Demmel:1992:CAS}.", acknowledgement = ack-nhfb, keywords = "nla, la, pert, svd, eig, arrowhead matrix, acyclic graph", } @Article{Demmel:1993:IEB, author = "James W. Demmel and Nicholas J. Higham", title = "Improved Error Bounds for Underdetermined System Solvers", journal = j-SIAM-J-MAT-ANA-APPL, volume = "14", number = "1", pages = "1--14", month = jan, year = "1993", CODEN = "SJMAEL", ISSN = "0895-4798 (print), 1095-7162 (electronic)", bibsource = "http://www.math.utah.edu/pub/bibnet/authors/h/higham-nicholas-john.bib; http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "See original LAPACK Working note in \cite{Demmel:1990:IEB}.", acknowledgement = ack-njh, mynote = "Also LAPACK Working Note \#23.", } @InCollection{Demmel:1993:PNLb, author = "J. Demmel and M. Heath and H. {van der Vorst}", booktitle = "Acta Numerica 1993", title = "Parallel Numerical Linear Algebra", publisher = pub-CAMBRIDGE, address = pub-CAMBRIDGE:adr, pages = "111--198", year = "1993", bibdate = "Thu Jun 8 12:55:05 MDT 1995", bibsource = "http://www.math.utah.edu/pub/bibnet/authors/v/vandervorst-henk-a.bib; http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "See original LAPACK Working note in \cite{Demmel:1993:PNLa}.", } @InProceedings{Demmel:1993:TPN, author = "James W. Demmel", editor = "Marc S. Moonen and Gene H. Golub and Bart L. 
{De Moor}",
  booktitle = "Linear Algebra for Large Scale and Real-Time Applications",
  title = "Trading Off Parallelism and Numerical Stability",
  volume = "232",
  publisher = pub-KLUWER,
  address = pub-KLUWER:adr,
  pages = "49--68",
  year = "1993",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "See original LAPACK Working note in \cite{Demmel:1992:TPN}.",
  series = "NATO ASI Series E",
}

%%% Name handling in the two neighbouring entries: multi-word family
%%% names (De Moor, van de Geijn) are braced so BibTeX keeps them
%%% together, with van de Geijn spelled as elsewhere in this file.
%%% Clint is a given name of R. Clint Whaley, so it is left outside
%%% the braces and the surname is Whaley.

@InProceedings{Dongarra:1993:TDB,
  author = "J. J. Dongarra and R. A. {van de Geijn} and R. Clint Whaley",
  title = "Two Dimensional Basic Linear Algebra Communication Subprograms",
  crossref = "Sincovec:1993:SCP",
  pages = "347--352",
  year = "1993",
  bibdate = "Fri Mar 1 10:04:10 MST 1996",
  bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "See original LAPACK Working note in \cite{Dongarra:1991:TDB}.",
  acknowledgement = ack-nhfb,
}

@Article{Higham:1993:PTB,
  author = "Nicholas J. Higham",
  title = "Perturbation theory and backward error analysis for {$ A X - X B = C $}",
  journal = j-BIT,
  volume = "33",
  number = "1",
  pages = "124--136",
  year = "1993",
  CODEN = "BITTEL, NBITAB",
  ISSN = "0006-3835 (print), 1572-9125 (electronic)",
  MRclass = "65F05 (65G05)",
  MRnumber = "96a:65036",
  bibdate = "Fri Nov 13 07:00:34 MST 1998",
  bibsource = "http://www.math.utah.edu/pub/bibnet/authors/h/higham-nicholas-john.bib; http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "See original LAPACK Working note in \cite{Higham:1992:PTB}.",
  URL = "http://www.mai.liu.se/BIT/contents/bit33.html",
  acknowledgement = ack-njh # " and " # ack-nhfb,
}

@InProceedings{Choi:1994:DPD,
  author = "J. Choi and J. J. Dongarra and D. W.
Walker", title = "The design of a parallel, dense linear algebra software library: reduction to {Hessenberg}, tridiagonal, and bidiagonal form", crossref = "Dongarra:1994:PSW", pages = "98--111", year = "1994", bibdate = "Sat Mar 22 15:39:54 MST 1997", bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "See original LAPACK Working note in \cite{Choi:1995:DPDa} and \cite{Choi:1995:DPDb}.", acknowledgement = ack-nhfb, classcodes = "C7310 (Mathematics computing); C6110B (Software engineering techniques); C5440 (Multiprocessing systems); C4140 (Linear algebra); C6110P (Parallel programming)", conflocation = "Townsend, TN, USA; 25-27 May 1994", conftitle = "Proceedings of the Second Workshop on Environments and Tools for Parallel Scientific Computing", corpsource = "Dept. of Comput. Sci., Tennessee Univ., Knoxville, TN, USA", keywords = "algebra; Basic Linear Algebra Communication Subprograms; bidiagonal form; distributed Level 3 BLAS routines; distributed memory concurrent computers; distributed memory systems; Hessenberg; higher level; library routines; mathematics computing; matrix; panel reduction phase; Parallel Block BLAS; parallel dense linear algebra software library; parallel programming; PB-BLAS; reduction algorithms; ScaLAPACK; sequential BLAS; software engineering considerations; software libraries; tridiagonal", treatment = "P Practical", } @InProceedings{Choi:1994:PMT, author = "Jaeyoung Choi and J. J. Dongarra and D. W. 
Walker", title = "Parallel matrix transpose algorithms on distributed memory concurrent computers", crossref = "IEEE:1994:PSP", pages = "245--252", year = "1994", bibdate = "Sat Mar 22 15:39:54 MST 1997", bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "See original LAPACK Working note in \cite{Choi:1993:PMT}.", acknowledgement = ack-nhfb, classcodes = "C7310 (Mathematics); C4240P (Parallel programming and algorithm theory); C4140 (Linear algebra); C5440 (Multiprocessor systems and techniques)", conflocation = "Mississippi State, MS, USA; 6-8 Oct. 1993", conftitle = "Proceedings of Scalable Parallel Libraries Conference", corpsource = "Math. Sci. Sect., Oak Ridge Nat. Lab., TN, USA", keywords = "algebra; block scattered data distribution; computer; concurrent computers; distributed memory; distributed memory systems; Intel Touchstone Delta; mathematics computing; matrix; matrix multiplication routine; parallel algorithms; parallel matrix transpose algorithms; point-to-point communication; PUMMA package; synchronisation; transposed matrices", sponsororg = "Mississippi State Univ.; Nat. Sci. Found", treatment = "A Application; P Practical", } @Article{Choi:1994:PPU, author = "Jaeyoung Choi and Jack J. Dongarra and David W. 
Walker",
  title = "{PUMMA}: {Parallel Universal Matrix Multiplication Algorithms} on distributed memory concurrent computers",
  journal = j-CPE,
  volume = "6",
  number = "7",
  pages = "543--570",
  month = oct,
  year = "1994",
  CODEN = "CPEXEI",
  ISSN = "1040-3108",
  bibdate = "Tue Feb 26 09:30:21 2002",
  bibsource = "Compendex database; http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "See original LAPACK Working note in \cite{Choi:1993:PPU}.",
  URL = "http://www.netlib.org/utk/people/JackDongarra/PAPERS/PUMMA-Parallel-Universal-Matrix-Multiplication-Algorithms.pdf",
  abstract = "The paper describes Parallel Universal Matrix Multiplication Algorithms (PUMMA) on distributed memory concurrent computers. The PUMMA package includes not only the non-transposed matrix multiplication routine {$ C = A \cdot B $}, but also transposed multiplication routines {$ C = A^T \cdot B $}, {$ C = A \cdot B^T $}, and {$ C = A^T \cdot B^T $}, for a block cyclic data distribution. The routines perform efficiently for a wide range of processor configurations and block sizes. The PUMMA together provide the same functionality as the Level 3 BLAS routine xGEMM. Details of the parallel implementation of the routines are given, and results are presented for runs on the Intel Touchstone Delta computer.",
  acknowledgement = ack-nhfb,
  affiliation = "Oak Ridge Natl Lab",
  affiliationaddress = "Oak Ridge, TN, USA",
  classcodes = "C7310 (Mathematics); C5440 (Multiprocessor systems and techniques); C4240P (Parallel programming and algorithm theory); C4140 (Linear algebra)",
  classification = "722.4; 723.1; 921.1",
  corpsource = "Math. Sci. Sect., Oak Ridge Nat.
Lab., TN, USA", journalabr = "Concurrency Pract Exper", keywords = "algebra; Algorithms; block cyclic data distribution; block sizes; Computer architecture; configurations; Distributed memory concurrent computers; distributed memory concurrent computers; distributed memory systems; Intel Touchstone Delta Computer; level 3 BLAS routine xGEMM; Mathematical operators; mathematics computing; matrix; Matrix algebra; matrix multiplication routine; Multiprogramming; nontransposed; parallel algorithms; Parallel processing systems; Parallel Universal Matrix Multiplication Algorithm (PUMMA); parallel universal matrix multiplication algorithms; processor; PUMMA; routines; transposed multiplication", treatment = "A Application; P Practical", } @Article{Dayde:1994:PBI, author = "Michael J. Dayd{\'e} and Iain S. Duff and Antoine Petitet", title = "A Parallel Block Implementation of Level-3 {BLAS} for {MIMD} Vector Processors", journal = j-TOMS, volume = "20", number = "2", pages = "178--193", month = jun, year = "1994", CODEN = "ACMSCU", ISSN = "0098-3500 (print), 1557-7295 (electronic)", bibdate = "Fri Sep 09 13:52:29 1994", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "See \cite{Dongarra:1990:ASL,Higham:1990:EFM,Demmel:1992:SBA}.", URL = "http://doi.acm.org/10.1145/178365.174413; http://www.acm.org/pubs/citations/journals/toms/1994-20-2/p178-dayde/", acknowledgement = ack-nhfb, keywords = "algorithms; Level-3 BLAS; matrix-matrix kernels; measurement; parallelization; performance; vectorization", subject = "F.2.1 [Analysis of Algorithms and Problem Complexity]: Numerical Algorithms and Problems--computations on matrices; G.1.0 [Numerical Analysis]: General--numerical algorithms; G.1.3 [Numerical Analysis]: Numerical Linear Algebra--linear systems (direct and iterative methods); G.4 [Mathematics of Computing]: Mathematical Software--certification and testing; efficiency; portability; reliability and robustness; verification", } @Article{Demmel:1994:FNA, 
author = "James W. Demmel and Xiaoye Li",
  title = "Faster Numerical Algorithms via Exception Handling",
  journal = j-IEEE-TRANS-COMPUT,
  volume = "43",
  number = "8",
  pages = "983--992",
  month = aug,
  year = "1994",
  CODEN = "ITCOB4",
  ISSN = "0018-9340 (print), 1557-9956 (electronic)",
  ISSN-L = "0018-9340",
  bibdate = "Mon May 20 06:16:49 MDT 2002",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib; OCLC Proceedings database",
  note = "This is an expanded version of \cite{Demmel:1993:FNA}.",
  URL = "http://www.cs.berkeley.edu/~xiaoye/ieee.ps.gz",
  acknowledgement = ack-nhfb,
  remark = "Selected revised and extended papers from ARITH'11 \cite{Swartzlander:1993:PSC}.",
}

@InProceedings{Dongarra:1994:SMLb,
  author = "J. Dongarra and A. Lumsdaine and X. Niu and R. Pozo and K. Remington",
  title = "A Sparse Matrix Library in {C++} For High Performance Architectures",
  crossref = "Anonymous:1994:OON",
  pages = "214--218",
  year = "1994",
  bibdate = "Thu Sep 16 09:48:36 MDT 1999",
  bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "See original LAPACK Working note in \cite{Dongarra:1994:SMLa}.",
  URL = "http://www.netlib.org/netlib/lapack/lawns/lawn74.ps; http://www.netlib.org/netlib/lapack/lawnspdf/lawn74.pdf",
  acknowledgement = ack-nhfb,
}

@Article{vandeGeijn:1994:GCO,
  author = "R. A. {van de Geijn}",
  title = "On Global Combine Operations",
  journal = j-J-PAR-DIST-COMP,
  volume = "22",
  number = "2",
  pages = "324--328",
  month = aug,
  year = "1994",
  CODEN = "JPDCER",
  DOI = "http://dx.doi.org/10.1006/jpdc.1994.1091",
  ISSN = "0743-7315 (print), 1096-0848 (electronic)",
  bibdate = "Thu Mar 9 09:18:55 MST 2000",
  bibsource = "http://www.idealibrary.com/servlet/useragent?func=showAllIssues&curIssueID=jpdc; http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "See original LAPACK Working note in \cite{vandeGeijn:1991:GCO}.",
  URL = "http://www.idealibrary.com/links/doi/10.1006/jpdc.1994.1091/production; http://www.idealibrary.com/links/doi/10.1006/jpdc.1994.1091/production/pdf",
  acknowledgement = ack-nhfb,
  classification = "C4230M (Multiprocessor interconnection); C4240P (Parallel programming and algorithm theory); C5220P (Parallel architecture); C5440 (Multiprocessor systems and techniques)",
  corpsource = "Dept. of Comput. Sci., Texas Univ., Austin, TX, USA",
  keywords = "algorithms; distributed memory multiple instruction multiple data; distributed memory systems; global combine operations; hybrid strategy; hypercube networks; Intel iPSC/860; multicomputers; parallel",
  treatment = "P Practical",
}

@Article{Bai:1995:TLAb,
  author = "Z. Bai and D. Day and J. Demmel and J. Dongarra",
  title = "Templates for Linear Algebra Problems",
  journal = j-LECT-NOTES-COMP-SCI,
  volume = "1000",
  pages = "115--??",
  year = "1995",
  CODEN = "LNCSD9",
  ISSN = "0302-9743 (print), 1611-3349 (electronic)",
  bibdate = "Sat May 11 13:45:32 MDT 1996",
  bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "See original LAPACK Working note in \cite{Bai:1995:TLAa}.",
  URL = "http://www.netlib.org/utk/papers/etemplates.ps; http://www.netlib.org/utk/papers/etemplates/paper.html",
  acknowledgement = ack-nhfb,
}

@Article{Choi:1995:DPDb,
  author = "Jaeyoung Choi and Jack J. Dongarra and David W.
Walker",
  title = "The design of a parallel dense linear algebra software library: reduction to {Hessenberg}, tridiagonal, and bidiagonal form",
  journal = j-NUMER-ALGORITHMS,
  volume = "10",
  number = "3--4",
  pages = "379--399",
  month = oct,
  year = "1995",
  CODEN = "NUALEG",
  ISSN = "1017-1398 (print), 1572-9265 (electronic)",
  MRclass = "65-04 (65Y10)",
  MRnumber = "1 355 739",
  bibdate = "Sat Mar 22 15:39:54 MST 1997",
  bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "See original LAPACK Working note in \cite{Choi:1995:DPDa,Choi:1994:DPD}.",
  acknowledgement = ack-nhfb,
  classcodes = "B0290H (Linear algebra); C7310 (Mathematics computing); C4140 (Linear algebra); C6110B (Software engineering techniques); C6115 (Programming support)",
  corpsource = "Dept. of Comput. Sci., Tennessee Univ., Knoxville, TN, USA",
  keywords = "Basic Linear Algebra Communication; bidiagonal; BLACS; computers; computing; dense; distributed memory concurrent; eigenproblems; eigenvalues and eigenfunctions; form; Hessenberg form; LAPACK; linear algebra; linear algebra computations; mathematics; matrices; matrix reduction algorithms; parallel BLAS; parallel dense linear algebra software library; routine; ScaLAPACK; sequencing BLAS; software engineering; software libraries; Subprograms; tridiagonal form",
  treatment = "A Application; P Practical",
}

@Article{Demmel:1995:CSB,
  author = "James W. Demmel and Inderjit Dhillon and Huan Ren",
  title = "On the Correctness of Some Bisection-Like Parallel Eigenvalue Algorithms in Floating Point Arithmetic",
  journal = j-ETNA,
  volume = "3",
  pages = "116--149",
  year = "1995",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "See original LAPACK Working note in \cite{Demmel:1994:CPB}.",
}

@Article{Heath:1995:CPN,
  author = "Michael T.
Heath and Padma Raghavan",
  title = "A {Cartesian} Parallel Nested Dissection Algorithm",
  journal = j-SIAM-J-MAT-ANA-APPL,
  volume = "16",
  number = "1",
  pages = "235--253",
  month = jan,
  year = "1995",
  CODEN = "SJMAEL",
  ISSN = "0895-4798 (print), 1095-7162 (electronic)",
  MRclass = "65F05 (65F50 65Y05)",
  MRnumber = "95m:65046",
  MRreviewer = "Ming Kui Chen",
  bibdate = "Fri Dec 4 12:14:09 MST 1998",
  bibsource = "http://epubs.siam.org/sam-bin/dbq/toc/SIMAX/16/1; http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "See original LAPACK Working note in \cite{Heath:1992:CPN}.",
  URL = "http://epubs.siam.org/sam-bin/dbq/article/23827",
  acknowledgement = ack-nhfb,
}

@InProceedings{Plank:1995:ADC,
  author = "James S. Plank and Youngbae Kim and Jack J. Dongarra",
  title = "Algorithm-Based Diskless Checkpointing for Fault-Tolerant Matrix Operations",
  crossref = "IEEE:1995:DPT",
  pages = "351--360",
  year = "1995",
  bibdate = "Mon Aug 26 07:58:57 1996",
  bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "See original LAPACK Working note in \cite{Plank:1994:ABD}.",
  URL = "http://www.cs.utk.edu/~plank/plank/papers/FTCS25.1995.html; http://www.netlib.org/utk/papers/fault.ps; http://www.netlib.org/utk/people/JackDongarra/pdf/fault.pdf",
  abstract = "This paper is an exploration of diskless checkpointing for distributed scientific computations. With the widespread use of the `Network Of Workstation' (NOW) platform for distributed computing, long-running scientific computations need to tolerate the changing and often faulty nature of NOW environments. We present high-performance implementations of several algorithms for distributed scientific computing, including Cholesky factorization, LU factorization, QR factorization, and Preconditioned Conjugate Gradient. These implementations are able to run on PVM networks of at least N processors, and can complete with low overhead as long as any N processors remain functional. We discuss the details of how the algorithms are tuned for fault-tolerance, and present the performance results on a PVM network of SUN workstations, and on the IBM SP2.",
  acknowledgement = ack-nhfb,
  affiliation = "Univ of Tennessee",
  affiliationaddress = "TN, USA",
  classcodes = "C6150N (Distributed systems software); C6110B (Software engineering techniques); C4140 (Linear algebra); C7300 (Natural sciences computing); C4130 (Interpolation and function approximation)",
  classification = "722.2; 722.4; 723.1",
  conference = "Proceedings of the 25th International Symposium on Fault-Tolerant Computing",
  conflocation = "Pasadena, CA, USA; 27-30 June 1995",
  conftitle = "Twenty-Fifth International Symposium on Fault-Tolerant Computing. Digest of Papers",
  corpsource = "Dept. of Comput. Sci., Tennessee Univ., TN, USA",
  journalabr = "Dig Pap Int Symp Fault Tolerant Comput",
  keywords = "algebra; Algorithm based diskless checkpointing; algorithm-based diskless checkpointing; Algorithms; Cholesky; Cholesky factorization; computations; Computer networks; Computer workstations; conjugate gradient methods; Distributed computer systems; Distributed scientific computations; distributed scientific computations; factorization; fault tolerant; Fault tolerant computer systems; Fault tolerant matrix operations; fault-tolerance; high-performance implementations; IBM SP2; local area networks; long-running scientific; low overhead; LU factorization; matrix; matrix operations; natural sciences computing; Network of workstation (NOW) platform; Parallel processing systems; performance; preconditioned conjugate gradient; Preconditioned conjugate gradient; processors; PVM networks; QR factorization; software fault; subroutines; SUN; tolerance; workstation network platform; workstations",
  meetingaddress = "Pasadena, CA, USA",
  meetingdate = "Jun 27--30 1995",
  meetingdate2 = "06/27--30/95",
  sponsor = "IEEE",
  sponsororg = "IEEE Comput. Soc. Tech. Committee on Fault-Tolerant Comput.; LAAS-CNRS, France; Univ. Illinois at Urbana-Champaign; Univ. California at Los Angeles; Jet Propulsion Lab.; IFIP WG 10.4",
  treatment = "T Theoretical or Mathematical",
}

@Article{Raghavan:1995:DSG,
  author = "Padma Raghavan",
  title = "Distributed sparse {Gaussian} elimination and orthogonal factorization",
  journal = j-SIAM-J-SCI-COMP,
  volume = "16",
  number = "6",
  pages = "1462--1477",
  month = nov,
  year = "1995",
  CODEN = "SJOCE3",
  ISSN = "1064-8275 (print), 1095-7197 (electronic)",
  ISSN-L = "1064-8275",
  MRclass = "65F50 (65F05 65F20)",
  MRnumber = "96g:65046",
  MRreviewer = "Zahari Zlatev",
  bibdate = "Tue Apr 29 18:25:50 MDT 1997",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "See original LAPACK Working note in \cite{Raghavan:1993:DSG}.",
  acknowledgement = ack-nhfb,
}

@Article{Barrett:1996:ABI,
  author = "R. Barrett and M. Berry and J. Dongarra and V. Eijkhout and C. Romine",
  title = "Algorithmic bombardment for the iterative solution of linear systems: a poly-iterative approach",
  journal = j-J-COMP-APPL-MATH,
  volume = "74",
  number = "1--2",
  pages = "91--109",
  day = "5",
  month = "????",
  year = "1996",
  CODEN = "JCAMDI",
  ISSN = "0377-0427 (print), 1879-1778 (electronic)",
  MRclass = "65F10 (65N22 65Y05)",
  MRnumber = "97j:65052",
  bibdate = "Sat Mar 22 15:39:54 MST 1997",
  bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "See original LAPACK Working note in \cite{Barrett:1994:ABI}.",
  URL = "http://www.netlib.org/utk/people/JackDongarra/PAPERS/bombard.ps; http://www.netlib.org/utk/people/JackDongarra/pdf/bombard.pdf",
  acknowledgement = ack-nhfb,
  classcodes = "B0290H (Linear algebra); B0290F (Interpolation and function approximation); C4140 (Linear algebra); C4130 (Interpolation and function approximation); C4240P (Parallel programming and algorithm theory)",
  conflocation = "Austin, TX, USA; April 1995",
  conftitle = "TICAM Symposium. Texas Institute for Computational and Applied Mathematics",
  corpsource = "Distributed Comput. Group, Los Alamos Nat. Lab., NM, USA",
  keywords = "algorithmic bombardment; convergence; cost; environment; global communications; indefinite; iterative methods; iterative solution; linear systems; matrix; matrix algebra; matrix properties; nonsymmetric matrix; parallel algorithms; parallel environment; poly-iterative approach; sequential computing",
  treatment = "T Theoretical or Mathematical",
}

@InProceedings{Blackford:1996:PEDb,
  author = "L. S. Blackford and A. Cleary and J. Demmel and I. Dhillon and J. Dongarra and S. Hammarling and A. Petitet and H. Ren and K. Stanley and R. C.
Whaley",
  title = "Practical experience in the dangers of heterogeneous computing",
  crossref = "Wasniewski:1996:APC",
  pages = "57--64",
  year = "1996",
  bibdate = "Tue Feb 26 08:49:09 2002",
  bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "See original LAPACK Working note in \cite{Blackford:1996:PEDa}.",
  URL = "http://www.netlib.org/utk/papers/practical-hetro/paper.html; http://www.netlib.org/utk/papers/practical-hetro/paper.ps; http://www.netlib.org/utk/people/JackDongarra/pdf/prac-het.pdf",
  acknowledgement = ack-nhfb,
  classcodes = "C7310 (Mathematics computing); C6110B (Software engineering techniques); C6110P (Parallel programming); C6150N (Distributed systems software); C6115 (Programming support)",
  conflocation = "Lyngby, Denmark; 18-21 Aug. 1996",
  conftitle = "Applied Parallel Computing. Industrial Computation and Optimization. Third International Workshop, PARA'96. Proceedings",
  corpsource = "Tennessee Univ., Knoxville, TN, USA",
  keywords = "distributed memory systems; floating point arithmetic; heterogeneous computing; libraries; mathematics computing; numerical library software; parallel algorithms; ScaLAPACK; software; software portability; software reliability; software robustness",
  treatment = "A Application; G General Review",
}

@InProceedings{Blackford:1996:SPL,
  author = "Laura Susan Blackford and J. Choi and A. Cleary and A. Petitet and R. C. Whaley and J. Demmel and I. Dhillon and K. Stanley and J. Dongarra and S. Hammarling and G. Henry and D.
Walker",
  title = "{ScaLAPACK}: {A} Portable Linear Algebra Library for Distributed Memory Computers --- Design Issues and Performance",
  crossref = "ACM:1996:SCP",
  pages = "??--??",
  year = "1996",
  bibdate = "Mon Mar 23 12:31:18 1998",
  bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "See original LAPACK Working note in \cite{Choi:1995:SPL}.",
  URL = "http://www.netlib.org/utk/papers/sc96-scalapack/paper.html; http://www.netlib.org/utk/papers/sc96-scalapack/paper.ps; http://www.netlib.org/utk/people/JackDongarra/pdf/scala96.pdf; http://www.supercomp.org/sc96/proceedings/SC96PROC/DONGARRA/INDEX.HTM",
  acknowledgement = ack-nhfb,
}

@Article{Choi:1996:DIS,
  author = "Jaeyoung Choi and J. J. Dongarra and L. S. Ostrouchov and A. P. Petitet and D. W. Walker and R. C. Whaley",
  title = "Design and implementation of the {ScaLAPACK LU}, {$ Q R $}, and {Cholesky} factorization routines",
  journal = j-SCI-PROG,
  volume = "5",
  number = "3",
  pages = "173--184",
  month = "Fall",
  year = "1996",
  CODEN = "SCIPEV",
  ISSN = "1058-9244 (print), 1875-919X (electronic)",
  bibdate = "Sat Mar 22 15:39:54 MST 1997",
  bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "See original LAPACK Working note in \cite{Choi:1994:DIS}.",
  URL = "http://www.netlib.org/netlib/lapack/lawns/lawn80.ps; http://www.netlib.org/netlib/lapack/lawnspdf/lawn80.pdf; http://www.netlib.org/utk/papers/factor/ftcover.html",
  acknowledgement = ack-nhfb,
  classcodes = "C4140 (Linear algebra); C6110B (Software engineering techniques); C6115 (Programming support); C5440 (Multiprocessing systems); C6150N (Distributed systems software); C6110P (Parallel programming)",
  corpsource = "Dept. of Comput. Sci., Tennessee Univ., Knoxville, TN, USA",
  keywords = "BLACS; BLAS; block; communication; cyclic data distribution; de facto standard kernels; dense linear equation system; distributed memory systems; engineering; factorization routine; Intel; Intel Paragon System; Intel Touchstone Delta; iPSC/860; linear algebra; matrix; message passing; operations; parallel implementations; parallel machines; parallel programming; parallelized sequential LAPACK; PBLAS; performance; performance evaluation; scalability; ScaLAPACK Cholesky factorization routine; ScaLAPACK library; ScaLAPACK LU factorization routine; ScaLAPACK QR; software; software libraries; software packages; vector operations",
  treatment = "P Practical",
}

@InProceedings{Choi:1996:PSP,
  author = "Jaeyoung Choi and J. Dongarra and S. Ostrouchov and A. Petitet and D. Walker and R. C. Whaley",
  title = "A proposal for a set of {Parallel Basic Linear Algebra Subprograms}",
  crossref = "Dongarra:1996:APC",
  pages = "107--114",
  year = "1996",
  bibdate = "Sat Mar 22 15:39:54 MST 1997",
  bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "See original LAPACK Working note in \cite{Choi:1995:PSP}.",
  acknowledgement = ack-nhfb,
  classcodes = "C7310 (Mathematics computing); C6110P (Parallel programming); C4140 (Linear algebra)",
  conflocation = "Lyngby, Denmark; 21-24 Aug. 1995",
  conftitle = "Applied Parallel Computing. Computations in Physics, Chemistry and Engineering Science",
  corpsource = "Sch. of Comput., Soongsil Univ., Seoul, South Korea",
  keywords = "basic linear algebra; distributed memory; linear algebra; linear algebra subprograms; parallel; parallel programming; PBLAS; software libraries",
  treatment = "T Theoretical or Mathematical",
}

@InProceedings{Dongarra:1996:PFI,
  author = "J. J. Dongarra and J. {Du Croz} and S. Hammarling and J. Wa{\'s}niewski and A.
Zemla",
  title = "A proposal for a {Fortran 90} interface for {LAPACK}",
  crossref = "Dongarra:1996:APC",
  pages = "158--165",
  year = "1996",
  bibdate = "Sat Mar 22 15:39:54 MST 1997",
  bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "See original LAPACK Working note in \cite{Dongarra:1995:PFI}.",
  acknowledgement = ack-nhfb,
  classcodes = "C7310 (Mathematics computing); C4140 (Linear algebra); C6140D (High level languages); C6180 (User interfaces)",
  conflocation = "Lyngby, Denmark; 21-24 Aug. 1995",
  conftitle = "Applied Parallel Computing. Computations in Physics, Chemistry and Engineering Science",
  corpsource = "Dept. of Comput. Sci., Tennessee Univ., Knoxville, TN, USA",
  keywords = "FORTRAN; Fortran 90 interface; LAPACK; LAPACK code; linear algebra; mathematics computing; packages; software; user interfaces; user-interface",
  treatment = "P Practical; T Theoretical or Mathematical",
}

@Article{Henry:1996:PAU,
  author = "Greg Henry and Robert van de Geijn",
  title = "Parallelizing the {$ Q R $} Algorithm for the Unsymmetric Algebraic Eigenvalue Problem: Myths and Reality",
  journal = j-SIAM-J-SCI-COMP,
  volume = "17",
  number = "4",
  pages = "870--883",
  month = jul,
  year = "1996",
  CODEN = "SJOCE3",
  ISSN = "1064-8275 (print), 1095-7197 (electronic)",
  ISSN-L = "1064-8275",
  MRclass = "65F15 (15A18)",
  MRnumber = "97b:65044",
  bibdate = "Tue Apr 29 18:25:50 MDT 1997",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "See original LAPACK Working note in \cite{Henry:1994:PQA}.",
  acknowledgement = ack-nhfb,
}

@Article{Kaagstrom:1996:CES,
  author = "Bo K{\aa}gstr{\"o}m and Peter Poromaa",
  title = "Computing eigenspaces with specified eigenvalues of a regular matrix pair {$ ({A}, {B}) $} and condition estimation: theory, algorithms and software",
  journal = j-NUMER-ALGORITHMS,
  volume = "12",
  number = "3--4",
  pages = "369--407",
  month = jul,
  year = "1996",
  CODEN = "NUALEG",
  ISSN = "1017-1398 (print), 1572-9265 (electronic)",
  MRclass = "65Fxx",
  MRnumber = "1 402 856",
  bibdate = "Tue Apr 29 08:56:05 MDT 1997",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "See original LAPACK Working note in \cite{Kaagstrom:1994:CES}.",
  acknowledgement = ack-nhfb,
  classification = "B0290H (Linear algebra); C4140 (Linear algebra)",
  corpsource = "Dept. of Comput. Sci., Umea Univ., Sweden",
  keywords = "condition estimation; deflating sub-spaces; eigenspaces; eigenvalues; eigenvalues and eigenfunctions; error bounds; matrix algebra; numerical stability; reciprocal values; regular matrix pair; specified eigenvalues",
  pubcountry = "Switzerland",
  treatment = "T Theoretical or Mathematical",
}

@Article{Kaagstrom:1996:LSA,
  author = "Bo K{\aa}gstr{\"o}m and Peter Poromaa",
  title = "{LAPACK-style} algorithms and software for solving the generalized {Sylvester} equation and estimating the separation between regular matrix pairs",
  journal = j-TOMS,
  volume = "22",
  number = "1",
  pages = "78--103",
  month = mar,
  year = "1996",
  CODEN = "ACMSCU",
  ISSN = "0098-3500 (print), 1557-7295 (electronic)",
  MRclass = "65-04 (65F30)",
  MRnumber = "1 383 186",
  bibdate = "Sat Aug 31 16:07:02 MDT 1996",
  bibsource = "http://www.acm.org/pubs/contents/journals/toms/; http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "See original LAPACK Working note in \cite{Kaagstrom:1994:LSA}.",
  URL = "http://doi.acm.org/10.1145/225545.225552; http://www.acm.org/pubs/citations/journals/toms/1996-22-1/p78-kagstrom/",
  abstract = "Robust and fast software to solve the generalized Sylvester equation ({$ A R - L B = C, D R - L E = F $}) for unknowns {$R$} and {$L$} is presented.
This special linear system of equations, and its transpose, arises in computing error bounds for computed eigenvalues and eigenspaces of the generalized eigenvalue problem {$ S - \lambda T $}, in computing deflating subspaces of the same problem, and in computing certain decompositions of transfer matrices arising in control theory. Our contributions are twofold. First, we reorganize the standard algorithm for this problem to use Level 3 BLAS operations, like matrix multiplication, in its inner loop. This speeds up the algorithm by a factor of 9 on an IBM RS6000. Second, we develop and compare several condition estimation algorithms, which inexpensively but accurately estimate the sensitivity of the solution of this linear system.",
  acknowledgement = ack-nhfb,
  keywords = "algorithms",
  subject = "{\bf G.4}: Mathematics of Computing, MATHEMATICAL SOFTWARE, Algorithm analysis. {\bf F.2.1}: Theory of Computation, ANALYSIS OF ALGORITHMS AND PROBLEM COMPLEXITY, Numerical Algorithms and Problems, Computations on matrices. {\bf G.1.3}: Mathematics of Computing, NUMERICAL ANALYSIS, Numerical Linear Algebra, Linear systems (direct and iterative methods). {\bf G.4}: Mathematics of Computing, MATHEMATICAL SOFTWARE, Reliability and robustness. {\bf G.1.3}: Mathematics of Computing, NUMERICAL ANALYSIS, Numerical Linear Algebra, Conditioning. {\bf G.1.3}: Mathematics of Computing, NUMERICAL ANALYSIS, Numerical Linear Algebra, Eigenvalues. {\bf G.4}: Mathematics of Computing, MATHEMATICAL SOFTWARE, Efficiency. {\bf F.2.1}: Theory of Computation, ANALYSIS OF ALGORITHMS AND PROBLEM COMPLEXITY, Numerical Algorithms and Problems, Computations on matrices. {\bf G.1.3}: Mathematics of Computing, NUMERICAL ANALYSIS, Numerical Linear Algebra, Matrix inversion.",
}

@Article{Lehoucq:1996:CEU,
  author = "R. B. Lehoucq",
  title = "The Computation of Elementary Unitary Matrices",
  journal = j-TOMS,
  volume = "22",
  number = "4",
  pages = "393--400",
  month = dec,
  year = "1996",
  CODEN = "ACMSCU",
  ISSN = "0098-3500 (print), 1557-7295 (electronic)",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "See original LAPACK Working note in \cite{Lehoucq:1995:CEU}.",
  abstract = "The construction of elementary unitary matrices that transform a complex vector to a multiple of $ e_1 $, the first column of the identity matrix, is studied. We present four variants and their software implementation, including a discussion on the {LAPACK} subroutine {CLARFG}. Comparisons are also given.",
  accepted = "June 1996",
  acknowledgement = ack-rfb,
  keywords = "algorithms",
  subject = "{\bf F.2}: Theory of Computation, ANALYSIS OF ALGORITHMS AND PROBLEM COMPLEXITY, Numerical Algorithms and Problems, Computations on matrices. {\bf G.1.3}: Mathematics of Computing, NUMERICAL ANALYSIS, Numerical Linear Algebra. {\bf G.4}: Mathematics of Computing, MATHEMATICAL SOFTWARE, Algorithm analysis.",
}

@Article{Bai:1997:SDN,
  author = "Z. Bai and J. Demmel and J. Dongarra and A. Petitet and H. Robinson and K.
Stanley",
  title = "The Spectral Decomposition of Nonsymmetric Matrices on Distributed Memory Parallel Computers",
  journal = j-SIAM-J-SCI-COMP,
  volume = "18",
  number = "5",
  pages = "1446--1461",
  month = sep,
  year = "1997",
  CODEN = "SJOCE3",
  ISSN = "1064-8275 (print), 1095-7197 (electronic)",
  ISSN-L = "1064-8275",
  MRclass = "65F05 (65F30 65Y05)",
  MRnumber = "98d:65027",
  bibdate = "Tue Feb 26 10:04:07 2002",
  bibsource = "http://epubs.siam.org/sam-bin/dbq/toc/SISC/18/5; http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; http://www.math.utah.edu/pub/tex/bib/lawn.bib; http://www.math.utah.edu/pub/tex/bib/siamjscicomput.bib",
  note = "See original LAPACK Working note in \cite{Bai:1995:SDN}.",
  URL = "http://epubs.siam.org/sam-bin/dbq/article/28136; http://www.netlib.org/utk/papers/sign/sign.html; http://www.netlib.org/utk/papers/sign/sign.ps; http://www.netlib.org/utk/people/JackDongarra/pdf/sign.pdf",
  acknowledgement = ack-nhfb,
}

@Article{Blackford:1997:PEN,
  author = "L. S. Blackford and A. Cleary and A. Petitet and R. C. Whaley and J. Demmel and I. Dhillon and H. Ren and K. Stanley and J. Dongarra and S. Hammarling",
  title = "Practical Experience in the Numerical Dangers of Heterogeneous Computing",
  journal = j-TOMS,
  volume = "23",
  number = "2",
  pages = "133--147",
  month = jun,
  year = "1997",
  CODEN = "ACMSCU",
  ISSN = "0098-3500 (print), 1557-7295 (electronic)",
  bibdate = "Tue Feb 26 10:10:44 2002",
  bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "See original LAPACK Working note in \cite{Blackford:1996:PEDa} and \cite{Blackford:1996:PEDb}.",
  URL = "http://doi.acm.org/10.1145/264029.264030; http://www.acm.org/pubs/citations/journals/toms/1997-23-2/p133-blackford/",
  abstract = "Special challenges exist in writing reliable numerical library software for heterogeneous computing environments. Although a lot of software for distributed-memory parallel computers has been written, porting this software to a network of workstations requires careful consideration. The symptoms of heterogeneous computing failures can range from erroneous results without warning to deadlock. Some of the problems are straightforward to solve, but for others the solutions are not so obvious, or incur an unacceptable overhead. Making software robust on heterogeneous systems often requires additional communication. We describe and illustrate the problems encountered during the development of ScaLAPACK and the NAG Numerical PVM Library. Where possible, we suggest ways to avoid potential pitfalls, or if that is not possible, we recommend that the software not be used on heterogeneous networks.",
  acknowledgement = ack-rfb # " and " # ack-kr,
  keywords = "distributed-memory systems; floating-point arithmetic; heterogeneous processor networks; message passing; numerical software; reliability",
  subject = "{\bf D.1.3} Software, PROGRAMMING TECHNIQUES, Concurrent Programming, Distributed programming. {\bf G.1.0} Mathematics of Computing, NUMERICAL ANALYSIS, General, Computer arithmetic. {\bf G.1.0} Mathematics of Computing, NUMERICAL ANALYSIS, General, Parallel algorithms.",
}

@Article{Dongarra:1997:KCPb,
  author = "Jack J. Dongarra and Sven Hammarling and David W.
Walker",
  title = "Key concepts for parallel out-of-core {$ L U $} factorization",
  journal = j-PARALLEL-COMPUTING,
  volume = "23",
  number = "1--2",
  pages = "49--70",
  day = "16",
  month = apr,
  year = "1997",
  CODEN = "PACOEJ",
  ISSN = "0167-8191",
  bibdate = "Tue Oct 21 15:14:48 MDT 1997",
  bibsource = "Compendex database; http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "See original LAPACK Working note in \cite{Dongarra:1996:KCP}.",
  acknowledgement = ack-nhfb,
  affiliation = "Univ of Tennessee",
  affiliationaddress = "Knoxville, TN, USA",
  classification = "714.2; 722.1; 722.4; 723; 723.1; 921",
  conference = "Proceedings of the 1996 International Workshop on Environments and Tools for Parallel Scientific Computing",
  journalabr = "Parallel Comput",
  keywords = "Algorithms; Computer architecture; Input output programs; lu factorization; Microprocessor chips; Parallel processing systems; Percolation (computer storage); Storage allocation (computer)",
  meetingaddress = "Faverges de la Tour, Fr",
  meetingdate = "Aug 22--23 1996",
  meetingdate2 = "08/22--23/96",
}

@Article{Higham:1997:IRL,
  author = "Nicholas J. Higham",
  title = "Iterative refinement for linear systems and {LAPACK}",
  journal = j-IMA-J-NUMER-ANAL,
  volume = "17",
  number = "4",
  pages = "495--509",
  month = oct,
  year = "1997",
  CODEN = "IJNADH",
  ISSN = "0272-4979 (print), 1464-3642 (electronic)",
  MRclass = "65F30",
  MRnumber = "98e:65036",
  bibdate = "Sat Dec 23 17:06:35 MST 2000",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib; http://www3.oup.co.uk/imanum/hdb/Volume_17/Issue_04/; MathSciNet database",
  note = "Preprint published as Numerical Analysis Report 277, Manchester Centre for Computational Mathematics, Manchester, England, and as LAPACK Working Note 104. See original LAPACK Working note in \cite{Higham:1995:IRL}.",
  URL = "http://www3.oup.co.uk/imanum/hdb/Volume_17/Issue_04/170495.sgm.abs.html",
  acknowledgement = ack-nhfb,
}

@Article{Higham:1997:SDP,
  author = "Nicholas J. Higham",
  title = "Stability of the Diagonal Pivoting Method with Partial Pivoting",
  journal = j-SIAM-J-MAT-ANA-APPL,
  volume = "18",
  number = "1",
  pages = "52--65",
  month = jan,
  year = "1997",
  CODEN = "SJMAEL",
  ISSN = "0895-4798 (print), 1095-7162 (electronic)",
  bibdate = "Sun Mar 2 11:16:54 GMT 1997",
  bibsource = "http://www.math.utah.edu/pub/bibnet/authors/h/higham-nicholas-john.bib; http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "See original LAPACK Working note in \cite{Higham:1995:SDP}.",
  acknowledgement = ack-njh,
}

@Article{Li:1997:RPB,
  author = "Ren-Cang Li",
  title = "Relative perturbation bounds for the unitary polar factor",
  journal = j-BIT-NUM-MATH,
  volume = "37",
  number = "1",
  pages = "67--75",
  month = mar,
  year = "1997",
  CODEN = "BITTEL, NBITAB",
  ISSN = "0006-3835 (print), 1572-9125 (electronic)",
  MRclass = "15A18 (15A23 65F35)",
  MRnumber = "97k:15026",
  MRreviewer = "Roy Mathias",
  bibdate = "Fri Nov 13 07:00:34 MST 1998",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note = "See original LAPACK Working note in \cite{Li:1994:RPB}.",
  URL = "http://www.mai.liu.se/BIT/contents/bit37.html",
  acknowledgement = ack-nhfb,
}

@Article{vandeGeijn:1997:SSU,
  author = "R. A. van de Geijn and J.
Watts", title = "{SUMMA}: scalable universal matrix multiplication algorithm", journal = j-CPE, volume = "9", number = "4", pages = "255--274", month = apr, year = "1997", CODEN = "CPEXEI", ISSN = "1040-3108", bibdate = "Tue Sep 7 06:06:30 MDT 1999", bibsource = "http://www.interscience.wiley.com/jpages/1040-3108/; http://www.math.utah.edu/pub/tex/bib/lawn.bib; http://www3.interscience.wiley.com/journalfinder.html", note = "See original LAPACK Working note in \cite{vandeGeijn:1995:SSU}.", URL = "http://www3.interscience.wiley.com/cgi-bin/abstract?ID=13861; http://www3.interscience.wiley.com/cgi-bin/fulltext?ID=13861&PLACEBO=IE.pdf", acknowledgement = ack-nhfb, } @Article{Choi:1998:NPM, author = "Jaeyoung Choi", title = "A new parallel matrix multiplication algorithm on distributed-memory concurrent computers", journal = j-CPE, volume = "10", number = "8", pages = "655--670", month = jul, year = "1998", CODEN = "CPEXEI", ISSN = "1040-3108", bibdate = "Tue Sep 7 06:06:42 MDT 1999", bibsource = "http://www.interscience.wiley.com/jpages/1040-3108/; http://www.math.utah.edu/pub/tex/bib/lawn.bib; http://www3.interscience.wiley.com/journalfinder.html", note = "See original LAPACK Working note in \cite{Choi:1997:NPM}.", URL = "http://www3.interscience.wiley.com/cgi-bin/abstract?ID=10008698; http://www3.interscience.wiley.com/cgi-bin/fulltext?ID=10008698&PLACEBO=IE.pdf", acknowledgement = ack-nhfb, } @InProceedings{Desprez:1998:SBA, author = "F. Desprez and J. Dongarra and A. Petitet and C. Randriamaro", title = "Scheduling Block-Cyclic Array Redistribution", crossref = "DHollander:1998:PCF", pages = "227--234", year = "1998", bibdate = "Thu Sep 16 09:48:36 MDT 1999", bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "See original LAPACK Working note in \cite{Desprez:1997:SBC} and \cite{Desprez:1998:SBC}.", acknowledgement = ack-nhfb, } @Article{Desprez:1998:SBC, author = "F. Desprez and J. 
Dongarra and A. Petitet and C. Randriamaro and Y. Robert", title = "Scheduling Block-Cyclic Array Redistribution", journal = j-IEEE-TRANS-PAR-DIST-SYS, volume = "9", number = "2", pages = "192--??", month = feb, year = "1998", CODEN = "ITDSEO", ISSN = "1045-9219 (print), 1558-2183 (electronic)", bibdate = "Fri Nov 6 12:31:15 MST 1998", bibsource = "http://www.computer.org/tpds/td1998/; http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "See original LAPACK Working note in \cite{Desprez:1997:SBC}.", URL = "http://dlib.computer.org/td/books/td1998/pdf/l0192.pdf; http://www.computer.org/tpds/td1998/l0192abs.htm", acknowledgement = ack-nhfb, } @Article{Dongarra:1998:HPL, author = "J. Dongarra and J. Wa{\'s}niewski", title = "High Performance Linear Algebra Package {LAPACK90}", journal = j-LECT-NOTES-COMP-SCI, volume = "1388", pages = "387--391", year = "1998", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", bibdate = "Sat Oct 10 14:40:24 MDT 1998", bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; http://www.math.utah.edu/pub/tex/bib/lawn.bib; http://www.math.utah.edu/pub/tex/bib/lncs1998a.bib", note = "See original LAPACK Working note in \cite{Wasniewski:1998:HPL}.", acknowledgement = ack-nhfb, } @Article{Kaagstrom:1998:GBL, author = "Bo K{\aa}gstr{\"o}m and Per Ling and Charles {Van Loan}", title = "{GEMM-based} level 3 {BLAS}: high-performance model implementations and performance evaluation benchmark", journal = j-TOMS, volume = "24", number = "3", pages = "268--302", month = sep, year = "1998", CODEN = "ACMSCU", ISSN = "0098-3500 (print), 1557-7295 (electronic)", bibdate = "Mon Feb 8 17:51:43 MST 1999", bibsource = "http://www.acm.org/pubs/contents/journals/toms/1998-24/; http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "See original LAPACK Working note in \cite{Kaagstrom:1995:GBLa}.", URL = "http://doi.acm.org/10.1145/292395.292412; 
http://www.acm.org:80/pubs/citations/journals/toms/1998-24-3/p268-kagstrom/", abstract = "The level 3 Basic Linear Algebra Subprograms (BLAS) are designed to perform various matrix multiply and triangular system solving computations. Due to the complex hardware organization of advanced computer architectures the development of optimal level 3 BLAS code is costly and time consuming. However, it is possible to develop a portable and high-performance level 3 BLAS library mainly relying on a highly optimized GEMM, the routine for the general matrix multiply and add operation. With suitable partitioning, all the other level 3 BLAS can be defined in terms of GEMM and a small amount of level 1 and level 2 computations. Our contribution is twofold. First, the model implementations in Fortran 77 of the GEMM-based level 3 BLAS are structured to reduce effectively data traffic in a memory hierarchy. Second, the GEMM-based level 3 BLAS performance evaluation benchmark is a tool for evaluating and comparing different implementations of the level 3 BLAS with the GEMM-based model implementations.", acknowledgement = ack-nhfb, keywords = "algorithms; measurement; performance", subject = "{\bf G.1.3} Mathematics of Computing, NUMERICAL ANALYSIS, Numerical Linear Algebra, Linear systems (direct and iterative methods). {\bf D.3.2} Software, PROGRAMMING LANGUAGES, Language Classifications, FORTRAN 77. {\bf F.2.1} Theory of Computation, ANALYSIS OF ALGORITHMS AND PROBLEM COMPLEXITY, Numerical Algorithms and Problems, Computations on matrices. {\bf G.4} Mathematics of Computing, MATHEMATICAL SOFTWARE, Certification and testing. {\bf G.4} Mathematics of Computing, MATHEMATICAL SOFTWARE, Efficiency. {\bf G.4} Mathematics of Computing, MATHEMATICAL SOFTWARE, Portability**. {\bf G.4} Mathematics of Computing, MATHEMATICAL SOFTWARE, Reliability and robustness. 
{\bf G.4} Mathematics of Computing, MATHEMATICAL SOFTWARE, Verification**.", } @Article{Li:1998:RPT, author = "Ren-Cang Li", title = "Relative Perturbation Theory: {I}. Eigenvalue and Singular Value Variations", journal = j-SIAM-J-MAT-ANA-APPL, volume = "19", number = "4", pages = "956--982", month = oct, year = "1998", CODEN = "SJMAEL", ISSN = "0895-4798 (print), 1095-7162 (electronic)", bibdate = "Fri Dec 4 12:14:09 MST 1998", bibsource = "http://epubs.siam.org/sam-bin/dbq/toc/SIMAX/19/4; http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "See original LAPACK Working note in \cite{Li:1994:RPTa}.", URL = "http://epubs.siam.org/sam-bin/dbq/article/29849", acknowledgement = ack-nhfb, } @InProceedings{Whaley:1998:ATL, author = "R. Clint Whaley and Jack J. Dongarra", title = "{Automatically Tuned Linear Algebra Software} ({ATLAS})", crossref = "ACM:1998:SHP", year = "1998", bibdate = "Wed Mar 06 06:37:44 2002", bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "Best Paper Award for Systems. See original LAPACK Working note in \cite{Whaley:1997:ATL}.", URL = "http://www.netlib.org/utk/people/JackDongarra/PAPERS/atlas-sc98.ps; http://www.supercomp.org/sc98/TechPapers/sc98_FullAbstracts/Whaley814/INDEX.HTM", acknowledgement = ack-nhfb, } @Article{Arbenz:1999:CPSc, author = "P. Arbenz and A. Cleary and J. Dongarra and M. 
Hegland", title = "A Comparison of Parallel Solvers for Diagonally Dominant and General Narrow-Banded Linear Systems", journal = j-PARALLEL-DIST-COMP-PRACT, volume = "2", number = "4", pages = "??--??", month = "????", year = "1999", CODEN = "????", ISSN = "1097-2803", bibdate = "Fri Dec 19 08:14:14 MST 2003", bibsource = "http://www.cs.okstate.edu/~pdcp/vols/vol02/vol02no4.html; http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "See original LAPACK Working note in \cite{Arbenz:1999:CPSa}.", URL = "http://www.cs.okstate.edu/~pdcp/vols/vol02/vol02no4abs.html#arbenz", acknowledgement = ack-nhfb, } @Article{Demmel:1999:APS, author = "James W. Demmel and John R. Gilbert and Xiaoye S. Li", title = "An Asynchronous Parallel Supernodal Algorithm for Sparse {Gaussian} Elimination", journal = j-SIAM-J-MAT-ANA-APPL, volume = "20", number = "4", pages = "915--952", month = oct, year = "1999", CODEN = "SJMAEL", ISSN = "0895-4798 (print), 1095-7162 (electronic)", bibdate = "Sat Jan 22 14:39:14 MST 2000", bibsource = "http://epubs.siam.org/sam-bin/dbq/toc/SIMAX/20/4; http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "See original LAPACK Working note in \cite{Demmel:1997:APS}.", URL = "http://epubs.siam.org/sam-bin/dbq/article/31768", acknowledgement = ack-nhfb, } @Article{Demmel:1999:CSV, author = "James Demmel and others", title = "Computing the singular value decomposition with high relative accuracy", journal = j-LINEAR-ALGEBRA-APPL, volume = "299", number = "1--3", pages = "21--80", day = "15", month = sep, year = "1999", CODEN = "LAAPAW", ISSN = "0024-3795 (print), 1873-1856 (electronic)", bibdate = "Wed Nov 01 08:18:32 2000", bibsource = "http://www.elsevier.com/locate/laa; http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "See original LAPACK Working note in \cite{Demmel:1997:CSV}.", URL = "http://www.elsevier.nl/gej-ng/10/30/19/112/21/22/abstract.html; 
http://www.elsevier.nl/gej-ng/10/30/19/112/21/22/article.pdf", acknowledgement = ack-nhfb, } @Article{Demmel:1999:SAS, author = "James W. Demmel and Stanley C. Eisenstat and John R. Gilbert and Xiaoye S. Li and Joseph W. H. Liu", title = "A Supernodal Approach to Sparse Partial Pivoting", journal = j-SIAM-J-MAT-ANA-APPL, volume = "20", number = "3", pages = "720--755", month = jul, year = "1999", CODEN = "SJMAEL", ISSN = "0895-4798 (print), 1095-7162 (electronic)", bibdate = "Sat Jan 22 14:39:12 MST 2000", bibsource = "http://epubs.siam.org/sam-bin/dbq/toc/SIMAX/20/3; http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "See original LAPACK Working note in \cite{Demmel:1995:SAS}.", URL = "http://epubs.siam.org/sam-bin/dbq/article/29176", acknowledgement = ack-nhfb, } @Article{Li:1999:RPT, author = "Ren-Cang Li", title = "Relative Perturbation Theory: {II}. Eigenspace and Singular Subspace Variations", journal = j-SIAM-J-MAT-ANA-APPL, volume = "20", number = "2", pages = "471--492", month = apr, year = "1999", CODEN = "SJMAEL", ISSN = "0895-4798 (print), 1095-7162 (electronic)", bibdate = "Fri Dec 4 12:14:09 MST 1998", bibsource = "http://epubs.siam.org/sam-bin/dbq/toc/SIMAX/20/2; http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "See original LAPACK Working note in \cite{Li:1994:RPTb}.", URL = "http://epubs.siam.org/sam-bin/dbq/article/29850", acknowledgement = ack-nhfb, } @Article{Petitet:1999:ARM, author = "A. P. Petitet and J. J. 
Dongarra", title = "Algorithmic Redistribution Methods for Block-Cyclic Decompositions", journal = j-IEEE-TRANS-PAR-DIST-SYS, volume = "10", number = "12", pages = "1201--??", month = dec, year = "1999", CODEN = "ITDSEO", ISSN = "1045-9219 (print), 1558-2183 (electronic)", bibdate = "Thu Oct 12 18:48:32 MDT 2000", bibsource = "http://www.computer.org/tpds/td1999/; http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "See original LAPACK Working note in \cite{Petitet:1997:ARM}.", URL = "http://dlib.computer.org/td/books/td1999/pdf/l1201.pdf; http://www.computer.org/tpds/td1999/l1201abs.htm; http://www.netlib.org/utk/people/JackDongarra/PAPERS/alg-dist.ps; http://www.netlib.org/utk/people/JackDongarra/pdf/alg-dist.pdf", acknowledgement = ack-nhfb, } @InProceedings{Petitet:1999:NLA, author = "A. Petitet and H. Casanova and R. Whaley and J. Dongarra and Y. Robert", booktitle = "SIAM Annual Meeting, Atlanta, GA, May 13, 1999", title = "A Numerical Linear Algebra Problem Solving Environment Designer's Perspective", publisher = pub-SIAM, address = pub-SIAM:adr, year = "1999", bibdate = "Tue Feb 26 10:10:44 2002", bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "See original LAPACK Working note in \cite{Petitet:1998:NLA} and \cite{Petitet:2000:PDS}.", URL = "http://www.netlib.org/utk/people/JackDongarra/PAPERS/la-handbook-chp10.ps", acknowledgement = ack-nhfb, } @Article{DAzevedo:2000:DIP, author = "Eduardo D'Azevedo and Jack Dongarra", title = "The design and implementation of the parallel out-of-core {ScaLAPACK} {$ L U $}, {$ Q R $}, and {Cholesky} factorization routines", journal = j-CPE, volume = "12", number = "15", pages = "1481--1493", day = "25", month = dec, year = "2000", CODEN = "CPEXEI", DOI = "http://dx.doi.org/10.1002/1096-9128(20001225)12:15<1481::AID-CPE540>3.0.CO;2-V", ISSN = "1040-3108", bibdate = "Sat Apr 7 
06:56:11 MDT 2001", bibsource = "http://www.interscience.wiley.com/jpages/1040-3108; http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; http://www.math.utah.edu/pub/tex/bib/lawn.bib; http://www3.interscience.wiley.com/journalfinder.html", note = "See original LAPACK Working note in \cite{Dongarra:1997:DIP}.", URL = "http://www3.interscience.wiley.com/cgi-bin/abstract/76505648/START; http://www3.interscience.wiley.com/cgi-bin/fulltext?ID=76505648&PLACEBO=IE.pdf", acknowledgement = ack-nhfb, } @Article{Parlett:2000:IAP, author = "Beresford N. Parlett and Osni A. Marques", title = "An implementation of the {$ d q d s $} algorithm (positive case)", journal = j-LINEAR-ALGEBRA-APPL, volume = "309", number = "1--3", pages = "217--259", day = "15", month = apr, year = "2000", CODEN = "LAAPAW", ISSN = "0024-3795 (print), 1873-1856 (electronic)", bibdate = "Mon Oct 9 10:54:41 MDT 2000", bibsource = "http://www.elsevier.com/locate/laa; http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "See original LAPACK Working note in \cite{Parlett:2002:IDA}.", URL = "http://www.elsevier.nl/gej-ng/10/30/19/126/25/37/abstract.html; http://www.elsevier.nl/gej-ng/10/30/19/126/25/37/article.pdf", acknowledgement = ack-nhfb, } @InCollection{Petitet:2000:PDS, author = "A. Petitet and H. Casanova and J. Dongarra and Y. Robert and R. 
Whaley", editor = "Jacek Blazewicz and others", booktitle = "Handbook on Parallel and Distributed Processing", title = "Parallel and Distributed Scientific Computing: {A} Numerical Linear Algebra Problem Solving Environment Designer's Perspective", publisher = pub-SV, address = pub-SV:adr, pages = "??--??", year = "2000", ISBN = "3-540-66441-6", ISBN-13 = "978-3-540-66441-3", LCCN = "QA76.58 .H36 2000", bibdate = "Tue Feb 26 10:10:44 2002", bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "See original LAPACK Working note in \cite{Petitet:1998:NLA} and \cite{Petitet:1999:NLA}.", URL = "http://www.netlib.org/utk/people/JackDongarra/PAPERS/la-handbook.ps", acknowledgement = ack-nhfb, bookpages = "635", } @Article{Andersen:2001:RFC, author = "Bjarne S. Andersen and Jerzy Wa{\'s}niewski and Fred G. Gustavson", title = "A recursive formulation of {Cholesky} factorization of a matrix in packed storage", journal = j-TOMS, volume = "27", number = "2", pages = "214--244", month = jun, year = "2001", CODEN = "ACMSCU", ISSN = "0098-3500 (print), 1557-7295 (electronic)", bibdate = "Wed Feb 6 16:43:42 MST 2002", bibsource = "http://www.acm.org/pubs/contents/journals/toms/; http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "See original LAPACK Working note in \cite{Andersen:2000:RFC}.", URL = "http://doi.acm.org/10.1145/383738.383741", abstract = "A new compact way to store a symmetric or triangular matrix called RPF for Recursive Packed Format is fully described. Novel ways to transform RPF to and from standard packed format are included. A new algorithm, called RPC for Recursive Packed Cholesky, that operates on the RPF format is presented. Algorithm RPC is based on level-3 BLAS and requires variants of algorithms TRSM and SYRK that work on RPF. We call these RP\_TRSM and RP\_SYRK and find that they do most of their work by calling GEMM. 
It follows that most of the execution time of RPC lies in GEMM. The advantage of this storage scheme compared to traditional packed and full storage is demonstrated. First, the RPC storage format uses the minimal amount of storage for the symmetric or triangular matrix. Second, RPC gives a level-3 implementation of Cholesky factorization whereas standard packed implementations are only level 2. Hence, the performance of our RPC implementation is decidedly superior. Third, unlike fixed block size algorithms, RPC, requires no block size tuning parameter. We present performance measurements on several current architectures that demonstrate improvements over the traditional packed routines. Also MSP parallel computations on the IBM SMP computer are made. The graphs that are attached in Section 7 show that the RPC algorithms are superior by a factor between 1.6 and 7.4 for order around 1000, and between 1.9 and 10.3 for order around 3000 over the traditional packed algorithms. For some architectures, the RPC performance results are almost the same or even better than the traditional full-storage algorithms results.", accepted = "15 March 2001", acknowledgement = ack-nhfb, } @Article{Whaley:2001:AEO, author = "R. Clint Whaley and Antoine Petitet and Jack J. 
Dongarra", title = "Automated empirical optimizations of software and the {ATLAS} project", journal = j-PARALLEL-COMPUTING, volume = "27", number = "1--2", pages = "3--35", month = jan, year = "2001", CODEN = "PACOEJ", ISSN = "0167-8191", bibdate = "Wed Jul 18 06:31:14 MDT 2001", bibsource = "http://www.elsevier.com/locate/issn/01678191; http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "See original LAPACK Working note in \cite{Whaley:2000:AEO}.", URL = "http://www.elsevier.nl/gej-ng/10/35/21/47/25/23/abstract.html; http://www.elsevier.nl/gej-ng/10/35/21/47/25/23/article.pdf; http://www.netlib.org/utk/people/JackDongarra/PAPERS/atlas_pub.pdf", acknowledgement = ack-nhfb, } @Article{Bindel:2002:CGR, author = "David Bindel and James Demmel and William Kahan and Osni Marques", title = "On computing {Givens} rotations reliably and efficiently", journal = j-TOMS, volume = "28", number = "2", pages = "206--238", month = jun, year = "2002", CODEN = "ACMSCU", ISSN = "0098-3500 (print), 1557-7295 (electronic)", bibdate = "Sat Nov 9 11:16:50 MST 2002", bibsource = "http://www.acm.org/pubs/contents/journals/toms/; http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "See original LAPACK Working note in \cite{Bindel:2000:CGR}.", URL = "http://doi.acm.org/10.1145/567806.567809", abstract = "We consider the efficient and accurate computation of Givens rotations. When $f$ and $g$ are positive real numbers, this simply amounts to computing the values of $ c = f / \sqrt {f^2 + g^2} $, $ s = g / \sqrt {f^2 + g^2} $, and $ r = \sqrt {f^2 + g^2} $. This apparently trivial computation merits closer consideration for the following three reasons. 
First, while the definitions of $c$, $s$ and $r$ seem obvious in the case of two nonnegative arguments $f$ and $g$, there is enough freedom of choice when one or more of $f$ and $g$ are negative, zero or complex that LAPACK auxiliary routines SLARTG, CLARTG, SLARGV and CLARGV can compute rather different values of $c$, $s$ and $r$ for mathematically identical values of $f$ and $g$. To eliminate this unnecessary ambiguity, the BLAS Technical Forum chose a single consistent definition of Givens rotations that we will justify here. Second, computing accurate values of $c$, $s$ and $r$ as efficiently as possible and reliably despite over/underflow is surprisingly complicated. For complex Givens rotations, the most efficient formulas require only one real square root and one real divide (as well as several much cheaper additions and multiplications), but a reliable implementation using only working precision has a number of cases. On a Sun Ultra-10, the new implementation is slightly faster than the previous LAPACK implementation in the most common case, and 2.7 to 4.6 times faster than the corresponding vendor, reference or ATLAS routines. It is also more reliable; all previous codes occasionally suffer from large inaccuracies due to over/underflow. For real Givens rotations there are also improvements in speed and accuracy, though not as striking. 
Third, the design process that led to this reliable implementation is quite systematic, and could be applied to the design of similarly reliable subroutines.", acknowledgement = ack-nhfb, } @Article{Henry:2002:PIN, author = "Greg Henry and David Watkins and Jack Dongarra", title = "A Parallel Implementation of the Nonsymmetric {$ Q R $} Algorithm for Distributed Memory Architectures", journal = j-SIAM-J-SCI-COMP, volume = "24", number = "1", pages = "284--311", month = jan, year = "2002", CODEN = "SJOCE3", DOI = "http://dx.doi.org/10.1137/S1064827597325165", ISSN = "1064-8275 (print), 1095-7197 (electronic)", ISSN-L = "1064-8275", bibdate = "Tue Oct 22 18:24:38 MDT 2002", bibsource = "http://epubs.siam.org/sam-bin/dbq/toc/SISC/24/1; http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "See original LAPACK Working note in \cite{Henry:1997:PIN}.", URL = "http://epubs.siam.org/sam-bin/dbq/article/32516", acknowledgement = ack-nhfb, fjournal = "SIAM Journal on Scientific Computing", } @Article{Li:2002:DIT, author = "Xiaoye S. Li and James W. Demmel and David H. Bailey and Greg Henry and Yozo Hida and Jimmy Iskandar and William Kahan and Suh Y. Kang and Anil Kapur and Michael C. Martin and Brandon J. Thompson and Teresa Tung and Daniel J. Yoo", title = "Design, implementation and testing of extended and mixed precision {BLAS}", journal = j-TOMS, volume = "28", number = "2", pages = "152--205", month = jun, year = "2002", CODEN = "ACMSCU", ISSN = "0098-3500 (print), 1557-7295 (electronic)", bibdate = "Sat Nov 9 11:16:50 MST 2002", bibsource = "http://www.acm.org/pubs/contents/journals/toms/; http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "See original LAPACK Working note in \cite{Li:2000:DIT}.", URL = "http://doi.acm.org/10.1145/567806.567808", abstract = "This paper describes the design rationale, a C implementation, and conformance testing of a subset of the new Standard for the BLAS (Basic Linear Algebra Subroutines): Extended and Mixed Precision BLAS. 
Permitting higher internal precision and mixed input\slash output types and precisions allows us to implement some algorithms that are simpler, more accurate, and sometimes faster than possible without these features. The new BLAS are challenging to implement and test because there are many more subroutines than in the existing Standard, and because we must be able to assess whether a higher precision is used for internal computations than is used for either input or output variables. We have therefore developed an automated process of generating and systematically testing these routines. Our methodology is applicable to languages besides C. In particular, our algorithms used in the testing code will be valuable to all other BLAS implementors. Our extra precision routines achieve excellent performance---close to half of the machine peak Megaflop rate even for the Level 2 BLAS, when the data access is stride one.", acknowledgement = ack-nhfb, } @Article{Dongarra:2003:SANb, author = "Jack Dongarra and Victor Eijkhout", title = "Self-Adapting Numerical Software for Next Generation Applications", journal = j-IJHPCA, volume = "17", number = "2", pages = "125--131", month = "Summer", year = "2003", CODEN = "IHPCFL", ISSN = "1094-3420 (print), 1741-2846 (electronic)", ISSN-L = "1094-3420", bibdate = "Fri Nov 28 06:52:13 2003", bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "See original LAPACK Working note in \cite{Dongarra:2002:SAN}.", URL = "http://www.netlib.org/netlib/utk/people/JackDongarra/PAPERS/sans-position.pdf; http://www.netlib.org/utk/people/JackDongarra/PAPERS/sans-ijhpca.pdf", acknowledgement = ack-nhfb, } @Article{Dhillon:2004:OER, author = "Inderjit S. Dhillon and Beresford N. 
Parlett", title = "Orthogonal Eigenvectors and Relative Gaps", journal = j-SIAM-J-MAT-ANA-APPL, volume = "25", number = "3", pages = "858--899", month = jul, year = "2004", CODEN = "SJMAEL", ISSN = "0895-4798 (print), 1095-7162 (electronic)", bibdate = "Sat Apr 16 10:32:32 MDT 2005", bibsource = "http://epubs.siam.org/sam-bin/dbq/toc/SIMAX/25/3; http://www.math.utah.edu/pub/tex/bib/lawn.bib", note = "See original LAPACK Working note in \cite{Dhillon:2002:OER}.", URL = "http://epubs.siam.org/sam-bin/dbq/article/37011", acknowledgement = ack-nhfb, } @Article{Demmel:2007:FLAb, author = "James Demmel and Ioana Dumitriu and Olga Holtz", title = "Fast linear algebra is stable", journal = j-NUM-MATH, volume = "108", number = "1", pages = "59--91", month = nov, year = "2007", CODEN = "NUMMA7", DOI = "http://dx.doi.org/10.1007/s00211-007-0114-x", ISSN = "0029-599X (print), 0945-3245 (electronic)", bibdate = "Tue Jul 8 09:49:13 MDT 2008", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", abstract = "In Demmel et al. (Numer. Math. 106(2), 199--224, 2007) we showed that a large class of fast recursive matrix multiplication algorithms is stable in a normwise sense, and that in fact if multiplication of $n$-by-$n$ matrices can be done by any algorithm in {$ O(n^{\omega + \eta }) $} operations for any $ \eta > 0 $, then it can be done stably in {$ O(n^{\omega + \eta }) $} operations for any $ \eta > 0 $. 
Here we extend this result to show that essentially all standard linear algebra operations, including LU decomposition, QR decomposition, linear equation solving, matrix inversion, solving least squares problems, (generalized) eigenvalue problems and the singular value decomposition can also be done stably (in a normwise sense) in {$ O(n^{\omega + \eta }) $} operations.", acknowledgement = ack-nhfb, remark = "Journal publication of LAWN 186 \cite{Demmel:2007:FLAa}.", } @Article{Buttari:2008:PTF, author = "Alfredo Buttari and Julien Langou and Jakub Kurzak and Jack Dongarra", title = "Parallel Tiled {$ Q R $} Factorization for Multicore Architectures", journal = j-CCPE, volume = "20", number = "13", pages = "1573--1590", month = sep, year = "2008", CODEN = "CCPEBO", DOI = "http://dx.doi.org/10.1002/cpe.1301", ISSN = "1532-0626 (print), 1532-0634 (electronic)", bibdate = "Fri Apr 24 12:25:43 2009", bibsource = "http://www.math.utah.edu/pub/tex/bib/lawn.bib", acknowledgement = ack-nhfb, remark = "Journal publication of LAWN 190 \cite{Buttari:2007:PTQ}.", } @Article{Demmel:2009:EPI, author = "James Demmel and Yozo Hida and E. Jason Riedy and Xiaoye S. Li", title = "Extra-Precise Iterative Refinement for Overdetermined Least Squares Problems", journal = j-TOMS, volume = "35", number = "4", pages = "28:1--28:??", month = feb, year = "2009", CODEN = "ACMSCU", DOI = "http://doi.acm.org/10.1145/1462173.1462177", ISSN = "0098-3500 (print), 1557-7295 (electronic)", bibdate = "Fri Feb 13 18:09:40 MST 2009", bibsource = "http://www.acm.org/pubs/contents/journals/toms/; http://www.math.utah.edu/pub/tex/bib/lawn.bib", abstract = "We present the algorithm, error bounds, and numerical results for extra-precise iterative refinement applied to overdetermined linear least squares (LLS) problems. We apply our linear system refinement algorithm to Bj{\"o}rck's augmented linear system formulation of an LLS problem. 
Our algorithm reduces the forward normwise and componentwise errors to {$ O(\epsilon_w) $}, where $ \epsilon_w $ is the working precision, unless the system is too ill conditioned. In contrast to linear systems, we provide two separate error bounds for the solution $x$ and the residual $r$. The refinement algorithm requires only limited use of extra precision and adds only {$ O(m n) $} work to the {$ O(m n^2) $} cost of QR factorization for problems of size $ m \times n $. The extra precision calculation is facilitated by the new extended-precision BLAS standard in a portable way, and the refinement algorithm will be included in a future release of LAPACK and can be extended to the other types of least squares problems.", acknowledgement = ack-nhfb, articleno = "28", keywords = "BLAS; floating-point arithmetic; LAPACK; Linear algebra", remark = "Journal publication of LAWN 188 \cite{Demmel:2007:EPI}.", }

%%% ==================================================================== %%% Cross-referenced entries must come last:

%%% Parent proceedings volumes.  Each entry carries the same text in both
%%% title and booktitle: classic BibTeX crossref inheritance copies
%%% missing fields verbatim, so a child entry can only pick up a
%%% booktitle if the parent supplies one.

@Proceedings{Burkhart:1990:CVI,
  editor =       "H. (Helmar) Burkhart",
  booktitle =    "{CONPAR 90-VAPP IV}: {Joint} International Conference
                 on Vector and Parallel Processing, {Zurich,
                 Switzerland, September 10--13, 1990}: proceedings",
  title =        "{CONPAR 90-VAPP IV}: {Joint} International Conference
                 on Vector and Parallel Processing, {Zurich,
                 Switzerland, September 10--13, 1990}: proceedings",
  volume =       "457",
  publisher =    pub-SV,
  address =      pub-SV:adr,
  pages =        "xi + 900",
  year =         "1990",
  ISBN =         "3-540-53065-7 (Berlin), 0-387-53065-7 (New York)",
  ISBN-13 =      "978-3-540-53065-7 (Berlin), 978-0-387-53065-9 (New
                 York)",
  LCCN =         "QA76.58 .J65 1990",
  bibdate =      "Sat Apr 23 06:53:59 MDT 2005",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib;
                 z3950.loc.gov:7090/Voyager",
  series =       ser-LNCS,
  acknowledgement = ack-nhfb,
  subject =      "Parallel processing (Electronic computers);
                 Congresses; Vector processing (Computer science);
                 Congresses",
}

@Proceedings{IEEE:1990:PSN,
  editor =       "{IEEE}",
  booktitle =    "Proceedings, Supercomputing '90: November 12--16,
                 1990, New York Hilton at Rockefeller Center, New York,
                 New York",
  title =        "Proceedings, Supercomputing '90: November 12--16,
                 1990, New York Hilton at Rockefeller Center, New York,
                 New York",
  publisher =    pub-IEEE,
  address =      pub-IEEE:adr,
  pages =        "xxv + 982",
  year =         "1990",
  ISBN =         "0-8186-2056-0 (paperback) (IEEE Computer Society),
                 0-89791-412-0 (paperback) (ACM)",
  ISBN-13 =      "978-0-8186-2056-0 (paperback) (IEEE Computer Society),
                 978-0-89791-412-3 (paperback) (ACM)",
  LCCN =         "QA 76.88 S87 1990",
  bibdate =      "Wed Aug 28 06:48:31 MDT 1996",
  bibsource =    "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
                 http://www.math.utah.edu/pub/tex/bib/lawn.bib;
                 University of California MELVYL catalog",
  note =         "ACM order number 415903. IEEE Computer Society Press
                 order number 2056. IEEE catalog number 90CH2916-5.",
  acknowledgement = ack-nhfb,
  classification = "C5440 (Multiprocessor systems and techniques); C5470
                 (Performance evaluation and testing); C6110 (Systems
                 analysis and programming); C7000 (Computer
                 applications)",
  keywords =     "biological applications; computer applications;
                 computer chess; innovative architectures; linear
                 algebra algorithms; memory; networking computing;
                 parallel languages; parallel processing; particle
                 transport; partitioning; performance evaluation;
                 performance visualizations; pipeline processing;
                 program analysis; program restructuring; scheduling;
                 supercomputers --- congresses; vector algorithms",
}

@Proceedings{Griffiths:1992:NAP,
  editor =       "D. F. Griffiths and G. A. Watson",
  booktitle =    "Numerical analysis, 1991: proceedings of the 14th
                 Dundee Conference, June 1991",
  title =        "Numerical analysis, 1991: proceedings of the 14th
                 Dundee Conference, June 1991",
  volume =       "260",
  publisher =    pub-LONGMAN,
  address =      pub-LONGMAN:adr,
  pages =        "292",
  year =         "1992",
  ISBN =         "0-582-08908-5",
  ISBN-13 =      "978-0-582-08908-2",
  LCCN =         "QA297.D85 1991",
  bibdate =      "Mon Jan 15 11:24:40 1996",
  bibsource =    "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
                 http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  series =       "Pitman Research Notes in Mathematics Series",
  acknowledgement = ack-nhfb,
}

@Proceedings{IEEE:1992:SHP,
  editor =       "{IEEE}",
  key =          "SHPCC-92",
  booktitle =    "Scalable High Performance Computing Conference,
                 {SHPCC}-92, April 26--29, 1992, Williamsburg,
                 Virginia",
  title =        "Scalable High Performance Computing Conference,
                 {SHPCC}-92, April 26--29, 1992, Williamsburg,
                 Virginia",
  publisher =    pub-IEEE,
  address =      pub-IEEE:adr,
  pages =        "xiii + 448",
  year =         "1992",
  ISBN =         "0-8186-2775-1",
  ISBN-13 =      "978-0-8186-2775-0",
  LCCN =         "QA76.76.A65 S33 1992",
  bibdate =      "Fri Dec 30 11:18:38 1994",
  bibsource =    "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
                 http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  acknowledgement = ack-nhfb,
}

@Proceedings{Siegel:1992:FSF,
  editor =       "H. J. Siegel",
  booktitle =    "The Fourth Symposium on the Frontiers of Massively
                 Parallel Computation: Frontiers '92 / October 19--21,
                 1992, McLean, Virginia",
  title =        "The Fourth Symposium on the Frontiers of Massively
                 Parallel Computation: Frontiers '92 / October 19--21,
                 1992, McLean, Virginia",
  publisher =    pub-IEEE,
  address =      pub-IEEE:adr,
  pages =        "xviii + 592",
  year =         "1992",
  ISBN =         "0-8186-2772-7 (hardback), 0-8186-2771-9 (microfiche)",
  ISBN-13 =      "978-0-8186-2772-9 (hardback), 978-0-8186-2771-2
                 (microfiche)",
  LCCN =         "QA76.58 .S95 1992",
  bibdate =      "Mon Jan 15 11:06:11 1996",
  bibsource =    "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
                 http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  acknowledgement = ack-nhfb,
}

@Proceedings{Swartzlander:1993:PSC,
  editor =       "Earl {Swartzlander, Jr.} and Mary Jane Irwin and
                 Graham Jullien",
  booktitle =    "Proceedings: 11th Symposium on Computer Arithmetic,
                 June 29--July 2, 1993, Windsor, Ontario",
  title =        "Proceedings: 11th Symposium on Computer Arithmetic,
                 June 29--July 2, 1993, Windsor, Ontario",
  publisher =    pub-IEEE,
  address =      pub-IEEE:adr,
  pages =        "xii + 284",
  year =         "1993",
  ISBN =         "0-7803-1401-8 (softbound), 0-8186-3862-1 (casebound),
                 0-8186-3861-3 (microfiche)",
  ISBN-13 =      "978-0-7803-1401-6 (softbound), 978-0-8186-3862-6
                 (casebound), 978-0-8186-3861-9 (microfiche)",
  ISSN =         "0018-9340 (print), 1557-9956 (electronic)",
  ISSN-L =       "0018-9340",
  LCCN =         "QA 76.9 C62 S95 1993",
  bibdate =      "Thu Sep 01 22:58:49 1994",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "IEEE Transactions on Computers {\bf 43(8)}, 1994",
  acknowledgement = ack-nhfb,
  keywords =     "ARITH-11",
}

@Proceedings{Sincovec:1993:SCP,
  editor =       "Richard F. Sincovec",
  booktitle =    "{SIAM} Conference on Parallel Processing for
                 Scientific Computing (6th: 1993: Norfolk, {VA},
                 {USA})",
  title =        "{SIAM} Conference on Parallel Processing for
                 Scientific Computing (6th: 1993: Norfolk, {VA},
                 {USA})",
  publisher =    pub-SIAM,
  address =      pub-SIAM:adr,
  pages =        "xix + 1041 + iv",
  year =         "1993",
  ISBN =         "0-89871-315-3",
  ISBN-13 =      "978-0-89871-315-2",
  LCCN =         "QA 76.58 S55 1993",
  bibdate =      "Wed Aug 14 10:36:11 1996",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "Two volumes.",
  acknowledgement = ack-nhfb,
  sponsor =      "Society for Industrial and Applied Mathematics.",
}

@Proceedings{Anonymous:1994:OON,
  editor =       "Anonymous",
  booktitle =    "{Object oriented numerics: Annual conference: 2nd ---
                 April 1994, Sunriver, OR}",
  title =        "{Object oriented numerics: Annual conference: 2nd ---
                 April 1994, Sunriver, OR}",
  publisher =    "RWS",
  address =      "Corvallis, OR",
  pages =        "????",
  year =         "1994",
  bibdate =      "Thu Sep 16 09:48:36 MDT 1999",
  bibsource =    "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
                 http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  acknowledgement = ack-nhfb,
}

@Proceedings{Dongarra:1994:PSW,
  editor =       "J. J. Dongarra and B. Tourancheau",
  booktitle =    "{Proceedings of the Second Workshop on Environments
                 and Tools for Parallel Scientific Computing, Townsend,
                 TN, USA, May 25--27, 1994}",
  title =        "{Proceedings of the Second Workshop on Environments
                 and Tools for Parallel Scientific Computing, Townsend,
                 TN, USA, May 25--27, 1994}",
  publisher =    pub-SIAM,
  address =      pub-SIAM:adr,
  pages =        "x + 292",
  year =         "1994",
  ISBN =         "0-89871-343-9",
  ISBN-13 =      "978-0-89871-343-5",
  LCCN =         "QA76.58.I568 1994",
  bibdate =      "Sat May 11 12:16:44 MDT 1996",
  bibsource =    "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
                 http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  series =       "Proceedings of the Workshop on Environments and Tools
                 for Parallel Scientific Computing",
  acknowledgement = ack-nhfb,
}

@Proceedings{IEEE:1994:PSP,
  editor =       "{IEEE}",
  booktitle =    "Proceedings of the Scalable Parallel Libraries
                 Conference, October 6--8, 1993, Mississippi State,
                 Mississippi",
  title =        "Proceedings of the Scalable Parallel Libraries
                 Conference, October 6--8, 1993, Mississippi State,
                 Mississippi",
  publisher =    pub-IEEE,
  address =      pub-IEEE:adr,
  pages =        "vii + 261",
  year =         "1994",
  ISBN =         "0-8186-4980-1 (paper), 0-8186-4981-X (microfiche)",
  ISBN-13 =      "978-0-8186-4980-6 (paper), 978-0-8186-4981-3
                 (microfiche)",
  LCCN =         "QA76.58 .S34 1993",
  bibdate =      "Sat Mar 22 18:40:38 1997",
  bibsource =    "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
                 http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  acknowledgement = ack-nhfb,
}

@Proceedings{IEEE:1995:DPT,
  editor =       "{IEEE}",
  booktitle =    "Digest of papers / the Twenty-fifth International
                 Symposium on Fault-Tolerant Computing, June 27--30,
                 1995, Pasadena, California",
  title =        "Digest of papers / the Twenty-fifth International
                 Symposium on Fault-Tolerant Computing, June 27--30,
                 1995, Pasadena, California",
  publisher =    pub-IEEE,
  address =      pub-IEEE:adr,
  pages =        "xxiii + 547",
  year =         "1995",
  CODEN =        "DPFTDL",
  ISBN =         "0-8186-7079-7, 0-8186-7145-9",
  ISBN-13 =      "978-0-8186-7079-4, 978-0-8186-7145-6",
  ISSN =         "0731-3071",
  LCCN =         "QA 76.9 F38 I57 1995",
  bibdate =      "Fri Mar 1 10:04:10 MST 1996",
  bibsource =    "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
                 http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "IEEE catalog number 95CH35823.",
  series =       "FTCS 25th",
  acknowledgement = ack-nhfb,
  sponsor =      "IEEE. Computer Society. Technical Committee on
                 Fault-Tolerant Computing.",
}

@Proceedings{ACM:1996:SCP,
  editor =       "{ACM}",
  booktitle =    "{Supercomputing '96 Conference Proceedings: November
                 17--22, Pittsburgh, PA}",
  title =        "{Supercomputing '96 Conference Proceedings: November
                 17--22, Pittsburgh, PA}",
  publisher =    pub-ACM # " and " # pub-IEEE,
  address =      pub-ACM:adr # " and " # pub-IEEE:adr,
  pages =        "????",
  year =         "1996",
  ISBN =         "0-89791-854-1",
  ISBN-13 =      "978-0-89791-854-1",
  LCCN =         "QA76.88 .S8573 1996",
  bibdate =      "Mon Mar 23 12:30:13 1998",
  bibsource =    "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
                 http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  note =         "ACM Order Number: 415962, IEEE Computer Society Press
                 Order Number: RS00126.",
  URL =          "http://www.supercomp.org/sc96/proceedings/",
  acknowledgement = ack-nhfb,
}

@Proceedings{Dongarra:1996:APC,
  editor =       "J. J. Dongarra and Kaj Madsen and Jerzy
                 Wa{\'s}niewski",
  booktitle =    "{Applied parallel computing: computations in physics,
                 chemistry, and engineering science: second
                 international workshop, PARA '95, Lyngby, Denmark,
                 August 21--24, 1995: proceedings}",
  title =        "{Applied parallel computing: computations in physics,
                 chemistry, and engineering science: second
                 international workshop, PARA '95, Lyngby, Denmark,
                 August 21--24, 1995: proceedings}",
  volume =       "1041",
  publisher =    pub-SV,
  address =      pub-SV:adr,
  pages =        "562",
  year =         "1996",
  CODEN =        "LNCSD9",
  ISBN =         "3-540-60902-4",
  ISBN-13 =      "978-3-540-60902-5",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  LCCN =         "QA76.58 .P35 1995",
  MRclass =      "65-06",
  MRnumber =     "1 320 056",
  bibdate =      "Thu Dec 19 14:25:58 1996",
  bibsource =    "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
                 http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  series =       ser-LNCS,
  acknowledgement = ack-nhfb,
  keywords =     "Chemistry -- Data processing -- Congresses;
                 Engineering -- Data processing -- Congresses; Parallel
                 processing (Electronic computers) -- Congresses;
                 Physics -- Data processing -- Congresses",
}

@Proceedings{Wasniewski:1996:APC,
  editor =       "Jerzy Wa{\'s}niewski and J. Dongarra and K. Madsen and
                 D. Olesen",
  booktitle =    "Applied parallel computing: industrial-strength
                 computation and optimization: Third International
                 Workshop, {PARA} 96, Lyngby, Denmark, August 18--21,
                 1996: proceedings",
  title =        "Applied parallel computing: industrial-strength
                 computation and optimization: Third International
                 Workshop, {PARA} 96, Lyngby, Denmark, August 18--21,
                 1996: proceedings",
  volume =       "1184",
  publisher =    pub-SV,
  address =      pub-SV:adr,
  pages =        "xiii + 722",
  year =         "1996",
  ISBN =         "3-540-62095-8 (softcover)",
  ISBN-13 =      "978-3-540-62095-2 (softcover)",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  LCCN =         "QA76.58 .P35 1996",
  bibdate =      "Sat Dec 21 16:06:37 MST 1996",
  bibsource =    "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
                 http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  series =       ser-LNCS,
  acknowledgement = ack-nhfb,
  keywords =     "Parallel processing (Electronic computers) --
                 Congresses",
}

@Proceedings{ACM:1998:SHP,
  editor =       "{ACM}",
  booktitle =    "{SC'98: High Performance Networking and Computing:
                 Proceedings of the 1998 ACM\slash IEEE SC98
                 Conference: Orange County Convention Center, Orlando,
                 Florida, USA, November 7--13, 1998}",
  title =        "{SC'98: High Performance Networking and Computing:
                 Proceedings of the 1998 ACM\slash IEEE SC98
                 Conference: Orange County Convention Center, Orlando,
                 Florida, USA, November 7--13, 1998}",
  publisher =    pub-ACM # " and " # pub-IEEE,
  address =      pub-ACM:adr # " and " # pub-IEEE:adr,
  pages =        "????",
  year =         "1998",
  ISBN =         "????",
  ISBN-13 =      "????",
  LCCN =         "????",
  bibdate =      "Wed Oct 07 08:51:34 1998",
  bibsource =    "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
                 http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  URL =          "http://www.supercomp.org/sc98/papers/",
  acknowledgement = ack-nhfb,
}

@Proceedings{DHollander:1998:PCF,
  editor =       "E. D'Hollander and others",
  booktitle =    "{Parallel computing: fundamentals, applications, and
                 new directions: Papers from ParCo97, held in Bonn,
                 Germany, Sept. 19--22, 1997}",
  title =        "{Parallel computing: fundamentals, applications, and
                 new directions: Papers from ParCo97, held in Bonn,
                 Germany, Sept. 19--22, 1997}",
  volume =       "12",
  publisher =    pub-ELSEVIER,
  address =      pub-ELSEVIER:adr,
  pages =        "xx + 748",
  year =         "1998",
  ISBN =         "0-444-82882-6",
  ISBN-13 =      "978-0-444-82882-8",
  LCCN =         "QA76.58.P3795 1997",
  bibdate =      "Thu Sep 16 09:48:36 MDT 1999",
  bibsource =    "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
                 http://www.math.utah.edu/pub/tex/bib/lawn.bib",
  series =       "Advances in Parallel Computing",
  acknowledgement = ack-nhfb,
}