Entry Lo:1995:IBS from sigplan1990.bib

Last update: Thu Apr 12 03:37:15 MDT 2012                Valid HTML 3.2!

Index sections

Top | Symbols | Numbers | Math | A | B | C | D | E | F | G | H | I | J | K | L | M | N | O | P | Q | R | S | T | U | V | W | X | Y | Z

BibTeX entry

@article{Lo:1995:IBS,
  author          = {Jack L. Lo and Susan J. Eggers},
  title           = {Improving balanced scheduling with compiler optimizations
                     that increase instruction-level parallelism},
  journal         = j-SIGPLAN,
  volume          = {30},
  number          = {6},
  pages           = {151--162},
  month           = jun,
  year            = {1995},
  coden           = {SINODQ},
  issn            = {0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)},
  issn-l          = {0362-1340},
  bibdate         = {Sun Dec 14 09:17:06 MST 2003},
  bibsource       = {http://portal.acm.org/;
                     http://www.acm.org/pubs/contents/proceedings/pldi/207110/index.html},
  url             = {http://www.acm.org:80/pubs/citations/proceedings/pldi/207110/p151-lo/},
  abstract        = {Traditional list schedulers order instructions based on
                     an optimistic estimate of the load latency imposed by the
                     hardware and therefore cannot respond to variations in
                     memory latency caused by cache hits and misses on
                     non-blocking architectures. In contrast, balanced
                     scheduling schedules instructions based on an estimate of
                     the amount of instruction-level parallelism in the
                     program. By scheduling independent instructions behind
                     loads based on what the program can provide, rather than
                     what the implementation stipulates in the best case
                     (i.e., a cache hit), balanced scheduling can hide
                     variations in memory latencies more
                     effectively.\par Since its success depends on the amount
                     of instruction-level parallelism in the code, balanced
                     scheduling should perform even better when more
                     parallelism is available. In this study, we combine
                     balanced scheduling with three compiler optimizations
                     that increase instruction-level parallelism: loop
                     unrolling, trace scheduling and cache locality analysis.
                     Using code generated for the DEC Alpha by the Multiflow
                     compiler, we simulated a non-blocking processor
                     architecture that closely models the Alpha 21164. Our
                     results show that balanced scheduling benefits from all
                     three optimizations, producing average speedups that
                     range from 1.15 to 1.40, across the optimizations. More
                     importantly, because of its ability to tolerate
                     variations in load interlocks, it improves its advantage
                     over traditional scheduling. Without the optimizations,
                     balanced scheduled code is, on average, 1.05 times faster
                     than that generated by a traditional scheduler; with
                     them, its lead increases to 1.18.},
  acknowledgement = ack-nhfb,
  affiliation     = {Dept. of Comput. Sci. and Eng., Washington Univ., Seattle,
                     WA, USA},
  annote          = {Published as part of the Proceedings of PLDI'95.},
  classification  = {C6120 (File organisation); C6150C (Compilers, interpreters
                     and other processors); C6150N (Distributed systems
                     software)},
  keywords        = {algorithms; Alpha 21164; Average speedups; Balanced
                     scheduled code; Cache locality analysis; Compiler
                     optimizations; DEC Alpha; Improved balanced scheduling;
                     Independent instruction scheduling; Instruction-level
                     parallelism; languages; List schedulers; Load interlocks;
                     Loads; Loop unrolling; measurement; Memory latency
                     variation hiding; Multiflow compiler; Nonblocking
                     processor architecture; performance; Program; Trace
                     scheduling},
  subject         = {{\bf D.3.4} Software, PROGRAMMING LANGUAGES, Processors,
                     Compilers. {\bf D.3.4} Software, PROGRAMMING LANGUAGES,
                     Processors, Optimization. {\bf D.1.3} Software,
                     PROGRAMMING TECHNIQUES, Concurrent Programming, Parallel
                     programming. {\bf D.2.8} Software, SOFTWARE ENGINEERING,
                     Metrics, Performance measures.},
  thesaurus       = {Cache storage; Optimising compilers; Processor scheduling;
                     Resource allocation},
}

Related entries