%%% -*-BibTeX-*-
%%% ====================================================================
%%% BibTeX-file{
%%%     author          = "Nelson H. F. Beebe",
%%%     version         = "1.17",
%%%     date            = "11 August 2009",
%%%     time            = "18:30:01 MDT",
%%%     filename        = "tcbb.bib",
%%%     address         = "University of Utah
%%%                        Department of Mathematics, 110 LCB
%%%                        155 S 1400 E RM 233
%%%                        Salt Lake City, UT 84112-0090
%%%                        USA",
%%%     telephone       = "+1 801 581 5254",
%%%     FAX             = "+1 801 581 4148",
%%%     URL             = "http://www.math.utah.edu/~beebe",
%%%     checksum        = "30271 8727 45125 408314",
%%%     email           = "beebe at math.utah.edu, beebe at acm.org,
%%%                        beebe at computer.org (Internet)",
%%%     codetable       = "ISO/ASCII",
%%%     keywords        = "BibTeX; bibliography; IEEE/ACM Transactions
%%%                        on Computational Biology and
%%%                        Bioinformatics; TCBB",
%%%     license         = "public domain",
%%%     supported       = "yes",
%%%     docstring       = "This is a COMPLETE BibTeX bibliography for
%%%                        IEEE/ACM Transactions on Computational
%%%                        Biology and Bioinformatics (CODEN ITCBCY,
%%%                        ISSN 1545-5963), covering all journal issues
%%%                        from 2004 to date.
%%%
%%%                        At version 1.17, the COMPLETE journal
%%%                        coverage looked like this:
%%%
%%%                             2004 (  23)    2006 (  41)    2008 (  59)
%%%                             2005 (  37)    2007 (  69)    2009 (  52)
%%%
%%%                             Article:        281
%%%
%%%                             Total entries:  281
%%%
%%%                        The journal Web pages can be found at:
%%%
%%%                            http://www.acm.org/pubs/tcbb/
%%%                            http://portal.acm.org/browse_dl.cfm?idx=J954
%%%
%%%                        Qualified subscribers can retrieve the full
%%%                        text of recent articles in PDF form.
%%%
%%%                        The initial draft was extracted from the ACM
%%%                        Web pages.
%%%
%%%                        ACM copyrights explicitly permit abstracting
%%%                        with credit, so article abstracts, keywords,
%%%                        and subject classifications have been
%%%                        included in this bibliography wherever
%%%                        available.  Article reviews have been
%%%                        omitted, until their copyright status has
%%%                        been clarified.
%%%
%%%                        bibsource keys in the bibliography entries
%%%                        below indicate the entry originally came
%%%                        from the computer science bibliography
%%%                        archive, even though it has likely since
%%%                        been corrected and updated.
%%%
%%%                        URL keys in the bibliography point to
%%%                        World Wide Web locations of additional
%%%                        information about the entry.
%%%
%%%                        BibTeX citation tags are uniformly chosen
%%%                        as name:year:abbrev, where name is the
%%%                        family name of the first author or editor,
%%%                        year is a 4-digit number, and abbrev is a
%%%                        3-letter condensation of important title
%%%                        words. Citation tags were automatically
%%%                        generated by software developed for the
%%%                        BibNet Project.
%%%
%%%                        In this bibliography, entries are sorted in
%%%                        publication order, using ``bibsort -byvolume.''
%%%
%%%                        The checksum field above contains a CRC-16
%%%                        checksum as the first value, followed by the
%%%                        equivalent of the standard UNIX wc (word
%%%                        count) utility output of lines, words, and
%%%                        characters.  This is produced by Robert
%%%                        Solovay's checksum utility."
%%%     }
%%% ====================================================================

@Preamble{"\input bibnames.sty"}

%%% ====================================================================
%%% Acknowledgement abbreviations:

@String{ack-nhfb = "Nelson H. F. Beebe,
                    University of Utah,
                    Department of Mathematics, 110 LCB,
                    155 S 1400 E RM 233,
                    Salt Lake City, UT 84112-0090, USA,
                    Tel: +1 801 581 5254,
                    FAX: +1 801 581 4148,
                    e-mail: \path|beebe@math.utah.edu|,
                            \path|beebe@acm.org|,
                            \path|beebe@computer.org| (Internet),
                    URL: \path|http://www.math.utah.edu/~beebe/|"}

%%% ====================================================================
%%% Journal abbreviations:

@String{j-TCBB                  = "IEEE/ACM Transactions on Computational
                                  Biology and Bioinformatics"}

%%% ====================================================================
%%% Bibliography entries:

@Article{Williams:2004:WM,
  author =       "Michael R. Williams",
  title =        "Welcome Message",
  journal =      j-TCBB,
  volume =       "1",
  number =       "1",
  pages =        "1--1",
  month =        jan,
  year =         "2004",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Mon Nov 22 06:42:56 MST 2004",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Gusfield:2004:IIA,
  author =       "Dan Gusfield",
  title =        "Introduction to the {IEEE\slash ACM Transactions on
                 Computational Biology and Bioinformatics}",
  journal =      j-TCBB,
  volume =       "1",
  number =       "1",
  pages =        "2--3",
  month =        jan,
  year =         "2004",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Mon Nov 22 06:42:56 MST 2004",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Williams:2004:INA,
  author =       "Michael R. Williams",
  title =        "Introduction of New {Associate Editors}",
  journal =      j-TCBB,
  volume =       "1",
  number =       "1",
  pages =        "4--12",
  month =        jan,
  year =         "2004",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Mon Nov 22 06:42:56 MST 2004",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Moret:2004:PNM,
  author =       "Bernard M. E. Moret and Luay Nakhleh and Tandy Warnow
                 and C. Randal Linder and Anna Tholse and Anneke
                 Padolina and Jerry Sun and Ruth Timme",
  title =        "Phylogenetic Networks: Modeling, Reconstructibility,
                 and Accuracy",
  journal =      j-TCBB,
  volume =       "1",
  number =       "1",
  pages =        "13--23",
  month =        jan,
  year =         "2004",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Mon Nov 22 06:42:56 MST 2004",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Madeira:2004:BAB,
  author =       "Sara C. Madeira and Arlindo L. Oliveira",
  title =        "Biclustering Algorithms for Biological Data Analysis:
                 {A} Survey",
  journal =      j-TCBB,
  volume =       "1",
  number =       "1",
  pages =        "24--45",
  month =        jan,
  year =         "2004",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Mon Nov 22 06:42:56 MST 2004",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Preparata:2004:SHR,
  author =       "Franco P. Preparata",
  title =        "Sequencing-by-Hybridization Revisited: The
                 Analog-Spectrum Proposal",
  journal =      j-TCBB,
  volume =       "1",
  number =       "1",
  pages =        "46--52",
  month =        jan,
  year =         "2004",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Mon Nov 22 06:42:56 MST 2004",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Hochsmann:2004:PMR,
  author =       "Matthias H{\"o}chsmann and Bj{\"o}rn Voss and Robert
                 Giegerich",
  title =        "Pure Multiple {RNA} Secondary Structure Alignments:
                 {A} Progressive Profile Approach",
  journal =      j-TCBB,
  volume =       "1",
  number =       "1",
  pages =        "53--62",
  month =        jan,
  year =         "2004",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Mon Nov 22 06:42:56 MST 2004",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Anonymous:2004:INA,
  author =       "Anonymous",
  title =        "Introduction of New {Associate Editor}",
  journal =      j-TCBB,
  volume =       "1",
  number =       "2",
  pages =        "65--65",
  month =        apr,
  year =         "2004",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Mon Nov 22 06:42:56 MST 2004",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Witwer:2004:PCR,
  author =       "Christina Witwer and Ivo L. Hofacker and Peter F.
                 Stadler",
  title =        "Prediction of Consensus {RNA} Secondary Structures
                 Including Pseudoknots",
  journal =      j-TCBB,
  volume =       "1",
  number =       "2",
  pages =        "66--77",
  month =        apr,
  year =         "2004",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Mon Nov 22 06:42:56 MST 2004",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Bafna:2004:NRE,
  author =       "Vineet Bafna and Vikas Bansal",
  title =        "The Number of Recombination Events in a Sample
                 History: Conflict Graph and Lower Bounds",
  journal =      j-TCBB,
  volume =       "1",
  number =       "2",
  pages =        "78--90",
  month =        apr,
  year =         "2004",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Mon Nov 22 06:42:56 MST 2004",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Raphael:2004:UPM,
  author =       "Benjamin Raphael and Lung-Tien Liu and George
                 Varghese",
  title =        "A Uniform Projection Method for Motif Discovery in
                 {DNA} Sequences",
  journal =      j-TCBB,
  volume =       "1",
  number =       "2",
  pages =        "91--94",
  month =        apr,
  year =         "2004",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Mon Nov 22 06:42:56 MST 2004",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Gusfield:2004:INA,
  author =       "Dan Gusfield",
  title =        "Introduction of New {Associate Editors}",
  journal =      j-TCBB,
  volume =       "1",
  number =       "3",
  pages =        "97--97",
  month =        jul,
  year =         "2004",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Mon Jan 24 14:15:55 MST 2005",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Scheid:2004:SDS,
  author =       "Stefanie Scheid and Rainer Spang",
  title =        "A Stochastic Downhill Search Algorithm for Estimating
                 the Local False Discovery Rate",
  journal =      j-TCBB,
  volume =       "1",
  number =       "3",
  pages =        "98--108",
  month =        jul,
  year =         "2004",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Mon Jan 24 14:15:55 MST 2005",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Dress:2004:CSG,
  author =       "Andreas W. M. Dress and Daniel H. Huson",
  title =        "Constructing Splits Graphs",
  journal =      j-TCBB,
  volume =       "1",
  number =       "3",
  pages =        "109--115",
  month =        jul,
  year =         "2004",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Mon Jan 24 14:15:55 MST 2005",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Cameron:2004:IGA,
  author =       "Michael Cameron and Hugh E. Williams and Adam
                 Cannane",
  title =        "Improved Gapped Alignment in {BLAST}",
  journal =      j-TCBB,
  volume =       "1",
  number =       "3",
  pages =        "116--129",
  month =        jul,
  year =         "2004",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Mon Jan 24 14:15:55 MST 2005",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Evans:2004:UDT,
  author =       "Steven N. Evans and Tandy Warnow",
  title =        "Unidentifiable Divergence Times in Rates-across-Sites
                 Models",
  journal =      j-TCBB,
  volume =       "1",
  number =       "3",
  pages =        "130--134",
  month =        jul,
  year =         "2004",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Mon Jan 24 14:15:55 MST 2005",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Kim:2004:GEW,
  author =       "Junhyong Kim and Inge Jonassen",
  title =        "Guest Editorial: {WABI} Special Section Part 1",
  journal =      j-TCBB,
  volume =       "1",
  number =       "4",
  pages =        "137--138",
  month =        oct,
  year =         "2004",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Mon Jan 24 14:15:55 MST 2005",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Csuros:2004:MSS,
  author =       "Miklos Csuros",
  title =        "Maximum-Scoring Segment Sets",
  journal =      j-TCBB,
  volume =       "1",
  number =       "4",
  pages =        "139--150",
  month =        oct,
  year =         "2004",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Mon Jan 24 14:15:55 MST 2005",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Huson:2004:PSN,
  author =       "Daniel H. Huson and Tobias Dezulian and Tobias Klopper
                 and Mike A. Steel",
  title =        "Phylogenetic Super-Networks from Partial Trees",
  journal =      j-TCBB,
  volume =       "1",
  number =       "4",
  pages =        "151--158",
  month =        oct,
  year =         "2004",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Mon Jan 24 14:15:55 MST 2005",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Bannai:2004:ADO,
  author =       "Hideo Bannai and Heikki Hyyro and Ayumi Shinohara and
                 Masayuki Takeda and Kenta Nakai and Satoru Miyano",
  title =        "An {$O(N^2)$} Algorithm for Discovering Optimal
                 {Boolean} Pattern Pairs",
  journal =      j-TCBB,
  volume =       "1",
  number =       "4",
  pages =        "159--170",
  month =        oct,
  year =         "2004",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Mon Jan 24 14:15:55 MST 2005",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Gramm:2004:PTA,
  author =       "Jens Gramm",
  title =        "A Polynomial-Time Algorithm for the Matching of
                 Crossing Contact-Map Patterns",
  journal =      j-TCBB,
  volume =       "1",
  number =       "4",
  pages =        "171--180",
  month =        oct,
  year =         "2004",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Mon Jan 24 14:15:55 MST 2005",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Ye:2004:UUD,
  author =       "Jieping Ye and Tao Li and Tao Xiong and Ravi
                 Janardan",
  title =        "Using Uncorrelated Discriminant Analysis for Tissue
                 Classification with Gene Expression Data",
  journal =      j-TCBB,
  volume =       "1",
  number =       "4",
  pages =        "181--190",
  month =        oct,
  year =         "2004",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Mon Jan 24 14:15:55 MST 2005",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Anonymous:2004:AI,
  author =       "Anonymous",
  title =        "Annual Index",
  journal =      j-TCBB,
  volume =       "1",
  number =       "4",
  pages =        "191--192",
  month =        oct,
  year =         "2004",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Mon Jan 24 14:15:55 MST 2005",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Kim:2005:GEW,
  author =       "Junhyong Kim and Inge Jonassen",
  title =        "Guest Editorial: {WABI} Special Section Part {II}",
  journal =      j-TCBB,
  volume =       "2",
  number =       "1",
  pages =        "1--2",
  month =        jan,
  year =         "2005",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Tue Apr 12 07:11:54 MDT 2005",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Allali:2005:NDH,
  author =       "Julien Allali and Marie-France Sagot",
  title =        "A New Distance for High Level {RNA} Secondary
                 Structure Comparison",
  journal =      j-TCBB,
  volume =       "2",
  number =       "1",
  pages =        "3--14",
  month =        jan,
  year =         "2005",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Tue Apr 12 07:11:54 MDT 2005",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Bertrand:2005:TRL,
  author =       "Denis Bertrand and Olivier Gascuel",
  title =        "Topological Rearrangements and Local Search Method for
                 Tandem Duplication Trees",
  journal =      j-TCBB,
  volume =       "2",
  number =       "1",
  pages =        "15--28",
  month =        jan,
  year =         "2005",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Tue Apr 12 07:11:54 MDT 2005",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Brown:2005:OMS,
  author =       "Daniel G. Brown",
  title =        "Optimizing Multiple Seeds for Protein Homology
                 Search",
  journal =      j-TCBB,
  volume =       "2",
  number =       "1",
  pages =        "29--38",
  month =        jan,
  year =         "2005",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Tue Apr 12 07:11:54 MDT 2005",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Gusfield:2005:EST,
  author =       "Dan Gusfield",
  title =        "Editorial-State of the Transaction",
  journal =      j-TCBB,
  volume =       "2",
  number =       "1",
  pages =        "39--39",
  month =        jan,
  year =         "2005",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Tue Apr 12 07:11:54 MDT 2005",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Pisanti:2005:BMG,
  author =       "Nadia Pisanti and Maxime Crochemore and Roberto Grossi
                 and Marie-France Sagot",
  title =        "Bases of Motifs for Generating Repeated Patterns with
                 Wild Cards",
  journal =      j-TCBB,
  volume =       "2",
  number =       "1",
  pages =        "40--50",
  month =        jan,
  year =         "2005",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Tue Apr 12 07:11:54 MDT 2005",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Kucherov:2005:MLF,
  author =       "Gregory Kucherov and Laurent Noe and Mikhail
                 Roytberg",
  title =        "Multiseed Lossless Filtration",
  journal =      j-TCBB,
  volume =       "2",
  number =       "1",
  pages =        "51--61",
  month =        jan,
  year =         "2005",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Tue Apr 12 07:11:54 MDT 2005",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Liu:2005:TMB,
  author =       "Ying Liu and Shamkant B. Navathe and Jorge Civera and
                 Venu Dasigi and Ashwin Ram and Brian J. Ciliax and Ray
                 Dingledine",
  title =        "Text Mining Biomedical Literature for Discovering
                 Gene-to-Gene Relationships: {A} Comparative Study of
                 Algorithms",
  journal =      j-TCBB,
  volume =       "2",
  number =       "1",
  pages =        "62--76",
  month =        jan,
  year =         "2005",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Tue Apr 12 07:11:54 MDT 2005",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Staff:2005:RL,
  author =       "{IEEE/ACM Transactions on Computational Biology and
                 Bioinformatics staff}",
  title =        "2004 Reviewers List",
  journal =      j-TCBB,
  volume =       "2",
  number =       "1",
  pages =        "77--77",
  month =        jan,
  year =         "2005",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Tue Apr 12 07:11:54 MDT 2005",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Ling:2005:GEIa,
  author =       "Charles X. Ling and William Stafford Noble and Qiang
                 Yang",
  title =        "{Guest Editors}' Introduction to the {Special Issue:
                 Machine Learning for Bioinformatics---Part 1}",
  journal =      j-TCBB,
  volume =       "2",
  number =       "2",
  pages =        "81--82",
  month =        apr,
  year =         "2005",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Wed Jun 22 17:33:35 MDT 2005",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Au:2005:ACG,
  author =       "Wai-Ho Au and Keith C. C. Chan and Andrew K. C. Wong
                 and Yang Wang",
  title =        "Attribute Clustering for Grouping, Selection, and
                 Classification of Gene Expression Data",
  journal =      j-TCBB,
  volume =       "2",
  number =       "2",
  pages =        "83--101",
  month =        apr,
  year =         "2005",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Wed Jun 22 17:33:35 MDT 2005",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Biyani:2005:JCP,
  author =       "Pravesh Biyani and Xiaolin Wu and Abhijit Sinha",
  title =        "Joint Classification and Pairing of Human
                 Chromosomes",
  journal =      j-TCBB,
  volume =       "2",
  number =       "2",
  pages =        "102--109",
  month =        apr,
  year =         "2005",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Wed Jun 22 17:33:35 MDT 2005",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Furlanello:2005:SLM,
  author =       "Cesare Furlanello and Maria Serafini and Stefano
                 Merler and Giuseppe Jurman",
  title =        "Semisupervised Learning for Molecular Profiling",
  journal =      j-TCBB,
  volume =       "2",
  number =       "2",
  pages =        "110--118",
  month =        apr,
  year =         "2005",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Wed Jun 22 17:33:35 MDT 2005",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Mamitsuka:2005:ELK,
  author =       "Hiroshi Mamitsuka",
  title =        "Essential Latent Knowledge for Protein-Protein
                 Interactions: Analysis by an Unsupervised Learning
                 Approach",
  journal =      j-TCBB,
  volume =       "2",
  number =       "2",
  pages =        "119--130",
  month =        apr,
  year =         "2005",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Wed Jun 22 17:33:35 MDT 2005",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Rajapakse:2005:MED,
  author =       "Jagath C. Rajapakse and Loi Sy Ho",
  title =        "{Markov} Encoding for Detecting Signals in Genomic
                 Sequences",
  journal =      j-TCBB,
  volume =       "2",
  number =       "2",
  pages =        "131--142",
  month =        apr,
  year =         "2005",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Wed Jun 22 17:33:35 MDT 2005",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Rogers:2005:LPD,
  author =       "Simon Rogers and Mark Girolami and Colin Campbell and
                 Rainer Breitling",
  title =        "The Latent Process Decomposition of {cDNA} Microarray
                 Data Sets",
  journal =      j-TCBB,
  volume =       "2",
  number =       "2",
  pages =        "143--156",
  month =        apr,
  year =         "2005",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Wed Jun 22 17:33:35 MDT 2005",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Xu:2005:FRP,
  author =       "Jinbo Xu",
  title =        "Fold Recognition by Predicted Alignment Accuracy",
  journal =      j-TCBB,
  volume =       "2",
  number =       "2",
  pages =        "157--165",
  month =        apr,
  year =         "2005",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Wed Jun 22 17:33:35 MDT 2005",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Shen:2005:DRB,
  author =       "Li Shen and Eng Chong Tan",
  title =        "Dimension Reduction-Based Penalized Logistic
                 Regression for Cancer Classification Using Microarray
                 Data",
  journal =      j-TCBB,
  volume =       "2",
  number =       "2",
  pages =        "166--175",
  month =        apr,
  year =         "2005",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Wed Jun 22 17:33:35 MDT 2005",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Ling:2005:GEIb,
  author =       "C. X. Ling and W. S. Noble and Q. Yang",
  title =        "{Guest Editor}'s Introduction to the {Special Issue:
                 Machine Learning for Bioinformatics---Part 2}",
  journal =      j-TCBB,
  volume =       "2",
  number =       "3",
  pages =        "177--178",
  month =        jul,
  year =         "2005",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Tue Sep 20 06:11:25 MDT 2005",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Schliep:2005:AGE,
  author =       "Alexander Schliep and Ivan G. Costa and Christine
                 Steinhoff and Alexander Schonhuth",
  title =        "Analyzing Gene Expression Time-Courses",
  journal =      j-TCBB,
  volume =       "2",
  number =       "3",
  pages =        "179--193",
  month =        jul,
  year =         "2005",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Tue Sep 20 06:11:25 MDT 2005",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Kundaje:2005:CST,
  author =       "Anshul Kundaje and Manuel Middendorf and Feng Gao and
                 Chris Wiggins and Christina Leslie",
  title =        "Combining Sequence and Time Series Expression Data to
                 Learn Transcriptional Modules",
  journal =      j-TCBB,
  volume =       "2",
  number =       "3",
  pages =        "194--202",
  month =        jul,
  year =         "2005",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Tue Sep 20 06:11:25 MDT 2005",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Kaski:2005:ACE,
  author =       "Samuel Kaski and Janne Nikkila and Janne Sinkkonen and
                 Leo Lahti and Juha E. A. Knuuttila and Christophe
                 Roos",
  title =        "Associative Clustering for Exploring Dependencies
                 between Functional Genomics Data Sets",
  journal =      j-TCBB,
  volume =       "2",
  number =       "3",
  pages =        "203--216",
  month =        jul,
  year =         "2005",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Tue Sep 20 06:11:25 MDT 2005",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Zhang:2005:PMF,
  author =       "Jingfen Zhang and Wen Gao and Jinjin Cai and Simin He
                 and Rong Zeng and Runsheng Chen",
  title =        "Predicting Molecular Formulas of Fragment Ions with
                 Isotope Patterns in Tandem Mass Spectra",
  journal =      j-TCBB,
  volume =       "2",
  number =       "3",
  pages =        "217--230",
  month =        jul,
  year =         "2005",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Tue Sep 20 06:11:25 MDT 2005",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Keedwell:2005:DGN,
  author =       "Edward Keedwell and Ajit Narayanan",
  title =        "Discovering Gene Networks with a Neural-Genetic
                 Hybrid",
  journal =      j-TCBB,
  volume =       "2",
  number =       "3",
  pages =        "231--242",
  month =        jul,
  year =         "2005",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Tue Sep 20 06:11:25 MDT 2005",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Hawkins:2005:ARN,
  author =       "John Hawkins and Mikael Boden",
  title =        "The Applicability of Recurrent Neural Networks for
                 Biological Sequence Analysis",
  journal =      j-TCBB,
  volume =       "2",
  number =       "3",
  pages =        "243--253",
  month =        jul,
  year =         "2005",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Tue Sep 20 06:11:25 MDT 2005",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Gustafsson:2005:CAL,
  author =       "Mika Gustafsson and Michael Hornquist and Anna
                 Lombardi",
  title =        "Constructing and Analyzing a Large-Scale Gene-to-Gene
                 Regulatory Network-Lasso-Constrained Inference and
                 Biological Validation",
  journal =      j-TCBB,
  volume =       "2",
  number =       "3",
  pages =        "254--261",
  month =        jul,
  year =         "2005",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Tue Sep 20 06:11:25 MDT 2005",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Demir:2005:LTP,
  author =       "Cigdem Demir and S. Humayun Gultekin and Bulent
                 Yener",
  title =        "Learning the Topological Properties of Brain Tumors",
  journal =      j-TCBB,
  volume =       "2",
  number =       "3",
  pages =        "262--270",
  month =        jul,
  year =         "2005",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Tue Sep 20 06:11:25 MDT 2005",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Anonymous:2005:CPS,
  author =       "Anonymous",
  title =        "Call for Papers for {Special Issue on Computational
                 Intelligence Approaches in Computational Biology and
                 Bioinformatics}",
  journal =      j-TCBB,
  volume =       "2",
  number =       "3",
  pages =        "271--271",
  month =        jul,
  year =         "2005",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Tue Sep 20 06:11:25 MDT 2005",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Cickovski:2005:FTD,
  author =       "Trevor M. Cickovski and Chengbang Huang and Rajiv
                 Chaturvedi and Tilmann Glimm and H. George E. Hentschel
                 and Mark S. Alber and James A. Glazier and Stuart A.
                 Newman and Jesus A. Izaguirre",
  title =        "A Framework for Three-Dimensional Simulation of
                 Morphogenesis",
  journal =      j-TCBB,
  volume =       "2",
  number =       "4",
  pages =        "273--288",
  month =        oct,
  year =         "2005",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Fri Nov 18 05:22:15 MST 2005",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Boscolo:2005:GFN,
  author =       "Riccardo Boscolo and Chiara Sabatti and James C. Liao
                 and Vwani P. Roychowdhury",
  title =        "A Generalized Framework for Network Component
                 Analysis",
  journal =      j-TCBB,
  volume =       "2",
  number =       "4",
  pages =        "289--301",
  month =        oct,
  year =         "2005",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Fri Nov 18 05:22:15 MST 2005",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Chen:2005:AOG,
  author =       "Xin Chen and Jie Zheng and Zheng Fu and Peng Nan and
                 Yang Zhong and Stefano Lonardi and Tao Jiang",
  title =        "Assignment of Orthologous Genes via Genome
                 Rearrangement",
  journal =      j-TCBB,
  volume =       "2",
  number =       "4",
  pages =        "302--315",
  month =        oct,
  year =         "2005",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Fri Nov 18 05:22:15 MST 2005",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Turner:2005:BMS,
  author =       "Heather L. Turner and Trevor C. Bailey and Wojtek J.
                 Krzanowski and Cheryl A. Hemingway",
  title =        "Biclustering Models for Structured Microarray Data",
  journal =      j-TCBB,
  volume =       "2",
  number =       "4",
  pages =        "316--329",
  month =        oct,
  year =         "2005",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Fri Nov 18 05:22:15 MST 2005",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Sevilla:2005:CBG,
  author =       "Jose L. Sevilla and Victor Segura and Adam Podhorski
                 and Elizabeth Guruceaga and Jose M. Mato and Luis A.
                 Martinez-Cruz and Fernando J. Corrales and Angel
                 Rubio",
  title =        "Correlation between Gene Expression and {GO} Semantic
                 Similarity",
  journal =      j-TCBB,
  volume =       "2",
  number =       "4",
  pages =        "330--338",
  month =        oct,
  year =         "2005",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Fri Nov 18 05:22:15 MST 2005",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Yoon:2005:DCB,
  author =       "Sungroh Yoon and Christine Nardini and Luca Benini and
                 Giovanni De Micheli",
  title =        "Discovering Coherent Biclusters from Gene Expression
                 Data Using Zero-Suppressed Binary Decision Diagrams",
  journal =      j-TCBB,
  volume =       "2",
  number =       "4",
  pages =        "339--354",
  month =        oct,
  year =         "2005",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Fri Nov 18 05:22:15 MST 2005",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Tseng:2005:EMG,
  author =       "Vincent S. Tseng and Ching-Pin Kao",
  title =        "Efficiently Mining Gene Expression Data via a Novel
                 Parameterless Clustering Method",
  journal =      j-TCBB,
  volume =       "2",
  number =       "4",
  pages =        "355--365",
  month =        oct,
  year =         "2005",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Fri Nov 18 05:22:15 MST 2005",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Zhang:2005:SGN,
  author =       "Shaojie Zhang and Brian Haas and Eleazar Eskin and
                 Vineet Bafna",
  title =        "Searching Genomes for Noncoding {RNA} Using {FastR}",
  journal =      j-TCBB,
  volume =       "2",
  number =       "4",
  pages =        "366--379",
  month =        oct,
  year =         "2005",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Fri Nov 18 05:22:15 MST 2005",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Anonymous:2005:AI,
  author =       "Anonymous",
  title =        "2005 Annual Index",
  journal =      j-TCBB,
  volume =       "2",
  number =       "4",
  pages =        "380--384",
  month =        oct,
  year =         "2005",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Fri Nov 18 05:22:15 MST 2005",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Gusfield:2006:SJ,
  author =       "Dan Gusfield",
  title =        "State of the Journal",
  journal =      j-TCBB,
  volume =       "3",
  number =       "1",
  pages =        "1--1",
  month =        jan,
  year =         "2006",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/TCBB.2006.12",
  ISSN =         "1545-5963",
  bibdate =      "Thu Feb 16 11:06:15 MST 2006",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Berger:2006:JAG,
  author =       "John A. Berger and Sampsa Hautaniemi and Sanjit K.
                 Mitra and Jaakko Astola",
  title =        "Jointly Analyzing Gene Expression and Copy Number Data
                 in Breast Cancer Using Data Reduction Models",
  journal =      j-TCBB,
  volume =       "3",
  number =       "1",
  pages =        "2--16",
  month =        jan,
  year =         "2006",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Feb 16 11:06:15 MST 2006",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Sebastian:2006:STA,
  author =       "Rafael Sebastian and Maria-Elena Diaz and Guillermo
                 Ayala and Kresimir Letinic and Jose Moncho-Bogani and
                 Derek Toomre",
  title =        "Spatio-Temporal Analysis of Constitutive Exocytosis in
                 Epithelial Cells",
  journal =      j-TCBB,
  volume =       "3",
  number =       "1",
  pages =        "17--32",
  month =        jan,
  year =         "2006",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Feb 16 11:06:15 MST 2006",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Hershkovitz:2006:SAR,
  author =       "Eli Hershkovitz and Guillermo Sapiro and Allen
                 Tannenbaum and Loren Dean Williams",
  title =        "Statistical Analysis of {RNA} Backbone",
  journal =      j-TCBB,
  volume =       "3",
  number =       "1",
  pages =        "33--46",
  month =        jan,
  year =         "2006",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Feb 16 11:06:15 MST 2006",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Dawy:2006:GMM,
  author =       "Zaher Dawy and Bernhard Goebel and Joachim Hagenauer
                 and Christophe Andreoli and Thomas Meitinger and Jakob
                 C. Mueller",
  title =        "Gene Mapping and Marker Clustering Using {Shannon}'s
                 Mutual Information",
  journal =      j-TCBB,
  volume =       "3",
  number =       "1",
  pages =        "47--56",
  month =        jan,
  year =         "2006",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Feb 16 11:06:15 MST 2006",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Goutsias:2006:HMM,
  author =       "John Goutsias",
  title =        "A Hidden {Markov} Model for Transcriptional Regulation
                 in Single Cells",
  journal =      j-TCBB,
  volume =       "3",
  number =       "1",
  pages =        "57--71",
  month =        jan,
  year =         "2006",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Feb 16 11:06:15 MST 2006",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Rueda:2006:HCA,
  author =       "Luis Rueda and Vidya Vidyadharan",
  title =        "A Hill-Climbing Approach for Automatic Gridding of
                 {cDNA} Microarray Images",
  journal =      j-TCBB,
  volume =       "3",
  number =       "1",
  pages =        "72--83",
  month =        jan,
  year =         "2006",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Feb 16 11:06:15 MST 2006",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Semple:2006:UNC,
  author =       "Charles Semple and Mike Steel",
  title =        "Unicyclic Networks: Compatibility and Enumeration",
  journal =      j-TCBB,
  volume =       "3",
  number =       "1",
  pages =        "84--91",
  month =        jan,
  year =         "2006",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Feb 16 11:06:15 MST 2006",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Roch:2006:SPP,
  author =       "Sebastien Roch",
  title =        "A Short Proof that Phylogenetic Tree Reconstruction by
                 Maximum Likelihood Is Hard",
  journal =      j-TCBB,
  volume =       "3",
  number =       "1",
  pages =        "92--94",
  month =        jan,
  year =         "2006",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Feb 16 11:06:15 MST 2006",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Anonymous:2006:RL,
  author =       "Anonymous",
  title =        "2005 Reviewers List",
  journal =      j-TCBB,
  volume =       "3",
  number =       "1",
  pages =        "95--96",
  month =        jan,
  year =         "2006",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Feb 16 11:06:15 MST 2006",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Gusfield:2006:INA,
  author =       "Dan Gusfield",
  title =        "Introduction of New {Associate Editors}",
  journal =      j-TCBB,
  volume =       "3",
  number =       "2",
  pages =        "97--97",
  month =        apr,
  year =         "2006",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/TCBB.2006.25",
  ISSN =         "1545-5963",
  bibdate =      "Wed Jun 7 06:38:18 MDT 2006",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Chu:2006:BSM,
  author =       "Wei Chu and Zoubin Ghahramani and Alexei
                 Podtelezhnikov and David L. Wild",
  title =        "{Bayesian} Segmental Models with Multiple Sequence
                 Alignment Profiles for Protein Secondary Structure and
                 Contact Map Prediction",
  journal =      j-TCBB,
  volume =       "3",
  number =       "2",
  pages =        "98--113",
  month =        apr,
  year =         "2006",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/TCBB.2006.17",
  ISSN =         "1545-5963",
  bibdate =      "Wed Jun 7 06:38:18 MDT 2006",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Danziger:2006:FCM,
  author =       "Samuel A. Danziger and S. Joshua Swamidass and Jue
                 Zeng and Lawrence R. Dearth and Qiang Lu and Jonathan
                 H. Chen and Jianlin Cheng and Vinh P. Hoang and Hiroto
                 Saigo and Ray Luo and Pierre Baldi and Rainer K.
                 Brachmann and Richard H. Lathrop",
  title =        "Functional Census of Mutation Sequence Spaces: The
                 Example of p53 Cancer Rescue Mutants",
  journal =      j-TCBB,
  volume =       "3",
  number =       "2",
  pages =        "114--125",
  month =        apr,
  year =         "2006",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/TCBB.2006.22",
  ISSN =         "1545-5963",
  bibdate =      "Wed Jun 7 06:38:18 MDT 2006",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Carvalho:2006:EAI,
  author =       "Alexandra M. Carvalho and Ana T. Freitas and Arlindo
                 L. Oliveira and Marie-France Sagot",
  title =        "An Efficient Algorithm for the Identification of
                 Structured Motifs in {DNA} Promoter Sequences",
  journal =      j-TCBB,
  volume =       "3",
  number =       "2",
  pages =        "126--140",
  month =        apr,
  year =         "2006",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/TCBB.2006.16",
  ISSN =         "1545-5963",
  bibdate =      "Wed Jun 7 06:38:18 MDT 2006",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Brown:2006:IPA,
  author =       "Daniel G. Brown and Ian M. Harrower",
  title =        "Integer Programming Approaches to Haplotype Inference
                 by Pure Parsimony",
  journal =      j-TCBB,
  volume =       "3",
  number =       "2",
  pages =        "141--154",
  month =        apr,
  year =         "2006",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/TCBB.2006.24",
  ISSN =         "1545-5963",
  bibdate =      "Wed Jun 7 06:38:18 MDT 2006",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Vass:2006:JMB,
  author =       "Marc T. Vass and Clifford A. Shaffer and Naren
                 Ramakrishnan and Layne T. Watson and John J. Tyson",
  title =        "The {JigCell} Model Builder: {A} Spreadsheet Interface
                 for Creating Biochemical Reaction Network Models",
  journal =      j-TCBB,
  volume =       "3",
  number =       "2",
  pages =        "155--164",
  month =        apr,
  year =         "2006",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/TCBB.2006.27",
  ISSN =         "1545-5963",
  bibdate =      "Wed Jun 7 06:38:18 MDT 2006",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Chen:2006:MFS,
  author =       "Duhong Chen and Oliver Eulenstein and David
                 Fernandez-Baca and Michael Sanderson",
  title =        "Minimum-Flip Supertrees: Complexity and Algorithms",
  journal =      j-TCBB,
  volume =       "3",
  number =       "2",
  pages =        "165--173",
  month =        apr,
  year =         "2006",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/TCBB.2006.26",
  ISSN =         "1545-5963",
  bibdate =      "Wed Jun 7 06:38:18 MDT 2006",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Sevon:2006:TTP,
  author =       "Petteri Sevon and Hannu Toivonen and Vesa Ollikainen",
  title =        "{TreeDT}: Tree Pattern Mining for Gene Mapping",
  journal =      j-TCBB,
  volume =       "3",
  number =       "2",
  pages =        "174--185",
  month =        apr,
  year =         "2006",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/TCBB.2006.28",
  ISSN =         "1545-5963",
  bibdate =      "Wed Jun 7 06:38:18 MDT 2006",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Song:2006:CNS,
  author =       "Yun S. Song",
  title =        "A Concise Necessary and Sufficient Condition for the
                 Existence of a Galled-Tree",
  journal =      j-TCBB,
  volume =       "3",
  number =       "2",
  pages =        "186--191",
  month =        apr,
  year =         "2006",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/TCBB.2006.15",
  ISSN =         "1545-5963",
  bibdate =      "Wed Jun 7 06:38:18 MDT 2006",
  bibsource =    "http://portal.acm.org/",
  abstract =     "Galled-trees are a special class of graphical
                 representation of evolutionary history that has proven
                 amenable to efficient, polynomial-time algorithms. The
                 goal of this paper is to construct a concise necessary
                 and sufficient condition for the existence of a
                 galled-tree for $M$, a set of binary sequences that
                 purportedly have evolved in the presence of
                 recombination. Both root-known and root-unknown cases
                 are considered here.",
  acknowledgement = ack-nhfb,
}

@Article{Daras:2006:TDS,
  author =       "Petros Daras and Dimitrios Zarpalas and Apostolos
                 Axenopoulos and Dimitrios Tzovaras and Michael
                 Gerassimos Strintzis",
  title =        "Three-Dimensional Shape-Structure Comparison Method
                 for Protein Classification",
  journal =      j-TCBB,
  volume =       "3",
  number =       "3",
  pages =        "193--207",
  month =        jul,
  year =         "2006",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Mon Sep 11 07:36:29 MDT 2006",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Yu:2006:MPA,
  author =       "Weichuan Yu and Xiaoye Li and Junfeng Liu and Baolin
                 Wu and Kenneth R. Williams and Hongyu Zhao",
  title =        "Multiple Peak Alignment in Sequential Data Analysis:
                 {A} Scale-Space-Based Approach",
  journal =      j-TCBB,
  volume =       "3",
  number =       "3",
  pages =        "208--219",
  month =        jul,
  year =         "2006",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Mon Sep 11 07:36:29 MDT 2006",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Abul:2006:PAE,
  author =       "Osman Abul and Reda Alhajj and Faruk Polat",
  title =        "A Powerful Approach for Effective Finding of
                 Significantly Differentially Expressed Genes",
  journal =      j-TCBB,
  volume =       "3",
  number =       "3",
  pages =        "220--231",
  month =        jul,
  year =         "2006",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Mon Sep 11 07:36:29 MDT 2006",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Nagarajan:2006:CSC,
  author =       "Radhakrishnan Nagarajan and Meenakshi Upreti",
  title =        "Correlation Statistics for {cDNA} Microarray Image
                 Analysis",
  journal =      j-TCBB,
  volume =       "3",
  number =       "3",
  pages =        "232--238",
  month =        jul,
  year =         "2006",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Mon Sep 11 07:36:29 MDT 2006",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Song:2006:CAP,
  author =       "Yun S. Song and Rune Lyngso and Jotun Hein",
  title =        "Counting All Possible Ancestral Configurations of
                 Sample Sequences in Population Genetics",
  journal =      j-TCBB,
  volume =       "3",
  number =       "3",
  pages =        "239--251",
  month =        jul,
  year =         "2006",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Mon Sep 11 07:36:29 MDT 2006",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Pirinen:2006:FCG,
  author =       "Matti Pirinen and Dario Gasbarra",
  title =        "Finding Consistent Gene Transmission Patterns on Large
                 and Complex Pedigrees",
  journal =      j-TCBB,
  volume =       "3",
  number =       "3",
  pages =        "252--262",
  month =        jul,
  year =         "2006",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Mon Sep 11 07:36:29 MDT 2006",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Popescu:2006:FMG,
  author =       "Mihail Popescu and James M. Keller and Joyce A.
                 Mitchell",
  title =        "Fuzzy Measures on the Gene Ontology for Gene Product
                 Similarity",
  journal =      j-TCBB,
  volume =       "3",
  number =       "3",
  pages =        "263--274",
  month =        jul,
  year =         "2006",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Mon Sep 11 07:36:29 MDT 2006",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Bernt:2006:GRB,
  author =       "Matthias Bernt and Daniel Merkle and Martin
                 Middendorf",
  title =        "Genome Rearrangement Based on Reversals that Preserve
                 Conserved Intervals",
  journal =      j-TCBB,
  volume =       "3",
  number =       "3",
  pages =        "275--288",
  month =        jul,
  year =         "2006",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Mon Sep 11 07:36:29 MDT 2006",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Berry:2006:IPC,
  author =       "Vincent Berry and Fran{\c{c}}ois Nicolas",
  title =        "Improved Parameterized Complexity of the Maximum
                 Agreement Subtree and Maximum Compatible Tree
                 Problems",
  journal =      j-TCBB,
  volume =       "3",
  number =       "3",
  pages =        "289--302",
  month =        jul,
  year =         "2006",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Mon Sep 11 07:36:29 MDT 2006",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Sharan:2006:ITP,
  author =       "Roded Sharan and Bjarni V. Halldorsson and Sorin
                 Istrail",
  title =        "Islands of Tractability for Parsimony Haplotyping",
  journal =      j-TCBB,
  volume =       "3",
  number =       "3",
  pages =        "303--311",
  month =        jul,
  year =         "2006",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Mon Sep 11 07:36:29 MDT 2006",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Zhang:2006:SGR,
  author =       "Chaolin Zhang and Xuesong Lu and Xuegong Zhang",
  title =        "Significance of Gene Ranking for Classification of
                 Microarray Samples",
  journal =      j-TCBB,
  volume =       "3",
  number =       "3",
  pages =        "312--320",
  month =        jul,
  year =         "2006",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Mon Sep 11 07:36:29 MDT 2006",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Casadio:2006:GEI,
  author =       "Rita Casadio",
  title =        "{Guest Editor}'s Introduction to the Special Issue on
                 Computational Biology and Bioinformatics -- Part 1",
  journal =      j-TCBB,
  volume =       "3",
  number =       "4",
  pages =        "321--322",
  month =        oct,
  year =         "2006",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Nov 30 19:05:58 MST 2006",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Snir:2006:UMC,
  author =       "Sagi Snir and Satish Rao",
  title =        "Using Max Cut to Enhance Rooted Trees Consistency",
  journal =      j-TCBB,
  volume =       "3",
  number =       "4",
  pages =        "323--333",
  month =        oct,
  year =         "2006",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Nov 30 19:05:58 MST 2006",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Ganapathy:2006:PIB,
  author =       "Ganeshkumar Ganapathy and Barbara Goodson and Robert
                 Jansen and Hai-son Le and Vijaya Ramachandran and Tandy
                 Warnow",
  title =        "Pattern Identification in Biogeography",
  journal =      j-TCBB,
  volume =       "3",
  number =       "4",
  pages =        "334--346",
  month =        oct,
  year =         "2006",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Nov 30 19:05:58 MST 2006",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Wernicke:2006:EDN,
  author =       "Sebastian Wernicke",
  title =        "Efficient Detection of Network Motifs",
  journal =      j-TCBB,
  volume =       "3",
  number =       "4",
  pages =        "347--359",
  month =        oct,
  year =         "2006",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Nov 30 19:05:58 MST 2006",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Lacroix:2006:MSG,
  author =       "Vincent Lacroix and Cristina G. Fernandes and
                 Marie-France Sagot",
  title =        "Motif Search in Graphs: Application to Metabolic
                 Networks",
  journal =      j-TCBB,
  volume =       "3",
  number =       "4",
  pages =        "360--368",
  month =        oct,
  year =         "2006",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Nov 30 19:05:58 MST 2006",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Elias:2006:AAS,
  author =       "Isaac Elias and Tzvika Hartman",
  title =        "A $1.375$-Approximation Algorithm for Sorting by
                 Transpositions",
  journal =      j-TCBB,
  volume =       "3",
  number =       "4",
  pages =        "369--379",
  month =        oct,
  year =         "2006",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Nov 30 19:05:58 MST 2006",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Labarre:2006:NBT,
  author =       "Anthony Labarre",
  title =        "New Bounds and Tractable Instances for the
                 Transposition Distance",
  journal =      j-TCBB,
  volume =       "3",
  number =       "4",
  pages =        "380--394",
  month =        oct,
  year =         "2006",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Nov 30 19:05:58 MST 2006",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Sammeth:2006:CTR,
  author =       "Michael Sammeth and Jens Stoye",
  title =        "Comparing Tandem Repeats with Duplications and
                 Excisions of Variable Degree",
  journal =      j-TCBB,
  volume =       "3",
  number =       "4",
  pages =        "395--407",
  month =        oct,
  year =         "2006",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Nov 30 19:05:58 MST 2006",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Bilu:2006:FAO,
  author =       "Yonatan Bilu and Pankaj K. Agarwal and Rachel
                 Kolodny",
  title =        "Faster Algorithms for Optimal Multiple Sequence
                 Alignment Based on Pairwise Comparisons",
  journal =      j-TCBB,
  volume =       "3",
  number =       "4",
  pages =        "408--422",
  month =        oct,
  year =         "2006",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Nov 30 19:05:58 MST 2006",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Song:2006:EPA,
  author =       "Yinglei Song and Chunmei Liu and Xiuzhen Huang and
                 Russell L. Malmberg and Ying Xu and Liming Cai",
  title =        "Efficient Parameterized Algorithms for Biopolymer
                 Structure-Sequence Alignment",
  journal =      j-TCBB,
  volume =       "3",
  number =       "4",
  pages =        "423--432",
  month =        oct,
  year =         "2006",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Nov 30 19:05:58 MST 2006",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Anonymous:2006:AI,
  author =       "Anonymous",
  title =        "Annual Index",
  journal =      j-TCBB,
  volume =       "3",
  number =       "4",
  pages =        "??--??",
  month =        oct,
  year =         "2006",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Nov 30 19:05:58 MST 2006",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Gusfield:2007:SJ,
  author =       "Dan Gusfield",
  title =        "State of the {Journal}",
  journal =      j-TCBB,
  volume =       "4",
  number =       "1",
  pages =        "1--1",
  month =        jan,
  year =         "2007",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:57:20 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Gusfield:2007:AEAa,
  author =       "Dan Gusfield",
  title =        "{Associate Editor} Appreciation and Welcome",
  journal =      j-TCBB,
  volume =       "4",
  number =       "1",
  pages =        "2--2",
  month =        jan,
  year =         "2007",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:57:20 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Casadio:2007:GEI,
  author =       "Rita Casadio",
  title =        "{Guest Editor}'s Introduction to the {Special Section
                 on Computational Biology and Bioinformatics (WABI)} --
                 Part 2",
  journal =      j-TCBB,
  volume =       "4",
  number =       "1",
  pages =        "3--3",
  month =        jan,
  year =         "2007",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:57:20 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Berard:2007:PSR,
  author =       "Severine B{\'e}rard and Anne Bergeron and Cedric
                 Chauve and Christophe Paul",
  title =        "Perfect Sorting by Reversals Is Not Always Difficult",
  journal =      j-TCBB,
  volume =       "4",
  number =       "1",
  pages =        "4--16",
  month =        jan,
  year =         "2007",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:57:20 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "We propose new algorithms for computing pairwise
                 rearrangement scenarios that conserve the combinatorial
                 structure of genomes. More precisely, we investigate
                 the problem of sorting signed permutations by reversals
                 without breaking common intervals. We describe a
                 combinatorial framework for this problem that allows us
                 to characterize classes of signed permutations for
                 which one can compute, in polynomial time, a shortest
                 reversal scenario that conserves all common intervals.
                 In particular, we define a class of permutations for
                 which this computation can be done in linear time with
                 a very simple algorithm that does not rely on the
                 classical Hannenhalli-Pevzner theory for sorting by
                 reversals. We apply these methods to the computation of
                 rearrangement scenarios between permutations obtained
                 from 16 synteny blocks of the X chromosomes of the
                 human, mouse, and rat.",
  acknowledgement = ack-nhfb,
  keywords =     "common intervals; evolution scenarios; reversals",
}

@Article{Vashist:2007:OCM,
  author =       "Akshay Vashist and Casimir A. Kulikowski and Ilya
                 Muchnik",
  title =        "Ortholog Clustering on a Multipartite Graph",
  journal =      j-TCBB,
  volume =       "4",
  number =       "1",
  pages =        "17--27",
  month =        jan,
  year =         "2007",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:57:20 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "We present a method for automatically extracting
                 groups of orthologous genes from a large set of genomes
                 by a new clustering algorithm on a weighted
                 multipartite graph. The method assigns a score to an
                 arbitrary subset of genes from multiple genomes to
                 assess the orthologous relationships between genes in
                 the subset. This score is computed using sequence
                 similarities between the member genes and the
                 phylogenetic relationship between the corresponding
                 genomes. An ortholog cluster is found as the subset
                 with the highest score, so ortholog clustering is
                 formulated as a combinatorial optimization problem. The
                 algorithm for finding an ortholog cluster runs in time
                 O(|E|+|V| log|V|), where V and E are the sets of
                 vertices and edges, respectively, in the graph.
                 However, if we discretize the similarity scores into a
                 constant number of bins, the runtime improves to
                 O(|E|+|V|). The proposed method was applied to seven
                 complete eukaryote genomes on which the manually
                 curated database of eukaryotic ortholog clusters, KOG,
                 is constructed. A comparison of our results with the
                 manually curated ortholog clusters shows that our
                 clusters are well correlated with the existing
                 clusters.",
  acknowledgement = ack-nhfb,
  keywords =     "biology; clustering algorithms; genetics;
                 Graph-theoretic methods",
}

@Article{Lasker:2007:EDH,
  author =       "Keren Lasker and Oranit Dror and Maxim Shatsky and
                 Ruth Nussinov and Haim J. Wolfson",
  title =        "{EMatch}: Discovery of High Resolution Structural
                 Homologues of Protein Domains in Intermediate
                 Resolution Cryo-{EM} Maps",
  journal =      j-TCBB,
  volume =       "4",
  number =       "1",
  pages =        "28--39",
  month =        jan,
  year =         "2007",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:57:20 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "Cryo-EM has become an increasingly powerful technique
                 for elucidating the structure, dynamics, and function
                 of large flexible macromolecule assemblies that cannot
                 be determined at atomic resolution. However, due to the
                 relatively low resolution of cryo-EM data, a major
                 challenge is to identify components of complexes
                 appearing in cryo-EM maps. Here, we describe EMatch, a
                 novel integrated approach for recognizing structural
                 homologues of protein domains present in a 6-10{\AA}
                 resolution cryo-EM map and constructing a quasi-atomic
                 structural model of their assembly. The method is
                 highly efficient and has been successfully validated on
                 various simulated data. The strength of the method is
                 demonstrated by a domain assembly of an experimental
                 cryo-EM map of native GroEL at 6{\AA} resolution.",
  acknowledgement = ack-nhfb,
  keywords =     "3D alignment of secondary structures; cyclic symmetry;
                 intermediate resolution cryo-EM maps; macromolecular
                 assemblies; structural bioinformatics",
}

@Article{Wang:2007:ACC,
  author =       "Lipo Wang and Feng Chu and Wei Xie",
  title =        "Accurate Cancer Classification Using Expressions of
                 Very Few Genes",
  journal =      j-TCBB,
  volume =       "4",
  number =       "1",
  pages =        "40--53",
  month =        jan,
  year =         "2007",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:57:20 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "We aim at finding the smallest set of genes that can
                 ensure highly accurate classification of cancers from
                 microarray data by using supervised machine learning
                 algorithms. The significance of finding the minimum
                 gene subsets is three-fold: 1) It greatly reduces the
                 computational burden and `noise' arising from
                 irrelevant genes. In the examples studied in this
                 paper, finding the minimum gene subsets even allows for
                 extraction of simple diagnostic rules which lead to
                 accurate diagnosis without the need for any
                 classifiers. 2) It simplifies gene expression tests to
                 include only a very small number of genes rather than
                 thousands of genes, which can bring down the cost for
                 cancer testing significantly. 3) It calls for further
                 investigation into the possible biological relationship
                 between these small numbers of genes and cancer
                 development and treatment. Our simple yet very
                 effective method involves two steps. In the first step,
                 we choose some important genes using a feature
                 importance ranking scheme. In the second step, we test
                 the classification capability of all simple
                 combinations of those important genes by using a good
                 classifier. For three `small' and `simple' data sets
                 with two, three, and four cancer (sub)types, our
                 approach obtained very high accuracy with only two or
                 three genes. For a `large' and `complex' data set with
                 14 cancer types, we divided the whole problem into a
                 group of binary classification problems and applied the
                 2--step approach to each of these binary classification
                 problems. Through this `divide-and-conquer' approach,
                 we obtained accuracy comparable to previously reported
                 results but with only 28 genes rather than 16,063
                 genes. In general, our method can significantly reduce
                 the number of genes required for highly reliable
                 diagnosis.",
  acknowledgement = ack-nhfb,
  keywords =     "cancer classification; fuzzy; gene expression; neural
                 networks; support vector machines.",
}

@Article{Zhi:2007:CBA,
  author =       "Degui Zhi and Uri Keich and Pavel Pevzner and Steffen
                 Heber and Haixu Tang",
  title =        "Correcting Base-Assignment Errors in Repeat Regions of
                 Shotgun Assembly",
  journal =      j-TCBB,
  volume =       "4",
  number =       "1",
  pages =        "54--64",
  month =        jan,
  year =         "2007",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:57:20 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "Accurate base-assignment in repeat regions of a whole
                 genome shotgun assembly is an unsolved problem. Since
                 reads in repeat regions cannot be easily attributed to
                 a unique location in the genome, current assemblers may
                 place these reads arbitrarily. As a result, the
                 base-assignment error rate in repeats is likely to be
                 much higher than that in the rest of the genome. We
                 developed an iterative algorithm, EULER-AIR, that is
                 able to correct base-assignment errors in finished
                 genome sequences in public databases. The Wolbachia
                 genome is among the best finished genomes. Using this
                 genome project as an example, we demonstrated that
                 EULER-AIR can 1) discover and correct base-assignment
                 errors, 2) provide accurate read assignments, 3)
                 utilize finishing reads for accurate base-assignment,
                 and 4) provide guidance for designing finishing
                 experiments. In the genome of Wolbachia, EULER-AIR
                 found 16 positions with ambiguous base-assignment and
                 two positions with erroneous bases. Besides Wolbachia,
                 many other genome sequencing projects have
                 significantly fewer finishing reads and, hence, are
                 likely to contain more base-assignment errors in
                 repeats. We demonstrate that EULER-AIR is a software
                 tool that can be used to find and correct
                 base-assignment errors in a genome assembly project.",
  acknowledgement = ack-nhfb,
  keywords =     "expectation maximization; finishing; fragment
                 assembly",
}

@Article{Xu:2007:MCC,
  author =       "Rui Xu and Georgios C. Anagnostopoulos and Donald C.
                 Wunsch",
  title =        "Multiclass Cancer Classification Using Semisupervised
                 Ellipsoid {ARTMAP} and Particle Swarm Optimization with
                 Gene Expression Data",
  journal =      j-TCBB,
  volume =       "4",
  number =       "1",
  pages =        "65--77",
  month =        jan,
  year =         "2007",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:57:20 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "It is crucial for cancer diagnosis and treatment to
                 accurately identify the site of origin of a tumor. With
                 the emergence and rapid advancement of DNA microarray
                 technologies, constructing gene expression profiles for
                 different cancer types has already become a promising
                 means for cancer classification. In addition to
                 research on binary classification such as normal versus
                 tumor samples, which attracts numerous efforts from a
                 variety of disciplines, the discrimination of multiple
                 tumor types is also important. Meanwhile, the selection
                 of genes which are relevant to a certain cancer type
                 not only improves the performance of the classifiers,
                 but also provides molecular insights for treatment and
                 drug development. Here, we use Semisupervised Ellipsoid
                 ARTMAP (ssEAM) for multiclass cancer discrimination and
                 particle swarm optimization for informative gene
                 selection. ssEAM is a neural network architecture
                 rooted in Adaptive Resonance Theory and suitable for
                 classification tasks. ssEAM features fast, stable, and
                 finite learning and creates hyperellipsoidal clusters,
                 inducing complex nonlinear decision boundaries. PSO is
                 an evolutionary algorithm-based technique for global
                 optimization. A discrete binary version of PSO is
                 employed to indicate whether genes are chosen or not.
                 The effectiveness of ssEAM/PSO for multiclass cancer
                 diagnosis is demonstrated by testing it on three
                 publicly available multiple-class cancer data sets.
                 ssEAM/PSO achieves competitive performance on all these
                 data sets, with results comparable to or better than
                 those obtained by other classifiers.",
  acknowledgement = ack-nhfb,
  keywords =     "cancer classification; gene expression profile;
                 particle swarm optimization; semisupervised ellipsoid
                 ARTMAP",
}

@Article{Huang:2007:PPP,
  author =       "Chengbang Huang and Faruck Morcos and Simon P. Kanaan
                 and Stefan Wuchty and Danny Z. Chen and Jesus A.
                 Izaguirre",
  title =        "Predicting Protein-Protein Interactions from Protein
                 Domains Using a Set Cover Approach",
  journal =      j-TCBB,
  volume =       "4",
  number =       "1",
  pages =        "78--87",
  month =        jan,
  year =         "2007",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:57:20 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "One goal of contemporary proteome research is the
                 elucidation of cellular protein interactions. Based on
                 currently available protein-protein interaction and
                 domain data, we introduce a novel method, Maximum
                 Specificity Set Cover (MSSC), for the prediction of
                 protein-protein interactions. In our approach, we map
                 the relationship between interactions of proteins and
                 their corresponding domain architectures to a
                 generalized weighted set cover problem. The application
                 of a greedy algorithm provides sets of domain
                 interactions which explain the presence of protein
                 interactions to the largest degree of specificity.
                 Utilizing domain and protein interaction data of S.
                 cerevisiae, MSSC enables prediction of previously
                 unknown protein interactions, links that are well
                 supported by a high tendency of coexpression and
                 functional homogeneity of the corresponding proteins.
                 Focusing on concrete examples, we show that MSSC
                 reliably predicts protein interactions in well-studied
                 molecular systems, such as the 26S proteasome and RNA
                 polymerase II of S. cerevisiae. We also show that the
                 quality of the predictions is comparable to the Maximum
                 Likelihood Estimation while MSSC is faster. This new
                 algorithm and all data sets used are accessible through
                 a Web portal at http://ppi.cse.nd.edu.",
  acknowledgement = ack-nhfb,
  keywords =     "bioinformatics (genome or protein) databases; biology;
                 Computations on discrete structures; genetics; graph
                 algorithms",
}

@Article{Kim:2007:AAD,
  author =       "Jong Hyun Kim and Michael S. Waterman and Lei M. Li",
  title =        "Accuracy Assessment of Diploid Consensus Sequences",
  journal =      j-TCBB,
  volume =       "4",
  number =       "1",
  pages =        "88--97",
  month =        jan,
  year =         "2007",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:57:20 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "If the origins of fragments are known in genome
                 sequencing projects, it is straightforward to
                 reconstruct diploid consensus sequences. In reality,
                 however, this is not true. Although there are proposed
                 methods to reconstruct haplotypes from genome
                 sequencing projects, an accuracy assessment is required
                 to evaluate the confidence of the estimated diploid
                 consensus sequences. In this paper, we define the
                 confidence score of diploid consensus sequences. It
                 requires the calculation of the likelihood of an
                 assembly. To calculate the likelihood, we propose a
                 linear time algorithm with respect to the number of
                 polymorphic sites. The likelihood calculation and
                 confidence score are used for further improvements of
                 haplotype estimation in two directions. One direction
                 is that low-scored phases are disconnected. The other
                 direction is that, instead of using nominal frequency
                 1/2, the haplotype frequency is estimated to reflect
                 the actual contribution of each haplotype. Our method
                 was evaluated on the simulated data whose polymorphism
                 rate (1.2 percent) was based on Ciona intestinalis. As
                 a result, the high accuracy of our algorithm was
                 indicated: The true positive rate of the haplotype
                 estimation was greater than 97 percent.",
  acknowledgement = ack-nhfb,
  keywords =     "diploid; haplotype; polymorphism; shotgun sequencing",
}

@Article{Alekseyev:2007:CBG,
  author =       "Max A. Alekseyev and Pavel A. Pevzner",
  title =        "Colored de Bruijn Graphs and the Genome Halving
                 Problem",
  journal =      j-TCBB,
  volume =       "4",
  number =       "1",
  pages =        "98--107",
  month =        jan,
  year =         "2007",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:57:20 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "Breakpoint graph analysis is a key algorithmic
                 technique in studies of genome rearrangements. However,
                 breakpoint graphs are defined only for genomes without
                 duplicated genes, thus limiting their applications in
                 rearrangement analysis. We discuss a connection between
                 the breakpoint graphs and de Bruijn graphs that leads
                 to a generalization of the notion of breakpoint graph
                 for genomes with duplicated genes. We further use the
                 generalized breakpoint graphs to study the Genome
                 Halving Problem (first introduced and solved by Nadia
                 El-Mabrouk and David Sankoff). The El-Mabrouk-Sankoff
                 algorithm is rather complex, and, in this paper, we
                 present an alternative approach that is based on
                 generalized breakpoint graphs. The generalized
                 breakpoint graphs make the El-Mabrouk-Sankoff result
                 more transparent and promise to be useful in future
                 studies of genome rearrangements.",
  acknowledgement = ack-nhfb,
  keywords =     "breakpoint graph; de Bruijn graph; genome duplication;
                 genome halving; genome rearrangement; reversal",
}

@Article{Mossel:2007:DMT,
  author =       "Elchanan Mossel",
  title =        "Distorted Metrics on Trees and Phylogenetic Forests",
  journal =      j-TCBB,
  volume =       "4",
  number =       "1",
  pages =        "108--116",
  month =        jan,
  year =         "2007",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:57:20 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "We study distorted metrics on binary trees in the
                 context of phylogenetic reconstruction. Given a binary
                 tree $T$ on $n$ leaves with a path metric $d$, consider
                 the pairwise distances $d(u,v)$ between leaves. It is
                 well known that these determine the tree and the
                 $d$-length of all edges. Here, we consider distortions
                 $\hat{d}$ of $d$ such that, for all leaves $u$ and $v$,
                 it holds that $|d(u,v) - \hat{d}(u,v)| < f/2$ if either
                 $d(u,v) < M + f/2$ or $\hat{d}(u,v) < M + f/2$, where
                 $d$ satisfies $f \leq d(e) \leq g$ for all edges $e$.
                 Given such distortions, we show how to reconstruct in
                 polynomial time a forest $T_1, \ldots{}, T_\alpha$ such
                 that the true tree $T$ may be obtained from that forest
                 by adding $\alpha-1$ edges and $\alpha-1 \leq
                 2-\Omega(M/g) n$. Our distorted metric result implies a
                 reconstruction algorithm of phylogenetic forests with a
                 small number of trees from sequences of length
                 logarithmic in the number of species. The
                 reconstruction algorithm is applicable for the general
                 Markov model. Both the distorted metric result and its
                 applications to phylogeny are almost tight.",
  acknowledgement = ack-nhfb,
  keywords =     "CFN; distortion; forest; Jukes--Cantor; metric;
                 phylogenetics; tree",
}

@Article{Aeling:2007:DDE,
  author =       "Kimberly A. Aeling and Nicholas R. Steffen and Matthew
                 Johnson and G. Wesley Hatfield and Richard H. Lathrop
                 and Donald F. Senear",
  title =        "{DNA} Deformation Energy as an Indirect Recognition
                 Mechanism in Protein-{DNA} Interactions",
  journal =      j-TCBB,
  volume =       "4",
  number =       "1",
  pages =        "117--125",
  month =        jan,
  year =         "2007",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:57:20 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "Proteins that bind to specific locations in genomic
                 DNA control many basic cellular functions. Proteins
                 detect their binding sites using both direct and
                 indirect recognition mechanisms. Deformation energy,
                 which models the energy required to bend DNA from its
                 native shape to its shape when bound to a protein, has
                 been shown to be an indirect recognition mechanism for
                 one particular protein, Integration Host Factor (IHF).
                 This work extends the analysis of deformation to two
                 other DNA-binding proteins, CRP and SRF, and two
                 endonucleases, I-CreI and I-PpoI. Known binding sites
                 for all five proteins showed statistically significant
                 differences in mean deformation energy as compared to
                 random sequences. Binding sites for the three
                 DNA-binding proteins and one of the endonucleases had
                 mean deformation energies lower than random sequences.
                 Binding sites for I-PpoI had mean deformation energy
                 higher than random sequences. Classifiers that were
                 trained using the deformation energy at each base pair
                 step showed good cross-validated accuracy when
                 classifying unseen sequences as binders or nonbinders.
                 These results support DNA deformation energy as an
                 indirect recognition mechanism across a wider range of
                 DNA-binding proteins. Deformation energy may also have
                 a predictive capacity for the underlying catalytic
                 mechanism of DNA-binding enzymes.",
  acknowledgement = ack-nhfb,
  keywords =     "deformation energy; DNA bending; DNA-protein binding;
                 indirect readout; indirect recognition; perceptron
                 learning",
}

@Article{Yang:2007:MFE,
  author =       "Jing Yang and Sarawan Wongsa and Visakan
                 Kadirkamanathan and Stephen A. Billings and Phillip C.
                 Wright",
  title =        "Metabolic Flux Estimation-{A} Self-Adaptive
                 Evolutionary Algorithm with Singular Value
                 Decomposition",
  journal =      j-TCBB,
  volume =       "4",
  number =       "1",
  pages =        "126--138",
  month =        jan,
  year =         "2007",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:57:20 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "Metabolic flux analysis is important for metabolic
                 system regulation and intracellular pathway
                 identification. A popular approach for intracellular
                 flux estimation involves using ^{13}{\rm C} tracer
                 experiments to label states that can be measured by
                 nuclear magnetic resonance spectrometry or gas
                 chromatography mass spectrometry. However, the bilinear
                 balance equations derived from ^{13}{\rm C} tracer
                 experiments and the noisy measurements require a
                 nonlinear optimization approach to obtain the optimal
                 solution. In this paper, the flux quantification
                 problem is formulated as an error-minimization problem
                 with equality and inequality constraints through the
                 ^{13}{\rm C} balance and stoichiometric equations. The
                 stoichiometric constraints are transformed to a null
                 space by singular value decomposition. Self-adaptive
                 evolutionary algorithms are then introduced for flux
                 quantification. The performance of the evolutionary
                 algorithm is compared with ordinary least squares
                 estimation by the simulation of the central pentose
                 phosphate pathway. The proposed algorithm is also
                 applied to the central metabolism of Corynebacterium
                 glutamicum under lysine-producing conditions. A
                 comparison between the results from the proposed
                 algorithm and data from the literature is given. The
                 complexity of a metabolic system with bidirectional
                 reactions is also investigated by analyzing the
                 fluctuations in the flux estimates when available
                 measurements are varied.",
  acknowledgement = ack-nhfb,
  keywords =     "evolutionary computing; least squares method;
                 metabolic flux analysis; singular value
                 decomposition.",
}

@Article{Wu:2007:QBP,
  author =       "Gang Wu and Jia-Huai You and Guohui Lin",
  title =        "Quartet-Based Phylogeny Reconstruction with Answer Set
                 Programming",
  journal =      j-TCBB,
  volume =       "4",
  number =       "1",
  pages =        "139--152",
  month =        jan,
  year =         "2007",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:57:20 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "In this paper, a new representation is presented for
                 the Maximum Quartet Consistency (MQC) problem, where
                 solving the MQC problem becomes searching for an
                 ultrametric matrix that satisfies a maximum number of
                 given quartet topologies. A number of structural
                 properties of the MQC problem in this new
                 representation are characterized through formulating
                 into answer set programming, a recent powerful logic
                 programming tool for modeling and solving search
                 problems. Using these properties, a number of
                 optimization techniques are proposed to speed up the
                 search process. The experimental results on a number of
                 simulated data sets suggest that the new
                 representation, combined with answer set programming,
                 presents a unique perspective to the MQC problem.",
  acknowledgement = ack-nhfb,
  keywords =     "Answer Set Programming (ASP); Maximum Quartet
                 Consistency (MQC); phylogeny; quartet; ultrametric
                 matrix.",
}

@Article{Reinert:2007:LLE,
  author =       "Gesine Reinert and Michael S. Waterman",
  title =        "On the Length of the Longest Exact Position Match in a
                 Random Sequence",
  journal =      j-TCBB,
  volume =       "4",
  number =       "1",
  pages =        "153--156",
  month =        jan,
  year =         "2007",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:57:20 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "A mixed Poisson approximation and a Poisson
                 approximation for the length of the longest exact match
                 of a random sequence across another sequence are
                 provided, where the match is required to start at
                 position 1 in the first sequence. This problem arises
                 when looking for suitable anchors in whole genome
                 alignments.",
  acknowledgement = ack-nhfb,
  keywords =     "Chen-Stein method; length of longest match; mixed
                 Poisson approximation; Poisson approximation",
}

@Article{Au:2007:CAC,
  author =       "Wai-Ho Au and Keith C. C. Chan and Andrew K. C. Wong
                 and Yang Wang",
  title =        "Correction to {``Attribute Clustering for Grouping,
                 Selection, and Classification of Gene Expression
                 Data''}",
  journal =      j-TCBB,
  volume =       "4",
  number =       "1",
  pages =        "157--157",
  month =        jan,
  year =         "2007",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:57:20 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "This is a correction to a typographical error in (11)
                 in [1] which present the calculation of the sum of the
                 multiple significant interdependence redundancy
                 measure. Equation (11) in [1] should be:
                 $$k=\arg\max\nolimits_{k\in\{2,\ldots,p\}}\sum_{r=1}^k
                 \sum_{A_i\in\{C_r-\eta_r\}}R(A_i:\eta_r).$$(11)We
                 remark that the experimental results reported in [1]
                 are based on (11) above not (11) in [1].",
  acknowledgement = ack-nhfb,
}

@Article{Biology:2007:RL,
  author =       "IEEE/ACM Transactions on Computational Biology and
                 Bioinformatics staff",
  title =        "2006 Reviewers List",
  journal =      j-TCBB,
  volume =       "4",
  number =       "1",
  pages =        "158--160",
  month =        jan,
  year =         "2007",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:57:20 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Rajapakse:2007:GEI,
  author =       "Jagath C. Rajapakse and Yan-Qing Zhang and Gary B.
                 Fogel",
  title =        "{Guest Editors}' Introduction to the {Special Section:
                 Computational Intelligence Approaches in Computational
                 Biology and Bioinformatics}",
  journal =      j-TCBB,
  volume =       "4",
  number =       "2",
  pages =        "161--162",
  month =        apr,
  year =         "2007",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:57:55 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Wang:2007:PBS,
  author =       "Haiying Wang and Huiru Zheng and Francisco Azuaje",
  title =        "{Poisson}-Based Self-Organizing Feature Maps and
                 Hierarchical Clustering for Serial Analysis of Gene
                 Expression Data",
  journal =      j-TCBB,
  volume =       "4",
  number =       "2",
  pages =        "163--175",
  month =        apr,
  year =         "2007",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:57:55 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "Serial analysis of gene expression (SAGE) is a
                 powerful technique for global gene expression
                 profiling, allowing simultaneous analysis of thousands
                 of transcripts without prior structural and functional
                 knowledge. Pattern discovery and visualization have
                 become fundamental approaches to analyzing such
                 large-scale gene expression data. From the pattern
                 discovery perspective, clustering techniques have
                 received great attention. However, due to the
                 statistical nature of SAGE data (i.e., underlying
                 distribution), traditional clustering techniques may
                 not be suitable for SAGE data analysis. Based on the
                 adaptation and improvement of Self-Organizing Maps and
                 hierarchical clustering techniques, this paper presents
                 two new clustering algorithms, namely, PoissonS and
                 PoissonHC, for SAGE data analysis. Tested on synthetic
                 and experimental SAGE data, these algorithms
                 demonstrate several advantages over traditional pattern
                 discovery techniques. The results indicate that, by
                 incorporating statistical properties of SAGE data,
                 PoissonS and PoissonHC, as well as a hybrid approach
                 (neuro-hierarchical approach) based on the combination
                 of PoissonS and PoissonHC, offer significant
                 improvements in pattern discovery and visualization for
                 SAGE data. Moreover, a user-friendly platform, which
                 may improve and accelerate SAGE data mining, was
                 implemented. The system is freely available on request
                 from the authors for nonprofit use.",
  acknowledgement = ack-nhfb,
  keywords =     "hybrid machine learning; Pattern discovery and
                 visualization; Poisson distribution; self-organizing
                 maps; serial analysis of gene expression.",
}

@Article{Sjahputera:2007:RAC,
  author =       "Ozy Sjahputera and James M. Keller and J. Wade Davis
                 and Kristen H. Taylor and Farahnaz Rahmatpanah and
                 Huidong Shi and Derek T. Anderson and Samuel N. Blisard
                 and Robert H. Luke and Mihail Popescu and Gerald C.
                 Arthur and Charles W. Caldwell",
  title =        "Relational Analysis of {CpG} Islands Methylation and
                 Gene Expression in Human Lymphomas Using Possibilistic
                 {C}-Means Clustering and Modified Cluster Fuzzy
                 Density",
  journal =      j-TCBB,
  volume =       "4",
  number =       "2",
  pages =        "176--189",
  month =        apr,
  year =         "2007",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:57:55 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "Heterogeneous genetic and epigenetic alterations are
                 commonly found in human non-Hodgkin's lymphomas (NHL).
                 One such epigenetic alteration is aberrant methylation
                 of gene promoter-related CpG islands, where
                 hypermethylation frequently results in transcriptional
                 inactivation of target genes, while a decrease or loss
                 of promoter methylation (hypomethylation) is frequently
                 associated with transcriptional activation. Discovering
                 genes with these relationships in NHL or other types of
                 cancers could lead to a better understanding of the
                 pathobiology of these diseases. The simultaneous
                 analysis of promoter methylation using Differential
                 Methylation Hybridization (DMH) and its associated gene
                 expression using Expressed CpG Island Sequence Tag
                 (ECIST) microarrays generates a large volume of
                 methylation-expression relational data. To analyze this
                 data, we propose a set of algorithms based on fuzzy
                 sets theory, in particular Possibilistic c-Means (PCM)
                 and cluster fuzzy density. For each gene, these
                 algorithms calculate measures of confidence of various
                 methylation-expression relationships in each NHL
                 subclass. Thus, these tools can be used as a means of
                 high volume data exploration to better guide biological
                 confirmation using independent molecular biology
                 methods.",
  acknowledgement = ack-nhfb,
  keywords =     "cluster density; clustering; expression; fuzzy sets;
                 Methylation; microarray",
}

@Article{Lu:2007:ISL,
  author =       "Yijuan Lu and Qi Tian and Feng Liu and Maribel Sanchez
                 and Yufeng Wang",
  title =        "Interactive Semisupervised Learning for Microarray
                 Analysis",
  journal =      j-TCBB,
  volume =       "4",
  number =       "2",
  pages =        "190--203",
  month =        apr,
  year =         "2007",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:57:55 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "Microarray technology has generated vast amounts of
                 gene expression data with distinct patterns. Based on
                 the premise that genes of correlated functions tend to
                 exhibit similar expression patterns, various machine
                 learning methods have been applied to capture these
                 specific patterns in microarray data. However, the
                 discrepancy between the rich expression profiles and
                 the limited knowledge of gene functions has been a
                 major hurdle to the understanding of cellular networks.
                 To bridge this gap so as to properly comprehend and
                 interpret expression data, we introduce Relevance
                 Feedback to microarray analysis and propose an
                 interactive learning framework to incorporate the
                 expert knowledge into the decision module. In order to
                 find a good learning method and solve two intrinsic
                 problems in microarray data, high dimensionality and
                 small sample size, we also propose a semisupervised
                 learning algorithm: Kernel Discriminant-EM (KDEM). This
                 algorithm efficiently utilizes a large set of unlabeled
                 data to compensate for the insufficiency of a small set
                 of labeled data and it extends the linear algorithm in
                 Discriminant-EM (DEM) to a kernel algorithm to handle
                 nonlinearly separable data in a lower dimensional
                 space. The Relevance Feedback technique and KDEM
                 together construct an efficient and effective
                 interactive semisupervised learning framework for
                 microarray analysis. Extensive experiments on the yeast
                 cell cycle regulation data set and Plasmodium
                 falciparum red blood cell cycle data set show the
                 promise of this approach.",
  acknowledgement = ack-nhfb,
  keywords =     "kernel DEM; microarray analysis; relevance feedback;
                 semisupervised learning",
}

@Article{Lerner:2007:CSI,
  author =       "Boaz Lerner and Josepha Yeshaya and Lev Koushnir",
  title =        "On the Classification of a Small Imbalanced
                 Cytogenetic Image Database",
  journal =      j-TCBB,
  volume =       "4",
  number =       "2",
  pages =        "204--215",
  month =        apr,
  year =         "2007",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:57:55 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "Solving a multiclass classification task using a small
                 imbalanced database of patterns of high dimension is
                 difficult due to the curse-of-dimensionality and the
                 bias of the training toward the majority classes. Such
                 a problem has arisen while diagnosing genetic
                 abnormalities by classifying a small database of
                 fluorescence in situ hybridization signals of types
                 having different frequencies of occurrence. We propose
                 and experimentally study using the cytogenetic domain
                 two solutions to the problem. The first is hierarchical
                 decomposition of the classification task, where each
                 hierarchy level is designed to tackle a simpler problem
                 which is represented by classes that are approximately
                 balanced. The second solution is balancing the data by
                 up-sampling the minority classes accompanied by
                 dimensionality reduction. Implemented by the naive
                 Bayesian classifier or the multilayer perceptron neural
                 network, both solutions have diminished the problem and
                 contributed to accuracy improvement. In addition, the
                 experiments suggest that coping with the smallness of
                 the data is more beneficial than dealing with its
                 imbalance.",
  acknowledgement = ack-nhfb,
  keywords =     "classification; dimensionality reduction; genetic
                 diagnosis; imbalanced data; multilayer perceptron
                 (MLP); naive Bayesian classifier (NBC); small sample
                 size.",
}

@Article{Igel:2007:GBO,
  author =       "Christian Igel and Tobias Glasmachers and Britta
                 Mersch and Nico Pfeifer and Peter Meinicke",
  title =        "Gradient-Based Optimization of Kernel-Target Alignment
                 for Sequence Kernels Applied to Bacterial Gene Start
                 Detection",
  journal =      j-TCBB,
  volume =       "4",
  number =       "2",
  pages =        "216--226",
  month =        apr,
  year =         "2007",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:57:55 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "Biological data mining using kernel methods can be
                 improved by a task-specific choice of the kernel
                 function. Oligo kernels for genomic sequence analysis
                 have proven to have a high discriminative power and to
                 provide interpretable results. Oligo kernels that
                 consider subsequences of different lengths can be
                 combined and parameterized to increase their
                 flexibility. For adapting these parameters efficiently,
                 gradient-based optimization of the kernel-target
                 alignment is proposed. The power of this new, general
                 model selection procedure and the benefits of fitting
                 kernels to problem classes are demonstrated by adapting
                 oligo kernels for bacterial gene start detection.",
  acknowledgement = ack-nhfb,
  keywords =     "kernel target alignment; model selection; oligo
                 kernel; sequence analysis; support vector machines;
                 translation initiation sites",
}

@Article{Ogul:2007:SLP,
  author =       "Hasan Ogul and Erkan U. Mumcuo{\u{g}}lu",
  title =        "Subcellular Localization Prediction with New Protein
                 Encoding Schemes",
  journal =      j-TCBB,
  volume =       "4",
  number =       "2",
  pages =        "227--232",
  month =        apr,
  year =         "2007",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:57:55 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "Subcellular localization is one of the key properties
                 in functional annotation of proteins. Support vector
                 machines (SVMs) have been widely used for automated
                 prediction of subcellular localizations. Existing
                 methods differ in the protein encoding schemes used. In
                 this study, we present two methods for protein encoding
                 to be used for SVM-based subcellular localization
                 prediction: n{\hbox{-}}\rm peptide compositions with
                 reduced amino acid alphabets for larger values of n and
                 pairwise sequence similarity scores based on whole
                 sequence and N-terminal sequence. We tested the methods
                 on a common benchmarking data set that consists of
                 2,427 eukaryotic proteins with four localization sites.
                 As a result of 5-fold cross-validation tests, the
                 encoding with n{\hbox{-}}\rm peptide compositions
                 provided the accuracies of 84.5, 88.9, 66.3, and 94.3
                 percent for cytoplasmic, extracellular, mitochondrial,
                 and nuclear proteins, where the overall accuracy was
                 87.1 percent. The second method provided 83.6, 87.7,
                 87.9, and 90.5 percent accuracies for individual
                 locations and 87.8 percent overall accuracy. A hybrid
                 system, which we called PredLOC, makes a final decision
                 based on the results of the two presented methods which
                 achieved an overall accuracy of 91.3 percent, which is
                 better than the achievements of many of the existing
                 methods. The new system also outperformed the recent
                 methods in the experiments conducted on a new-unique
                 SWISSPROT test set.",
  acknowledgement = ack-nhfb,
  keywords =     "n{\hbox{-}}\rm peptide composition; probabilistic
                 suffix tree; subcellular localization; support vector
                 machines.",
}

@Article{Li:2007:DSD,
  author =       "Wenyuan Li and Ying Liu and Hung-Chung Huang and
                 Yanxiong Peng and Yongjing Lin and Wee-Keong Ng and
                 Kok-Leong Ong",
  title =        "Dynamical Systems for Discovering Protein Complexes
                 and Functional Modules from Biological Networks",
  journal =      j-TCBB,
  volume =       "4",
  number =       "2",
  pages =        "233--250",
  month =        apr,
  year =         "2007",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:57:55 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "Recent advances in high throughput experiments and
                 annotations via published literature have provided a
                 wealth of interaction maps of several biomolecular
                 networks, including metabolic, protein-protein, and
                 protein-DNA interaction networks. The architecture of
                 these molecular networks reveals important principles
                 of cellular organization and molecular functions.
                 Analyzing such networks, i.e., discovering dense
                 regions in the network, is an important way to identify
                 protein complexes and functional modules. This task has
                 been formulated as the problem of finding heavy
                 subgraphs, the Heaviest k{\hbox{-}}\rm Subgraph Problem
                 (k{\hbox{-}}\rm HSP), which itself is NP-hard. However,
                 any method based on the k{\hbox{-}}\rm HSP requires the
                 parameter k and an exact solution of k{\hbox{-}}\rm HSP
                 may still end up as a `spurious' heavy subgraph, thus
                 reducing its practicability in analyzing large scale
                 biological networks. We proposed a new formulation,
                 called the rank-HSP, and two dynamical systems to
                 approximate its results. In addition, a novel metric,
                 called the Standard deviation and Mean Ratio (SMR), is
                 proposed for use in `spurious' heavy subgraphs to
                 automate the discovery by setting a fixed threshold.
                 Empirical results on both the simulated graphs and
                 biological networks have demonstrated the efficiency
                 and effectiveness of our proposal.",
  acknowledgement = ack-nhfb,
  keywords =     "bioinformatics databases; evolutionary computing;
                 Graph algorithms; neural nets",
}

@Article{Hu:2007:DMP,
  author =       "Xiaohua Hu and Daniel D. Wu",
  title =        "Data Mining and Predictive Modeling of Biomolecular
                 Network from Biomedical Literature Databases",
  journal =      j-TCBB,
  volume =       "4",
  number =       "2",
  pages =        "251--263",
  month =        apr,
  year =         "2007",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:57:55 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "In this paper, we present a novel approach Bio-IEDM
                 (Biomedical Information Extraction and Data Mining) to
                 integrate text mining and predictive modeling to
                 analyze biomolecular network from biomedical literature
                 databases. Our method consists of two phases. In phase
                 1, we discuss a semisupervised efficient learning
                 approach to automatically extract biological
                 relationships such as protein-protein interaction,
                 protein-gene interaction from the biomedical literature
                 databases to construct the biomolecular network. Our
                 method automatically learns the patterns based on a few
                 user seed tuples and then extracts new tuples from the
                 biomedical literature based on the discovered patterns.
                 The derived biomolecular network forms a large
                 scale-free network graph. In phase 2, we present a
                 novel clustering algorithm to analyze the biomolecular
                 network graph to identify biologically meaningful
                 subnetworks (communities). The clustering algorithm
                 considers the characteristics of the scale-free network
                 graphs and is based on the local density of the vertex
                 and its neighborhood functions that can be used to find
                 more meaningful clusters with different density level.
                 The experimental results indicate our approach is very
                 effective in extracting biological knowledge from a
                 huge collection of biomedical literature. The
                 integration of data mining and information extraction
                 provides a promising direction for analyzing the
                 biomolecular network.",
  acknowledgement = ack-nhfb,
  keywords =     "biological complexes (communities); biomolecular
                 network; information extraction; scale-free network;
                 semisupervised learning",
}

@Article{Neri:2007:AMA,
  author =       "Ferrante Neri and Jari Toivanen and Giuseppe Leonardo
                 Cascella and Yew-Soon Ong",
  title =        "An Adaptive Multimeme Algorithm for Designing {HIV}
                 Multidrug Therapies",
  journal =      j-TCBB,
  volume =       "4",
  number =       "2",
  pages =        "264--278",
  month =        apr,
  year =         "2007",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:57:55 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "This paper proposes a period representation for
                 modeling the multidrug HIV therapies and an Adaptive
                 Multimeme Algorithm (AMmA) for designing the optimal
                 therapy. The period representation offers benefits in
                 terms of flexibility and reduction in dimensionality
                 compared to the binary representation. The AMmA is a
                 memetic algorithm which employs a list of three local
                 searchers adaptively activated by an evolutionary
                 framework. These local searchers, having different
                 features according to the exploration logic and the
                 pivot rule, have the role of exploring the decision
                 space from different and complementary perspectives
                 and, thus, assisting the standard evolutionary
                 operators in the optimization process. Furthermore, the
                 AMmA makes use of an adaptation which dynamically sets
                 the algorithmic parameters in order to prevent
                 stagnation and premature convergence. The numerical
                 results demonstrate that the application of the
                 proposed algorithm leads to very efficient medication
                 schedules which quickly stimulate a strong immune
                 response to HIV. The earlier termination of the
                 medication schedule leads to lesser unpleasant side
                 effects for the patient due to strong antiretroviral
                 therapy. A numerical comparison shows that the AMmA is
                 more efficient than three popular metaheuristics.
                 Finally, a statistical test based on the calculation of
                 the tolerance interval confirms the superiority of the
                 AMmA compared to the other methods for the problem
                 under study.",
  acknowledgement = ack-nhfb,
  keywords =     "adaptive algorithms; HIV therapy design; memetic
                 algorithms; nonlinear integer programming.",
}

@Article{Handl:2007:MOB,
  author =       "Julia Handl and Douglas B. Kell and Joshua Knowles",
  title =        "Multiobjective Optimization in Bioinformatics and
                 Computational Biology",
  journal =      j-TCBB,
  volume =       "4",
  number =       "2",
  pages =        "279--292",
  month =        apr,
  year =         "2007",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:57:55 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "This paper reviews the application of multiobjective
                 optimization in the fields of bioinformatics and
                 computational biology. A survey of existing work,
                 organized by application area, forms the main body of
                 the review, following an introduction to the key
                 concepts in multiobjective optimization. An original
                 contribution of the review is the identification of
                 five distinct `contexts,' giving rise to multiple
                 objectives: These are used to explain the reasons
                 behind the use of multiobjective optimization in each
                 application area and also to point the way to potential
                 future uses of the technique.",
  acknowledgement = ack-nhfb,
  keywords =     "bioinformatics (genome or protein) databases;
                 classification and association rules; clustering;
                 experimental design; global optimization; interactive
                 data exploration and discovery; machine learning",
}

@Article{Bontempi:2007:BSI,
  author =       "Gianluca Bontempi",
  title =        "A Blocking Strategy to Improve Gene Selection for
                 Classification of Gene Expression Data",
  journal =      j-TCBB,
  volume =       "4",
  number =       "2",
  pages =        "293--300",
  month =        apr,
  year =         "2007",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:57:55 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "Because of high dimensionality, machine learning
                 algorithms typically rely on feature selection
                 techniques in order to perform effective classification
                 in microarray gene expression data sets. However, the
                 large number of features compared to the number of
                 samples makes the task of feature selection
                 computationally hard and prone to errors. This paper
                 interprets feature selection as a task of stochastic
                 optimization, where the goal is to select among an
                 exponential number of alternative gene subsets the one
                 expected to return the highest generalization in
                 classification. Blocking is an experimental design
                 strategy which produces similar experimental conditions
                 to compare alternative stochastic configurations in
                 order to be confident that observed differences in
                 accuracy are due to actual differences rather than to
                 fluctuations and noise effects. We propose an original
                 blocking strategy for improving feature selection which
                 aggregates in a paired way the validation outcomes of
                 several learning algorithms to assess a gene subset and
                 compare it to others. This is a novelty with respect to
                 conventional wrappers, which commonly adopt a sole
                 learning algorithm to evaluate the relevance of a given
                 set of variables. The rationale of the approach is
                 that, by increasing the amount of experimental
                 conditions under which we validate a feature subset, we
                 can lessen the problems related to the scarcity of
                 samples and consequently come up with a better
                 selection. The paper shows that the blocking strategy
                 significantly improves the performance of a
                 conventional forward selection for a set of 16 publicly
                 available cancer expression data sets. The experiments
                 involve six different classifiers and show that
                 improvements take place independent of the
                 classification algorithm used after the selection step.
                 Two further validations based on available biological
                 annotation support the claim that blocking strategies
                 in feature selection may improve the accuracy and the
                 quality of the solution. The first validation is based
                 on retrieving PubMEd abstracts associated to the
                 selected genes and matching them to regular expressions
                 describing the biological phenomenon underlying the
                 expression data sets. The biological validation that
                 follows is based on the use of the Bioconductor package
                 GoStats in order to perform Gene Ontology statistical
                 analysis.",
  acknowledgement = ack-nhfb,
  keywords =     "bioinformatics (genome or protein) databases; data
                 mining; feature evaluation and selection; machine
                 learning",
}

@Article{Diekmann:2007:EUR,
  author =       "Yoan Diekmann and Marie-France Sagot and Eric
                 Tannier",
  title =        "Evolution under Reversals: Parsimony and Conservation
                 of Common Intervals",
  journal =      j-TCBB,
  volume =       "4",
  number =       "2",
  pages =        "301--309",
  month =        apr,
  year =         "2007",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:57:55 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "In comparative genomics, gene order data is often
                 modeled as signed permutations. A classical problem for
                 genome comparison is to detect common intervals in
                 permutations, that is, genes that are colocalized in
                 several species, indicating that they remained grouped
                 during evolution. A second largely studied problem
                 related to gene order is to compute a minimum scenario
                 of reversals that transforms a signed permutation into
                 another. Several studies began to mix the two problems
                 and it was observed that their results are not always
                 compatible: Often, parsimonious scenarios of reversals
                 break common intervals. If a scenario does not break
                 any common interval, it is called perfect. In two
                 recent studies, B{\'e}rard et al. defined a
                 class of permutations for which building a perfect
                 scenario of reversals sorting a permutation was
                 achieved in polynomial time and stated as an open
                 question whether it is possible to decide, given a
                 permutation, if there exists a minimum scenario of
                 reversals that is perfect. In this paper, we give a
                 solution to this problem and prove that this widens the
                 class of permutations addressed by the aforementioned
                 studies. We implemented and tested this algorithm on
                 gene order data of chromosomes from several mammal
                 species and we compared it to other methods. The
                 algorithm helps to choose among several possible
                 scenarios of reversals and indicates that the minimum
                 scenario of reversals is not always the most
                 plausible.",
  acknowledgement = ack-nhfb,
  keywords =     "common intervals; computational biology; genome
                 rearrangements; perfect sorting; signed permutations;
                 sorting by reversals",
}

@Article{Weskamp:2007:MGA,
  author =       "Nils Weskamp and Eyke Hullermeier and Daniel Kuhn and
                 Gerhard Klebe",
  title =        "Multiple Graph Alignment for the Structural Analysis
                 of Protein Active Sites",
  journal =      j-TCBB,
  volume =       "4",
  number =       "2",
  pages =        "310--320",
  month =        apr,
  year =         "2007",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:57:55 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "Graphs are frequently used to describe the geometry
                 and also the physicochemical composition of protein
                 active sites. Here, the concept of graph alignment as a
                 novel method for the structural analysis of protein
                 binding pockets is presented. Using inexact
                 graph-matching techniques, one is able to identify both
                 conserved areas and regions of difference among
                 different binding pockets. Thus, using multiple graph
                 alignments, it is possible to characterize functional
                 protein families and to examine differences among
                 related protein families independent of sequence or
                 fold homology. Optimized algorithms are described for
                 the efficient calculation of multiple graph alignments
                 for the analysis of physicochemical descriptors
                 representing protein binding pockets. Additionally, it
                 is shown how the calculated graph alignments can be
                 analyzed to identify structural features that are
                 characteristic for a given protein family and also
                 features that are discriminative among related
                 families. The methods are applied to a substantial
                 high-quality subset of the PDB database and their
                 ability to successfully characterize and classify 10
                 highly populated functional protein families is shown.
                 Additionally, two related protein families from the
                 group of serine proteases are examined and important
                 structural differences are detected automatically and
                 efficiently.",
  acknowledgement = ack-nhfb,
  keywords =     "drug design; fuzzy patterns; graph mining; knowledge
                 discovery in databases; structural pattern discovery",
}

@Article{Gusfield:2007:AEAb,
  author =       "Dan Gusfield",
  title =        "{Associate Editor} Appreciation and Welcome",
  journal =      j-TCBB,
  volume =       "4",
  number =       "3",
  pages =        "321--321",
  month =        jul,
  year =         "2007",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:58:24 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Fujarewicz:2007:ASM,
  author =       "Krzysztof Fujarewicz and Marek Kimmel and Tomasz
                 Lipniacki and Andrzej Swierniak",
  title =        "Adjoint Systems for Models of Cell Signaling Pathways
                 and their Application to Parameter Fitting",
  journal =      j-TCBB,
  volume =       "4",
  number =       "3",
  pages =        "322--335",
  month =        jul,
  year =         "2007",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:58:24 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "The paper concerns the problem of fitting mathematical
                 models of cell signaling pathways. Such models
                 frequently take the form of sets of nonlinear ordinary
                 differential equations. While the model is continuous
                 in time, the performance index used in the fitting
                 procedure, involves measurements taken at discrete time
                 moments. Adjoint sensitivity analysis is a tool, which
                 can be used for finding the gradient of a performance
                 index in the space of parameters of the model. In the
                 paper a structural formulation of adjoint sensitivity
                 analysis called the Generalized Backpropagation Through
                 Time (GBPTT) is used. The method is especially suited
                 for hybrid, continuous-discrete time systems. As an
                 example we use the mathematical model of the NF-kB
                 regulatory module, which plays a major role in the
                 innate immune response in animals.",
  acknowledgement = ack-nhfb,
  keywords =     "biology and genetics; modeling; ordinary differential
                 equations; parameter learning",
}

@Article{Wan:2007:CCN,
  author =       "Xiang Wan and Guohui Lin",
  title =        "{CISA}: Combined {NMR} Resonance Connectivity
                 Information Determination and Sequential Assignment",
  journal =      j-TCBB,
  volume =       "4",
  number =       "3",
  pages =        "336--348",
  month =        jul,
  year =         "2007",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:58:24 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "A nearly complete sequential resonance assignment is a
                 key factor leading to successful protein structure
                 determination via NMR spectroscopy. Assuming the
                 availability of a set of NMR spectral peak lists, most
                 of the existing assignment algorithms first use the
                 differences between chemical shift values for common
                 nuclei across multiple spectra to provide the evidence
                 that some pairs of peaks should be assigned to
                 sequentially adjacent amino acid residues in the target
                 protein. They then use these connectivities as
                 constraints to produce a sequential assignment. At
                 various levels of success, these algorithms typically
                 generate a large number of potential connectivity
                 constraints, and it grows exponentially as the quality
                 of spectral data decreases. A key observation used in
                 our sequential assignment program, CISA, is that
                 chemical shift residual signature information can be
                 used to improve the connectivity determination, and
                 thus to dramatically decrease the number of predicted
                 connectivity constraints. Fewer connectivity
                 constraints lead to less ambiguities in the sequential
                 assignment. Extensive simulation studies on several
                 large test datasets demonstrated that CISA is efficient
                 and effective, compared to three most recently proposed
                 sequential resonance assignment programs RANDOM, PACES,
                 and MARS.",
  acknowledgement = ack-nhfb,
  keywords =     "NMR sequential resonance assignment; spin system; spin
                 system assignment; spin system residual signature; spin
                 system sequential connectivity",
}

@Article{Cameron:2007:CCS,
  author =       "Michael Cameron and Hugh Williams",
  title =        "Comparing Compressed Sequences for Faster Nucleotide
                 {BLAST} Searches",
  journal =      j-TCBB,
  volume =       "4",
  number =       "3",
  pages =        "349--364",
  month =        jul,
  year =         "2007",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:58:24 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "Molecular biologists, geneticists, and other life
                 scientists use the BLAST homology search package as
                 their first step for discovery of information about
                 unknown or poorly annotated genomic sequences. There
                 are two main variants of BLAST: BLASTP for searching
                 protein collections and BLASTN for nucleotide
                 collections. Surprisingly, BLASTN has had very little
                 attention; for example, the algorithms it uses do not
                 follow those described in the 1997 BLAST paper [1] and
                 no exact description has been published. It is
                 important that BLASTN is state-of-the-art: Nucleotide
                 collections such as GenBank dwarf the protein
                 collections in size, they double in size almost yearly,
                 and they take many minutes to search on modern general
                 purpose workstations. This paper proposes significant
                 improvements to the BLASTN algorithms. Each of our
                 schemes is based on compressed bytepacked formats that
                 allow queries and collection sequences to be compared
                 four bases at a time, permitting very fast query
                 evaluation using lookup tables and numeric comparisons.
                 Our most significant innovations are two new, fast
                 gapped alignment schemes that allow accurate sequence
                 alignment without decompression of the collection
                 sequences. Overall, our innovations more than double
                 the speed of BLASTN with no effect on accuracy and have
                 been integrated into our new version of BLAST that is
                 freely available for download from
                 http://www.fsa-blast.org/.",
  acknowledgement = ack-nhfb,
  keywords =     "BLAST; compression; Four Russians algorithm; homology
                 search; sequence alignment",
}

@Article{Tang:2007:DTS,
  author =       "Yuchun Tang and Yan-Qing Zhang and Zhen Huang",
  title =        "Development of Two-Stage {SVM}-{RFE} Gene Selection
                 Strategy for Microarray Expression Data Analysis",
  journal =      j-TCBB,
  volume =       "4",
  number =       "3",
  pages =        "365--381",
  month =        jul,
  year =         "2007",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:58:24 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "Extracting a subset of informative genes from
                 microarray expression data is a critical data
                 preparation step in cancer classification and other
                 biological function analyses. Though many algorithms
                 have been developed, the Support Vector Machine -
                 Recursive Feature Elimination (SVM-RFE) algorithm is
                 one of the best gene feature selection algorithms. It
                 assumes that a smaller `filter-out' factor in the
                 SVM-RFE, which results in a smaller number of gene
                 features eliminated in each recursion, should lead to
                 extraction of a better gene subset. Because the SVM-RFE
                 is highly sensitive to the `filter-out' factor, our
                 simulations have shown that this assumption is not
                 always correct and that the SVM-RFE is an unstable
                 algorithm. To select a set of key gene features for
                 reliable prediction of cancer types or subtypes and
                 other applications, a new two-stage SVM-RFE algorithm
                 has been developed. It is designed to effectively
                 eliminate most of the irrelevant, redundant and noisy
                 genes while keeping information loss small at the first
                 stage. A fine selection for the final gene subset is
                 then performed at the second stage. The two-stage
                 SVM-RFE overcomes the instability problem of the
                 SVM-RFE to achieve better algorithm utility. We have
                 demonstrated that the two-stage SVM-RFE is
                 significantly more accurate and more reliable than the
                 SVM-RFE and three correlation-based methods based on
                 our analysis of three publicly available microarray
                 expression datasets. Furthermore, the two-stage SVM-RFE
                 is computationally efficient because its time
                 complexity is $O(d * \log{_2d})$, where $d$ is the size
                 of the original gene set.",
  acknowledgement = ack-nhfb,
  keywords =     "bioinformatics; cancer classification; feature
                 selection; gene selection; microarray gene expression
                 data analysis; recursive feature elimination; support
                 vector machines",
}

@Article{Ng:2007:NGW,
  author =       "Lydia Ng and Sayan Pathak and Chihchau Kuan and Chris
                 Lau and Hong-wei Dong and Andrew Sodt and Chinh Dang
                 and Brian Avants and Paul Yushkevich and James Gee and
                 David Haynor and Ed Lein and Allan Jones and Mike
                 Hawrylycz",
  title =        "Neuroinformatics for Genome-Wide {$3$-D} Gene
                 Expression Mapping in the Mouse Brain",
  journal =      j-TCBB,
  volume =       "4",
  number =       "3",
  pages =        "382--393",
  month =        jul,
  year =         "2007",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:58:24 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "Large scale gene expression studies in the mammalian
                 brain offer the promise of understanding the topology,
                 networks and ultimately the function of its complex
                 anatomy, opening previously unexplored avenues in
                 neuroscience. High-throughput methods permit
                 genome-wide searches to discover genes that are
                 uniquely expressed in brain circuits and regions that
                 control behavior. Previous gene expression mapping
                 studies in model organisms have employed situ
                 hybridization (ISH), a technique that uses labeled
                 nucleic acid probes to bind to specific mRNA
                 transcripts in tissue sections. A key requirement for
                 this effort is the development of fast and robust
                 algorithms for anatomically mapping and quantifying
                 gene expression for ISH. We describe a neuroinformatics
                 pipeline for automatically mapping expression profiles
                 of ISH data and its use to produce the first genomic
                 scale 3-D mapping of gene expression in a mammalian
                 brain. The pipeline is fully automated and adaptable to
                 other organisms and tissues. Our automated study of
                 over 20,000 genes indicates that at least 78.8\% are
                 expressed at some level in the adult C56BL/6J mouse
                 brain. In addition to providing a platform for genomic
                 scale search, high-resolution images and visualization
                 tools for expression analysis are available at the
                 Allen Brain Atlas web site
                 (http://www.brain-map.org).",
  acknowledgement = ack-nhfb,
  keywords =     "bioinformatics (genome or protein) databases; data
                 mining; information visualization; registration;
                 segmentation",
}

@Article{Nguyen:2007:RRN,
  author =       "C. Thach Nguyen and Nguyen Bao Nguyen and Wing-Kin
                 Sung and Louxin Zhang",
  title =        "Reconstructing Recombination Network from Sequence
                 Data: The Small Parsimony Problem",
  journal =      j-TCBB,
  volume =       "4",
  number =       "3",
  pages =        "394--402",
  month =        jul,
  year =         "2007",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:58:24 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "The small parsimony problem is studied for
                 reconstructing recombination networks from sequence
                 data. The small parsimony problem is polynomial-time
                 solvable for phylogenetic trees. However, the problem
                 is proved NP-hard even for galled recombination
                 networks. A dynamic programming algorithm is also
                 developed to solve the small parsimony problem. It
                 takes $O(dn2^{3h})$ time on an input recombination
                 network over length-$d$ sequences in which there are
                 $h$ recombination and $n - h$ tree nodes.",
  acknowledgement = ack-nhfb,
  keywords =     "approximability; combination network; dynamic
                 programming; NP-hardness; parsimony method;
                 phylogenetic network",
}

@Article{Lones:2007:RMD,
  author =       "Michael Lones and Andy Tyrrell",
  title =        "Regulatory Motif Discovery Using a Population
                 Clustering Evolutionary Algorithm",
  journal =      j-TCBB,
  volume =       "4",
  number =       "3",
  pages =        "403--414",
  month =        jul,
  year =         "2007",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:58:24 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "This paper describes a novel evolutionary algorithm
                 for regulatory motif discovery in DNA promoter
                 sequences. The algorithm uses data clustering to
                 logically distribute the evolving population across the
                 search space. Mating then takes place within local
                 regions of the population, promoting overall solution
                 diversity and encouraging discovery of multiple
                 solutions. Experiments using synthetic data sets have
                 demonstrated the algorithm's capacity to find position
                 frequency matrix models of known regulatory motifs in
                 relatively long promoter sequences. These experiments
                 have also shown the algorithm's ability to maintain
                 diversity during search and discover multiple motifs
                 within a single population. The utility of the
                 algorithm for discovering motifs in real biological
                 data is demonstrated by its ability to find meaningful
                 motifs within muscle-specific regulatory sequences.",
  acknowledgement = ack-nhfb,
  keywords =     "evolutionary computation; motif discovery;
                 muscle-specific gene expression; population-based data
                 clustering; transcription factor binding sites",
}

@Article{Yip:2007:SIS,
  author =       "Andy M. Yip and Michael K. Ng and Edmond H. Wu and
                 Tony F. Chan",
  title =        "Strategies for Identifying Statistically Significant
                 Dense Regions in Microarray Data",
  journal =      j-TCBB,
  volume =       "4",
  number =       "3",
  pages =        "415--429",
  month =        jul,
  year =         "2007",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:58:24 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "We propose and study the notion of dense regions for
                 the analysis of categorized gene expression data and
                 present some searching algorithms for discovering them.
                 The algorithms can be applied to any categorical data
                 matrices derived from gene expression level matrices.
                 We demonstrate that dense regions are simple but useful
                 and statistically significant patterns that can be used
                 to 1) identify genes and/or samples of interest and 2)
                 eliminate genes and/or samples corresponding to
                 outliers, noise, or abnormalities. Some theoretical
                 studies on the properties of the dense regions are
                 presented which allow us to characterize dense regions
                 into several classes and to derive tailor-made
                 algorithms for different classes of regions. Moreover,
                 an empirical simulation study on the distribution of
                 the size of dense regions is carried out which is then
                 used to assess the significance of dense regions and to
                 derive effective pruning methods to speed up the
                 searching algorithms. Real microarray data sets are
                 employed to test our methods. Comparisons with six
                 other well-known clustering algorithms using synthetic
                 and real data are also conducted which confirm the
                 superiority of our methods in discovering dense
                 regions. The DRIFT code and a tutorial are available as
                 supplemental material, which can be found on the
                 Computer Society Digital Library at
                 http://computer.org/tcbb/archives.htm.",
  acknowledgement = ack-nhfb,
  keywords =     "bicluster; categorical data; clustering; coexpressed
                 genes; dense region; gene expression; microarray",
}

@Article{Liang:2007:BBD,
  author =       "Kuo-ching Liang and Xiaodong Wang and Dimitris
                 Anastassiou",
  title =        "Bayesian Basecalling for {DNA} Sequence Analysis Using
                 Hidden Markov Models",
  journal =      j-TCBB,
  volume =       "4",
  number =       "3",
  pages =        "430--440",
  month =        jul,
  year =         "2007",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:58:24 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "It has been shown that electropherograms of DNA
                 sequences can be modeled with hidden Markov models.
                 Basecalling, the procedure that determines the sequence
                 of bases from the given eletropherogram, can then be
                 performed using the Viterbi algorithm. A training step
                 is required prior to basecalling in order to estimate
                 the HMM parameters. In this paper, we propose a
                 Bayesian approach which employs the Markov chain Monte
                 Carlo (MCMC) method to perform basecalling. Such an
                 approach not only allows one to naturally encode the
                 prior biological knowledge into the basecalling
                 algorithm, it also exploits both the training data and
                 the basecalling data in estimating the HMM parameters,
                 leading to more accurate estimates. Using the recently
                 sequenced genome of the organism Legionella pneumophila
                 we show that the MCMC basecaller outperforms the
                 state-of-the-art basecalling algorithm in terms of
                 total errors while requiring much less training than
                 other proposed statistical basecallers.",
  acknowledgement = ack-nhfb,
  keywords =     "basecalling; DNA sequencing; electropherogram; hidden
                 Markov model (HMM); Markov chain Monte Carlo (MCMC)",
}

@Article{Thireou:2007:BLS,
  author =       "Trias Thireou and Martin Reczko",
  title =        "Bidirectional Long Short-Term Memory Networks for
                 Predicting the Subcellular Localization of Eukaryotic
                 Proteins",
  journal =      j-TCBB,
  volume =       "4",
  number =       "3",
  pages =        "441--446",
  month =        jul,
  year =         "2007",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:58:24 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "An algorithm called Bidirectional Long Short-Term
                 Memory Networks (BLSTM) for processing sequential data
                 is introduced. This supervised learning method trains a
                 special recurrent neural network to use very long
                 ranged symmetric sequence context using a combination
                 of nonlinear processing elements and linear feedback
                 loops for storing long-range context. The algorithm is
                 applied to the sequence-based prediction of protein
                 localization and predicts 93.3\% novel non-plant
                 proteins and 88.4\% novel plant proteins correctly,
                 which is an improvement over feedforward and standard
                 recurrent networks solving the same problem. The BLSTM
                 system is available as a web-service
                 (http://www.stepc.gr/~synaptic/blstm.html).",
  acknowledgement = ack-nhfb,
  keywords =     "biological sequence analysis; long short-term memory;
                 protein subcellular localization prediction; recurrent
                 neural networks",
}

@Article{Korodi:2007:CAN,
  author =       "Gergely Korodi and Ioan Tabus",
  title =        "Compression of Annotated Nucleotide Sequences",
  journal =      j-TCBB,
  volume =       "4",
  number =       "3",
  pages =        "447--457",
  month =        jul,
  year =         "2007",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:58:24 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "This article introduces an algorithm for the lossless
                 compression of DNA files, which contain annotation text
                 besides the nucleotide sequence. First a grammar is
                 specifically designed to capture the regularities of
                 the annotation text. A revertible transformation uses
                 the grammar rules in order to equivalently represent
                 the original file as a collection of parsed segments
                 and a sequence of decisions made by the grammar parser.
                 This decomposition enables the efficient use of
                 state-of-the-art encoders for processing the parsed
                 segments. The output size of the decision-making
                 process of the grammar is optimized by extending the
                 states to account for high-order Markovian
                 dependencies. The practical implementation of the
                 algorithm achieves a significant improvement when
                 compared to the general-purpose methods currently used
                 for DNA files.",
  acknowledgement = ack-nhfb,
  keywords =     "4 [Data]: Coding and Information Theory | Data
                 compaction and compression; Annotation; Compression;
                 F.4 [Theory of Computation]: Mathematical Logic and
                 Formal Languages | Formal languages; Formal Grammars;
                 G.3 [Mathematics of Computing]: Probability and
                 Statistics | Markov processes; J.3 [Computer
                 Applications]: Life and Medical Sciences | Biology and
                 genetics; nucleotide sequences",
}

@Article{Bordewich:2007:CHN,
  author =       "Magnus Bordewich and Charles Semple",
  title =        "Computing the Hybridization Number of Two Phylogenetic
                 Trees Is Fixed-Parameter Tractable",
  journal =      j-TCBB,
  volume =       "4",
  number =       "3",
  pages =        "458--466",
  month =        jul,
  year =         "2007",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:58:24 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "Reticulation processes in evolution mean that the
                 ancestral history of certain groups of present-day
                 species is non-tree-like. These processes include
                 hybridization, lateral gene transfer, and
                 recombination. Despite the existence of reticulation,
                 such events are relatively rare and so a fundamental
                 problem for biologists is the following: given a
                 collection of rooted binary phylogenetic trees on sets
                 of species that correctly represent the tree-like
                 evolution of different parts of their genomes, what is
                 the smallest number of `reticulation' vertices in any
                 network that explains the evolution of the species
                 under consideration. It has been previously shown that
                 this problem is NP-hard even when the collection
                 consists of only two rooted binary phylogenetic trees.
                 However, in this paper, we show that the problem is
                 fixed-parameter tractable in the two-tree instance,
                 when parameterized by this smallest number of
                 reticulation vertices.",
  acknowledgement = ack-nhfb,
  keywords =     "agreement forest; hybridization network; reticulate
                 evolution; rooted phylogenetic tree; subtree prune and
                 regraft",
}

@Article{Huang:2007:EGS,
  author =       "D. Huang and Tommy Chow",
  title =        "Effective Gene Selection Method With Small Sample Sets
                 Using Gradient-Based and Point Injection Techniques",
  journal =      j-TCBB,
  volume =       "4",
  number =       "3",
  pages =        "467--475",
  month =        jul,
  year =         "2007",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:58:24 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "Microarray gene expression data usually consist of a
                 large amount of genes. Among these genes, only a small
                 fraction is informative for performing cancer
                 diagnostic test. This paper focuses on effective
                 identification of informative genes. We analyze gene
                 selection models from the perspective of optimization
                 theory. As a result, a new strategy is designed to
                 modify conventional search engines. Also, as
                 overfitting is likely to occur in microarray data
                 because of their small sample set, a point injection
                 technique is developed to address the problem of
                 overfitting. The proposed strategies have been
                 evaluated on three kinds of cancer diagnosis. Our
                 results show that the proposed strategies can improve
                 the performance of gene selection substantially. The
                 experimental results also indicate that the proposed
                 methods are very robust under all the investigated
                 cases.",
  acknowledgement = ack-nhfb,
  keywords =     "gene selection; gradient based learning; optimization
                 theory; point injection",
}

@Article{Hecht:2007:HTL,
  author =       "David Hecht and Gary Fogel",
  title =        "High-Throughput Ligand Screening via Preclustering and
                 Evolved Neural Networks",
  journal =      j-TCBB,
  volume =       "4",
  number =       "3",
  pages =        "476--484",
  month =        jul,
  year =         "2007",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:58:24 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "The pathway for novel lead drug discovery has many
                 major deficiencies, the most significant of which is
                 the immense size of small molecule diversity space.
                 Methods that increase the search efficiency and/or
                 reduce the size of the search space, increase the rate
                 at which useful lead compounds are identified.
                 Artificial neural networks optimized via evolutionary
                 computation provide a cost and time-effective solution
                 to this problem. Here, we present results that suggest
                 preclustering of small molecules prior to neural
                 network optimization is useful for generating models of
                 quantitative structure-activity relationships for a set
                 of HIV inhibitors. Using these methods, it is possible
                 to prescreen compounds to separate active from inactive
                 compounds or even actives and mildly active compounds
                 from inactive compounds with high predictive accuracy
                 while simultaneously reducing the feature space. It is
                 also possible to identify `human interpretable'
                 features from the best models that can be used for
                 proposal and synthesis of new compounds in order to
                 optimize potency and specificity.",
  acknowledgement = ack-nhfb,
  keywords =     "artificial neural networks; computational
                 intelligence; evolutionary computation; medicine and
                 science",
}

@Article{Zhang:2007:MCU,
  author =       "Runxuan Zhang and Guang-Bin Huang and N. Sundararajan
                 and P. Saratchandran",
  title =        "Multicategory Classification Using An Extreme Learning
                 Machine for Microarray Gene Expression Cancer
                 Diagnosis",
  journal =      j-TCBB,
  volume =       "4",
  number =       "3",
  pages =        "485--495",
  month =        jul,
  year =         "2007",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:58:24 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "In this paper, the recently developed Extreme Learning
                 Machine (ELM) is used for direct multicategory
                 classification problems in the cancer diagnosis area.
                 ELM avoids problems like local minima, improper
                 learning rate and overfitting commonly faced by
                 iterative learning methods and completes the training
                 very fast. We have evaluated the multi-category
                 classification performance of ELM on three benchmark
                 microarray datasets for cancer diagnosis, namely, the
                 GCM dataset, the Lung dataset and the Lymphoma dataset.
                 The results indicate that ELM produces comparable or
                 better classification accuracies with reduced training
                 time and implementation complexity compared to
                 artificial neural networks methods like conventional
                 back-propagation ANN, Linder's SANN, and Support Vector
                 Machine methods like SVM-OVO and Ramaswamy's SVM-OVA.
                 ELM also achieves better accuracies for classification
                 of individual categories.",
  acknowledgement = ack-nhfb,
  keywords =     "extreme learning machine; gene expression; microarray;
                 multi-category classification; SVM",
}

@Article{Zhang:2007:SSS,
  author =       "Louxin Zhang",
  title =        "Superiority of Spaced Seeds for Homology Search",
  journal =      j-TCBB,
  volume =       "4",
  number =       "3",
  pages =        "496--505",
  month =        jul,
  year =         "2007",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:58:24 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "In homology search, good spaced seeds have higher
                 sensitivity for the same cost (weight). However,
                 elucidating the mechanism that confers power to spaced
                 seeds and characterizing optimal spaced seeds still
                 remain unsolved. This paper investigates these two
                 important open questions by formally analyzing the
                 average number of non-overlapping hits and the hit
                 probability of a spaced seed in the Bernoulli sequence
                 model. We prove that when the length of a non-uniformly
                 spaced seed is bounded above by an exponential function
                 of the seed weight, the seed outperforms strictly the
                 traditional consecutive seed of the same weight in both
                 (i) the average number of non-overlapping hits and (ii)
                 the asymptotic hit probability. This clearly answers
                 the first problem mentioned above in the Bernoulli
                 sequence model. The theoretical study in this paper
                 also gives a new solution to finding long optimal
                 seeds.",
  acknowledgement = ack-nhfb,
  keywords =     "homology search; pattern matching; renewal theory; run
                 statistics; sequence alignment; spaced seeds",
}

@Article{Matsen:2007:OCT,
  author =       "Frederick Matsen",
  title =        "Optimization Over a Class of Tree Shape Statistics",
  journal =      j-TCBB,
  volume =       "4",
  number =       "3",
  pages =        "506--512",
  month =        jul,
  year =         "2007",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:58:24 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "Tree shape statistics quantify some aspect of the
                 shape of a phylogenetic tree. They are commonly used to
                 compare reconstructed trees to evolutionary models and
                 to find evidence of tree reconstruction bias.
                 Historically, to find a useful tree shape statistic,
                 formulas have been invented by hand and then evaluated
                 for utility. This article presents the first method
                 which is capable of optimizing over a class of tree
                 shape statistics, called Binary Recursive Tree Shape
                 Statistics (BRTSS). After defining the BRTSS class, a
                 set of algebraic expressions is defined which can be
                 used in the recursions. The tree shape statistics
                 definable using these expressions in the BRTSS is very
                 general, and includes many of the statistics with which
                 phylogenetic researchers are already familiar. We then
                 present a practical genetic algorithm which is capable
                 of performing optimization over BRTSS given any
                 objective function. The chapter concludes with a
                 successful application of the methods to find a new
                 statistic which indicates a significant difference
                 between two distributions on trees which were
                 previously postulated to have similar properties.",
  acknowledgement = ack-nhfb,
  keywords =     "biology and genetics; evolutionary computing and
                 genetic algorithms",
}

@Article{Mandoiu:2007:GEI,
  author =       "Ion I. M{\~a}ndoiu and Yi Pan and Alexander
                 Zelikovsky",
  title =        "{Guest Editors}' Introduction to the {Special Section
                 on Bioinformatics Research and Applications}",
  journal =      j-TCBB,
  volume =       "4",
  number =       "4",
  pages =        "513--514",
  month =        oct,
  year =         "2007",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:58:47 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Zheng:2007:RNA,
  author =       "Chunfang Zheng and Qian Zhu and David Sankoff",
  title =        "Removing Noise and Ambiguities from Comparative Maps
                 in Rearrangement Analysis",
  journal =      j-TCBB,
  volume =       "4",
  number =       "4",
  pages =        "515--522",
  month =        oct,
  year =         "2007",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:58:47 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "Comparison of genomic maps is hampered by errors and
                 ambiguities introduced by mapping technology,
                 incorrectly resolved paralogy, small samples of markers
                 and extensive genome rearrangement. We design an
                 analysis to remove or resolve most of these problems
                 and to extract corrected data where markers occur in
                 consecutive strips in both genomes. To do this we
                 introduce the notion of pre-strip, an efficient way of
                 generating these, and a compatibility analysis
                 culminating in a Maximum Weighted Clique (MWC) search.
                 The output can be directly analyzed with genome
                 rearrangement algorithms, allowing the restoration of
                 some of the data not incorporated into the clique
                 solution. We investigate the trade-off between criteria
                 for discarding excessive pre-strips to make MWC
                 feasible, in terms of retaining as many markers as
                 possible in the solution and producing an economical
                 rearrangement analysis. We explore these questions
                 through simulation and through comparison of the rice
                 and sorghum genomes.",
  acknowledgement = ack-nhfb,
  keywords =     "genome rearrangements; Maximum Weight Clique; rice;
                 sorghum; synteny blocks",
}

@Article{Blin:2007:CGD,
  author =       "Guillaume Blin and Cedric Chauve and Guillaume Fertin
                 and Romeo Rizzi and Stephane Vialette",
  title =        "Comparing Genomes with Duplications: {A} Computational
                 Complexity Point of View",
  journal =      j-TCBB,
  volume =       "4",
  number =       "4",
  pages =        "523--534",
  month =        oct,
  year =         "2007",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:58:47 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "In this paper, we are interested in the computational
                 complexity of computing (dis)similarity measures
                 between two genomes when they contain duplicated genes
                 or genomic markers, a problem that happens frequently
                 when comparing whole nuclear genomes. Recently, several
                 methods ( [1], [2]) have been proposed that are based
                 on two steps to compute a given (dis)similarity measure
                 M between two genomes G_1 and G_2: first, one
                 establishes a one-to-one correspondence between genes
                 of G_1 and genes of G_2 ; second, once this
                 correspondence is established, it defines explicitly a
                 permutation and it is then possible to quantify their
                 similarity using classical measures defined for
                 permutations, like the number of breakpoints. Hence
                 these methods rely on two elements: a way to establish
                 a one-to-one correspondence between genes of a pair of
                 genomes, and a (dis)similarity measure for
                 permutations. The problem is then, given a
                 (dis)similarity measure for permutations, to compute a
                 correspondence that defines an optimal permutation for
                 this measure. We are interested here in two models to
                 compute a one-to-one correspondence: the exemplar
                 model, where all but one copy are deleted in both
                 genomes for each gene family, and the matching model,
                 that computes a maximal correspondence for each gene
                 family. We show that for these two models, and for
                 three (dis)similarity measures on permutations, namely
                 the number of common intervals, the maximum adjacency
                 disruption (MAD) number and the summed adjacency
                 disruption (SAD) number, the problem of computing an
                 optimal correspondence is NP-complete, and even APXhard
                 for the MAD number and SAD number.",
  acknowledgement = ack-nhfb,
  keywords =     "common intervals; comparative genomics; computational
                 complexity; maximum adjacency disruption number; summed
                 adjacency disruption number",
}

@Article{Bonizzoni:2007:ELC,
  author =       "Paola Bonizzoni and Gianluca Della Vedova and Riccardo
                 Dondi and Guillaume Fertin and Raffaella Rizzi and
                 Stephane Vialette",
  title =        "Exemplar Longest Common Subsequence",
  journal =      j-TCBB,
  volume =       "4",
  number =       "4",
  pages =        "535--543",
  month =        oct,
  year =         "2007",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:58:47 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "In this paper, we investigate the computational and
                 approximation complexity of the Exemplar Longest Common
                 Subsequence of a set of sequences (ELCS problem), a
                 generalization of the Longest Common Subsequence
                 problem, where the input sequences are over the union
                 of two disjoint sets of symbols, a set of mandatory
                 symbols and a set of optional symbols. We show that
                 different versions of the problem are APX-hard even for
                 instances with two sequences. Moreover, we show that
                 the related problem of determining the existence of a
                 feasible solution of the Exemplar Longest Common
                 Subsequence of two sequences is NP-hard. On the
                 positive side, we first present an efficient algorithm
                 for the ELCS problem over instances of two sequences
                 where each mandatory symbol can appear in total at most
                 three times in the sequences. Furthermore, we present
                 two fixed-parameter algorithms for the ELCS problem
                 over instances of two sequences where the parameter is
                 the number of mandatory symbols.",
  acknowledgement = ack-nhfb,
  keywords =     "algorithm design and analysis; analysis of algorithms
                 and problem complexity; combinatorial algorithms;
                 comparative genomics; longest common subsequence",
}

@Article{Davila:2007:FPA,
  author =       "Jaime Davila and Sudha Balla and Sanguthevar
                 Rajasekaran",
  title =        "Fast and Practical Algorithms for Planted (l, d) Motif
                 Search",
  journal =      j-TCBB,
  volume =       "4",
  number =       "4",
  pages =        "544--552",
  month =        oct,
  year =         "2007",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:58:47 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "We consider the planted (l, d) motif search problem,
                 which consists of finding a substring of length l that
                 occurs in a set of input sequences {s1, . . . , sn}
                 with up to d errors, a problem that arises from the
                 need to find transcription factor-binding sites in
                 genomic information. We propose a sequence of practical
                 algorithms, which start based on the ideas considered
                 in PMS1. These algorithms are exact, have little space
                 requirements, and are able to tackle challenging
                 instances with bigger d, taking less time in the
                 instances reported solved by exact algorithms. In
                 particular, one of the proposed algorithms, PMSprune,
                 is able to solve the challenging instances, such as
                 (17, 6) and (19, 7), which were not previously reported
                 as solved in the literature.",
  acknowledgement = ack-nhfb,
  keywords =     "branch and bound algorithms; challenging instances;
                 exact algorithms; planted motif search problem",
}

@Article{Schneider:2007:SDM,
  author =       "Adrian Schneider and Gaston Gonnet and Gina
                 Cannarozzi",
  title =        "{SynPAM---A} Distance Measure Based on Synonymous
                 Codon Substitutions",
  journal =      j-TCBB,
  volume =       "4",
  number =       "4",
  pages =        "553--560",
  month =        oct,
  year =         "2007",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:58:47 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "Measuring evolutionary distances between DNA or
                 protein sequences forms the basis of many applications
                 in computational biology and evolutionary studies. Of
                 particular interest are distances based on synonymous
                 substitutions, since these substitutions are considered
                 to be under very little selection pressure and
                 therefore assumed to accumulate in an almost clock-like
                 manner. SynPAM, the method presented here, allows the
                 estimation of distances between coding DNA sequences
                 based on synonymous codon substitutions. The problem of
                 estimating an accurate distance from the observed
                 substitution pattern is solved by maximum-likelihood
                 with empirical codon substitution matrices employed for
                 the underlying Markov model. Comparisons with
                 established measures of synonymous distance indicate
                 that SynPAM has less variance and yields useful results
                 over a longer time range.",
  acknowledgement = ack-nhfb,
  keywords =     "dS; evolutionary distance; molecular evolution;
                 synonymous substitutions; SynPAM",
}

@Article{Sridhar:2007:AEN,
  author =       "Srinath Sridhar and Kedar Dhamdhere and Guy Blelloch
                 and Eran Halperin and R. Ravi and Russell Schwartz",
  title =        "Algorithms for Efficient Near-Perfect Phylogenetic
                 Tree Reconstruction in Theory and Practice",
  journal =      j-TCBB,
  volume =       "4",
  number =       "4",
  pages =        "561--571",
  month =        oct,
  year =         "2007",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:58:47 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "We consider the problem of reconstructing near-perfect
                 phylogenetic trees using binary character states
                 (referred to as BNPP). A perfect phylogeny assumes that
                 every character mutates at most once in the
                 evolutionary tree, yielding an algorithm for binary
                 character states that is computationally efficient but
                 not robust to imperfections in real data. A
                 near-perfect phylogeny relaxes the perfect phylogeny
                 assumption by allowing at most a constant number of
                 additional mutations. We develop two algorithms for
                 constructing optimal near-perfect phylogenies and
                 provide empirical evidence of their performance. The
                 first simple algorithm is fixed parameter tractable
                 when the number of additional mutations and the number
                 of characters that share four gametes with some other
                 character are constants. The second, more involved
                 algorithm for the problem is fixed parameter tractable
                 when only the number of additional mutations is fixed.
                 We have implemented both algorithms and shown them to
                 be extremely efficient in practice on biologically
                 significant data sets. This work proves the BNPP
                 problem fixed parameter tractable and provides the
                 first practical phylogenetic tree reconstruction
                 algorithms that find guaranteed optimal solutions while
                 being easily implemented and computationally feasible
                 for data sets of biologically meaningful size and
                 complexity.",
  acknowledgement = ack-nhfb,
  keywords =     "biology and genetics; computations on discrete
                 structures; trees",
}

@Article{Chen:2007:CBR,
  author =       "Jinmiao Chen and Narendra Chaudhari",
  title =        "Cascaded Bidirectional Recurrent Neural Networks for
                 Protein Secondary Structure Prediction",
  journal =      j-TCBB,
  volume =       "4",
  number =       "4",
  pages =        "572--582",
  month =        oct,
  year =         "2007",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:58:47 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "Protein secondary structure (PSS) prediction is an
                 important topic in bioinformatics. Our study on a large
                 set of non-homologous proteins shows that long-range
                 interactions commonly exist and negatively affect PSS
                 prediction. Besides, we also reveal strong correlations
                 between secondary structure (SS) elements. In order to
                 take into account the long-range interactions and SS-SS
                 correlations, we propose a novel prediction system
                 based on cascaded bidirectional recurrent neural
                 network (BRNN). We compare the cascaded BRNN against
                 another two BRNN architectures, namely the original
                 BRNN architecture used for speech recognition as well
                 as Pollastri's BRNN that was proposed for PSS
                 prediction. Our cascaded BRNN achieves an overall three
                 state accuracy Q3 of 74.38\%, and reaches a high
                 Segment OVerlap (SOV) of 66.0455. It outperforms the
                 original BRNN and Pollastri's BRNN in both Q3 and SOV.
                 Specifically, it improves the SOV score by 4-6\%.",
  acknowledgement = ack-nhfb,
}

@Article{Xiong:2007:DDK,
  author =       "Huilin Xiong and Ya Zhang and Xue-Wen Chen",
  title =        "Data-Dependent Kernel Machines for Microarray Data
                 Classification",
  journal =      j-TCBB,
  volume =       "4",
  number =       "4",
  pages =        "583--595",
  month =        oct,
  year =         "2007",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:58:47 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "One important application of gene expression analysis
                 is to classify tissue samples according to their gene
                 expression levels. Gene expression data are typically
                 characterized by high dimensionality and small sample
                 size, which makes the classification task quite
                 challenging. In this paper, we present a data-dependent
                 kernel for microarray data classification. This kernel
                 function is engineered so that the class separability
                 of the training data is maximized. A
                 bootstrapping-based resampling scheme is introduced to
                 reduce the possible training bias. The effectiveness of
                 this adaptive kernel for microarray data classification
                 is illustrated with a k-Nearest Neighbor (KNN)
                 classifier. Our experimental study shows that the
                 data-dependent kernel leads to a significant
                 improvement in the accuracy of KNN classifiers.
                 Furthermore, this kernel-based KNN scheme has been
                 demonstrated to be competitive to, if not better than,
                 more sophisticated classifiers such as Support Vector
                 Machines (SVMs) and the Uncorrelated Linear
                 Discriminant Analysis (ULDA) for classifying gene
                 expression data.",
  acknowledgement = ack-nhfb,
  keywords =     "bootstrapping resampling; cancer classification;
                 kernel machines; kernel optimization; microarray data
                 analysis",
}

@Article{Michal:2007:FCM,
  author =       "Shahar Michal and Tor Ivry and Omer Cohen and Moshe
                 Sipper and Danny Barash",
  title =        "Finding a Common Motif of {RNA} Sequences Using
                 Genetic Programming: The {GeRNAMo} System",
  journal =      j-TCBB,
  volume =       "4",
  number =       "4",
  pages =        "596--610",
  month =        oct,
  year =         "2007",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:58:47 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "We focus on finding a consensus motif of a set of
                 homologous or functionally related RNA molecules.
                 Recent approaches to this problem have been limited to
                 simple motifs, require sequence alignment, and make
                 prior assumptions concerning the data set. We use
                 genetic programming to predict RNA consensus motifs
                 based solely on the data set. Our system -- dubbed
                 GeRNAMo (Genetic programming of RNA Motifs) -- predicts
                 the most common motifs without sequence alignment and
                 is capable of dealing with any motif size. Our program
                 only requires the maximum number of stems in the motif,
                 and if prior knowledge is available the user can
                 specify other attributes of the motif (e.g., the range
                 of the motif's minimum and maximum sizes), thereby
                 increasing both sensitivity and speed. We describe
                 several experiments using either ferritin iron response
                 element (IRE); signal recognition particle (SRP); or
                 microRNA sequences, showing that the most common motif
                 is found repeatedly, and that our system offers
                 substantial advantages over previous methods.",
  acknowledgement = ack-nhfb,
}

@Article{McIntosh:2007:HCR,
  author =       "Tara McIntosh and Sanjay Chawla",
  title =        "High Confidence Rule Mining for Microarray Analysis",
  journal =      j-TCBB,
  volume =       "4",
  number =       "4",
  pages =        "611--623",
  month =        oct,
  year =         "2007",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:58:47 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "We present an association rule mining method for
                 mining high confidence rules, which describe
                 interesting gene relationships from microarray
                 datasets. Microarray datasets typically contain an
                 order of magnitude more genes than experiments,
                 rendering many data mining methods impractical as they
                 are optimised for sparse datasets. A new family of
                 row-enumeration rule mining algorithms have emerged to
                 facilitate mining in dense datasets. These algorithms
                 rely on pruning infrequent relationships to reduce the
                 search space by using the support measure. This major
                 shortcoming results in the pruning of many potentially
                 interesting rules with low support but high confidence.
                 We propose a new row-enumeration rule mining method,
                 MaxConf, to mine high confidence rules from microarray
                 data. MaxConf is a support-free algorithm which
                 directly uses the confidence measure to effectively
                 prune the search space. Experiments on three microarray
                 datasets show that MaxConf outperforms support-based
                 rule mining with respect to scalability and rule
                 extraction. Furthermore, detailed biological analyses
                 demonstrate the effectiveness of our approach -- the
                 rules discovered by MaxConf are substantially more
                 interesting and meaningful compared with support-based
                 methods.",
  acknowledgement = ack-nhfb,
  keywords =     "association rules; data mining; high confidence rule
                 mining; microarray analysis",
}

@Article{Ponzoni:2007:IAR,
  author =       "Ignacio Ponzoni and Francisco Azuaje and Juan Augusto
                 and David Glass",
  title =        "Inferring Adaptive Regulation Thresholds and
                 Association Rules from Gene Expression Data through
                 Combinatorial Optimization Learning",
  journal =      j-TCBB,
  volume =       "4",
  number =       "4",
  pages =        "624--634",
  month =        oct,
  year =         "2007",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:58:47 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "There is a need to design computational methods to
                 support the prediction of gene regulatory networks.
                 Such models should offer both biologically-meaningful
                 and computationally-accurate predictions, which in
                 combination with other techniques may improve
                 large-scale, integrative studies. This paper presents a
                 new machine learning method for the prediction of
                 putative regulatory associations from expression data,
                 which exhibit properties never or only partially
                 addressed by other techniques recently published. The
                 method was tested on a Saccharomyces cerevisiae gene
                 expression dataset. The results were statistically
                 validated and compared with the relationships inferred
                 by two machine learning approaches to gene regulatory
                 network prediction. Furthermore, the resulting
                 predictions were assessed using domain knowledge. The
                 proposed algorithm may be able to accurately predict
                 relevant biological associations between genes. One of
                 the most relevant features of this new method is the
                 prediction of adaptive regulation thresholds for the
                 discretization of gene expression values, which is
                 required prior to the rule association learning
                 process. Moreover, an important advantage consists of
                 its low computational cost to infer association rules.
                 The proposed system may significantly support
                 exploratory, large-scale studies of automated
                 identification of potentially-relevant gene expression
                 associations.",
  acknowledgement = ack-nhfb,
  keywords =     "combinatorial optimization; decision trees; gene
                 expression data; genetic regulatory networks;
                 machine-learning",
}

@Article{Noman:2007:IGR,
  author =       "Nasimul Noman and Hitoshi Iba",
  title =        "Inferring Gene Regulatory Networks using Differential
                 Evolution with Local Search Heuristics",
  journal =      j-TCBB,
  volume =       "4",
  number =       "4",
  pages =        "634--647",
  month =        oct,
  year =         "2007",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:58:47 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "We present a memetic algorithm for evolving the
                 structure of biomolecular interactions and inferring
                 the effective kinetic parameters from the time series
                 data of gene expression using the decoupled system
                 formalism. We propose an Information Criteria based
                 fitness evaluation for gene network model selection
                 instead of the conventional Mean Squared Error (MSE)
                 based fitness evaluation. A hill-climbing local-search
                 method has been incorporated in our evolutionary
                 algorithm for efficiently attaining the skeletal
                 architecture which is most frequently observed in
                 biological networks. The suitability of the method is
                 tested in gene circuit reconstruction experiments,
                 varying the network dimension and/or characteristics,
                 the amount of gene expression data used for inference
                 and the noise level present in expression profiles. The
                 reconstruction method inferred the network topology and
                 the regulatory parameters with high accuracy.
                 Nevertheless, the performance is limited to the amount
                 of expression data used and the noise level present in
                 the data. The proposed fitness function has been found
                 more suitable for identifying correct network topology
                 and for estimating the accurate parameter values
                 compared to the existing ones. Finally, we applied the
                 methodology for analyzing the cell-cycle gene
                 expression data of budding yeast and reconstructed the
                 network of some key regulators.",
  acknowledgement = ack-nhfb,
  keywords =     "biology and genetics; gene regulatory system; global
                 optimization; inverse problems; medicine and science;
                 memetic algorithm; microarray data; transcriptional
                 regulation",
}

@Article{Ho:2007:ITS,
  author =       "Shinn-Ying Ho and Chih-Hung Hsieh and Fu-Chieh Yu and
                 Hui-Ling Huang",
  title =        "An Intelligent Two-Stage Evolutionary Algorithm for
                 Dynamic Pathway Identification From Gene Expression
                 Profiles",
  journal =      j-TCBB,
  volume =       "4",
  number =       "4",
  pages =        "648--704",
  month =        oct,
  year =         "2007",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:58:47 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "From gene expression profiles, it is desirable to
                 rebuild cellular dynamic regulation networks to
                 discover more delicate and substantial functions in
                 molecular biology, biochemistry, bioengineering and
                 pharmaceutics. S-system model is suitable to
                 characterize biochemical network systems and capable to
                 analyze the regulatory system dynamics. However,
                 inference of an S-system model of N-gene genetic
                 networks has 2N(N+1) parameters in a set of non-linear
                 differential equations to be optimized. This paper
                 proposes an intelligent two-stage evolutionary
                 algorithm (iTEA) to efficiently infer the S-system
                 models of genetic networks from time-series data of
                 gene expression. To cope with curse of dimensionality,
                 the proposed algorithm consists of two stages where
                 each uses a divide-and-conquer strategy. The
                 optimization problem is first decomposed into N
                 subproblems having 2(N+1) parameters each. At the first
                 stage, each subproblem is solved using a novel
                 intelligent genetic algorithm (IGA) with intelligent
                 crossover based on orthogonal experimental design
                 (OED). At the second stage, the obtained N solutions to
                 the N subproblems are combined and refined using an
                 OED-based simulated annealing algorithm for handling
                 noisy gene expression profiles. The effectiveness of
                 iTEA is evaluated using simulated expression patterns
                 with and without noise running on a single-processor
                 PC. It is shown that 1) IGA is efficient enough to
                 solve subproblems; 2) IGA is significantly superior to
                 the existing method SPXGA; and 3) iTEA performs well in
                 inferring S-system models for dynamic pathway
                 identification.",
  acknowledgement = ack-nhfb,
  keywords =     "divide-and-conquer; evolutionary algorithm; genetic
                 network; orthogonal experimental design; pathway
                 identification; S-system model",
}

@Article{Bereg:2007:PNB,
  author =       "Sergey Bereg and Yuanyi Zhang",
  title =        "Phylogenetic Networks Based on the Molecular Clock
                 Hypothesis",
  journal =      j-TCBB,
  volume =       "4",
  number =       "4",
  pages =        "661--667",
  month =        oct,
  year =         "2007",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:58:47 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "A classical result in phylogenetic trees is that a
                 binary phylogenetic tree adhering to the molecular
                 clock hypothesis exists if and only if the matrix of
                 distances between taxa is ultrametric. The ultrametric
                 condition is very restrictive. In this paper we study
                 phylogenetic networks that can be constructed assuming
                 the molecular clock hypothesis. We characterize
                 distance matrices that admit such networks for 3 and 4
                 taxa. We also design two algorithms for constructing
                 networks optimizing the least-squares fit.",
  acknowledgement = ack-nhfb,
  keywords =     "least-squares fit; molecular clock hypothesis;
                 Phylogenetic Networks",
}

@Article{Blazewicz:2007:SPD,
  author =       "Jacek Blazewicz and Edmund Burke and Marta Kasprzak
                 and Alexandr Kovalev and Mikhail Kovalyov",
  title =        "Simplified Partial Digest Problem: Enumerative and
                 Dynamic Programming Algorithms",
  journal =      j-TCBB,
  volume =       "4",
  number =       "4",
  pages =        "668--680",
  month =        oct,
  year =         "2007",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:58:47 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "We study the Simplified Partial Digest Problem (SPDP),
                 which is a mathematical model for a new simplified
                 partial digest method of genome mapping. This method is
                 easy for laboratory implementation and robust with
                 respect to the experimental errors. SPDP is NP-hard in
                 the strong sense. We present an $O(n2^n)$ time
                 enumerative algorithm and an $O(n^{2q})$ time dynamic
                 programming algorithm for the error-free SPDP, where
                 $n$ is the number of restriction sites and $q$ is the
                 number of distinct intersite distances. We also give
                 examples of the problem, in which there are
                 $2^{\frac{n+2}{3}-1}$ non-congruent solutions. These
                 examples partially answer a question recently posed in
                 the literature about the number of solutions of SPDP.
                 We adapt our enumerative algorithm for handling SPDP
                 with imprecise input data. Finally, we describe and
                 discuss the results of the computer experiments with
                 our algorithms.",
  acknowledgement = ack-nhfb,
  keywords =     "algorithm design and analysis; dynamic programming;
                 genome mapping; imprecise information; restriction site
                 analysis",
}

@Article{Xu:2007:IGR,
  author =       "Rui Xu and Donald {Wunsch II} and Ronald Frank",
  title =        "Inference of Genetic Regulatory Networks with
                 Recurrent Neural Network Models Using Particle Swarm
                 Optimization",
  journal =      j-TCBB,
  volume =       "4",
  number =       "4",
  pages =        "681--692",
  month =        oct,
  year =         "2007",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:58:47 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "Genetic regulatory network inference is critically
                 important for revealing fundamental cellular processes,
                 investigating gene functions, and understanding their
                 relations. The availability of time series gene
                 expression data makes it possible to investigate the
                 gene activities of whole genomes, rather than those of
                 only a pair of genes or among several genes. However,
                 current computational methods do not sufficiently
                 consider the temporal behavior of this type of data and
                 lack the capability to capture the complex nonlinear
                 system dynamics. We propose a recurrent neural network
                 (RNN) and particle swarm optimization (PSO) approach to
                 infer genetic regulatory networks from time series gene
                 expression data. Under this framework, gene interaction
                 is explained through a connection weight matrix. Based
                 on the fact that the measured time points are limited
                 and the assumption that the genetic networks are
                 usually sparsely connected, we present a PSO-based
                 search algorithm to unveil potential genetic network
                 constructions that fit well with the time series data
                 and explore possible gene interactions. Furthermore,
                 PSO is used to train the RNN and determine the network
                 parameters. Our approach has been applied to both
                 synthetic and real data sets. The results demonstrate
                 that the RNN/PSO can provide meaningful insights in
                 understanding the nonlinear dynamics of the gene
                 expression time series and revealing potential
                 regulatory interactions between genes.",
  acknowledgement = ack-nhfb,
  keywords =     "genetic regulatory networks; particle swarm
                 optimization; recurrent neural networks; time series
                 gene expression data",
}

@Article{Agius:2007:TSA,
  author =       "Phaedra Agius and Barry Kreiswirth and Steve Naidich
                 and Kristin Bennett",
  title =        "Typing Staphylococcus aureus Using the spa Gene and
                 Novel Distance Measures",
  journal =      j-TCBB,
  volume =       "4",
  number =       "4",
  pages =        "693--704",
  month =        oct,
  year =         "2007",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:58:47 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "We developed an approach for identifying groups or
                 families of Staphylococcus aureus bacteria based on
                 genotype data. With the emergence of drug resistant
                 strains, S. aureus represents a significant human
                 health threat. Identifying the family types efficiently
                 and quickly is crucial in community settings. Here, we
                 develop a hybrid sequence algorithm approach to type
                 this bacterium using only its spa gene. Two of the
                 sequence algorithms we used are well established, while
                 the third, the Best Common Gap-Weighted Sequence
                 (BCGS), is novel. We combined the sequence algorithms
                 with a weighted match/mismatch algorithm for the spa
                 sequence ends. Normalized similarity scores and
                 distances between the sequences were derived and used
                 within unsupervised clustering methods. The resulting
                 spa groupings correlated strongly with the groups
                 defined by the well-established Multi locus sequence
                 typing (MLST) method. Spa typing is preferable to MLST
                 typing which types seven genes instead of just one.
                 Furthermore, our spa clustering methods can be
                 fine-tuned to be more discriminative than MLST,
                 identifying new strains that the MLST method may not.
                 Finally, we performed a multidimensional scaling of our
                 distance matrices to visualize the relationship between
                 isolates. The proposed methodology provides a promising
                 new approach to molecular epidemiology.",
  acknowledgement = ack-nhfb,
  keywords =     "clustering; genotyping; molecular epidemiology;
                 sequence algorithms; staphylococcus aureus",
}

@Article{Congdon:2008:EIC,
  author =       "Clare Bates Congdon and Joseph C. Aman and Gerardo M.
                 Nava and H. Rex Gaskins and Carolyn J. Mattingly",
  title =        "An Evaluation of Information Content as a Metric for
                 the Inference of Putative Conserved Noncoding Regions
                 in {DNA} Sequences Using a Genetic Algorithms
                 Approach",
  journal =      j-TCBB,
  volume =       "5",
  number =       "1",
  pages =        "1--14",
  month =        jan,
  year =         "2008",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:59:11 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "In previous work, we presented GAMI [1], an approach
                 to motif inference that uses a genetic algorithms
                 search. GAMI is designed specifically to find putative
                 conserved regulatory motifs in noncoding regions of
                 divergent species, and is designed to allow for
                 analysis of long nucleotide sequences. In this work, we
                 compare GAMI's performance when run with its original
                 fitness function (a simple count of the number of
                 matches) and when run with information content, as well
                 as several variations on these metrics. Results
                 indicate that information content does not identify
                 highly conserved regions, and thus is not the
                 appropriate metric for this task, while variations on
                 information content as well as the original metric
                 succeed in identifying putative conserved regions.",
  acknowledgement = ack-nhfb,
  keywords =     "biology and genetics; evolutionary computing and
                 genetic algorithms",
}

@Article{Boscolo:2008:ITE,
  author =       "Riccardo Boscolo and James C. Liao and Vwani P.
                 Roychowdhury",
  title =        "An Information Theoretic Exploratory Method for
                 Learning Patterns of Conditional Gene Coexpression from
                 Microarray Data",
  journal =      j-TCBB,
  volume =       "5",
  number =       "1",
  pages =        "15--24",
  month =        jan,
  year =         "2008",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:59:11 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "In this article, we introduce an exploratory framework
                 for learning patterns of conditional co-expression in
                 gene expression data. The main idea behind the proposed
                 approach consists of estimating how the information
                 content shared by a set of M nodes in a network (where
                 each node is associated to an expression profile)
                 varies upon conditioning on a set of L conditioning
                 variables (in the simplest case represented by a
                 separate set of expression profiles). The method is
                 non-parametric and it is based on the concept of
                 statistical co-information, which, unlike conventional
                 correlation based techniques, is not restricted in
                 scope to linear conditional dependency patterns.
                 Moreover, such conditional co-expression relationships
                 can potentially indicate regulatory interactions that
                 do not manifest themselves when only pair-wise
                 relationships are considered. A moment based
                 approximation of the co-information measure is derived
                 that efficiently gets around the problem of estimating
                 high-dimensional multi-variate probability density
                 functions from the data, a task usually not viable due
                 to the intrinsic sample size limitations that
                 characterize expression level measurements. By applying
                 the proposed exploratory method, we analyzed a whole
                 genome microarray assay of the eukaryote Saccharomices
                 cerevisiae and were able to learn statistically
                 significant patterns of conditional co-expression. A
                 selection of such interactions that carry a meaningful
                 biological interpretation are discussed.",
  acknowledgement = ack-nhfb,
  keywords =     "co-information; entropy; gene expression data;
                 information theory; statistical analysis",
}

@Article{Wiese:2008:REA,
  author =       "Kay C. Wiese and Alain A. Deschenes and Andrew G.
                 Hendriks",
  title =        "{RnaPredict---An} Evolutionary Algorithm for {RNA}
                 Secondary Structure Prediction",
  journal =      j-TCBB,
  volume =       "5",
  number =       "1",
  pages =        "25--41",
  month =        jan,
  year =         "2008",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:59:11 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "This paper presents two in-depth studies on
                 RnaPredict, an evolutionary algorithm for RNA secondary
                 structure prediction. The first study is an analysis of
                 the performance of two thermodynamic models, INN and
                 INN-HB. The correlation between the free energy of
                 predicted structures and the sensitivity is analyzed
                 for 19 RNA sequences. Although some variance is shown,
                 there is a clear trend between a lower free energy and
                 an increase in true positive base pairs. With
                 increasing sequence length, this correlation generally
                 decreases. In the second experiment, the accuracy of
                 the predicted structures for these 19 sequences are
                 compared against the accuracy of the structures
                 generated by the mfold dynamic programming algorithm
                 (DPA) and also to known structures. RnaPredict is shown
                 to outperform the minimum free energy structures
                 produced by mfold and has comparable performance when
                 compared to sub-optimal structures produced by mfold.",
  acknowledgement = ack-nhfb,
  keywords =     "evolutionary computation; RnaPredict; RNA secondary
                 structure prediction",
}

@Article{Rother:2008:SCP,
  author =       "Diego Rother and Guillermo Sapiro and Vijay Pande",
  title =        "Statistical Characterization of Protein Ensembles",
  journal =      j-TCBB,
  volume =       "5",
  number =       "1",
  pages =        "42--55",
  month =        jan,
  year =         "2008",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:59:11 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "When accounting for structural fluctuations or
                 measurement errors, a single rigid structure may not be
                 sufficient to represent a protein. One approach to
                 solve this problem is to represent the possible
                 conformations as a discrete set of observed
                 conformations, an ensemble. In this work, we follow a
                 different richer approach, and introduce a framework
                 for estimating probability density functions in very
                 high dimensions, and then apply it to represent
                 ensembles of folded proteins. This proposed approach
                 combines techniques such as kernel density estimation,
                 maximum likelihood, cross-validation, and
                 bootstrapping. We present the underlying theoretical
                 and computational framework and apply it to artificial
                 data and protein ensembles obtained from molecular
                 dynamics simulations. We compare the results with those
                 obtained experimentally, illustrating the potential and
                 advantages of this representation.",
  acknowledgement = ack-nhfb,
  keywords =     "Bayesian networks; bootstrapping; cross-validation;
                 density estimation; graphical models; maximum
                 likelihood; protein ensembles",
}

@Article{Cui:2008:EAA,
  author =       "Yun Cui and Lusheng Wang and Daming Zhu and Xiaowen
                 Liu",
  title =        "A $(1.5 + {\epsilon})$-Approximation Algorithm for
                 Unsigned Translocation Distance",
  journal =      j-TCBB,
  volume =       "5",
  number =       "1",
  pages =        "56--66",
  month =        jan,
  year =         "2008",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:59:11 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "Genome rearrangement is an important area in
                 computational biology and bioinformatics. The
                 translocation operation is one of the popular
                 operations for genome rearrangement. It was proved that
                 computing the unsigned translocation distance is
                 NP-hard. In this paper, we present a $(1.5 +
                 \epsilon)$-approximation algorithm for computing
                 unsigned translocation distance which improves upon the
                 best known 1.75-ratio. The running time of our
                 algorithm is $O(n^2 + (4/\epsilon)^1.5 \surd
                 \log(4/\epsilon)2 4^\epsilon)$, where $n$ is the total
                 number of genes in the genome.",
  acknowledgement = ack-nhfb,
  keywords =     "and approximation algorithms; genome rearrangement;
                 unsigned translocation",
}

@Article{Tan:2008:NBP,
  author =       "Tuan Zea Tan and Geok See Ng and Chai Quek",
  title =        "A Novel Biologically and Psychologically Inspired
                 Fuzzy Decision Support System: Hierarchical
                 Complementary Learning",
  journal =      j-TCBB,
  volume =       "5",
  number =       "1",
  pages =        "67--79",
  month =        jan,
  year =         "2008",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:59:11 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "A computational intelligent system that models the
                 human cognitive abilities may promise significant
                 performance in problem learning because human is
                 effective in learning and problem solving. Functionally
                 modeling the human cognitive abilities not only avoids
                 the details of the underlying neural mechanisms
                 performing the tasks, but also reduces the complexity
                 of the system. The complementary learning mechanism is
                 responsible for human pattern recognition, i.e. human
                 attends to positive and negative samples when making
                 decision. Furthermore, human concept learning is
                 organized in a hierarchical fashion. Such hierarchical
                 organization allows the divide-and-conquer approach to
                 the problem. Thus, integrating the functional models of
                 hierarchical organization and complementary learning
                 can potentially improve the performance in pattern
                 recognition. Hierarchical complementary learning
                 exhibits many of the desirable features of pattern
                 recognition. It is further supported by the
                 experimental results that verify the rationale of the
                 integration and that the hierarchical complementary
                 learning system is a promising pattern recognition
                 tool.",
  acknowledgement = ack-nhfb,
  keywords =     "cognitive learning; complementary learning; decision
                 support; fuzzy neural network; hierarchical model",
}

@Article{Ciocchetta:2008:ATS,
  author =       "Federica Ciocchetta and Corrado Priami and Paola
                 Quaglia",
  title =        "An Automatic Translation of {SBML} into Beta-Binders",
  journal =      j-TCBB,
  volume =       "5",
  number =       "1",
  pages =        "80--90",
  month =        jan,
  year =         "2008",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:59:11 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "A translation of SBML (Systems Biology Markup
                 Language) into a process algebra is proposed in order
                 to allow the formal specification, the simulation and
                 the formal analysis of biological models. Beta-binders,
                 a language with a quantitative stochastic extension, is
                 chosen for the translation. The proposed translation
                 focuses on the main components of SBML models, as
                 species and reactions. Furthermore, it satisfies the
                 compositional property, i.e. the translation of the
                 whole model is obtained by composing the translation of
                 the subcomponents. An automatic translator tool of SBML
                 models into Beta-binders has been implemented as well.
                 Finally, the translation of a simple model is
                 reported.",
  acknowledgement = ack-nhfb,
  keywords =     "biological systems; modeling; Process algebras;
                 systems biology; Systems Biology Markup Language
                 (SBML); translation tool",
}

@Article{Bocker:2008:CAM,
  author =       "Sebastian Bocker and Veli Makinen",
  title =        "Combinatorial Approaches for Mass Spectra
                 Recalibration",
  journal =      j-TCBB,
  volume =       "5",
  number =       "1",
  pages =        "91--100",
  month =        jan,
  year =         "2008",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:59:11 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "Mass spectrometry has become one of the most popular
                 analysis techniques in Proteomics and Systems Biology.
                 With the creation of larger datasets, the automated
                 recalibration of mass spectra becomes important to
                 ensure that every peak in the sample spectrum is
                 correctly assigned to some peptide and protein.
                 Algorithms for recalibrating mass spectra have to be
                 robust with respect to wrongly assigned peaks, as well
                 as efficient due to the amount of mass spectrometry
                 data. The recalibration of mass spectra leads us to the
                 problem of finding an optimal matching between mass
                 spectra under measurement errors. We have developed two
                 deterministic methods that allow robust computation of
                 such a matching: The first approach uses a
                 computational geometry interpretation of the problem,
                 and tries to find two parallel lines with constant
                 distance that stab a maximal number of points in the
                 plane. The second approach is based on finding a
                 maximal common approximate subsequence, and improves
                 existing algorithms by one order of magnitude
                 exploiting the sequential nature of the matching
                 problem. We compare our results to a computational
                 geometry algorithm using a topological line-sweep.",
  acknowledgement = ack-nhfb,
  keywords =     "biotechnology; combinatorial pattern matching;
                 computational geometry; mass spectrometry",
}

@Article{Barzuza:2008:CPP,
  author =       "Tamar Barzuza and Jacques S. Beckmann and Ron Shamir
                 and Itsik Pe'er",
  title =        "Computational Problems in Perfect Phylogeny
                 Haplotyping: Typing without Calling the Allele",
  journal =      j-TCBB,
  volume =       "5",
  number =       "1",
  pages =        "101--109",
  month =        jan,
  year =         "2008",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:59:11 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "A haplotype is an m-long binary vector. The
                 xor-genotype of two haplotypes is the m-vector of their
                 coordinate-wise xor. We study the following problem:
                 Given a set of xor-genotypes, reconstruct their
                 haplotypes so that the set of resulting haplotypes can
                 be mapped onto a perfect phylogeny tree. The question
                 is motivated by studying population evolution in human
                 genetics, and is a variant of the perfect phylogeny
                 haplotyping problem that has received intensive
                 attention recently. Unlike the latter problem, in which
                 the input is `full' genotypes, here we assume less
                 informative input, and so may be more economical to
                 obtain experimentally. Building on ideas of Gusfield,
                 we show how to solve the problem in polynomial time, by
                 a reduction to the graph realization problem. The
                 actual haplotypes are not uniquely determined by that
                 tree they map onto, and the tree itself may or may not
                 be unique. We show that tree uniqueness implies
                 uniquely determined haplotypes, up to inherent degrees
                 of freedom, and give a sufficient condition for the
                 uniqueness. To actually determine the haplotypes given
                 the tree, additional information is necessary. We show
                 that two or three full genotypes suffice to reconstruct
                 all the haplotypes, and present a linear algorithm for
                 identifying those genotypes.",
  acknowledgement = ack-nhfb,
  keywords =     "graph realization; haplotypes; perfect phylogeny;
                 XOR-genotypes",
}

@Article{Chin:2008:DMR,
  author =       "Francis Chin and Henry C. M. Leung",
  title =        "{DNA} Motif Representation with Nucleotide
                 Dependency",
  journal =      j-TCBB,
  volume =       "5",
  number =       "1",
  pages =        "110--119",
  month =        jan,
  year =         "2008",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:59:11 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "The problem of discovering novel motifs of binding
                 sites is important to the understanding of gene
                 regulatory networks. Motifs are generally represented
                 by matrices (PWM or PSSM) or strings. However, these
                 representations cannot model biological binding sites
                 well because they fail to capture nucleotide
                 interdependence. It has been pointed out by many
                 researchers that the nucleotides of the DNA binding
                 site cannot be treated independently, e.g. the binding
                 sites of zinc finger in proteins. In this paper, a new
                 representation called Scored PositionSpecific Pattern
                 (SPSP), which is a generalization of the matrix and
                 string representations, is introduced which takes into
                 consideration the dependent occurrences of neighboring
                 nucleotides. Even though the problem of discovering the
                 optimal motif in SPSP representation is proved to
                 beNP-hard, we introduce a heuristic algorithm called
                 SPSP-Finder, which can effectively find optimal motifs
                 in most simulated cases and some real cases for which
                 existing popular motif finding software, such as
                 Weeder, MEME and AlignACE, fail.",
  acknowledgement = ack-nhfb,
  keywords =     "Computing methodologies; design methodology; pattern
                 analysis; pattern recognition",
}

@Article{Yin:2008:NAC,
  author =       "Zong-Xian Yin and Jung-Hsien Chiang",
  title =        "Novel Algorithm for Coexpression Detection in
                 Time-Varying Microarray Data Sets",
  journal =      j-TCBB,
  volume =       "5",
  number =       "1",
  pages =        "120--135",
  month =        jan,
  year =         "2008",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:59:11 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "When analyzing the results of microarray experiments,
                 biologists generally use unsupervised categorization
                 tools. However, such tools regard each time point as an
                 independent dimension and utilize the Euclidean
                 distance to compute the similarities between
                 expressions. Furthermore, some of these methods require
                 the number of clusters to be determined in advance,
                 which is clearly impossible in the case of a new
                 dataset. Therefore, this study proposes a novel scheme,
                 designated as the Variation-based Co-expression
                 Detection (VCD) algorithm, to analyze the trends of
                 expressions based on their variation over time. The
                 proposed algorithm has two advantages. First, it is
                 unnecessary to determine the number of clusters in
                 advance since the algorithm automatically detects those
                 genes whose profiles are grouped together and creates
                 patterns for these groups. Second, the algorithm
                 features a new measurement criterion for calculating
                 the degree of change of the expressions between
                 adjacent time points and evaluating their trend
                 similarities. Three real-world microarray datasets are
                 employed to evaluate the performance of the proposed
                 algorithm.",
  acknowledgement = ack-nhfb,
  keywords =     "bioinformatics; clustering; data mining; gene
                 expression; pattern analysis; time series analysis",
}

@Article{Goeffon:2008:PTN,
  author =       "Adrien Goeffon and Jean-Michel Richer and Jin-Kao
                 Hao",
  title =        "Progressive Tree Neighborhood Applied to the Maximum
                 Parsimony Problem",
  journal =      j-TCBB,
  volume =       "5",
  number =       "1",
  pages =        "136--145",
  month =        jan,
  year =         "2008",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:59:11 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "The Maximum Parsimony problem aims at reconstructing a
                 phylogenetic tree from DNA sequences while minimizing
                 the number of genetic transformations. To solve this
                 NP-complete problem, heuristic methods have been
                 developed, often based on local search. In this
                 article, we focus on the influence of the neighborhood
                 relations. After analyzing the advantages and drawbacks
                 of the well-known NNI, SPR and TBR neighborhoods, we
                 introduce the concept of Progressive Neighborhood which
                 consists in constraining progressively the size of the
                 neighborhood as the search advances. We empirically
                 show that applied to the Maximum Parsimony problem,
                 this progressive neighborhood turns out to be more
                 efficient and robust than the classic neighborhoods
                 using a descent algorithm. Indeed, it allows to find
                 better solutions with a smaller number of iterations or
                 trees evaluated.",
  acknowledgement = ack-nhfb,
  keywords =     "combinatorial algorithms; maximum parsimony;
                 optimization; phylogeny reconstruction",
}

@Article{Anonymous:2008:RL,
  author =       "Anonymous",
  title =        "2007 Reviewers List",
  journal =      j-TCBB,
  volume =       "5",
  number =       "1",
  pages =        "146--147",
  month =        jan,
  year =         "2008",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:59:11 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Anonymous:2008:AI,
  author =       "Anonymous",
  title =        "2007 Annual Index",
  journal =      j-TCBB,
  volume =       "5",
  number =       "1",
  pages =        "148--158",
  month =        jan,
  year =         "2008",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:59:11 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Anonymous:2008:CAE,
  author =       "Anonymous",
  title =        "Call for Applications for {Editor-in-Chief}",
  journal =      j-TCBB,
  volume =       "5",
  number =       "1",
  pages =        "159--159",
  month =        jan,
  year =         "2008",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:59:11 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Jackson:2008:CGM,
  author =       "Benjamin N. Jackson and Patrick S. Schnable and
                 Srinivas Aluru",
  title =        "Consensus Genetic Maps as Median Orders from
                 Inconsistent Sources",
  journal =      j-TCBB,
  volume =       "5",
  number =       "2",
  pages =        "161--171",
  month =        apr,
  year =         "2008",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:59:29 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "A genetic map is an ordering of genetic markers
                 calculated from a population of known lineage. While
                 traditionally a map has been generated from a single
                 population for each species, recently researchers have
                 created maps from multiple populations. In the face of
                 these new data, we address the need to find a consensus
                 map --- a map that combines the information from
                 multiple partial and possibly inconsistent input maps.
                 We model each input map as a partial order and
                 formulate the consensus problem as finding a median
                 partial order. Finding the median of multiple total
                 orders (preferences or rankings)is a well studied
                 problem in social choice. We choose to find the median
                 using the weighted symmetric difference distance, a
                 more general version of both the symmetric difference
                 distance and the Kemeny distance. Finding a median
                 order using this distance is NP-hard. We show that for
                 our chosen weight assignment, a median order satisfies
                 the positive responsiveness, extended Condorcet,and
                 unanimity criteria. Our solution involves finding the
                 maximum acyclic subgraph of a weighted directed graph.
                 We present a method that dynamically switches between
                 an exact branch and bound algorithm and a heuristic
                 algorithm, and show that for real data from closely
                 related organisms, an exact median can often be found.
                 We present experimental results using seven populations
                 of the crop plant Zea mays.",
  acknowledgement = ack-nhfb,
  keywords =     "genetic map; Kemeny distance; median order; path and
                 circuit problems; symmetric difference distance.",
}

@Article{Gupta:2008:EDS,
  author =       "Anupam Gupta and Ziv Bar-Joseph",
  title =        "Extracting Dynamics from Static Cancer Expression
                 Data",
  journal =      j-TCBB,
  volume =       "5",
  number =       "2",
  pages =        "172--182",
  month =        apr,
  year =         "2008",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:59:29 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "Static expression experiments analyze samples from
                 many individuals. These samples are often snapshots of
                 the progression of a certain disease such as cancer.
                 This raises an intriguing question: Can we determine a
                 temporal order for these samples? Such an ordering can
                 lead to better understanding of the dynamics of the
                 disease and to the identification of genes associated
                 with its progression. In this paper we formally prove,
                 for the first time, that under a model for the dynamics
                 of the expression levels of a single gene, it is indeed
                 possible to recover the correct ordering of the static
                 expression datasets by solving an instance of the
                 traveling salesman problem (TSP). In addition, we
                 devise an algorithm that combines a TSP heuristic and
                 probabilistic modeling for inferring the underlying
                 temporal order of the microarray experiments. This
                 algorithm constructs probabilistic continuous curves to
                 represent expression profiles leading to accurate
                 temporal reconstruction for human data. Applying our
                 method to cancer expression data we show that the
                 ordering derived agrees well with survival duration. A
                 classifier that utilizes this ordering improves upon
                 other classifiers suggested for this task. The set of
                 genes displaying consistent behavior for the determined
                 ordering are enriched for genes associated with cancer
                 progression.",
  acknowledgement = ack-nhfb,
  keywords =     "EM; glioma; microarrays; traveling salesman",
}

@Article{Thomas:2008:GMR,
  author =       "John Thomas and Naren Ramakrishnan and Chris
                 Bailey-Kellogg",
  title =        "Graphical Models of Residue Coupling in Protein
                 Families",
  journal =      j-TCBB,
  volume =       "5",
  number =       "2",
  pages =        "183--197",
  month =        apr,
  year =         "2008",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:59:29 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "Many statistical measures and algorithmic techniques
                 have been proposed for studying residue coupling in
                 protein families. Generally speaking, two residue
                 positions are considered coupled if, in the sequence
                 record, some of their amino acid type combinations are
                 significantly more common than others. While the
                 proposed approaches have proven useful in finding and
                 describing coupling, a significant missing component is
                 a formal probabilistic model that explicates and
                 compactly represents the coupling, integrates
                 information about sequence,structure, and function, and
                 supports inferential procedures for analysis,
                 diagnosis, and prediction. We present an approach to
                 learning and using probabilistic graphical models of
                 residue coupling. These models capture significant
                 conservation and coupling constraints observable in a
                 multiply-aligned set of sequences. Our approach can
                 place a structural prior on considered couplings, so
                 that all identified relationships have direct
                 mechanistic explanations. It can also incorporate
                 information about functional classes, and thereby learn
                 a differential graphical model that distinguishes
                 constraints common to all classes from those unique to
                 individual classes. Such differential models separately
                 account for class-specific conservation and family-wide
                 coupling, two different sources of sequence
                 covariation. They are then able to perform
                 interpretable functional classification of new
                 sequences, explaining classification decisions in terms
                 of the underlying conservation and coupling
                 constraints. We apply our approach in studies of both G
                 protein-coupled receptors and PDZ domains, identifying
                 and analyzing family-wide and class-specific
                 constraints, and performing functional classification.
                 The results demonstrate that graphical models of
                 residue coupling provide a powerful tool for
                 uncovering, representing, and utilizing significant
                 sequence structure-function relationships in protein
                 families.",
  acknowledgement = ack-nhfb,
  keywords =     "correlated mutations; evolutionary covariation;
                 functional classification; graphical models;
                 sequence-structure-function relationships",
}

@Article{Mena-Chalco:2008:IPC,
  author =       "Jesus Mena-Chalco and Helaine Carrer and Yossi Zana
                 and Roberto M. {Cesar Jr.}",
  title =        "Identification of Protein Coding Regions Using the
                 Modified Gabor-Wavelet Transform",
  journal =      j-TCBB,
  volume =       "5",
  number =       "2",
  pages =        "198--207",
  month =        apr,
  year =         "2008",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:59:29 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "An important topic in genomic sequence analysis is the
                 identification of protein coding regions. In this
                 context, several coding DNA model-independent methods,
                 based on the occurrence of specific patterns of
                 nucleotides at coding regions, have been proposed.
                 Nonetheless, these methods have not been completely
                 suitable due to their dependence on an empirically
                 pre-defined window length required for a local analysis
                 of a DNA region. We introduce a method, based on a
                 modified Gabor-wavelet transform (MGWT), for the
                 identification of protein coding regions. This novel
                 transform is tuned to analyze periodic signal
                 components and presents the advantage of being
                 independent of the window length. We compared the
                 performance of the MGWT with other methods using
                 eukaryote datasets. The results show that the MGWT
                 outperforms all assessed model-independent methods with
                 respect to identification accuracy. These results
                 indicate that the source of at least part of the
                 identification errors produced by the previous methods
                 is the fixed working scale. The new method not only
                 avoids this source of errors, but also makes available
                 a tool for detailed exploration of the nucleotide
                 occurrence.",
  acknowledgement = ack-nhfb,
  keywords =     "biology and genetics; pattern recognition; signal
                 processing",
}

@Article{deJong:2008:SSS,
  author =       "Hidde de Jong and Michel Page",
  title =        "Search for Steady States of Piecewise-Linear
                 Differential Equation Models of Genetic Regulatory
                 Networks",
  journal =      j-TCBB,
  volume =       "5",
  number =       "2",
  pages =        "208--222",
  month =        apr,
  year =         "2008",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:59:29 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "Analysis of the attractors of a genetic regulatory
                 network gives a good indication of the possible
                 functional modes of the system. In this paper we are
                 concerned with the problem of finding all steady states
                 of genetic regulatory networks described by
                 piecewise-linear differential equation (PLDE) models.
                 We show that the problem is NP-hard and translate it
                 into a propositional satisfiability (SAT) problem. This
                 allows the use of existing, efficient SAT solvers and
                 has enabled the development of a steady state search
                 module of the computer tool Genetic Network Analyzer
                 (GNA). The practical use of this module is demonstrated
                 by means of the analysis of a number of relatively
                 small bacterial regulatory networks as well as randomly
                 generated networks of several hundreds of genes.",
  acknowledgement = ack-nhfb,
  keywords =     "genetic regulatory networks; large-scale systems;
                 piecewise-linear differential equations; SAT problem;
                 steady states",
}

@Article{Sadot:2008:TVB,
  author =       "Avital Sadot and Jasmin Fisher and Dan Barak and
                 Yishai Admanit and Michael J. Stern and E. Jane Albert
                 Hubbard and David Harel",
  title =        "Toward Verified Biological Models",
  journal =      j-TCBB,
  volume =       "5",
  number =       "2",
  pages =        "223--234",
  month =        apr,
  year =         "2008",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:59:29 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "The last several decades have witnessed a vast
                 accumulation of biological data and data analysis. Many
                 of these data sets represent only a small fraction of
                 the system's behavior, making the visualization of full
                 system behavior difficult. A more complete
                 understanding of a biological system is gained when
                 different types of data (and/or conclusions drawn from
                 the data) are integrated into a larger-scale
                 representation or model of the system. Ideally, this
                 type of model is consistent with all available data
                 about the system, and it is then used to generate
                 additional hypotheses to be tested. Computer-based
                 methods intended to formulate models that integrate
                 various events and to test the consistency of these
                 models with respect to the laboratory-based
                 observations on which they are based are potentially
                 very useful. In addition, in contrast to informal
                 models, the consistency of such formal computer-based
                 models with laboratory data can be tested rigorously by
                 methods of formal verification. We combined two formal
                 modeling approaches in computer science that were
                 originally developed for non-biological system design.
                 One is the inter-object approach using the language of
                 live sequence charts (LSCs) with the Play-Engine tool,
                 and the other is the intra-object approach using the
                 language of statecharts and Rhapsody as the tool.
                 Integration is carried out using InterPlay, a
                 simulation engine coordinator. Using these tools, we
                 constructed a combined model comprising three modules.
                 One module represents the early lineage of the somatic
                 gonad of C. elegans in LSCs, while a second more
                 detailed module in statecharts represents an
                 interaction between two cells within this lineage that
                 determine their developmental outcome. Using the
                 advantages of the tools, we created a third module
                 representing a set of key experimental data using LSCs.
                 We tested the combined statechart-LSC model by showing
                 that the simulations were consistent with the set of
                 experimental LSCs. This small-scale modular example
                 demonstrates the potential for using similar approaches
                 for verification by exhaustive testing of models by
                 LSCs. It also shows the advantages of these approaches
                 for modeling biology.",
  acknowledgement = ack-nhfb,
  keywords =     "C. elegans; modeling; statecharts; verification",
}

@Article{Spillner:2008:CPD,
  author =       "Andreas Spillner and Binh T. Nguyen and Vincent
                 Moulton",
  title =        "Computing Phylogenetic Diversity for Split Systems",
  journal =      j-TCBB,
  volume =       "5",
  number =       "2",
  pages =        "235--244",
  month =        apr,
  year =         "2008",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:59:29 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "In conservation biology it is a central problem to
                 measure, predict, and preserve biodiversity as species
                 face extinction. In 1992 Faith proposed measuring the
                 diversity of a collection of species in terms of their
                 relationships on a phylogenetic tree, and to use this
                 information to identify collections of species with
                 high diversity. Here we are interested in some variants
                 of the resulting optimization problem that arise when
                 considering species whose evolution is better
                 represented by a network rather than a tree. More
                 specifically, we consider the problem of computing
                 phylogenetic diversity relative to a split system on a
                 collection of species of size $n$. We show that for
                 general split systems this problem is NP-hard. In
                 addition we provide some efficient algorithms for some
                 special classes of split systems, in particular
                 presenting an optimal $O(n)$ time algorithm for
                 phylogenetic trees and an $O(n\log n + n k)$ time
                 algorithm for choosing an optimal subset of size $k$
                 relative to a circular split system.",
  acknowledgement = ack-nhfb,
  keywords =     "biology and genetics; life and medical sciences",
}

@Article{Lancia:2008:HDA,
  author =       "Giuseppe Lancia and R. Ravi and Romeo Rizzi",
  title =        "Haplotyping for Disease Association: {A} Combinatorial
                 Approach",
  journal =      j-TCBB,
  volume =       "5",
  number =       "2",
  pages =        "245--251",
  month =        apr,
  year =         "2008",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:59:29 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "We consider a combinatorial problem derived from
                 haplotyping a population with respect to a genetic
                 disease, either recessive or dominant. Given a set of
                 individuals, partitioned into healthy and diseased, and
                 the corresponding sets of genotypes, we want to infer
                 ``bad'' and ``good'' haplotypes to account for these
                 genotypes and for the disease. Assume e.g. the disease
                 is recessive. Then, the resolving haplotypes must
                 consist of \emph{bad} and \emph{good} haplotypes, so
                 that (i) each genotype belonging to a diseased
                 individual is explained by a pair of bad haplotypes and
                 (ii) each genotype belonging to a healthy individual is
                 explained by a pair of haplotypes of which at least one
                 is good. We prove that the associated decision problem
                 is NP-complete. However, we also prove that there is a
                 simple solution, provided the data satisfy a very weak
                 requirement.",
  acknowledgement = ack-nhfb,
  keywords =     "biology and genetics; combinatorics; discrete
                 mathematics",
}

@Article{Gusev:2008:HSG,
  author =       "Alexander Gusev and Ion I. M{\~a}ndoiu and Bogdan
                 Pa{\c{s}}aniuc",
  title =        "Highly Scalable Genotype Phasing by Entropy
                 Minimization",
  journal =      j-TCBB,
  volume =       "5",
  number =       "2",
  pages =        "252--261",
  month =        apr,
  year =         "2008",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:59:29 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "A Single Nucleotide Polymorphism (SNP) is a position
                 in the genome at which two or more of the possible four
                 nucleotides occur in a large percentage of the
                 population. SNPsaccount for most of the genetic
                 variability between individuals,and mapping SNPs in the
                 human population has become the next high-priority in
                 genomics after the completion of the HumanGenome
                 project. In diploid organisms such as humans, there are
                 two non-identical copies of each autosomal chromosome.
                 A description of the SNPs in a chromosome is called a
                 haplotype. At present, it is prohibitively expensive to
                 directly determine the haplotypes of an individual, but
                 it is possible to obtain rather easily the conflated
                 SNP information in the so called genotype.
                 Computational methods for genotype phasing, i.e.,
                 inferring haplotypes from genotype data, have received
                 much attention in recent years as haplotype information
                 leads to increased statistical power of disease
                 association tests. However, many of the existing
                 algorithms have impractical running time for phasing
                 large genotype datasets such as those generated by the
                 international HapMap project. In this paper we propose
                 a highly scalable algorithm based on entropy
                 minimization. Our algorithm is capable of phasing both
                 unrelated and related genotypes coming from complex
                 pedigrees. Experimental results on both real and
                 simulated datasets show that our algorithm achieves a
                 phasing accuracy worse but close to that of best
                 existing methods while being several orders of
                 magnitude faster. The open source code implementation
                 of the algorithm and a web interface are publicly
                 available at
                 http://dna.engr.uconn.edu/~software/ent/.",
  acknowledgement = ack-nhfb,
  keywords =     "algorithm; genotype phasing; haplotype; Single
                 Nucleotide Polymorphism",
}

@Article{Zhao:2008:ICG,
  author =       "Wentao Zhao and Erchin Serpedin and Edward R.
                 Dougherty",
  title =        "Inferring Connectivity of Genetic Regulatory Networks
                 Using Information-Theoretic Criteria",
  journal =      j-TCBB,
  volume =       "5",
  number =       "2",
  pages =        "262--274",
  month =        apr,
  year =         "2008",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:59:29 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "Recently, the concept of mutual information has been
                 proposed for infering the structure of genetic
                 regulatory networks from gene expression profiling.
                 After analyzing the limitations of mutual information
                 in inferring the gene-to-gene interactions, this paper
                 introduces the concept of conditional mutual
                 information and based on it proposes two novel
                 algorithms to infer the connectivity structure of
                 genetic regulatory networks. One of the proposed
                 algorithms exhibits a better accuracy while the other
                 algorithm excels in simplicity and flexibility. By
                 exploiting the mutual information and conditional
                 mutual information, a practical metric is also proposed
                 to assess the likeliness of direct connectivity between
                 genes. This novel metric resolves a common limitation
                 associated with the current inference algorithms,
                 namely the situations where the gene connectivity is
                 established in terms of the dichotomy of being either
                 connected or disconnected. Based on the data sets
                 generated by synthetic networks, the performance of the
                 proposed algorithms is compared favorably relative to
                 existing state-of-the-art schemes. The proposed
                 algorithms are also applied on realistic biological
                 measurements, such as the cutaneous melanoma data set,
                 and biological meaningful results are inferred.",
  acknowledgement = ack-nhfb,
  keywords =     "biology and genetics; DNA microarray; genetic
                 regulatory network; information theory",
}

@Article{Bordewich:2008:NRS,
  author =       "Magnus Bordewich and Charles Semple",
  title =        "Nature Reserve Selection Problem: {A} Tight
                 Approximation Algorithm",
  journal =      j-TCBB,
  volume =       "5",
  number =       "2",
  pages =        "275--280",
  month =        apr,
  year =         "2008",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:59:29 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "The Nature Reserve Selection Problem is a problem that
                 arises in the context of studying biodiversity
                 conservation. Subject to budgetary constraints, the
                 problem is to select a set of regions to conserve so
                 that the phylogenetic diversity of the set of species
                 contained within those regions is maximized. Recently,
                 it was shown in a paper by Moulton {\\em et al.} that
                 this problem is NP-hard. In this paper, we establish a
                 tight polynomial-time approximation algorithm for the
                 Nature Reserve Section Problem. Furthermore, we resolve
                 a question on the computational complexity of a related
                 problem left open in Moulton {\\em et al.}",
  acknowledgement = ack-nhfb,
  keywords =     "combinatorial algorithms; trees",
}

@Article{Hsieh:2008:OAI,
  author =       "Yong-Hsiang Hsieh and Chih-Chiang Yu and Biing-Feng
                 Wang",
  title =        "Optimal Algorithms for the Interval Location Problem
                 with Range Constraints on Length and Average",
  journal =      j-TCBB,
  volume =       "5",
  number =       "2",
  pages =        "281--290",
  month =        apr,
  year =         "2008",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:59:29 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "Let $A$ be a sequence of $n$ real numbers, $L_1$ and
                 $L_2$ be two integers such that $L_1 \leq L_2$, and
                 $R_1$ and $R_2$ be two real numbers such that $R_1 \leq
                 R_2$. An interval of $A$ is feasible if its length is
                 between $L_1$ and $L_2$ and its average is between
                 $R_1$ and $R_2$. In this paper, we study the following
                 problems: finding all feasible intervals of $A$,
                 counting all feasible intervals of $A$, finding a
                 maximum cardinality set of non-overlapping feasible
                 intervals of $A$, locating a longest feasible interval
                 of $A$, and locating a shortest feasible interval of
                 $A$. The problems are motivated from the problem of
                 locating CpG islands in biomolecular sequences. In this
                 paper, we firstly show that all the problems have an
                 $\Omega(n \log n)$-time lower bound in the comparison
                 model. Then, we use geometric approaches to design
                 optimal algorithms for the problems. All the presented
                 algorithms run in an on-line manner and use $O(n)$
                 space.",
  acknowledgement = ack-nhfb,
  keywords =     "algorithms; analysis of algorithms; data structures;
                 geometrical problems and computations",
}

@Article{Lamers:2008:PRX,
  author =       "Susanna L. Lamers and Marco Salemi and Michael S.
                 McGrath and Gary B. Fogel",
  title =        "Prediction of {R5}, {X4}, and {R5X4} {HIV}-1
                 Coreceptor Usage with Evolved Neural Networks",
  journal =      j-TCBB,
  volume =       "5",
  number =       "2",
  pages =        "291--300",
  month =        apr,
  year =         "2008",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:59:29 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "The HIV-1 genome is highly heterogeneous. This
                 variation affords the virus a wide range of molecular
                 properties, including the ability to infect cell types,
                 such as macrophages and lymphocytes, expressing
                 different chemokine receptors on the cell surface. In
                 particular, R5 HIV-1 viruses use CCR5 as co-receptor
                 for viral entry, X4 viruses use CXCR4, whereas some
                 viral strains, known as R5X4 or D-tropic, have the
                 ability to utilize both co-receptors. X4 and R5X4
                 viruses are associated with rapid disease progression
                 to AIDS. R5X4 viruses differ in that they have yet to
                 be characterized by the examination of the genetic
                 sequence of HIV-1 alone. In this study, a series of
                 experiments was performed to evaluate different
                 strategies of feature selection and neural network
                 optimization. We demonstrate the use of artificial
                 neural networks trained via evolutionary computation to
                 predict viral co-receptor usage. The results indicate
                 identification of R5X4 viruses with predictive accuracy
                 of 75.5\%.",
  acknowledgement = ack-nhfb,
  keywords =     "AIDS; artificial neural networks; Computational
                 intelligence; dual-tropic viruses; evolutionary
                 computation; HIV; phenotype prediction; tropism",
}

@Article{vanIersel:2008:SIT,
  author =       "Leo van Iersel and Judith Keijsper and Steven Kelk and
                 Leen Stougie",
  title =        "Shorelines of Islands of Tractability: Algorithms for
                 Parsimony and Minimum Perfect Phylogeny Haplotyping
                 Problems",
  journal =      j-TCBB,
  volume =       "5",
  number =       "2",
  pages =        "301--312",
  month =        apr,
  year =         "2008",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:59:29 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "The problem Parsimony Haplotyping (PH) asks for the
                 smallest set of haplotypes which can explain a given
                 set of genotypes, and the problem Minimum Perfect
                 Phylogeny Haplotyping (MPPH) asks for the smallest such
                 set which also allows the haplotypes to be embedded in
                 a perfect phylogeny, an evolutionary tree with
                 biologically-motivated restrictions. For PH, we extend
                 recent work by further mapping the interface between
                 ``easy'' and ``hard'' instances, within the framework
                 of $(k,l)$-bounded instances where the number of 2's
                 per column and row of the input matrix is restricted.
                 By exploring, in the same way, the tractability
                 frontier of MPPH we provide the first concrete,
                 positive results for this problem. In addition, we
                 construct for both PH and MPPH polynomial time
                 approximation algorithms, based on properties of the
                 columns of the input matrix.",
  acknowledgement = ack-nhfb,
  keywords =     "biology and genetics; combinatorial algorithms;
                 complexity hierarchies",
}

@Article{Brinza:2008:SPM,
  author =       "Dumitru Brinza and Alexander Zelikovsky",
  title =        "{2SNP}: Scalable Phasing Method for Trios and
                 Unrelated Individuals",
  journal =      j-TCBB,
  volume =       "5",
  number =       "2",
  pages =        "313--318",
  month =        apr,
  year =         "2008",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Thu Jun 12 16:59:29 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "Emerging microarray technologies allow affordable
                 typing of very long genome sequences. A key challenge
                 in analyzing of such huge amount of data is scalable
                 and accurate computational inferring of haplotypes
                 (i.e., splitting of each genotype into a pair of
                 corresponding haplotypes). In this paper, we first
                 phase genotypes consisting only of two SNPs using
                 genotypes frequencies adjusted to the random mating
                 model and then extend phasing of two-SNP genotypes to
                 phasing of complete genotypes using maximum spanning
                 trees. Runtime of the proposed 2SNP algorithm is $O(nm
                 (n + \log m)$, where n and m are the numbers of
                 genotypes and SNPs, respectively, and it can handle
                 genotypes spanning entire chromosomes in a matter of
                 hours. On datasets across 23 chromosomal regions from
                 HapMap[11], 2SNP is several orders of magnitude faster
                 than GERBIL and PHASE while matching them in quality
                 measured by the number of correctly phased genotypes,
                 single-site and switching errors. For example the 2SNP
                 software phases entire chromosome ($10^5$ SNPs from
                 HapMap) for 30 individuals in 2 hours with average
                 switching error 7.7\%. We have also enhanced 2SNP
                 algorithm to phase family trio data and compared it
                 with four other well-known phasing methods on simulated
                 data from [15]. 2SNP is much faster than all of them
                 while losing in quality only to PHASE. 2SNP software is
                 publicly available at
                 \texttt{http://alla.cs.gsu.edu/~software/2SNP}.",
  acknowledgement = ack-nhfb,
  keywords =     "algorithm; genotype; haplotype; phasing; SNP",
}

@Article{Mandoiu:2008:GEI,
  author =       "Ion I. Mandoiu and Yi Pan and Alexander Zelikovsky",
  title =        "{Guest Editors}' Introduction to the Special Section on
                 Bioinformatics Research and Applications",
  journal =      j-TCBB,
  volume =       "5",
  number =       "3",
  pages =        "321--322",
  month =        jul,
  year =         "2008",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/TCBB.2008.85",
  ISSN =         "1545-5963",
  bibdate =      "Fri Oct 10 12:59:44 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Sridhar:2008:MIL,
  author =       "Srinath Sridhar and Fumei Lam and Guy E. Blelloch and
                 R. Ravi and Russell Schwartz",
  title =        "Mixed Integer Linear Programming for Maximum-Parsimony
                 Phylogeny Inference",
  journal =      j-TCBB,
  volume =       "5",
  number =       "3",
  pages =        "323--331",
  month =        jul,
  year =         "2008",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/TCBB.2008.26",
  ISSN =         "1545-5963",
  bibdate =      "Fri Oct 10 12:59:44 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "Reconstruction of phylogenetic trees is a fundamental
                 problem in computational biology. While excellent
                 heuristic methods are available for many variants of
                 this problem, new advances in phylogeny inference will
                 be required if we are to be able to continue to make
                 effective use of the rapidly growing stores of
                 variation data now being gathered. In this paper, we
                 present two integer linear programming (ILP)
                 formulations to find the most parsimonious phylogenetic
                 tree from a set of binary variation data. One method
                 uses a flow-based formulation that can produce
                 exponential numbers of variables and constraints in the
                 worst case. The method has, however, proven extremely
                 efficient in practice on datasets that are well beyond
                 the reach of the available provably efficient methods,
                 solving several large mtDNA and Y-chromosome instances
                 within a few seconds and giving provably optimal
                 results in times competitive with fast heuristics than
                 cannot guarantee optimality. An alternative formulation
                 establishes that the problem can be solved with a
                 polynomial-sized ILP. We further present a web server
                 developed based on the exponential-sized ILP that
                 performs fast maximum parsimony inferences and serves
                 as a front end to a database of precomputed phylogenies
                 spanning the human genome.",
  acknowledgement = ack-nhfb,
  keywords =     "algorithms; computational biology; integer linear
                 programming; maximum parsimony; phylogenetic tree
                 reconstruction; Steiner tree problem",
}

@Article{Bernt:2008:SPR,
  author =       "Matthias Bernt and Daniel Merkle and Martin
                 Middendorf",
  title =        "Solving the Preserving Reversal Median Problem",
  journal =      j-TCBB,
  volume =       "5",
  number =       "3",
  pages =        "332--347",
  month =        jul,
  year =         "2008",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/TCBB.2008.39",
  ISSN =         "1545-5963",
  bibdate =      "Fri Oct 10 12:59:44 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "Genomic rearrangement operations can be very useful to
                 infer the phylogenetic relationship of gene orders
                 representing species. We study the problem of finding
                 potential ancestral gene orders for the gene orders of
                 given taxa, such that the corresponding rearrangement
                 scenario has a minimal number of reversals, and where
                 each of the reversals has to preserve the common
                 intervals of the given input gene orders. Common
                 intervals identify sets of genes that occur
                 consecutively in all input gene orders. The problem of
                 finding such an ancestral gene order is called the
                 preserving reversal median problem (pRMP). A tree-based
                 data structure for the representation of the common
                 intervals of all input gene orders is used in our exact
                 algorithm TCIP for solving the pRMP. It is known that
                 the minimum number of reversals to transform one gene
                 order into another can be computed in polynomial time,
                 whereas the corresponding problem with the restriction
                 that common intervals should not be destroyed is
                 already NP-hard. It is shown theoretically that TCIP
                 can solve a large class of pRMP instances in polynomial
                 time. Empirically we show the good performance of TCIP
                 on biological and artificial data.",
  acknowledgement = ack-nhfb,
  keywords =     "biology and genetics; permutations and combinations",
}

@Article{Braga:2008:ESS,
  author =       "Mar{\'\i}lia D. V. Braga and Marie-France Sagot and
                 Celine Scornavacca and Eric Tannier",
  title =        "Exploring the Solution Space of Sorting by Reversals,
                 with Experiments and an Application to Evolution",
  journal =      j-TCBB,
  volume =       "5",
  number =       "3",
  pages =        "348--356",
  month =        jul,
  year =         "2008",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/TCBB.2008.16",
  ISSN =         "1545-5963",
  bibdate =      "Fri Oct 10 12:59:44 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "In comparative genomics, algorithms that sort
                 permutations by reversals are often used to propose
                 evolutionary scenarios of rearrangements between
                 species. One of the main problems of such methods is
                 that they give one solution while the number of optimal
                 solutions is huge, with no criteria to discriminate
                 among them. Bergeron et al. started to give some
                 structure to the set of optimal solutions, in order to
                 be able to deliver more presentable results than only
                 one solution or a complete list of all solutions.
                 However, no algorithm exists so far to compute this
                 structure except through the enumeration of all
                 solutions, which takes too much time even for small
                 permutations. Bergeron et al. state as an open problem
                 the design of such an algorithm. We propose in this
                 paper an answer to this problem, that is, an algorithm
                 which gives all the classes of solutions and counts the
                 number of solutions in each class, with a better
                 theoretical and practical complexity than the complete
                 enumeration method. We give an example of how to reduce
                 the number of classes obtained, using further
                 constraints. Finally, we apply our algorithm to analyse
                 the possible scenarios of rearrangement between
                 mammalian sex chromosomes.",
  acknowledgement = ack-nhfb,
  keywords =     "common intervals; evolution; genome rearrangements;
                 perfect sorting; sex chromosomes; signed permutations;
                 sorting by reversals",
}

@Article{Vassura:2008:RSP,
  author =       "Marco Vassura and Luciano Margara and Pietro Di Lena
                 and Filippo Medri and Piero Fariselli and Rita
                 Casadio",
  title =        "Reconstruction of {$3$D} Structures From Protein
                 Contact Maps",
  journal =      j-TCBB,
  volume =       "5",
  number =       "3",
  pages =        "357--367",
  month =        jul,
  year =         "2008",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/TCBB.2008.27",
  ISSN =         "1545-5963",
  bibdate =      "Fri Oct 10 12:59:44 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "The prediction of the protein tertiary structure from
                 solely its residue sequence (the so called Protein
                 Folding Problem) is one of the most challenging
                 problems in Structural Bioinformatics. We focus on the
                 protein residue contact map. When this map is assigned
                 it is possible to reconstruct the 3D structure of the
                 protein backbone. The general problem of recovering a
                 set of 3D coordinates consistent with some given
                 contact map is known as a unit-disk-graph realization
                 problem and it has been recently proven to be
                 NP-Hard. In this paper we describe a heuristic method
                 (COMAR) that is able to reconstruct with an
                 unprecedented rate (3-15 seconds) a 3D model that
                 exactly matches the target contact map of a
                 protein. Working with a non-redundant set of 1760
                 proteins, we find that the scoring efficiency of
                 finding a 3D model very close to the protein native
                 structure depends on the threshold value adopted to
                 compute the protein residue contact map. Contact maps
                 whose threshold values range from 10 to 18
                 {\AA}ngstroms allow reconstructing 3D models that are
                 very similar to the proteins native structure.",
  acknowledgement = ack-nhfb,
  keywords =     "combinatorial algorithms; contact map; molecular
                 modeling; protein structure prediction",
}

@Article{Lee:2008:IEN,
  author =       "George Lee and Carlos Rodriguez and Anant Madabhushi",
  title =        "Investigating the Efficacy of Nonlinear Dimensionality
                 Reduction Schemes in Classifying Gene and Protein
                 Expression Studies",
  journal =      j-TCBB,
  volume =       "5",
  number =       "3",
  pages =        "368--384",
  month =        jul,
  year =         "2008",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/TCBB.2008.36",
  ISSN =         "1545-5963",
  bibdate =      "Fri Oct 10 12:59:44 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "The recent explosion in procurement and availability
                 of high-dimensional gene- and protein-expression
                 profile datasets for cancer diagnostics has
                 necessitated the development of sophisticated machine
                 learning tools with which to analyze them. A major
                 limitation in the ability to accurate classify these
                 high-dimensional datasets stems from the 'curse of
                 dimensionality', occurring in situations where the
                 number of genes or peptides significantly exceeds the
                 total number of patient samples. Previous attempts at
                 dealing with this issue have mostly centered on the use
                 of a dimensionality reduction (DR) scheme, Principal
                 Component Analysis (PCA), to obtain a low-dimensional
                 projection of the high-dimensional data. However,
                 linear PCA and other linear DR methods, which rely on
                 Euclidean distances to estimate object similarity, do
                 not account for the inherent underlying nonlinear
                 structure associated with most biomedical data. The
                 motivation behind this work is to identify the
                 appropriate DR methods for analysis of high-dimensional
                 gene- and protein-expression studies. Towards this end,
                 we empirically and rigorously compare three nonlinear
                 (Isomap, Locally Linear Embedding, Laplacian Eigenmaps)
                 and three linear DR schemes (PCA, Linear Discriminant
                 Analysis, Multidimensional Scaling) with the intent of
                 determining a reduced subspace representation in which
                 the individual object classes are more easily
                 discriminable.",
  acknowledgement = ack-nhfb,
  keywords =     "and association rules; Bioinformatics (genome or
                 protein) databases; classification; clustering; data
                 and knowledge visualization; data mining; feature
                 extraction or construction",
}

@Article{Cho:2008:CHC,
  author =       "Hyuk Cho and Inderjit S. Dhillon",
  title =        "Coclustering of Human Cancer Microarrays Using Minimum
                 Sum-Squared Residue Coclustering",
  journal =      j-TCBB,
  volume =       "5",
  number =       "3",
  pages =        "385--400",
  month =        jul,
  year =         "2008",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/TCBB.2007.70268",
  ISSN =         "1545-5963",
  bibdate =      "Fri Oct 10 12:59:44 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "It is a consensus in microarray analysis that
                 identifying potential local patterns, characterized by
                 coherent groups of genes and conditions, may shed light
                 on the discovery of previously undetectable biological
                 cellular processes of genes as well as macroscopic
                 phenotypes of related samples. In order to
                 simultaneously cluster genes and conditions, we have
                 previously developed a fast co-clustering algorithm,
                 Minimum Sum-Squared Residue Co-clustering (MSSRCC),
                 which employs an alternating minimization scheme and
                 generates what we call co-clusters in a checkerboard
                 structure. In this paper, we propose specific
                 strategies that enable MSSRCC to escape poor local
                 minima and resolve the degeneracy problem in
                 partitional clustering algorithms. The strategies
                 include binormalization, deterministic spectral
                 initialization, and incremental local search. We assess
                 the effects of various strategies on both synthetic
                 gene expression datasets and real human cancer
                 microarrays and provide empirical evidence that MSSRCC
                 with the proposed strategies performs better than
                 existing co-clustering and clustering algorithms. In
                 particular, the combination of all the three strategies
                 leads to the best performance. Furthermore, we
                 illustrate coherence of the resulting co-clusters in a
                 checkerboard structure, where genes in a co-cluster
                 manifest the phenotype structure of corresponding
                 specific samples, and evaluate the enrichment of
                 functional annotations in Gene Ontology (GO).",
  acknowledgement = ack-nhfb,
  keywords =     "binormalization; co-clustering; deterministic spectral
                 initialization; gene ontology; local search; microarray
                 analysis",
}

@Article{Wei:2008:IGF,
  author =       "Peng Wei and Wei Pan",
  title =        "Incorporating Gene Functions into Regression Analysis
                 of {DNA}-Protein Binding Data and Gene Expression Data
                 to Construct Transcriptional Networks",
  journal =      j-TCBB,
  volume =       "5",
  number =       "3",
  pages =        "401--415",
  month =        jul,
  year =         "2008",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/TCBB.2007.1062",
  ISSN =         "1545-5963",
  bibdate =      "Fri Oct 10 12:59:44 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "Useful information on transcriptional networks has
                 been extracted by regression analyses of gene
                 expression data and DNA-protein binding data. However,
                 a potential limitation of these approaches is their
                 assumption on the common and constant activity level of
                 a transcription factor (TF) on all the genes in any
                 given experimental condition; for example, any TF is
                 assumed to be either an activator or a repressor, but
                 not both, while it is known that some TFs can be dual
                 regulators. Rather than assuming a common linear
                 regression model for all the genes, we propose using
                 separate regression models for various gene groups; the
                 genes can be grouped based on their functions or some
                 clustering results. Furthermore, to take advantage of
                 the hierarchical structure of many existing gene
                 function annotation systems, such as Gene Ontology
                 (GO), we propose a shrinkage method that borrows
                 information from relevant gene groups. Applications to
                 a yeast dataset and simulations lend support for our
                 proposed methods. In particular, we find that the
                 shrinkage method consistently works well under various
                 scenarios. We recommend the use of the shrinkage method
                 as a useful alternative to the existing methods.",
  acknowledgement = ack-nhfb,
  keywords =     "LASSO; microarray; shrinkage estimator; stratified
                 analysis; transcription factor",
}

@Article{Mak:2008:PPS,
  author =       "Man-Wai Mak and Jian Guo and Sun-Yuan Kung",
  title =        "{PairProSVM}: Protein Subcellular Localization Based
                 on Local Pairwise Profile Alignment and {SVM}",
  journal =      j-TCBB,
  volume =       "5",
  number =       "3",
  pages =        "416--422",
  month =        jul,
  year =         "2008",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/TCBB.2007.70256",
  ISSN =         "1545-5963",
  bibdate =      "Fri Oct 10 12:59:44 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "The subcellular locations of proteins are important
                 functional annotations. An effective and reliable
                 subcellular localization method is necessary for
                 proteomics research. This paper introduces a new
                 method---PairProSVM---to automatically predict the
                 subcellular locations of proteins. The profiles of all
                 protein sequences in the training set are constructed
                 by PSI-BLAST and the pairwise profile-alignment scores
                 are used to form feature vectors for training a support
                 vector machine (SVM) classifier. It was found that
                 PairProSVM outperforms the methods that are based on
                 sequence alignment and amino-acid compositions even if
                 most of the homologous sequences have been removed.
                 This paper also demonstrates that the performance of
                 PairProSVM is sensitive (and somewhat proportional) to
                 the degree of its kernel matrix meeting the Mercer's
                 condition. PairProSVM was evaluated on Reinhardt and
                 Hubbard's, Huang and Li's, and Gardy et al.'s protein
                 datasets. The overall accuracies on these three
                 datasets reach 99.3\\\%, 76.5\\\%, and 91.9\\\%,
                 respectively, which are higher than or comparable to
                 those obtained by sequence alignment and by the methods
                 compared in this paper.",
  acknowledgement = ack-nhfb,
  keywords =     "kernel methods; Mercer condition; profile alignment;
                 subcellular localization; support vector machines",
}

@Article{Elo:2008:ROT,
  author =       "Laura L. Elo and Sanna Filen and Riitta Lahesmaa and
                 Tero Aittokallio",
  title =        "Reproducibility-Optimized Test Statistic for Ranking
                 Genes in Microarray Studies",
  journal =      j-TCBB,
  volume =       "5",
  number =       "3",
  pages =        "423--431",
  month =        jul,
  year =         "2008",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/tcbb.2007.1078",
  ISSN =         "1545-5963",
  bibdate =      "Fri Oct 10 12:59:44 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "A principal goal of microarray studies is to identify
                 the genes showing differential expression under
                 distinct conditions. In such studies, the selection of
                 an optimal test statistic is a crucial challenge, which
                 depends on the type and amount of data under analysis.
                 While previous studies on simulated or spike-in
                 datasets do not provide practical guidance on how to
                 choose the best method for a given real dataset, we
                 introduce an enhanced reproducibility-optimization
                 procedure, which enables the selection of a suitable
                 gene- anking statistic directly from the data. In
                 comparison with existing ranking methods, the
                 reproducibility-optimized statistic shows good
                 performance consistently under various simulated
                 conditions and on Affymetrix spike-in dataset. Further,
                 the feasibility of the novel statistic is confirmed in
                 a practical research setting using data from an
                 in-house cDNA microarray study of asthma-related gene
                 expression changes. These results suggest that the
                 procedure facilitates the selection of an appropriate
                 test statistic for a given dataset without relying on a
                 priori assumptions, which may bias the findings and
                 their interpretation. Moreover, the general
                 reproducibility-optimization procedure is not limited to
                 detecting differential expression only but could be
                 extended to a wide range of other applications as
                 well.",
  acknowledgement = ack-nhfb,
  keywords =     "bootstrap; differential expression; gene expression;
                 gene ranking; microarray; reproducibility",
}

@Article{Parker:2008:SPT,
  author =       "Douglass Stott Parker and Ruey-Lung Hsiao and Yi Xing
                 and Alissa M. Resch and Christopher J. Lee",
  title =        "Solving the Problem of Trans-Genomic Query with
                 Alignment Tables",
  journal =      j-TCBB,
  volume =       "5",
  number =       "3",
  pages =        "432--447",
  month =        jul,
  year =         "2008",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/TCBB.2007.1073",
  ISSN =         "1545-5963",
  bibdate =      "Fri Oct 10 12:59:44 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "The trans-genomic query (TGQ) problem -- enabling the
                 free query of biological information, even across
                 genomes -- is a central challenge facing
                 bioinformatics. Solutions to this problem can alter the
                 nature of the field, moving it beyond the jungle of
                 data integration and expanding the number and scope of
                 questions that can be answered. An alignment table is a
                 binary relationship on locations (sequence segments).
                 An important special case of alignment tables are hit
                 tables ? tables of pairs of highly similar segments
                 produced by alignment tools like BLAST. However,
                 alignment tables also include general binary
                 relationships, and can represent any useful connection
                 between sequence locations. They can be curated, and
                 provide a high-quality queryable backbone of
                 connections between biological information. Alignment
                 tables thus can be a natural foundation for TGQ, as
                 they permit a central part of the TGQ problem to be
                 reduced to purely technical problems involving tables
                 of locations. Key challenges in implementing alignment
                 tables include efficient representation and indexing of
                 sequence locations. We define a location datatype that
                 can be incorporated naturally into common off-the-shelf
                 database systems. We also describe an implementation of
                 alignment tables in BLASTGRES, an extension of the
                 open-source POSTGRESQL database system that provides
                 indexing and operators on locations required for
                 querying alignment tables. This paper also reviews
                 several successful large-scale applications of
                 alignment tables for Trans-Genomic Query. Tables with
                 millions of alignments have been used in queries about
                 alternative splicing, an area of genomic analysis
                 concerning the way in which a single gene can yield
                 multiple transcripts. Comparative genomics is a large
                 potential application area for TGQ and alignment
                 tables.",
  acknowledgement = ack-nhfb,
}

@Article{Dawy:2008:FSG,
  author =       "Zaher Dawy and Michel Sarkis and Joachim Hagenauer and
                 Jakob C. Mueller",
  title =        "Fine-Scale Genetic Mapping Using Independent Component
                 Analysis",
  journal =      j-TCBB,
  volume =       "5",
  number =       "3",
  pages =        "448--460",
  month =        jul,
  year =         "2008",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/TCBB.2007.1072",
  ISSN =         "1545-5963",
  bibdate =      "Fri Oct 10 12:59:44 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "The aim of genetic mapping is to locate the loci
                 responsible for specific traits such as complex
                 diseases. These traits are normally caused by mutations
                 at multiple loci of unknown locations and interactions.
                 In this work, we model the biological system that
                 relates DNA polymorphisms with complex traits as a
                 linear mixing process. Given this model, we propose a
                 new fine-scale genetic mapping method based on
                 independent component analysis. The proposed method
                 outputs both independent associated groups of SNPs in
                 addition to specific associated SNPs with the
                 phenotype. It is applied to a clinical data set for the
                 Schizophrenia disease with 368 individuals and 42 SNPs.
                 It is also applied to a simulation study to investigate
                 in more depth its performance. The obtained results
                 demonstrate the novel characteristics of the proposed
                 method compared to other genetic mapping methods.
                 Finally, we study the robustness of the proposed method
                 with missing genotype values and limited sample
                 sizes.",
  acknowledgement = ack-nhfb,
  keywords =     "association mapping; complex diseases; independent
                 component analysis (ICA); linkage disequilibrium;
                 principal component analysis (PCA); single nucleotide
                 polymorphisms (SNPs)",
}

@Article{Hendy:2008:HCK,
  author =       "Michael D. Hendy and Sagi Snir",
  title =        "{Hadamard} Conjugation for the {Kimura} {3ST} Model:
                 Combinatorial Proof Using Path Sets",
  journal =      j-TCBB,
  volume =       "5",
  number =       "3",
  pages =        "461--471",
  month =        jul,
  year =         "2008",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/TCBB.2007.70227",
  ISSN =         "1545-5963",
  bibdate =      "Fri Oct 10 12:59:44 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "Under a stochastic model of molecular sequence
                 evolution the probability of each possible pattern of a
                 characters is well defined. The Kimura's
                 three-substitution-types (K3ST) model of evolution,
                 allows analytical expression for these probabilities of
                 by means of the Hadamard conjugation as a function of
                 the phylogeny T and the substitution probabilities on
                 each edge of TM . In this paper we produce a direct
                 combinatorial proof of these results, using pathset
                 distances which generalise pairwise distances between
                 sequences. This interpretation provides us with tools
                 that were proved useful in related problems in the
                 mathematical analysis of sequence evolution.",
  acknowledgement = ack-nhfb,
  keywords =     "Hadamard conjugation; K3ST model; path-sets;
                 phylogenetic invariants; phylogenetic trees",
}

@Article{Gambette:2008:ILP,
  author =       "Philippe Gambette and Daniel H. Huson",
  title =        "Improved Layout of Phylogenetic Networks",
  journal =      j-TCBB,
  volume =       "5",
  number =       "3",
  pages =        "472--479",
  month =        jul,
  year =         "2008",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/tcbb.2007.1046",
  ISSN =         "1545-5963",
  bibdate =      "Fri Oct 10 12:59:44 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "Split networks are increasingly being used in
                 phylogenetic analysis. Usually, a simple equal angle
                 algorithm is used to draw such networks, producing
                 layouts that leave much room for improvement.
                 Addressing the problem of producing better layouts of
                 split networks, this paper presents an algorithm for
                 maximizing the area covered by the network, describes
                 an extension of the equal-daylight algorithm to
                 networks, looks into using a spring embedder and
                 discusses how to construct rooted split networks.",
  acknowledgement = ack-nhfb,
  keywords =     "algorithms; graph drawing; phylogenetic networks;
                 phylogenetics",
}

@Article{Gusfield:2008:EE,
  author =       "Dan Gusfield",
  title =        "{EIC} Editorial",
  journal =      j-TCBB,
  volume =       "5",
  number =       "4",
  pages =        "481--481",
  month =        oct,
  year =         "2008",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/TCBB.2008.115",
  ISSN =         "1545-5963",
  bibdate =      "Wed Jan 14 12:51:33 MST 2009",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Giancarlo:2008:GEI,
  author =       "Raffaele Giancarlo and Sridhar Hannenhalli",
  title =        "{Guest Editors}' Introduction to the Special Section
                 on Algorithms in Bioinformatics",
  journal =      j-TCBB,
  volume =       "5",
  number =       "4",
  pages =        "482--483",
  month =        oct,
  year =         "2008",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/TCBB.2008.116",
  ISSN =         "1545-5963",
  bibdate =      "Wed Jan 14 12:51:33 MST 2009",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Jeong:2008:ISP,
  author =       "Jieun Jeong and Piotr Berman and Teresa M. Przytycka",
  title =        "Improving Strand Pairing Prediction through Exploring
                 Folding Cooperativity",
  journal =      j-TCBB,
  volume =       "5",
  number =       "4",
  pages =        "484--491",
  month =        oct,
  year =         "2008",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/TCBB.2008.88",
  ISSN =         "1545-5963",
  bibdate =      "Wed Jan 14 12:51:33 MST 2009",
  bibsource =    "http://portal.acm.org/",
  abstract =     "The topology of $\beta$-sheets is defined by the
                 pattern of hydrogen-bonded strand pairing. Therefore,
                 predicting hydrogen bonded strand partners is a
                 fundamental step towards predicting $\beta$-sheet
                 topology. At the same time, finding the correct
                 partners is very difficult due to long range
                 interactions involved in strand pairing. Additionally,
                 patterns of aminoacids involved, in $\beta$-sheet
                 formations are very general and therefore difficult to
                 use for computational recognition of specific contacts
                 between strands. In this work, we report a new strand
                 pairing algorithm. To address above mentioned
                 difficulties, our algorithm attempts to mimic elements
                 of the folding process. Namely, in addition to ensuring
                 that the predicted hydrogen bonded strand pairs satisfy
                 basic global consistency constraints, it takes into
                 account hypothetical folding pathways. Consistently
                 with this view, introducing hydrogen bonds between a
                 pair of strands changes the probabilities of forming
                 hydrogen bonds between other pairs of strand. We
                 demonstrate that this approach provides an improvement
                 over previously proposed algorithms. We also compare
                 the performance of this method to that of a global
                 optimization algorithm that poses the problem as
                 integer linear programming optimization problem and
                 solves it using ILOG CPLEX\texttrademark\ package.",
  acknowledgement = ack-nhfb,
  keywords =     "Biology and genetics; Combinatorial algorithms",
}

@Article{Genovese:2008:SAH,
  author =       "Loredana M. Genovese and Filippo Geraci and Marco
                 Pellegrini",
  title =        "{SpeedHap}: An Accurate Heuristic for the Single
                 Individual {SNP} Haplotyping Problem with Many Gaps,
                 High Reading Error Rate and Low Coverage",
  journal =      j-TCBB,
  volume =       "5",
  number =       "4",
  pages =        "492--502",
  month =        oct,
  year =         "2008",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/TCBB.2008.67",
  ISSN =         "1545-5963",
  bibdate =      "Wed Jan 14 12:51:33 MST 2009",
  bibsource =    "http://portal.acm.org/",
  abstract =     "Single nucleotide polymorphism (SNP) is the most
                 frequent form of DNA variation. The set of SNP's
                 present in a chromosome (called the em haplotype) is of
                 interest in a wide area of applications in molecular
                 biology and biomedicine, including diagnostic and
                 medical therapy. In this paper we propose a new
                 heuristic method for the problem of haplotype
                 reconstruction for (portions of) a pair of homologous
                 human chromosomes from a single individual (SIH). The
                 problem is well known in literature and exact
                 algorithms have been proposed for the case when no (or
                 few) gaps are allowed in the input fragments. These
                 algorithms, though exact and of polynomial complexity,
                 are slow in practice. When gaps are considered no exact
                 method of polynomial complexity is known. The problem
                 is also hard to approximate with guarantees. Therefore
                 fast heuristics have been proposed. In this paper we
                 describe SpeedHap, a new heuristic method that is able
                 to tackle the case of many gapped fragments and retains
                 its effectiveness even when the input fragments have
                 high rate of reading errors (up to 20\%) and low
                 coverage (as low as 3). We test SpeedHap on real data
                 from the HapMap Project.",
  acknowledgement = ack-nhfb,
  keywords =     "Algorithms; Biology and genetics",
}

@Article{Lozano:2008:STA,
  author =       "Antoni Lozano and Ron Y. Pinter and Oleg Rokhlenko and
                 Gabriel Valiente and Michal Ziv-Ukelson",
  title =        "Seeded Tree Alignment",
  journal =      j-TCBB,
  volume =       "5",
  number =       "4",
  pages =        "503--513",
  month =        oct,
  year =         "2008",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/TCBB.2008.59",
  ISSN =         "1545-5963",
  bibdate =      "Wed Jan 14 12:51:33 MST 2009",
  bibsource =    "http://portal.acm.org/",
  abstract =     "The optimal transformation of one tree into another by
                 means of elementary edit operations is an important
                 algorithmic problem that has several interesting
                 applications to computational biology. Here we
                 introduce a constrained form of this problem in which a
                 partial mapping of a set of nodes (the 'seeds') in one
                 tree to a corresponding set of nodes in the other tree
                 is given, and present efficient algorithms for both
                 ordered and unordered trees. Whereas ordered tree
                 matching based on seeded nodes has applications in
                 pattern matching of RNA structures, unordered tree
                 matching based on seeded nodes has applications in
                 co-speciation and phylogeny reconciliation. The latter
                 involves the solution of the planar tanglegram layout
                 problem, for which a polynomial-time algorithm is given
                 here.",
  acknowledgement = ack-nhfb,
  keywords =     "Biology and genetics; Computer Applications; Discrete
                 Mathematics; Graph algorithms; Graph Theory; Life and
                 Medical Sciences; Mathematics of Computing; Trees",
}

@Article{Bansal:2008:NLS,
  author =       "Mukul S. Bansal and Oliver Eulenstein",
  title =        "An {$\Omega(n^2/ \log n)$} Speed-Up of {TBR}
                 Heuristics for the Gene-Duplication Problem",
  journal =      j-TCBB,
  volume =       "5",
  number =       "4",
  pages =        "514--524",
  month =        oct,
  year =         "2008",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/TCBB.2008.69",
  ISSN =         "1545-5963",
  bibdate =      "Wed Jan 14 12:51:33 MST 2009",
  bibsource =    "http://portal.acm.org/",
  abstract =     "The gene-duplication problem is to infer a species
                 supertree from gene trees that are confounded by
                 complex histories of gene duplications. This problem is
                 NP-hard and thus requires efficient and effective
                 heuristics. Existing heuristics perform a stepwise
                 search of the tree space, where each step is guided by
                 an exact solution to an instance of a local search
                 problem. We improve on the time complexity of the local
                 search problem by a factor of n2= log n, where n is the
                 size of the resulting species supertree. Typically,
                 several thousand instances of the local search problem
                 are solved throughout a stepwise heuristic search.
                 Hence, our improvement makes the gene-duplication
                 problem much more tractable for large-scale
                 phylogenetic analyses.",
  acknowledgement = ack-nhfb,
  keywords =     "Algorithms; Computational Biology; Gene Duplication;
                 Phylogenetics; Supertrees",
}

@Article{Wang:2008:DCO,
  author =       "Xueyi Wang and Jack Snoeyink",
  title =        "Defining and Computing Optimum {RMSD} for Gapped and
                 Weighted Multiple-Structure Alignment",
  journal =      j-TCBB,
  volume =       "5",
  number =       "4",
  pages =        "525--533",
  month =        oct,
  year =         "2008",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/TCBB.2008.92",
  ISSN =         "1545-5963",
  bibdate =      "Wed Jan 14 12:51:33 MST 2009",
  bibsource =    "http://portal.acm.org/",
  abstract =     "Pairwise structure alignment commonly uses root mean
                 square deviation (RMSD) to measure the structural
                 similarity, and methods for optimizing RMSD are well
                 established. We extend RMSD to weighted RMSD for
                 multiple structures. By using multiplicative weights,
                 we show that weighted RMSD for all pairs is the same as
                 weighted RMSD to an average of the structures. Thus,
                 using RMSD or weighted RMSD implies that the average is
                 a consensus structure. Although we show that in
                 general, the two tasks of finding the optimal
                 translations and rotations for minimizing weighted RMSD
                 cannot be separated for multiple structures like they
                 can for pairs, an inherent difficulty and a fact
                 ignored by previous work, we develop a near-linear
                 iterative algorithm to converge weighted RMSD to a
                 local minimum. 10,000 experiments of gapped alignment
                 done on each of 23 protein families from HOMSTRAD
                 (where each structure starts with a random translation
                 and rotation) converge rapidly to the same minimum.
                 Finally we propose a heuristic method to iteratively
                 remove the effect of outliers and find well-aligned
                 positions that determine the structural conserved
                 region by modeling B-factors and deviations from the
                 average positions as weights and iteratively assigning
                 higher weights to better aligned atoms.",
  acknowledgement = ack-nhfb,
  keywords =     "multiple structure alignment; optimization methods;
                 structural conserved region; weighted RMSD",
}

@Article{Yao:2008:EAE,
  author =       "Peggy Yao and Ankur Dhanik and Nathan Marz and Ryan
                 Propper and Charles Kou and Guanfeng Liu and Henry van
                 den Bedem and Jean-Claude Latombe and Inbal
                 Halperin-Landsberg and Russ B. Altman",
  title =        "Efficient Algorithms to Explore Conformation Spaces of
                 Flexible Protein Loops",
  journal =      j-TCBB,
  volume =       "5",
  number =       "4",
  pages =        "534--545",
  month =        oct,
  year =         "2008",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/TCBB.2008.96",
  ISSN =         "1545-5963",
  bibdate =      "Wed Jan 14 12:51:33 MST 2009",
  bibsource =    "http://portal.acm.org/",
  abstract =     "Several applications in biology - e.g., incorporation
                 of protein flexibility in ligand docking algorithms,
                 interpretation of fuzzy X-ray crystallographic data,
                 and homology modeling - require computing the internal
                 parameters of a flexible fragment (usually, a loop) of
                 a protein in order to connect its termini to the rest
                 of the protein without causing any steric clash. One
                 must often sample many such conformations in order to
                 explore and adequately represent the conformational
                 range of the studied loop. While sampling must be fast,
                 it is made difficult by the fact that two conflicting
                 constraints - kinematic closure and clash avoidance -
                 must be satisfied concurrently. This paper describes
                 two efficient and complementary sampling algorithms to
                 explore the space of closed clash-free conformations of
                 a flexible protein loop. The 'seed sampling' algorithm
                 samples broadly from this space, while the 'deformation
                 sampling' algorithm uses seed conformations as starting
                 points to explore the conformation space around them at
                 a finer grain. Computational results are presented for
                 various loops ranging from 5 to 25 residues. More
                 specific results also show that the combination of the
                 sampling algorithms with a functional site prediction
                 software (FEATURE) makes it possible to compute and
                 recognize calcium-binding loop conformations. The
                 sampling algorithms are implemented in a toolkit
                 (LoopTK), which is available at
                 https://simtk.org/home/looptk.",
  acknowledgement = ack-nhfb,
  keywords =     "Biology and genetics; Robotics",
}

@Article{Kim:2008:LSS,
  author =       "Eagu Kim and John Kececioglu",
  title =        "Learning Scoring Schemes for Sequence Alignment from
                 Partial Examples",
  journal =      j-TCBB,
  volume =       "5",
  number =       "4",
  pages =        "546--556",
  month =        oct,
  year =         "2008",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/TCBB.2008.57",
  ISSN =         "1545-5963",
  bibdate =      "Wed Jan 14 12:51:33 MST 2009",
  bibsource =    "http://portal.acm.org/",
  abstract =     "When aligning biological sequences, the choice of
                 parameter values for the alignment scoring function is
                 critical. Small changes in gap penalties, for example,
                 can yield radically different alignments. A rigorous
                 way to compute parameter values that are appropriate
                 for aligning biological sequences is through inverse
                 parametric sequence alignment. Given a collection of
                 examples of biologically correct alignments, this is
                 the problem of finding parameter values that make the
                 scores of the example alignments close to those of
                 optimal alignments for their sequences. We extend prior
                 work on inverse parametric alignment to partial
                 examples, which contain regions where the alignment is
                 left unspecified, and to an improved formulation based
                 on minimizing the average error between the score of an
                 example and the score of an optimal alignment.
                 Experiments on benchmark biological alignments show we
                 can find parameters that generalize across protein
                 families and that boost the accuracy of multiple
                 sequence alignment by as much as 25\%.",
  acknowledgement = ack-nhfb,
  keywords =     "Analysis of Algorithms and Problem Complexity; Biology
                 and genetics; Linear programming; Pattern matching",
}

@Article{Schliep:2008:EAC,
  author =       "Alexander Schliep and Roland Krause",
  title =        "Efficient Algorithms for the Computational Design of
                 Optimal Tiling Arrays",
  journal =      j-TCBB,
  volume =       "5",
  number =       "4",
  pages =        "557--567",
  month =        oct,
  year =         "2008",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/TCBB.2008.50",
  ISSN =         "1545-5963",
  bibdate =      "Wed Jan 14 12:51:33 MST 2009",
  bibsource =    "http://portal.acm.org/",
  abstract =     "The representation of a genome by oligonucleotide
                 probes is a prerequisite for the analysis of many of
                 its basic properties, such as transcription factor
                 binding sites, chromosomal breakpoints, gene expression
                 of known genes and detection of novel genes, in
                 particular those coding for small RNAs. An ideal
                 representation would consist of a high density set of
                 oligonucleotides with similar melting temperatures that
                 do not cross-hybridize with other regions of the genome
                 and are equidistantly spaced. The implementation of
                 such design is typically called a tiling array or
                 genome array. We formulate the minimal cost tiling path
                 problem for the selection of oligonucleotides from a
                 set of candidates. Computing the selection of probes
                 requires multi-criterion optimization, which we cast
                 into a shortest path problem. Standard algorithms
                 running in linear time allow us to compute globally
                 optimal tiling paths from millions of candidate
                 oligonucleotides on a standard desktop computer for
                 most problem variants. The solutions to this
                 multi-criterion optimization are spatially adaptive to
                 the problem instance. Our formulation incorporates
                 experimental constraints with respect to specific
                 regions of interest and trade offs between
                 hybridization parameters, probe quality and tiling
                 density easily.",
  acknowledgement = ack-nhfb,
  keywords =     "Biology and genetics; Graph Theory",
}

@Article{Yu:2008:CAA,
  author =       "Zeyun Yu and Chandrajit Bajaj",
  title =        "Computational Approaches for Automatic Structural
                 Analysis of Large Biomolecular Complexes",
  journal =      j-TCBB,
  volume =       "5",
  number =       "4",
  pages =        "568--582",
  month =        oct,
  year =         "2008",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/TCBB.2007.70226",
  ISSN =         "1545-5963",
  bibdate =      "Wed Jan 14 12:51:33 MST 2009",
  bibsource =    "http://portal.acm.org/",
  abstract =     "We present computational solutions to two problems of
                 macromolecular structure interpretation from
                 reconstructed three-dimensional electron microscopy
                 (3D-EM) maps of large bio-molecular complexes at
                 intermediate resolution (5A-15A). Thetwo problems
                 addressed are: (a) 3D structural alignment
                 (matching)between identified and segmented 3D maps of
                 structure units(e.g. trimeric configuration of
                 proteins), and (b) the secondary structure
                 identification of a segmented protein 3D map
                 (i.e.locations of a-helices, b -sheets). For problem
                 (a), we present an efficient algorithm to correlate
                 spatially (and structurally)two 3D maps of structure
                 units. Besides providing a similarity score between
                 structure units, the algorithm yields an effective
                 technique for resolution refinement of repeated
                 structure units,by 3D alignment and averaging. For
                 problem (b), we present an efficient algorithm to
                 compute eigenvalues and link eigenvectors of a Gaussian
                 convoluted structure tensor derived from the protein 3D
                 Map, thereby identifying and locating secondary
                 structural motifs of proteins. The efficiency and
                 performance of our approach is demonstrated on several
                 experimentally reconstructed 3D maps of virus capsid
                 shells from single-particle cryo-EM, as well as
                 computationally simulated protein structure density 3D
                 maps generated from protein model entries in the
                 Protein Data Bank.",
  acknowledgement = ack-nhfb,
  keywords =     "3D Reconstruction; Alignment; Cryo-EM Maps; Secondary
                 Structure Detection; Segmentation; Similarity Measure;
                 Skeletonization; Structure Analysis",
}

@Article{Christinat:2008:GED,
  author =       "Yann Christinat and Bernd Wachmann and Lei Zhang",
  title =        "Gene Expression Data Analysis Using a Novel Approach
                 to Biclustering Combining Discrete and Continuous
                 Data",
  journal =      j-TCBB,
  volume =       "5",
  number =       "4",
  pages =        "583--593",
  month =        oct,
  year =         "2008",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/TCBB.2007.70251",
  ISSN =         "1545-5963",
  bibdate =      "Wed Jan 14 12:51:33 MST 2009",
  bibsource =    "http://portal.acm.org/",
  abstract =     "Many different methods exist for pattern detection in
                 gene expression data. In contrast to classical methods,
                 biclustering has the ability to cluster a group of
                 genes together with a group of conditions (replicates,
                 set of patients or drug compounds). However, since the
                 problem is NP-complex, most algorithms use heuristic
                 search functions and therefore might converge towards
                 local maxima. By using the results of biclustering on
                 discrete data as a starting point for a local search
                 function on continuous data, our algorithm avoids the
                 problem of heuristic initialization. Similar to OPSM,
                 our algorithm aims to detect biclusters whose rows and
                 columns can be ordered such that row values are growing
                 across the bicluster's columns and vice-versa. Results
                 have been generated on the yeast genome (Saccharomyces
                 cerevisiae), a human cancer dataset and random data.
                 Results on the yeast genome showed that 89\% of the one
                 hundred biggest non-overlapping biclusters were
                 enriched with Gene Ontology annotations. A comparison
                 with OPSM and ISA demonstrated a better efficiency when
                 using gene and condition orders. We present results on
                 random and real datasets that show the ability of our
                 algorithm to capture statistically significant and
                 biologically relevant biclusters.",
  acknowledgement = ack-nhfb,
  keywords =     "Bioinformatics (genome or protein) databases; Data and
                 knowledge visualization; Data mining; Graph and tree
                 search strategies; Machine learning",
}

@Article{Lacroix:2008:IMN,
  author =       "Vincent Lacroix and Ludovic Cottret and Patricia
                 Th{\'e}bault and Marie-France Sagot",
  title =        "An Introduction to Metabolic Networks and Their
                 Structural Analysis",
  journal =      j-TCBB,
  volume =       "5",
  number =       "4",
  pages =        "594--617",
  month =        oct,
  year =         "2008",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/TCBB.2008.79",
  ISSN =         "1545-5963",
  bibdate =      "Wed Jan 14 12:51:33 MST 2009",
  bibsource =    "http://portal.acm.org/",
  abstract =     "There has been a renewed interest for metabolism in
                 the computational biology community, leading to an
                 avalanche of papers coming from methodological network
                 analysis as well as experimental and theoretical
                 biology. This paper is meant to serve as an initial
                 guide for both the biologists interested in formal
                 approaches and the mathematicians or computer
                 scientists wishing to inject more realism into their
                 models. The paper is focused on the structural aspects
                 of metabolism only. The literature is vast enough
                 already, and the thread through it difficult to follow
                 even for the more experienced worker in the field. We
                 explain methods for acquiring data and reconstructing
                 metabolic networks, and review the various models that
                 have been used for their structural analysis. Several
                 concepts such as modularity are introduced, as are the
                 controversies that have beset the field these past few
                 years, for instance, on whether metabolic networks are
                 small-world or scale-free, and on which model better
                 explains the evolution of metabolism. Clarifying the
                 work that has been done also helps in identifying open
                 questions and in proposing relevant future directions
                 in the field, which we do along the paper and in the
                 conclusion.",
  acknowledgement = ack-nhfb,
  keywords =     "Biology and genetics; evolution; Graph Theory;
                 Introductory and Survey; metabolic networks; modelling;
                 modularity; reconstruction",
}

@Article{Satya:2008:UIP,
  author =       "Ravi Vijaya Satya and Amar Mukherjee",
  title =        "The Undirected Incomplete Perfect Phylogeny Problem",
  journal =      j-TCBB,
  volume =       "5",
  number =       "4",
  pages =        "618--629",
  month =        oct,
  year =         "2008",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/TCBB.2007.70218",
  ISSN =         "1545-5963",
  bibdate =      "Wed Jan 14 12:51:33 MST 2009",
  bibsource =    "http://portal.acm.org/",
  abstract =     "The incomplete perfect phylogeny (IPP) problem and the
                 incomplete perfect phylogeny haplotyping (IPPH) problem
                 deal with constructing a phylogeny for a given set of
                 haplotypes or genotypes with missing entries. The
                 earlier approaches for both of these problems dealt
                 with restricted versions of the problems, where the
                 root is either available or can be trivially
                 re-constructed from the data, or certain assumptions
                 were made about the data. In this paper, we deal with
                 the unrestricted versions of the problems, where the
                 root of the phylogeny is neither available nor
                 trivially recoverable from the data. Both IPP and IPPH
                 problems have previously been proven to be NP complete.
                 Here, we present efficient enumerative algorithms that
                 can handle practical instances of the problem.
                 Empirical analysis on simulated data shows that the
                 algorithms perform very well both in terms of speed and
                 in terms accuracy of the recovered data.",
  acknowledgement = ack-nhfb,
  keywords =     "Haplotype Inference; Incomplete Perfect Phylogeny;
                 Perfect Phylogeny; Phylogenetics",
}

@Article{Gondro:2008:OCM,
  author =       "Cedric Gondro and Brian P. Kinghorn",
  title =        "Optimization of {cDNA} Microarray Experimental Designs
                 Using an Evolutionary Algorithm",
  journal =      j-TCBB,
  volume =       "5",
  number =       "4",
  pages =        "630--638",
  month =        oct,
  year =         "2008",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/TCBB.2007.70222",
  ISSN =         "1545-5963",
  bibdate =      "Wed Jan 14 12:51:33 MST 2009",
  bibsource =    "http://portal.acm.org/",
  abstract =     "The cDNA microarray is an important tool for
                 generating large datasets of gene expression
                 measurements. An efficient design is critical to ensure
                 that the experiment will be able to address relevant
                 biological questions. Microarray experimental design
                 can be treated as a multicriterion optimization
                 problem. For this class of problems evolutionary
                 algorithms (EAs) are well suited, as they can search
                 the solution space and evolve a design that optimizes
                 the parameters of interest based on their relative
                 value to the researcher under a given set of
                 constraints. This paper introduces the use of EAs for
                 optimization of experimental designs of spotted
                 microarrays using a weighted objective function. The EA
                 and the various criteria relevant to design
                 optimization are discussed. Evolved designs are
                 compared with designs obtained through exhaustive
                 search with results suggesting that the EA can find
                 just as efficient optimal or near-optimal designs
                 within a tractable timeframe.",
  acknowledgement = ack-nhfb,
  keywords =     "Evolutionary computing and genetic algorithms;
                 experimental design; global optimization; microarrays",
}

@Article{Gusfield:2009:FFY,
  author =       "Dan Gusfield",
  title =        "Final, Five-Year End, Editorial",
  journal =      j-TCBB,
  volume =       "6",
  number =       "1",
  pages =        "1--2",
  month =        jan,
  year =         "2009",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Mon Mar 2 18:46:49 MST 2009",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Sagot:2009:NEE,
  author =       "Marie-France Sagot",
  title =        "New {EIC} Editorial",
  journal =      j-TCBB,
  volume =       "6",
  number =       "1",
  pages =        "3--3",
  month =        jan,
  year =         "2009",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Mon Mar 2 18:46:49 MST 2009",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Huson:2009:SSP,
  author =       "Daniel H. Huson and Vincent Moulton and Mike Steel",
  title =        "Special Section: Phylogenetics",
  journal =      j-TCBB,
  volume =       "6",
  number =       "1",
  pages =        "4--6",
  month =        jan,
  year =         "2009",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Mon Mar 2 18:46:49 MST 2009",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Liu:2009:BWT,
  author =       "Kevin Liu and Serita Nelesen and Sindhu Raghavan and
                 C. Randal Linder and Tandy Warnow",
  title =        "Barking Up The Wrong Treelength: The Impact of Gap
                 Penalty on Alignment and Tree Accuracy",
  journal =      j-TCBB,
  volume =       "6",
  number =       "1",
  pages =        "7--21",
  month =        jan,
  year =         "2009",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Mon Mar 2 18:46:49 MST 2009",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Minh:2009:BPD,
  author =       "Bui Quang Minh and Fabio Pardi and Steffen Klaere and
                 Arndt von Haeseler",
  title =        "Budgeted Phylogenetic Diversity on Circular Split
                 Systems",
  journal =      j-TCBB,
  volume =       "6",
  number =       "1",
  pages =        "22--29",
  month =        jan,
  year =         "2009",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Mon Mar 2 18:46:49 MST 2009",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Linz:2009:HNT,
  author =       "Simone Linz and Charles Semple",
  title =        "Hybridization in Nonbinary Trees",
  journal =      j-TCBB,
  volume =       "6",
  number =       "1",
  pages =        "30--45",
  month =        jan,
  year =         "2009",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Mon Mar 2 18:46:49 MST 2009",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Cardona:2009:MPN,
  author =       "Gabriel Cardona and Merc{\`e} Llabr{\'e}s and Francesc
                 Rossell{\'o} and Gabriel Valiente",
  title =        "Metrics for Phylogenetic Networks {I}: Generalizations
                 of the {Robinson--Foulds} Metric",
  journal =      j-TCBB,
  volume =       "6",
  number =       "1",
  pages =        "46--61",
  month =        jan,
  year =         "2009",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Mon Mar 2 18:46:49 MST 2009",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Willson:2009:RTS,
  author =       "Stephen J. Willson",
  title =        "Robustness of Topological Supertree Methods for
                 Reconciling Dense Incompatible Data",
  journal =      j-TCBB,
  volume =       "6",
  number =       "1",
  pages =        "62--75",
  month =        jan,
  year =         "2009",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Mon Mar 2 18:46:49 MST 2009",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Allman:2009:ICM,
  author =       "Elizabeth S. Allman and John A. Rhodes",
  title =        "The Identifiability of Covarion Models in
                 Phylogenetics",
  journal =      j-TCBB,
  volume =       "6",
  number =       "1",
  pages =        "76--88",
  month =        jan,
  year =         "2009",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Mon Mar 2 18:46:49 MST 2009",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Matsen:2009:FTI,
  author =       "Frederick A. Matsen",
  title =        "{Fourier} Transform Inequalities for Phylogenetic
                 Trees",
  journal =      j-TCBB,
  volume =       "6",
  number =       "1",
  pages =        "89--95",
  month =        jan,
  year =         "2009",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Mon Mar 2 18:46:49 MST 2009",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Gusfield:2009:OEE,
  author =       "Dan Gusfield",
  title =        "Outgoing {EIC} Editorial for this Special Section of
                 {TCBB} with the Theme of Phylogenetics",
  journal =      j-TCBB,
  volume =       "6",
  number =       "1",
  pages =        "96--96",
  month =        jan,
  year =         "2009",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Mon Mar 2 18:46:49 MST 2009",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Grunewald:2009:MPT,
  author =       "Stefan Gr{\"u}newald and Vincent Moulton",
  title =        "Maximum Parsimony for Tree Mixtures",
  journal =      j-TCBB,
  volume =       "6",
  number =       "1",
  pages =        "97--102",
  month =        jan,
  year =         "2009",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Mon Mar 2 18:46:49 MST 2009",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Huson:2009:DRP,
  author =       "Daniel H. Huson",
  title =        "Drawing Rooted Phylogenetic Networks",
  journal =      j-TCBB,
  volume =       "6",
  number =       "1",
  pages =        "103--109",
  month =        jan,
  year =         "2009",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Mon Mar 2 18:46:49 MST 2009",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Bordewich:2009:CTM,
  author =       "Magnus Bordewich and Olivier Gascuel and Katharina T.
                 Huber and Vincent Moulton",
  title =        "Consistency of Topological Moves Based on the Balanced
                 Minimum Evolution Principle of Phylogenetic Inference",
  journal =      j-TCBB,
  volume =       "6",
  number =       "1",
  pages =        "110--117",
  month =        jan,
  year =         "2009",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Mon Mar 2 18:46:49 MST 2009",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Wu:2009:RPT,
  author =       "Taoyang Wu and Vincent Moulton and Mike Steel",
  title =        "Refining Phylogenetic Trees Given Additional Data: An
                 Algorithm Based on Parsimony",
  journal =      j-TCBB,
  volume =       "6",
  number =       "1",
  pages =        "118--125",
  month =        jan,
  year =         "2009",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Mon Mar 2 18:46:49 MST 2009",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Mossel:2009:SEA,
  author =       "Elchanan Mossel and Sebastien Roch and Mike Steel",
  title =        "Shrinkage Effect in Ancestral Maximum Likelihood",
  journal =      j-TCBB,
  volume =       "6",
  number =       "1",
  pages =        "126--133",
  month =        jan,
  year =         "2009",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Mon Mar 2 18:46:49 MST 2009",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Ma:2009:GCU,
  author =       "Jianmin Ma and Minh N. Nguyen and Jagath C.
                 Rajapakse",
  title =        "Gene Classification Using Codon Usage and Support
                 Vector Machines",
  journal =      j-TCBB,
  volume =       "6",
  number =       "1",
  pages =        "134--143",
  month =        jan,
  year =         "2009",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Mon Mar 2 18:46:49 MST 2009",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Maitra:2009:IPO,
  author =       "Ranjan Maitra",
  title =        "Initializing Partition-Optimization Algorithms",
  journal =      j-TCBB,
  volume =       "6",
  number =       "1",
  pages =        "144--157",
  month =        jan,
  year =         "2009",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Mon Mar 2 18:46:49 MST 2009",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Narasimhan:2009:SPG,
  author =       "Sridharakumar Narasimhan and Raghunathan Rengaswamy
                 and Rajanikanth Vadigepalli",
  title =        "Structural Properties of Gene Regulatory Networks:
                 Definitions and Connections",
  journal =      j-TCBB,
  volume =       "6",
  number =       "1",
  pages =        "158--170",
  month =        jan,
  year =         "2009",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Mon Mar 2 18:46:49 MST 2009",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Anonymous:2009:RL,
  author =       "Anonymous",
  title =        "2008 Reviewers List",
  journal =      j-TCBB,
  volume =       "6",
  number =       "1",
  pages =        "171--173",
  month =        jan,
  year =         "2009",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963",
  bibdate =      "Mon Mar 2 18:46:49 MST 2009",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Sagot:2009:EE,
  author =       "Marie-France Sagot",
  title =        "{EIC} Editorial",
  journal =      j-TCBB,
  volume =       "6",
  number =       "2",
  pages =        "177--177",
  month =        apr,
  year =         "2009",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/TCBB.2009.44",
  ISSN =         "1545-5963",
  bibdate =      "Mon Jun 1 17:03:22 MDT 2009",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Mandoiu:2009:GEI,
  author =       "Ion Mandoiu and Yi Pan and Raj Sunderraman and
                 Alexander Zelikovsky",
  title =        "{Guest Editors}' Introduction to the Special Section on
                 Bioinformatics Research and Applications",
  journal =      j-TCBB,
  volume =       "6",
  number =       "2",
  pages =        "178--179",
  month =        apr,
  year =         "2009",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/TCBB.2009.45",
  ISSN =         "1545-5963",
  bibdate =      "Mon Jun 1 17:03:22 MDT 2009",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Treangen:2009:NHL,
  author =       "Todd J. Treangen and Aaron E. Darling and Guillaume
                 Achaz and Mark A. Ragan and Xavier Messeguer and
                 Eduardo P. C. Rocha",
  title =        "A Novel Heuristic for Local Multiple Alignment of
                 Interspersed {DNA} Repeats",
  journal =      j-TCBB,
  volume =       "6",
  number =       "2",
  pages =        "180--189",
  month =        apr,
  year =         "2009",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/TCBB.2009.9",
  ISSN =         "1545-5963",
  bibdate =      "Mon Jun 1 17:03:22 MDT 2009",
  bibsource =    "http://portal.acm.org/",
  abstract =     "Pairwise local sequence alignment methods have been
                 the prevailing technique to identify homologous
                 nucleotides between related species. However, existing
                 methods that identify and align all homologous
                 nucleotides in one or more genomes have suffered from
                 poor scalability and limited accuracy. We propose a
                 novel method that couples a gapped extension heuristic
                 with an efficient filtration method for identifying
                 interspersed repeats in genome sequences. During gapped
                 extension, we use the MUSCLE implementation of
                 progressive global multiple alignment with iterative
                 refinement. The resulting gapped extensions potentially
                 contain alignments of unrelated sequence. We detect and
                 remove such undesirable alignments using a hidden
                 Markov model (HMM) to predict the posterior probability
                 of homology. The HMM emission frequencies for
                 nucleotide substitutions can be derived from any
                 time-reversible nucleotide substitution matrix. We
                 evaluate the performance of our method and previous
                 approaches on a hybrid data set of real genomic DNA
                 with simulated interspersed repeats. Our method
                 outperforms a related method in terms of sensitivity,
                 positive predictive value, and localizing boundaries of
                 homology. The described methods have been implemented
                 in freely available software, Repeatoire, available
                 from: http://wwwabi.snv.jussieu.fr/public/Repeatoire.",
  acknowledgement = ack-nhfb,
  keywords =     "DNA repeats; gapped extension.; genome comparison;
                 hidden Markov model; local multiple alignment; Sequence
                 alignment",
}

@Article{Qiu:2009:FMK,
  author =       "Shibin Qiu and Terran Lane",
  title =        "A Framework for Multiple Kernel Support Vector
                 Regression and Its Applications to {siRNA} Efficacy
                 Prediction",
  journal =      j-TCBB,
  volume =       "6",
  number =       "2",
  pages =        "190--199",
  month =        apr,
  year =         "2009",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/TCBB.2008.139",
  ISSN =         "1545-5963",
  bibdate =      "Mon Jun 1 17:03:22 MDT 2009",
  bibsource =    "http://portal.acm.org/",
  abstract =     "The cell defense mechanism of RNA interference has
                 applications in gene function analysis and promising
                 potentials in human disease therapy. To effectively
                 silence a target gene, it is desirable to select
                 appropriate initiator siRNA molecules having
                 satisfactory silencing capabilities. Computational
                 prediction for silencing efficacy of siRNAs can assist
                 this screening process before using them in biological
                 experiments. String kernel functions, which operate
                 directly on the string objects representing siRNAs and
                 target mRNAs, have been applied to support vector
                 regression for the prediction and improved accuracy
                 over numerical kernels in multidimensional vector
                 spaces constructed from descriptors of siRNA design
                 rules. To fully utilize information provided by string
                 and numerical data, we propose to unify the two in a
                 kernel feature space by devising a multiple kernel
                 regression framework where a linear combination of the
                 kernels is used. We formulate the multiple kernel
                 learning into a quadratically constrained quadratic
                 programming (QCQP) problem, which although yields
                 global optimal solution, is computationally demanding
                 and requires a commercial solver package. We further
                 propose three heuristics based on the principle of
                 kernel-target alignment and predictive accuracy.
                 Empirical results demonstrate that multiple kernel
                 regression can improve accuracy, decrease model
                 complexity by reducing the number of support vectors,
                 and speed up computational performance dramatically. In
                 addition, multiple kernel regression evaluates the
                 importance of constituent kernels, which for the siRNA
                 efficacy prediction problem, compares the relative
                 significance of the design rules. Finally, we give
                 insights into the multiple kernel regression mechanism
                 and point out possible extensions.",
  acknowledgement = ack-nhfb,
  keywords =     "multiple kernel heuristics; Multiple kernel learning;
                 QCQP optimization; RNA interference; siRNA efficacy.;
                 support vector regression",
}

@Article{Park:2009:NBI,
  author =       "Yongjin Park and Stanley Shackney and Russell
                 Schwartz",
  title =        "Network-Based Inference of Cancer Progression from
                 Microarray Data",
  journal =      j-TCBB,
  volume =       "6",
  number =       "2",
  pages =        "200--212",
  month =        apr,
  year =         "2009",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/TCBB.2008.126",
  ISSN =         "1545-5963",
  bibdate =      "Mon Jun 1 17:03:22 MDT 2009",
  bibsource =    "http://portal.acm.org/",
  abstract =     "Cancer cells exhibit a common phenotype of
                 uncontrolled cell growth, but this phenotype may arise
                 from many different combinations of mutations. By
                 inferring how cells evolve in individual tumors, a
                 process called cancer progression, we may be able to
                 identify important mutational events for different
                 tumor types, potentially leading to new therapeutics
                 and diagnostics. Prior work has shown that it is
                 possible to infer frequent progression pathways by
                 using gene expression profiles to estimate
                 &\#8220;distances'' between tumors. Here, we apply gene
                 network models to improve these estimates of
                 evolutionary distance by controlling for correlations
                 among coregulated genes. We test three variants of this
                 approach: one using an optimized best-fit network,
                 another using sampling to infer a high-confidence
                 subnetwork, and one using a modular network inferred
                 from clusters of similarly expressed genes. Application
                 to lung cancer and breast cancer microarray data sets
                 shows small improvements in phylogenies when correcting
                 from the optimized network and more substantial
                 improvements when correcting from the sampled or
                 modular networks. Our results suggest that a network
                 correction approach improves estimates of tumor
                 similarity, but sophisticated network models are needed
                 to control for the large hypothesis space and sparse
                 data currently available.",
  acknowledgement = ack-nhfb,
  keywords =     "Biology and genetics; graphs and networks; machine
                 learning.; trees",
}

@Article{Zhu:2009:GGA,
  author =       "Qian Zhu and Zaky Adam and Vicky Choi and David
                 Sankoff",
  title =        "Generalized Gene Adjacencies, Graph Bandwidth, and
                 Clusters in Yeast Evolution",
  journal =      j-TCBB,
  volume =       "6",
  number =       "2",
  pages =        "213--220",
  month =        apr,
  year =         "2009",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/TCBB.2008.121",
  ISSN =         "1545-5963",
  bibdate =      "Mon Jun 1 17:03:22 MDT 2009",
  bibsource =    "http://portal.acm.org/",
  abstract =     "We present a parameterized definition of gene clusters
                 that allows us to control the emphasis placed on
                 conserved order within a cluster. Though motivated by
                 biological rather than mathematical considerations,
                 this parameter turns out to be closely related to the
                 bandwidth parameter of a graph. Our focus will be on
                 how this parameter affects the characteristics of
                 clusters: how numerous they are, how large they are,
                 how rearranged they are, and to what extent they are
                 preserved from ancestor to descendant in a phylogenetic
                 tree. We infer the latter property by dynamic
                 programming optimization of the presence of individual
                 edges at the ancestral nodes of the phylogeny. We apply
                 our analysis to a set of genomes drawn from the Yeast
                 Gene Order Browser.",
  acknowledgement = ack-nhfb,
  keywords =     "Ashbya gossypii; Candida glabrata; Comparative
                 genomics; dynamic programming; evolution; gene
                 clusters; genome rearrangements; graph bandwidth;
                 Kluyveromyces lactis.; Kluyveromyces waltii; phylogeny;
                 Saccharomyces cerevisiae; yeast",
}

@Article{Bansal:2009:GDP,
  author =       "Mukul S. Bansal and Oliver Eulenstein and Andr{\'e}
                 Wehe",
  title =        "The Gene-Duplication Problem: Near-Linear Time
                 Algorithms for {NNI}-Based Local Searches",
  journal =      j-TCBB,
  volume =       "6",
  number =       "2",
  pages =        "221--231",
  month =        apr,
  year =         "2009",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/TCBB.2009.7",
  ISSN =         "1545-5963",
  bibdate =      "Mon Jun 1 17:03:22 MDT 2009",
  bibsource =    "http://portal.acm.org/",
  abstract =     "The gene-duplication problem is to infer a species
                 supertree from a collection of gene trees that are
                 confounded by complex histories of gene-duplication
                 events. This problem is NP-complete and thus requires
                 efficient and effective heuristics. Existing heuristics
                 perform a stepwise search of the tree space, where each
                 step is guided by an exact solution to an instance of a
                 local search problem. A classical local search problem
                 is the {\tt NNI} search problem, which is based on the
                 nearest neighbor interchange operation. In this work,
                 we 1) provide a novel near-linear time algorithm for
                 the {\tt NNI} search problem, 2) introduce extensions
                 that significantly enlarge the search space of the {\tt
                 NNI} search problem, and 3) present algorithms for
                 these extended versions that are asymptotically just as
                 efficient as our algorithm for the {\tt NNI} search
                 problem. The exceptional speedup achieved in the
                 extended {\tt NNI} search problems makes the
                 gene-duplication problem more tractable for large-scale
                 phylogenetic analyses. We verify the performance of our
                 algorithms in a comparison study using sets of large
                 randomly generated gene trees.",
  acknowledgement = ack-nhfb,
  keywords =     "Computational phylogenetics; gene-duplication; local
                 search; supertrees; {\tt NNI}.",
}

@Article{Sun:2009:DPP,
  author =       "Yanni Sun and Jeremy Buhler",
  title =        "Designing Patterns and Profiles for Faster {HMM}
                 Search",
  journal =      j-TCBB,
  volume =       "6",
  number =       "2",
  pages =        "232--243",
  month =        apr,
  year =         "2009",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/TCBB.2008.14",
  ISSN =         "1545-5963",
  bibdate =      "Mon Jun 1 17:03:22 MDT 2009",
  bibsource =    "http://portal.acm.org/",
  abstract =     "Profile HMMs are powerful tools for modeling conserved
                 motifs in proteins. They are widely used by search
                 tools to classify new protein sequences into families
                 based on domain architecture. However, the
                 proliferation of known motifs and new proteomic
                 sequence data poses a computational challenge for
                 search, requiring days of CPU time to annotate an
                 organism's proteome. It is highly desirable to speed up
                 HMM search in large databases. We design PROSITE-like
                 patterns and short profiles that are used as filters to
                 rapidly eliminate protein-motif pairs for which a full
                 profile HMM comparison does not yield a significant
                 match. The design of the pattern-based filters is
                 formulated as a multichoice knapsack problem.
                 Profile-based filters with high sensitivity are
                 extracted from a profile HMM based on their theoretical
                 sensitivity and false positive rate. Experiments show
                 that our profile-based filters achieve high sensitivity
                 (near 100 percent) while keeping around 20\times
                 speedup with respect to the unfiltered search program.
                 Pattern-based filters typically retain at least 90
                 percent of the sensitivity of the source HMM with
                 30-40\times speedup. The profile-based filters have
                 sensitivity comparable to the multistage filtering
                 strategy HMMERHEAD [15] and are faster in most of our
                 experiments.",
  acknowledgement = ack-nhfb,
  keywords =     "bioinformatics databases; Biology and genetics; hidden
                 Markov models.; sequence similarity search",
}

@Article{Shaik:2009:FAS,
  author =       "Jahangheer Shaik and Mohammed Yeasin",
  title =        "Fuzzy-Adaptive-Subspace-Iteration-Based Two-Way
                 Clustering of Microarray Data",
  journal =      j-TCBB,
  volume =       "6",
  number =       "2",
  pages =        "244--259",
  month =        apr,
  year =         "2009",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/TCBB.2008.15",
  ISSN =         "1545-5963",
  bibdate =      "Mon Jun 1 17:03:22 MDT 2009",
  bibsource =    "http://portal.acm.org/",
  abstract =     "This paper presents
                 Fuzzy-Adaptive-Subspace-Iteration-based Two-way
                 Clustering (FASIC) of microarray data for finding
                 differentially expressed genes (DEGs) from two-sample
                 microarray experiments. The concept of fuzzy membership
                 is introduced to transform the hard adaptive subspace
                 iteration (ASI) algorithm into a fuzzy-ASI algorithm to
                 perform two-way clustering. The proposed approach
                 follows a progressive framework to assign a relevance
                 value to genes associated with each cluster.
                 Subsequently, each gene cluster is scored and ranked
                 based on its potential to provide a correct
                 classification of the sample classes. These ranks are
                 converted into P values using the R-test, and the
                 significance of each gene is determined. A fivefold
                 validation is performed on the DEGs selected using the
                 proposed approach. Empirical analyses on a number of
                 simulated microarray data sets are conducted to
                 quantify the results obtained using the proposed
                 approach. To exemplify the efficacy of the proposed
                 approach, further analyses on different real microarray
                 data sets are also performed.",
  acknowledgement = ack-nhfb,
  keywords =     "classification and association rules; Clustering; data
                 and knowledge visualization; data mining; feature
                 extraction or construction.",
}

@Article{Vignes:2009:GCI,
  author =       "Matthieu Vignes and Florence Forbes",
  title =        "Gene Clustering via Integrated {Markov} Models
                 Combining Individual and Pairwise Features",
  journal =      j-TCBB,
  volume =       "6",
  number =       "2",
  pages =        "260--270",
  month =        apr,
  year =         "2009",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/TCBB.2007.70248",
  ISSN =         "1545-5963",
  bibdate =      "Mon Jun 1 17:03:22 MDT 2009",
  bibsource =    "http://portal.acm.org/",
  abstract =     "Clustering of genes into groups sharing common
                 characteristics is a useful exploratory technique for a
                 number of subsequent computational analysis. A wide
                 range of clustering algorithms have been proposed in
                 particular to analyze gene expression data, but most of
                 them consider genes as independent entities or include
                 relevant information on gene interactions in a
                 suboptimal way. We propose a probabilistic model that
                 has the advantage to account for individual data (e.g.,
                 expression) and pairwise data (e.g., interaction
                 information coming from biological networks)
                 simultaneously. Our model is based on hidden Markov
                 random field models in which parametric probability
                 distributions account for the distribution of
                 individual data. Data on pairs, possibly reflecting
                 distance or similarity measures between genes, are then
                 included through a graph, where the nodes represent the
                 genes, and the edges are weighted according to the
                 available interaction information. As a probabilistic
                 model, this model has many interesting theoretical
                 features. In addition, preliminary experiments on
                 simulated and real data show promising results and
                 points out the gain in using such an approach.
                 Availability: The software used in this work is written
                 in C++ and is available with other supplementary
                 material at
                 http://mistis.inrialpes.fr/people/forbes/transparentia/supplementary.html.",
  acknowledgement = ack-nhfb,
  keywords =     "gene expression.; Markov random fields; metabolic
                 networks; model-based clustering",
}

@Article{Heath:2009:SMN,
  author =       "Lenwood S. Heath and Allan A. Sioson",
  title =        "Semantics of Multimodal Network Models",
  journal =      j-TCBB,
  volume =       "6",
  number =       "2",
  pages =        "271--280",
  month =        apr,
  year =         "2009",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/TCBB.2007.70242",
  ISSN =         "1545-5963",
  bibdate =      "Mon Jun 1 17:03:22 MDT 2009",
  bibsource =    "http://portal.acm.org/",
  abstract =     "A multimodal network (MMN) is a novel graph-theoretic
                 formalism designed to capture the structure of
                 biological networks and to represent relationships
                 derived from multiple biological databases. MMNs
                 generalize the standard notions of graphs and
                 hypergraphs, which are the bases of current
                 diagrammatic representations of biological phenomena,
                 and incorporate the concept of mode. Each vertex of an
                 MMN is a biological entity, a biot, while each modal
                 hyperedge is a typed relationship, where the type is
                 given by the mode of the hyperedge. The semantics of
                 each modal hyperedge e is given through denotational
                 semantics, where a valuation function f\_{e} defines
                 the relationship among the values of the vertices
                 incident on e. The meaning of an MMN is denoted in
                 terms of the semantics of a hyperedge sequence. A
                 companion paper defines MMNs and concentrates on the
                 structural aspects of MMNs. This paper develops MMN
                 denotational semantics when used as a representation of
                 the semantics of biological networks and discusses
                 applications of MMNs in managing complex biological
                 data.",
  acknowledgement = ack-nhfb,
  keywords =     "biological model; biological networks; biot;
                 denotational semantics.; graph; hypergraph; mode;
                 Multimodal network",
}

@Article{Arribas-Gil:2009:SAS,
  author =       "Ana Arribas-Gil and Dirk Metzler and Jean-Louis
                 Plouhinec",
  title =        "Statistical Alignment with a Sequence Evolution Model
                 Allowing Rate Heterogeneity along the Sequence",
  journal =      j-TCBB,
  volume =       "6",
  number =       "2",
  pages =        "281--295",
  month =        apr,
  year =         "2009",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/TCBB.2007.70246",
  ISSN =         "1545-5963",
  bibdate =      "Mon Jun 1 17:03:22 MDT 2009",
  bibsource =    "http://portal.acm.org/",
  abstract =     "We present a stochastic sequence evolution model to
                 obtain alignments and estimate mutation rates between
                 two homologous sequences. The model allows two possible
                 evolutionary behaviors along a DNA sequence in order to
                 determine conserved regions and take its heterogeneity
                 into account. In our model, the sequence is divided
                 into slow and fast evolution regions. The boundaries
                 between these sections are not known. It is our aim to
                 detect them. The evolution model is based on a fragment
                 insertion and deletion process working on fast regions
                 only and on a substitution process working on fast and
                 slow regions with different rates. This model induces a
                 pair hidden Markov structure at the level of
                 alignments, thus making efficient statistical alignment
                 algorithms possible. We propose two complementary
                 estimation methods, namely, a Gibbs sampler for
                 Bayesian estimation and a stochastic version of the EM
                 algorithm for maximum likelihood estimation. Both
                 algorithms involve the sampling of alignments. We
                 propose a partial alignment sampler, which is
                 computationally less expensive than the typical whole
                 alignment sampler. We show the convergence of the two
                 estimation algorithms when used with this partial
                 sampler. Our algorithms provide consistent estimates
                 for the mutation rates and plausible alignments and
                 sequence segmentations on both simulated and real
                 data.",
  acknowledgement = ack-nhfb,
  keywords =     "biology and genetics.; Markov processes; mathematics
                 and statistics; probabilistic algorithms; sequence
                 evolution",
}

@Article{Weber:2009:VET,
  author =       "Gunther H. Weber and Oliver Rubel and Min-Yu Huang and
                 Angela H. DePace and Charless C. Fowlkes and Soile V.
                 E. Keranen and Cris L. Luengo Hendriks and Hans Hagen
                 and David W. Knowles and Jitendra Malik and Mark D.
                 Biggin and Bernd Hamann",
  title =        "Visual Exploration of Three-Dimensional Gene
                 Expression Using Physical Views and Linked Abstract
                 Views",
  journal =      j-TCBB,
  volume =       "6",
  number =       "2",
  pages =        "296--309",
  month =        apr,
  year =         "2009",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/TCBB.2007.70249",
  ISSN =         "1545-5963",
  bibdate =      "Mon Jun 1 17:03:22 MDT 2009",
  bibsource =    "http://portal.acm.org/",
  abstract =     "During animal development, complex patterns of gene
                 expression provide positional information within the
                 embryo. To better understand the underlying gene
                 regulatory networks, the Berkeley Drosophila
                 Transcription Network Project (BDTNP) has developed
                 methods that support quantitative computational
                 analysis of three-dimensional (3D) gene expression in
                 early Drosophila embryos at cellular resolution. We
                 introduce PointCloudXplore (PCX), an interactive
                 visualization tool that supports visual exploration of
                 relationships between different genes' expression using
                 a combination of established visualization techniques.
                 Two aspects of gene expression are of particular
                 interest: 1) gene expression patterns defined by the
                 spatial locations of cells expressing a gene and 2)
                 relationships between the expression levels of multiple
                 genes. PCX provides users with two corresponding
                 classes of data views: 1) Physical Views based on the
                 spatial relationships of cells in the embryo and 2)
                 Abstract Views that discard spatial information and
                 plot expression levels of multiple genes with respect
                 to each other. Cell Selectors highlight data associated
                 with subsets of embryo cells within a View. Using
                 linking, these selected cells can be viewed in multiple
                 representations. We describe PCX as a 3D gene
                 expression visualization tool and provide examples of
                 how it has been used by BDTNP biologists to generate
                 new hypotheses.",
  acknowledgement = ack-nhfb,
  keywords =     "brushing; information visualization; Interactive data
                 exploration; multiple linked views; physical views;
                 scatter plots.; spatial expression patterns;
                 three-dimensional gene expression; visualization",
}

@Article{Dougherty:2009:CBM,
  author =       "Edward R. Dougherty and Marcel Brun and Jeffrey M.
                 Trent and Michael L. Bittner",
  title =        "Conditioning-Based Modeling of Contextual Genomic
                 Regulation",
  journal =      j-TCBB,
  volume =       "6",
  number =       "2",
  pages =        "310--320",
  month =        apr,
  year =         "2009",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/TCBB.2007.70247",
  ISSN =         "1545-5963",
  bibdate =      "Mon Jun 1 17:03:22 MDT 2009",
  bibsource =    "http://portal.acm.org/",
  abstract =     "A more complete understanding of the alterations in
                 cellular regulatory and control mechanisms that occur
                 in the various forms of cancer has been one of the
                 central targets of the genomic and proteomic methods
                 that allow surveys of the abundance and/or state of
                 cellular macromolecules. This preference is driven both
                 by the intractability of cancer to generic therapies,
                 assumed to be due to the highly varied molecular
                 etiologies observed in cancer, and by the opportunity
                 to discern and dissect the regulatory and control
                 interactions presented by the highly diverse assortment
                 of perturbations of regulation and control that arise
                 in cancer. Exploiting the opportunities for inference
                 on the regulatory and control connections offered by
                 these revealing system perturbations is fraught with
                 the practical problems that arise from the way
                 biological systems operate. Two classes of regulatory
                 action in biological systems are particularly inimical
                 to inference, convergent regulation, where a variety of
                 regulatory actions result in a common set of control
                 responses (crosstalk), and divergent regulation, where
                 a single regulatory action produces entirely different
                 sets of control responses, depending on cellular
                 context (conditioning). We have constructed a coarse
                 mathematical model of the propagation of regulatory
                 influence in such distributed, context-sensitive
                 regulatory networks that allows a quantitative
                 estimation of the amount of crosstalk and conditioning
                 associated with a candidate regulatory gene taken from
                 a set of genes that have been profiled over a series of
                 samples where the candidate's activity varies.",
  acknowledgement = ack-nhfb,
  keywords =     "Microarray; regulatory networks.",
}

@Article{Heath:2009:MNS,
  author =       "Lenwood S. Heath and Allan A. Sioson",
  title =        "Multimodal Networks: Structure and Operations",
  journal =      j-TCBB,
  volume =       "6",
  number =       "2",
  pages =        "321--332",
  month =        apr,
  year =         "2009",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/TCBB.2007.70243",
  ISSN =         "1545-5963",
  bibdate =      "Mon Jun 1 17:03:22 MDT 2009",
  bibsource =    "http://portal.acm.org/",
  abstract =     "A multimodal network (MMN) is a novel graph-theoretic
                 formalism designed to capture the structure of
                 biological networks and to represent relationships
                 derived from multiple biological databases. MMNs
                 generalize the standard notions of graphs and
                 hypergraphs, which are the bases of current
                 diagrammatic representations of biological phenomena
                 and incorporate the concept of mode. Each vertex of an
                 MMN is a biological entity, a biot, while each modal
                 hyperedge is a typed relationship, where the type is
                 given by the mode of the hyperedge. The current paper
                 defines MMNs and concentrates on the structural aspects
                 of MMNs. A companion paper develops MMNs as a
                 representation of the semantics of biological networks
                 and discusses applications of the MMNs in managing
                 complex biological data. The MMN model has been
                 implemented in a database system containing multiple
                 kinds of biological networks.",
  acknowledgement = ack-nhfb,
  keywords =     "biological networks; biot.; graph; hypergraph; mode;
                 Multimodal network",
}

@Article{Yukinawa:2009:OAB,
  author =       "Naoto Yukinawa and Shigeyuki Oba and Kikuya Kato and
                 Shin Ishii",
  title =        "Optimal Aggregation of Binary Classifiers for
                 Multiclass Cancer Diagnosis Using Gene Expression
                 Profiles",
  journal =      j-TCBB,
  volume =       "6",
  number =       "2",
  pages =        "333--343",
  month =        apr,
  year =         "2009",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/TCBB.2007.70239",
  ISSN =         "1545-5963",
  bibdate =      "Mon Jun 1 17:03:22 MDT 2009",
  bibsource =    "http://portal.acm.org/",
  abstract =     "Multiclass classification is one of the fundamental
                 tasks in bioinformatics and typically arises in cancer
                 diagnosis studies by gene expression profiling. There
                 have been many studies of aggregating binary
                 classifiers to construct a multiclass classifier based
                 on one-versus-the-rest (1R), one-versus-one (11), or
                 other coding strategies, as well as some comparison
                 studies between them. However, the studies found that
                 the best coding depends on each situation. Therefore, a
                 new problem, which we call the &\#8220;optimal coding
                 problem,'' has arisen: how can we determine which
                 coding is the optimal one in each situation? To
                 approach this optimal coding problem, we propose a
                 novel framework for constructing a multiclass
                 classifier, in which each binary classifier to be
                 aggregated has a weight value to be optimally tuned
                 based on the observed data. Although there is no a
                 priori answer to the optimal coding problem, our weight
                 tuning method can be a consistent answer to the
                 problem. We apply this method to various classification
                 problems including a synthesized data set and some
                 cancer diagnosis data sets from gene expression
                 profiling. The results demonstrate that, in most
                 situations, our method can improve classification
                 accuracy over simple voting heuristics and is better
                 than or comparable to state-of-the-art multiclass
                 predictors.",
  acknowledgement = ack-nhfb,
  keywords =     "cancer diagnosis.; error correcting output coding;
                 gene expression profiling; Multiclass classification",
}

@Article{Olman:2009:PCA,
  author =       "Victor Olman and Fenglou Mao and Hongwei Wu and Ying
                 Xu",
  title =        "Parallel Clustering Algorithm for Large Data Sets with
                 Applications in Bioinformatics",
  journal =      j-TCBB,
  volume =       "6",
  number =       "2",
  pages =        "344--352",
  month =        apr,
  year =         "2009",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/TCBB.2007.70272",
  ISSN =         "1545-5963",
  bibdate =      "Mon Jun 1 17:03:22 MDT 2009",
  bibsource =    "http://portal.acm.org/",
  abstract =     "Large sets of bioinformatical data provide a challenge
                 in time consumption while solving the cluster
                 identification problem, and that is why a parallel
                 algorithm is so needed for identifying dense clusters
                 in a noisy background. Our algorithm works on a graph
                 representation of the data set to be analyzed. It
                 identifies clusters through the identification of
                 densely intraconnected subgraphs. We have employed a
                 minimum spanning tree (MST) representation of the graph
                 and solve the cluster identification problem using this
                 representation. The computational bottleneck of our
                 algorithm is the construction of an MST of a graph, for
                 which a parallel algorithm is employed. Our high-level
                 strategy for the parallel MST construction algorithm is
                 to first partition the graph, then construct MSTs for
                 the partitioned subgraphs and auxiliary bipartite
                 graphs based on the subgraphs, and finally merge these
                 MSTs to derive an MST of the original graph. The
                 computational results indicate that when running on 150
                 CPUs, our algorithm can solve a cluster identification
                 problem on a data set with 1,000,000 data points almost
                 100 times faster than on single CPU, indicating that
                 this program is capable of handling very large data
                 clustering problems in an efficient manner. We have
                 implemented the clustering algorithm as the software
                 CLUMP.",
  acknowledgement = ack-nhfb,
  keywords =     "clustering algorithm; genome application; parallel
                 processing.; Pattern recognition",
}

@Article{Paul:2009:PCC,
  author =       "Topon Kumar Paul and Hitoshi Iba",
  title =        "Prediction of Cancer Class with Majority Voting
                 Genetic Programming Classifier Using Gene Expression
                 Data",
  journal =      j-TCBB,
  volume =       "6",
  number =       "2",
  pages =        "353--367",
  month =        apr,
  year =         "2009",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/TCBB.2007.70245",
  ISSN =         "1545-5963",
  bibdate =      "Mon Jun 1 17:03:22 MDT 2009",
  bibsource =    "http://portal.acm.org/",
  abstract =     "In order to get a better understanding of different
                 types of cancers and to find the possible biomarkers
                 for diseases, recently, many researchers are analyzing
                 the gene expression data using various machine learning
                 techniques. However, due to a very small number of
                 training samples compared to the huge number of genes
                 and class imbalance, most of these methods suffer from
                 overfitting. In this paper, we present a majority
                 voting genetic programming classifier (MVGPC) for the
                 classification of microarray data. Instead of a single
                 rule or a single set of rules, we evolve multiple rules
                 with genetic programming (GP) and then apply those
                 rules to test samples to determine their labels with
                 majority voting technique. By performing experiments on
                 four different public cancer data sets, including
                 multiclass data sets, we have found that the test
                 accuracies of MVGPC are better than those of other
                 methods, including AdaBoost with GP. Moreover, some of
                 the more frequently occurring genes in the
                 classification rules are known to be associated with
                 the types of cancers being studied in this paper.",
  acknowledgement = ack-nhfb,
  keywords =     "Classifier design and evaluation; data mining;
                 evolutionary computing and genetic algorithm; feature
                 extraction; gene expression; majority voting.",
}

@Article{Sagot:2009:EEI,
  author =       "Marie-France Sagot",
  title =        "{EIC} Editorial: Introducing New {Associate Editors}",
  journal =      j-TCBB,
  volume =       "6",
  number =       "3",
  pages =        "369--369",
  month =        jul,
  year =         "2009",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/TCBB.2009.66",
  ISSN =         "1545-5963",
  bibdate =      "Tue Aug 11 18:13:22 MDT 2009",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Bi:2009:MCE,
  author =       "Chengpeng Bi",
  title =        "A {Monte Carlo} {EM} Algorithm for {De Novo Motif}
                 Discovery in Biomolecular Sequences",
  journal =      j-TCBB,
  volume =       "6",
  number =       "3",
  pages =        "370--386",
  month =        jul,
  year =         "2009",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/TCBB.2008.103",
  ISSN =         "1545-5963",
  bibdate =      "Tue Aug 11 18:13:22 MDT 2009",
  bibsource =    "http://portal.acm.org/",
  abstract =     "Motif discovery methods play pivotal roles in
                 deciphering the genetic regulatory codes (i.e., motifs)
                 in genomes as well as in locating conserved domains in
                 protein sequences. The Expectation Maximization (EM)
                 algorithm is one of the most popular methods used in de
                 novo motif discovery. Based on the position weight
                 matrix (PWM) updating technique, this paper presents a
                 Monte Carlo version of the EM motif-finding algorithm
                 that carries out stochastic sampling in local alignment
                 space to overcome the conventional EM's main drawback
                 of being trapped in a local optimum. The newly
                 implemented algorithm is named as Monte Carlo EM Motif
                 Discovery Algorithm (MCEMDA). MCEMDA starts from an
                 initial model, and then it iteratively performs Monte
                 Carlo simulation and parameter update until
                 convergence. A log-likelihood profiling technique
                 together with the top-$k$ strategy is introduced to
                 cope with the phase shifts and multiple modal issues in
                 motif discovery problem. A novel grouping motif
                 alignment (GMA) algorithm is designed to select motifs
                 by clustering a population of candidate local
                 alignments and successfully applied to subtle motif
                 discovery. MCEMDA compares favorably to other popular
                 PWM-based and word enumerative motif algorithms tested
                 using simulated $(l, d)$-motif cases, documented
                 prokaryotic, and eukaryotic DNA motif
                 sequences. Finally, MCEMDA is applied to detect large
                 blocks of conserved domains using protein benchmarks
                 and exhibits its excellent capacity while compared with
                 other multiple sequence alignment methods.",
  acknowledgement = ack-nhfb,
  keywords =     "Expectation maximization (EM); Monte Carlo EM; motif
                 discovery; multiple sequence alignment; transcriptional
                 regulation.",
}

@Article{Stoye:2009:UAR,
  author =       "Jens Stoye and Roland Wittler",
  title =        "A Unified Approach for Reconstructing Ancient Gene
                 Clusters",
  journal =      j-TCBB,
  volume =       "6",
  number =       "3",
  pages =        "387--400",
  month =        jul,
  year =         "2009",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/TCBB.2008.135",
  ISSN =         "1545-5963",
  bibdate =      "Tue Aug 11 18:13:22 MDT 2009",
  bibsource =    "http://portal.acm.org/",
  abstract =     "The order of genes in genomes provides extensive
                 information. In comparative genomics, differences or
                 similarities of gene orders are determined to predict
                 functional relations of genes or phylogenetic relations
                 of genomes. For this purpose, various combinatorial
                 models can be used to identify gene
                 clusters&\#8212;groups of genes that are colocated in a
                 set of genomes. We introduce a unified approach to
                 model gene clusters and define the problem of labeling
                 the inner nodes of a given phylogenetic tree with sets
                 of gene clusters. Our optimization criterion in this
                 context combines two properties: parsimony, i.e., the
                 number of gains and losses of gene clusters has to be
                 minimal, and consistency, i.e., for each ancestral
                 node, there must exist at least one potential gene
                 order that contains all the reconstructed clusters. We
                 present and evaluate an exact algorithm to solve this
                 problem. Despite its exponential worst-case time
                 complexity, our method is suitable even for large-scale
                 data. We show the effectiveness and efficiency on both
                 simulated and real data.",
  acknowledgement = ack-nhfb,
  keywords =     "Comparative genomics; consistency.; gene cluster; gene
                 cluster reconstruction; gene order; parsimony;
                 phylogeny",
}

@Article{Chen:2009:AAM,
  author =       "Xin Chen and Yun Cui",
  title =        "An Approximation Algorithm for the Minimum Breakpoint
                 Linearization Problem",
  journal =      j-TCBB,
  volume =       "6",
  number =       "3",
  pages =        "401--409",
  month =        jul,
  year =         "2009",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/TCBB.2009.3",
  ISSN =         "1545-5963",
  bibdate =      "Tue Aug 11 18:13:22 MDT 2009",
  bibsource =    "http://portal.acm.org/",
  abstract =     "In the recent years, there has been a growing interest
                 in inferring the total order of genes or markers on a
                 chromosome, since current genetic mapping efforts might
                 only suffice to produce a partial order. Many
                 interesting optimization problems were thus formulated
                 in the framework of genome rearrangement. As an
                 important one among them, the minimum breakpoint
                 linearization (MBL) problem is to find the total order
                 of a partially ordered genome that minimizes its
                 breakpoint distance to a reference genome whose genes
                 are already totally ordered. It was previously shown to
                 be NP-hard, and the algorithms proposed so far are all
                 heuristic. In this paper, we present an {m^2+m\over
                 2}-approximation algorithm for the MBL problem, where m
                 is the number of gene maps that are combined together
                 to form a partial order of the genome under
                 investigation.",
  acknowledgement = ack-nhfb,
  keywords =     "approximation algorithms.; breakpoint distance;
                 Comparative genomics; partially ordered genomes",
}

@Article{Wang:2009:EKF,
  author =       "Zidong Wang and Xiaohui Liu and Yurong Liu and Jinling
                 Liang and Veronica Vinciotti",
  title =        "An Extended {Kalman} Filtering Approach to Modeling
                 Nonlinear Dynamic Gene Regulatory Networks via Short
                 Gene Expression Time Series",
  journal =      j-TCBB,
  volume =       "6",
  number =       "3",
  pages =        "410--419",
  month =        jul,
  year =         "2009",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/TCBB.2009.5",
  ISSN =         "1545-5963",
  bibdate =      "Tue Aug 11 18:13:22 MDT 2009",
  bibsource =    "http://portal.acm.org/",
  abstract =     "In this paper, the extended Kalman filter (EKF)
                 algorithm is applied to model the gene regulatory
                 network from gene time series data. The gene regulatory
                 network is considered as a nonlinear dynamic stochastic
                 model that consists of the gene measurement equation
                 and the gene regulation equation. After specifying the
                 model structure, we apply the EKF algorithm for
                 identifying both the model parameters and the actual
                 value of gene expression levels. It is shown that the
                 EKF algorithm is an online estimation algorithm that
                 can identify a large number of parameters (including
                 parameters of nonlinear functions) through iterative
                 procedure by using a small number of observations. Four
                 real-world gene expression data sets are employed to
                 demonstrate the effectiveness of the EKF algorithm, and
                 the obtained models are evaluated from the viewpoint of
                 bioinformatics.",
  acknowledgement = ack-nhfb,
  keywords =     "clustering; DNA microarray technology; extended Kalman
                 filtering; gene expression; Modeling; time series
                 data.",
}

@Article{Bryant:2009:CDT,
  author =       "David Bryant and Mike Steel",
  title =        "Computing the Distribution of a Tree Metric",
  journal =      j-TCBB,
  volume =       "6",
  number =       "3",
  pages =        "420--426",
  month =        jul,
  year =         "2009",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/TCBB.2009.32",
  ISSN =         "1545-5963",
  bibdate =      "Tue Aug 11 18:13:22 MDT 2009",
  bibsource =    "http://portal.acm.org/",
  abstract =     "The Robinson-Foulds (RF) distance is by far the most
                 widely used measure of dissimilarity between trees.
                 Although the distribution of these distances has been
                 investigated for 20 years, an algorithm that is
                 explicitly polynomial time has yet to be described for
                 computing the distribution for trees around a given
                 tree. In this paper, we derive a polynomial-time
                 algorithm for this distribution. We show how the
                 distribution can be approximated by a Poisson
                 distribution determined by the proportion of leaves
                 that lie in &\#8220;cherries'' of the given tree. We
                 also describe how our results can be used to derive
                 normalization constants that are required in a recently
                 proposed maximum likelihood approach to supertree
                 construction.",
  acknowledgement = ack-nhfb,
  keywords =     "Biology and genetics; discrete mathematics
                 applications; normalization constant.; phylogenetics;
                 Poisson approximation; Robinson-Foulds distance;
                 trees",
}

@Article{Hulsman:2009:EOK,
  author =       "Marc Hulsman and Marcel J. T. Reinders and Dick de
                 Ridder",
  title =        "Evolutionary Optimization of Kernel Weights Improves
                 Protein Complex Comembership Prediction",
  journal =      j-TCBB,
  volume =       "6",
  number =       "3",
  pages =        "427--437",
  month =        jul,
  year =         "2009",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/TCBB.2008.137",
  ISSN =         "1545-5963",
  bibdate =      "Tue Aug 11 18:13:22 MDT 2009",
  bibsource =    "http://portal.acm.org/",
  abstract =     "In recent years, more and more high-throughput data
                 sources useful for protein complex prediction have
                 become available (e.g., gene sequence, mRNA expression,
                 and interactions). The integration of these different
                 data sources can be challenging. Recently, it has been
                 recognized that kernel-based classifiers are well
                 suited for this task. However, the different kernels
                 (data sources) are often combined using equal weights.
                 Although several methods have been developed to
                 optimize kernel weights, no large-scale example of an
                 improvement in classifier performance has been shown
                 yet. In this work, we employ an evolutionary algorithm
                 to determine weights for a larger set of kernels by
                 optimizing a criterion based on the area under the ROC
                 curve. We show that setting the right kernel weights
                 can indeed improve performance. We compare this to the
                 existing kernel weight optimization methods (i.e.,
                 (regularized) optimization of the SVM criterion or
                 aligning the kernel with an ideal kernel) and find that
                 these do not result in a significant performance
                 improvement and can even cause a decrease in
                 performance. Results also show that an expert approach
                 of assigning high weights to features with high
                 individual performance is not necessarily the best
                 strategy.",
  acknowledgement = ack-nhfb,
  keywords =     "biology and genetics; Classifier design and
                 evaluation; evolutionary computing and genetic
                 algorithms.",
}

@Article{Chen:2009:IAA,
  author =       "Zhi-Zhong Chen and Lusheng Wang",
  title =        "Improved Approximation Algorithms for Reconstructing
                 the History of Tandem Repeats",
  journal =      j-TCBB,
  volume =       "6",
  number =       "3",
  pages =        "438--453",
  month =        jul,
  year =         "2009",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/TCBB.2008.122",
  ISSN =         "1545-5963",
  bibdate =      "Tue Aug 11 18:13:22 MDT 2009",
  bibsource =    "http://portal.acm.org/",
  abstract =     "Some genetic diseases in human beings are dominated by
                 short sequences repeated consecutively called tandem
                 repeats. Once a region containing tandem repeats is
                 found, it is of great interest to study the history of
                 creating the repeats. The computational problem of
                 reconstructing the duplication history of tandem
                 repeats has been studied extensively in the literature.
                 Almost all previous studies focused on the simplest
                 case where the size of each duplication block is 1.
                 Only recently we succeeded in giving the first
                 polynomial-time approximation algorithm with a
                 guaranteed ratio for a more general case where the size
                 of each duplication block is at most 2; the algorithm
                 achieves a ratio of 6 and runs in O(n^{11}) time. In
                 this paper, we present two new polynomial-time
                 approximation algorithms for this more general case.
                 One of them achieves a ratio of 5 and runs in O(n^9)
                 time, while the other achieves a ratio of 2.5+\epsilon
                 for any constant \epsilon > 0 but runs slower.",
  acknowledgement = ack-nhfb,
  keywords =     "approximation algorithms.; Computational biology",
}

@Article{Cardona:2009:MPN,
  author =       "Gabriel Cardona and Merce Llabres and Francesc
                 Rossello and Gabriel Valiente",
  title =        "Metrics for Phylogenetic Networks {II}: Nodal and
                 Triplets Metrics",
  journal =      j-TCBB,
  volume =       "6",
  number =       "3",
  pages =        "454--469",
  month =        jul,
  year =         "2009",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/TCBB.2008.127",
  ISSN =         "1545-5963",
  bibdate =      "Tue Aug 11 18:13:22 MDT 2009",
  bibsource =    "http://portal.acm.org/",
  abstract =     "The assessment of phylogenetic network reconstruction
                 methods requires the ability to compare phylogenetic
                 networks. This is the second in a series of papers
                 devoted to the analysis and comparison of metrics for
                 tree-child time consistent phylogenetic networks on the
                 same set of taxa. In this paper, we generalize to
                 phylogenetic networks two metrics that have already
                 been introduced in the literature for phylogenetic
                 trees: the nodal distance and the triplets distance. We
                 prove that they are metrics on any class of tree-child
                 time consistent phylogenetic networks on the same set
                 of taxa, as well as some basic properties for them. To
                 prove these results, we introduce a reduction/expansion
                 procedure that can be used not only to establish
                 properties of tree-child time consistent phylogenetic
                 networks by induction, but also to generate all
                 tree-child time consistent phylogenetic networks with a
                 given number of leaves.",
  acknowledgement = ack-nhfb,
  keywords =     "nodal distance; partition distance; Phylogenetic
                 network; temporal representation; time consistency;
                 tree-child phylogenetic network; triplets distance.",
}

@Article{Sotiropoulos:2009:MRM,
  author =       "Vassilios Sotiropoulos and Marrie-Nathalie
                 Contou-Carrere and Prodromos Daoutidis and Yiannis N.
                 Kaznessis",
  title =        "Model Reduction of Multiscale Chemical {Langevin}
                 Equations: {A} Numerical Case Study",
  journal =      j-TCBB,
  volume =       "6",
  number =       "3",
  pages =        "470--482",
  month =        jul,
  year =         "2009",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/TCBB.2009.23",
  ISSN =         "1545-5963",
  bibdate =      "Tue Aug 11 18:13:22 MDT 2009",
  bibsource =    "http://portal.acm.org/",
  abstract =     "Two very important characteristics of biological
                 reaction networks need to be considered carefully when
                 modeling these systems. First, models must account for
                 the inherent probabilistic nature of systems far from
                 the thermodynamic limit. Often, biological systems
                 cannot be modeled with traditional
                 continuous-deterministic models. Second, models must
                 take into consideration the disparate spectrum of time
                 scales observed in biological phenomena, such as slow
                 transcription events and fast dimerization reactions.
                 In the last decade, significant efforts have been
                 expended on the development of stochastic chemical
                 kinetics models to capture the dynamics of biomolecular
                 systems, and on the development of robust multiscale
                 algorithms, able to handle stiffness. In this paper,
                 the focus is on the dynamics of reaction sets governed
                 by stiff chemical Langevin equations, i.e., stiff
                 stochastic differential equations. These are
                 particularly challenging systems to model, requiring
                 prohibitively small integration step sizes. We describe
                 and illustrate the application of a semianalytical
                 reduction framework for chemical Langevin equations
                 that results in significant gains in computational
                 cost.",
  acknowledgement = ack-nhfb,
  keywords =     "chemical Langevin equations (CLEs); Model reduction;
                 multiscale models; stiff biomolecular systems.;
                 stochastic chemical kinetics",
}

@Article{Roytberg:2009:SSP,
  author =       "Mikhail Roytberg and Anna Gambin and Laurent Noe and
                 Slawomir Lasota and Eugenia Furletova and Ewa Szczurek
                 and Gregory Kucherov",
  title =        "On Subset Seeds for Protein Alignment",
  journal =      j-TCBB,
  volume =       "6",
  number =       "3",
  pages =        "483--494",
  month =        jul,
  year =         "2009",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/TCBB.2009.4",
  ISSN =         "1545-5963",
  bibdate =      "Tue Aug 11 18:13:22 MDT 2009",
  bibsource =    "http://portal.acm.org/",
  abstract =     "We apply the concept of subset seeds proposed in [1]
                 to similarity search in protein sequences. The main
                 question studied is the design of efficient seed
                 alphabets to construct seeds with optimal
                 sensitivity/selectivity trade-offs. We propose several
                 different design methods and use them to construct
                 several alphabets. We then perform a comparative
                 analysis of seeds built over those alphabets and
                 compare them with the standard Blastp seeding method
                 [2], [3], as well as with the family of vector seeds
                 proposed in [4]. While the formalism of subset seeds is
                 less expressive (but less costly to implement) than the
                 cumulative principle used in Blastp and vector seeds,
                 our seeds show a similar or even better performance
                 than Blastp on Bernoulli models of proteins compatible
                 with the common BLOSUM62 matrix. Finally, we perform a
                 large-scale benchmarking of our seeds against several
                 main databases of protein alignments. Here again, the
                 results show a comparable or better performance of our
                 seeds versus Blastp.",
  acknowledgement = ack-nhfb,
  keywords =     "local alignment; multiple seeds; protein databases;
                 Protein sequences; seed alphabet; seeds; selectivity.;
                 sensitivity; similarity search; subset seeds",
}

@Article{Jin:2009:PSP,
  author =       "Guohua Jin and Luay Nakhleh and Sagi Snir and Tamir
                 Tuller",
  title =        "Parsimony Score of Phylogenetic Networks: Hardness
                 Results and a Linear-Time Heuristic",
  journal =      j-TCBB,
  volume =       "6",
  number =       "3",
  pages =        "495--505",
  month =        jul,
  year =         "2009",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/TCBB.2008.119",
  ISSN =         "1545-5963",
  bibdate =      "Tue Aug 11 18:13:22 MDT 2009",
  bibsource =    "http://portal.acm.org/",
  abstract =     "Phylogenies&\#8212;the evolutionary histories of
                 groups of organisms&\#8212;play a major role in
                 representing the interrelationships among biological
                 entities. Many methods for reconstructing and studying
                 such phylogenies have been proposed, almost all of
                 which assume that the underlying history of a given set
                 of species can be represented by a binary tree.
                 Although many biological processes can be effectively
                 modeled and summarized in this fashion, others cannot:
                 recombination, hybrid speciation, and horizontal gene
                 transfer result in networks of relationships rather
                 than trees of relationships. In previous works, we
                 formulated a maximum parsimony (MP) criterion for
                 reconstructing and evaluating phylogenetic networks,
                 and demonstrated its quality on biological as well as
                 synthetic data sets. In this paper, we provide further
                 theoretical results as well as a very fast heuristic
                 algorithm for the MP criterion of phylogenetic
                 networks. In particular, we provide a novel
                 combinatorial definition of phylogenetic networks in
                 terms of &\#8220;forbidden cycles,'' and provide
                 detailed hardness and hardness of approximation proofs
                 for the 'small'' MP problem. We demonstrate the
                 performance of our heuristic in terms of time and
                 accuracy on both biological and synthetic data sets.
                 Finally, we explain the difference between our model
                 and a similar one formulated by Nguyen et al., and
                 describe the implications of this difference on the
                 hardness and approximation results.",
  acknowledgement = ack-nhfb,
  keywords =     "hardness and approximation.; horizontal gene transfer;
                 Maximum parsimony; phylogenetic networks",
}

@Article{Thomas:2009:PDS,
  author =       "John Thomas and Naren Ramakrishnan and Chris
                 Bailey-Kellogg",
  title =        "Protein Design by Sampling an Undirected Graphical
                 Model of Residue Constraints",
  journal =      j-TCBB,
  volume =       "6",
  number =       "3",
  pages =        "506--516",
  month =        jul,
  year =         "2009",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/TCBB.2008.124",
  ISSN =         "1545-5963",
  bibdate =      "Tue Aug 11 18:13:22 MDT 2009",
  bibsource =    "http://portal.acm.org/",
  abstract =     "This paper develops an approach for designing protein
                 variants by sampling sequences that satisfy residue
                 constraints encoded in an undirected probabilistic
                 graphical model. Due to evolutionary pressures on
                 proteins to maintain structure and function, the
                 sequence record of a protein family contains valuable
                 information regarding position-specific residue
                 conservation and coupling (or covariation) constraints.
                 Representing these constraints with a graphical model
                 provides two key benefits for protein design: a
                 probabilistic semantics enabling evaluation of possible
                 sequences for consistency with the constraints, and an
                 explicit factorization of residue dependence and
                 independence supporting efficient exploration of the
                 constrained sequence space. We leverage these benefits
                 in developing two complementary MCMC algorithms for
                 protein design: constrained shuffling mixes wild-type
                 sequences positionwise and evaluates graphical model
                 likelihood, while component sampling directly generates
                 sequences by sampling clique values and propagating to
                 other cliques. We apply our methods to design WW
                 domains. We demonstrate that likelihood under a model
                 of wild-type WWs is highly predictive of foldedness of
                 new WWs. We then show both theoretical and rapid
                 empirical convergence of our algorithms in generating
                 high-likelihood, diverse new sequences. We further show
                 that these sequences capture the original sequence
                 constraints, yielding a model as predictive of
                 foldedness as the original one.",
  acknowledgement = ack-nhfb,
  keywords =     "graphical models; Markov chain Monte Carlo (MCMC).;
                 Protein design; residue coupling",
}

@Article{Smith:2009:RSD,
  author =       "Jennifer A. Smith",
  title =        "{RNA} Search with Decision Trees and Partial
                 Covariance Models",
  journal =      j-TCBB,
  volume =       "6",
  number =       "3",
  pages =        "517--527",
  month =        jul,
  year =         "2009",
  CODEN =        "ITCBCY",
  DOI =          "http://dx.doi.org/10.1109/TCBB.2008.120",
  ISSN =         "1545-5963",
  bibdate =      "Tue Aug 11 18:13:22 MDT 2009",
  bibsource =    "http://portal.acm.org/",
  abstract =     "The use of partial covariance models to search for RNA
                 family members in genomic sequence databases is
                 explored. The partial models are formed from contiguous
                 subranges of the overall RNA family multiple alignment
                 columns. A binary decision-tree framework is presented
                 for choosing the order to apply the partial models and
                 the score thresholds on which to make the decisions.
                 The decision trees are chosen to minimize computation
                 time subject to the constraint that all of the training
                 sequences are passed to the full covariance model for
                 final evaluation. Computational intelligence methods
                 are suggested to select the decision tree since the
                 tree can be quite complex and there is no obvious
                 method to build the tree in these cases. Experimental
                 results from seven RNA families shows execution times
                 of 0.066-0.268 relative to using the full covariance
                 model alone. Tests on the full sets of known sequences
                 for each family show that at least 95 percent of these
                 sequences are found for two families and 100 percent
                 for five others. Since the full covariance model is run
                 on all sequences accepted by the partial model decision
                 tree, the false alarm rate is at least as low as that
                 of the full model alone.",
  acknowledgement = ack-nhfb,
  keywords =     "Bioinformatics; computational intelligence; covariance
                 models; decision trees; RNA database search.",
}