%%% -*-BibTeX-*-
%%% ====================================================================
%%% BibTeX-file{
%%% author = "Nelson H. F. Beebe",
%%% version = "1.17",
%%% date = "11 August 2009",
%%% time = "18:30:01 MDT",
%%% filename = "tcbb.bib",
%%% address = "University of Utah
%%% Department of Mathematics, 110 LCB
%%% 155 S 1400 E RM 233
%%% Salt Lake City, UT 84112-0090
%%% USA",
%%% telephone = "+1 801 581 5254",
%%% FAX = "+1 801 581 4148",
%%% URL = "http://www.math.utah.edu/~beebe",
%%% checksum = "30271 8727 45125 408314",
%%% email = "beebe at math.utah.edu, beebe at acm.org,
%%% beebe at computer.org (Internet)",
%%% codetable = "ISO/ASCII",
%%% keywords = "BibTeX; bibliography; IEEE/ACM Transactions
%%% on Computational Biology and
%%% Bioinformatics; TCBB",
%%% license = "public domain",
%%% supported = "yes",
%%% docstring = "This is a COMPLETE BibTeX bibliography for
%%% IEEE/ACM Transactions on Computational
%%% Biology and Bioinformatics (CODEN ITCBCY,
%%% ISSN 1545-5963), covering all journal issues
%%% from 2004 to date.
%%%
%%% At version 1.17, the COMPLETE journal
%%% coverage looked like this:
%%%
%%% 2004 ( 23) 2006 ( 41) 2008 ( 59)
%%% 2005 ( 37) 2007 ( 69) 2009 ( 52)
%%%
%%% Article: 281
%%%
%%% Total entries: 281
%%%
%%% The journal Web pages can be found at:
%%%
%%% http://www.acm.org/pubs/tcbb/
%%% http://portal.acm.org/browse_dl.cfm?idx=J954
%%%
%%% Qualified subscribers can retrieve the full
%%% text of recent articles in PDF form.
%%%
%%% The initial draft was extracted from the ACM
%%% Web pages.
%%%
%%% ACM copyrights explicitly permit abstracting
%%% with credit, so article abstracts, keywords,
%%% and subject classifications have been
%%% included in this bibliography wherever
%%% available. Article reviews have been
%%% omitted, until their copyright status has
%%% been clarified.
%%%
%%% bibsource keys in the bibliography entries
%%% below indicate the entry originally came
%%% from the computer science bibliography
%%% archive, even though it has likely since
%%% been corrected and updated.
%%%
%%% URL keys in the bibliography point to
%%% World Wide Web locations of additional
%%% information about the entry.
%%%
%%% BibTeX citation tags are uniformly chosen
%%% as name:year:abbrev, where name is the
%%% family name of the first author or editor,
%%% year is a 4-digit number, and abbrev is a
%%% 3-letter condensation of important title
%%% words. Citation tags were automatically
%%% generated by software developed for the
%%% BibNet Project.
%%%
%%% In this bibliography, entries are sorted in
%%% publication order, using ``bibsort -byvolume.''
%%%
%%% The checksum field above contains a CRC-16
%%% checksum as the first value, followed by the
%%% equivalent of the standard UNIX wc (word
%%% count) utility output of lines, words, and
%%% characters. This is produced by Robert
%%% Solovay's checksum utility."
%%% }
%%% ====================================================================
@Preamble{"\input bibnames.sty"}
%%% ====================================================================
%%% Acknowledgement abbreviations:
@String{ack-nhfb = "Nelson H. F. Beebe,
University of Utah,
Department of Mathematics, 110 LCB,
155 S 1400 E RM 233,
Salt Lake City, UT 84112-0090, USA,
Tel: +1 801 581 5254,
FAX: +1 801 581 4148,
e-mail: \path|beebe@math.utah.edu|,
\path|beebe@acm.org|,
\path|beebe@computer.org| (Internet),
URL: \path|http://www.math.utah.edu/~beebe/|"}
%%% ====================================================================
%%% Journal abbreviations:
@String{j-TCBB = "IEEE/ACM Transactions on Computational
Biology and Bioinformatics"}
%%% ====================================================================
%%% Bibliography entries:
@Article{Williams:2004:WM,
author = "Michael R. Williams",
title = "Welcome Message",
journal = j-TCBB,
volume = "1",
number = "1",
pages = "1--1",
month = jan,
year = "2004",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Mon Nov 22 06:42:56 MST 2004",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Gusfield:2004:IIA,
author = "Dan Gusfield",
title = "Introduction to the {IEEE\slash ACM Transactions on
Computational Biology and Bioinformatics}",
journal = j-TCBB,
volume = "1",
number = "1",
pages = "2--3",
month = jan,
year = "2004",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Mon Nov 22 06:42:56 MST 2004",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Williams:2004:INA,
author = "Michael R. Williams",
title = "Introduction of New {Associate Editors}",
journal = j-TCBB,
volume = "1",
number = "1",
pages = "4--12",
month = jan,
year = "2004",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Mon Nov 22 06:42:56 MST 2004",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Moret:2004:PNM,
author = "Bernard M. E. Moret and Luay Nakhleh and Tandy Warnow
and C. Randal Linder and Anna Tholse and Anneke
Padolina and Jerry Sun and Ruth Timme",
title = "Phylogenetic Networks: Modeling, Reconstructibility,
and Accuracy",
journal = j-TCBB,
volume = "1",
number = "1",
pages = "13--23",
month = jan,
year = "2004",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Mon Nov 22 06:42:56 MST 2004",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Madeira:2004:BAB,
author = "Sara C. Madeira and Arlindo L. Oliveira",
title = "Biclustering Algorithms for Biological Data Analysis:
{A} Survey",
journal = j-TCBB,
volume = "1",
number = "1",
pages = "24--45",
month = jan,
year = "2004",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Mon Nov 22 06:42:56 MST 2004",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Preparata:2004:SHR,
author = "Franco P. Preparata",
title = "Sequencing-by-Hybridization Revisited: The
Analog-Spectrum Proposal",
journal = j-TCBB,
volume = "1",
number = "1",
pages = "46--52",
month = jan,
year = "2004",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Mon Nov 22 06:42:56 MST 2004",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Hochsmann:2004:PMR,
author = "Matthias H{\"o}chsmann and Bj{\"o}rn Voss and Robert
Giegerich",
title = "Pure Multiple {RNA} Secondary Structure Alignments:
{A} Progressive Profile Approach",
journal = j-TCBB,
volume = "1",
number = "1",
pages = "53--62",
month = jan,
year = "2004",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Mon Nov 22 06:42:56 MST 2004",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Anonymous:2004:INA,
author = "Anonymous",
title = "Introduction of New {Associate Editor}",
journal = j-TCBB,
volume = "1",
number = "2",
pages = "65--65",
month = apr,
year = "2004",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Mon Nov 22 06:42:56 MST 2004",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Witwer:2004:PCR,
author = "Christina Witwer and Ivo L. Hofacker and Peter F.
Stadler",
title = "Prediction of Consensus {RNA} Secondary Structures
Including Pseudoknots",
journal = j-TCBB,
volume = "1",
number = "2",
pages = "66--77",
month = apr,
year = "2004",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Mon Nov 22 06:42:56 MST 2004",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Bafna:2004:NRE,
author = "Vineet Bafna and Vikas Bansal",
title = "The Number of Recombination Events in a Sample
History: Conflict Graph and Lower Bounds",
journal = j-TCBB,
volume = "1",
number = "2",
pages = "78--90",
month = apr,
year = "2004",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Mon Nov 22 06:42:56 MST 2004",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Raphael:2004:UPM,
author = "Benjamin Raphael and Lung-Tien Liu and George
Varghese",
title = "A Uniform Projection Method for Motif Discovery in
{DNA} Sequences",
journal = j-TCBB,
volume = "1",
number = "2",
pages = "91--94",
month = apr,
year = "2004",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Mon Nov 22 06:42:56 MST 2004",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Gusfield:2004:INA,
author = "Dan Gusfield",
title = "Introduction of New {Associate Editors}",
journal = j-TCBB,
volume = "1",
number = "3",
pages = "97--97",
month = jul,
year = "2004",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Mon Jan 24 14:15:55 MST 2005",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Scheid:2004:SDS,
author = "Stefanie Scheid and Rainer Spang",
title = "A Stochastic Downhill Search Algorithm for Estimating
the Local False Discovery Rate",
journal = j-TCBB,
volume = "1",
number = "3",
pages = "98--108",
month = jul,
year = "2004",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Mon Jan 24 14:15:55 MST 2005",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Dress:2004:CSG,
author = "Andreas W. M. Dress and Daniel H. Huson",
title = "Constructing Splits Graphs",
journal = j-TCBB,
volume = "1",
number = "3",
pages = "109--115",
month = jul,
year = "2004",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Mon Jan 24 14:15:55 MST 2005",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Cameron:2004:IGA,
author = "Michael Cameron and Hugh E. Williams and Adam
Cannane",
title = "Improved Gapped Alignment in {BLAST}",
journal = j-TCBB,
volume = "1",
number = "3",
pages = "116--129",
month = jul,
year = "2004",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Mon Jan 24 14:15:55 MST 2005",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Evans:2004:UDT,
author = "Steven N. Evans and Tandy Warnow",
title = "Unidentifiable Divergence Times in Rates-across-Sites
Models",
journal = j-TCBB,
volume = "1",
number = "3",
pages = "130--134",
month = jul,
year = "2004",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Mon Jan 24 14:15:55 MST 2005",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Kim:2004:GEW,
author = "Junhyong Kim and Inge Jonassen",
title = "Guest Editorial: {WABI} Special Section Part 1",
journal = j-TCBB,
volume = "1",
number = "4",
pages = "137--138",
month = oct,
year = "2004",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Mon Jan 24 14:15:55 MST 2005",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Csuros:2004:MSS,
author = "Miklos Csuros",
title = "Maximum-Scoring Segment Sets",
journal = j-TCBB,
volume = "1",
number = "4",
pages = "139--150",
month = oct,
year = "2004",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Mon Jan 24 14:15:55 MST 2005",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Huson:2004:PSN,
author = "Daniel H. Huson and Tobias Dezulian and Tobias Klopper
and Mike A. Steel",
title = "Phylogenetic Super-Networks from Partial Trees",
journal = j-TCBB,
volume = "1",
number = "4",
pages = "151--158",
month = oct,
year = "2004",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Mon Jan 24 14:15:55 MST 2005",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Bannai:2004:ADO,
author = "Hideo Bannai and Heikki Hyyro and Ayumi Shinohara and
Masayuki Takeda and Kenta Nakai and Satoru Miyano",
title = "An {$O(N^2)$} Algorithm for Discovering Optimal
{Boolean} Pattern Pairs",
journal = j-TCBB,
volume = "1",
number = "4",
pages = "159--170",
month = oct,
year = "2004",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Mon Jan 24 14:15:55 MST 2005",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Gramm:2004:PTA,
author = "Jens Gramm",
title = "A Polynomial-Time Algorithm for the Matching of
Crossing Contact-Map Patterns",
journal = j-TCBB,
volume = "1",
number = "4",
pages = "171--180",
month = oct,
year = "2004",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Mon Jan 24 14:15:55 MST 2005",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Ye:2004:UUD,
author = "Jieping Ye and Tao Li and Tao Xiong and Ravi
Janardan",
title = "Using Uncorrelated Discriminant Analysis for Tissue
Classification with Gene Expression Data",
journal = j-TCBB,
volume = "1",
number = "4",
pages = "181--190",
month = oct,
year = "2004",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Mon Jan 24 14:15:55 MST 2005",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Anonymous:2004:AI,
author = "Anonymous",
title = "Annual Index",
journal = j-TCBB,
volume = "1",
number = "4",
pages = "191--192",
month = oct,
year = "2004",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Mon Jan 24 14:15:55 MST 2005",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Kim:2005:GEW,
author = "Junhyong Kim and Inge Jonassen",
title = "Guest Editorial: {WABI} Special Section Part {II}",
journal = j-TCBB,
volume = "2",
number = "1",
pages = "1--2",
month = jan,
year = "2005",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Tue Apr 12 07:11:54 MDT 2005",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Allali:2005:NDH,
author = "Julien Allali and Marie-France Sagot",
title = "A New Distance for High Level {RNA} Secondary
Structure Comparison",
journal = j-TCBB,
volume = "2",
number = "1",
pages = "3--14",
month = jan,
year = "2005",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Tue Apr 12 07:11:54 MDT 2005",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Bertrand:2005:TRL,
author = "Denis Bertrand and Olivier Gascuel",
title = "Topological Rearrangements and Local Search Method for
Tandem Duplication Trees",
journal = j-TCBB,
volume = "2",
number = "1",
pages = "15--28",
month = jan,
year = "2005",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Tue Apr 12 07:11:54 MDT 2005",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Brown:2005:OMS,
author = "Daniel G. Brown",
title = "Optimizing Multiple Seeds for Protein Homology
Search",
journal = j-TCBB,
volume = "2",
number = "1",
pages = "29--38",
month = jan,
year = "2005",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Tue Apr 12 07:11:54 MDT 2005",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Gusfield:2005:EST,
author = "Dan Gusfield",
title = "Editorial-State of the Transaction",
journal = j-TCBB,
volume = "2",
number = "1",
pages = "39--39",
month = jan,
year = "2005",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Tue Apr 12 07:11:54 MDT 2005",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Pisanti:2005:BMG,
author = "Nadia Pisanti and Maxime Crochemore and Roberto Grossi
and Marie-France Sagot",
title = "Bases of Motifs for Generating Repeated Patterns with
Wild Cards",
journal = j-TCBB,
volume = "2",
number = "1",
pages = "40--50",
month = jan,
year = "2005",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Tue Apr 12 07:11:54 MDT 2005",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Kucherov:2005:MLF,
author = "Gregory Kucherov and Laurent Noe and Mikhail
Roytberg",
title = "Multiseed Lossless Filtration",
journal = j-TCBB,
volume = "2",
number = "1",
pages = "51--61",
month = jan,
year = "2005",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Tue Apr 12 07:11:54 MDT 2005",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Liu:2005:TMB,
author = "Ying Liu and Shamkant B. Navathe and Jorge Civera and
Venu Dasigi and Ashwin Ram and Brian J. Ciliax and Ray
Dingledine",
title = "Text Mining Biomedical Literature for Discovering
Gene-to-Gene Relationships: {A} Comparative Study of
Algorithms",
journal = j-TCBB,
volume = "2",
number = "1",
pages = "62--76",
month = jan,
year = "2005",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Tue Apr 12 07:11:54 MDT 2005",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Staff:2005:RL,
author = "{IEEE/ACM Transactions on Computational Biology and
Bioinformatics staff}",
title = "2004 Reviewers List",
journal = j-TCBB,
volume = "2",
number = "1",
pages = "77--77",
month = jan,
year = "2005",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Tue Apr 12 07:11:54 MDT 2005",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Ling:2005:GEIa,
author = "Charles X. Ling and William Stafford Noble and Qiang
Yang",
title = "{Guest Editors}' Introduction to the {Special Issue:
Machine Learning for Bioinformatics---Part 1}",
journal = j-TCBB,
volume = "2",
number = "2",
pages = "81--82",
month = apr,
year = "2005",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Wed Jun 22 17:33:35 MDT 2005",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Au:2005:ACG,
author = "Wai-Ho Au and Keith C. C. Chan and Andrew K. C. Wong
and Yang Wang",
title = "Attribute Clustering for Grouping, Selection, and
Classification of Gene Expression Data",
journal = j-TCBB,
volume = "2",
number = "2",
pages = "83--101",
month = apr,
year = "2005",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Wed Jun 22 17:33:35 MDT 2005",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Biyani:2005:JCP,
author = "Pravesh Biyani and Xiaolin Wu and Abhijit Sinha",
title = "Joint Classification and Pairing of Human
Chromosomes",
journal = j-TCBB,
volume = "2",
number = "2",
pages = "102--109",
month = apr,
year = "2005",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Wed Jun 22 17:33:35 MDT 2005",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Furlanello:2005:SLM,
author = "Cesare Furlanello and Maria Serafini and Stefano
Merler and Giuseppe Jurman",
title = "Semisupervised Learning for Molecular Profiling",
journal = j-TCBB,
volume = "2",
number = "2",
pages = "110--118",
month = apr,
year = "2005",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Wed Jun 22 17:33:35 MDT 2005",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Mamitsuka:2005:ELK,
author = "Hiroshi Mamitsuka",
title = "Essential Latent Knowledge for Protein-Protein
Interactions: Analysis by an Unsupervised Learning
Approach",
journal = j-TCBB,
volume = "2",
number = "2",
pages = "119--130",
month = apr,
year = "2005",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Wed Jun 22 17:33:35 MDT 2005",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Rajapakse:2005:MED,
author = "Jagath C. Rajapakse and Loi Sy Ho",
title = "{Markov} Encoding for Detecting Signals in Genomic
Sequences",
journal = j-TCBB,
volume = "2",
number = "2",
pages = "131--142",
month = apr,
year = "2005",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Wed Jun 22 17:33:35 MDT 2005",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Rogers:2005:LPD,
author = "Simon Rogers and Mark Girolami and Colin Campbell and
Rainer Breitling",
title = "The Latent Process Decomposition of {cDNA} Microarray
Data Sets",
journal = j-TCBB,
volume = "2",
number = "2",
pages = "143--156",
month = apr,
year = "2005",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Wed Jun 22 17:33:35 MDT 2005",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Xu:2005:FRP,
author = "Jinbo Xu",
title = "Fold Recognition by Predicted Alignment Accuracy",
journal = j-TCBB,
volume = "2",
number = "2",
pages = "157--165",
month = apr,
year = "2005",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Wed Jun 22 17:33:35 MDT 2005",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Shen:2005:DRB,
author = "Li Shen and Eng Chong Tan",
title = "Dimension Reduction-Based Penalized Logistic
Regression for Cancer Classification Using Microarray
Data",
journal = j-TCBB,
volume = "2",
number = "2",
pages = "166--175",
month = apr,
year = "2005",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Wed Jun 22 17:33:35 MDT 2005",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Ling:2005:GEIb,
author = "C. X. Ling and W. S. Noble and Q. Yang",
title = "{Guest Editor}'s Introduction to the {Special Issue:
Machine Learning for Bioinformatics---Part 2}",
journal = j-TCBB,
volume = "2",
number = "3",
pages = "177--178",
month = jul,
year = "2005",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Tue Sep 20 06:11:25 MDT 2005",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Schliep:2005:AGE,
author = "Alexander Schliep and Ivan G. Costa and Christine
Steinhoff and Alexander Schonhuth",
title = "Analyzing Gene Expression Time-Courses",
journal = j-TCBB,
volume = "2",
number = "3",
pages = "179--193",
month = jul,
year = "2005",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Tue Sep 20 06:11:25 MDT 2005",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Kundaje:2005:CST,
author = "Anshul Kundaje and Manuel Middendorf and Feng Gao and
Chris Wiggins and Christina Leslie",
title = "Combining Sequence and Time Series Expression Data to
Learn Transcriptional Modules",
journal = j-TCBB,
volume = "2",
number = "3",
pages = "194--202",
month = jul,
year = "2005",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Tue Sep 20 06:11:25 MDT 2005",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Kaski:2005:ACE,
author = "Samuel Kaski and Janne Nikkila and Janne Sinkkonen and
Leo Lahti and Juha E. A. Knuuttila and Christophe
Roos",
title = "Associative Clustering for Exploring Dependencies
between Functional Genomics Data Sets",
journal = j-TCBB,
volume = "2",
number = "3",
pages = "203--216",
month = jul,
year = "2005",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Tue Sep 20 06:11:25 MDT 2005",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Zhang:2005:PMF,
author = "Jingfen Zhang and Wen Gao and Jinjin Cai and Simin He
and Rong Zeng and Runsheng Chen",
title = "Predicting Molecular Formulas of Fragment Ions with
Isotope Patterns in Tandem Mass Spectra",
journal = j-TCBB,
volume = "2",
number = "3",
pages = "217--230",
month = jul,
year = "2005",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Tue Sep 20 06:11:25 MDT 2005",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Keedwell:2005:DGN,
author = "Edward Keedwell and Ajit Narayanan",
title = "Discovering Gene Networks with a Neural-Genetic
Hybrid",
journal = j-TCBB,
volume = "2",
number = "3",
pages = "231--242",
month = jul,
year = "2005",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Tue Sep 20 06:11:25 MDT 2005",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Hawkins:2005:ARN,
author = "John Hawkins and Mikael Boden",
title = "The Applicability of Recurrent Neural Networks for
Biological Sequence Analysis",
journal = j-TCBB,
volume = "2",
number = "3",
pages = "243--253",
month = jul,
year = "2005",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Tue Sep 20 06:11:25 MDT 2005",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Gustafsson:2005:CAL,
author = "Mika Gustafsson and Michael Hornquist and Anna
Lombardi",
title = "Constructing and Analyzing a Large-Scale Gene-to-Gene
Regulatory Network-Lasso-Constrained Inference and
Biological Validation",
journal = j-TCBB,
volume = "2",
number = "3",
pages = "254--261",
month = jul,
year = "2005",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Tue Sep 20 06:11:25 MDT 2005",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Demir:2005:LTP,
author = "Cigdem Demir and S. Humayun Gultekin and Bulent
Yener",
title = "Learning the Topological Properties of Brain Tumors",
journal = j-TCBB,
volume = "2",
number = "3",
pages = "262--270",
month = jul,
year = "2005",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Tue Sep 20 06:11:25 MDT 2005",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Anonymous:2005:CPS,
author = "Anonymous",
title = "Call for Papers for {Special Issue on Computational
Intelligence Approaches in Computational Biology and
Bioinformatics}",
journal = j-TCBB,
volume = "2",
number = "3",
pages = "271--271",
month = jul,
year = "2005",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Tue Sep 20 06:11:25 MDT 2005",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Cickovski:2005:FTD,
author = "Trevor M. Cickovski and Chengbang Huang and Rajiv
Chaturvedi and Tilmann Glimm and H. George E. Hentschel
and Mark S. Alber and James A. Glazier and Stuart A.
Newman and Jesus A. Izaguirre",
title = "A Framework for Three-Dimensional Simulation of
Morphogenesis",
journal = j-TCBB,
volume = "2",
number = "4",
pages = "273--288",
month = oct,
year = "2005",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Fri Nov 18 05:22:15 MST 2005",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Boscolo:2005:GFN,
author = "Riccardo Boscolo and Chiara Sabatti and James C. Liao
and Vwani P. Roychowdhury",
title = "A Generalized Framework for Network Component
Analysis",
journal = j-TCBB,
volume = "2",
number = "4",
pages = "289--301",
month = oct,
year = "2005",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Fri Nov 18 05:22:15 MST 2005",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Chen:2005:AOG,
author = "Xin Chen and Jie Zheng and Zheng Fu and Peng Nan and
Yang Zhong and Stefano Lonardi and Tao Jiang",
title = "Assignment of Orthologous Genes via Genome
Rearrangement",
journal = j-TCBB,
volume = "2",
number = "4",
pages = "302--315",
month = oct,
year = "2005",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Fri Nov 18 05:22:15 MST 2005",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Turner:2005:BMS,
author = "Heather L. Turner and Trevor C. Bailey and Wojtek J.
Krzanowski and Cheryl A. Hemingway",
title = "Biclustering Models for Structured Microarray Data",
journal = j-TCBB,
volume = "2",
number = "4",
pages = "316--329",
month = oct,
year = "2005",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Fri Nov 18 05:22:15 MST 2005",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Sevilla:2005:CBG,
author = "Jose L. Sevilla and Victor Segura and Adam Podhorski
and Elizabeth Guruceaga and Jose M. Mato and Luis A.
Martinez-Cruz and Fernando J. Corrales and Angel
Rubio",
title = "Correlation between Gene Expression and {GO} Semantic
Similarity",
journal = j-TCBB,
volume = "2",
number = "4",
pages = "330--338",
month = oct,
year = "2005",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Fri Nov 18 05:22:15 MST 2005",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Yoon:2005:DCB,
author = "Sungroh Yoon and Christine Nardini and Luca Benini and
Giovanni De Micheli",
title = "Discovering Coherent Biclusters from Gene Expression
Data Using Zero-Suppressed Binary Decision Diagrams",
journal = j-TCBB,
volume = "2",
number = "4",
pages = "339--354",
month = oct,
year = "2005",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Fri Nov 18 05:22:15 MST 2005",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Tseng:2005:EMG,
author = "Vincent S. Tseng and Ching-Pin Kao",
title = "Efficiently Mining Gene Expression Data via a Novel
Parameterless Clustering Method",
journal = j-TCBB,
volume = "2",
number = "4",
pages = "355--365",
month = oct,
year = "2005",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Fri Nov 18 05:22:15 MST 2005",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Zhang:2005:SGN,
author = "Shaojie Zhang and Brian Haas and Eleazar Eskin and
Vineet Bafna",
title = "Searching Genomes for Noncoding {RNA} Using {FastR}",
journal = j-TCBB,
volume = "2",
number = "4",
pages = "366--379",
month = oct,
year = "2005",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Fri Nov 18 05:22:15 MST 2005",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Anonymous:2005:AI,
author = "Anonymous",
title = "2005 Annual Index",
journal = j-TCBB,
volume = "2",
number = "4",
pages = "380--384",
month = oct,
year = "2005",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Fri Nov 18 05:22:15 MST 2005",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Gusfield:2006:SJ,
author = "Dan Gusfield",
title = "State of the Journal",
journal = j-TCBB,
volume = "3",
number = "1",
pages = "1--1",
month = jan,
year = "2006",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/TCBB.2006.12",
ISSN = "1545-5963",
bibdate = "Thu Feb 16 11:06:15 MST 2006",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Berger:2006:JAG,
author = "John A. Berger and Sampsa Hautaniemi and Sanjit K.
Mitra and Jaakko Astola",
title = "Jointly Analyzing Gene Expression and Copy Number Data
in Breast Cancer Using Data Reduction Models",
journal = j-TCBB,
volume = "3",
number = "1",
pages = "2--16",
month = jan,
year = "2006",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Feb 16 11:06:15 MST 2006",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Sebastian:2006:STA,
author = "Rafael Sebastian and Maria-Elena Diaz and Guillermo
Ayala and Kresimir Letinic and Jose Moncho-Bogani and
Derek Toomre",
title = "Spatio-Temporal Analysis of Constitutive Exocytosis in
Epithelial Cells",
journal = j-TCBB,
volume = "3",
number = "1",
pages = "17--32",
month = jan,
year = "2006",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Feb 16 11:06:15 MST 2006",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Hershkovitz:2006:SAR,
author = "Eli Hershkovitz and Guillermo Sapiro and Allen
Tannenbaum and Loren Dean Williams",
title = "Statistical Analysis of {RNA} Backbone",
journal = j-TCBB,
volume = "3",
number = "1",
pages = "33--46",
month = jan,
year = "2006",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Feb 16 11:06:15 MST 2006",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Dawy:2006:GMM,
author = "Zaher Dawy and Bernhard Goebel and Joachim Hagenauer
and Christophe Andreoli and Thomas Meitinger and Jakob
C. Mueller",
title = "Gene Mapping and Marker Clustering Using {Shannon}'s
Mutual Information",
journal = j-TCBB,
volume = "3",
number = "1",
pages = "47--56",
month = jan,
year = "2006",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Feb 16 11:06:15 MST 2006",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Goutsias:2006:HMM,
author = "John Goutsias",
title = "A Hidden {Markov} Model for Transcriptional Regulation
in Single Cells",
journal = j-TCBB,
volume = "3",
number = "1",
pages = "57--71",
month = jan,
year = "2006",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Feb 16 11:06:15 MST 2006",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Rueda:2006:HCA,
author = "Luis Rueda and Vidya Vidyadharan",
title = "A Hill-Climbing Approach for Automatic Gridding of
{cDNA} Microarray Images",
journal = j-TCBB,
volume = "3",
number = "1",
pages = "72--83",
month = jan,
year = "2006",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Feb 16 11:06:15 MST 2006",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Semple:2006:UNC,
author = "Charles Semple and Mike Steel",
title = "Unicyclic Networks: Compatibility and Enumeration",
journal = j-TCBB,
volume = "3",
number = "1",
pages = "84--91",
month = jan,
year = "2006",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Feb 16 11:06:15 MST 2006",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Roch:2006:SPP,
author = "Sebastien Roch",
title = "A Short Proof that Phylogenetic Tree Reconstruction by
Maximum Likelihood Is Hard",
journal = j-TCBB,
volume = "3",
number = "1",
pages = "92--94",
month = jan,
year = "2006",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Feb 16 11:06:15 MST 2006",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Anonymous:2006:RL,
author = "Anonymous",
title = "2005 Reviewers List",
journal = j-TCBB,
volume = "3",
number = "1",
pages = "95--96",
month = jan,
year = "2006",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Feb 16 11:06:15 MST 2006",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Gusfield:2006:INA,
author = "Dan Gusfield",
title = "Introduction of New {Associate Editors}",
journal = j-TCBB,
volume = "3",
number = "2",
pages = "97--97",
month = apr,
year = "2006",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/TCBB.2006.25",
ISSN = "1545-5963",
bibdate = "Wed Jun 7 06:38:18 MDT 2006",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Chu:2006:BSM,
author = "Wei Chu and Zoubin Ghahramani and Alexei
Podtelezhnikov and David L. Wild",
title = "{Bayesian} Segmental Models with Multiple Sequence
Alignment Profiles for Protein Secondary Structure and
Contact Map Prediction",
journal = j-TCBB,
volume = "3",
number = "2",
pages = "98--113",
month = apr,
year = "2006",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/TCBB.2006.17",
ISSN = "1545-5963",
bibdate = "Wed Jun 7 06:38:18 MDT 2006",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Danziger:2006:FCM,
author = "Samuel A. Danziger and S. Joshua Swamidass and Jue
Zeng and Lawrence R. Dearth and Qiang Lu and Jonathan
H. Chen and Jianlin Cheng and Vinh P. Hoang and Hiroto
Saigo and Ray Luo and Pierre Baldi and Rainer K.
Brachmann and Richard H. Lathrop",
title = "Functional Census of Mutation Sequence Spaces: The
Example of p53 Cancer Rescue Mutants",
journal = j-TCBB,
volume = "3",
number = "2",
pages = "114--125",
month = apr,
year = "2006",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/TCBB.2006.22",
ISSN = "1545-5963",
bibdate = "Wed Jun 7 06:38:18 MDT 2006",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Carvalho:2006:EAI,
author = "Alexandra M. Carvalho and Ana T. Freitas and Arlindo
L. Oliveira and Marie-France Sagot",
title = "An Efficient Algorithm for the Identification of
Structured Motifs in {DNA} Promoter Sequences",
journal = j-TCBB,
volume = "3",
number = "2",
pages = "126--140",
month = apr,
year = "2006",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/TCBB.2006.16",
ISSN = "1545-5963",
bibdate = "Wed Jun 7 06:38:18 MDT 2006",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Brown:2006:IPA,
author = "Daniel G. Brown and Ian M. Harrower",
title = "Integer Programming Approaches to Haplotype Inference
by Pure Parsimony",
journal = j-TCBB,
volume = "3",
number = "2",
pages = "141--154",
month = apr,
year = "2006",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/TCBB.2006.24",
ISSN = "1545-5963",
bibdate = "Wed Jun 7 06:38:18 MDT 2006",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Vass:2006:JMB,
author = "Marc T. Vass and Clifford A. Shaffer and Naren
Ramakrishnan and Layne T. Watson and John J. Tyson",
title = "The {JigCell} Model Builder: {A} Spreadsheet Interface
for Creating Biochemical Reaction Network Models",
journal = j-TCBB,
volume = "3",
number = "2",
pages = "155--164",
month = apr,
year = "2006",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/TCBB.2006.27",
ISSN = "1545-5963",
bibdate = "Wed Jun 7 06:38:18 MDT 2006",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Chen:2006:MFS,
author = "Duhong Chen and Oliver Eulenstein and David
Fernandez-Baca and Michael Sanderson",
title = "Minimum-Flip Supertrees: Complexity and Algorithms",
journal = j-TCBB,
volume = "3",
number = "2",
pages = "165--173",
month = apr,
year = "2006",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/TCBB.2006.26",
ISSN = "1545-5963",
bibdate = "Wed Jun 7 06:38:18 MDT 2006",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Sevon:2006:TTP,
author = "Petteri Sevon and Hannu Toivonen and Vesa Ollikainen",
title = "{TreeDT}: Tree Pattern Mining for Gene Mapping",
journal = j-TCBB,
volume = "3",
number = "2",
pages = "174--185",
month = apr,
year = "2006",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/TCBB.2006.28",
ISSN = "1545-5963",
bibdate = "Wed Jun 7 06:38:18 MDT 2006",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Song:2006:CNS,
author = "Yun S. Song",
title = "A Concise Necessary and Sufficient Condition for the
Existence of a Galled-Tree",
journal = j-TCBB,
volume = "3",
number = "2",
pages = "186--191",
month = apr,
year = "2006",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/TCBB.2006.15",
ISSN = "1545-5963",
bibdate = "Wed Jun 7 06:38:18 MDT 2006",
bibsource = "http://portal.acm.org/",
abstract = "Galled-trees are a special class of graphical
representation of evolutionary history that has proven
amenable to efficient, polynomial-time algorithms. The
goal of this paper is to construct a concise necessary
and sufficient condition for the existence of a
galled-tree for $M$, a set of binary sequences that
purportedly have evolved in the presence of
recombination. Both root-known and root-unknown cases
are considered here.",
acknowledgement = ack-nhfb,
}
@Article{Daras:2006:TDS,
author = "Petros Daras and Dimitrios Zarpalas and Apostolos
Axenopoulos and Dimitrios Tzovaras and Michael
Gerassimos Strintzis",
title = "Three-Dimensional Shape-Structure Comparison Method
for Protein Classification",
journal = j-TCBB,
volume = "3",
number = "3",
pages = "193--207",
month = jul,
year = "2006",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Mon Sep 11 07:36:29 MDT 2006",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Yu:2006:MPA,
author = "Weichuan Yu and Xiaoye Li and Junfeng Liu and Baolin
Wu and Kenneth R. Williams and Hongyu Zhao",
title = "Multiple Peak Alignment in Sequential Data Analysis:
{A} Scale-Space-Based Approach",
journal = j-TCBB,
volume = "3",
number = "3",
pages = "208--219",
month = jul,
year = "2006",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Mon Sep 11 07:36:29 MDT 2006",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Abul:2006:PAE,
author = "Osman Abul and Reda Alhajj and Faruk Polat",
title = "A Powerful Approach for Effective Finding of
Significantly Differentially Expressed Genes",
journal = j-TCBB,
volume = "3",
number = "3",
pages = "220--231",
month = jul,
year = "2006",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Mon Sep 11 07:36:29 MDT 2006",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Nagarajan:2006:CSC,
author = "Radhakrishnan Nagarajan and Meenakshi Upreti",
title = "Correlation Statistics for {cDNA} Microarray Image
Analysis",
journal = j-TCBB,
volume = "3",
number = "3",
pages = "232--238",
month = jul,
year = "2006",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Mon Sep 11 07:36:29 MDT 2006",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Song:2006:CAP,
author = "Yun S. Song and Rune Lyngso and Jotun Hein",
title = "Counting All Possible Ancestral Configurations of
Sample Sequences in Population Genetics",
journal = j-TCBB,
volume = "3",
number = "3",
pages = "239--251",
month = jul,
year = "2006",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Mon Sep 11 07:36:29 MDT 2006",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Pirinen:2006:FCG,
author = "Matti Pirinen and Dario Gasbarra",
title = "Finding Consistent Gene Transmission Patterns on Large
and Complex Pedigrees",
journal = j-TCBB,
volume = "3",
number = "3",
pages = "252--262",
month = jul,
year = "2006",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Mon Sep 11 07:36:29 MDT 2006",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Popescu:2006:FMG,
author = "Mihail Popescu and James M. Keller and Joyce A.
Mitchell",
title = "Fuzzy Measures on the Gene Ontology for Gene Product
Similarity",
journal = j-TCBB,
volume = "3",
number = "3",
pages = "263--274",
month = jul,
year = "2006",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Mon Sep 11 07:36:29 MDT 2006",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Bernt:2006:GRB,
author = "Matthias Bernt and Daniel Merkle and Martin
Middendorf",
title = "Genome Rearrangement Based on Reversals that Preserve
Conserved Intervals",
journal = j-TCBB,
volume = "3",
number = "3",
pages = "275--288",
month = jul,
year = "2006",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Mon Sep 11 07:36:29 MDT 2006",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Berry:2006:IPC,
author = "Vincent Berry and Fran{\c{c}}ois Nicolas",
title = "Improved Parameterized Complexity of the Maximum
Agreement Subtree and Maximum Compatible Tree
Problems",
journal = j-TCBB,
volume = "3",
number = "3",
pages = "289--302",
month = jul,
year = "2006",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Mon Sep 11 07:36:29 MDT 2006",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Sharan:2006:ITP,
author = "Roded Sharan and Bjarni V. Halldorsson and Sorin
Istrail",
title = "Islands of Tractability for Parsimony Haplotyping",
journal = j-TCBB,
volume = "3",
number = "3",
pages = "303--311",
month = jul,
year = "2006",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Mon Sep 11 07:36:29 MDT 2006",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Zhang:2006:SGR,
author = "Chaolin Zhang and Xuesong Lu and Xuegong Zhang",
title = "Significance of Gene Ranking for Classification of
Microarray Samples",
journal = j-TCBB,
volume = "3",
number = "3",
pages = "312--320",
month = jul,
year = "2006",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Mon Sep 11 07:36:29 MDT 2006",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Casadio:2006:GEI,
author = "Rita Casadio",
title = "{Guest Editor}'s Introduction to the Special Issue on
Computational Biology and Bioinformatics -- Part 1",
journal = j-TCBB,
volume = "3",
number = "4",
pages = "321--322",
month = oct,
year = "2006",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Nov 30 19:05:58 MST 2006",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Snir:2006:UMC,
author = "Sagi Snir and Satish Rao",
title = "Using Max Cut to Enhance Rooted Trees Consistency",
journal = j-TCBB,
volume = "3",
number = "4",
pages = "323--333",
month = oct,
year = "2006",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Nov 30 19:05:58 MST 2006",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Ganapathy:2006:PIB,
author = "Ganeshkumar Ganapathy and Barbara Goodson and Robert
Jansen and Hai-son Le and Vijaya Ramachandran and Tandy
Warnow",
title = "Pattern Identification in Biogeography",
journal = j-TCBB,
volume = "3",
number = "4",
pages = "334--346",
month = oct,
year = "2006",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Nov 30 19:05:58 MST 2006",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Wernicke:2006:EDN,
author = "Sebastian Wernicke",
title = "Efficient Detection of Network Motifs",
journal = j-TCBB,
volume = "3",
number = "4",
pages = "347--359",
month = oct,
year = "2006",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Nov 30 19:05:58 MST 2006",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Lacroix:2006:MSG,
author = "Vincent Lacroix and Cristina G. Fernandes and
Marie-France Sagot",
title = "Motif Search in Graphs: Application to Metabolic
Networks",
journal = j-TCBB,
volume = "3",
number = "4",
pages = "360--368",
month = oct,
year = "2006",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Nov 30 19:05:58 MST 2006",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Elias:2006:AAS,
author = "Isaac Elias and Tzvika Hartman",
title = "A $1.375$-Approximation Algorithm for Sorting by
Transpositions",
journal = j-TCBB,
volume = "3",
number = "4",
pages = "369--379",
month = oct,
year = "2006",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Nov 30 19:05:58 MST 2006",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Labarre:2006:NBT,
author = "Anthony Labarre",
title = "New Bounds and Tractable Instances for the
Transposition Distance",
journal = j-TCBB,
volume = "3",
number = "4",
pages = "380--394",
month = oct,
year = "2006",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Nov 30 19:05:58 MST 2006",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Sammeth:2006:CTR,
author = "Michael Sammeth and Jens Stoye",
title = "Comparing Tandem Repeats with Duplications and
Excisions of Variable Degree",
journal = j-TCBB,
volume = "3",
number = "4",
pages = "395--407",
month = oct,
year = "2006",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Nov 30 19:05:58 MST 2006",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Bilu:2006:FAO,
author = "Yonatan Bilu and Pankaj K. Agarwal and Rachel
Kolodny",
title = "Faster Algorithms for Optimal Multiple Sequence
Alignment Based on Pairwise Comparisons",
journal = j-TCBB,
volume = "3",
number = "4",
pages = "408--422",
month = oct,
year = "2006",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Nov 30 19:05:58 MST 2006",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Song:2006:EPA,
author = "Yinglei Song and Chunmei Liu and Xiuzhen Huang and
Russell L. Malmberg and Ying Xu and Liming Cai",
title = "Efficient Parameterized Algorithms for Biopolymer
Structure-Sequence Alignment",
journal = j-TCBB,
volume = "3",
number = "4",
pages = "423--432",
month = oct,
year = "2006",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Nov 30 19:05:58 MST 2006",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Anonymous:2006:AI,
author = "Anonymous",
title = "Annual Index",
journal = j-TCBB,
volume = "3",
number = "4",
pages = "??--??",
month = oct,
year = "2006",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Nov 30 19:05:58 MST 2006",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Gusfield:2007:SJ,
author = "Dan Gusfield",
title = "State of the {Journal}",
journal = j-TCBB,
volume = "4",
number = "1",
pages = "1--1",
month = jan,
year = "2007",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:57:20 MDT 2008",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Gusfield:2007:AEAa,
author = "Dan Gusfield",
title = "{Associate Editor} Appreciation and Welcome",
journal = j-TCBB,
volume = "4",
number = "1",
pages = "2--2",
month = jan,
year = "2007",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:57:20 MDT 2008",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Casadio:2007:GEI,
author = "Rita Casadio",
title = "{Guest Editor}'s Introduction to the {Special Section
on Computational Biology and Bioinformatics (WABI)} --
Part 2",
journal = j-TCBB,
volume = "4",
number = "1",
pages = "3--3",
month = jan,
year = "2007",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:57:20 MDT 2008",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Berard:2007:PSR,
author = "Severine B{\'e}rard and Anne Bergeron and Cedric
Chauve and Christophe Paul",
title = "Perfect Sorting by Reversals Is Not Always Difficult",
journal = j-TCBB,
volume = "4",
number = "1",
pages = "4--16",
month = jan,
year = "2007",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:57:20 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "We propose new algorithms for computing pairwise
rearrangement scenarios that conserve the combinatorial
structure of genomes. More precisely, we investigate
the problem of sorting signed permutations by reversals
without breaking common intervals. We describe a
combinatorial framework for this problem that allows us
to characterize classes of signed permutations for
which one can compute, in polynomial time, a shortest
reversal scenario that conserves all common intervals.
In particular, we define a class of permutations for
which this computation can be done in linear time with
a very simple algorithm that does not rely on the
classical Hannenhalli-Pevzner theory for sorting by
reversals. We apply these methods to the computation of
rearrangement scenarios between permutations obtained
from 16 synteny blocks of the X chromosomes of the
human, mouse, and rat.",
acknowledgement = ack-nhfb,
keywords = "common intervals; evolution scenarios; reversals",
}
@Article{Vashist:2007:OCM,
author = "Akshay Vashist and Casimir A. Kulikowski and Ilya
Muchnik",
title = "Ortholog Clustering on a Multipartite Graph",
journal = j-TCBB,
volume = "4",
number = "1",
pages = "17--27",
month = jan,
year = "2007",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:57:20 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "We present a method for automatically extracting
groups of orthologous genes from a large set of genomes
by a new clustering algorithm on a weighted
multipartite graph. The method assigns a score to an
arbitrary subset of genes from multiple genomes to
assess the orthologous relationships between genes in
the subset. This score is computed using sequence
similarities between the member genes and the
phylogenetic relationship between the corresponding
genomes. An ortholog cluster is found as the subset
with the highest score, so ortholog clustering is
formulated as a combinatorial optimization problem. The
algorithm for finding an ortholog cluster runs in time
O(|E|+|V| log|V|), where V and E are the sets of
vertices and edges, respectively, in the graph.
However, if we discretize the similarity scores into a
constant number of bins, the runtime improves to
O(|E|+|V|). The proposed method was applied to seven
complete eukaryote genomes on which the manually
curated database of eukaryotic ortholog clusters, KOG,
is constructed. A comparison of our results with the
manually curated ortholog clusters shows that our
clusters are well correlated with the existing
clusters.",
acknowledgement = ack-nhfb,
keywords = "biology; clustering algorithms; genetics;
Graph-theoretic methods",
}
@Article{Lasker:2007:EDH,
author = "Keren Lasker and Oranit Dror and Maxim Shatsky and
Ruth Nussinov and Haim J. Wolfson",
title = "{EMatch}: Discovery of High Resolution Structural
Homologues of Protein Domains in Intermediate
Resolution Cryo-{EM} Maps",
journal = j-TCBB,
volume = "4",
number = "1",
pages = "28--39",
month = jan,
year = "2007",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:57:20 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "Cryo-EM has become an increasingly powerful technique
for elucidating the structure, dynamics, and function
of large flexible macromolecule assemblies that cannot
be determined at atomic resolution. However, due to the
relatively low resolution of cryo-EM data, a major
challenge is to identify components of complexes
appearing in cryo-EM maps. Here, we describe EMatch, a
novel integrated approach for recognizing structural
homologues of protein domains present in a 6-10{\AA}
resolution cryo-EM map and constructing a quasi-atomic
structural model of their assembly. The method is
highly efficient and has been successfully validated on
various simulated data. The strength of the method is
demonstrated by a domain assembly of an experimental
cryo-EM map of native GroEL at 6{\AA} resolution.",
acknowledgement = ack-nhfb,
keywords = "3D alignment of secondary structures; cyclic symmetry;
intermediate resolution cryo-EM maps; macromolecular
assemblies; structural bioinformatics",
}
@Article{Wang:2007:ACC,
author = "Lipo Wang and Feng Chu and Wei Xie",
title = "Accurate Cancer Classification Using Expressions of
Very Few Genes",
journal = j-TCBB,
volume = "4",
number = "1",
pages = "40--53",
month = jan,
year = "2007",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:57:20 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "We aim at finding the smallest set of genes that can
ensure highly accurate classification of cancers from
microarray data by using supervised machine learning
algorithms. The significance of finding the minimum
gene subsets is three-fold: 1) It greatly reduces the
computational burden and `noise' arising from
irrelevant genes. In the examples studied in this
paper, finding the minimum gene subsets even allows for
extraction of simple diagnostic rules which lead to
accurate diagnosis without the need for any
classifiers. 2) It simplifies gene expression tests to
include only a very small number of genes rather than
thousands of genes, which can bring down the cost for
cancer testing significantly. 3) It calls for further
investigation into the possible biological relationship
between these small numbers of genes and cancer
development and treatment. Our simple yet very
effective method involves two steps. In the first step,
we choose some important genes using a feature
importance ranking scheme. In the second step, we test
the classification capability of all simple
combinations of those important genes by using a good
classifier. For three `small' and `simple' data sets
with two, three, and four cancer (sub)types, our
approach obtained very high accuracy with only two or
three genes. For a `large' and `complex' data set with
14 cancer types, we divided the whole problem into a
group of binary classification problems and applied the
2--step approach to each of these binary classification
problems. Through this `divide-and-conquer' approach,
we obtained accuracy comparable to previously reported
results but with only 28 genes rather than 16,063
genes. In general, our method can significantly reduce
the number of genes required for highly reliable
diagnosis.",
acknowledgement = ack-nhfb,
keywords = "cancer classification; fuzzy; gene expression; neural
networks; support vector machines.",
}
@Article{Zhi:2007:CBA,
author = "Degui Zhi and Uri Keich and Pavel Pevzner and Steffen
Heber and Haixu Tang",
title = "Correcting Base-Assignment Errors in Repeat Regions of
Shotgun Assembly",
journal = j-TCBB,
volume = "4",
number = "1",
pages = "54--64",
month = jan,
year = "2007",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:57:20 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "Accurate base-assignment in repeat regions of a whole
genome shotgun assembly is an unsolved problem. Since
reads in repeat regions cannot be easily attributed to
a unique location in the genome, current assemblers may
place these reads arbitrarily. As a result, the
base-assignment error rate in repeats is likely to be
much higher than that in the rest of the genome. We
developed an iterative algorithm, EULER-AIR, that is
able to correct base-assignment errors in finished
genome sequences in public databases. The Wolbachia
genome is among the best finished genomes. Using this
genome project as an example, we demonstrated that
EULER-AIR can 1) discover and correct base-assignment
errors, 2) provide accurate read assignments, 3)
utilize finishing reads for accurate base-assignment,
and 4) provide guidance for designing finishing
experiments. In the genome of Wolbachia, EULER-AIR
found 16 positions with ambiguous base-assignment and
two positions with erroneous bases. Besides Wolbachia,
many other genome sequencing projects have
significantly fewer finishing reads and, hence, are
likely to contain more base-assignment errors in
repeats. We demonstrate that EULER-AIR is a software
tool that can be used to find and correct
base-assignment errors in a genome assembly project.",
acknowledgement = ack-nhfb,
keywords = "expectation maximization; finishing; fragment
assembly",
}
@Article{Xu:2007:MCC,
author = "Rui Xu and Georgios C. Anagnostopoulos and Donald C.
Wunsch",
title = "Multiclass Cancer Classification Using Semisupervised
Ellipsoid {ARTMAP} and Particle Swarm Optimization with
Gene Expression Data",
journal = j-TCBB,
volume = "4",
number = "1",
pages = "65--77",
month = jan,
year = "2007",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:57:20 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "It is crucial for cancer diagnosis and treatment to
accurately identify the site of origin of a tumor. With
the emergence and rapid advancement of DNA microarray
technologies, constructing gene expression profiles for
different cancer types has already become a promising
means for cancer classification. In addition to
research on binary classification such as normal versus
tumor samples, which attracts numerous efforts from a
variety of disciplines, the discrimination of multiple
tumor types is also important. Meanwhile, the selection
of genes which are relevant to a certain cancer type
not only improves the performance of the classifiers,
but also provides molecular insights for treatment and
drug development. Here, we use Semisupervised Ellipsoid
ARTMAP (ssEAM) for multiclass cancer discrimination and
particle swarm optimization for informative gene
selection. ssEAM is a neural network architecture
rooted in Adaptive Resonance Theory and suitable for
classification tasks. ssEAM features fast, stable, and
finite learning and creates hyperellipsoidal clusters,
inducing complex nonlinear decision boundaries. PSO is
an evolutionary algorithm-based technique for global
optimization. A discrete binary version of PSO is
employed to indicate whether genes are chosen or not.
The effectiveness of ssEAM/PSO for multiclass cancer
diagnosis is demonstrated by testing it on three
publicly available multiple-class cancer data sets.
ssEAM/PSO achieves competitive performance on all these
data sets, with results comparable to or better than
those obtained by other classifiers.",
acknowledgement = ack-nhfb,
keywords = "cancer classification; gene expression profile;
particle swarm optimization; semisupervised ellipsoid
ARTMAP",
}
@Article{Huang:2007:PPP,
author = "Chengbang Huang and Faruck Morcos and Simon P. Kanaan
and Stefan Wuchty and Danny Z. Chen and Jesus A.
Izaguirre",
title = "Predicting Protein-Protein Interactions from Protein
Domains Using a Set Cover Approach",
journal = j-TCBB,
volume = "4",
number = "1",
pages = "78--87",
month = jan,
year = "2007",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:57:20 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "One goal of contemporary proteome research is the
elucidation of cellular protein interactions. Based on
currently available protein-protein interaction and
domain data, we introduce a novel method, Maximum
Specificity Set Cover (MSSC), for the prediction of
protein-protein interactions. In our approach, we map
the relationship between interactions of proteins and
their corresponding domain architectures to a
generalized weighted set cover problem. The application
of a greedy algorithm provides sets of domain
interactions which explain the presence of protein
interactions to the largest degree of specificity.
Utilizing domain and protein interaction data of S.
cerevisiae, MSSC enables prediction of previously
unknown protein interactions, links that are well
supported by a high tendency of coexpression and
functional homogeneity of the corresponding proteins.
Focusing on concrete examples, we show that MSSC
reliably predicts protein interactions in well-studied
molecular systems, such as the 26S proteasome and RNA
polymerase II of S. cerevisiae. We also show that the
quality of the predictions is comparable to the Maximum
Likelihood Estimation while MSSC is faster. This new
algorithm and all data sets used are accessible through
a Web portal at http://ppi.cse.nd.edu.",
acknowledgement = ack-nhfb,
keywords = "bioinformatics (genome or protein) databases; biology;
Computations on discrete structures; genetics; graph
algorithms",
}
@Article{Kim:2007:AAD,
author = "Jong Hyun Kim and Michael S. Waterman and Lei M. Li",
title = "Accuracy Assessment of Diploid Consensus Sequences",
journal = j-TCBB,
volume = "4",
number = "1",
pages = "88--97",
month = jan,
year = "2007",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:57:20 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "If the origins of fragments are known in genome
sequencing projects, it is straightforward to
reconstruct diploid consensus sequences. In reality,
however, this is not true. Although there are proposed
methods to reconstruct haplotypes from genome
sequencing projects, an accuracy assessment is required
to evaluate the confidence of the estimated diploid
consensus sequences. In this paper, we define the
confidence score of diploid consensus sequences. It
requires the calculation of the likelihood of an
assembly. To calculate the likelihood, we propose a
linear time algorithm with respect to the number of
polymorphic sites. The likelihood calculation and
confidence score are used for further improvements of
haplotype estimation in two directions. One direction
is that low-scored phases are disconnected. The other
direction is that, instead of using nominal frequency
1/2, the haplotype frequency is estimated to reflect
the actual contribution of each haplotype. Our method
was evaluated on the simulated data whose polymorphism
rate (1.2 percent) was based on Ciona intestinalis. As
a result, the high accuracy of our algorithm was
indicated: The true positive rate of the haplotype
estimation was greater than 97 percent.",
acknowledgement = ack-nhfb,
keywords = "diploid; haplotype; polymorphism; shotgun sequencing",
}
@Article{Alekseyev:2007:CBG,
author = "Max A. Alekseyev and Pavel A. Pevzner",
title = "Colored de Bruijn Graphs and the Genome Halving
Problem",
journal = j-TCBB,
volume = "4",
number = "1",
pages = "98--107",
month = jan,
year = "2007",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:57:20 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "Breakpoint graph analysis is a key algorithmic
technique in studies of genome rearrangements. However,
breakpoint graphs are defined only for genomes without
duplicated genes, thus limiting their applications in
rearrangement analysis. We discuss a connection between
the breakpoint graphs and de Bruijn graphs that leads
to a generalization of the notion of breakpoint graph
for genomes with duplicated genes. We further use the
generalized breakpoint graphs to study the Genome
Halving Problem (first introduced and solved by Nadia
El-Mabrouk and David Sankoff). The El-Mabrouk-Sankoff
algorithm is rather complex, and, in this paper, we
present an alternative approach that is based on
generalized breakpoint graphs. The generalized
breakpoint graphs make the El-Mabrouk-Sankoff result
more transparent and promise to be useful in future
studies of genome rearrangements.",
acknowledgement = ack-nhfb,
keywords = "breakpoint graph; de Bruijn graph; genome duplication;
genome halving; genome rearrangement; reversal",
}
@Article{Mossel:2007:DMT,
author = "Elchanan Mossel",
title = "Distorted Metrics on Trees and Phylogenetic Forests",
journal = j-TCBB,
volume = "4",
number = "1",
pages = "108--116",
month = jan,
year = "2007",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:57:20 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "We study distorted metrics on binary trees in the
context of phylogenetic reconstruction. Given a binary
tree $T$ on $n$ leaves with a path metric $d$, consider
the pairwise distances $d(u,v)$ between leaves. It is
well known that these determine the tree and the
$d$-length of all edges. Here, we consider distortions
$\hat{d}$ of $d$ such that, for all leaves $u$ and $v$,
it holds that $|d(u,v) - \hat{d}(u,v)| < f/2$ if either
$d(u,v) < M + f/2$ or $\hat{d}(u,v) < M + f/2$, where
$d$ satisfies $f \leq d(e) \leq g$ for all edges $e$.
Given such distortions, we show how to reconstruct in
polynomial time a forest $T_1, \ldots{}, T_\alpha$ such
that the true tree $T$ may be obtained from that forest
by adding $\alpha-1$ edges and $\alpha-1 \leq
2-\Omega(M/g) n$. Our distorted metric result implies a
reconstruction algorithm of phylogenetic forests with a
small number of trees from sequences of length
logarithmic in the number of species. The
reconstruction algorithm is applicable for the general
Markov model. Both the distorted metric result and its
applications to phylogeny are almost tight.",
acknowledgement = ack-nhfb,
keywords = "CFN; distortion; forest; Jukes--Cantor; metric;
phylogenetics; tree",
}
@Article{Aeling:2007:DDE,
author = "Kimberly A. Aeling and Nicholas R. Steffen and Matthew
Johnson and G. Wesley Hatfield and Richard H. Lathrop
and Donald F. Senear",
title = "{DNA} Deformation Energy as an Indirect Recognition
Mechanism in Protein-{DNA} Interactions",
journal = j-TCBB,
volume = "4",
number = "1",
pages = "117--125",
month = jan,
year = "2007",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:57:20 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "Proteins that bind to specific locations in genomic
DNA control many basic cellular functions. Proteins
detect their binding sites using both direct and
indirect recognition mechanisms. Deformation energy,
which models the energy required to bend DNA from its
native shape to its shape when bound to a protein, has
been shown to be an indirect recognition mechanism for
one particular protein, Integration Host Factor (IHF).
This work extends the analysis of deformation to two
other DNA-binding proteins, CRP and SRF, and two
endonucleases, I-CreI and I-PpoI. Known binding sites
for all five proteins showed statistically significant
differences in mean deformation energy as compared to
random sequences. Binding sites for the three
DNA-binding proteins and one of the endonucleases had
mean deformation energies lower than random sequences.
Binding sites for I-PpoI had mean deformation energy
higher than random sequences. Classifiers that were
trained using the deformation energy at each base pair
step showed good cross-validated accuracy when
classifying unseen sequences as binders or nonbinders.
These results support DNA deformation energy as an
indirect recognition mechanism across a wider range of
DNA-binding proteins. Deformation energy may also have
a predictive capacity for the underlying catalytic
mechanism of DNA-binding enzymes.",
acknowledgement = ack-nhfb,
keywords = "deformation energy; DNA bending; DNA-protein binding;
indirect readout; indirect recognition; perceptron
learning",
}
@Article{Yang:2007:MFE,
author = "Jing Yang and Sarawan Wongsa and Visakan
Kadirkamanathan and Stephen A. Billings and Phillip C.
Wright",
title = "Metabolic Flux Estimation-{A} Self-Adaptive
Evolutionary Algorithm with Singular Value
Decomposition",
journal = j-TCBB,
volume = "4",
number = "1",
pages = "126--138",
month = jan,
year = "2007",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:57:20 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "Metabolic flux analysis is important for metabolic
system regulation and intracellular pathway
identification. A popular approach for intracellular
flux estimation involves using ^{13}{\rm C} tracer
experiments to label states that can be measured by
nuclear magnetic resonance spectrometry or gas
chromatography mass spectrometry. However, the bilinear
balance equations derived from ^{13}{\rm C} tracer
experiments and the noisy measurements require a
nonlinear optimization approach to obtain the optimal
solution. In this paper, the flux quantification
problem is formulated as an error-minimization problem
with equality and inequality constraints through the
^{13}{\rm C} balance and stoichiometric equations. The
stoichiometric constraints are transformed to a null
space by singular value decomposition. Self-adaptive
evolutionary algorithms are then introduced for flux
quantification. The performance of the evolutionary
algorithm is compared with ordinary least squares
estimation by the simulation of the central pentose
phosphate pathway. The proposed algorithm is also
applied to the central metabolism of Corynebacterium
glutamicum under lysine-producing conditions. A
comparison between the results from the proposed
algorithm and data from the literature is given. The
complexity of a metabolic system with bidirectional
reactions is also investigated by analyzing the
fluctuations in the flux estimates when available
measurements are varied.",
acknowledgement = ack-nhfb,
keywords = "evolutionary computing; least squares method;
metabolic flux analysis; singular value
decomposition.",
}
@Article{Wu:2007:QBP,
author = "Gang Wu and Jia-Huai You and Guohui Lin",
title = "Quartet-Based Phylogeny Reconstruction with Answer Set
Programming",
journal = j-TCBB,
volume = "4",
number = "1",
pages = "139--152",
month = jan,
year = "2007",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:57:20 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "In this paper, a new representation is presented for
the Maximum Quartet Consistency (MQC) problem, where
solving the MQC problem becomes searching for an
ultrametric matrix that satisfies a maximum number of
given quartet topologies. A number of structural
properties of the MQC problem in this new
representation are characterized through formulating
into answer set programming, a recent powerful logic
programming tool for modeling and solving search
problems. Using these properties, a number of
optimization techniques are proposed to speed up the
search process. The experimental results on a number of
simulated data sets suggest that the new
representation, combined with answer set programming,
presents a unique perspective to the MQC problem.",
acknowledgement = ack-nhfb,
keywords = "Answer Set Programming (ASP); Maximum Quartet
Consistency (MQC); phylogeny; quartet; ultrametric
matrix.",
}
@Article{Reinert:2007:LLE,
author = "Gesine Reinert and Michael S. Waterman",
title = "On the Length of the Longest Exact Position Match in a
Random Sequence",
journal = j-TCBB,
volume = "4",
number = "1",
pages = "153--156",
month = jan,
year = "2007",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:57:20 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "A mixed Poisson approximation and a Poisson
approximation for the length of the longest exact match
of a random sequence across another sequence are
provided, where the match is required to start at
position 1 in the first sequence. This problem arises
when looking for suitable anchors in whole genome
alignments.",
acknowledgement = ack-nhfb,
keywords = "Chen-Stein method; length of longest match; mixed
Poisson approximation; Poisson approximation",
}
@Article{Au:2007:CAC,
author = "Wai-Ho Au and Keith C. C. Chan and Andrew K. C. Wong
and Yang Wang",
title = "Correction to {``Attribute Clustering for Grouping,
Selection, and Classification of Gene Expression
Data''}",
journal = j-TCBB,
volume = "4",
number = "1",
pages = "157--157",
month = jan,
year = "2007",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:57:20 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "This is a correction to a typographical error in (11)
in [1] which present the calculation of the sum of the
multiple significant interdependence redundancy
measure. Equation (11) in [1] should be:
$$k=\arg\max\nolimits_{k\in\{2,\ldots,p\}}\sum_{r=1}^k
\sum_{A_i\in\{C_r-\eta_r\}}R(A_i:\eta_r).$$(11)We
remark that the experimental results reported in [1]
are based on (11) above not (11) in [1].",
acknowledgement = ack-nhfb,
}
@Article{Biology:2007:RL,
author = "IEEE/ACM Transactions on Computational Biology and
Bioinformatics staff",
title = "2006 Reviewers List",
journal = j-TCBB,
volume = "4",
number = "1",
pages = "158--160",
month = jan,
year = "2007",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:57:20 MDT 2008",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Rajapakse:2007:GEI,
author = "Jagath C. Rajapakse and Yan-Qing Zhang and Gary B.
Fogel",
title = "{Guest Editors}' Introduction to the {Special Section:
Computational Intelligence Approaches in Computational
Biology and Bioinformatics}",
journal = j-TCBB,
volume = "4",
number = "2",
pages = "161--162",
month = apr,
year = "2007",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:57:55 MDT 2008",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Wang:2007:PBS,
author = "Haiying Wang and Huiru Zheng and Francisco Azuaje",
title = "{Poisson}-Based Self-Organizing Feature Maps and
Hierarchical Clustering for Serial Analysis of Gene
Expression Data",
journal = j-TCBB,
volume = "4",
number = "2",
pages = "163--175",
month = apr,
year = "2007",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:57:55 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "Serial analysis of gene expression (SAGE) is a
powerful technique for global gene expression
profiling, allowing simultaneous analysis of thousands
of transcripts without prior structural and functional
knowledge. Pattern discovery and visualization have
become fundamental approaches to analyzing such
large-scale gene expression data. From the pattern
discovery perspective, clustering techniques have
received great attention. However, due to the
statistical nature of SAGE data (i.e., underlying
distribution), traditional clustering techniques may
not be suitable for SAGE data analysis. Based on the
adaptation and improvement of Self-Organizing Maps and
hierarchical clustering techniques, this paper presents
two new clustering algorithms, namely, PoissonS and
PoissonHC, for SAGE data analysis. Tested on synthetic
and experimental SAGE data, these algorithms
demonstrate several advantages over traditional pattern
discovery techniques. The results indicate that, by
incorporating statistical properties of SAGE data,
PoissonS and PoissonHC, as well as a hybrid approach
(neuro-hierarchical approach) based on the combination
of PoissonS and PoissonHC, offer significant
improvements in pattern discovery and visualization for
SAGE data. Moreover, a user-friendly platform, which
may improve and accelerate SAGE data mining, was
implemented. The system is freely available on request
from the authors for nonprofit use.",
acknowledgement = ack-nhfb,
keywords = "hybrid machine learning; Pattern discovery and
visualization; Poisson distribution; self-organizing
maps; serial analysis of gene expression.",
}
@Article{Sjahputera:2007:RAC,
author = "Ozy Sjahputera and James M. Keller and J. Wade Davis
and Kristen H. Taylor and Farahnaz Rahmatpanah and
Huidong Shi and Derek T. Anderson and Samuel N. Blisard
and Robert H. Luke and Mihail Popescu and Gerald C.
Arthur and Charles W. Caldwell",
title = "Relational Analysis of {CpG} Islands Methylation and
Gene Expression in Human Lymphomas Using Possibilistic
{C}-Means Clustering and Modified Cluster Fuzzy
Density",
journal = j-TCBB,
volume = "4",
number = "2",
pages = "176--189",
month = apr,
year = "2007",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:57:55 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "Heterogeneous genetic and epigenetic alterations are
commonly found in human non-Hodgkin's lymphomas (NHL).
One such epigenetic alteration is aberrant methylation
of gene promoter-related CpG islands, where
hypermethylation frequently results in transcriptional
inactivation of target genes, while a decrease or loss
of promoter methylation (hypomethylation) is frequently
associated with transcriptional activation. Discovering
genes with these relationships in NHL or other types of
cancers could lead to a better understanding of the
pathobiology of these diseases. The simultaneous
analysis of promoter methylation using Differential
Methylation Hybridization (DMH) and its associated gene
expression using Expressed CpG Island Sequence Tag
(ECIST) microarrays generates a large volume of
methylation-expression relational data. To analyze this
data, we propose a set of algorithms based on fuzzy
sets theory, in particular Possibilistic c-Means (PCM)
and cluster fuzzy density. For each gene, these
algorithms calculate measures of confidence of various
methylation-expression relationships in each NHL
subclass. Thus, these tools can be used as a means of
high volume data exploration to better guide biological
confirmation using independent molecular biology
methods.",
acknowledgement = ack-nhfb,
keywords = "cluster density; clustering; expression; fuzzy sets;
Methylation; microarray",
}
@Article{Lu:2007:ISL,
author = "Yijuan Lu and Qi Tian and Feng Liu and Maribel Sanchez
and Yufeng Wang",
title = "Interactive Semisupervised Learning for Microarray
Analysis",
journal = j-TCBB,
volume = "4",
number = "2",
pages = "190--203",
month = apr,
year = "2007",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:57:55 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "Microarray technology has generated vast amounts of
gene expression data with distinct patterns. Based on
the premise that genes of correlated functions tend to
exhibit similar expression patterns, various machine
learning methods have been applied to capture these
specific patterns in microarray data. However, the
discrepancy between the rich expression profiles and
the limited knowledge of gene functions has been a
major hurdle to the understanding of cellular networks.
To bridge this gap so as to properly comprehend and
interpret expression data, we introduce Relevance
Feedback to microarray analysis and propose an
interactive learning framework to incorporate the
expert knowledge into the decision module. In order to
find a good learning method and solve two intrinsic
problems in microarray data, high dimensionality and
small sample size, we also propose a semisupervised
learning algorithm: Kernel Discriminant-EM (KDEM). This
algorithm efficiently utilizes a large set of unlabeled
data to compensate for the insufficiency of a small set
of labeled data and it extends the linear algorithm in
Discriminant-EM (DEM) to a kernel algorithm to handle
nonlinearly separable data in a lower dimensional
space. The Relevance Feedback technique and KDEM
together construct an efficient and effective
interactive semisupervised learning framework for
microarray analysis. Extensive experiments on the yeast
cell cycle regulation data set and Plasmodium
falciparum red blood cell cycle data set show the
promise of this approach.",
acknowledgement = ack-nhfb,
keywords = "kernel DEM; microarray analysis; relevance feedback;
semisupervised learning",
}
@Article{Lerner:2007:CSI,
author = "Boaz Lerner and Josepha Yeshaya and Lev Koushnir",
title = "On the Classification of a Small Imbalanced
Cytogenetic Image Database",
journal = j-TCBB,
volume = "4",
number = "2",
pages = "204--215",
month = apr,
year = "2007",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:57:55 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "Solving a multiclass classification task using a small
imbalanced database of patterns of high dimension is
difficult due to the curse-of-dimensionality and the
bias of the training toward the majority classes. Such
a problem has arisen while diagnosing genetic
abnormalities by classifying a small database of
fluorescence in situ hybridization signals of types
having different frequencies of occurrence. We propose
and experimentally study using the cytogenetic domain
two solutions to the problem. The first is hierarchical
decomposition of the classification task, where each
hierarchy level is designed to tackle a simpler problem
which is represented by classes that are approximately
balanced. The second solution is balancing the data by
up-sampling the minority classes accompanied by
dimensionality reduction. Implemented by the naive
Bayesian classifier or the multilayer perceptron neural
network, both solutions have diminished the problem and
contributed to accuracy improvement. In addition, the
experiments suggest that coping with the smallness of
the data is more beneficial than dealing with its
imbalance.",
acknowledgement = ack-nhfb,
keywords = "classification; dimensionality reduction; genetic
diagnosis; imbalanced data; multilayer perceptron
(MLP); naive Bayesian classifier (NBC); small sample
size.",
}
@Article{Igel:2007:GBO,
author = "Christian Igel and Tobias Glasmachers and Britta
Mersch and Nico Pfeifer and Peter Meinicke",
title = "Gradient-Based Optimization of Kernel-Target Alignment
for Sequence Kernels Applied to Bacterial Gene Start
Detection",
journal = j-TCBB,
volume = "4",
number = "2",
pages = "216--226",
month = apr,
year = "2007",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:57:55 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "Biological data mining using kernel methods can be
improved by a task-specific choice of the kernel
function. Oligo kernels for genomic sequence analysis
have proven to have a high discriminative power and to
provide interpretable results. Oligo kernels that
consider subsequences of different lengths can be
combined and parameterized to increase their
flexibility. For adapting these parameters efficiently,
gradient-based optimization of the kernel-target
alignment is proposed. The power of this new, general
model selection procedure and the benefits of fitting
kernels to problem classes are demonstrated by adapting
oligo kernels for bacterial gene start detection.",
acknowledgement = ack-nhfb,
keywords = "kernel target alignment; model selection; oligo
kernel; sequence analysis; support vector machines;
translation initiation sites",
}
@Article{Ogul:2007:SLP,
author = "Hasan Ogul and Erkan U. Mumcuo{\u{g}}lu",
title = "Subcellular Localization Prediction with New Protein
Encoding Schemes",
journal = j-TCBB,
volume = "4",
number = "2",
pages = "227--232",
month = apr,
year = "2007",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:57:55 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "Subcellular localization is one of the key properties
in functional annotation of proteins. Support vector
machines (SVMs) have been widely used for automated
prediction of subcellular localizations. Existing
methods differ in the protein encoding schemes used. In
this study, we present two methods for protein encoding
to be used for SVM-based subcellular localization
prediction: n{\hbox{-}}\rm peptide compositions with
reduced amino acid alphabets for larger values of n and
pairwise sequence similarity scores based on whole
sequence and N-terminal sequence. We tested the methods
on a common benchmarking data set that consists of
2,427 eukaryotic proteins with four localization sites.
As a result of 5-fold cross-validation tests, the
encoding with n{\hbox{-}}\rm peptide compositions
provided the accuracies of 84.5, 88.9, 66.3, and 94.3
percent for cytoplasmic, extracellular, mitochondrial,
and nuclear proteins, where the overall accuracy was
87.1 percent. The second method provided 83.6, 87.7,
87.9, and 90.5 percent accuracies for individual
locations and 87.8 percent overall accuracy. A hybrid
system, which we called PredLOC, makes a final decision
based on the results of the two presented methods which
achieved an overall accuracy of 91.3 percent, which is
better than the achievements of many of the existing
methods. The new system also outperformed the recent
methods in the experiments conducted on a new-unique
SWISSPROT test set.",
acknowledgement = ack-nhfb,
keywords = "n{\hbox{-}}\rm peptide composition; probabilistic
suffix tree; subcellular localization; support vector
machines.",
}
@Article{Li:2007:DSD,
author = "Wenyuan Li and Ying Liu and Hung-Chung Huang and
Yanxiong Peng and Yongjing Lin and Wee-Keong Ng and
Kok-Leong Ong",
title = "Dynamical Systems for Discovering Protein Complexes
and Functional Modules from Biological Networks",
journal = j-TCBB,
volume = "4",
number = "2",
pages = "233--250",
month = apr,
year = "2007",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:57:55 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "Recent advances in high throughput experiments and
annotations via published literature have provided a
wealth of interaction maps of several biomolecular
networks, including metabolic, protein-protein, and
protein-DNA interaction networks. The architecture of
these molecular networks reveals important principles
of cellular organization and molecular functions.
Analyzing such networks, i.e., discovering dense
regions in the network, is an important way to identify
protein complexes and functional modules. This task has
been formulated as the problem of finding heavy
subgraphs, the Heaviest k{\hbox{-}}\rm Subgraph Problem
(k{\hbox{-}}\rm HSP), which itself is NP-hard. However,
any method based on the k{\hbox{-}}\rm HSP requires the
parameter k and an exact solution of k{\hbox{-}}\rm HSP
may still end up as a `spurious' heavy subgraph, thus
reducing its practicability in analyzing large scale
biological networks. We proposed a new formulation,
called the rank-HSP, and two dynamical systems to
approximate its results. In addition, a novel metric,
called the Standard deviation and Mean Ratio (SMR), is
proposed for use in `spurious' heavy subgraphs to
automate the discovery by setting a fixed threshold.
Empirical results on both the simulated graphs and
biological networks have demonstrated the efficiency
and effectiveness of our proposal.",
acknowledgement = ack-nhfb,
keywords = "bioinformatics databases; evolutionary computing;
Graph algorithms; neural nets",
}
@Article{Hu:2007:DMP,
author = "Xiaohua Hu and Daniel D. Wu",
title = "Data Mining and Predictive Modeling of Biomolecular
Network from Biomedical Literature Databases",
journal = j-TCBB,
volume = "4",
number = "2",
pages = "251--263",
month = apr,
year = "2007",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:57:55 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "In this paper, we present a novel approach Bio-IEDM
(Biomedical Information Extraction and Data Mining) to
integrate text mining and predictive modeling to
analyze biomolecular network from biomedical literature
databases. Our method consists of two phases. In phase
1, we discuss a semisupervised efficient learning
approach to automatically extract biological
relationships such as protein-protein interaction,
protein-gene interaction from the biomedical literature
databases to construct the biomolecular network. Our
method automatically learns the patterns based on a few
user seed tuples and then extracts new tuples from the
biomedical literature based on the discovered patterns.
The derived biomolecular network forms a large
scale-free network graph. In phase 2, we present a
novel clustering algorithm to analyze the biomolecular
network graph to identify biologically meaningful
subnetworks (communities). The clustering algorithm
considers the characteristics of the scale-free network
graphs and is based on the local density of the vertex
and its neighborhood functions that can be used to find
more meaningful clusters with different density level.
The experimental results indicate our approach is very
effective in extracting biological knowledge from a
huge collection of biomedical literature. The
integration of data mining and information extraction
provides a promising direction for analyzing the
biomolecular network.",
acknowledgement = ack-nhfb,
keywords = "biological complexes (communities); biomolecular
network; information extraction; scale-free network;
semisupervised learning",
}
@Article{Neri:2007:AMA,
author = "Ferrante Neri and Jari Toivanen and Giuseppe Leonardo
Cascella and Yew-Soon Ong",
title = "An Adaptive Multimeme Algorithm for Designing {HIV}
Multidrug Therapies",
journal = j-TCBB,
volume = "4",
number = "2",
pages = "264--278",
month = apr,
year = "2007",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:57:55 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "This paper proposes a period representation for
modeling the multidrug HIV therapies and an Adaptive
Multimeme Algorithm (AMmA) for designing the optimal
therapy. The period representation offers benefits in
terms of flexibility and reduction in dimensionality
compared to the binary representation. The AMmA is a
memetic algorithm which employs a list of three local
searchers adaptively activated by an evolutionary
framework. These local searchers, having different
features according to the exploration logic and the
pivot rule, have the role of exploring the decision
space from different and complementary perspectives
and, thus, assisting the standard evolutionary
operators in the optimization process. Furthermore, the
AMmA makes use of an adaptation which dynamically sets
the algorithmic parameters in order to prevent
stagnation and premature convergence. The numerical
results demonstrate that the application of the
proposed algorithm leads to very efficient medication
schedules which quickly stimulate a strong immune
response to HIV. The earlier termination of the
medication schedule leads to lesser unpleasant side
effects for the patient due to strong antiretroviral
therapy. A numerical comparison shows that the AMmA is
more efficient than three popular metaheuristics.
Finally, a statistical test based on the calculation of
the tolerance interval confirms the superiority of the
AMmA compared to the other methods for the problem
under study.",
acknowledgement = ack-nhfb,
keywords = "adaptive algorithms; HIV therapy design; memetic
algorithms; nonlinear integer programming.",
}
@Article{Handl:2007:MOB,
author = "Julia Handl and Douglas B. Kell and Joshua Knowles",
title = "Multiobjective Optimization in Bioinformatics and
Computational Biology",
journal = j-TCBB,
volume = "4",
number = "2",
pages = "279--292",
month = apr,
year = "2007",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:57:55 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "This paper reviews the application of multiobjective
optimization in the fields of bioinformatics and
computational biology. A survey of existing work,
organized by application area, forms the main body of
the review, following an introduction to the key
concepts in multiobjective optimization. An original
contribution of the review is the identification of
five distinct `contexts,' giving rise to multiple
objectives: These are used to explain the reasons
behind the use of multiobjective optimization in each
application area and also to point the way to potential
future uses of the technique.",
acknowledgement = ack-nhfb,
keywords = "bioinformatics (genome or protein) databases;
classification and association rules; clustering;
experimental design; global optimization; interactive
data exploration and discovery; machine learning",
}
@Article{Bontempi:2007:BSI,
author = "Gianluca Bontempi",
title = "A Blocking Strategy to Improve Gene Selection for
Classification of Gene Expression Data",
journal = j-TCBB,
volume = "4",
number = "2",
pages = "293--300",
month = apr,
year = "2007",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:57:55 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "Because of high dimensionality, machine learning
algorithms typically rely on feature selection
techniques in order to perform effective classification
in microarray gene expression data sets. However, the
large number of features compared to the number of
samples makes the task of feature selection
computationally hard and prone to errors. This paper
interprets feature selection as a task of stochastic
optimization, where the goal is to select among an
exponential number of alternative gene subsets the one
expected to return the highest generalization in
classification. Blocking is an experimental design
strategy which produces similar experimental conditions
to compare alternative stochastic configurations in
order to be confident that observed differences in
accuracy are due to actual differences rather than to
fluctuations and noise effects. We propose an original
blocking strategy for improving feature selection which
aggregates in a paired way the validation outcomes of
several learning algorithms to assess a gene subset and
compare it to others. This is a novelty with respect to
conventional wrappers, which commonly adopt a sole
learning algorithm to evaluate the relevance of a given
set of variables. The rationale of the approach is
that, by increasing the amount of experimental
conditions under which we validate a feature subset, we
can lessen the problems related to the scarcity of
samples and consequently come up with a better
selection. The paper shows that the blocking strategy
significantly improves the performance of a
conventional forward selection for a set of 16 publicly
available cancer expression data sets. The experiments
involve six different classifiers and show that
improvements take place independent of the
classification algorithm used after the selection step.
Two further validations based on available biological
annotation support the claim that blocking strategies
in feature selection may improve the accuracy and the
quality of the solution. The first validation is based
on retrieving PubMEd abstracts associated to the
selected genes and matching them to regular expressions
describing the biological phenomenon underlying the
expression data sets. The biological validation that
follows is based on the use of the Bioconductor package
GoStats in order to perform Gene Ontology statistical
analysis.",
acknowledgement = ack-nhfb,
keywords = "bioinformatics (genome or protein) databases; data
mining; feature evaluation and selection; machine
learning",
}
@Article{Diekmann:2007:EUR,
author = "Yoan Diekmann and Marie-France Sagot and Eric
Tannier",
title = "Evolution under Reversals: Parsimony and Conservation
of Common Intervals",
journal = j-TCBB,
volume = "4",
number = "2",
pages = "301--309",
month = apr,
year = "2007",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:57:55 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "In comparative genomics, gene order data is often
modeled as signed permutations. A classical problem for
genome comparison is to detect common intervals in
permutations, that is, genes that are colocalized in
several species, indicating that they remained grouped
during evolution. A second largely studied problem
related to gene order is to compute a minimum scenario
of reversals that transforms a signed permutation into
another. Several studies began to mix the two problems
and it was observed that their results are not always
compatible: Often, parsimonious scenarios of reversals
break common intervals. If a scenario does not break
any common interval, it is called perfect. In two
recent studies, B{\'e}rard et al. defined a
class of permutations for which building a perfect
scenario of reversals sorting a permutation was
achieved in polynomial time and stated as an open
question whether it is possible to decide, given a
permutation, if there exists a minimum scenario of
reversals that is perfect. In this paper, we give a
solution to this problem and prove that this widens the
class of permutations addressed by the aforementioned
studies. We implemented and tested this algorithm on
gene order data of chromosomes from several mammal
species and we compared it to other methods. The
algorithm helps to choose among several possible
scenarios of reversals and indicates that the minimum
scenario of reversals is not always the most
plausible.",
acknowledgement = ack-nhfb,
keywords = "common intervals; computational biology; genome
rearrangements; perfect sorting; signed permutations;
sorting by reversals",
}
@Article{Weskamp:2007:MGA,
author = "Nils Weskamp and Eyke Hullermeier and Daniel Kuhn and
Gerhard Klebe",
title = "Multiple Graph Alignment for the Structural Analysis
of Protein Active Sites",
journal = j-TCBB,
volume = "4",
number = "2",
pages = "310--320",
month = apr,
year = "2007",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:57:55 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "Graphs are frequently used to describe the geometry
and also the physicochemical composition of protein
active sites. Here, the concept of graph alignment as a
novel method for the structural analysis of protein
binding pockets is presented. Using inexact
graph-matching techniques, one is able to identify both
conserved areas and regions of difference among
different binding pockets. Thus, using multiple graph
alignments, it is possible to characterize functional
protein families and to examine differences among
related protein families independent of sequence or
fold homology. Optimized algorithms are described for
the efficient calculation of multiple graph alignments
for the analysis of physicochemical descriptors
representing protein binding pockets. Additionally, it
is shown how the calculated graph alignments can be
analyzed to identify structural features that are
characteristic for a given protein family and also
features that are discriminative among related
families. The methods are applied to a substantial
high-quality subset of the PDB database and their
ability to successfully characterize and classify 10
highly populated functional protein families is shown.
Additionally, two related protein families from the
group of serine proteases are examined and important
structural differences are detected automatically and
efficiently.",
acknowledgement = ack-nhfb,
keywords = "drug design; fuzzy patterns; graph mining; knowledge
discovery in databases; structural pattern discovery",
}
@Article{Gusfield:2007:AEAb,
author = "Dan Gusfield",
title = "{Associate Editor} Appreciation and Welcome",
journal = j-TCBB,
volume = "4",
number = "3",
pages = "321--321",
month = jul,
year = "2007",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:58:24 MDT 2008",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Fujarewicz:2007:ASM,
author = "Krzysztof Fujarewicz and Marek Kimmel and Tomasz
Lipniacki and Andrzej Swierniak",
title = "Adjoint Systems for Models of Cell Signaling Pathways
and their Application to Parameter Fitting",
journal = j-TCBB,
volume = "4",
number = "3",
pages = "322--335",
month = jul,
year = "2007",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:58:24 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "The paper concerns the problem of fitting mathematical
models of cell signaling pathways. Such models
frequently take the form of sets of nonlinear ordinary
differential equations. While the model is continuous
in time, the performance index used in the fitting
procedure, involves measurements taken at discrete time
moments. Adjoint sensitivity analysis is a tool, which
can be used for finding the gradient of a performance
index in the space of parameters of the model. In the
paper a structural formulation of adjoint sensitivity
analysis called the Generalized Backpropagation Through
Time (GBPTT) is used. The method is especially suited
for hybrid, continuous-discrete time systems. As an
example we use the mathematical model of the NF-kB
regulatory module, which plays a major role in the
innate immune response in animals.",
acknowledgement = ack-nhfb,
keywords = "biology and genetics; modeling; ordinary differential
equations; parameter learning",
}
@Article{Wan:2007:CCN,
author = "Xiang Wan and Guohui Lin",
title = "{CISA}: Combined {NMR} Resonance Connectivity
Information Determination and Sequential Assignment",
journal = j-TCBB,
volume = "4",
number = "3",
pages = "336--348",
month = jul,
year = "2007",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:58:24 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "A nearly complete sequential resonance assignment is a
key factor leading to successful protein structure
determination via NMR spectroscopy. Assuming the
availability of a set of NMR spectral peak lists, most
of the existing assignment algorithms first use the
differences between chemical shift values for common
nuclei across multiple spectra to provide the evidence
that some pairs of peaks should be assigned to
sequentially adjacent amino acid residues in the target
protein. They then use these connectivities as
constraints to produce a sequential assignment. At
various levels of success, these algorithms typically
generate a large number of potential connectivity
constraints, and it grows exponentially as the quality
of spectral data decreases. A key observation used in
our sequential assignment program, CISA, is that
chemical shift residual signature information can be
used to improve the connectivity determination, and
thus to dramatically decrease the number of predicted
connectivity constraints. Fewer connectivity
constraints lead to less ambiguities in the sequential
assignment. Extensive simulation studies on several
large test datasets demonstrated that CISA is efficient
and effective, compared to three most recently proposed
sequential resonance assignment programs RANDOM, PACES,
and MARS.",
acknowledgement = ack-nhfb,
keywords = "NMR sequential resonance assignment; spin system; spin
system assignment; spin system residual signature; spin
system sequential connectivity",
}
@Article{Cameron:2007:CCS,
author = "Michael Cameron and Hugh Williams",
title = "Comparing Compressed Sequences for Faster Nucleotide
{BLAST} Searches",
journal = j-TCBB,
volume = "4",
number = "3",
pages = "349--364",
month = jul,
year = "2007",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:58:24 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "Molecular biologists, geneticists, and other life
scientists use the BLAST homology search package as
their first step for discovery of information about
unknown or poorly annotated genomic sequences. There
are two main variants of BLAST: BLASTP for searching
protein collections and BLASTN for nucleotide
collections. Surprisingly, BLASTN has had very little
attention; for example, the algorithms it uses do not
follow those described in the 1997 BLAST paper [1] and
no exact description has been published. It is
important that BLASTN is state-of-the-art: Nucleotide
collections such as GenBank dwarf the protein
collections in size, they double in size almost yearly,
and they take many minutes to search on modern general
purpose workstations. This paper proposes significant
improvements to the BLASTN algorithms. Each of our
schemes is based on compressed bytepacked formats that
allow queries and collection sequences to be compared
four bases at a time, permitting very fast query
evaluation using lookup tables and numeric comparisons.
Our most significant innovations are two new, fast
gapped alignment schemes that allow accurate sequence
alignment without decompression of the collection
sequences. Overall, our innovations more than double
the speed of BLASTN with no effect on accuracy and have
been integrated into our new version of BLAST that is
freely available for download from
http://www.fsa-blast.org/.",
acknowledgement = ack-nhfb,
keywords = "BLAST; compression; Four Russians algorithm; homology
search; sequence alignment",
}
@Article{Tang:2007:DTS,
author = "Yuchun Tang and Yan-Qing Zhang and Zhen Huang",
title = "Development of Two-Stage {SVM}-{RFE} Gene Selection
Strategy for Microarray Expression Data Analysis",
journal = j-TCBB,
volume = "4",
number = "3",
pages = "365--381",
month = jul,
year = "2007",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:58:24 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "Extracting a subset of informative genes from
microarray expression data is a critical data
preparation step in cancer classification and other
biological function analyses. Though many algorithms
have been developed, the Support Vector Machine -
Recursive Feature Elimination (SVM-RFE) algorithm is
one of the best gene feature selection algorithms. It
assumes that a smaller `filter-out' factor in the
SVM-RFE, which results in a smaller number of gene
features eliminated in each recursion, should lead to
extraction of a better gene subset. Because the SVM-RFE
is highly sensitive to the `filter-out' factor, our
simulations have shown that this assumption is not
always correct and that the SVM-RFE is an unstable
algorithm. To select a set of key gene features for
reliable prediction of cancer types or subtypes and
other applications, a new two-stage SVM-RFE algorithm
has been developed. It is designed to effectively
eliminate most of the irrelevant, redundant and noisy
genes while keeping information loss small at the first
stage. A fine selection for the final gene subset is
then performed at the second stage. The two-stage
SVM-RFE overcomes the instability problem of the
SVM-RFE to achieve better algorithm utility. We have
demonstrated that the two-stage SVM-RFE is
significantly more accurate and more reliable than the
SVM-RFE and three correlation-based methods based on
our analysis of three publicly available microarray
expression datasets. Furthermore, the two-stage SVM-RFE
is computationally efficient because its time
complexity is $O(d * \log{_2d})$, where $d$ is the size
of the original gene set.",
acknowledgement = ack-nhfb,
keywords = "bioinformatics; cancer classification; feature
selection; gene selection; microarray gene expression
data analysis; recursive feature elimination; support
vector machines",
}
@Article{Ng:2007:NGW,
author = "Lydia Ng and Sayan Pathak and Chihchau Kuan and Chris
Lau and Hong-wei Dong and Andrew Sodt and Chinh Dang
and Brian Avants and Paul Yushkevich and James Gee and
David Haynor and Ed Lein and Allan Jones and Mike
Hawrylycz",
title = "Neuroinformatics for Genome-Wide {$3$-D} Gene
Expression Mapping in the Mouse Brain",
journal = j-TCBB,
volume = "4",
number = "3",
pages = "382--393",
month = jul,
year = "2007",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:58:24 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "Large scale gene expression studies in the mammalian
brain offer the promise of understanding the topology,
networks and ultimately the function of its complex
anatomy, opening previously unexplored avenues in
neuroscience. High-throughput methods permit
genome-wide searches to discover genes that are
uniquely expressed in brain circuits and regions that
control behavior. Previous gene expression mapping
studies in model organisms have employed situ
hybridization (ISH), a technique that uses labeled
nucleic acid probes to bind to specific mRNA
transcripts in tissue sections. A key requirement for
this effort is the development of fast and robust
algorithms for anatomically mapping and quantifying
gene expression for ISH. We describe a neuroinformatics
pipeline for automatically mapping expression profiles
of ISH data and its use to produce the first genomic
scale 3-D mapping of gene expression in a mammalian
brain. The pipeline is fully automated and adaptable to
other organisms and tissues. Our automated study of
over 20,000 genes indicates that at least 78.8\% are
expressed at some level in the adult C56BL/6J mouse
brain. In addition to providing a platform for genomic
scale search, high-resolution images and visualization
tools for expression analysis are available at the
Allen Brain Atlas web site
(http://www.brain-map.org).",
acknowledgement = ack-nhfb,
keywords = "bioinformatics (genome or protein) databases; data
mining; information visualization; registration;
segmentation",
}
@Article{Nguyen:2007:RRN,
author = "C. Thach Nguyen and Nguyen Bao Nguyen and Wing-Kin
Sung and Louxin Zhang",
title = "Reconstructing Recombination Network from Sequence
Data: The Small Parsimony Problem",
journal = j-TCBB,
volume = "4",
number = "3",
pages = "394--402",
month = jul,
year = "2007",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:58:24 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "The small parsimony problem is studied for
reconstructing recombination networks from sequence
data. The small parsimony problem is polynomial-time
solvable for phylogenetic trees. However, the problem
is proved NP-hard even for galled recombination
networks. A dynamic programming algorithm is also
developed to solve the small parsimony problem. It
takes $O(dn2^{3h})$ time on an input recombination
network over length-$d$ sequences in which there are
$h$ recombination and $n - h$ tree nodes.",
acknowledgement = ack-nhfb,
keywords = "approximability; combination network; dynamic
programming; NP-hardness; parsimony method;
phylogenetic network",
}
@Article{Lones:2007:RMD,
author = "Michael Lones and Andy Tyrrell",
title = "Regulatory Motif Discovery Using a Population
Clustering Evolutionary Algorithm",
journal = j-TCBB,
volume = "4",
number = "3",
pages = "403--414",
month = jul,
year = "2007",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:58:24 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "This paper describes a novel evolutionary algorithm
for regulatory motif discovery in DNA promoter
sequences. The algorithm uses data clustering to
logically distribute the evolving population across the
search space. Mating then takes place within local
regions of the population, promoting overall solution
diversity and encouraging discovery of multiple
solutions. Experiments using synthetic data sets have
demonstrated the algorithm's capacity to find position
frequency matrix models of known regulatory motifs in
relatively long promoter sequences. These experiments
have also shown the algorithm's ability to maintain
diversity during search and discover multiple motifs
within a single population. The utility of the
algorithm for discovering motifs in real biological
data is demonstrated by its ability to find meaningful
motifs within muscle-specific regulatory sequences.",
acknowledgement = ack-nhfb,
keywords = "evolutionary computation; motif discovery;
muscle-specific gene expression; population-based data
clustering; transcription factor binding sites",
}
@Article{Yip:2007:SIS,
author = "Andy M. Yip and Michael K. Ng and Edmond H. Wu and
Tony F. Chan",
title = "Strategies for Identifying Statistically Significant
Dense Regions in Microarray Data",
journal = j-TCBB,
volume = "4",
number = "3",
pages = "415--429",
month = jul,
year = "2007",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:58:24 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "We propose and study the notion of dense regions for
the analysis of categorized gene expression data and
present some searching algorithms for discovering them.
The algorithms can be applied to any categorical data
matrices derived from gene expression level matrices.
We demonstrate that dense regions are simple but useful
and statistically significant patterns that can be used
to 1) identify genes and/or samples of interest and 2)
eliminate genes and/or samples corresponding to
outliers, noise, or abnormalities. Some theoretical
studies on the properties of the dense regions are
presented which allow us to characterize dense regions
into several classes and to derive tailor-made
algorithms for different classes of regions. Moreover,
an empirical simulation study on the distribution of
the size of dense regions is carried out which is then
used to assess the significance of dense regions and to
derive effective pruning methods to speed up the
searching algorithms. Real microarray data sets are
employed to test our methods. Comparisons with six
other well-known clustering algorithms using synthetic
and real data are also conducted which confirm the
superiority of our methods in discovering dense
regions. The DRIFT code and a tutorial are available as
supplemental material, which can be found on the
Computer Society Digital Library at
http://computer.org/tcbb/archives.htm.",
acknowledgement = ack-nhfb,
keywords = "bicluster; categorical data; clustering; coexpressed
genes; dense region; gene expression; microarray",
}
@Article{Liang:2007:BBD,
author = "Kuo-ching Liang and Xiaodong Wang and Dimitris
Anastassiou",
title = "Bayesian Basecalling for {DNA} Sequence Analysis Using
Hidden Markov Models",
journal = j-TCBB,
volume = "4",
number = "3",
pages = "430--440",
month = jul,
year = "2007",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:58:24 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "It has been shown that electropherograms of DNA
sequences can be modeled with hidden Markov models.
Basecalling, the procedure that determines the sequence
of bases from the given eletropherogram, can then be
performed using the Viterbi algorithm. A training step
is required prior to basecalling in order to estimate
the HMM parameters. In this paper, we propose a
Bayesian approach which employs the Markov chain Monte
Carlo (MCMC) method to perform basecalling. Such an
approach not only allows one to naturally encode the
prior biological knowledge into the basecalling
algorithm, it also exploits both the training data and
the basecalling data in estimating the HMM parameters,
leading to more accurate estimates. Using the recently
sequenced genome of the organism Legionella pneumophila
we show that the MCMC basecaller outperforms the
state-of-the-art basecalling algorithm in terms of
total errors while requiring much less training than
other proposed statistical basecallers.",
acknowledgement = ack-nhfb,
keywords = "basecalling; DNA sequencing; electropherogram; hidden
Markov model (HMM); Markov chain Monte Carlo (MCMC)",
}
@Article{Thireou:2007:BLS,
author = "Trias Thireou and Martin Reczko",
title = "Bidirectional Long Short-Term Memory Networks for
Predicting the Subcellular Localization of Eukaryotic
Proteins",
journal = j-TCBB,
volume = "4",
number = "3",
pages = "441--446",
month = jul,
year = "2007",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:58:24 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "An algorithm called Bidirectional Long Short-Term
Memory Networks (BLSTM) for processing sequential data
is introduced. This supervised learning method trains a
special recurrent neural network to use very long
ranged symmetric sequence context using a combination
of nonlinear processing elements and linear feedback
loops for storing long-range context. The algorithm is
applied to the sequence-based prediction of protein
localization and predicts 93.3\% novel non-plant
proteins and 88.4\% novel plant proteins correctly,
which is an improvement over feedforward and standard
recurrent networks solving the same problem. The BLSTM
system is available as a web-service
(http://www.stepc.gr/~synaptic/blstm.html).",
acknowledgement = ack-nhfb,
keywords = "biological sequence analysis; long short-term memory;
protein subcellular localization prediction; recurrent
neural networks",
}
@Article{Korodi:2007:CAN,
author = "Gergely Korodi and Ioan Tabus",
title = "Compression of Annotated Nucleotide Sequences",
journal = j-TCBB,
volume = "4",
number = "3",
pages = "447--457",
month = jul,
year = "2007",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:58:24 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "This article introduces an algorithm for the lossless
compression of DNA files, which contain annotation text
besides the nucleotide sequence. First a grammar is
specifically designed to capture the regularities of
the annotation text. A revertible transformation uses
the grammar rules in order to equivalently represent
the original file as a collection of parsed segments
and a sequence of decisions made by the grammar parser.
This decomposition enables the efficient use of
state-of-the-art encoders for processing the parsed
segments. The output size of the decision-making
process of the grammar is optimized by extending the
states to account for high-order Markovian
dependencies. The practical implementation of the
algorithm achieves a significant improvement when
compared to the general-purpose methods currently used
for DNA files.",
acknowledgement = ack-nhfb,
keywords = "4 [Data]: Coding and Information Theory | Data
compaction and compression; Annotation; Compression;
F.4 [Theory of Computation]: Mathematical Logic and
Formal Languages | Formal languages; Formal Grammars;
G.3 [Mathematics of Computing]: Probability and
Statistics | Markov processes; J.3 [Computer
Applications]: Life and Medical Sciences | Biology and
genetics; nucleotide sequences",
}
@Article{Bordewich:2007:CHN,
author = "Magnus Bordewich and Charles Semple",
title = "Computing the Hybridization Number of Two Phylogenetic
Trees Is Fixed-Parameter Tractable",
journal = j-TCBB,
volume = "4",
number = "3",
pages = "458--466",
month = jul,
year = "2007",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:58:24 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "Reticulation processes in evolution mean that the
ancestral history of certain groups of present-day
species is non-tree-like. These processes include
hybridization, lateral gene transfer, and
recombination. Despite the existence of reticulation,
such events are relatively rare and so a fundamental
problem for biologists is the following: given a
collection of rooted binary phylogenetic trees on sets
of species that correctly represent the tree-like
evolution of different parts of their genomes, what is
the smallest number of `reticulation' vertices in any
network that explains the evolution of the species
under consideration. It has been previously shown that
this problem is NP-hard even when the collection
consists of only two rooted binary phylogenetic trees.
However, in this paper, we show that the problem is
fixed-parameter tractable in the two-tree instance,
when parameterized by this smallest number of
reticulation vertices.",
acknowledgement = ack-nhfb,
keywords = "agreement forest; hybridization network; reticulate
evolution; rooted phylogenetic tree; subtree prune and
regraft",
}
@Article{Huang:2007:EGS,
author = "D. Huang and Tommy Chow",
title = "Effective Gene Selection Method With Small Sample Sets
Using Gradient-Based and Point Injection Techniques",
journal = j-TCBB,
volume = "4",
number = "3",
pages = "467--475",
month = jul,
year = "2007",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:58:24 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "Microarray gene expression data usually consist of a
large amount of genes. Among these genes, only a small
fraction is informative for performing cancer
diagnostic test. This paper focuses on effective
identification of informative genes. We analyze gene
selection models from the perspective of optimization
theory. As a result, a new strategy is designed to
modify conventional search engines. Also, as
overfitting is likely to occur in microarray data
because of their small sample set, a point injection
technique is developed to address the problem of
overfitting. The proposed strategies have been
evaluated on three kinds of cancer diagnosis. Our
results show that the proposed strategies can improve
the performance of gene selection substantially. The
experimental results also indicate that the proposed
methods are very robust under all the investigated
cases.",
acknowledgement = ack-nhfb,
keywords = "gene selection; gradient based learning; optimization
theory; point injection",
}
@Article{Hecht:2007:HTL,
author = "David Hecht and Gary Fogel",
title = "High-Throughput Ligand Screening via Preclustering and
Evolved Neural Networks",
journal = j-TCBB,
volume = "4",
number = "3",
pages = "476--484",
month = jul,
year = "2007",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:58:24 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "The pathway for novel lead drug discovery has many
major deficiencies, the most significant of which is
the immense size of small molecule diversity space.
Methods that increase the search efficiency and/or
reduce the size of the search space, increase the rate
at which useful lead compounds are identified.
Artificial neural networks optimized via evolutionary
computation provide a cost and time-effective solution
to this problem. Here, we present results that suggest
preclustering of small molecules prior to neural
network optimization is useful for generating models of
quantitative structure-activity relationships for a set
of HIV inhibitors. Using these methods, it is possible
to prescreen compounds to separate active from inactive
compounds or even actives and mildly active compounds
from inactive compounds with high predictive accuracy
while simultaneously reducing the feature space. It is
also possible to identify `human interpretable'
features from the best models that can be used for
proposal and synthesis of new compounds in order to
optimize potency and specificity.",
acknowledgement = ack-nhfb,
keywords = "artificial neural networks; computational
intelligence; evolutionary computation; medicine and
science",
}
@Article{Zhang:2007:MCU,
author = "Runxuan Zhang and Guang-Bin Huang and N. Sundararajan
and P. Saratchandran",
title = "Multicategory Classification Using An Extreme Learning
Machine for Microarray Gene Expression Cancer
Diagnosis",
journal = j-TCBB,
volume = "4",
number = "3",
pages = "485--495",
month = jul,
year = "2007",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:58:24 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "In this paper, the recently developed Extreme Learning
Machine (ELM) is used for direct multicategory
classification problems in the cancer diagnosis area.
ELM avoids problems like local minima, improper
learning rate and overfitting commonly faced by
iterative learning methods and completes the training
very fast. We have evaluated the multi-category
classification performance of ELM on three benchmark
microarray datasets for cancer diagnosis, namely, the
GCM dataset, the Lung dataset and the Lymphoma dataset.
The results indicate that ELM produces comparable or
better classification accuracies with reduced training
time and implementation complexity compared to
artificial neural networks methods like conventional
back-propagation ANN, Linder's SANN, and Support Vector
Machine methods like SVM-OVO and Ramaswamy's SVM-OVA.
ELM also achieves better accuracies for classification
of individual categories.",
acknowledgement = ack-nhfb,
keywords = "extreme learning machine; gene expression; microarray;
multi-category classification; SVM",
}
@Article{Zhang:2007:SSS,
author = "Louxin Zhang",
title = "Superiority of Spaced Seeds for Homology Search",
journal = j-TCBB,
volume = "4",
number = "3",
pages = "496--505",
month = jul,
year = "2007",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:58:24 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "In homology search, good spaced seeds have higher
sensitivity for the same cost (weight). However,
elucidating the mechanism that confers power to spaced
seeds and characterizing optimal spaced seeds still
remain unsolved. This paper investigates these two
important open questions by formally analyzing the
average number of non-overlapping hits and the hit
probability of a spaced seed in the Bernoulli sequence
model. We prove that when the length of a non-uniformly
spaced seed is bounded above by an exponential function
of the seed weight, the seed outperforms strictly the
traditional consecutive seed of the same weight in both
(i) the average number of non-overlapping hits and (ii)
the asymptotic hit probability. This clearly answers
the first problem mentioned above in the Bernoulli
sequence model. The theoretical study in this paper
also gives a new solution to finding long optimal
seeds.",
acknowledgement = ack-nhfb,
keywords = "homology search; pattern matching; renewal theory; run
statistics; sequence alignment; spaced seeds",
}
@Article{Matsen:2007:OCT,
author = "Frederick Matsen",
title = "Optimization Over a Class of Tree Shape Statistics",
journal = j-TCBB,
volume = "4",
number = "3",
pages = "506--512",
month = jul,
year = "2007",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:58:24 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "Tree shape statistics quantify some aspect of the
shape of a phylogenetic tree. They are commonly used to
compare reconstructed trees to evolutionary models and
to find evidence of tree reconstruction bias.
Historically, to find a useful tree shape statistic,
formulas have been invented by hand and then evaluated
for utility. This article presents the first method
which is capable of optimizing over a class of tree
shape statistics, called Binary Recursive Tree Shape
Statistics (BRTSS). After defining the BRTSS class, a
set of algebraic expressions is defined which can be
used in the recursions. The tree shape statistics
definable using these expressions in the BRTSS is very
general, and includes many of the statistics with which
phylogenetic researchers are already familiar. We then
present a practical genetic algorithm which is capable
of performing optimization over BRTSS given any
objective function. The chapter concludes with a
successful application of the methods to find a new
statistic which indicates a significant difference
between two distributions on trees which were
previously postulated to have similar properties.",
acknowledgement = ack-nhfb,
keywords = "biology and genetics; evolutionary computing and
genetic algorithms",
}
@Article{Mandoiu:2007:GEI,
author = "Ion I. M{\~a}ndoiu and Yi Pan and Alexander
Zelikovsky",
title = "{Guest Editors}' Introduction to the {Special Section
on Bioinformatics Research and Applications}",
journal = j-TCBB,
volume = "4",
number = "4",
pages = "513--514",
month = oct,
year = "2007",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:58:47 MDT 2008",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Zheng:2007:RNA,
author = "Chunfang Zheng and Qian Zhu and David Sankoff",
title = "Removing Noise and Ambiguities from Comparative Maps
in Rearrangement Analysis",
journal = j-TCBB,
volume = "4",
number = "4",
pages = "515--522",
month = oct,
year = "2007",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:58:47 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "Comparison of genomic maps is hampered by errors and
ambiguities introduced by mapping technology,
incorrectly resolved paralogy, small samples of markers
and extensive genome rearrangement. We design an
analysis to remove or resolve most of these problems
and to extract corrected data where markers occur in
consecutive strips in both genomes. To do this we
introduce the notion of pre-strip, an efficient way of
generating these, and a compatibility analysis
culminating in a Maximum Weighted Clique (MWC) search.
The output can be directly analyzed with genome
rearrangement algorithms, allowing the restoration of
some of the data not incorporated into the clique
solution. We investigate the trade-off between criteria
for discarding excessive pre-strips to make MWC
feasible, in terms of retaining as many markers as
possible in the solution and producing an economical
rearrangement analysis. We explore these questions
through simulation and through comparison of the rice
and sorghum genomes.",
acknowledgement = ack-nhfb,
keywords = "genome rearrangements; Maximum Weight Clique; rice;
sorghum; synteny blocks",
}
@Article{Blin:2007:CGD,
author = "Guillaume Blin and Cedric Chauve and Guillaume Fertin
and Romeo Rizzi and Stephane Vialette",
title = "Comparing Genomes with Duplications: {A} Computational
Complexity Point of View",
journal = j-TCBB,
volume = "4",
number = "4",
pages = "523--534",
month = oct,
year = "2007",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:58:47 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "In this paper, we are interested in the computational
complexity of computing (dis)similarity measures
between two genomes when they contain duplicated genes
or genomic markers, a problem that happens frequently
when comparing whole nuclear genomes. Recently, several
methods ( [1], [2]) have been proposed that are based
on two steps to compute a given (dis)similarity measure
M between two genomes G_1 and G_2: first, one
establishes a one-to-one correspondence between genes
of G_1 and genes of G_2 ; second, once this
correspondence is established, it defines explicitly a
permutation and it is then possible to quantify their
similarity using classical measures defined for
permutations, like the number of breakpoints. Hence
these methods rely on two elements: a way to establish
a one-to-one correspondence between genes of a pair of
genomes, and a (dis)similarity measure for
permutations. The problem is then, given a
(dis)similarity measure for permutations, to compute a
correspondence that defines an optimal permutation for
this measure. We are interested here in two models to
compute a one-to-one correspondence: the exemplar
model, where all but one copy are deleted in both
genomes for each gene family, and the matching model,
that computes a maximal correspondence for each gene
family. We show that for these two models, and for
three (dis)similarity measures on permutations, namely
the number of common intervals, the maximum adjacency
disruption (MAD) number and the summed adjacency
disruption (SAD) number, the problem of computing an
optimal correspondence is NP-complete, and even APXhard
for the MAD number and SAD number.",
acknowledgement = ack-nhfb,
keywords = "common intervals; comparative genomics; computational
complexity; maximum adjacency disruption number; summed
adjacency disruption number",
}
@Article{Bonizzoni:2007:ELC,
author = "Paola Bonizzoni and Gianluca Della Vedova and Riccardo
Dondi and Guillaume Fertin and Raffaella Rizzi and
Stephane Vialette",
title = "Exemplar Longest Common Subsequence",
journal = j-TCBB,
volume = "4",
number = "4",
pages = "535--543",
month = oct,
year = "2007",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:58:47 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "In this paper, we investigate the computational and
approximation complexity of the Exemplar Longest Common
Subsequence of a set of sequences (ELCS problem), a
generalization of the Longest Common Subsequence
problem, where the input sequences are over the union
of two disjoint sets of symbols, a set of mandatory
symbols and a set of optional symbols. We show that
different versions of the problem are APX-hard even for
instances with two sequences. Moreover, we show that
the related problem of determining the existence of a
feasible solution of the Exemplar Longest Common
Subsequence of two sequences is NP-hard. On the
positive side, we first present an efficient algorithm
for the ELCS problem over instances of two sequences
where each mandatory symbol can appear in total at most
three times in the sequences. Furthermore, we present
two fixed-parameter algorithms for the ELCS problem
over instances of two sequences where the parameter is
the number of mandatory symbols.",
acknowledgement = ack-nhfb,
keywords = "algorithm design and analysis; analysis of algorithms
and problem complexity; combinatorial algorithms;
comparative genomics; longest common subsequence",
}
@Article{Davila:2007:FPA,
author = "Jaime Davila and Sudha Balla and Sanguthevar
Rajasekaran",
title = "Fast and Practical Algorithms for Planted (l, d) Motif
Search",
journal = j-TCBB,
volume = "4",
number = "4",
pages = "544--552",
month = oct,
year = "2007",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:58:47 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "We consider the planted (l, d) motif search problem,
which consists of finding a substring of length l that
occurs in a set of input sequences {s1, . . . , sn}
with up to d errors, a problem that arises from the
need to find transcription factor-binding sites in
genomic information. We propose a sequence of practical
algorithms, which start based on the ideas considered
in PMS1. These algorithms are exact, have little space
requirements, and are able to tackle challenging
instances with bigger d, taking less time in the
instances reported solved by exact algorithms. In
particular, one of the proposed algorithms, PMSprune,
is able to solve the challenging instances, such as
(17, 6) and (19, 7), which were not previously reported
as solved in the literature.",
acknowledgement = ack-nhfb,
keywords = "branch and bound algorithms; challenging instances;
exact algorithms; planted motif search problem",
}
@Article{Schneider:2007:SDM,
author = "Adrian Schneider and Gaston Gonnet and Gina
Cannarozzi",
title = "{SynPAM---A} Distance Measure Based on Synonymous
Codon Substitutions",
journal = j-TCBB,
volume = "4",
number = "4",
pages = "553--560",
month = oct,
year = "2007",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:58:47 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "Measuring evolutionary distances between DNA or
protein sequences forms the basis of many applications
in computational biology and evolutionary studies. Of
particular interest are distances based on synonymous
substitutions, since these substitutions are considered
to be under very little selection pressure and
therefore assumed to accumulate in an almost clock-like
manner. SynPAM, the method presented here, allows the
estimation of distances between coding DNA sequences
based on synonymous codon substitutions. The problem of
estimating an accurate distance from the observed
substitution pattern is solved by maximum-likelihood
with empirical codon substitution matrices employed for
the underlying Markov model. Comparisons with
established measures of synonymous distance indicate
that SynPAM has less variance and yields useful results
over a longer time range.",
acknowledgement = ack-nhfb,
keywords = "dS; evolutionary distance; molecular evolution;
synonymous substitutions; SynPAM",
}
@Article{Sridhar:2007:AEN,
author = "Srinath Sridhar and Kedar Dhamdhere and Guy Blelloch
and Eran Halperin and R. Ravi and Russell Schwartz",
title = "Algorithms for Efficient Near-Perfect Phylogenetic
Tree Reconstruction in Theory and Practice",
journal = j-TCBB,
volume = "4",
number = "4",
pages = "561--571",
month = oct,
year = "2007",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:58:47 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "We consider the problem of reconstructing near-perfect
phylogenetic trees using binary character states
(referred to as BNPP). A perfect phylogeny assumes that
every character mutates at most once in the
evolutionary tree, yielding an algorithm for binary
character states that is computationally efficient but
not robust to imperfections in real data. A
near-perfect phylogeny relaxes the perfect phylogeny
assumption by allowing at most a constant number of
additional mutations. We develop two algorithms for
constructing optimal near-perfect phylogenies and
provide empirical evidence of their performance. The
first simple algorithm is fixed parameter tractable
when the number of additional mutations and the number
of characters that share four gametes with some other
character are constants. The second, more involved
algorithm for the problem is fixed parameter tractable
when only the number of additional mutations is fixed.
We have implemented both algorithms and shown them to
be extremely efficient in practice on biologically
significant data sets. This work proves the BNPP
problem fixed parameter tractable and provides the
first practical phylogenetic tree reconstruction
algorithms that find guaranteed optimal solutions while
being easily implemented and computationally feasible
for data sets of biologically meaningful size and
complexity.",
acknowledgement = ack-nhfb,
keywords = "biology and genetics; computations on discrete
structures; trees",
}
@Article{Chen:2007:CBR,
author = "Jinmiao Chen and Narendra Chaudhari",
title = "Cascaded Bidirectional Recurrent Neural Networks for
Protein Secondary Structure Prediction",
journal = j-TCBB,
volume = "4",
number = "4",
pages = "572--582",
month = oct,
year = "2007",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:58:47 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "Protein secondary structure (PSS) prediction is an
important topic in bioinformatics. Our study on a large
set of non-homologous proteins shows that long-range
interactions commonly exist and negatively affect PSS
prediction. Besides, we also reveal strong correlations
between secondary structure (SS) elements. In order to
take into account the long-range interactions and SS-SS
correlations, we propose a novel prediction system
based on cascaded bidirectional recurrent neural
network (BRNN). We compare the cascaded BRNN against
another two BRNN architectures, namely the original
BRNN architecture used for speech recognition as well
as Pollastri's BRNN that was proposed for PSS
prediction. Our cascaded BRNN achieves an overall three
state accuracy Q3 of 74.38\%, and reaches a high
Segment OVerlap (SOV) of 66.0455. It outperforms the
original BRNN and Pollastri's BRNN in both Q3 and SOV.
Specifically, it improves the SOV score by 4-6\%.",
acknowledgement = ack-nhfb,
}
@Article{Xiong:2007:DDK,
author = "Huilin Xiong and Ya Zhang and Xue-Wen Chen",
title = "Data-Dependent Kernel Machines for Microarray Data
Classification",
journal = j-TCBB,
volume = "4",
number = "4",
pages = "583--595",
month = oct,
year = "2007",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:58:47 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "One important application of gene expression analysis
is to classify tissue samples according to their gene
expression levels. Gene expression data are typically
characterized by high dimensionality and small sample
size, which makes the classification task quite
challenging. In this paper, we present a data-dependent
kernel for microarray data classification. This kernel
function is engineered so that the class separability
of the training data is maximized. A
bootstrapping-based resampling scheme is introduced to
reduce the possible training bias. The effectiveness of
this adaptive kernel for microarray data classification
is illustrated with a k-Nearest Neighbor (KNN)
classifier. Our experimental study shows that the
data-dependent kernel leads to a significant
improvement in the accuracy of KNN classifiers.
Furthermore, this kernel-based KNN scheme has been
demonstrated to be competitive to, if not better than,
more sophisticated classifiers such as Support Vector
Machines (SVMs) and the Uncorrelated Linear
Discriminant Analysis (ULDA) for classifying gene
expression data.",
acknowledgement = ack-nhfb,
keywords = "bootstrapping resampling; cancer classification;
kernel machines; kernel optimization; microarray data
analysis",
}
@Article{Michal:2007:FCM,
author = "Shahar Michal and Tor Ivry and Omer Cohen and Moshe
Sipper and Danny Barash",
title = "Finding a Common Motif of {RNA} Sequences Using
Genetic Programming: The {GeRNAMo} System",
journal = j-TCBB,
volume = "4",
number = "4",
pages = "596--610",
month = oct,
year = "2007",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:58:47 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "We focus on finding a consensus motif of a set of
homologous or functionally related RNA molecules.
Recent approaches to this problem have been limited to
simple motifs, require sequence alignment, and make
prior assumptions concerning the data set. We use
genetic programming to predict RNA consensus motifs
based solely on the data set. Our system -- dubbed
GeRNAMo (Genetic programming of RNA Motifs) -- predicts
the most common motifs without sequence alignment and
is capable of dealing with any motif size. Our program
only requires the maximum number of stems in the motif,
and if prior knowledge is available the user can
specify other attributes of the motif (e.g., the range
of the motif's minimum and maximum sizes), thereby
increasing both sensitivity and speed. We describe
several experiments using either ferritin iron response
element (IRE); signal recognition particle (SRP); or
microRNA sequences, showing that the most common motif
is found repeatedly, and that our system offers
substantial advantages over previous methods.",
acknowledgement = ack-nhfb,
}
@Article{McIntosh:2007:HCR,
author = "Tara McIntosh and Sanjay Chawla",
title = "High Confidence Rule Mining for Microarray Analysis",
journal = j-TCBB,
volume = "4",
number = "4",
pages = "611--623",
month = oct,
year = "2007",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:58:47 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "We present an association rule mining method for
mining high confidence rules, which describe
interesting gene relationships from microarray
datasets. Microarray datasets typically contain an
order of magnitude more genes than experiments,
rendering many data mining methods impractical as they
are optimised for sparse datasets. A new family of
row-enumeration rule mining algorithms have emerged to
facilitate mining in dense datasets. These algorithms
rely on pruning infrequent relationships to reduce the
search space by using the support measure. This major
shortcoming results in the pruning of many potentially
interesting rules with low support but high confidence.
We propose a new row-enumeration rule mining method,
MaxConf, to mine high confidence rules from microarray
data. MaxConf is a support-free algorithm which
directly uses the confidence measure to effectively
prune the search space. Experiments on three microarray
datasets show that MaxConf outperforms support-based
rule mining with respect to scalability and rule
extraction. Furthermore, detailed biological analyses
demonstrate the effectiveness of our approach -- the
rules discovered by MaxConf are substantially more
interesting and meaningful compared with support-based
methods.",
acknowledgement = ack-nhfb,
keywords = "association rules; data mining; high confidence rule
mining; microarray analysis",
}
@Article{Ponzoni:2007:IAR,
author = "Ignacio Ponzoni and Francisco Azuaje and Juan Augusto
and David Glass",
title = "Inferring Adaptive Regulation Thresholds and
Association Rules from Gene Expression Data through
Combinatorial Optimization Learning",
journal = j-TCBB,
volume = "4",
number = "4",
pages = "624--634",
month = oct,
year = "2007",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:58:47 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "There is a need to design computational methods to
support the prediction of gene regulatory networks.
Such models should offer both biologically-meaningful
and computationally-accurate predictions, which in
combination with other techniques may improve
large-scale, integrative studies. This paper presents a
new machine learning method for the prediction of
putative regulatory associations from expression data,
which exhibit properties never or only partially
addressed by other techniques recently published. The
method was tested on a Saccharomyces cerevisiae gene
expression dataset. The results were statistically
validated and compared with the relationships inferred
by two machine learning approaches to gene regulatory
network prediction. Furthermore, the resulting
predictions were assessed using domain knowledge. The
proposed algorithm may be able to accurately predict
relevant biological associations between genes. One of
the most relevant features of this new method is the
prediction of adaptive regulation thresholds for the
discretization of gene expression values, which is
required prior to the rule association learning
process. Moreover, an important advantage consists of
its low computational cost to infer association rules.
The proposed system may significantly support
exploratory, large-scale studies of automated
identification of potentially-relevant gene expression
associations.",
acknowledgement = ack-nhfb,
keywords = "combinatorial optimization; decision trees; gene
expression data; genetic regulatory networks;
machine-learning",
}
@Article{Noman:2007:IGR,
author = "Nasimul Noman and Hitoshi Iba",
title = "Inferring Gene Regulatory Networks using Differential
Evolution with Local Search Heuristics",
journal = j-TCBB,
volume = "4",
number = "4",
pages = "634--647",
month = oct,
year = "2007",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:58:47 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "We present a memetic algorithm for evolving the
structure of biomolecular interactions and inferring
the effective kinetic parameters from the time series
data of gene expression using the decoupled system
formalism. We propose an Information Criteria based
fitness evaluation for gene network model selection
instead of the conventional Mean Squared Error (MSE)
based fitness evaluation. A hill-climbing local-search
method has been incorporated in our evolutionary
algorithm for efficiently attaining the skeletal
architecture which is most frequently observed in
biological networks. The suitability of the method is
tested in gene circuit reconstruction experiments,
varying the network dimension and/or characteristics,
the amount of gene expression data used for inference
and the noise level present in expression profiles. The
reconstruction method inferred the network topology and
the regulatory parameters with high accuracy.
Nevertheless, the performance is limited to the amount
of expression data used and the noise level present in
the data. The proposed fitness function has been found
more suitable for identifying correct network topology
and for estimating the accurate parameter values
compared to the existing ones. Finally, we applied the
methodology for analyzing the cell-cycle gene
expression data of budding yeast and reconstructed the
network of some key regulators.",
acknowledgement = ack-nhfb,
keywords = "biology and genetics; gene regulatory system; global
optimization; inverse problems; medicine and science;
memetic algorithm; microarray data; transcriptional
regulation",
}
@Article{Ho:2007:ITS,
author = "Shinn-Ying Ho and Chih-Hung Hsieh and Fu-Chieh Yu and
Hui-Ling Huang",
title = "An Intelligent Two-Stage Evolutionary Algorithm for
Dynamic Pathway Identification From Gene Expression
Profiles",
journal = j-TCBB,
volume = "4",
number = "4",
pages = "648--704",
month = oct,
year = "2007",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:58:47 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "From gene expression profiles, it is desirable to
rebuild cellular dynamic regulation networks to
discover more delicate and substantial functions in
molecular biology, biochemistry, bioengineering and
pharmaceutics. S-system model is suitable to
characterize biochemical network systems and capable to
analyze the regulatory system dynamics. However,
inference of an S-system model of N-gene genetic
networks has 2N(N+1) parameters in a set of non-linear
differential equations to be optimized. This paper
proposes an intelligent two-stage evolutionary
algorithm (iTEA) to efficiently infer the S-system
models of genetic networks from time-series data of
gene expression. To cope with curse of dimensionality,
the proposed algorithm consists of two stages where
each uses a divide-and-conquer strategy. The
optimization problem is first decomposed into N
subproblems having 2(N+1) parameters each. At the first
stage, each subproblem is solved using a novel
intelligent genetic algorithm (IGA) with intelligent
crossover based on orthogonal experimental design
(OED). At the second stage, the obtained N solutions to
the N subproblems are combined and refined using an
OED-based simulated annealing algorithm for handling
noisy gene expression profiles. The effectiveness of
iTEA is evaluated using simulated expression patterns
with and without noise running on a single-processor
PC. It is shown that 1) IGA is efficient enough to
solve subproblems; 2) IGA is significantly superior to
the existing method SPXGA; and 3) iTEA performs well in
inferring S-system models for dynamic pathway
identification.",
acknowledgement = ack-nhfb,
keywords = "divide-and-conquer; evolutionary algorithm; genetic
network; orthogonal experimental design; pathway
identification; S-system model",
}
@Article{Bereg:2007:PNB,
author = "Sergey Bereg and Yuanyi Zhang",
title = "Phylogenetic Networks Based on the Molecular Clock
Hypothesis",
journal = j-TCBB,
volume = "4",
number = "4",
pages = "661--667",
month = oct,
year = "2007",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:58:47 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "A classical result in phylogenetic trees is that a
binary phylogenetic tree adhering to the molecular
clock hypothesis exists if and only if the matrix of
distances between taxa is ultrametric. The ultrametric
condition is very restrictive. In this paper we study
phylogenetic networks that can be constructed assuming
the molecular clock hypothesis. We characterize
distance matrices that admit such networks for 3 and 4
taxa. We also design two algorithms for constructing
networks optimizing the least-squares fit.",
acknowledgement = ack-nhfb,
keywords = "least-squares fit; molecular clock hypothesis;
Phylogenetic Networks",
}
@Article{Blazewicz:2007:SPD,
author = "Jacek Blazewicz and Edmund Burke and Marta Kasprzak
and Alexandr Kovalev and Mikhail Kovalyov",
title = "Simplified Partial Digest Problem: Enumerative and
Dynamic Programming Algorithms",
journal = j-TCBB,
volume = "4",
number = "4",
pages = "668--680",
month = oct,
year = "2007",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:58:47 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "We study the Simplified Partial Digest Problem (SPDP),
which is a mathematical model for a new simplified
partial digest method of genome mapping. This method is
easy for laboratory implementation and robust with
respect to the experimental errors. SPDP is NP-hard in
the strong sense. We present an $O(n2^n)$ time
enumerative algorithm and an $O(n^{2q})$ time dynamic
programming algorithm for the error-free SPDP, where
$n$ is the number of restriction sites and $q$ is the
number of distinct intersite distances. We also give
examples of the problem, in which there are
$2^{\frac{n+2}{3}-1}$ non-congruent solutions. These
examples partially answer a question recently posed in
the literature about the number of solutions of SPDP.
We adapt our enumerative algorithm for handling SPDP
with imprecise input data. Finally, we describe and
discuss the results of the computer experiments with
our algorithms.",
acknowledgement = ack-nhfb,
keywords = "algorithm design and analysis; dynamic programming;
genome mapping; imprecise information; restriction site
analysis",
}
@Article{Xu:2007:IGR,
author = "Rui Xu and Donald {Wunsch II} and Ronald Frank",
title = "Inference of Genetic Regulatory Networks with
Recurrent Neural Network Models Using Particle Swarm
Optimization",
journal = j-TCBB,
volume = "4",
number = "4",
pages = "681--692",
month = oct,
year = "2007",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:58:47 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "Genetic regulatory network inference is critically
important for revealing fundamental cellular processes,
investigating gene functions, and understanding their
relations. The availability of time series gene
expression data makes it possible to investigate the
gene activities of whole genomes, rather than those of
only a pair of genes or among several genes. However,
current computational methods do not sufficiently
consider the temporal behavior of this type of data and
lack the capability to capture the complex nonlinear
system dynamics. We propose a recurrent neural network
(RNN) and particle swarm optimization (PSO) approach to
infer genetic regulatory networks from time series gene
expression data. Under this framework, gene interaction
is explained through a connection weight matrix. Based
on the fact that the measured time points are limited
and the assumption that the genetic networks are
usually sparsely connected, we present a PSO-based
search algorithm to unveil potential genetic network
constructions that fit well with the time series data
and explore possible gene interactions. Furthermore,
PSO is used to train the RNN and determine the network
parameters. Our approach has been applied to both
synthetic and real data sets. The results demonstrate
that the RNN/PSO can provide meaningful insights in
understanding the nonlinear dynamics of the gene
expression time series and revealing potential
regulatory interactions between genes.",
acknowledgement = ack-nhfb,
keywords = "genetic regulatory networks; particle swarm
optimization; recurrent neural networks; time series
gene expression data",
}
@Article{Agius:2007:TSA,
author = "Phaedra Agius and Barry Kreiswirth and Steve Naidich
and Kristin Bennett",
title = "Typing Staphylococcus aureus Using the spa Gene and
Novel Distance Measures",
journal = j-TCBB,
volume = "4",
number = "4",
pages = "693--704",
month = oct,
year = "2007",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:58:47 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "We developed an approach for identifying groups or
families of Staphylococcus aureus bacteria based on
genotype data. With the emergence of drug resistant
strains, S. aureus represents a significant human
health threat. Identifying the family types efficiently
and quickly is crucial in community settings. Here, we
develop a hybrid sequence algorithm approach to type
this bacterium using only its spa gene. Two of the
sequence algorithms we used are well established, while
the third, the Best Common Gap-Weighted Sequence
(BCGS), is novel. We combined the sequence algorithms
with a weighted match/mismatch algorithm for the spa
sequence ends. Normalized similarity scores and
distances between the sequences were derived and used
within unsupervised clustering methods. The resulting
spa groupings correlated strongly with the groups
defined by the well-established Multi locus sequence
typing (MLST) method. Spa typing is preferable to MLST
typing which types seven genes instead of just one.
Furthermore, our spa clustering methods can be
fine-tuned to be more discriminative than MLST,
identifying new strains that the MLST method may not.
Finally, we performed a multidimensional scaling of our
distance matrices to visualize the relationship between
isolates. The proposed methodology provides a promising
new approach to molecular epidemiology.",
acknowledgement = ack-nhfb,
keywords = "clustering; genotyping; molecular epidemiology;
sequence algorithms; staphylococcus aureus",
}
@Article{Congdon:2008:EIC,
author = "Clare Bates Congdon and Joseph C. Aman and Gerardo M.
Nava and H. Rex Gaskins and Carolyn J. Mattingly",
title = "An Evaluation of Information Content as a Metric for
the Inference of Putative Conserved Noncoding Regions
in {DNA} Sequences Using a Genetic Algorithms
Approach",
journal = j-TCBB,
volume = "5",
number = "1",
pages = "1--14",
month = jan,
year = "2008",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:59:11 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "In previous work, we presented GAMI [1], an approach
to motif inference that uses a genetic algorithms
search. GAMI is designed specifically to find putative
conserved regulatory motifs in noncoding regions of
divergent species, and is designed to allow for
analysis of long nucleotide sequences. In this work, we
compare GAMI's performance when run with its original
fitness function (a simple count of the number of
matches) and when run with information content, as well
as several variations on these metrics. Results
indicate that information content does not identify
highly conserved regions, and thus is not the
appropriate metric for this task, while variations on
information content as well as the original metric
succeed in identifying putative conserved regions.",
acknowledgement = ack-nhfb,
keywords = "biology and genetics; evolutionary computing and
genetic algorithms",
}
@Article{Boscolo:2008:ITE,
author = "Riccardo Boscolo and James C. Liao and Vwani P.
Roychowdhury",
title = "An Information Theoretic Exploratory Method for
Learning Patterns of Conditional Gene Coexpression from
Microarray Data",
journal = j-TCBB,
volume = "5",
number = "1",
pages = "15--24",
month = jan,
year = "2008",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:59:11 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "In this article, we introduce an exploratory framework
for learning patterns of conditional co-expression in
gene expression data. The main idea behind the proposed
approach consists of estimating how the information
content shared by a set of M nodes in a network (where
each node is associated to an expression profile)
varies upon conditioning on a set of L conditioning
variables (in the simplest case represented by a
separate set of expression profiles). The method is
non-parametric and it is based on the concept of
statistical co-information, which, unlike conventional
correlation based techniques, is not restricted in
scope to linear conditional dependency patterns.
Moreover, such conditional co-expression relationships
can potentially indicate regulatory interactions that
do not manifest themselves when only pair-wise
relationships are considered. A moment based
approximation of the co-information measure is derived
that efficiently gets around the problem of estimating
high-dimensional multi-variate probability density
functions from the data, a task usually not viable due
to the intrinsic sample size limitations that
characterize expression level measurements. By applying
the proposed exploratory method, we analyzed a whole
genome microarray assay of the eukaryote Saccharomices
cerevisiae and were able to learn statistically
significant patterns of conditional co-expression. A
selection of such interactions that carry a meaningful
biological interpretation are discussed.",
acknowledgement = ack-nhfb,
keywords = "co-information; entropy; gene expression data;
information theory; statistical analysis",
}
@Article{Wiese:2008:REA,
author = "Kay C. Wiese and Alain A. Deschenes and Andrew G.
Hendriks",
title = "{RnaPredict---An} Evolutionary Algorithm for {RNA}
Secondary Structure Prediction",
journal = j-TCBB,
volume = "5",
number = "1",
pages = "25--41",
month = jan,
year = "2008",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:59:11 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "This paper presents two in-depth studies on
RnaPredict, an evolutionary algorithm for RNA secondary
structure prediction. The first study is an analysis of
the performance of two thermodynamic models, INN and
INN-HB. The correlation between the free energy of
predicted structures and the sensitivity is analyzed
for 19 RNA sequences. Although some variance is shown,
there is a clear trend between a lower free energy and
an increase in true positive base pairs. With
increasing sequence length, this correlation generally
decreases. In the second experiment, the accuracy of
the predicted structures for these 19 sequences are
compared against the accuracy of the structures
generated by the mfold dynamic programming algorithm
(DPA) and also to known structures. RnaPredict is shown
to outperform the minimum free energy structures
produced by mfold and has comparable performance when
compared to sub-optimal structures produced by mfold.",
acknowledgement = ack-nhfb,
keywords = "evolutionary computation; RnaPredict; RNA secondary
structure prediction",
}
@Article{Rother:2008:SCP,
author = "Diego Rother and Guillermo Sapiro and Vijay Pande",
title = "Statistical Characterization of Protein Ensembles",
journal = j-TCBB,
volume = "5",
number = "1",
pages = "42--55",
month = jan,
year = "2008",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:59:11 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "When accounting for structural fluctuations or
measurement errors, a single rigid structure may not be
sufficient to represent a protein. One approach to
solve this problem is to represent the possible
conformations as a discrete set of observed
conformations, an ensemble. In this work, we follow a
different richer approach, and introduce a framework
for estimating probability density functions in very
high dimensions, and then apply it to represent
ensembles of folded proteins. This proposed approach
combines techniques such as kernel density estimation,
maximum likelihood, cross-validation, and
bootstrapping. We present the underlying theoretical
and computational framework and apply it to artificial
data and protein ensembles obtained from molecular
dynamics simulations. We compare the results with those
obtained experimentally, illustrating the potential and
advantages of this representation.",
acknowledgement = ack-nhfb,
keywords = "Bayesian networks; bootstrapping; cross-validation;
density estimation; graphical models; maximum
likelihood; protein ensembles",
}
@Article{Cui:2008:EAA,
author = "Yun Cui and Lusheng Wang and Daming Zhu and Xiaowen
Liu",
title = "A $(1.5 + {\epsilon})$-Approximation Algorithm for
Unsigned Translocation Distance",
journal = j-TCBB,
volume = "5",
number = "1",
pages = "56--66",
month = jan,
year = "2008",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:59:11 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "Genome rearrangement is an important area in
computational biology and bioinformatics. The
translocation operation is one of the popular
operations for genome rearrangement. It was proved that
computing the unsigned translocation distance is
NP-hard. In this paper, we present a $(1.5 +
\epsilon)$-approximation algorithm for computing
unsigned translocation distance which improves upon the
best known 1.75-ratio. The running time of our
algorithm is $O(n^2 + (4/\epsilon)^1.5 \surd
\log(4/\epsilon)2 4^\epsilon)$, where $n$ is the total
number of genes in the genome.",
acknowledgement = ack-nhfb,
keywords = "and approximation algorithms; genome rearrangement;
unsigned translocation",
}
@Article{Tan:2008:NBP,
author = "Tuan Zea Tan and Geok See Ng and Chai Quek",
title = "A Novel Biologically and Psychologically Inspired
Fuzzy Decision Support System: Hierarchical
Complementary Learning",
journal = j-TCBB,
volume = "5",
number = "1",
pages = "67--79",
month = jan,
year = "2008",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:59:11 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "A computational intelligent system that models the
human cognitive abilities may promise significant
performance in problem learning because human is
effective in learning and problem solving. Functionally
modeling the human cognitive abilities not only avoids
the details of the underlying neural mechanisms
performing the tasks, but also reduces the complexity
of the system. The complementary learning mechanism is
responsible for human pattern recognition, i.e. human
attends to positive and negative samples when making
decision. Furthermore, human concept learning is
organized in a hierarchical fashion. Such hierarchical
organization allows the divide-and-conquer approach to
the problem. Thus, integrating the functional models of
hierarchical organization and complementary learning
can potentially improve the performance in pattern
recognition. Hierarchical complementary learning
exhibits many of the desirable features of pattern
recognition. It is further supported by the
experimental results that verify the rationale of the
integration and that the hierarchical complementary
learning system is a promising pattern recognition
tool.",
acknowledgement = ack-nhfb,
keywords = "cognitive learning; complementary learning; decision
support; fuzzy neural network; hierarchical model",
}
@Article{Ciocchetta:2008:ATS,
author = "Federica Ciocchetta and Corrado Priami and Paola
Quaglia",
title = "An Automatic Translation of {SBML} into Beta-Binders",
journal = j-TCBB,
volume = "5",
number = "1",
pages = "80--90",
month = jan,
year = "2008",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:59:11 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "A translation of SBML (Systems Biology Markup
Language) into a process algebra is proposed in order
to allow the formal specification, the simulation and
the formal analysis of biological models. Beta-binders,
a language with a quantitative stochastic extension, is
chosen for the translation. The proposed translation
focuses on the main components of SBML models, as
species and reactions. Furthermore, it satisfies the
compositional property, i.e. the translation of the
whole model is obtained by composing the translation of
the subcomponents. An automatic translator tool of SBML
models into Beta-binders has been implemented as well.
Finally, the translation of a simple model is
reported.",
acknowledgement = ack-nhfb,
keywords = "biological systems; modeling; Process algebras;
systems biology; Systems Biology Markup Language
(SBML); translation tool",
}
@Article{Bocker:2008:CAM,
author = "Sebastian Bocker and Veli Makinen",
title = "Combinatorial Approaches for Mass Spectra
Recalibration",
journal = j-TCBB,
volume = "5",
number = "1",
pages = "91--100",
month = jan,
year = "2008",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:59:11 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "Mass spectrometry has become one of the most popular
analysis techniques in Proteomics and Systems Biology.
With the creation of larger datasets, the automated
recalibration of mass spectra becomes important to
ensure that every peak in the sample spectrum is
correctly assigned to some peptide and protein.
Algorithms for recalibrating mass spectra have to be
robust with respect to wrongly assigned peaks, as well
as efficient due to the amount of mass spectrometry
data. The recalibration of mass spectra leads us to the
problem of finding an optimal matching between mass
spectra under measurement errors. We have developed two
deterministic methods that allow robust computation of
such a matching: The first approach uses a
computational geometry interpretation of the problem,
and tries to find two parallel lines with constant
distance that stab a maximal number of points in the
plane. The second approach is based on finding a
maximal common approximate subsequence, and improves
existing algorithms by one order of magnitude
exploiting the sequential nature of the matching
problem. We compare our results to a computational
geometry algorithm using a topological line-sweep.",
acknowledgement = ack-nhfb,
keywords = "biotechnology; combinatorial pattern matching;
computational geometry; mass spectrometry",
}
@Article{Barzuza:2008:CPP,
author = "Tamar Barzuza and Jacques S. Beckmann and Ron Shamir
and Itsik Pe'er",
title = "Computational Problems in Perfect Phylogeny
Haplotyping: Typing without Calling the Allele",
journal = j-TCBB,
volume = "5",
number = "1",
pages = "101--109",
month = jan,
year = "2008",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:59:11 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "A haplotype is an m-long binary vector. The
xor-genotype of two haplotypes is the m-vector of their
coordinate-wise xor. We study the following problem:
Given a set of xor-genotypes, reconstruct their
haplotypes so that the set of resulting haplotypes can
be mapped onto a perfect phylogeny tree. The question
is motivated by studying population evolution in human
genetics, and is a variant of the perfect phylogeny
haplotyping problem that has received intensive
attention recently. Unlike the latter problem, in which
the input is `full' genotypes, here we assume less
informative input, and so may be more economical to
obtain experimentally. Building on ideas of Gusfield,
we show how to solve the problem in polynomial time, by
a reduction to the graph realization problem. The
actual haplotypes are not uniquely determined by that
tree they map onto, and the tree itself may or may not
be unique. We show that tree uniqueness implies
uniquely determined haplotypes, up to inherent degrees
of freedom, and give a sufficient condition for the
uniqueness. To actually determine the haplotypes given
the tree, additional information is necessary. We show
that two or three full genotypes suffice to reconstruct
all the haplotypes, and present a linear algorithm for
identifying those genotypes.",
acknowledgement = ack-nhfb,
keywords = "graph realization; haplotypes; perfect phylogeny;
XOR-genotypes",
}
@Article{Chin:2008:DMR,
author = "Francis Chin and Henry C. M. Leung",
title = "{DNA} Motif Representation with Nucleotide
Dependency",
journal = j-TCBB,
volume = "5",
number = "1",
pages = "110--119",
month = jan,
year = "2008",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:59:11 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "The problem of discovering novel motifs of binding
sites is important to the understanding of gene
regulatory networks. Motifs are generally represented
by matrices (PWM or PSSM) or strings. However, these
representations cannot model biological binding sites
well because they fail to capture nucleotide
interdependence. It has been pointed out by many
researchers that the nucleotides of the DNA binding
site cannot be treated independently, e.g. the binding
sites of zinc finger in proteins. In this paper, a new
representation called Scored PositionSpecific Pattern
(SPSP), which is a generalization of the matrix and
string representations, is introduced which takes into
consideration the dependent occurrences of neighboring
nucleotides. Even though the problem of discovering the
optimal motif in SPSP representation is proved to
beNP-hard, we introduce a heuristic algorithm called
SPSP-Finder, which can effectively find optimal motifs
in most simulated cases and some real cases for which
existing popular motif finding software, such as
Weeder, MEME and AlignACE, fail.",
acknowledgement = ack-nhfb,
keywords = "Computing methodologies; design methodology; pattern
analysis; pattern recognition",
}
@Article{Yin:2008:NAC,
author = "Zong-Xian Yin and Jung-Hsien Chiang",
title = "Novel Algorithm for Coexpression Detection in
Time-Varying Microarray Data Sets",
journal = j-TCBB,
volume = "5",
number = "1",
pages = "120--135",
month = jan,
year = "2008",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:59:11 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "When analyzing the results of microarray experiments,
biologists generally use unsupervised categorization
tools. However, such tools regard each time point as an
independent dimension and utilize the Euclidean
distance to compute the similarities between
expressions. Furthermore, some of these methods require
the number of clusters to be determined in advance,
which is clearly impossible in the case of a new
dataset. Therefore, this study proposes a novel scheme,
designated as the Variation-based Co-expression
Detection (VCD) algorithm, to analyze the trends of
expressions based on their variation over time. The
proposed algorithm has two advantages. First, it is
unnecessary to determine the number of clusters in
advance since the algorithm automatically detects those
genes whose profiles are grouped together and creates
patterns for these groups. Second, the algorithm
features a new measurement criterion for calculating
the degree of change of the expressions between
adjacent time points and evaluating their trend
similarities. Three real-world microarray datasets are
employed to evaluate the performance of the proposed
algorithm.",
acknowledgement = ack-nhfb,
keywords = "bioinformatics; clustering; data mining; gene
expression; pattern analysis; time series analysis",
}
@Article{Goeffon:2008:PTN,
author = "Adrien Goeffon and Jean-Michel Richer and Jin-Kao
Hao",
title = "Progressive Tree Neighborhood Applied to the Maximum
Parsimony Problem",
journal = j-TCBB,
volume = "5",
number = "1",
pages = "136--145",
month = jan,
year = "2008",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:59:11 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "The Maximum Parsimony problem aims at reconstructing a
phylogenetic tree from DNA sequences while minimizing
the number of genetic transformations. To solve this
NP-complete problem, heuristic methods have been
developed, often based on local search. In this
article, we focus on the influence of the neighborhood
relations. After analyzing the advantages and drawbacks
of the well-known NNI, SPR and TBR neighborhoods, we
introduce the concept of Progressive Neighborhood which
consists in constraining progressively the size of the
neighborhood as the search advances. We empirically
show that applied to the Maximum Parsimony problem,
this progressive neighborhood turns out to be more
efficient and robust than the classic neighborhoods
using a descent algorithm. Indeed, it allows to find
better solutions with a smaller number of iterations or
trees evaluated.",
acknowledgement = ack-nhfb,
keywords = "combinatorial algorithms; maximum parsimony;
optimization; phylogeny reconstruction",
}
@Article{Anonymous:2008:RL,
author = "Anonymous",
title = "2007 Reviewers List",
journal = j-TCBB,
volume = "5",
number = "1",
pages = "146--147",
month = jan,
year = "2008",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:59:11 MDT 2008",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Anonymous:2008:AI,
author = "Anonymous",
title = "2007 Annual Index",
journal = j-TCBB,
volume = "5",
number = "1",
pages = "148--158",
month = jan,
year = "2008",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:59:11 MDT 2008",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Anonymous:2008:CAE,
author = "Anonymous",
title = "Call for Applications for {Editor-in-Chief}",
journal = j-TCBB,
volume = "5",
number = "1",
pages = "159--159",
month = jan,
year = "2008",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:59:11 MDT 2008",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Jackson:2008:CGM,
author = "Benjamin N. Jackson and Patrick S. Schnable and
Srinivas Aluru",
title = "Consensus Genetic Maps as Median Orders from
Inconsistent Sources",
journal = j-TCBB,
volume = "5",
number = "2",
pages = "161--171",
month = apr,
year = "2008",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:59:29 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "A genetic map is an ordering of genetic markers
calculated from a population of known lineage. While
traditionally a map has been generated from a single
population for each species, recently researchers have
created maps from multiple populations. In the face of
these new data, we address the need to find a consensus
map --- a map that combines the information from
multiple partial and possibly inconsistent input maps.
We model each input map as a partial order and
formulate the consensus problem as finding a median
partial order. Finding the median of multiple total
orders (preferences or rankings)is a well studied
problem in social choice. We choose to find the median
using the weighted symmetric difference distance, a
more general version of both the symmetric difference
distance and the Kemeny distance. Finding a median
order using this distance is NP-hard. We show that for
our chosen weight assignment, a median order satisfies
the positive responsiveness, extended Condorcet,and
unanimity criteria. Our solution involves finding the
maximum acyclic subgraph of a weighted directed graph.
We present a method that dynamically switches between
an exact branch and bound algorithm and a heuristic
algorithm, and show that for real data from closely
related organisms, an exact median can often be found.
We present experimental results using seven populations
of the crop plant Zea mays.",
acknowledgement = ack-nhfb,
keywords = "genetic map; Kemeny distance; median order; path and
circuit problems; symmetric difference distance.",
}
@Article{Gupta:2008:EDS,
author = "Anupam Gupta and Ziv Bar-Joseph",
title = "Extracting Dynamics from Static Cancer Expression
Data",
journal = j-TCBB,
volume = "5",
number = "2",
pages = "172--182",
month = apr,
year = "2008",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:59:29 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "Static expression experiments analyze samples from
many individuals. These samples are often snapshots of
the progression of a certain disease such as cancer.
This raises an intriguing question: Can we determine a
temporal order for these samples? Such an ordering can
lead to better understanding of the dynamics of the
disease and to the identification of genes associated
with its progression. In this paper we formally prove,
for the first time, that under a model for the dynamics
of the expression levels of a single gene, it is indeed
possible to recover the correct ordering of the static
expression datasets by solving an instance of the
traveling salesman problem (TSP). In addition, we
devise an algorithm that combines a TSP heuristic and
probabilistic modeling for inferring the underlying
temporal order of the microarray experiments. This
algorithm constructs probabilistic continuous curves to
represent expression profiles leading to accurate
temporal reconstruction for human data. Applying our
method to cancer expression data we show that the
ordering derived agrees well with survival duration. A
classifier that utilizes this ordering improves upon
other classifiers suggested for this task. The set of
genes displaying consistent behavior for the determined
ordering are enriched for genes associated with cancer
progression.",
acknowledgement = ack-nhfb,
keywords = "EM; glioma; microarrays; traveling salesman",
}
@Article{Thomas:2008:GMR,
author = "John Thomas and Naren Ramakrishnan and Chris
Bailey-Kellogg",
title = "Graphical Models of Residue Coupling in Protein
Families",
journal = j-TCBB,
volume = "5",
number = "2",
pages = "183--197",
month = apr,
year = "2008",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:59:29 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "Many statistical measures and algorithmic techniques
have been proposed for studying residue coupling in
protein families. Generally speaking, two residue
positions are considered coupled if, in the sequence
record, some of their amino acid type combinations are
significantly more common than others. While the
proposed approaches have proven useful in finding and
describing coupling, a significant missing component is
a formal probabilistic model that explicates and
compactly represents the coupling, integrates
information about sequence,structure, and function, and
supports inferential procedures for analysis,
diagnosis, and prediction. We present an approach to
learning and using probabilistic graphical models of
residue coupling. These models capture significant
conservation and coupling constraints observable in a
multiply-aligned set of sequences. Our approach can
place a structural prior on considered couplings, so
that all identified relationships have direct
mechanistic explanations. It can also incorporate
information about functional classes, and thereby learn
a differential graphical model that distinguishes
constraints common to all classes from those unique to
individual classes. Such differential models separately
account for class-specific conservation and family-wide
coupling, two different sources of sequence
covariation. They are then able to perform
interpretable functional classification of new
sequences, explaining classification decisions in terms
of the underlying conservation and coupling
constraints. We apply our approach in studies of both G
protein-coupled receptors and PDZ domains, identifying
and analyzing family-wide and class-specific
constraints, and performing functional classification.
The results demonstrate that graphical models of
residue coupling provide a powerful tool for
uncovering, representing, and utilizing significant
sequence structure-function relationships in protein
families.",
acknowledgement = ack-nhfb,
keywords = "correlated mutations; evolutionary covariation;
functional classification; graphical models;
sequence-structure-function relationships",
}
@Article{Mena-Chalco:2008:IPC,
author = "Jesus Mena-Chalco and Helaine Carrer and Yossi Zana
and Roberto M. {Cesar Jr.}",
title = "Identification of Protein Coding Regions Using the
Modified Gabor-Wavelet Transform",
journal = j-TCBB,
volume = "5",
number = "2",
pages = "198--207",
month = apr,
year = "2008",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:59:29 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "An important topic in genomic sequence analysis is the
identification of protein coding regions. In this
context, several coding DNA model-independent methods,
based on the occurrence of specific patterns of
nucleotides at coding regions, have been proposed.
Nonetheless, these methods have not been completely
suitable due to their dependence on an empirically
pre-defined window length required for a local analysis
of a DNA region. We introduce a method, based on a
modified Gabor-wavelet transform (MGWT), for the
identification of protein coding regions. This novel
transform is tuned to analyze periodic signal
components and presents the advantage of being
independent of the window length. We compared the
performance of the MGWT with other methods using
eukaryote datasets. The results show that the MGWT
outperforms all assessed model-independent methods with
respect to identification accuracy. These results
indicate that the source of at least part of the
identification errors produced by the previous methods
is the fixed working scale. The new method not only
avoids this source of errors, but also makes available
a tool for detailed exploration of the nucleotide
occurrence.",
acknowledgement = ack-nhfb,
keywords = "biology and genetics; pattern recognition; signal
processing",
}
@Article{deJong:2008:SSS,
author = "Hidde de Jong and Michel Page",
title = "Search for Steady States of Piecewise-Linear
Differential Equation Models of Genetic Regulatory
Networks",
journal = j-TCBB,
volume = "5",
number = "2",
pages = "208--222",
month = apr,
year = "2008",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:59:29 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "Analysis of the attractors of a genetic regulatory
network gives a good indication of the possible
functional modes of the system. In this paper we are
concerned with the problem of finding all steady states
of genetic regulatory networks described by
piecewise-linear differential equation (PLDE) models.
We show that the problem is NP-hard and translate it
into a propositional satisfiability (SAT) problem. This
allows the use of existing, efficient SAT solvers and
has enabled the development of a steady state search
module of the computer tool Genetic Network Analyzer
(GNA). The practical use of this module is demonstrated
by means of the analysis of a number of relatively
small bacterial regulatory networks as well as randomly
generated networks of several hundreds of genes.",
acknowledgement = ack-nhfb,
keywords = "genetic regulatory networks; large-scale systems;
piecewise-linear differential equations; SAT problem;
steady states",
}
@Article{Sadot:2008:TVB,
author = "Avital Sadot and Jasmin Fisher and Dan Barak and
Yishai Admanit and Michael J. Stern and E. Jane Albert
Hubbard and David Harel",
title = "Toward Verified Biological Models",
journal = j-TCBB,
volume = "5",
number = "2",
pages = "223--234",
month = apr,
year = "2008",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:59:29 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "The last several decades have witnessed a vast
accumulation of biological data and data analysis. Many
of these data sets represent only a small fraction of
the system's behavior, making the visualization of full
system behavior difficult. A more complete
understanding of a biological system is gained when
different types of data (and/or conclusions drawn from
the data) are integrated into a larger-scale
representation or model of the system. Ideally, this
type of model is consistent with all available data
about the system, and it is then used to generate
additional hypotheses to be tested. Computer-based
methods intended to formulate models that integrate
various events and to test the consistency of these
models with respect to the laboratory-based
observations on which they are based are potentially
very useful. In addition, in contrast to informal
models, the consistency of such formal computer-based
models with laboratory data can be tested rigorously by
methods of formal verification. We combined two formal
modeling approaches in computer science that were
originally developed for non-biological system design.
One is the inter-object approach using the language of
live sequence charts (LSCs) with the Play-Engine tool,
and the other is the intra-object approach using the
language of statecharts and Rhapsody as the tool.
Integration is carried out using InterPlay, a
simulation engine coordinator. Using these tools, we
constructed a combined model comprising three modules.
One module represents the early lineage of the somatic
gonad of C. elegans in LSCs, while a second more
detailed module in statecharts represents an
interaction between two cells within this lineage that
determine their developmental outcome. Using the
advantages of the tools, we created a third module
representing a set of key experimental data using LSCs.
We tested the combined statechart-LSC model by showing
that the simulations were consistent with the set of
experimental LSCs. This small-scale modular example
demonstrates the potential for using similar approaches
for verification by exhaustive testing of models by
LSCs. It also shows the advantages of these approaches
for modeling biology.",
acknowledgement = ack-nhfb,
keywords = "C. elegans; modeling; statecharts; verification",
}
@Article{Spillner:2008:CPD,
author = "Andreas Spillner and Binh T. Nguyen and Vincent
Moulton",
title = "Computing Phylogenetic Diversity for Split Systems",
journal = j-TCBB,
volume = "5",
number = "2",
pages = "235--244",
month = apr,
year = "2008",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:59:29 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "In conservation biology it is a central problem to
measure, predict, and preserve biodiversity as species
face extinction. In 1992 Faith proposed measuring the
diversity of a collection of species in terms of their
relationships on a phylogenetic tree, and to use this
information to identify collections of species with
high diversity. Here we are interested in some variants
of the resulting optimization problem that arise when
considering species whose evolution is better
represented by a network rather than a tree. More
specifically, we consider the problem of computing
phylogenetic diversity relative to a split system on a
collection of species of size $n$. We show that for
general split systems this problem is NP-hard. In
addition we provide some efficient algorithms for some
special classes of split systems, in particular
presenting an optimal $O(n)$ time algorithm for
phylogenetic trees and an $O(n\log n + n k)$ time
algorithm for choosing an optimal subset of size $k$
relative to a circular split system.",
acknowledgement = ack-nhfb,
keywords = "biology and genetics; life and medical sciences",
}
@Article{Lancia:2008:HDA,
author = "Giuseppe Lancia and R. Ravi and Romeo Rizzi",
title = "Haplotyping for Disease Association: {A} Combinatorial
Approach",
journal = j-TCBB,
volume = "5",
number = "2",
pages = "245--251",
month = apr,
year = "2008",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:59:29 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "We consider a combinatorial problem derived from
haplotyping a population with respect to a genetic
disease, either recessive or dominant. Given a set of
individuals, partitioned into healthy and diseased, and
the corresponding sets of genotypes, we want to infer
``bad'' and ``good'' haplotypes to account for these
genotypes and for the disease. Assume e.g. the disease
is recessive. Then, the resolving haplotypes must
consist of \emph{bad} and \emph{good} haplotypes, so
that (i) each genotype belonging to a diseased
individual is explained by a pair of bad haplotypes and
(ii) each genotype belonging to a healthy individual is
explained by a pair of haplotypes of which at least one
is good. We prove that the associated decision problem
is NP-complete. However, we also prove that there is a
simple solution, provided the data satisfy a very weak
requirement.",
acknowledgement = ack-nhfb,
keywords = "biology and genetics; combinatorics; discrete
mathematics",
}
@Article{Gusev:2008:HSG,
author = "Alexander Gusev and Ion I. M{\~a}ndoiu and Bogdan
Pa{\c{s}}aniuc",
title = "Highly Scalable Genotype Phasing by Entropy
Minimization",
journal = j-TCBB,
volume = "5",
number = "2",
pages = "252--261",
month = apr,
year = "2008",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:59:29 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "A Single Nucleotide Polymorphism (SNP) is a position
in the genome at which two or more of the possible four
nucleotides occur in a large percentage of the
population. SNPsaccount for most of the genetic
variability between individuals,and mapping SNPs in the
human population has become the next high-priority in
genomics after the completion of the HumanGenome
project. In diploid organisms such as humans, there are
two non-identical copies of each autosomal chromosome.
A description of the SNPs in a chromosome is called a
haplotype. At present, it is prohibitively expensive to
directly determine the haplotypes of an individual, but
it is possible to obtain rather easily the conflated
SNP information in the so called genotype.
Computational methods for genotype phasing, i.e.,
inferring haplotypes from genotype data, have received
much attention in recent years as haplotype information
leads to increased statistical power of disease
association tests. However, many of the existing
algorithms have impractical running time for phasing
large genotype datasets such as those generated by the
international HapMap project. In this paper we propose
a highly scalable algorithm based on entropy
minimization. Our algorithm is capable of phasing both
unrelated and related genotypes coming from complex
pedigrees. Experimental results on both real and
simulated datasets show that our algorithm achieves a
phasing accuracy worse but close to that of best
existing methods while being several orders of
magnitude faster. The open source code implementation
of the algorithm and a web interface are publicly
available at
http://dna.engr.uconn.edu/~software/ent/.",
acknowledgement = ack-nhfb,
keywords = "algorithm; genotype phasing; haplotype; Single
Nucleotide Polymorphism",
}
@Article{Zhao:2008:ICG,
author = "Wentao Zhao and Erchin Serpedin and Edward R.
Dougherty",
title = "Inferring Connectivity of Genetic Regulatory Networks
Using Information-Theoretic Criteria",
journal = j-TCBB,
volume = "5",
number = "2",
pages = "262--274",
month = apr,
year = "2008",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:59:29 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "Recently, the concept of mutual information has been
proposed for infering the structure of genetic
regulatory networks from gene expression profiling.
After analyzing the limitations of mutual information
in inferring the gene-to-gene interactions, this paper
introduces the concept of conditional mutual
information and based on it proposes two novel
algorithms to infer the connectivity structure of
genetic regulatory networks. One of the proposed
algorithms exhibits a better accuracy while the other
algorithm excels in simplicity and flexibility. By
exploiting the mutual information and conditional
mutual information, a practical metric is also proposed
to assess the likeliness of direct connectivity between
genes. This novel metric resolves a common limitation
associated with the current inference algorithms,
namely the situations where the gene connectivity is
established in terms of the dichotomy of being either
connected or disconnected. Based on the data sets
generated by synthetic networks, the performance of the
proposed algorithms is compared favorably relative to
existing state-of-the-art schemes. The proposed
algorithms are also applied on realistic biological
measurements, such as the cutaneous melanoma data set,
and biological meaningful results are inferred.",
acknowledgement = ack-nhfb,
keywords = "biology and genetics; DNA microarray; genetic
regulatory network; information theory",
}
@Article{Bordewich:2008:NRS,
author = "Magnus Bordewich and Charles Semple",
title = "Nature Reserve Selection Problem: {A} Tight
Approximation Algorithm",
journal = j-TCBB,
volume = "5",
number = "2",
pages = "275--280",
month = apr,
year = "2008",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:59:29 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "The Nature Reserve Selection Problem is a problem that
arises in the context of studying biodiversity
conservation. Subject to budgetary constraints, the
problem is to select a set of regions to conserve so
that the phylogenetic diversity of the set of species
contained within those regions is maximized. Recently,
it was shown in a paper by Moulton {\\em et al.} that
this problem is NP-hard. In this paper, we establish a
tight polynomial-time approximation algorithm for the
Nature Reserve Section Problem. Furthermore, we resolve
a question on the computational complexity of a related
problem left open in Moulton {\\em et al.}",
acknowledgement = ack-nhfb,
keywords = "combinatorial algorithms; trees",
}
@Article{Hsieh:2008:OAI,
author = "Yong-Hsiang Hsieh and Chih-Chiang Yu and Biing-Feng
Wang",
title = "Optimal Algorithms for the Interval Location Problem
with Range Constraints on Length and Average",
journal = j-TCBB,
volume = "5",
number = "2",
pages = "281--290",
month = apr,
year = "2008",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:59:29 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "Let $A$ be a sequence of $n$ real numbers, $L_1$ and
$L_2$ be two integers such that $L_1 \leq L_2$, and
$R_1$ and $R_2$ be two real numbers such that $R_1 \leq
R_2$. An interval of $A$ is feasible if its length is
between $L_1$ and $L_2$ and its average is between
$R_1$ and $R_2$. In this paper, we study the following
problems: finding all feasible intervals of $A$,
counting all feasible intervals of $A$, finding a
maximum cardinality set of non-overlapping feasible
intervals of $A$, locating a longest feasible interval
of $A$, and locating a shortest feasible interval of
$A$. The problems are motivated from the problem of
locating CpG islands in biomolecular sequences. In this
paper, we firstly show that all the problems have an
$\Omega(n \log n)$-time lower bound in the comparison
model. Then, we use geometric approaches to design
optimal algorithms for the problems. All the presented
algorithms run in an on-line manner and use $O(n)$
space.",
acknowledgement = ack-nhfb,
keywords = "algorithms; analysis of algorithms; data structures;
geometrical problems and computations",
}
@Article{Lamers:2008:PRX,
author = "Susanna L. Lamers and Marco Salemi and Michael S.
McGrath and Gary B. Fogel",
title = "Prediction of {R5}, {X4}, and {R5X4} {HIV}-1
Coreceptor Usage with Evolved Neural Networks",
journal = j-TCBB,
volume = "5",
number = "2",
pages = "291--300",
month = apr,
year = "2008",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:59:29 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "The HIV-1 genome is highly heterogeneous. This
variation affords the virus a wide range of molecular
properties, including the ability to infect cell types,
such as macrophages and lymphocytes, expressing
different chemokine receptors on the cell surface. In
particular, R5 HIV-1 viruses use CCR5 as co-receptor
for viral entry, X4 viruses use CXCR4, whereas some
viral strains, known as R5X4 or D-tropic, have the
ability to utilize both co-receptors. X4 and R5X4
viruses are associated with rapid disease progression
to AIDS. R5X4 viruses differ in that they have yet to
be characterized by the examination of the genetic
sequence of HIV-1 alone. In this study, a series of
experiments was performed to evaluate different
strategies of feature selection and neural network
optimization. We demonstrate the use of artificial
neural networks trained via evolutionary computation to
predict viral co-receptor usage. The results indicate
identification of R5X4 viruses with predictive accuracy
of 75.5\%.",
acknowledgement = ack-nhfb,
keywords = "AIDS; artificial neural networks; Computational
intelligence; dual-tropic viruses; evolutionary
computation; HIV; phenotype prediction; tropism",
}
@Article{vanIersel:2008:SIT,
author = "Leo van Iersel and Judith Keijsper and Steven Kelk and
Leen Stougie",
title = "Shorelines of Islands of Tractability: Algorithms for
Parsimony and Minimum Perfect Phylogeny Haplotyping
Problems",
journal = j-TCBB,
volume = "5",
number = "2",
pages = "301--312",
month = apr,
year = "2008",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:59:29 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "The problem Parsimony Haplotyping (PH) asks for the
smallest set of haplotypes which can explain a given
set of genotypes, and the problem Minimum Perfect
Phylogeny Haplotyping (MPPH) asks for the smallest such
set which also allows the haplotypes to be embedded in
a perfect phylogeny, an evolutionary tree with
biologically-motivated restrictions. For PH, we extend
recent work by further mapping the interface between
``easy'' and ``hard'' instances, within the framework
of $(k,l)$-bounded instances where the number of 2's
per column and row of the input matrix is restricted.
By exploring, in the same way, the tractability
frontier of MPPH we provide the first concrete,
positive results for this problem. In addition, we
construct for both PH and MPPH polynomial time
approximation algorithms, based on properties of the
columns of the input matrix.",
acknowledgement = ack-nhfb,
keywords = "biology and genetics; combinatorial algorithms;
complexity hierarchies",
}
@Article{Brinza:2008:SPM,
author = "Dumitru Brinza and Alexander Zelikovsky",
title = "{2SNP}: Scalable Phasing Method for Trios and
Unrelated Individuals",
journal = j-TCBB,
volume = "5",
number = "2",
pages = "313--318",
month = apr,
year = "2008",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Thu Jun 12 16:59:29 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "Emerging microarray technologies allow affordable
typing of very long genome sequences. A key challenge
in analyzing of such huge amount of data is scalable
and accurate computational inferring of haplotypes
(i.e., splitting of each genotype into a pair of
corresponding haplotypes). In this paper, we first
phase genotypes consisting only of two SNPs using
genotypes frequencies adjusted to the random mating
model and then extend phasing of two-SNP genotypes to
phasing of complete genotypes using maximum spanning
trees. Runtime of the proposed 2SNP algorithm is $O(nm
(n + \log m)$, where n and m are the numbers of
genotypes and SNPs, respectively, and it can handle
genotypes spanning entire chromosomes in a matter of
hours. On datasets across 23 chromosomal regions from
HapMap[11], 2SNP is several orders of magnitude faster
than GERBIL and PHASE while matching them in quality
measured by the number of correctly phased genotypes,
single-site and switching errors. For example the 2SNP
software phases entire chromosome ($10^5$ SNPs from
HapMap) for 30 individuals in 2 hours with average
switching error 7.7\%. We have also enhanced 2SNP
algorithm to phase family trio data and compared it
with four other well-known phasing methods on simulated
data from [15]. 2SNP is much faster than all of them
while losing in quality only to PHASE. 2SNP software is
publicly available at
\texttt{http://alla.cs.gsu.edu/~software/2SNP}.",
acknowledgement = ack-nhfb,
keywords = "algorithm; genotype; haplotype; phasing; SNP",
}
@Article{Mandoiu:2008:GEI,
author = "Ion I. Mandoiu and Yi Pan and Alexander Zelikovsky",
title = "{Guest Editors}' Introduction to the Special Section on
Bioinformatics Research and Applications",
journal = j-TCBB,
volume = "5",
number = "3",
pages = "321--322",
month = jul,
year = "2008",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/TCBB.2008.85",
ISSN = "1545-5963",
bibdate = "Fri Oct 10 12:59:44 MDT 2008",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Sridhar:2008:MIL,
author = "Srinath Sridhar and Fumei Lam and Guy E. Blelloch and
R. Ravi and Russell Schwartz",
title = "Mixed Integer Linear Programming for Maximum-Parsimony
Phylogeny Inference",
journal = j-TCBB,
volume = "5",
number = "3",
pages = "323--331",
month = jul,
year = "2008",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/TCBB.2008.26",
ISSN = "1545-5963",
bibdate = "Fri Oct 10 12:59:44 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "Reconstruction of phylogenetic trees is a fundamental
problem in computational biology. While excellent
heuristic methods are available for many variants of
this problem, new advances in phylogeny inference will
be required if we are to be able to continue to make
effective use of the rapidly growing stores of
variation data now being gathered. In this paper, we
present two integer linear programming (ILP)
formulations to find the most parsimonious phylogenetic
tree from a set of binary variation data. One method
uses a flow-based formulation that can produce
exponential numbers of variables and constraints in the
worst case. The method has, however, proven extremely
efficient in practice on datasets that are well beyond
the reach of the available provably efficient methods,
solving several large mtDNA and Y-chromosome instances
within a few seconds and giving provably optimal
results in times competitive with fast heuristics than
cannot guarantee optimality. An alternative formulation
establishes that the problem can be solved with a
polynomial-sized ILP. We further present a web server
developed based on the exponential-sized ILP that
performs fast maximum parsimony inferences and serves
as a front end to a database of precomputed phylogenies
spanning the human genome.",
acknowledgement = ack-nhfb,
keywords = "algorithms; computational biology; integer linear
programming; maximum parsimony; phylogenetic tree
reconstruction; Steiner tree problem",
}
@Article{Bernt:2008:SPR,
author = "Matthias Bernt and Daniel Merkle and Martin
Middendorf",
title = "Solving the Preserving Reversal Median Problem",
journal = j-TCBB,
volume = "5",
number = "3",
pages = "332--347",
month = jul,
year = "2008",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/TCBB.2008.39",
ISSN = "1545-5963",
bibdate = "Fri Oct 10 12:59:44 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "Genomic rearrangement operations can be very useful to
infer the phylogenetic relationship of gene orders
representing species. We study the problem of finding
potential ancestral gene orders for the gene orders of
given taxa, such that the corresponding rearrangement
scenario has a minimal number of reversals, and where
each of the reversals has to preserve the common
intervals of the given input gene orders. Common
intervals identify sets of genes that occur
consecutively in all input gene orders. The problem of
finding such an ancestral gene order is called the
preserving reversal median problem (pRMP). A tree-based
data structure for the representation of the common
intervals of all input gene orders is used in our exact
algorithm TCIP for solving the pRMP. It is known that
the minimum number of reversals to transform one gene
order into another can be computed in polynomial time,
whereas the corresponding problem with the restriction
that common intervals should not be destroyed is
already NP-hard. It is shown theoretically that TCIP
can solve a large class of pRMP instances in polynomial
time. Empirically we show the good performance of TCIP
on biological and artificial data.",
acknowledgement = ack-nhfb,
keywords = "biology and genetics; permutations and combinations",
}
@Article{Braga:2008:ESS,
author = "Mar{\'\i}lia D. V. Braga and Marie-France Sagot and
Celine Scornavacca and Eric Tannier",
title = "Exploring the Solution Space of Sorting by Reversals,
with Experiments and an Application to Evolution",
journal = j-TCBB,
volume = "5",
number = "3",
pages = "348--356",
month = jul,
year = "2008",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/TCBB.2008.16",
ISSN = "1545-5963",
bibdate = "Fri Oct 10 12:59:44 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "In comparative genomics, algorithms that sort
permutations by reversals are often used to propose
evolutionary scenarios of rearrangements between
species. One of the main problems of such methods is
that they give one solution while the number of optimal
solutions is huge, with no criteria to discriminate
among them. Bergeron et al. started to give some
structure to the set of optimal solutions, in order to
be able to deliver more presentable results than only
one solution or a complete list of all solutions.
However, no algorithm exists so far to compute this
structure except through the enumeration of all
solutions, which takes too much time even for small
permutations. Bergeron et al. state as an open problem
the design of such an algorithm. We propose in this
paper an answer to this problem, that is, an algorithm
which gives all the classes of solutions and counts the
number of solutions in each class, with a better
theoretical and practical complexity than the complete
enumeration method. We give an example of how to reduce
the number of classes obtained, using further
constraints. Finally, we apply our algorithm to analyse
the possible scenarios of rearrangement between
mammalian sex chromosomes.",
acknowledgement = ack-nhfb,
keywords = "common intervals; evolution; genome rearrangements;
perfect sorting; sex chromosomes; signed permutations;
sorting by reversals",
}
@Article{Vassura:2008:RSP,
author = "Marco Vassura and Luciano Margara and Pietro Di Lena
and Filippo Medri and Piero Fariselli and Rita
Casadio",
title = "Reconstruction of {$3$D} Structures From Protein
Contact Maps",
journal = j-TCBB,
volume = "5",
number = "3",
pages = "357--367",
month = jul,
year = "2008",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/TCBB.2008.27",
ISSN = "1545-5963",
bibdate = "Fri Oct 10 12:59:44 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "The prediction of the protein tertiary structure from
solely its residue sequence (the so called Protein
Folding Problem) is one of the most challenging
problems in Structural Bioinformatics. We focus on the
protein residue contact map. When this map is assigned
it is possible to reconstruct the 3D structure of the
protein backbone. The general problem of recovering a
set of 3D coordinates consistent with some given
contact map is known as a unit-disk-graph realization
problem and it has been recently proven to be
NP-Hard. In this paper we describe a heuristic method
(COMAR) that is able to reconstruct with an
unprecedented rate (3-15 seconds) a 3D model that
exactly matches the target contact map of a
protein. Working with a non-redundant set of 1760
proteins, we find that the scoring efficiency of
finding a 3D model very close to the protein native
structure depends on the threshold value adopted to
compute the protein residue contact map. Contact maps
whose threshold values range from 10 to 18
{\AA}ngstroms allow reconstructing 3D models that are
very similar to the proteins native structure.",
acknowledgement = ack-nhfb,
keywords = "combinatorial algorithms; contact map; molecular
modeling; protein structure prediction",
}
@Article{Lee:2008:IEN,
author = "George Lee and Carlos Rodriguez and Anant Madabhushi",
title = "Investigating the Efficacy of Nonlinear Dimensionality
Reduction Schemes in Classifying Gene and Protein
Expression Studies",
journal = j-TCBB,
volume = "5",
number = "3",
pages = "368--384",
month = jul,
year = "2008",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/TCBB.2008.36",
ISSN = "1545-5963",
bibdate = "Fri Oct 10 12:59:44 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "The recent explosion in procurement and availability
of high-dimensional gene- and protein-expression
profile datasets for cancer diagnostics has
necessitated the development of sophisticated machine
learning tools with which to analyze them. A major
limitation in the ability to accurate classify these
high-dimensional datasets stems from the 'curse of
dimensionality', occurring in situations where the
number of genes or peptides significantly exceeds the
total number of patient samples. Previous attempts at
dealing with this issue have mostly centered on the use
of a dimensionality reduction (DR) scheme, Principal
Component Analysis (PCA), to obtain a low-dimensional
projection of the high-dimensional data. However,
linear PCA and other linear DR methods, which rely on
Euclidean distances to estimate object similarity, do
not account for the inherent underlying nonlinear
structure associated with most biomedical data. The
motivation behind this work is to identify the
appropriate DR methods for analysis of high-dimensional
gene- and protein-expression studies. Towards this end,
we empirically and rigorously compare three nonlinear
(Isomap, Locally Linear Embedding, Laplacian Eigenmaps)
and three linear DR schemes (PCA, Linear Discriminant
Analysis, Multidimensional Scaling) with the intent of
determining a reduced subspace representation in which
the individual object classes are more easily
discriminable.",
acknowledgement = ack-nhfb,
keywords = "and association rules; Bioinformatics (genome or
protein) databases; classification; clustering; data
and knowledge visualization; data mining; feature
extraction or construction",
}
@Article{Cho:2008:CHC,
author = "Hyuk Cho and Inderjit S. Dhillon",
title = "Coclustering of Human Cancer Microarrays Using Minimum
Sum-Squared Residue Coclustering",
journal = j-TCBB,
volume = "5",
number = "3",
pages = "385--400",
month = jul,
year = "2008",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/TCBB.2007.70268",
ISSN = "1545-5963",
bibdate = "Fri Oct 10 12:59:44 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "It is a consensus in microarray analysis that
identifying potential local patterns, characterized by
coherent groups of genes and conditions, may shed light
on the discovery of previously undetectable biological
cellular processes of genes as well as macroscopic
phenotypes of related samples. In order to
simultaneously cluster genes and conditions, we have
previously developed a fast co-clustering algorithm,
Minimum Sum-Squared Residue Co-clustering (MSSRCC),
which employs an alternating minimization scheme and
generates what we call co-clusters in a checkerboard
structure. In this paper, we propose specific
strategies that enable MSSRCC to escape poor local
minima and resolve the degeneracy problem in
partitional clustering algorithms. The strategies
include binormalization, deterministic spectral
initialization, and incremental local search. We assess
the effects of various strategies on both synthetic
gene expression datasets and real human cancer
microarrays and provide empirical evidence that MSSRCC
with the proposed strategies performs better than
existing co-clustering and clustering algorithms. In
particular, the combination of all the three strategies
leads to the best performance. Furthermore, we
illustrate coherence of the resulting co-clusters in a
checkerboard structure, where genes in a co-cluster
manifest the phenotype structure of corresponding
specific samples, and evaluate the enrichment of
functional annotations in Gene Ontology (GO).",
acknowledgement = ack-nhfb,
keywords = "binormalization; co-clustering; deterministic spectral
initialization; gene ontology; local search; microarray
analysis",
}
@Article{Wei:2008:IGF,
author = "Peng Wei and Wei Pan",
title = "Incorporating Gene Functions into Regression Analysis
of {DNA}-Protein Binding Data and Gene Expression Data
to Construct Transcriptional Networks",
journal = j-TCBB,
volume = "5",
number = "3",
pages = "401--415",
month = jul,
year = "2008",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/TCBB.2007.1062",
ISSN = "1545-5963",
bibdate = "Fri Oct 10 12:59:44 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "Useful information on transcriptional networks has
been extracted by regression analyses of gene
expression data and DNA-protein binding data. However,
a potential limitation of these approaches is their
assumption on the common and constant activity level of
a transcription factor (TF) on all the genes in any
given experimental condition; for example, any TF is
assumed to be either an activator or a repressor, but
not both, while it is known that some TFs can be dual
regulators. Rather than assuming a common linear
regression model for all the genes, we propose using
separate regression models for various gene groups; the
genes can be grouped based on their functions or some
clustering results. Furthermore, to take advantage of
the hierarchical structure of many existing gene
function annotation systems, such as Gene Ontology
(GO), we propose a shrinkage method that borrows
information from relevant gene groups. Applications to
a yeast dataset and simulations lend support for our
proposed methods. In particular, we find that the
shrinkage method consistently works well under various
scenarios. We recommend the use of the shrinkage method
as a useful alternative to the existing methods.",
acknowledgement = ack-nhfb,
keywords = "LASSO; microarray; shrinkage estimator; stratified
analysis; transcription factor",
}
@Article{Mak:2008:PPS,
author = "Man-Wai Mak and Jian Guo and Sun-Yuan Kung",
title = "{PairProSVM}: Protein Subcellular Localization Based
on Local Pairwise Profile Alignment and {SVM}",
journal = j-TCBB,
volume = "5",
number = "3",
pages = "416--422",
month = jul,
year = "2008",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/TCBB.2007.70256",
ISSN = "1545-5963",
bibdate = "Fri Oct 10 12:59:44 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "The subcellular locations of proteins are important
functional annotations. An effective and reliable
subcellular localization method is necessary for
proteomics research. This paper introduces a new
method---PairProSVM---to automatically predict the
subcellular locations of proteins. The profiles of all
protein sequences in the training set are constructed
by PSI-BLAST and the pairwise profile-alignment scores
are used to form feature vectors for training a support
vector machine (SVM) classifier. It was found that
PairProSVM outperforms the methods that are based on
sequence alignment and amino-acid compositions even if
most of the homologous sequences have been removed.
This paper also demonstrates that the performance of
PairProSVM is sensitive (and somewhat proportional) to
the degree of its kernel matrix meeting the Mercer's
condition. PairProSVM was evaluated on Reinhardt and
Hubbard's, Huang and Li's, and Gardy et al.'s protein
datasets. The overall accuracies on these three
datasets reach 99.3\\\%, 76.5\\\%, and 91.9\\\%,
respectively, which are higher than or comparable to
those obtained by sequence alignment and by the methods
compared in this paper.",
acknowledgement = ack-nhfb,
keywords = "kernel methods; Mercer condition; profile alignment;
subcellular localization; support vector machines",
}
@Article{Elo:2008:ROT,
author = "Laura L. Elo and Sanna Filen and Riitta Lahesmaa and
Tero Aittokallio",
title = "Reproducibility-Optimized Test Statistic for Ranking
Genes in Microarray Studies",
journal = j-TCBB,
volume = "5",
number = "3",
pages = "423--431",
month = jul,
year = "2008",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/tcbb.2007.1078",
ISSN = "1545-5963",
bibdate = "Fri Oct 10 12:59:44 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "A principal goal of microarray studies is to identify
the genes showing differential expression under
distinct conditions. In such studies, the selection of
an optimal test statistic is a crucial challenge, which
depends on the type and amount of data under analysis.
While previous studies on simulated or spike-in
datasets do not provide practical guidance on how to
choose the best method for a given real dataset, we
introduce an enhanced reproducibility-optimization
procedure, which enables the selection of a suitable
gene- anking statistic directly from the data. In
comparison with existing ranking methods, the
reproducibility-optimized statistic shows good
performance consistently under various simulated
conditions and on Affymetrix spike-in dataset. Further,
the feasibility of the novel statistic is confirmed in
a practical research setting using data from an
in-house cDNA microarray study of asthma-related gene
expression changes. These results suggest that the
procedure facilitates the selection of an appropriate
test statistic for a given dataset without relying on a
priori assumptions, which may bias the findings and
their interpretation. Moreover, the general
reproducibility-optimization procedure is not limited to
detecting differential expression only but could be
extended to a wide range of other applications as
well.",
acknowledgement = ack-nhfb,
keywords = "bootstrap; differential expression; gene expression;
gene ranking; microarray; reproducibility",
}
@Article{Parker:2008:SPT,
author = "Douglass Stott Parker and Ruey-Lung Hsiao and Yi Xing
and Alissa M. Resch and Christopher J. Lee",
title = "Solving the Problem of Trans-Genomic Query with
Alignment Tables",
journal = j-TCBB,
volume = "5",
number = "3",
pages = "432--447",
month = jul,
year = "2008",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/TCBB.2007.1073",
ISSN = "1545-5963",
bibdate = "Fri Oct 10 12:59:44 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "The trans-genomic query (TGQ) problem -- enabling the
free query of biological information, even across
genomes -- is a central challenge facing
bioinformatics. Solutions to this problem can alter the
nature of the field, moving it beyond the jungle of
data integration and expanding the number and scope of
questions that can be answered. An alignment table is a
binary relationship on locations (sequence segments).
An important special case of alignment tables are hit
tables ? tables of pairs of highly similar segments
produced by alignment tools like BLAST. However,
alignment tables also include general binary
relationships, and can represent any useful connection
between sequence locations. They can be curated, and
provide a high-quality queryable backbone of
connections between biological information. Alignment
tables thus can be a natural foundation for TGQ, as
they permit a central part of the TGQ problem to be
reduced to purely technical problems involving tables
of locations. Key challenges in implementing alignment
tables include efficient representation and indexing of
sequence locations. We define a location datatype that
can be incorporated naturally into common off-the-shelf
database systems. We also describe an implementation of
alignment tables in BLASTGRES, an extension of the
open-source POSTGRESQL database system that provides
indexing and operators on locations required for
querying alignment tables. This paper also reviews
several successful large-scale applications of
alignment tables for Trans-Genomic Query. Tables with
millions of alignments have been used in queries about
alternative splicing, an area of genomic analysis
concerning the way in which a single gene can yield
multiple transcripts. Comparative genomics is a large
potential application area for TGQ and alignment
tables.",
acknowledgement = ack-nhfb,
}
@Article{Dawy:2008:FSG,
author = "Zaher Dawy and Michel Sarkis and Joachim Hagenauer and
Jakob C. Mueller",
title = "Fine-Scale Genetic Mapping Using Independent Component
Analysis",
journal = j-TCBB,
volume = "5",
number = "3",
pages = "448--460",
month = jul,
year = "2008",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/TCBB.2007.1072",
ISSN = "1545-5963",
bibdate = "Fri Oct 10 12:59:44 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "The aim of genetic mapping is to locate the loci
responsible for specific traits such as complex
diseases. These traits are normally caused by mutations
at multiple loci of unknown locations and interactions.
In this work, we model the biological system that
relates DNA polymorphisms with complex traits as a
linear mixing process. Given this model, we propose a
new fine-scale genetic mapping method based on
independent component analysis. The proposed method
outputs both independent associated groups of SNPs in
addition to specific associated SNPs with the
phenotype. It is applied to a clinical data set for the
Schizophrenia disease with 368 individuals and 42 SNPs.
It is also applied to a simulation study to investigate
in more depth its performance. The obtained results
demonstrate the novel characteristics of the proposed
method compared to other genetic mapping methods.
Finally, we study the robustness of the proposed method
with missing genotype values and limited sample
sizes.",
acknowledgement = ack-nhfb,
keywords = "association mapping; complex diseases; independent
component analysis (ICA); linkage disequilibrium;
principal component analysis (PCA); single nucleotide
polymorphisms (SNPs)",
}
@Article{Hendy:2008:HCK,
author = "Michael D. Hendy and Sagi Snir",
title = "{Hadamard} Conjugation for the {Kimura} {3ST} Model:
Combinatorial Proof Using Path Sets",
journal = j-TCBB,
volume = "5",
number = "3",
pages = "461--471",
month = jul,
year = "2008",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/TCBB.2007.70227",
ISSN = "1545-5963",
bibdate = "Fri Oct 10 12:59:44 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "Under a stochastic model of molecular sequence
evolution the probability of each possible pattern of a
characters is well defined. The Kimura's
three-substitution-types (K3ST) model of evolution,
allows analytical expression for these probabilities of
by means of the Hadamard conjugation as a function of
the phylogeny T and the substitution probabilities on
each edge of TM . In this paper we produce a direct
combinatorial proof of these results, using pathset
distances which generalise pairwise distances between
sequences. This interpretation provides us with tools
that were proved useful in related problems in the
mathematical analysis of sequence evolution.",
acknowledgement = ack-nhfb,
keywords = "Hadamard conjugation; K3ST model; path-sets;
phylogenetic invariants; phylogenetic trees",
}
@Article{Gambette:2008:ILP,
author = "Philippe Gambette and Daniel H. Huson",
title = "Improved Layout of Phylogenetic Networks",
journal = j-TCBB,
volume = "5",
number = "3",
pages = "472--479",
month = jul,
year = "2008",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/tcbb.2007.1046",
ISSN = "1545-5963",
bibdate = "Fri Oct 10 12:59:44 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "Split networks are increasingly being used in
phylogenetic analysis. Usually, a simple equal angle
algorithm is used to draw such networks, producing
layouts that leave much room for improvement.
Addressing the problem of producing better layouts of
split networks, this paper presents an algorithm for
maximizing the area covered by the network, describes
an extension of the equal-daylight algorithm to
networks, looks into using a spring embedder and
discusses how to construct rooted split networks.",
acknowledgement = ack-nhfb,
keywords = "algorithms; graph drawing; phylogenetic networks;
phylogenetics",
}
@Article{Gusfield:2008:EE,
author = "Dan Gusfield",
title = "{EIC} Editorial",
journal = j-TCBB,
volume = "5",
number = "4",
pages = "481--481",
month = oct,
year = "2008",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/TCBB.2008.115",
ISSN = "1545-5963",
bibdate = "Wed Jan 14 12:51:33 MST 2009",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Giancarlo:2008:GEI,
author = "Raffaele Giancarlo and Sridhar Hannenhalli",
title = "{Guest Editors}' Introduction to the Special Section
on Algorithms in Bioinformatics",
journal = j-TCBB,
volume = "5",
number = "4",
pages = "482--483",
month = oct,
year = "2008",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/TCBB.2008.116",
ISSN = "1545-5963",
bibdate = "Wed Jan 14 12:51:33 MST 2009",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Jeong:2008:ISP,
author = "Jieun Jeong and Piotr Berman and Teresa M. Przytycka",
title = "Improving Strand Pairing Prediction through Exploring
Folding Cooperativity",
journal = j-TCBB,
volume = "5",
number = "4",
pages = "484--491",
month = oct,
year = "2008",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/TCBB.2008.88",
ISSN = "1545-5963",
bibdate = "Wed Jan 14 12:51:33 MST 2009",
bibsource = "http://portal.acm.org/",
abstract = "The topology of $\beta$-sheets is defined by the
pattern of hydrogen-bonded strand pairing. Therefore,
predicting hydrogen bonded strand partners is a
fundamental step towards predicting $\beta$-sheet
topology. At the same time, finding the correct
partners is very difficult due to long range
interactions involved in strand pairing. Additionally,
patterns of aminoacids involved, in $\beta$-sheet
formations are very general and therefore difficult to
use for computational recognition of specific contacts
between strands. In this work, we report a new strand
pairing algorithm. To address above mentioned
difficulties, our algorithm attempts to mimic elements
of the folding process. Namely, in addition to ensuring
that the predicted hydrogen bonded strand pairs satisfy
basic global consistency constraints, it takes into
account hypothetical folding pathways. Consistently
with this view, introducing hydrogen bonds between a
pair of strands changes the probabilities of forming
hydrogen bonds between other pairs of strand. We
demonstrate that this approach provides an improvement
over previously proposed algorithms. We also compare
the performance of this method to that of a global
optimization algorithm that poses the problem as
integer linear programming optimization problem and
solves it using ILOG CPLEX\texttrademark\ package.",
acknowledgement = ack-nhfb,
keywords = "Biology and genetics; Combinatorial algorithms",
}
@Article{Genovese:2008:SAH,
author = "Loredana M. Genovese and Filippo Geraci and Marco
Pellegrini",
title = "{SpeedHap}: An Accurate Heuristic for the Single
Individual {SNP} Haplotyping Problem with Many Gaps,
High Reading Error Rate and Low Coverage",
journal = j-TCBB,
volume = "5",
number = "4",
pages = "492--502",
month = oct,
year = "2008",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/TCBB.2008.67",
ISSN = "1545-5963",
bibdate = "Wed Jan 14 12:51:33 MST 2009",
bibsource = "http://portal.acm.org/",
abstract = "Single nucleotide polymorphism (SNP) is the most
frequent form of DNA variation. The set of SNP's
present in a chromosome (called the em haplotype) is of
interest in a wide area of applications in molecular
biology and biomedicine, including diagnostic and
medical therapy. In this paper we propose a new
heuristic method for the problem of haplotype
reconstruction for (portions of) a pair of homologous
human chromosomes from a single individual (SIH). The
problem is well known in literature and exact
algorithms have been proposed for the case when no (or
few) gaps are allowed in the input fragments. These
algorithms, though exact and of polynomial complexity,
are slow in practice. When gaps are considered no exact
method of polynomial complexity is known. The problem
is also hard to approximate with guarantees. Therefore
fast heuristics have been proposed. In this paper we
describe SpeedHap, a new heuristic method that is able
to tackle the case of many gapped fragments and retains
its effectiveness even when the input fragments have
high rate of reading errors (up to 20\%) and low
coverage (as low as 3). We test SpeedHap on real data
from the HapMap Project.",
acknowledgement = ack-nhfb,
keywords = "Algorithms; Biology and genetics",
}
@Article{Lozano:2008:STA,
author = "Antoni Lozano and Ron Y. Pinter and Oleg Rokhlenko and
Gabriel Valiente and Michal Ziv-Ukelson",
title = "Seeded Tree Alignment",
journal = j-TCBB,
volume = "5",
number = "4",
pages = "503--513",
month = oct,
year = "2008",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/TCBB.2008.59",
ISSN = "1545-5963",
bibdate = "Wed Jan 14 12:51:33 MST 2009",
bibsource = "http://portal.acm.org/",
abstract = "The optimal transformation of one tree into another by
means of elementary edit operations is an important
algorithmic problem that has several interesting
applications to computational biology. Here we
introduce a constrained form of this problem in which a
partial mapping of a set of nodes (the 'seeds') in one
tree to a corresponding set of nodes in the other tree
is given, and present efficient algorithms for both
ordered and unordered trees. Whereas ordered tree
matching based on seeded nodes has applications in
pattern matching of RNA structures, unordered tree
matching based on seeded nodes has applications in
co-speciation and phylogeny reconciliation. The latter
involves the solution of the planar tanglegram layout
problem, for which a polynomial-time algorithm is given
here.",
acknowledgement = ack-nhfb,
keywords = "Biology and genetics; Computer Applications; Discrete
Mathematics; Graph algorithms; Graph Theory; Life and
Medical Sciences; Mathematics of Computing; Trees",
}
@Article{Bansal:2008:NLS,
author = "Mukul S. Bansal and Oliver Eulenstein",
title = "An {$\Omega(n^2/ \log n)$} Speed-Up of {TBR}
Heuristics for the Gene-Duplication Problem",
journal = j-TCBB,
volume = "5",
number = "4",
pages = "514--524",
month = oct,
year = "2008",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/TCBB.2008.69",
ISSN = "1545-5963",
bibdate = "Wed Jan 14 12:51:33 MST 2009",
bibsource = "http://portal.acm.org/",
abstract = "The gene-duplication problem is to infer a species
supertree from gene trees that are confounded by
complex histories of gene duplications. This problem is
NP-hard and thus requires efficient and effective
heuristics. Existing heuristics perform a stepwise
search of the tree space, where each step is guided by
an exact solution to an instance of a local search
problem. We improve on the time complexity of the local
search problem by a factor of n2= log n, where n is the
size of the resulting species supertree. Typically,
several thousand instances of the local search problem
are solved throughout a stepwise heuristic search.
Hence, our improvement makes the gene-duplication
problem much more tractable for large-scale
phylogenetic analyses.",
acknowledgement = ack-nhfb,
keywords = "Algorithms; Computational Biology; Gene Duplication;
Phylogenetics; Supertrees",
}
@Article{Wang:2008:DCO,
author = "Xueyi Wang and Jack Snoeyink",
title = "Defining and Computing Optimum {RMSD} for Gapped and
Weighted Multiple-Structure Alignment",
journal = j-TCBB,
volume = "5",
number = "4",
pages = "525--533",
month = oct,
year = "2008",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/TCBB.2008.92",
ISSN = "1545-5963",
bibdate = "Wed Jan 14 12:51:33 MST 2009",
bibsource = "http://portal.acm.org/",
abstract = "Pairwise structure alignment commonly uses root mean
square deviation (RMSD) to measure the structural
similarity, and methods for optimizing RMSD are well
established. We extend RMSD to weighted RMSD for
multiple structures. By using multiplicative weights,
we show that weighted RMSD for all pairs is the same as
weighted RMSD to an average of the structures. Thus,
using RMSD or weighted RMSD implies that the average is
a consensus structure. Although we show that in
general, the two tasks of finding the optimal
translations and rotations for minimizing weighted RMSD
cannot be separated for multiple structures like they
can for pairs, an inherent difficulty and a fact
ignored by previous work, we develop a near-linear
iterative algorithm to converge weighted RMSD to a
local minimum. 10,000 experiments of gapped alignment
done on each of 23 protein families from HOMSTRAD
(where each structure starts with a random translation
and rotation) converge rapidly to the same minimum.
Finally we propose a heuristic method to iteratively
remove the effect of outliers and find well-aligned
positions that determine the structural conserved
region by modeling B-factors and deviations from the
average positions as weights and iteratively assigning
higher weights to better aligned atoms.",
acknowledgement = ack-nhfb,
keywords = "multiple structure alignment; optimization methods;
structural conserved region; weighted RMSD",
}
@Article{Yao:2008:EAE,
author = "Peggy Yao and Ankur Dhanik and Nathan Marz and Ryan
Propper and Charles Kou and Guanfeng Liu and Henry van
den Bedem and Jean-Claude Latombe and Inbal
Halperin-Landsberg and Russ B. Altman",
title = "Efficient Algorithms to Explore Conformation Spaces of
Flexible Protein Loops",
journal = j-TCBB,
volume = "5",
number = "4",
pages = "534--545",
month = oct,
year = "2008",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/TCBB.2008.96",
ISSN = "1545-5963",
bibdate = "Wed Jan 14 12:51:33 MST 2009",
bibsource = "http://portal.acm.org/",
abstract = "Several applications in biology - e.g., incorporation
of protein flexibility in ligand docking algorithms,
interpretation of fuzzy X-ray crystallographic data,
and homology modeling - require computing the internal
parameters of a flexible fragment (usually, a loop) of
a protein in order to connect its termini to the rest
of the protein without causing any steric clash. One
must often sample many such conformations in order to
explore and adequately represent the conformational
range of the studied loop. While sampling must be fast,
it is made difficult by the fact that two conflicting
constraints - kinematic closure and clash avoidance -
must be satisfied concurrently. This paper describes
two efficient and complementary sampling algorithms to
explore the space of closed clash-free conformations of
a flexible protein loop. The 'seed sampling' algorithm
samples broadly from this space, while the 'deformation
sampling' algorithm uses seed conformations as starting
points to explore the conformation space around them at
a finer grain. Computational results are presented for
various loops ranging from 5 to 25 residues. More
specific results also show that the combination of the
sampling algorithms with a functional site prediction
software (FEATURE) makes it possible to compute and
recognize calcium-binding loop conformations. The
sampling algorithms are implemented in a toolkit
(LoopTK), which is available at
https://simtk.org/home/looptk.",
acknowledgement = ack-nhfb,
keywords = "Biology and genetics; Robotics",
}
@Article{Kim:2008:LSS,
author = "Eagu Kim and John Kececioglu",
title = "Learning Scoring Schemes for Sequence Alignment from
Partial Examples",
journal = j-TCBB,
volume = "5",
number = "4",
pages = "546--556",
month = oct,
year = "2008",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/TCBB.2008.57",
ISSN = "1545-5963",
bibdate = "Wed Jan 14 12:51:33 MST 2009",
bibsource = "http://portal.acm.org/",
abstract = "When aligning biological sequences, the choice of
parameter values for the alignment scoring function is
critical. Small changes in gap penalties, for example,
can yield radically different alignments. A rigorous
way to compute parameter values that are appropriate
for aligning biological sequences is through inverse
parametric sequence alignment. Given a collection of
examples of biologically correct alignments, this is
the problem of finding parameter values that make the
scores of the example alignments close to those of
optimal alignments for their sequences. We extend prior
work on inverse parametric alignment to partial
examples, which contain regions where the alignment is
left unspecified, and to an improved formulation based
on minimizing the average error between the score of an
example and the score of an optimal alignment.
Experiments on benchmark biological alignments show we
can find parameters that generalize across protein
families and that boost the accuracy of multiple
sequence alignment by as much as 25\%.",
acknowledgement = ack-nhfb,
keywords = "Analysis of Algorithms and Problem Complexity; Biology
and genetics; Linear programming; Pattern matching",
}
@Article{Schliep:2008:EAC,
author = "Alexander Schliep and Roland Krause",
title = "Efficient Algorithms for the Computational Design of
Optimal Tiling Arrays",
journal = j-TCBB,
volume = "5",
number = "4",
pages = "557--567",
month = oct,
year = "2008",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/TCBB.2008.50",
ISSN = "1545-5963",
bibdate = "Wed Jan 14 12:51:33 MST 2009",
bibsource = "http://portal.acm.org/",
abstract = "The representation of a genome by oligonucleotide
probes is a prerequisite for the analysis of many of
its basic properties, such as transcription factor
binding sites, chromosomal breakpoints, gene expression
of known genes and detection of novel genes, in
particular those coding for small RNAs. An ideal
representation would consist of a high density set of
oligonucleotides with similar melting temperatures that
do not cross-hybridize with other regions of the genome
and are equidistantly spaced. The implementation of
such design is typically called a tiling array or
genome array. We formulate the minimal cost tiling path
problem for the selection of oligonucleotides from a
set of candidates. Computing the selection of probes
requires multi-criterion optimization, which we cast
into a shortest path problem. Standard algorithms
running in linear time allow us to compute globally
optimal tiling paths from millions of candidate
oligonucleotides on a standard desktop computer for
most problem variants. The solutions to this
multi-criterion optimization are spatially adaptive to
the problem instance. Our formulation incorporates
experimental constraints with respect to specific
regions of interest and trade offs between
hybridization parameters, probe quality and tiling
density easily.",
acknowledgement = ack-nhfb,
keywords = "Biology and genetics; Graph Theory",
}
@Article{Yu:2008:CAA,
author = "Zeyun Yu and Chandrajit Bajaj",
title = "Computational Approaches for Automatic Structural
Analysis of Large Biomolecular Complexes",
journal = j-TCBB,
volume = "5",
number = "4",
pages = "568--582",
month = oct,
year = "2008",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/TCBB.2007.70226",
ISSN = "1545-5963",
bibdate = "Wed Jan 14 12:51:33 MST 2009",
bibsource = "http://portal.acm.org/",
abstract = "We present computational solutions to two problems of
macromolecular structure interpretation from
reconstructed three-dimensional electron microscopy
(3D-EM) maps of large bio-molecular complexes at
intermediate resolution (5A-15A). Thetwo problems
addressed are: (a) 3D structural alignment
(matching)between identified and segmented 3D maps of
structure units(e.g. trimeric configuration of
proteins), and (b) the secondary structure
identification of a segmented protein 3D map
(i.e.locations of a-helices, b -sheets). For problem
(a), we present an efficient algorithm to correlate
spatially (and structurally)two 3D maps of structure
units. Besides providing a similarity score between
structure units, the algorithm yields an effective
technique for resolution refinement of repeated
structure units,by 3D alignment and averaging. For
problem (b), we present an efficient algorithm to
compute eigenvalues and link eigenvectors of a Gaussian
convoluted structure tensor derived from the protein 3D
Map, thereby identifying and locating secondary
structural motifs of proteins. The efficiency and
performance of our approach is demonstrated on several
experimentally reconstructed 3D maps of virus capsid
shells from single-particle cryo-EM, as well as
computationally simulated protein structure density 3D
maps generated from protein model entries in the
Protein Data Bank.",
acknowledgement = ack-nhfb,
keywords = "3D Reconstruction; Alignment; Cryo-EM Maps; Secondary
Structure Detection; Segmentation; Similarity Measure;
Skeletonization; Structure Analysis",
}
@Article{Christinat:2008:GED,
author = "Yann Christinat and Bernd Wachmann and Lei Zhang",
title = "Gene Expression Data Analysis Using a Novel Approach
to Biclustering Combining Discrete and Continuous
Data",
journal = j-TCBB,
volume = "5",
number = "4",
pages = "583--593",
month = oct,
year = "2008",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/TCBB.2007.70251",
ISSN = "1545-5963",
bibdate = "Wed Jan 14 12:51:33 MST 2009",
bibsource = "http://portal.acm.org/",
abstract = "Many different methods exist for pattern detection in
gene expression data. In contrast to classical methods,
biclustering has the ability to cluster a group of
genes together with a group of conditions (replicates,
set of patients or drug compounds). However, since the
problem is NP-complex, most algorithms use heuristic
search functions and therefore might converge towards
local maxima. By using the results of biclustering on
discrete data as a starting point for a local search
function on continuous data, our algorithm avoids the
problem of heuristic initialization. Similar to OPSM,
our algorithm aims to detect biclusters whose rows and
columns can be ordered such that row values are growing
across the bicluster's columns and vice-versa. Results
have been generated on the yeast genome (Saccharomyces
cerevisiae), a human cancer dataset and random data.
Results on the yeast genome showed that 89\% of the one
hundred biggest non-overlapping biclusters were
enriched with Gene Ontology annotations. A comparison
with OPSM and ISA demonstrated a better efficiency when
using gene and condition orders. We present results on
random and real datasets that show the ability of our
algorithm to capture statistically significant and
biologically relevant biclusters.",
acknowledgement = ack-nhfb,
keywords = "Bioinformatics (genome or protein) databases; Data and
knowledge visualization; Data mining; Graph and tree
search strategies; Machine learning",
}
@Article{Lacroix:2008:IMN,
author = "Vincent Lacroix and Ludovic Cottret and Patricia
Th{\'e}bault and Marie-France Sagot",
title = "An Introduction to Metabolic Networks and Their
Structural Analysis",
journal = j-TCBB,
volume = "5",
number = "4",
pages = "594--617",
month = oct,
year = "2008",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/TCBB.2008.79",
ISSN = "1545-5963",
bibdate = "Wed Jan 14 12:51:33 MST 2009",
bibsource = "http://portal.acm.org/",
abstract = "There has been a renewed interest for metabolism in
the computational biology community, leading to an
avalanche of papers coming from methodological network
analysis as well as experimental and theoretical
biology. This paper is meant to serve as an initial
guide for both the biologists interested in formal
approaches and the mathematicians or computer
scientists wishing to inject more realism into their
models. The paper is focused on the structural aspects
of metabolism only. The literature is vast enough
already, and the thread through it difficult to follow
even for the more experienced worker in the field. We
explain methods for acquiring data and reconstructing
metabolic networks, and review the various models that
have been used for their structural analysis. Several
concepts such as modularity are introduced, as are the
controversies that have beset the field these past few
years, for instance, on whether metabolic networks are
small-world or scale-free, and on which model better
explains the evolution of metabolism. Clarifying the
work that has been done also helps in identifying open
questions and in proposing relevant future directions
in the field, which we do along the paper and in the
conclusion.",
acknowledgement = ack-nhfb,
keywords = "Biology and genetics; evolution; Graph Theory;
Introductory and Survey; metabolic networks; modelling;
modularity; reconstruction",
}
@Article{Satya:2008:UIP,
author = "Ravi Vijaya Satya and Amar Mukherjee",
title = "The Undirected Incomplete Perfect Phylogeny Problem",
journal = j-TCBB,
volume = "5",
number = "4",
pages = "618--629",
month = oct,
year = "2008",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/TCBB.2007.70218",
ISSN = "1545-5963",
bibdate = "Wed Jan 14 12:51:33 MST 2009",
bibsource = "http://portal.acm.org/",
abstract = "The incomplete perfect phylogeny (IPP) problem and the
incomplete perfect phylogeny haplotyping (IPPH) problem
deal with constructing a phylogeny for a given set of
haplotypes or genotypes with missing entries. The
earlier approaches for both of these problems dealt
with restricted versions of the problems, where the
root is either available or can be trivially
re-constructed from the data, or certain assumptions
were made about the data. In this paper, we deal with
the unrestricted versions of the problems, where the
root of the phylogeny is neither available nor
trivially recoverable from the data. Both IPP and IPPH
problems have previously been proven to be NP complete.
Here, we present efficient enumerative algorithms that
can handle practical instances of the problem.
Empirical analysis on simulated data shows that the
algorithms perform very well both in terms of speed and
in terms accuracy of the recovered data.",
acknowledgement = ack-nhfb,
keywords = "Haplotype Inference; Incomplete Perfect Phylogeny;
Perfect Phylogeny; Phylogenetics",
}
@Article{Gondro:2008:OCM,
author = "Cedric Gondro and Brian P. Kinghorn",
title = "Optimization of {cDNA} Microarray Experimental Designs
Using an Evolutionary Algorithm",
journal = j-TCBB,
volume = "5",
number = "4",
pages = "630--638",
month = oct,
year = "2008",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/TCBB.2007.70222",
ISSN = "1545-5963",
bibdate = "Wed Jan 14 12:51:33 MST 2009",
bibsource = "http://portal.acm.org/",
abstract = "The cDNA microarray is an important tool for
generating large datasets of gene expression
measurements. An efficient design is critical to ensure
that the experiment will be able to address relevant
biological questions. Microarray experimental design
can be treated as a multicriterion optimization
problem. For this class of problems evolutionary
algorithms (EAs) are well suited, as they can search
the solution space and evolve a design that optimizes
the parameters of interest based on their relative
value to the researcher under a given set of
constraints. This paper introduces the use of EAs for
optimization of experimental designs of spotted
microarrays using a weighted objective function. The EA
and the various criteria relevant to design
optimization are discussed. Evolved designs are
compared with designs obtained through exhaustive
search with results suggesting that the EA can find
just as efficient optimal or near-optimal designs
within a tractable timeframe.",
acknowledgement = ack-nhfb,
keywords = "Evolutionary computing and genetic algorithms;
experimental design; global optimization; microarrays",
}
@Article{Gusfield:2009:FFY,
author = "Dan Gusfield",
title = "Final, Five-Year End, Editorial",
journal = j-TCBB,
volume = "6",
number = "1",
pages = "1--2",
month = jan,
year = "2009",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Mon Mar 2 18:46:49 MST 2009",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Sagot:2009:NEE,
author = "Marie-France Sagot",
title = "New {EIC} Editorial",
journal = j-TCBB,
volume = "6",
number = "1",
pages = "3--3",
month = jan,
year = "2009",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Mon Mar 2 18:46:49 MST 2009",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Huson:2009:SSP,
author = "Daniel H. Huson and Vincent Moulton and Mike Steel",
title = "Special Section: Phylogenetics",
journal = j-TCBB,
volume = "6",
number = "1",
pages = "4--6",
month = jan,
year = "2009",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Mon Mar 2 18:46:49 MST 2009",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Liu:2009:BWT,
author = "Kevin Liu and Serita Nelesen and Sindhu Raghavan and
C. Randal Linder and Tandy Warnow",
title = "Barking Up The Wrong Treelength: The Impact of Gap
Penalty on Alignment and Tree Accuracy",
journal = j-TCBB,
volume = "6",
number = "1",
pages = "7--21",
month = jan,
year = "2009",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Mon Mar 2 18:46:49 MST 2009",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Minh:2009:BPD,
author = "Bui Quang Minh and Fabio Pardi and Steffen Klaere and
Arndt von Haeseler",
title = "Budgeted Phylogenetic Diversity on Circular Split
Systems",
journal = j-TCBB,
volume = "6",
number = "1",
pages = "22--29",
month = jan,
year = "2009",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Mon Mar 2 18:46:49 MST 2009",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Linz:2009:HNT,
author = "Simone Linz and Charles Semple",
title = "Hybridization in Nonbinary Trees",
journal = j-TCBB,
volume = "6",
number = "1",
pages = "30--45",
month = jan,
year = "2009",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Mon Mar 2 18:46:49 MST 2009",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Cardona:2009:MPN,
author = "Gabriel Cardona and Merc{\`e} Llabr{\'e}s and Francesc
Rossell{\'o} and Gabriel Valiente",
title = "Metrics for Phylogenetic Networks {I}: Generalizations
of the {Robinson--Foulds} Metric",
journal = j-TCBB,
volume = "6",
number = "1",
pages = "46--61",
month = jan,
year = "2009",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Mon Mar 2 18:46:49 MST 2009",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Willson:2009:RTS,
author = "Stephen J. Willson",
title = "Robustness of Topological Supertree Methods for
Reconciling Dense Incompatible Data",
journal = j-TCBB,
volume = "6",
number = "1",
pages = "62--75",
month = jan,
year = "2009",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Mon Mar 2 18:46:49 MST 2009",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Allman:2009:ICM,
author = "Elizabeth S. Allman and John A. Rhodes",
title = "The Identifiability of Covarion Models in
Phylogenetics",
journal = j-TCBB,
volume = "6",
number = "1",
pages = "76--88",
month = jan,
year = "2009",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Mon Mar 2 18:46:49 MST 2009",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Matsen:2009:FTI,
author = "Frederick A. Matsen",
title = "{Fourier} Transform Inequalities for Phylogenetic
Trees",
journal = j-TCBB,
volume = "6",
number = "1",
pages = "89--95",
month = jan,
year = "2009",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Mon Mar 2 18:46:49 MST 2009",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Gusfield:2009:OEE,
author = "Dan Gusfield",
title = "Outgoing {EIC} Editorial for this Special Section of
{TCBB} with the Theme of Phylogenetics",
journal = j-TCBB,
volume = "6",
number = "1",
pages = "96--96",
month = jan,
year = "2009",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Mon Mar 2 18:46:49 MST 2009",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Grunewald:2009:MPT,
author = "Stefan Gr{\"u}newald and Vincent Moulton",
title = "Maximum Parsimony for Tree Mixtures",
journal = j-TCBB,
volume = "6",
number = "1",
pages = "97--102",
month = jan,
year = "2009",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Mon Mar 2 18:46:49 MST 2009",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Huson:2009:DRP,
author = "Daniel H. Huson",
title = "Drawing Rooted Phylogenetic Networks",
journal = j-TCBB,
volume = "6",
number = "1",
pages = "103--109",
month = jan,
year = "2009",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Mon Mar 2 18:46:49 MST 2009",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Bordewich:2009:CTM,
author = "Magnus Bordewich and Olivier Gascuel and Katharina T.
Huber and Vincent Moulton",
title = "Consistency of Topological Moves Based on the Balanced
Minimum Evolution Principle of Phylogenetic Inference",
journal = j-TCBB,
volume = "6",
number = "1",
pages = "110--117",
month = jan,
year = "2009",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Mon Mar 2 18:46:49 MST 2009",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Wu:2009:RPT,
author = "Taoyang Wu and Vincent Moulton and Mike Steel",
title = "Refining Phylogenetic Trees Given Additional Data: An
Algorithm Based on Parsimony",
journal = j-TCBB,
volume = "6",
number = "1",
pages = "118--125",
month = jan,
year = "2009",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Mon Mar 2 18:46:49 MST 2009",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Mossel:2009:SEA,
author = "Elchanan Mossel and Sebastien Roch and Mike Steel",
title = "Shrinkage Effect in Ancestral Maximum Likelihood",
journal = j-TCBB,
volume = "6",
number = "1",
pages = "126--133",
month = jan,
year = "2009",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Mon Mar 2 18:46:49 MST 2009",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Ma:2009:GCU,
author = "Jianmin Ma and Minh N. Nguyen and Jagath C.
Rajapakse",
title = "Gene Classification Using Codon Usage and Support
Vector Machines",
journal = j-TCBB,
volume = "6",
number = "1",
pages = "134--143",
month = jan,
year = "2009",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Mon Mar 2 18:46:49 MST 2009",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Maitra:2009:IPO,
author = "Ranjan Maitra",
title = "Initializing Partition-Optimization Algorithms",
journal = j-TCBB,
volume = "6",
number = "1",
pages = "144--157",
month = jan,
year = "2009",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Mon Mar 2 18:46:49 MST 2009",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Narasimhan:2009:SPG,
author = "Sridharakumar Narasimhan and Raghunathan Rengaswamy
and Rajanikanth Vadigepalli",
title = "Structural Properties of Gene Regulatory Networks:
Definitions and Connections",
journal = j-TCBB,
volume = "6",
number = "1",
pages = "158--170",
month = jan,
year = "2009",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Mon Mar 2 18:46:49 MST 2009",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Anonymous:2009:RL,
author = "Anonymous",
title = "2008 Reviewers List",
journal = j-TCBB,
volume = "6",
number = "1",
pages = "171--173",
month = jan,
year = "2009",
CODEN = "ITCBCY",
ISSN = "1545-5963",
bibdate = "Mon Mar 2 18:46:49 MST 2009",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Sagot:2009:EE,
author = "Marie-France Sagot",
title = "{EIC} Editorial",
journal = j-TCBB,
volume = "6",
number = "2",
pages = "177--177",
month = apr,
year = "2009",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/TCBB.2009.44",
ISSN = "1545-5963",
bibdate = "Mon Jun 1 17:03:22 MDT 2009",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Mandoiu:2009:GEI,
author = "Ion Mandoiu and Yi Pan and Raj Sunderraman and
Alexander Zelikovsky",
title = "{Guest Editors}' Introduction to the Special Section on
Bioinformatics Research and Applications",
journal = j-TCBB,
volume = "6",
number = "2",
pages = "178--179",
month = apr,
year = "2009",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/TCBB.2009.45",
ISSN = "1545-5963",
bibdate = "Mon Jun 1 17:03:22 MDT 2009",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Treangen:2009:NHL,
author = "Todd J. Treangen and Aaron E. Darling and Guillaume
Achaz and Mark A. Ragan and Xavier Messeguer and
Eduardo P. C. Rocha",
title = "A Novel Heuristic for Local Multiple Alignment of
Interspersed {DNA} Repeats",
journal = j-TCBB,
volume = "6",
number = "2",
pages = "180--189",
month = apr,
year = "2009",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/TCBB.2009.9",
ISSN = "1545-5963",
bibdate = "Mon Jun 1 17:03:22 MDT 2009",
bibsource = "http://portal.acm.org/",
abstract = "Pairwise local sequence alignment methods have been
the prevailing technique to identify homologous
nucleotides between related species. However, existing
methods that identify and align all homologous
nucleotides in one or more genomes have suffered from
poor scalability and limited accuracy. We propose a
novel method that couples a gapped extension heuristic
with an efficient filtration method for identifying
interspersed repeats in genome sequences. During gapped
extension, we use the MUSCLE implementation of
progressive global multiple alignment with iterative
refinement. The resulting gapped extensions potentially
contain alignments of unrelated sequence. We detect and
remove such undesirable alignments using a hidden
Markov model (HMM) to predict the posterior probability
of homology. The HMM emission frequencies for
nucleotide substitutions can be derived from any
time-reversible nucleotide substitution matrix. We
evaluate the performance of our method and previous
approaches on a hybrid data set of real genomic DNA
with simulated interspersed repeats. Our method
outperforms a related method in terms of sensitivity,
positive predictive value, and localizing boundaries of
homology. The described methods have been implemented
in freely available software, Repeatoire, available
from: http://wwwabi.snv.jussieu.fr/public/Repeatoire.",
acknowledgement = ack-nhfb,
keywords = "DNA repeats; gapped extension.; genome comparison;
hidden Markov model; local multiple alignment; Sequence
alignment",
}
@Article{Qiu:2009:FMK,
author = "Shibin Qiu and Terran Lane",
title = "A Framework for Multiple Kernel Support Vector
Regression and Its Applications to {siRNA} Efficacy
Prediction",
journal = j-TCBB,
volume = "6",
number = "2",
pages = "190--199",
month = apr,
year = "2009",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/TCBB.2008.139",
ISSN = "1545-5963",
bibdate = "Mon Jun 1 17:03:22 MDT 2009",
bibsource = "http://portal.acm.org/",
abstract = "The cell defense mechanism of RNA interference has
applications in gene function analysis and promising
potentials in human disease therapy. To effectively
silence a target gene, it is desirable to select
appropriate initiator siRNA molecules having
satisfactory silencing capabilities. Computational
prediction for silencing efficacy of siRNAs can assist
this screening process before using them in biological
experiments. String kernel functions, which operate
directly on the string objects representing siRNAs and
target mRNAs, have been applied to support vector
regression for the prediction and improved accuracy
over numerical kernels in multidimensional vector
spaces constructed from descriptors of siRNA design
rules. To fully utilize information provided by string
and numerical data, we propose to unify the two in a
kernel feature space by devising a multiple kernel
regression framework where a linear combination of the
kernels is used. We formulate the multiple kernel
learning into a quadratically constrained quadratic
programming (QCQP) problem, which although yields
global optimal solution, is computationally demanding
and requires a commercial solver package. We further
propose three heuristics based on the principle of
kernel-target alignment and predictive accuracy.
Empirical results demonstrate that multiple kernel
regression can improve accuracy, decrease model
complexity by reducing the number of support vectors,
and speed up computational performance dramatically. In
addition, multiple kernel regression evaluates the
importance of constituent kernels, which for the siRNA
efficacy prediction problem, compares the relative
significance of the design rules. Finally, we give
insights into the multiple kernel regression mechanism
and point out possible extensions.",
acknowledgement = ack-nhfb,
keywords = "multiple kernel heuristics; Multiple kernel learning;
QCQP optimization; RNA interference; siRNA efficacy.;
support vector regression",
}
@Article{Park:2009:NBI,
author = "Yongjin Park and Stanley Shackney and Russell
Schwartz",
title = "Network-Based Inference of Cancer Progression from
Microarray Data",
journal = j-TCBB,
volume = "6",
number = "2",
pages = "200--212",
month = apr,
year = "2009",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/TCBB.2008.126",
ISSN = "1545-5963",
bibdate = "Mon Jun 1 17:03:22 MDT 2009",
bibsource = "http://portal.acm.org/",
abstract = "Cancer cells exhibit a common phenotype of
uncontrolled cell growth, but this phenotype may arise
from many different combinations of mutations. By
inferring how cells evolve in individual tumors, a
process called cancer progression, we may be able to
identify important mutational events for different
tumor types, potentially leading to new therapeutics
and diagnostics. Prior work has shown that it is
possible to infer frequent progression pathways by
using gene expression profiles to estimate
&\#8220;distances'' between tumors. Here, we apply gene
network models to improve these estimates of
evolutionary distance by controlling for correlations
among coregulated genes. We test three variants of this
approach: one using an optimized best-fit network,
another using sampling to infer a high-confidence
subnetwork, and one using a modular network inferred
from clusters of similarly expressed genes. Application
to lung cancer and breast cancer microarray data sets
shows small improvements in phylogenies when correcting
from the optimized network and more substantial
improvements when correcting from the sampled or
modular networks. Our results suggest that a network
correction approach improves estimates of tumor
similarity, but sophisticated network models are needed
to control for the large hypothesis space and sparse
data currently available.",
acknowledgement = ack-nhfb,
keywords = "Biology and genetics; graphs and networks; machine
learning.; trees",
}
@Article{Zhu:2009:GGA,
author = "Qian Zhu and Zaky Adam and Vicky Choi and David
Sankoff",
title = "Generalized Gene Adjacencies, Graph Bandwidth, and
Clusters in Yeast Evolution",
journal = j-TCBB,
volume = "6",
number = "2",
pages = "213--220",
month = apr,
year = "2009",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/TCBB.2008.121",
ISSN = "1545-5963",
bibdate = "Mon Jun 1 17:03:22 MDT 2009",
bibsource = "http://portal.acm.org/",
abstract = "We present a parameterized definition of gene clusters
that allows us to control the emphasis placed on
conserved order within a cluster. Though motivated by
biological rather than mathematical considerations,
this parameter turns out to be closely related to the
bandwidth parameter of a graph. Our focus will be on
how this parameter affects the characteristics of
clusters: how numerous they are, how large they are,
how rearranged they are, and to what extent they are
preserved from ancestor to descendant in a phylogenetic
tree. We infer the latter property by dynamic
programming optimization of the presence of individual
edges at the ancestral nodes of the phylogeny. We apply
our analysis to a set of genomes drawn from the Yeast
Gene Order Browser.",
acknowledgement = ack-nhfb,
keywords = "Ashbya gossypii; Candida glabrata; Comparative
genomics; dynamic programming; evolution; gene
clusters; genome rearrangements; graph bandwidth;
Kluyveromyces lactis.; Kluyveromyces waltii; phylogeny;
Saccharomyces cerevisiae; yeast",
}
@Article{Bansal:2009:GDP,
author = "Mukul S. Bansal and Oliver Eulenstein and Andr{\'e}
Wehe",
title = "The Gene-Duplication Problem: Near-Linear Time
Algorithms for {NNI}-Based Local Searches",
journal = j-TCBB,
volume = "6",
number = "2",
pages = "221--231",
month = apr,
year = "2009",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/TCBB.2009.7",
ISSN = "1545-5963",
bibdate = "Mon Jun 1 17:03:22 MDT 2009",
bibsource = "http://portal.acm.org/",
abstract = "The gene-duplication problem is to infer a species
supertree from a collection of gene trees that are
confounded by complex histories of gene-duplication
events. This problem is NP-complete and thus requires
efficient and effective heuristics. Existing heuristics
perform a stepwise search of the tree space, where each
step is guided by an exact solution to an instance of a
local search problem. A classical local search problem
is the {\tt NNI} search problem, which is based on the
nearest neighbor interchange operation. In this work,
we 1) provide a novel near-linear time algorithm for
the {\tt NNI} search problem, 2) introduce extensions
that significantly enlarge the search space of the {\tt
NNI} search problem, and 3) present algorithms for
these extended versions that are asymptotically just as
efficient as our algorithm for the {\tt NNI} search
problem. The exceptional speedup achieved in the
extended {\tt NNI} search problems makes the
gene-duplication problem more tractable for large-scale
phylogenetic analyses. We verify the performance of our
algorithms in a comparison study using sets of large
randomly generated gene trees.",
acknowledgement = ack-nhfb,
keywords = "Computational phylogenetics; gene-duplication; local
search; supertrees; {\tt NNI}.",
}
@Article{Sun:2009:DPP,
author = "Yanni Sun and Jeremy Buhler",
title = "Designing Patterns and Profiles for Faster {HMM}
Search",
journal = j-TCBB,
volume = "6",
number = "2",
pages = "232--243",
month = apr,
year = "2009",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/TCBB.2008.14",
ISSN = "1545-5963",
bibdate = "Mon Jun 1 17:03:22 MDT 2009",
bibsource = "http://portal.acm.org/",
abstract = "Profile HMMs are powerful tools for modeling conserved
motifs in proteins. They are widely used by search
tools to classify new protein sequences into families
based on domain architecture. However, the
proliferation of known motifs and new proteomic
sequence data poses a computational challenge for
search, requiring days of CPU time to annotate an
organism's proteome. It is highly desirable to speed up
HMM search in large databases. We design PROSITE-like
patterns and short profiles that are used as filters to
rapidly eliminate protein-motif pairs for which a full
profile HMM comparison does not yield a significant
match. The design of the pattern-based filters is
formulated as a multichoice knapsack problem.
Profile-based filters with high sensitivity are
extracted from a profile HMM based on their theoretical
sensitivity and false positive rate. Experiments show
that our profile-based filters achieve high sensitivity
(near 100 percent) while keeping around 20\times
speedup with respect to the unfiltered search program.
Pattern-based filters typically retain at least 90
percent of the sensitivity of the source HMM with
30-40\times speedup. The profile-based filters have
sensitivity comparable to the multistage filtering
strategy HMMERHEAD [15] and are faster in most of our
experiments.",
acknowledgement = ack-nhfb,
keywords = "bioinformatics databases; Biology and genetics; hidden
Markov models.; sequence similarity search",
}
@Article{Shaik:2009:FAS,
author = "Jahangheer Shaik and Mohammed Yeasin",
title = "Fuzzy-Adaptive-Subspace-Iteration-Based Two-Way
Clustering of Microarray Data",
journal = j-TCBB,
volume = "6",
number = "2",
pages = "244--259",
month = apr,
year = "2009",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/TCBB.2008.15",
ISSN = "1545-5963",
bibdate = "Mon Jun 1 17:03:22 MDT 2009",
bibsource = "http://portal.acm.org/",
abstract = "This paper presents
Fuzzy-Adaptive-Subspace-Iteration-based Two-way
Clustering (FASIC) of microarray data for finding
differentially expressed genes (DEGs) from two-sample
microarray experiments. The concept of fuzzy membership
is introduced to transform the hard adaptive subspace
iteration (ASI) algorithm into a fuzzy-ASI algorithm to
perform two-way clustering. The proposed approach
follows a progressive framework to assign a relevance
value to genes associated with each cluster.
Subsequently, each gene cluster is scored and ranked
based on its potential to provide a correct
classification of the sample classes. These ranks are
converted into P values using the R-test, and the
significance of each gene is determined. A fivefold
validation is performed on the DEGs selected using the
proposed approach. Empirical analyses on a number of
simulated microarray data sets are conducted to
quantify the results obtained using the proposed
approach. To exemplify the efficacy of the proposed
approach, further analyses on different real microarray
data sets are also performed.",
acknowledgement = ack-nhfb,
keywords = "classification and association rules; Clustering; data
and knowledge visualization; data mining; feature
extraction or construction.",
}
@Article{Vignes:2009:GCI,
author = "Matthieu Vignes and Florence Forbes",
title = "Gene Clustering via Integrated {Markov} Models
Combining Individual and Pairwise Features",
journal = j-TCBB,
volume = "6",
number = "2",
pages = "260--270",
month = apr,
year = "2009",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/TCBB.2007.70248",
ISSN = "1545-5963",
bibdate = "Mon Jun 1 17:03:22 MDT 2009",
bibsource = "http://portal.acm.org/",
abstract = "Clustering of genes into groups sharing common
characteristics is a useful exploratory technique for a
number of subsequent computational analysis. A wide
range of clustering algorithms have been proposed in
particular to analyze gene expression data, but most of
them consider genes as independent entities or include
relevant information on gene interactions in a
suboptimal way. We propose a probabilistic model that
has the advantage to account for individual data (e.g.,
expression) and pairwise data (e.g., interaction
information coming from biological networks)
simultaneously. Our model is based on hidden Markov
random field models in which parametric probability
distributions account for the distribution of
individual data. Data on pairs, possibly reflecting
distance or similarity measures between genes, are then
included through a graph, where the nodes represent the
genes, and the edges are weighted according to the
available interaction information. As a probabilistic
model, this model has many interesting theoretical
features. In addition, preliminary experiments on
simulated and real data show promising results and
points out the gain in using such an approach.
Availability: The software used in this work is written
in C++ and is available with other supplementary
material at
http://mistis.inrialpes.fr/people/forbes/transparentia/supplementary.html.",
acknowledgement = ack-nhfb,
keywords = "gene expression.; Markov random fields; metabolic
networks; model-based clustering",
}
@Article{Heath:2009:SMN,
author = "Lenwood S. Heath and Allan A. Sioson",
title = "Semantics of Multimodal Network Models",
journal = j-TCBB,
volume = "6",
number = "2",
pages = "271--280",
month = apr,
year = "2009",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/TCBB.2007.70242",
ISSN = "1545-5963",
bibdate = "Mon Jun 1 17:03:22 MDT 2009",
bibsource = "http://portal.acm.org/",
abstract = "A multimodal network (MMN) is a novel graph-theoretic
formalism designed to capture the structure of
biological networks and to represent relationships
derived from multiple biological databases. MMNs
generalize the standard notions of graphs and
hypergraphs, which are the bases of current
diagrammatic representations of biological phenomena,
and incorporate the concept of mode. Each vertex of an
MMN is a biological entity, a biot, while each modal
hyperedge is a typed relationship, where the type is
given by the mode of the hyperedge. The semantics of
each modal hyperedge e is given through denotational
semantics, where a valuation function f\_{e} defines
the relationship among the values of the vertices
incident on e. The meaning of an MMN is denoted in
terms of the semantics of a hyperedge sequence. A
companion paper defines MMNs and concentrates on the
structural aspects of MMNs. This paper develops MMN
denotational semantics when used as a representation of
the semantics of biological networks and discusses
applications of MMNs in managing complex biological
data.",
acknowledgement = ack-nhfb,
keywords = "biological model; biological networks; biot;
denotational semantics.; graph; hypergraph; mode;
Multimodal network",
}
@Article{Arribas-Gil:2009:SAS,
author = "Ana Arribas-Gil and Dirk Metzler and Jean-Louis
Plouhinec",
title = "Statistical Alignment with a Sequence Evolution Model
Allowing Rate Heterogeneity along the Sequence",
journal = j-TCBB,
volume = "6",
number = "2",
pages = "281--295",
month = apr,
year = "2009",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/TCBB.2007.70246",
ISSN = "1545-5963",
bibdate = "Mon Jun 1 17:03:22 MDT 2009",
bibsource = "http://portal.acm.org/",
abstract = "We present a stochastic sequence evolution model to
obtain alignments and estimate mutation rates between
two homologous sequences. The model allows two possible
evolutionary behaviors along a DNA sequence in order to
determine conserved regions and take its heterogeneity
into account. In our model, the sequence is divided
into slow and fast evolution regions. The boundaries
between these sections are not known. It is our aim to
detect them. The evolution model is based on a fragment
insertion and deletion process working on fast regions
only and on a substitution process working on fast and
slow regions with different rates. This model induces a
pair hidden Markov structure at the level of
alignments, thus making efficient statistical alignment
algorithms possible. We propose two complementary
estimation methods, namely, a Gibbs sampler for
Bayesian estimation and a stochastic version of the EM
algorithm for maximum likelihood estimation. Both
algorithms involve the sampling of alignments. We
propose a partial alignment sampler, which is
computationally less expensive than the typical whole
alignment sampler. We show the convergence of the two
estimation algorithms when used with this partial
sampler. Our algorithms provide consistent estimates
for the mutation rates and plausible alignments and
sequence segmentations on both simulated and real
data.",
acknowledgement = ack-nhfb,
keywords = "biology and genetics.; Markov processes; mathematics
and statistics; probabilistic algorithms; sequence
evolution",
}
@Article{Weber:2009:VET,
author = "Gunther H. Weber and Oliver Rubel and Min-Yu Huang and
Angela H. DePace and Charless C. Fowlkes and Soile V.
E. Keranen and Cris L. Luengo Hendriks and Hans Hagen
and David W. Knowles and Jitendra Malik and Mark D.
Biggin and Bernd Hamann",
title = "Visual Exploration of Three-Dimensional Gene
Expression Using Physical Views and Linked Abstract
Views",
journal = j-TCBB,
volume = "6",
number = "2",
pages = "296--309",
month = apr,
year = "2009",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/TCBB.2007.70249",
ISSN = "1545-5963",
bibdate = "Mon Jun 1 17:03:22 MDT 2009",
bibsource = "http://portal.acm.org/",
abstract = "During animal development, complex patterns of gene
expression provide positional information within the
embryo. To better understand the underlying gene
regulatory networks, the Berkeley Drosophila
Transcription Network Project (BDTNP) has developed
methods that support quantitative computational
analysis of three-dimensional (3D) gene expression in
early Drosophila embryos at cellular resolution. We
introduce PointCloudXplore (PCX), an interactive
visualization tool that supports visual exploration of
relationships between different genes' expression using
a combination of established visualization techniques.
Two aspects of gene expression are of particular
interest: 1) gene expression patterns defined by the
spatial locations of cells expressing a gene and 2)
relationships between the expression levels of multiple
genes. PCX provides users with two corresponding
classes of data views: 1) Physical Views based on the
spatial relationships of cells in the embryo and 2)
Abstract Views that discard spatial information and
plot expression levels of multiple genes with respect
to each other. Cell Selectors highlight data associated
with subsets of embryo cells within a View. Using
linking, these selected cells can be viewed in multiple
representations. We describe PCX as a 3D gene
expression visualization tool and provide examples of
how it has been used by BDTNP biologists to generate
new hypotheses.",
acknowledgement = ack-nhfb,
keywords = "brushing; information visualization; Interactive data
exploration; multiple linked views; physical views;
scatter plots.; spatial expression patterns;
three-dimensional gene expression; visualization",
}
@Article{Dougherty:2009:CBM,
author = "Edward R. Dougherty and Marcel Brun and Jeffrey M.
Trent and Michael L. Bittner",
title = "Conditioning-Based Modeling of Contextual Genomic
Regulation",
journal = j-TCBB,
volume = "6",
number = "2",
pages = "310--320",
month = apr,
year = "2009",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/TCBB.2007.70247",
ISSN = "1545-5963",
bibdate = "Mon Jun 1 17:03:22 MDT 2009",
bibsource = "http://portal.acm.org/",
abstract = "A more complete understanding of the alterations in
cellular regulatory and control mechanisms that occur
in the various forms of cancer has been one of the
central targets of the genomic and proteomic methods
that allow surveys of the abundance and/or state of
cellular macromolecules. This preference is driven both
by the intractability of cancer to generic therapies,
assumed to be due to the highly varied molecular
etiologies observed in cancer, and by the opportunity
to discern and dissect the regulatory and control
interactions presented by the highly diverse assortment
of perturbations of regulation and control that arise
in cancer. Exploiting the opportunities for inference
on the regulatory and control connections offered by
these revealing system perturbations is fraught with
the practical problems that arise from the way
biological systems operate. Two classes of regulatory
action in biological systems are particularly inimical
to inference, convergent regulation, where a variety of
regulatory actions result in a common set of control
responses (crosstalk), and divergent regulation, where
a single regulatory action produces entirely different
sets of control responses, depending on cellular
context (conditioning). We have constructed a coarse
mathematical model of the propagation of regulatory
influence in such distributed, context-sensitive
regulatory networks that allows a quantitative
estimation of the amount of crosstalk and conditioning
associated with a candidate regulatory gene taken from
a set of genes that have been profiled over a series of
samples where the candidate's activity varies.",
acknowledgement = ack-nhfb,
keywords = "Microarray; regulatory networks.",
}
@Article{Heath:2009:MNS,
author = "Lenwood S. Heath and Allan A. Sioson",
title = "Multimodal Networks: Structure and Operations",
journal = j-TCBB,
volume = "6",
number = "2",
pages = "321--332",
month = apr,
year = "2009",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/TCBB.2007.70243",
ISSN = "1545-5963",
bibdate = "Mon Jun 1 17:03:22 MDT 2009",
bibsource = "http://portal.acm.org/",
abstract = "A multimodal network (MMN) is a novel graph-theoretic
formalism designed to capture the structure of
biological networks and to represent relationships
derived from multiple biological databases. MMNs
generalize the standard notions of graphs and
hypergraphs, which are the bases of current
diagrammatic representations of biological phenomena
and incorporate the concept of mode. Each vertex of an
MMN is a biological entity, a biot, while each modal
hyperedge is a typed relationship, where the type is
given by the mode of the hyperedge. The current paper
defines MMNs and concentrates on the structural aspects
of MMNs. A companion paper develops MMNs as a
representation of the semantics of biological networks
and discusses applications of the MMNs in managing
complex biological data. The MMN model has been
implemented in a database system containing multiple
kinds of biological networks.",
acknowledgement = ack-nhfb,
keywords = "biological networks; biot.; graph; hypergraph; mode;
Multimodal network",
}
@Article{Yukinawa:2009:OAB,
author = "Naoto Yukinawa and Shigeyuki Oba and Kikuya Kato and
Shin Ishii",
title = "Optimal Aggregation of Binary Classifiers for
Multiclass Cancer Diagnosis Using Gene Expression
Profiles",
journal = j-TCBB,
volume = "6",
number = "2",
pages = "333--343",
month = apr,
year = "2009",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/TCBB.2007.70239",
ISSN = "1545-5963",
bibdate = "Mon Jun 1 17:03:22 MDT 2009",
bibsource = "http://portal.acm.org/",
abstract = "Multiclass classification is one of the fundamental
tasks in bioinformatics and typically arises in cancer
diagnosis studies by gene expression profiling. There
have been many studies of aggregating binary
classifiers to construct a multiclass classifier based
on one-versus-the-rest (1R), one-versus-one (11), or
other coding strategies, as well as some comparison
studies between them. However, the studies found that
the best coding depends on each situation. Therefore, a
new problem, which we call the &\#8220;optimal coding
problem,'' has arisen: how can we determine which
coding is the optimal one in each situation? To
approach this optimal coding problem, we propose a
novel framework for constructing a multiclass
classifier, in which each binary classifier to be
aggregated has a weight value to be optimally tuned
based on the observed data. Although there is no a
priori answer to the optimal coding problem, our weight
tuning method can be a consistent answer to the
problem. We apply this method to various classification
problems including a synthesized data set and some
cancer diagnosis data sets from gene expression
profiling. The results demonstrate that, in most
situations, our method can improve classification
accuracy over simple voting heuristics and is better
than or comparable to state-of-the-art multiclass
predictors.",
acknowledgement = ack-nhfb,
keywords = "cancer diagnosis.; error correcting output coding;
gene expression profiling; Multiclass classification",
}
@Article{Olman:2009:PCA,
author = "Victor Olman and Fenglou Mao and Hongwei Wu and Ying
Xu",
title = "Parallel Clustering Algorithm for Large Data Sets with
Applications in Bioinformatics",
journal = j-TCBB,
volume = "6",
number = "2",
pages = "344--352",
month = apr,
year = "2009",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/TCBB.2007.70272",
ISSN = "1545-5963",
bibdate = "Mon Jun 1 17:03:22 MDT 2009",
bibsource = "http://portal.acm.org/",
abstract = "Large sets of bioinformatical data provide a challenge
in time consumption while solving the cluster
identification problem, and that is why a parallel
algorithm is so needed for identifying dense clusters
in a noisy background. Our algorithm works on a graph
representation of the data set to be analyzed. It
identifies clusters through the identification of
densely intraconnected subgraphs. We have employed a
minimum spanning tree (MST) representation of the graph
and solve the cluster identification problem using this
representation. The computational bottleneck of our
algorithm is the construction of an MST of a graph, for
which a parallel algorithm is employed. Our high-level
strategy for the parallel MST construction algorithm is
to first partition the graph, then construct MSTs for
the partitioned subgraphs and auxiliary bipartite
graphs based on the subgraphs, and finally merge these
MSTs to derive an MST of the original graph. The
computational results indicate that when running on 150
CPUs, our algorithm can solve a cluster identification
problem on a data set with 1,000,000 data points almost
100 times faster than on single CPU, indicating that
this program is capable of handling very large data
clustering problems in an efficient manner. We have
implemented the clustering algorithm as the software
CLUMP.",
acknowledgement = ack-nhfb,
keywords = "clustering algorithm; genome application; parallel
processing.; Pattern recognition",
}
@Article{Paul:2009:PCC,
author = "Topon Kumar Paul and Hitoshi Iba",
title = "Prediction of Cancer Class with Majority Voting
Genetic Programming Classifier Using Gene Expression
Data",
journal = j-TCBB,
volume = "6",
number = "2",
pages = "353--367",
month = apr,
year = "2009",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/TCBB.2007.70245",
ISSN = "1545-5963",
bibdate = "Mon Jun 1 17:03:22 MDT 2009",
bibsource = "http://portal.acm.org/",
abstract = "In order to get a better understanding of different
types of cancers and to find the possible biomarkers
for diseases, recently, many researchers are analyzing
the gene expression data using various machine learning
techniques. However, due to a very small number of
training samples compared to the huge number of genes
and class imbalance, most of these methods suffer from
overfitting. In this paper, we present a majority
voting genetic programming classifier (MVGPC) for the
classification of microarray data. Instead of a single
rule or a single set of rules, we evolve multiple rules
with genetic programming (GP) and then apply those
rules to test samples to determine their labels with
majority voting technique. By performing experiments on
four different public cancer data sets, including
multiclass data sets, we have found that the test
accuracies of MVGPC are better than those of other
methods, including AdaBoost with GP. Moreover, some of
the more frequently occurring genes in the
classification rules are known to be associated with
the types of cancers being studied in this paper.",
acknowledgement = ack-nhfb,
keywords = "Classifier design and evaluation; data mining;
evolutionary computing and genetic algorithm; feature
extraction; gene expression; majority voting.",
}
@Article{Sagot:2009:EEI,
author = "Marie-France Sagot",
title = "{EIC} Editorial: Introducing New {Associate Editors}",
journal = j-TCBB,
volume = "6",
number = "3",
pages = "369--369",
month = jul,
year = "2009",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/TCBB.2009.66",
ISSN = "1545-5963",
bibdate = "Tue Aug 11 18:13:22 MDT 2009",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Bi:2009:MCE,
author = "Chengpeng Bi",
title = "A {Monte Carlo} {EM} Algorithm for {De Novo Motif}
Discovery in Biomolecular Sequences",
journal = j-TCBB,
volume = "6",
number = "3",
pages = "370--386",
month = jul,
year = "2009",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/TCBB.2008.103",
ISSN = "1545-5963",
bibdate = "Tue Aug 11 18:13:22 MDT 2009",
bibsource = "http://portal.acm.org/",
abstract = "Motif discovery methods play pivotal roles in
deciphering the genetic regulatory codes (i.e., motifs)
in genomes as well as in locating conserved domains in
protein sequences. The Expectation Maximization (EM)
algorithm is one of the most popular methods used in de
novo motif discovery. Based on the position weight
matrix (PWM) updating technique, this paper presents a
Monte Carlo version of the EM motif-finding algorithm
that carries out stochastic sampling in local alignment
space to overcome the conventional EM's main drawback
of being trapped in a local optimum. The newly
implemented algorithm is named as Monte Carlo EM Motif
Discovery Algorithm (MCEMDA). MCEMDA starts from an
initial model, and then it iteratively performs Monte
Carlo simulation and parameter update until
convergence. A log-likelihood profiling technique
together with the top-$k$ strategy is introduced to
cope with the phase shifts and multiple modal issues in
motif discovery problem. A novel grouping motif
alignment (GMA) algorithm is designed to select motifs
by clustering a population of candidate local
alignments and successfully applied to subtle motif
discovery. MCEMDA compares favorably to other popular
PWM-based and word enumerative motif algorithms tested
using simulated $(l, d)$-motif cases, documented
prokaryotic, and eukaryotic DNA motif
sequences. Finally, MCEMDA is applied to detect large
blocks of conserved domains using protein benchmarks
and exhibits its excellent capacity while compared with
other multiple sequence alignment methods.",
acknowledgement = ack-nhfb,
keywords = "Expectation maximization (EM); Monte Carlo EM; motif
discovery; multiple sequence alignment; transcriptional
regulation.",
}
@Article{Stoye:2009:UAR,
author = "Jens Stoye and Roland Wittler",
title = "A Unified Approach for Reconstructing Ancient Gene
Clusters",
journal = j-TCBB,
volume = "6",
number = "3",
pages = "387--400",
month = jul,
year = "2009",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/TCBB.2008.135",
ISSN = "1545-5963",
bibdate = "Tue Aug 11 18:13:22 MDT 2009",
bibsource = "http://portal.acm.org/",
abstract = "The order of genes in genomes provides extensive
information. In comparative genomics, differences or
similarities of gene orders are determined to predict
functional relations of genes or phylogenetic relations
of genomes. For this purpose, various combinatorial
models can be used to identify gene
clusters&\#8212;groups of genes that are colocated in a
set of genomes. We introduce a unified approach to
model gene clusters and define the problem of labeling
the inner nodes of a given phylogenetic tree with sets
of gene clusters. Our optimization criterion in this
context combines two properties: parsimony, i.e., the
number of gains and losses of gene clusters has to be
minimal, and consistency, i.e., for each ancestral
node, there must exist at least one potential gene
order that contains all the reconstructed clusters. We
present and evaluate an exact algorithm to solve this
problem. Despite its exponential worst-case time
complexity, our method is suitable even for large-scale
data. We show the effectiveness and efficiency on both
simulated and real data.",
acknowledgement = ack-nhfb,
keywords = "Comparative genomics; consistency.; gene cluster; gene
cluster reconstruction; gene order; parsimony;
phylogeny",
}
@Article{Chen:2009:AAM,
author = "Xin Chen and Yun Cui",
title = "An Approximation Algorithm for the Minimum Breakpoint
Linearization Problem",
journal = j-TCBB,
volume = "6",
number = "3",
pages = "401--409",
month = jul,
year = "2009",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/TCBB.2009.3",
ISSN = "1545-5963",
bibdate = "Tue Aug 11 18:13:22 MDT 2009",
bibsource = "http://portal.acm.org/",
abstract = "In the recent years, there has been a growing interest
in inferring the total order of genes or markers on a
chromosome, since current genetic mapping efforts might
only suffice to produce a partial order. Many
interesting optimization problems were thus formulated
in the framework of genome rearrangement. As an
important one among them, the minimum breakpoint
linearization (MBL) problem is to find the total order
of a partially ordered genome that minimizes its
breakpoint distance to a reference genome whose genes
are already totally ordered. It was previously shown to
be NP-hard, and the algorithms proposed so far are all
heuristic. In this paper, we present an {m^2+m\over
2}-approximation algorithm for the MBL problem, where m
is the number of gene maps that are combined together
to form a partial order of the genome under
investigation.",
acknowledgement = ack-nhfb,
keywords = "approximation algorithms.; breakpoint distance;
Comparative genomics; partially ordered genomes",
}
@Article{Wang:2009:EKF,
author = "Zidong Wang and Xiaohui Liu and Yurong Liu and Jinling
Liang and Veronica Vinciotti",
title = "An Extended {Kalman} Filtering Approach to Modeling
Nonlinear Dynamic Gene Regulatory Networks via Short
Gene Expression Time Series",
journal = j-TCBB,
volume = "6",
number = "3",
pages = "410--419",
month = jul,
year = "2009",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/TCBB.2009.5",
ISSN = "1545-5963",
bibdate = "Tue Aug 11 18:13:22 MDT 2009",
bibsource = "http://portal.acm.org/",
abstract = "In this paper, the extended Kalman filter (EKF)
algorithm is applied to model the gene regulatory
network from gene time series data. The gene regulatory
network is considered as a nonlinear dynamic stochastic
model that consists of the gene measurement equation
and the gene regulation equation. After specifying the
model structure, we apply the EKF algorithm for
identifying both the model parameters and the actual
value of gene expression levels. It is shown that the
EKF algorithm is an online estimation algorithm that
can identify a large number of parameters (including
parameters of nonlinear functions) through iterative
procedure by using a small number of observations. Four
real-world gene expression data sets are employed to
demonstrate the effectiveness of the EKF algorithm, and
the obtained models are evaluated from the viewpoint of
bioinformatics.",
acknowledgement = ack-nhfb,
keywords = "clustering; DNA microarray technology; extended Kalman
filtering; gene expression; Modeling; time series
data.",
}
@Article{Bryant:2009:CDT,
author = "David Bryant and Mike Steel",
title = "Computing the Distribution of a Tree Metric",
journal = j-TCBB,
volume = "6",
number = "3",
pages = "420--426",
month = jul,
year = "2009",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/TCBB.2009.32",
ISSN = "1545-5963",
bibdate = "Tue Aug 11 18:13:22 MDT 2009",
bibsource = "http://portal.acm.org/",
abstract = "The Robinson-Foulds (RF) distance is by far the most
widely used measure of dissimilarity between trees.
Although the distribution of these distances has been
investigated for 20 years, an algorithm that is
explicitly polynomial time has yet to be described for
computing the distribution for trees around a given
tree. In this paper, we derive a polynomial-time
algorithm for this distribution. We show how the
distribution can be approximated by a Poisson
distribution determined by the proportion of leaves
that lie in &\#8220;cherries'' of the given tree. We
also describe how our results can be used to derive
normalization constants that are required in a recently
proposed maximum likelihood approach to supertree
construction.",
acknowledgement = ack-nhfb,
keywords = "Biology and genetics; discrete mathematics
applications; normalization constant.; phylogenetics;
Poisson approximation; Robinson-Foulds distance;
trees",
}
@Article{Hulsman:2009:EOK,
author = "Marc Hulsman and Marcel J. T. Reinders and Dick de
Ridder",
title = "Evolutionary Optimization of Kernel Weights Improves
Protein Complex Comembership Prediction",
journal = j-TCBB,
volume = "6",
number = "3",
pages = "427--437",
month = jul,
year = "2009",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/TCBB.2008.137",
ISSN = "1545-5963",
bibdate = "Tue Aug 11 18:13:22 MDT 2009",
bibsource = "http://portal.acm.org/",
abstract = "In recent years, more and more high-throughput data
sources useful for protein complex prediction have
become available (e.g., gene sequence, mRNA expression,
and interactions). The integration of these different
data sources can be challenging. Recently, it has been
recognized that kernel-based classifiers are well
suited for this task. However, the different kernels
(data sources) are often combined using equal weights.
Although several methods have been developed to
optimize kernel weights, no large-scale example of an
improvement in classifier performance has been shown
yet. In this work, we employ an evolutionary algorithm
to determine weights for a larger set of kernels by
optimizing a criterion based on the area under the ROC
curve. We show that setting the right kernel weights
can indeed improve performance. We compare this to the
existing kernel weight optimization methods (i.e.,
(regularized) optimization of the SVM criterion or
aligning the kernel with an ideal kernel) and find that
these do not result in a significant performance
improvement and can even cause a decrease in
performance. Results also show that an expert approach
of assigning high weights to features with high
individual performance is not necessarily the best
strategy.",
acknowledgement = ack-nhfb,
keywords = "biology and genetics; Classifier design and
evaluation; evolutionary computing and genetic
algorithms.",
}
@Article{Chen:2009:IAA,
author = "Zhi-Zhong Chen and Lusheng Wang",
title = "Improved Approximation Algorithms for Reconstructing
the History of Tandem Repeats",
journal = j-TCBB,
volume = "6",
number = "3",
pages = "438--453",
month = jul,
year = "2009",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/TCBB.2008.122",
ISSN = "1545-5963",
bibdate = "Tue Aug 11 18:13:22 MDT 2009",
bibsource = "http://portal.acm.org/",
abstract = "Some genetic diseases in human beings are dominated by
short sequences repeated consecutively called tandem
repeats. Once a region containing tandem repeats is
found, it is of great interest to study the history of
creating the repeats. The computational problem of
reconstructing the duplication history of tandem
repeats has been studied extensively in the literature.
Almost all previous studies focused on the simplest
case where the size of each duplication block is 1.
Only recently we succeeded in giving the first
polynomial-time approximation algorithm with a
guaranteed ratio for a more general case where the size
of each duplication block is at most 2; the algorithm
achieves a ratio of 6 and runs in O(n^{11}) time. In
this paper, we present two new polynomial-time
approximation algorithms for this more general case.
One of them achieves a ratio of 5 and runs in O(n^9)
time, while the other achieves a ratio of 2.5+\epsilon
for any constant \epsilon > 0 but runs slower.",
acknowledgement = ack-nhfb,
keywords = "approximation algorithms.; Computational biology",
}
@Article{Cardona:2009:MPN,
author = "Gabriel Cardona and Merce Llabres and Francesc
Rossello and Gabriel Valiente",
title = "Metrics for Phylogenetic Networks {II}: Nodal and
Triplets Metrics",
journal = j-TCBB,
volume = "6",
number = "3",
pages = "454--469",
month = jul,
year = "2009",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/TCBB.2008.127",
ISSN = "1545-5963",
bibdate = "Tue Aug 11 18:13:22 MDT 2009",
bibsource = "http://portal.acm.org/",
abstract = "The assessment of phylogenetic network reconstruction
methods requires the ability to compare phylogenetic
networks. This is the second in a series of papers
devoted to the analysis and comparison of metrics for
tree-child time consistent phylogenetic networks on the
same set of taxa. In this paper, we generalize to
phylogenetic networks two metrics that have already
been introduced in the literature for phylogenetic
trees: the nodal distance and the triplets distance. We
prove that they are metrics on any class of tree-child
time consistent phylogenetic networks on the same set
of taxa, as well as some basic properties for them. To
prove these results, we introduce a reduction/expansion
procedure that can be used not only to establish
properties of tree-child time consistent phylogenetic
networks by induction, but also to generate all
tree-child time consistent phylogenetic networks with a
given number of leaves.",
acknowledgement = ack-nhfb,
keywords = "nodal distance; partition distance; Phylogenetic
network; temporal representation; time consistency;
tree-child phylogenetic network; triplets distance.",
}
@Article{Sotiropoulos:2009:MRM,
author = "Vassilios Sotiropoulos and Marrie-Nathalie
Contou-Carrere and Prodromos Daoutidis and Yiannis N.
Kaznessis",
title = "Model Reduction of Multiscale Chemical {Langevin}
Equations: {A} Numerical Case Study",
journal = j-TCBB,
volume = "6",
number = "3",
pages = "470--482",
month = jul,
year = "2009",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/TCBB.2009.23",
ISSN = "1545-5963",
bibdate = "Tue Aug 11 18:13:22 MDT 2009",
bibsource = "http://portal.acm.org/",
abstract = "Two very important characteristics of biological
reaction networks need to be considered carefully when
modeling these systems. First, models must account for
the inherent probabilistic nature of systems far from
the thermodynamic limit. Often, biological systems
cannot be modeled with traditional
continuous-deterministic models. Second, models must
take into consideration the disparate spectrum of time
scales observed in biological phenomena, such as slow
transcription events and fast dimerization reactions.
In the last decade, significant efforts have been
expended on the development of stochastic chemical
kinetics models to capture the dynamics of biomolecular
systems, and on the development of robust multiscale
algorithms, able to handle stiffness. In this paper,
the focus is on the dynamics of reaction sets governed
by stiff chemical Langevin equations, i.e., stiff
stochastic differential equations. These are
particularly challenging systems to model, requiring
prohibitively small integration step sizes. We describe
and illustrate the application of a semianalytical
reduction framework for chemical Langevin equations
that results in significant gains in computational
cost.",
acknowledgement = ack-nhfb,
keywords = "chemical Langevin equations (CLEs); Model reduction;
multiscale models; stiff biomolecular systems.;
stochastic chemical kinetics",
}
@Article{Roytberg:2009:SSP,
author = "Mikhail Roytberg and Anna Gambin and Laurent Noe and
Slawomir Lasota and Eugenia Furletova and Ewa Szczurek
and Gregory Kucherov",
title = "On Subset Seeds for Protein Alignment",
journal = j-TCBB,
volume = "6",
number = "3",
pages = "483--494",
month = jul,
year = "2009",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/TCBB.2009.4",
ISSN = "1545-5963",
bibdate = "Tue Aug 11 18:13:22 MDT 2009",
bibsource = "http://portal.acm.org/",
abstract = "We apply the concept of subset seeds proposed in [1]
to similarity search in protein sequences. The main
question studied is the design of efficient seed
alphabets to construct seeds with optimal
sensitivity/selectivity trade-offs. We propose several
different design methods and use them to construct
several alphabets. We then perform a comparative
analysis of seeds built over those alphabets and
compare them with the standard Blastp seeding method
[2], [3], as well as with the family of vector seeds
proposed in [4]. While the formalism of subset seeds is
less expressive (but less costly to implement) than the
cumulative principle used in Blastp and vector seeds,
our seeds show a similar or even better performance
than Blastp on Bernoulli models of proteins compatible
with the common BLOSUM62 matrix. Finally, we perform a
large-scale benchmarking of our seeds against several
main databases of protein alignments. Here again, the
results show a comparable or better performance of our
seeds versus Blastp.",
acknowledgement = ack-nhfb,
keywords = "local alignment; multiple seeds; protein databases;
Protein sequences; seed alphabet; seeds; selectivity.;
sensitivity; similarity search; subset seeds",
}
@Article{Jin:2009:PSP,
author = "Guohua Jin and Luay Nakhleh and Sagi Snir and Tamir
Tuller",
title = "Parsimony Score of Phylogenetic Networks: Hardness
Results and a Linear-Time Heuristic",
journal = j-TCBB,
volume = "6",
number = "3",
pages = "495--505",
month = jul,
year = "2009",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/TCBB.2008.119",
ISSN = "1545-5963",
bibdate = "Tue Aug 11 18:13:22 MDT 2009",
bibsource = "http://portal.acm.org/",
abstract = "Phylogenies&\#8212;the evolutionary histories of
groups of organisms&\#8212;play a major role in
representing the interrelationships among biological
entities. Many methods for reconstructing and studying
such phylogenies have been proposed, almost all of
which assume that the underlying history of a given set
of species can be represented by a binary tree.
Although many biological processes can be effectively
modeled and summarized in this fashion, others cannot:
recombination, hybrid speciation, and horizontal gene
transfer result in networks of relationships rather
than trees of relationships. In previous works, we
formulated a maximum parsimony (MP) criterion for
reconstructing and evaluating phylogenetic networks,
and demonstrated its quality on biological as well as
synthetic data sets. In this paper, we provide further
theoretical results as well as a very fast heuristic
algorithm for the MP criterion of phylogenetic
networks. In particular, we provide a novel
combinatorial definition of phylogenetic networks in
terms of &\#8220;forbidden cycles,'' and provide
detailed hardness and hardness of approximation proofs
for the 'small'' MP problem. We demonstrate the
performance of our heuristic in terms of time and
accuracy on both biological and synthetic data sets.
Finally, we explain the difference between our model
and a similar one formulated by Nguyen et al., and
describe the implications of this difference on the
hardness and approximation results.",
acknowledgement = ack-nhfb,
keywords = "hardness and approximation.; horizontal gene transfer;
Maximum parsimony; phylogenetic networks",
}
@Article{Thomas:2009:PDS,
author = "John Thomas and Naren Ramakrishnan and Chris
Bailey-Kellogg",
title = "Protein Design by Sampling an Undirected Graphical
Model of Residue Constraints",
journal = j-TCBB,
volume = "6",
number = "3",
pages = "506--516",
month = jul,
year = "2009",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/TCBB.2008.124",
ISSN = "1545-5963",
bibdate = "Tue Aug 11 18:13:22 MDT 2009",
bibsource = "http://portal.acm.org/",
abstract = "This paper develops an approach for designing protein
variants by sampling sequences that satisfy residue
constraints encoded in an undirected probabilistic
graphical model. Due to evolutionary pressures on
proteins to maintain structure and function, the
sequence record of a protein family contains valuable
information regarding position-specific residue
conservation and coupling (or covariation) constraints.
Representing these constraints with a graphical model
provides two key benefits for protein design: a
probabilistic semantics enabling evaluation of possible
sequences for consistency with the constraints, and an
explicit factorization of residue dependence and
independence supporting efficient exploration of the
constrained sequence space. We leverage these benefits
in developing two complementary MCMC algorithms for
protein design: constrained shuffling mixes wild-type
sequences positionwise and evaluates graphical model
likelihood, while component sampling directly generates
sequences by sampling clique values and propagating to
other cliques. We apply our methods to design WW
domains. We demonstrate that likelihood under a model
of wild-type WWs is highly predictive of foldedness of
new WWs. We then show both theoretical and rapid
empirical convergence of our algorithms in generating
high-likelihood, diverse new sequences. We further show
that these sequences capture the original sequence
constraints, yielding a model as predictive of
foldedness as the original one.",
acknowledgement = ack-nhfb,
keywords = "graphical models; Markov chain Monte Carlo (MCMC).;
Protein design; residue coupling",
}
@Article{Smith:2009:RSD,
author = "Jennifer A. Smith",
title = "{RNA} Search with Decision Trees and Partial
Covariance Models",
journal = j-TCBB,
volume = "6",
number = "3",
pages = "517--527",
month = jul,
year = "2009",
CODEN = "ITCBCY",
DOI = "http://dx.doi.org/10.1109/TCBB.2008.120",
ISSN = "1545-5963",
bibdate = "Tue Aug 11 18:13:22 MDT 2009",
bibsource = "http://portal.acm.org/",
abstract = "The use of partial covariance models to search for RNA
family members in genomic sequence databases is
explored. The partial models are formed from contiguous
subranges of the overall RNA family multiple alignment
columns. A binary decision-tree framework is presented
for choosing the order to apply the partial models and
the score thresholds on which to make the decisions.
The decision trees are chosen to minimize computation
time subject to the constraint that all of the training
sequences are passed to the full covariance model for
final evaluation. Computational intelligence methods
are suggested to select the decision tree since the
tree can be quite complex and there is no obvious
method to build the tree in these cases. Experimental
results from seven RNA families shows execution times
of 0.066-0.268 relative to using the full covariance
model alone. Tests on the full sets of known sequences
for each family show that at least 95 percent of these
sequences are found for two families and 100 percent
for five others. Since the full covariance model is run
on all sequences accepted by the partial model decision
tree, the false alarm rate is at least as low as that
of the full model alone.",
acknowledgement = ack-nhfb,
keywords = "Bioinformatics; computational intelligence; covariance
models; decision trees; RNA database search.",
}