Articles are listed in reverse chronological order. Impact factors (IFs) at the time of publication are provided. When not available, 5-year average or most recent IFs are provided. Links to publishers are provided for each article. Local copies are made available, under the warning that articles are provided under the copyright permission for noncommercial dissemination of academic work. Citation counts per paper below are slightly outdated. As of January 12, 2018, the total citation count per Google Scholar is 1263, the h-index is 21, and the i10-index is 43. (link)
Shehu’s advisees indicated by: undergraduate (u), graduate (g), and postdoctoral (p) students. Corresponding authors are indicated by (*).
J45: David Morris(g), Tatiana Maximova(p), Erion Plaku, and Amarda Shehu*. Attenuating Dependence on Structural Data in Computing Protein Energy Landscapes. BMC Bioinformatics 2018, under review.
J44: Wanli Qiao, Nasrin Akhter(g), Xiaowen Fang(u), Tatiana Maximova(p), Erion Plaku, and Amarda Shehu*. From Mutations to Mechanisms and Dysfunction via Computation and Mining of Protein Energy Landscapes. BMC Genomics 2018.
J43: Nasrin Akhter(g), Wanli Qiao, and Amarda Shehu*. An Energy Landscape Treatment of Decoy Selection in Template-free Protein Structure Prediction. Computation 6(2), 39, 2018 (doi: 10.3390/computation6020039; invited to special issue on “Computation in Molecular Modeling”).
@article{computation6020039,
  author  = {Akhter, Nasrin and Qiao, Wanli and Shehu, Amarda},
  title   = {An Energy Landscape Treatment of Decoy Selection in Template-Free Protein Structure Prediction},
  journal = {Computation},
  volume  = {6},
  number  = {2},
  pages   = {39},
  year    = {2018},
  issn    = {2079-3197},
  doi     = {10.3390/computation6020039},
  url     = {http://www.mdpi.com/2079-3197/6/2/39}
}
J42: Daniel Veltri, Uday Kamath, and Amarda Shehu*. Deep Learning Improves Antimicrobial Peptide Recognition. Bioinformatics 2018.
@article{veltri2018deep,
  author    = {Veltri, Daniel and Kamath, Uday and Shehu, Amarda},
  title     = {Deep Learning Improves Antimicrobial Peptide Recognition},
  journal   = {Bioinformatics},
  volume    = {34},
  number    = {16},
  pages     = {2740--2747},
  year      = {2018},
  publisher = {Oxford University Press},
  doi       = {10.1093/bioinformatics/bty179}
}
J41: Nasrin Akhter(g) and Amarda Shehu*. From Extraction of Local Structures of Protein Energy Landscapes to Improved Decoy Selection in Template-free Protein Structure Prediction. Molecules 2018, 23(1), 216.
@article{AkhterShehuMolecules2018,
  author    = {Akhter, Nasrin and Shehu, Amarda},
  title     = {From Extraction of Local Structures of Protein Energy Landscapes to Improved Decoy Selection in Template-Free Protein Structure Prediction},
  journal   = {Molecules},
  publisher = {Multidisciplinary Digital Publishing Institute},
  year      = 2018,
  volume    = 23,
  number    = 1,
  pages     = 216
}
J40: Tatiana Maximova(p), Zijing Zhang, Daniel B Carr, Erion Plaku, and Amarda Shehu*. Sample-based Models of Protein Energy Landscapes and Slow Structural Rearrangements. J Comput Biol (JCB) 2018.
@article{MaximovaShehuJCB2018,
  author    = {Maximova, Tatiana and Zhang, Zijing and Carr, Daniel B. and Plaku, Erion and Shehu, Amarda},
  title     = {Sample-Based Models of Protein Energy Landscapes and Slow Structural Rearrangements},
  journal   = {Journal of Computational Biology},
  volume    = {25},
  number    = {1},
  pages     = {33--50},
  year      = {2018},
  publisher = {Mary Ann Liebert, Inc.},
  address   = {New Rochelle, NY}
}
J39: Emmanuel Sapin(p), Kenneth De Jong*, and Amarda Shehu*. From Optimization to Mapping: An Evolutionary Algorithm for Protein Energy Landscapes. IEEE/ACM Trans Comp Biol and Bioinf (TCBB) 2017, (doi: 10.1109/TCBB.2016.2628745).
@article{SapinShehuTCBB2017,
  author    = {Sapin, Emmanuel and De Jong, Kenneth A. and Shehu, Amarda},
  title     = {From Optimization to Mapping: An Evolutionary Algorithm for Protein Energy Landscapes},
  journal   = {{IEEE/ACM} Transactions on Computational Biology and Bioinformatics},
  year      = {2017},
  publisher = {IEEE},
  doi       = {10.1109/TCBB.2016.2628745}
}
J38: Tatiana Maximova(p), Erion Plaku*, and Amarda Shehu*. Structure-guided Protein Transition Modeling with a Probabilistic Roadmap Algorithm. IEEE/ACM Trans Comp Biol and Bioinf (TCBB) 2016, (doi: 10.1109/TCBB.2016.2586044).
@article{MaximovaShehuTCBB2016,
  author    = {Maximova, Tatiana and Plaku, Erion and Shehu, Amarda},
  title     = {Structure-Guided Protein Transition Modeling with a Probabilistic Roadmap Algorithm},
  journal   = {{IEEE/ACM} Transactions on Computational Biology and Bioinformatics},
  year      = {2016},
  publisher = {IEEE},
  doi       = {10.1109/TCBB.2016.2586044}
}
J37: Daniel Veltri(g), Uday Kamath, and Amarda Shehu*. Improving Recognition of Antimicrobial Peptides and Target Selectivity through Machine Learning and Genetic Programming. IEEE/ACM Trans Comp Biol and Bioinf (TCBB), 14(2): 300-313, 2017.
@article{VeltriKamathShehuTCBB15,
  author  = {Veltri, D. and Kamath, U. and Shehu, A.},
  title   = {Improving Recognition of Antimicrobial Peptides and Target Selectivity through Machine Learning and Genetic Programming},
  journal = {{IEEE/ACM} Trans Comput Biol and Bioinf},
  volume  = {14},
  number  = {2},
  pages   = {300--313},
  year    = {2017}
}
J36: Amarda Shehu* and Erion Plaku*. A Survey of Computational Treatments of Biomolecules by Robotics-inspired Methods Modeling Equilibrium Structure and Dynamics. J Artif Intel Res (JAIR) 57:509-572, 2016.
@article{ShehuPlakuJAIR16,
  author  = {Shehu, A. and Plaku, E.},
  title   = {A Survey of Computational Treatments of Biomolecules by Robotics-Inspired Methods Modeling Equilibrium Structure and Dynamics},
  journal = {J Artif Intel Res},
  volume  = {57},
  pages   = {509--572},
  year    = {2016}
}
J35: Emmanuel Sapin(p), Daniel B Carr, Kenneth A De Jong*, and Amarda Shehu*. Computing energy landscape maps and structural excursions of proteins. BMC Genomics 17(Suppl 4):546, 2016.
Background
Structural excursions of a protein at equilibrium are key to biomolecular recognition and function modulation. Protein modeling research is driven by the need to aid wet laboratories in characterizing equilibrium protein dynamics. In principle, structural excursions of a protein can be directly observed via simulation of its dynamics, but the disparate temporal scales involved in such excursions make this approach computationally impractical. On the other hand, an informative representation of the structure space available to a protein at equilibrium can be obtained efficiently via stochastic optimization, but this approach does not directly yield information on equilibrium dynamics.
Methods
We present here a novel methodology that first builds a multi-dimensional map of the energy landscape that underlies the structure space of a given protein and then queries the computed map for energetically-feasible excursions between structures of interest. An evolutionary algorithm builds such maps with a practical computational budget. Graphical techniques analyze a computed multi-dimensional map and expose interesting features of an energy landscape, such as basins and barriers. A path searching algorithm then queries a nearest-neighbor graph representation of a computed map for energetically-feasible basin-to-basin excursions.
Results
Evaluation is conducted on intrinsically-dynamic proteins of importance in human biology and disease. Visual statistical analysis of the maps of energy landscapes computed by the proposed methodology reveals features already captured in the wet laboratory, as well as new features indicative of interesting, unknown thermodynamically-stable and semi-stable regions of the equilibrium structure space. Comparison of maps and structural excursions computed by the proposed methodology on sequence variants of a protein sheds light on the role of equilibrium structure and dynamics in the sequence-function relationship.
@article{SapinShehuBMCGeonmics16,
  author  = {Sapin, E. and Carr, D. and {De Jong}, K. A. and Shehu, A.},
  title   = {Computing energy landscape maps and structural excursions of proteins},
  journal = {BMC Genomics},
  volume  = {17},
  number  = {Suppl 4},
  pages   = {546},
  year    = {2016}
}
J34: Kevin Molloy(g), Rudy Clausen(g), and Amarda Shehu*. A Stochastic Roadmap Method to Model Protein Structural Transitions. Robotica 34(08):1705-1733, 2016 (featured on issue cover).
@article{MolloyShehuRobotica16,
  author  = {Molloy, K. and Clausen, R. and Shehu, A.},
  title   = {A Stochastic Roadmap Method to Model Protein Structural Transitions},
  journal = {Robotica},
  volume  = {34},
  number  = {08},
  pages   = {1705--1733},
  year    = {2016}
}
J33: Kevin Molloy(g) and Amarda Shehu*. A General, Adaptive, Roadmap-based Algorithm for Protein Motion Computation. IEEE Trans NanoBioScience (TNB) 15(2): 158-165, 2016.
@article{MolloyShehuTNB16,
  author  = {Molloy, K. and Shehu, A.},
  title   = {A General, Adaptive, Roadmap-based Algorithm for Protein Motion Computation},
  journal = {{IEEE} Trans NanoBioScience},
  volume  = {15},
  number  = {2},
  pages   = {158--165},
  year    = {2016}
}
J32: Tatiana Maximova(p), Ryan Moffatt(g), Buyong Ma, Ruth Nussinov*, and Amarda Shehu*. Principles and Overview of Sampling Methods for Modeling Macromolecular Structure and Dynamics. PLoS Comp Biol 12(4): e1004619, 2016, (top 50 most downloaded in 2016 and featured on April issue front cover. Also featured in the PLoS Comp Biol blog.)
@article{MaximovaNussinovShehu15,
  author  = {Maximova, T. and Moffatt, R. and Ma, B. and Nussinov, R. and Shehu, A.},
  title   = {Principles and Overview of Sampling Methods for Modeling Macromolecular Structure and Dynamics},
  journal = {{PLoS} Comput Biol},
  volume  = {12},
  number  = {4},
  pages   = {e1004619},
  year    = {2016}
}
J31: Amarda Shehu* and Ruth Nussinov*. Computational Methods for Exploration and Analysis of Macromolecular Structure and Dynamics. PLoS Comput Biol (PCB) 11(10): e1004585, 2015 (editorial).
@article{ShehuNussinovPCB2015,
  author    = {Shehu, Amarda and Nussinov, Ruth},
  title     = {Computational methods for exploration and analysis of macromolecular structure and dynamics},
  journal   = {PLoS computational biology},
  publisher = {Public Library of Science},
  year      = 2015,
  volume    = 11,
  number    = 10,
  pages     = {e1004585}
}
J30: Didier Devaurs, Kevin Molloy, Marc Vaisset, Amarda Shehu, Thierry Simeon, and Juan Cortes*. Characterizing Energy Landscapes of Peptides using a Combination of Stochastic Algorithms. IEEE Trans NanoBioScience (TNB), 14(5): 545-552, 2015.
@article{DevaursCortes15,
  author  = {Devaurs, D. and Molloy, K. and Vaisset, M. and Shehu, A. and Simeon, T. and Cortes, J.},
  title   = {Characterizing Energy Landscapes of Peptides Using a Combination of Stochastic Algorithms},
  journal = {{IEEE} Trans NanoBioScience},
  volume  = {14},
  number  = {5},
  pages   = {545--552},
  year    = {2015}
}
J29: Irina Hashmi(g) and Amarda Shehu*. idDock+: Integrating Machine Learning in Probabilistic Search for Protein-protein Docking. J Computational Biology (JCB), 22(9):806-822, 2015.
@article{HashmiShehuJCB15,
  author  = {Hashmi, I. and Shehu, A.},
  title   = {{idDock+}: Integrating Machine Learning in Probabilistic Search for Protein-protein Docking},
  journal = {J Comput Biol},
  volume  = {22},
  number  = {9},
  pages   = {806--822},
  year    = {2015}
}
J28: Rudy Clausen(g) and Amarda Shehu*. A Data-driven Evolutionary Algorithm for Mapping Multi-basin Protein Energy Landscapes. J Computational Biology (JCB), 22(9): 844-860, 2015.
@article{ClausenShehuJCB15,
  author  = {Clausen, R. and Shehu, A.},
  title   = {A Data-driven Evolutionary Algorithm for Mapping Multi-basin Protein Energy Landscapes},
  journal = {J Comput Biol},
  volume  = {22},
  number  = {9},
  pages   = {844--860},
  year    = {2015}
}
J27: Rudy Clausen(g), Buyong Ma, Ruth Nussinov, and Amarda Shehu*. Mapping the Conformation Space of Wildtype and Mutant H-Ras with a Memetic, Cellular, and Multiscale Evolutionary Algorithm. PLoS Computational Biology (PCB) 11(9): e1004470, 2015.
@article{ClausenShehuPLoSCB15,
  author  = {Clausen, R. and Ma, B. and Nussinov, R. and Shehu, A.},
  title   = {Mapping the Conformation Space of Wildtype and Mutant {H-Ras} with a Memetic, Cellular, and Multiscale Evolutionary Algorithm},
  journal = {{PLoS} Comput Biol},
  volume  = {11},
  number  = {9},
  pages   = {e1004470},
  year    = {2015}
}
J26: Uday Kamath(g), Kenneth A De Jong*, and Amarda Shehu*. Effective Automated Feature Construction and Selection for Classification of Biological Sequences. PLoS One, 9(7): e99982, 2014.
Background
Many open problems in bioinformatics involve elucidating underlying functional signals in biological sequences. DNA sequences, in particular, are characterized by rich architectures in which functional signals are increasingly found to combine local and distal interactions at the nucleotide level. Problems of interest include detection of regulatory regions, splice sites, exons, hypersensitive sites, and more. These problems naturally lend themselves to formulation as classification problems in machine learning. When classification is based on features extracted from the sequences under investigation, success is critically dependent on the chosen set of features.
Methodology
We present an algorithmic framework (EFFECT) for automated detection of functional signals in biological sequences. We focus here on classification problems involving DNA sequences which state-of-the-art work in machine learning shows to be challenging and involve complex combinations of local and distal features. EFFECT uses a two-stage process to first construct a set of candidate sequence-based features and then select a most effective subset for the classification task at hand. Both stages make heavy use of evolutionary algorithms to efficiently guide the search towards informative features capable of discriminating between sequences that contain a particular functional signal and those that do not.
Results
To demonstrate its generality, EFFECT is applied to three separate problems of importance in DNA research: the recognition of hypersensitive sites, splice sites, and ALU sites. Comparisons with state-of-the-art algorithms show that the framework is both general and powerful. In addition, a detailed analysis of the constructed features shows that they contain valuable biological information about DNA architecture, allowing biologists and other researchers to directly inspect the features and potentially use the insights obtained to assist wet-laboratory studies on retainment or modification of a specific signal. Code, documentation, and all data for the applications presented here are provided for the community at http://www.cs.gmu.edu/~ashehu/?q=OurTools.
@article{KamathDeJongShehuPLoS14,
  author  = {Kamath, U. AND {De Jong}, K. A. AND Shehu, A.},
  title   = {Effective Automated Feature Construction and Selection for Classification of Biological Sequences},
  journal = {PLoS {ONE}},
  volume  = {9},
  number  = {7},
  pages   = {e99982},
  year    = {2014}
}
J25: Kevin Molloy(g), M. Jennifer Van(u), Daniel Barbara*, and Amarda Shehu*. Exploring Representations of Protein Structure for Automated Remote Homology Detection and Mapping of Protein Structure Space. BMC Bioinformatics 15 (Suppl 8):S4, 2014.
Background
Due to rapid sequencing of genomes, there are now millions of deposited protein sequences with no known function. Fast sequence-based comparisons allow detecting close homologs for a protein of interest to transfer functional information from the homologs to the given protein. Sequence-based comparison cannot detect remote homologs, in which evolution has adjusted the sequence while largely preserving structure. Structure-based comparisons can detect remote homologs but most methods for doing so are too expensive to apply at a large scale over structural databases of proteins. Recently, fragment-based structural representations have been proposed that allow fast detection of remote homologs with reasonable accuracy. These representations have also been used to obtain linearly-reducible maps of protein structure space. It has been shown, as additionally supported by the analysis in this paper, that such maps preserve functional co-localization of the protein structure space.
Methods
Inspired by a recent application of the Latent Dirichlet Allocation (LDA) model for conducting structural comparisons of proteins, we propose higher-order LDA-obtained topic-based representations of protein structures to provide an alternative route for remote homology detection and organization of the protein structure space in few dimensions. Various techniques based on natural language processing are proposed and employed to aid the analysis of topics in the protein structure domain.
Results
We show that a topic-based representation is just as effective as a fragment-based one at automated detection of remote homologs and organization of protein structure space. We conduct a detailed analysis of the information content in the topic-based representation, showing that topics have semantic meaning. The fragment-based and topic-based representations are also shown to allow prediction of superfamily membership.
Conclusions
This work opens exciting venues in designing novel representations to extract information about protein structures, as well as organizing and mining protein structure space with mature text mining tools.
@article{MolloyBarbaraShehuBMCBioinf14,
  author  = {Molloy, K. AND Min, J. V. AND Barbara, D. AND Shehu, A.},
  title   = {Exploring Representations of Protein Structure for Automated Remote Homology Detection and Mapping of Protein Structure Space},
  journal = {BMC Bioinf},
  volume  = {15},
  number  = {Suppl 8},
  pages   = {S4},
  year    = {2014}
}
J24: Nadine Kabbani*, Jacob C. Nordman, Brian Corgiat, Daniel Veltri(g), Amarda Shehu, and David J. Adams. Are Nicotinic Receptors Coupled to G Proteins? BioEssays 35(12): 1025–1034, 2013, (selected for journal front cover video display. Read the highlight written on our article in same issue by Edward Howrot.)
@article{KabbaniShehuAdams,
  author  = {Kabbani, N. and Nordman, J. C. and Corgiat, B. and Veltri, D. and Shehu, A. and Adams, D. J.},
  title   = {Are Nicotinic Receptors Coupled to {G} Proteins?},
  journal = {BioEssays},
  volume  = {35},
  number  = {12},
  pages   = {1025--1034},
  year    = {2013}
}
J23: Abrar Ashoor, Jacob C. Nordman, Daniel Veltri(g), Keun-Hang Susan Yang, Lina Al Kury, Yaroslav Shuba, Mohamed Mahgoub, Frank C. Howarth, Carl Lupica, Amarda Shehu, Nadine Kabbani, and Murat Oz*. Menthol Inhibits 5-HT3 Receptor-mediated Currents. J of Pharmacology and Experimental Therapeutics (JPET) 347(2):398-409, 2013, (selected for issue front cover).
@article{MuratJPET13,
  author  = {Ashoor, A. and Nordman, J. C. and Veltri, D. and Yang, K.-H. S. and {Al Kury}, L. and Shuba, Y. and Mahgoub, M. and Howarth, F. C. and Lupica, C. and Shehu, A. and Kabbani, N. and Oz, M.},
  title   = {Menthol Inhibits {5-HT3} Receptor-mediated Currents},
  journal = {Journal of Pharmacology and Experimental Therapeutics},
  volume  = {347},
  number  = {2},
  pages   = {398--409},
  year    = {2013}
}
J22: Abrar Ashoor, Jacob C. Nordman, Daniel Veltri(g), Keun-Hang Susan Yang, Lina Al Kury, Yaroslav Shuba, Mohamed Mahgoub, Frank C. Howarth, Bassem Sadek, Amarda Shehu, Nadine Kabbani, and Murat Oz*. Menthol Binding and Inhibition of Alpha7-nicotinic Acetylcholine Receptors. PLoS One 8(7):e67674, 2013.
@article{MuratPLOSONE13,
  author  = {Ashoor, A. AND Nordman, J. C. AND Veltri, D. AND Yang, K.-H. S. AND {Al Kury}, L. AND Shuba, Y. AND Mahgoub, M. AND Howarth, F. C. AND Sadek, B. AND Shehu, A. AND Kabbani, N. AND Oz, M.},
  title   = {Menthol Binding and Inhibition of Alpha7-nicotinic Acetylcholine Receptors},
  journal = {{PLoS} One},
  volume  = 8,
  number  = 7,
  pages   = {e67674},
  year    = {2013}
}
J21: Kevin Molloy(g), Sameh Saleh(u), and Amarda Shehu*. Probabilistic Search and Energy Guidance for Biased Decoy Sampling in Ab-initio Protein Structure Prediction. IEEE/ACM Trans Comp Biol and Bioinf 10(5):1162-1175, 2013.
@article{MolloyShehuTCBB13,
  author  = {Molloy, K. and Saleh, S. and Shehu, A.},
  title   = {Probabilistic Search and Energy Guidance for Biased Decoy Sampling in Ab-initio Protein Structure Prediction},
  journal = {{IEEE/ACM} Trans Comput Biol and Bioinf},
  volume  = {10},
  number  = {5},
  pages   = {1162--1175},
  year    = {2013}
}
J20: Irina Hashmi(g) and Amarda Shehu*. HopDock: A Probabilistic Search Algorithm for Decoy Sampling in Protein-protein Docking. Proteome Sci 11(Suppl1):S6, 2013.
Background
Elucidating the three-dimensional structure of a higher-order molecular assembly formed by interacting molecular units, a problem commonly known as docking, is central to unraveling the molecular basis of cellular activities. Though protein assemblies are ubiquitous in the cell, it is currently challenging to predict the native structure of a protein assembly in silico.
Methods
This work proposes HopDock, a novel search algorithm for protein-protein docking. HopDock efficiently obtains an ensemble of low-energy dimeric configurations, also known as decoys, that can be effectively used by ab-initio docking protocols. HopDock is based on the Basin Hopping (BH) framework which perturbs the structure of a dimeric configuration and then follows it up with an energy minimization to explicitly sample a local minimum of a chosen energy function. This process is repeated in order to sample consecutive energy minima in a trajectory-like fashion. HopDock employs both geometry and evolutionary conservation analysis to narrow down the interaction search space of interest for the purpose of efficiently obtaining a diverse decoy ensemble.
Results and conclusions
A detailed analysis and a comparative study on seventeen different dimers shows HopDock obtains a broad view of the energy surface near the native dimeric structure and samples many near-native configurations. The results show that HopDock has high sampling capability and can be employed to effectively obtain a large and diverse ensemble of decoy configurations that can then be further refined in greater structural detail in ab-initio docking protocols.
@article{HashmiShehuProteomeSci13,
  author  = {Hashmi, I. and Shehu, A.},
  title   = {{HopDock}: A Probabilistic Search Algorithm for Decoy Sampling in Protein-protein Docking},
  journal = {Proteome Sci},
  volume  = {11},
  number  = {Suppl1},
  pages   = {S6},
  year    = {2013}
}
J19: Sameh Saleh(u), Brian Olson(g), and Amarda Shehu*. A population-based evolutionary search approach to the multiple minima problem in de novo protein structure prediction. BMC Structural Biology J 13(Suppl1):S4, 2013.
Background
Elucidating the native structure of a protein molecule from its sequence of amino acids, a problem known as de novo structure prediction, is a long standing challenge in computational structural biology. Difficulties in silico arise due to the high dimensionality of the protein conformational space and the ruggedness of the associated energy surface. The issue of multiple minima is a particularly troublesome hallmark of energy surfaces probed with current energy functions. In contrast to the true energy surface, these surfaces are weakly-funneled and rich in comparably deep minima populated by non-native structures. For this reason, many algorithms seek to be inclusive and obtain a broad view of the low-energy regions through an ensemble of low-energy (decoy) conformations. Conformational diversity in this ensemble is key to increasing the likelihood that the native structure has been captured.
Methods
We propose an evolutionary search approach to address the multiple-minima problem in decoy sampling for de novo structure prediction. Two population-based evolutionary search algorithms are presented that follow the basic approach of treating conformations as individuals in an evolving population. Coarse graining and molecular fragment replacement are used to efficiently obtain protein-like child conformations from parents. Potential energy is used both to bias parent selection and determine which subset of parents and children will be retained in the evolving population. The effect on the decoy ensemble of sampling minima directly is measured by additionally mapping a conformation to its nearest local minimum before considering it for retainment. The resulting memetic algorithm thus evolves not just a population of conformations but a population of local minima.
Results and conclusions
Results show that both algorithms are effective in terms of sampling conformations in proximity of the known native structure. The additional minimization is shown to be key to enhancing sampling capability and obtaining a diverse ensemble of decoy conformations, circumventing premature convergence to sub-optimal regions in the conformational space, and approaching the native structure with proximity that is comparable to state-of-the-art decoy sampling methods. The results are shown to be robust and valid when using two representative state-of-the-art coarse-grained energy functions.
@article{SalehShehuBMCStructBiol13,
  author  = {Saleh, S. AND Olson, B. AND Shehu, A.},
  title   = {A population-based evolutionary search approach to the multiple minima problem in de novo protein structure prediction},
  journal = {BMC Struct Biol},
  volume  = 13,
  number  = {Suppl1},
  pages   = {S4},
  year    = {2013}
}
J18: Brian Olson(g) and Amarda Shehu*. Rapid Sampling of Local Minima in Protein Energy Surface and Effective Reduction through a Multi-objective Filter. Proteome Sci 11(Suppl1):S12, 2013.
Background
Many problems in protein modeling require obtaining a discrete representation of the protein conformational space as an ensemble of conformations. In ab-initio structure prediction, in particular, where the goal is to predict the native structure of a protein chain given its amino-acid sequence, the ensemble needs to satisfy energetic constraints. Given the thermodynamic hypothesis, an effective ensemble contains low-energy conformations which are similar to the native structure. The high-dimensionality of the conformational space and the ruggedness of the underlying energy surface currently make it very difficult to obtain such an ensemble. Recent studies have proposed that Basin Hopping is a promising probabilistic search framework to obtain a discrete representation of the protein energy surface in terms of local minima. Basin Hopping performs a series of structural perturbations followed by energy minimizations with the goal of hopping between nearby energy minima. This approach has been shown to be effective in obtaining conformations near the native structure for small systems. Recent work by us has extended this framework to larger systems through employment of the molecular fragment replacement technique, resulting in rapid sampling of large ensembles.
Methods
This paper investigates the algorithmic components in Basin Hopping to both understand and control their effect on the sampling of near-native minima. Realizing that such an ensemble is reduced before further refinement in full ab-initio protocols, we take an additional step and analyze the quality of the ensemble retained by ensemble reduction techniques. We propose a novel multi-objective technique based on the Pareto front to filter the ensemble of sampled local minima.
Results and conclusions
We show that controlling the magnitude of the perturbation allows directly controlling the distance between consecutively-sampled local minima and, in turn, steering the exploration towards conformations near the native structure. For the minimization step, we show that the addition of Metropolis Monte Carlo-based minimization is no more effective than a simple greedy search. Finally, we show that the size of the ensemble of sampled local minima can be effectively and efficiently reduced by a multi-objective filter to obtain a simpler representation of the probed energy surface.
@article{OlsonShehuProteomSci13,
  author  = {Olson, B. AND Shehu, A.},
  title   = {Rapid Sampling of Local Minima in Protein Energy Surface and Effective Reduction through a Multi-objective Filter},
  journal = {Proteome Sci},
  volume  = 11,
  number  = {Suppl1},
  pages   = {S12},
  year    = {2013}
}
J17: Kevin Molloy(g) and Amarda Shehu*. Elucidating the Ensemble of Functionally-relevant Transitions in Protein Systems with a Robotics-inspired Method. BMC Structural Biology J 13(Suppl1):S8, 2013.
Background
Many proteins tune their biological function by transitioning between different functional states, effectively acting as dynamic molecular machines. Detailed structural characterization of transition trajectories is central to understanding the relationship between protein dynamics and function. Computational approaches that build on the Molecular Dynamics framework are in principle able to model transition trajectories at great detail but also at considerable computational cost. Methods that delay consideration of dynamics and focus instead on elucidating energetically-credible conformational paths connecting two functionally-relevant structures provide a complementary approach. Effective sampling-based path planning methods originating in robotics have been recently proposed to produce conformational paths. These methods largely model short peptides or address large proteins by simplifying conformational space.
Methods
We propose a robotics-inspired method that connects two given structures of a protein by sampling conformational paths. The method focuses on small- to medium-size proteins, efficiently modeling structural deformations through the use of the molecular fragment replacement technique. In particular, the method grows a tree in conformational space rooted at the start structure, steering the tree to a goal region defined around the goal structure. We investigate various bias schemes over a progress coordinate for balance between coverage of conformational space and progress towards the goal. A geometric projection layer promotes path diversity. A reactive temperature scheme allows sampling of rare paths that cross energy barriers.
Results and conclusions
Experiments are conducted on small- to medium-size proteins of length up to 214 amino acids and with multiple known functionally-relevant states, some of which are more than 13Å apart from each other. Analysis reveals that the method effectively obtains conformational paths connecting structural states that are significantly different. A detailed analysis on the depth and breadth of the tree suggests that a soft global bias over the progress coordinate enhances sampling and results in higher path diversity. The explicit geometric projection layer that biases the exploration away from over-sampled regions further increases coverage, often improving proximity to the goal by forcing the exploration to find new paths. The reactive temperature scheme is shown effective in increasing path diversity, particularly in difficult structural transitions with known high-energy barriers.
@article{MolloyShehuBMCStructBiol13,
  author  = {Molloy, K. AND Shehu, A.},
  title   = {Elucidating the Ensemble of Functionally-relevant Transitions in Protein Systems with a Robotics-inspired Method},
  journal = {BMC Struct Biol},
  volume  = 13,
  number  = {Suppl 1},
  pages   = {S8},
  year    = {2013}
}
J16: Brian Olson(g), Irina Hashmi(g), Kevin Molloy(g), and Amarda Shehu*. Basin Hopping as a General and Versatile Optimization Framework for the Characterization of Biological Macromolecules. Advances in Artificial Intelligence J 2012, 674832 (special issue on Artificial Intelligence Applications in Biomedicine).
@article{OlsonShehuAdvAI12,
  author  = {Olson, B. and Hashmi, I. and Molloy, K. and Shehu, A.},
  title   = {Basin Hopping as a General and Versatile Optimization Framework for the Characterization of Biological Macromolecules},
  journal = {Advances in Artificial Intelligence},
  volume  = {2012},
  pages   = {674832},
  year    = {2012}
}
J15: Brian Olson(g) and Amarda Shehu*. Evolutionary-inspired Probabilistic Search for Enhancing Sampling of Local Minima in the Protein Energy Surface. Proteome Science 2012, 10(Suppl1): S5.
Background
Despite computational challenges, elucidating conformations that a protein system assumes under physiologic conditions for the purpose of biological activity is a central problem in computational structural biology. While these conformations are associated with low energies in the energy surface that underlies the protein conformational space, few existing conformational search algorithms focus on explicitly sampling low-energy local minima in the protein energy surface.
Methods
This work proposes a novel probabilistic search framework, PLOW, that explicitly samples low-energy local minima in the protein energy surface. The framework combines algorithmic ingredients from evolutionary computation and computational structural biology to effectively explore the subspace of local minima. A greedy local search maps a conformation sampled in conformational space to a nearby local minimum. A perturbation move jumps out of a local minimum to obtain a new starting conformation for the greedy local search. The process repeats in an iterative fashion, resulting in a trajectory-based exploration of the subspace of local minima.
Results and conclusions
The analysis of PLOW’s performance shows that, by navigating only the subspace of local minima, PLOW is able to sample conformations near a protein’s native structure, either more effectively or as well as state-of-the-art methods that focus on reproducing the native structure for a protein system. Analysis of the actual subspace of local minima shows that PLOW samples this subspace more effectively than a naive sampling approach. Additional theoretical analysis reveals that the perturbation function employed by PLOW is key to its ability to sample a diverse set of low-energy conformations. This analysis also suggests directions for further research and novel applications for the proposed framework.
@article{OlsonShehuProtSci12,
  author  = {Olson, B. and Shehu, A.},
  title   = {Evolutionary-inspired probabilistic search for enhancing sampling of local minima in the protein energy surface},
  journal = {Proteome Sci},
  volume  = {10},
  number  = {Suppl 1},
  pages   = {S5},
  year    = {2012},
}
J14: Irina Hashmig, Bahar Akbal-Delibas, Nurit Haspel, and Amarda Shehu*. Guiding Protein Docking with Geometric and Evolutionary Information. J Bioinf and Comp Biol 2012, 10(3): 1242002.
@article{HashmiShehu12,
  author  = {Hashmi, I. and Akbal-Delibas, B. and Haspel, N. and Shehu, A.},
  title   = {Guiding Protein Docking with Geometric and Evolutionary Information},
  journal = {J Bioinf and Comp Biol},
  volume  = {10},
  number  = {3},
  pages   = {1242002},
  year    = {2012},
}
J13: Bahar Akbal-Delibas, Irina Hashmig, Amarda Shehu, and Nurit Haspel*. An Evolutionary Conservation Based Method for Refining and Reranking Protein Complex Structures. J Bioinf and Comp Biol 2012, 10(3):1242008.
@article{AkbalHaspel12,
  author  = {Akbal-Delibas, B. and Hashmi, I. and Shehu, A. and Haspel, N.},
  title   = {An Evolutionary Conservation Based Method for Refining and Reranking Protein Complex Structures},
  journal = {J Bioinf and Comp Biol},
  volume  = {10},
  number  = {3},
  pages   = {1242008},
  year    = {2012},
}
J12: Brian Olsong, Kevin Molloyg, S.-Farid Hendig, and Amarda Shehu*. Guiding Search in the Protein Conformational Space with Structural Profiles. J Bioinf and Comp Biol 2012, 10(3):1242005.
@article{OlsonMolloyShehu12,
  author  = {Olson, B. S. and Molloy, K. and Hendi, S.-F. and Shehu, A.},
  title   = {Guiding Search in the Protein Conformational Space with Structural Profiles},
  journal = {J Bioinf and Comp Biol},
  volume  = {10},
  number  = {3},
  pages   = {1242005},
  year    = {2012},
}
J11: Amarda Shehu* and Lydia Kavraki*. Modeling Structures and Motions of Loops in Protein Molecules. Entropy 2012, 14(2):252-290 (invited review article, IF 2011: 1.109).
@article{ShehuKavrakiEntropy12,
  author  = {Shehu, A. and Kavraki, L. E.},
  title   = {Modeling Structures and Motions of Loops in Protein Molecules},
  journal = {Entropy},
  volume  = {14},
  number  = {2},
  pages   = {252--290},
  year    = {2012},
}
J10: Uday Kamathg, Jack Comptonu, Rezarta Islamaj Dogan, Kenneth A. De Jong*, and Amarda Shehu*. An Evolutionary Algorithm Approach for Feature Generation from Sequence Data and its Application to DNA Splice-Site Prediction. IEEE Trans Comp Biol and Bioinf 2012, 9(5):1387-1398 (IF 2011: 2.25).
@article{KamathShehuTCBB12,
  author  = {Kamath, U. and Compton, J. and Islamaj Dogan, R. and De Jong, K. A. and Shehu, A.},
  title   = {An Evolutionary Algorithm Approach for Feature Generation from Sequence Data and its Application to {DNA} Splice-Site Prediction},
  journal = {IEEE Trans Comp Biol and Bioinf},
  volume  = {9},
  number  = {5},
  pages   = {1387--1398},
  year    = {2012},
}
J9: Uday Kamathg, Amarda Shehu*, and Kenneth A. De Jong*. A Two-Stage Evolutionary Approach for Effective Classification of Hypersensitive DNA Sequences. J Bioinf and Comp Biol 2011, 9(3): 399-413.
@article{KamathShehuDeJongJBCB11,
  author  = {Kamath, U. and Shehu, A. and De Jong, K. A.},
  title   = {A Two-Stage Evolutionary Approach for Effective Classification of Hypersensitive {DNA} Sequences},
  journal = {J. Bioinf. and Comp. Biol.},
  volume  = {9},
  number  = {3},
  pages   = {399--413},
  year    = {2011},
}
J8: Brian Olsong, Kevin Molloyg, and Amarda Shehu*. In Search of the Protein Native State with a Probabilistic Sampling Approach. J Bioinf and Comp Biol 2011, 9(3):383-398.
@article{OlsonMolloyShehuJBCB11,
  author  = {Olson, B. and Molloy, K. and Shehu, A.},
  title   = {In Search of the Protein Native State with a Probabilistic Sampling Approach},
  journal = {J. Bioinf. and Comp. Biol.},
  volume  = {9},
  number  = {3},
  pages   = {383--398},
  year    = {2011},
}
J7: Amarda Shehu* and Brian Olsong. Guiding the Search for Native-like Protein Conformations with an Ab-initio Tree-based Exploration. Intl J of Robot Res 2010, 29(8):1106-1127.
@article{ShehuOlsonIJRR10,
  author  = {Shehu, A. and Olson, B.},
  title   = {Guiding the Search for Native-like Protein Conformations with an Ab-initio Tree-based Exploration},
  journal = {Intl. J. Robot. Res.},
  volume  = {29},
  number  = {8},
  pages   = {1106--1127},
  year    = {2010},
}
J6: Joseph A. Hegler, Joachim Laetzer, Amarda Shehu, Cecilia Clementi, and Peter G. Wolynes*. Restriction vs. Guidance: Fragment Assembly and Associative Memory Hamiltonians for Protein Structure Prediction. Proc. Nat. Acad. Sci. USA 2009, 106(36):15302-15307.
@article{HeglerWolynesPNAS09,
  author  = {Hegler, J. A. and Laetzer, J. and Shehu, A. and Clementi, C. and Wolynes, P. G.},
  title   = {Restriction vs. Guidance: Fragment Assembly and Associative Memory {Hamiltonians} for Protein Structure Prediction},
  journal = {Proc. Nat. Acad. Sci. USA},
  volume  = {106},
  number  = {36},
  pages   = {15302--15307},
  year    = {2009},
}
J5: Amarda Shehu, Lydia E. Kavraki*, and Cecilia Clementi*. Multiscale Characterization of Protein Conformational Ensembles. Proteins: Structure, Function, and Bioinformatics, 2009, 76(4):837-851.
@article{ShehuKavrakiClementiProteins09,
  author  = {Shehu, A. and Kavraki, L. E. and Clementi, C.},
  title   = {Multiscale Characterization of Protein Conformational Ensembles},
  journal = {Proteins: Struct, Funct, and Bioinf},
  volume  = {76},
  number  = {4},
  pages   = {837--851},
  year    = {2009},
}
J4: Amarda Shehu, Lydia E. Kavraki, and Cecilia Clementi*. Unfolding the Fold of Cyclic Cysteine-rich Peptides. Protein Science, 2008, 17(3):482-493.
@article{ShehuKavrakiClementiProtSci08,
  author  = {Shehu, A. and Kavraki, L. E. and Clementi, C.},
  title   = {Unfolding the Fold of Cyclic Cysteine-rich Peptides},
  journal = {Protein Sci},
  volume  = {17},
  number  = {3},
  pages   = {482--493},
  year    = {2008},
}
J3: Amarda Shehu, Cecilia Clementi, and Lydia E. Kavraki*. Sampling Conformation Space to Model Equilibrium Fluctuations in Proteins. Algorithmica, 2007, 48(4):303-327.
@article{ShehuClementiKavrakiAlgo07,
  author  = {Shehu, A. and Clementi, C. and Kavraki, L. E.},
  title   = {Sampling Conformation Space to Model Equilibrium Fluctuations in Proteins},
  journal = {Algorithmica},
  volume  = {48},
  number  = {4},
  pages   = {303--327},
  year    = {2007},
}
J2: Amarda Shehu, Lydia E. Kavraki, and Cecilia Clementi*. On the Characterization of Protein Native State Ensembles. Biophysical Journal, 2007, 92(5):1503-1511.
@article{ShehuKavrakiClementiBiophysJ07,
  author  = {Shehu, A. and Kavraki, L. E. and Clementi, C.},
  title   = {On the Characterization of Protein Native State Ensembles},
  journal = {Biophys J},
  volume  = {92},
  number  = {5},
  pages   = {1503--1511},
  year    = {2007},
}
J1: Amarda Shehu, Cecilia Clementi*, and Lydia E. Kavraki*. Modeling Protein Conformational Ensembles: From Missing Loops to Equilibrium Fluctuations. Proteins: Structure, Function, and Bioinformatics 2006, 65(1):164-179.
@article{ShehuClementiKavrakiProt06,
  author  = {Shehu, A. and Clementi, C. and Kavraki, L. E.},
  title   = {Modeling Protein Conformational Ensembles: {From} Missing Loops to Equilibrium Fluctuations},
  journal = {Proteins: Struct, Funct, and Bioinf},
  volume  = {65},
  number  = {1},
  pages   = {164--179},
  year    = {2006},
}