% Publication list: six entries, citation keys 7, 8, 9, 2, 3 and 5.
% Layout: one field per line; authors in "Last, First" form; month given as
% the unquoted three-letter macro; page ranges written in full with "--".
% The nar string macro below holds the journal name shared by entries 9, 3 and 5.

@string{nar = {Nucleic Acids Research}}

@article{7,
  author   = {Medina, I and T{\'a}rraga, J and Mart{\'\i}nez, H and Barrachina, S
              and Castillo, M I and Paschall, J and Salavert-Torres, J
              and Blanquer-Espert, I and Hern{\'a}ndez-Garc{\'\i}a, V
              and Quintana-Ort{\'\i}, E S and Dopazo, J},
  title    = {Highly sensitive and ultrafast read mapping for {RNA-seq} analysis},
  journal  = {DNA Research},
  year     = {2016},
  month    = jan,
  day      = {5},
  issn     = {1756-1663},
  doi      = {10.1093/dnares/dsv039},
  abstract = {As sequencing technologies progress, the amount of data produced grows exponentially, shifting the bottleneck of discovery towards the data analysis phase. In particular, currently available mapping solutions for RNA-seq leave room for improvement in terms of sensitivity and performance, hindering an efficient analysis of transcriptomes by massive sequencing. Here, we present an innovative approach that combines re-engineering, optimization and parallelization. This solution results in a significant increase of mapping sensitivity over a wide range of read lengths and substantial shorter runtimes when compared with current RNA-seq mapping methods available.},
}

@article{8,
  author   = {Tarraga, Joaquin and Arnau, Vicente and Mart{\'\i}nez, H{\'e}ctor
              and Moreno, Raul and Cazorla, Diego and Salavert-Torres, Jos{\'e}
              and Blanquer-Espert, Ignacio and Dopazo, Joaqu{\'\i}n
              and Medina, Ignacio},
  title    = {Acceleration of short and long {DNA} read mapping without loss of accuracy using suffix array},
  journal  = {Bioinformatics},
  volume   = {30},
  pages    = {3396--3398},
  year     = {2014},
  month    = dec,
  day      = {1},
  issn     = {1367-4811},
  doi      = {10.1093/bioinformatics/btu553},
  keywords = {Algorithms, Animals, DNA, Drosophila, High-Throughput Nucleotide Sequencing, Humans, Sequence Alignment, Sequence Analysis, Software},
  abstract = {UNLABELLED: HPG Aligner applies suffix arrays for DNA read mapping. This implementation produces a highly sensitive and extremely fast mapping of DNA reads that scales up almost linearly with read length. The approach presented here is faster (over 20{\texttimes} for long reads) and more sensitive (over 98\% in a wide range of read lengths) than the current state-of-the-art mappers. HPG Aligner is not only an optimal alternative for current sequencers but also the only solution available to cope with longer reads and growing throughputs produced by forthcoming sequencing technologies.

AVAILABILITY AND IMPLEMENTATION: https://github.com/opencb/hpg-aligner.},
}

@article{9,
  author   = {Medina, Ignacio and Salavert, Francisco and Sanchez, Rub{\'e}n
              and de Maria, Alejandro and Alonso, Roberto and Escobar, Pablo
              and Bleda, Marta and Dopazo, Joaqu{\'\i}n},
  title    = {{Genome Maps}, a new generation genome browser},
  journal  = nar,
  volume   = {41},
  pages    = {W41--W46},
  year     = {2013},
  month    = jul,
  issn     = {1362-4962},
  doi      = {10.1093/nar/gkt530},
  keywords = {Genome, Genomics, Internet, Software},
  abstract = {Genome browsers have gained importance as more genomes and related genomic information become available. However, the increase of information brought about by new generation sequencing technologies is, at the same time, causing a subtle but continuous decrease in the efficiency of conventional genome browsers. Here, we present Genome Maps, a genome browser that implements an innovative model of data transfer and management. The program uses highly efficient technologies from the new HTML5 standard, such as scalable vector graphics, that optimize workloads at both server and client sides and ensure future scalability. Thus, data management and representation are entirely carried out by the browser, without the need of any Java Applet, Flash or other plug-in technology installation. Relevant biological data on genes, transcripts, exons, regulatory features, single-nucleotide polymorphisms, karyotype and so forth, are imported from web services and are available as tracks. In addition, several DAS servers are already included in Genome Maps. As a novelty, this web-based genome browser allows the local upload of huge genomic data files (e.g. VCF or BAM) that can be dynamically visualized in real time at the client side, thus facilitating the management of medical data affected by privacy restrictions. Finally, Genome Maps can easily be integrated in any web application by including only a few lines of code. Genome Maps is an open source collaborative initiative available in the GitHub repository (https://github.com/compbio-bigdata-viz/genome-maps). Genome Maps is available at: http://www.genomemaps.org.},
}

% Entry 2 is a paper in a proceedings volume (it carries an ISBN and a series
% ISSN), so the proceedings title is stored in booktitle rather than journal.
% NOTE(review): the chapter value 273 is carried over from the export as-is;
% it may actually be the first page of the paper. Confirm against the
% publisher record before moving it to a pages field.
@inproceedings{2,
  author    = {Gonz{\'a}lez, Cristina Y. and Bleda, Marta and Salavert, Francisco
               and Sanchez, Rub{\'e}n and Dopazo, Joaqu{\'\i}n and Medina, Ignacio},
  title     = {Multicore and Cloud-Based Solutions for Genomic Variant Analysis},
  booktitle = {{Euro-Par} 2012: Parallel Processing Workshops},
  series    = {Lecture Notes in Computer Science},
  volume    = {7640},
  chapter   = {273},
  year      = {2013},
  isbn      = {978-3-642-36948-3},
  issn      = {0302-9743},
  doi       = {10.1007/978-3-642-36949-0_30},
  url       = {http://rd.springer.com/chapter/10.1007/978-3-642-36949-0_30},
  keywords  = {genomic variant analysis, Multicore, Mutation, OpenMP, web service},
  abstract  = {Genomic variant analysis is a complex process that allows to find and study genome mutations. For this purpose, analysis and tests from both biological and statistical points of view must be conducted. Biological data for this kind of analysis are typically stored according to the Variant Call Format (VCF), in gigabytes-sized files that cannot be efficiently processed using conventional software. In this paper, we introduce part of the High Performance Genomics (HPG) project, whose goal is to develop a collection of efficient and open-source software applications for the genomics area. The paper is mainly focused on HPG Variant, a suite that allows to get the effect of mutations and to conduct genomic-wide and family-based analysis, using a multi-tier architecture based on CellBase Database and a RESTful web service API. Two user clients are also provided: an HTML5 web client and a command-line interface, both using a back-end parallelized using OpenMP. Along with HPG Variant, a library for VCF files handling and a collection of utilities for VCF files preprocessing have been developed. Positive performance results are shown in comparison with other applications such as PLINK, GenABEL, SNPTEST or VCFtools.},
}

@article{3,
  author   = {Bleda, Marta and Tarraga, Joaquin and de Maria, Alejandro
              and Salavert, Francisco and Garcia-Alonso, Luz and Celma, Matilde
              and Martin, Ainoha and Dopazo, Joaqu{\'\i}n and Medina, Ignacio},
  title    = {{CellBase}, a comprehensive collection of {RESTful} web services for retrieving relevant biological information from heterogeneous sources},
  journal  = nar,
  volume   = {40},
  pages    = {W609--W614},
  year     = {2012},
  month    = jul,
  issn     = {1362-4962},
  doi      = {10.1093/nar/gks575},
  keywords = {Animals, Databases, Gene Regulatory Networks, Genetic, Genetic Variation, Humans, Internet, Mice, MicroRNAs, Molecular Sequence Annotation, Protein Interaction Mapping, Rats, Software, Systems Biology, Systems Integration, Transcription Factors},
  abstract = {During the past years, the advances in high-throughput technologies have produced an unprecedented growth in the number and size of repositories and databases storing relevant biological data. Today, there is more biological information than ever but, unfortunately, the current status of many of these repositories is far from being optimal. Some of the most common problems are that the information is spread out in many small databases; frequently there are different standards among repositories and some databases are no longer supported or they contain too specific and unconnected information. In addition, data size is increasingly becoming an obstacle when accessing or storing biological data. All these issues make very difficult to extract and integrate information from different sources, to analyze experiments or to access and query this information in a programmatic way. CellBase provides a solution to the growing necessity of integration by easing the access to biological data. CellBase implements a set of RESTful web services that query a centralized database containing the most relevant biological data sources. The database is hosted in our servers and is regularly updated. CellBase documentation can be found at http://docs.bioinfo.cipf.es/projects/cellbase.},
}

@article{5,
  author   = {Medina, Ignacio and de Maria, Alejandro and Bleda, Marta
              and Salavert, Francisco and Alonso, Roberto
              and Gonzalez, Cristina Y and Dopazo, Joaqu{\'\i}n},
  title    = {{VARIANT}: Command Line, Web service and Web interface for fast and accurate functional characterization of variants found by Next-Generation Sequencing},
  journal  = nar,
  volume   = {40},
  pages    = {W54--W58},
  year     = {2012},
  month    = jul,
  issn     = {1362-4962},
  doi      = {10.1093/nar/gks572},
  url      = {http://nar.oxfordjournals.org/content/40/W1/W54},
  keywords = {Databases, Genetic Variation, High-Throughput Nucleotide Sequencing, Internet, Molecular Sequence Annotation, Mutation, Nucleic Acid, Polymorphism, Single Nucleotide, Software, User-Computer Interface},
  abstract = {The massive use of Next-Generation Sequencing (NGS) technologies is uncovering an unexpected amount of variability. The functional characterization of such variability, particularly in the most common form of variation found, the Single Nucleotide Variants (SNVs), has become a priority that needs to be addressed in a systematic way. VARIANT (VARIant ANalyis Tool) reports information on the variants found that include consequence type and annotations taken from different databases and repositories (SNPs and variants from dbSNP and 1000 genomes, and disease-related variants from the Genome-Wide Association Study (GWAS) catalog, Online Mendelian Inheritance in Man (OMIM), Catalog of Somatic Mutations in Cancer (COSMIC) mutations, etc). VARIANT also produces a rich variety of annotations that include information on the regulatory (transcription factor or miRNA-binding sites, etc.) or structural roles, or on the selective pressures on the sites affected by the variation. This information allows extending the conventional reports beyond the coding regions and expands the knowledge on the contribution of non-coding or synonymous variants to the phenotype studied. Contrarily to other tools, VARIANT uses a remote database and operates through efficient RESTful Web Services that optimize search and transaction operations. In this way, local problems of installation, update or disk size limitations are overcome without the need of sacrifice speed (thousands of variants are processed per minute). VARIANT is available at: http://variant.bioinfo.cipf.es.},
}