% Haas et al. (2003), Nucleic Acids Research 31(19): the PASA (Program to
% Assemble Spliced Alignments) paper on improving the Arabidopsis annotation.
% The citation key is an export-generated hash and is kept as-is so existing
% \cite commands keep resolving.
% The funding acknowledgement is stored in the non-standard `funding` field
% (ignored by bibliography styles) rather than `note`, which standard styles
% print verbatim in the rendered reference.
@article{1ba8b0c7f6c749f3839b40082ede681d,
  author    = {Haas, Brian J. and Delcher, Arthur L. and Mount, Stephen M. and Wortman, Jennifer R. and Smith, Roger K. and Hannick, Linda I. and Maiti, Rama and Ronning, Catherine M. and Rusch, Douglas B. and Town, Christopher D. and Salzberg, Steven L. and White, Owen},
  title     = {Improving the {Arabidopsis} genome annotation using maximal transcript alignment assemblies},
  journal   = {Nucleic Acids Research},
  volume    = {31},
  number    = {19},
  pages     = {5654--5666},
  year      = {2003},
  month     = oct,
  day       = {1},
  publisher = {Oxford University Press},
  issn      = {1362-4962},
  doi       = {10.1093/nar/gkg770},
  language  = {English (US)},
  abstract  = {The spliced alignment of expressed sequence data to genomic sequence has proven a key tool in the comprehensive annotation of genes in eukaryotic genomes. A novel algorithm was developed to assemble clusters of overlapping transcript alignments (ESTs and full-length cDNAs) into maximal alignment assemblies, thereby comprehensively incorporating all available transcript data and capturing subtle splicing variations. Complete and partial gene structures identified by this method were used to improve The Institute for Genomic Research Arabidopsis genome annotation (TIGR release v.4.0). The alignment assemblies permitted the automated modeling of several novel genes and $>$1000 alternative splicing variations as well as updates (including UTR annotations) to nearly half of the $\sim$27 000 annotated protein coding genes. The algorithm of the Program to Assemble Spliced Alignments (PASA) tool is described, as well as the results of automated updates to Arabidopsis gene annotations.},
  funding   = {Funding Information: We would like to thank the TIGR Information Technology group for their support, particularly Susan Lo, Michael Heaney and Billy Lee. Thanks are due to Mihai Pop for fruitful algorithmic input. Finally, we would like to give special thanks to Jim Kent, Liliana Florea, Webb Miller and, especially, Xiaoqiu Huang for providing the community with useful sets of tools for interrogating sequence data and providing a foundation for the work described here. Additional thanks are due to Volker Brendel for making the GeneSeqer program available and for providing a thorough review of the manuscript. A.L.D. and S.L.S. were supported in part by NIH grant R01-LM06845-04. The Arabidopsis genome re-annotation at TIGR is supported by the National Science Foundation (Cooperative Agreement DBI 9813586).},
}