% Leek et al. (2010), "Tackling the widespread and critical impact of batch
% effects in high-throughput data", Nature Reviews Genetics 11(10):733--739.
%
% Maintenance notes for this entry:
%  - Author names are all in "Last, First" form with unbraced given names so
%    that styles can abbreviate first/middle names to initials.
%  - The exported funding/acknowledgement text is kept in the non-standard
%    "funding" field (ignored by standard styles) instead of "note", which
%    most styles print verbatim in the bibliography.
%  - NOTE(review): the first "Funding Information" paragraph speaks for S.T.,
%    J.A.T. and M.V., initials that match none of the authors listed here;
%    it looks like export residue from another article -- confirm and prune.
%  - NOTE(review): the third author is parsed with surname "Bravo" (as in the
%    original export); confirm whether the surname should instead be the
%    compound "Corrada Bravo", i.e. {Corrada Bravo}, H{\'e}ctor.
@article{4070a6783ca84ff6bc3e9fc05603dd7c,
  author    = {Leek, Jeffrey T. and Scharpf, Robert B. and Bravo, H{\'e}ctor Corrada and Simcha, David and Langmead, Benjamin and Johnson, W. Evan and Geman, Donald and Baggerly, Keith and Irizarry, Rafael A.},
  title     = {Tackling the widespread and critical impact of batch effects in high-throughput data},
  journal   = {Nature Reviews Genetics},
  year      = {2010},
  month     = oct,
  day       = {14},
  volume    = {11},
  number    = {10},
  pages     = {733--739},
  publisher = {Nature Publishing Group},
  doi       = {10.1038/nrg2825},
  issn      = {1471-0056},
  language  = {English (US)},
  abstract  = {High-throughput technologies are widely used, for example to assay genetic variants, gene and protein expression, and epigenetic modifications. One often overlooked complication with such studies is batch effects, which occur because measurements are affected by laboratory conditions, reagent lots and personnel differences. This becomes a major problem when batch effects are correlated with an outcome of interest and lead to incorrect conclusions. Using both published studies and our own analyses, we argue that batch effects (as well as other technical and biological artefacts) are widespread and critical to address. We review experimental and computational approaches for doing so.},
  funding   = {Funding Information: S.T. thanks members of her laboratory for helpful discussions. J.A.T. thanks the Wellcome Trust, the Juvenile Diabetes Research Foundation International and the National Institute for Health Research Cambridge Biomedical Research Centre for funding, and J. Nadeau for sharing unpublished information. The Cambridge Institute for Medical Research is a recipient of a Wellcome Trust Strategic Award (079895). M.V. would like to thank M. Walhout, J. Dekker and J. Vandenhaute for helpful conversations on the subject discussed here. Funding Information: We thank the referees for helpful comments and suggestions. One referee in particular went beyond the call of duty to help us improve clarity. We thank the TCGA and 1000 Genomes Project for making the data public. The GoKinD collection of DNA was genotyped through the Genetic Association Information Network (GAIN) programme with the support of the Foundation for the National Institutes of Health and The National Institute of Diabetes and Digestive and Kidney Diseases. The work of J.T.L., H.C.B., B.L. and R.A.I. is partially funded by US National Institutes of Health grants GM0083084, HG004059 and HG005220.},
}