
Jong Wha (Joanne) Joo successfully defended her thesis, “Design of efficient and accurate statistical approaches to correct for confounding effects in genetic association studies,” on Friday, December 4, 2015 in Boelter 4760. Her talk, which is posted on our YouTube channel ZarlabUCLA, discusses using a mixed model analysis (GAMMA) to efficiently analyze large numbers of phenotypes while simultaneously considering population structure, an expression quantitative trait loci (eQTL) mapping tool to eliminate spurious hotspots while retaining genuine regulatory hotspots, and a multiple testing correction method (slideLMM) for linear mixed models.
More details about her research are available in the three papers she discusses:
Joo, Jong Wha J; Hormozdiari, Farhad; Han, Buhm; Eskin, Eleazar Multiple testing correction in linear mixed models. Journal Article In: Genome Biol, 17 (1), pp. 62, 2016, ISSN: 1474-760X. @article{Joo:GenomeBiol:2016, title = {Multiple testing correction in linear mixed models.}, author = {Jong Wha J. Joo and Farhad Hormozdiari and Buhm Han and Eleazar Eskin}, url = {http://dx.doi.org/10.1186/s13059-016-0903-6}, issn = {1474-760X}, year = {2016}, date = {2016-01-01}, journal = {Genome Biol}, volume = {17}, number = {1}, pages = {62}, address = {England}, abstract = {BACKGROUND: Multiple hypothesis testing is a major issue in genome-wide association studies (GWAS), which often analyze millions of markers. The permutation test is considered to be the gold standard in multiple testing correction as it accurately takes into account the correlation structure of the genome. Recently, the linear mixed model (LMM) has become the standard practice in GWAS, addressing issues of population structure and insufficient power. However, none of the current multiple testing approaches are applicable to LMM. RESULTS: We were able to estimate per-marker thresholds as accurately as the gold standard approach in real and simulated datasets, while reducing the time required from months to hours. We applied our approach to mouse, yeast, and human datasets to demonstrate the accuracy and efficiency of our approach. CONCLUSIONS: We provide an efficient and accurate multiple testing correction approach for linear mixed models. We further provide an intuition about the relationships between per-marker threshold, genetic relatedness, and heritability, based on our observations in real data}, keywords = {}, pubstate = {published}, tppubtype = {article} } BACKGROUND: Multiple hypothesis testing is a major issue in genome-wide association studies (GWAS), which often analyze millions of markers. 
The permutation test is considered to be the gold standard in multiple testing correction as it accurately takes into account the correlation structure of the genome. Recently, the linear mixed model (LMM) has become the standard practice in GWAS, addressing issues of population structure and insufficient power. However, none of the current multiple testing approaches are applicable to LMM. RESULTS: We were able to estimate per-marker thresholds as accurately as the gold standard approach in real and simulated datasets, while reducing the time required from months to hours. We applied our approach to mouse, yeast, and human datasets to demonstrate the accuracy and efficiency of our approach. CONCLUSIONS: We provide an efficient and accurate multiple testing correction approach for linear mixed models. We further provide an intuition about the relationships between per-marker threshold, genetic relatedness, and heritability, based on our observations in real data |
Joo, Jong Wha J; Kang, Eun Yong; Org, Elin; Furlotte, Nick; Parks, Brian; Lusis, Aldons J; Eskin, Eleazar Efficient and Accurate Multiple-Phenotypes Regression Method for High Dimensional Data Considering Population Structure. Book Chapter In: Research in Computational Molecular Biology, pp. 136-153, Springer International Publishing, 2015. @inbook{Joo:ResearchInComputationalMolecularBiology:2015b, title = {Efficient and Accurate Multiple-Phenotypes Regression Method for High Dimensional Data Considering Population Structure}, author = {Jong Wha J. Joo and Eun Yong Kang and Elin Org and Nick Furlotte and Brian Parks and Aldons J. Lusis and Eleazar Eskin}, url = {http://dx.doi.org/10.1007/978-3-319-16706-0_15}, year = {2015}, date = {2015-01-01}, booktitle = {Research in Computational Molecular Biology}, pages = {136-153}, publisher = {Springer International Publishing}, organization = {University of California}, abstract = {A typical GWAS tests correlation between a single phenotype and each genotype one at a time. However, it is often very useful to analyze many phenotypes simultaneously. For example, this may increase the power to detect variants by capturing unmeasured aspects of complex biological networks that a single phenotype might miss. There are several multivariate approaches that try to detect variants related to many phenotypes, but none of them consider population structure and each may result in a significant number of false positive identifications. Here, we introduce a new methodology, referred to as GAMMA, that could both simultaneously analyze many phenotypes as well as correct for population structure. In a simulated study, GAMMA accurately identifies true genetic effects without false positive identifications, while other methods either fail to detect true effects or result in many false positive identifications. 
We further apply our method to genetic studies of yeast and gut microbiome from mouse and show that GAMMA identifies several variants that are likely to have a true biological mechanism.}, keywords = {}, pubstate = {published}, tppubtype = {inbook} } A typical GWAS tests correlation between a single phenotype and each genotype one at a time. However, it is often very useful to analyze many phenotypes simultaneously. For example, this may increase the power to detect variants by capturing unmeasured aspects of complex biological networks that a single phenotype might miss. There are several multivariate approaches that try to detect variants related to many phenotypes, but none of them consider population structure and each may result in a significant number of false positive identifications. Here, we introduce a new methodology, referred to as GAMMA, that could both simultaneously analyze many phenotypes as well as correct for population structure. In a simulated study, GAMMA accurately identifies true genetic effects without false positive identifications, while other methods either fail to detect true effects or result in many false positive identifications. We further apply our method to genetic studies of yeast and gut microbiome from mouse and show that GAMMA identifies several variants that are likely to have a true biological mechanism. |
Joo, Jong Wha J; Sul, Jae Hoon; Han, Buhm; Ye, Chun; Eskin, Eleazar Effectively identifying regulatory hotspots while capturing expression heterogeneity in gene expression studies. Journal Article In: Genome Biol, 15 (4), pp. R61, 2014, ISSN: 1465-6914. @article{Joo:GenomeBiol:2014, title = {Effectively identifying regulatory hotspots while capturing expression heterogeneity in gene expression studies.}, author = {Jong Wha J. Joo and Jae Hoon Sul and Buhm Han and Chun Ye and Eleazar Eskin}, url = {http://dx.doi.org/10.1186/gb-2014-15-4-r61}, issn = {1465-6914}, year = {2014}, date = {2014-01-01}, journal = {Genome Biol}, volume = {15}, number = {4}, pages = {R61}, abstract = {Expression quantitative trait loci (eQTL) mapping is a tool that can systematically identify genetic variation affecting gene expression. eQTL mapping studies have shown that certain genomic locations, referred to as regulatory hotspots, may affect the expression levels of many genes. Recently, studies have shown that various confounding factors may induce spurious regulatory hotspots. Here, we introduce a novel statistical method that effectively eliminates spurious hotspots while retaining genuine hotspots. Applied to simulated and real datasets, we validate that our method achieves greater sensitivity while retaining low false discovery rates compared to previous methods}, keywords = {}, pubstate = {published}, tppubtype = {article} } Expression quantitative trait loci (eQTL) mapping is a tool that can systematically identify genetic variation affecting gene expression. eQTL mapping studies have shown that certain genomic locations, referred to as regulatory hotspots, may affect the expression levels of many genes. Recently, studies have shown that various confounding factors may induce spurious regulatory hotspots. Here, we introduce a novel statistical method that effectively eliminates spurious hotspots while retaining genuine hotspots. 
Applied to simulated and real datasets, we validate that our method achieves greater sensitivity while retaining low false discovery rates compared to previous methods |