読んだ。
Cancer Inform. 2014 Jan 16;13:13-20.
オミックス研究では、各染色体でのCNVや遺伝子発現状態、各種統計量の定量値や分布などをずらずらっと描きたいが、横に長く描くといけてないと誰かが思ったのだろうか、丸く描くやり方がある。
これをcircos plot と呼ぶが、5年くらい前にこの図を論文で見たときに描いてみたいとおもったときに、Rでよさそうなパッケージがなかったような気がするが、出来てた。
library(OmicCircos); options(stringsAsFactors = FALSE); data("TCGA.PAM50_genefu_hg18"); data("TCGA.BC.fus"); data("TCGA.BC.cnv.2k.60"); data("TCGA.BC.gene.exp.2k.60"); data("TCGA.BC.sample60"); data("TCGA.BC_Her2_cnv_exp"); #transform p-value pvalue <- -1 * log10(TCGA.BC_Her2_cnv_exp[,5]); pvalue <- cbind(TCGA.BC_Her2_cnv_exp[,c(1:3)], pvalue); Her2.i <- which(TCGA.BC.sample60[,2] == "Her2"); Her2.n <- TCGA.BC.sample60[Her2.i,1]; Her2.j <- which(colnames(TCGA.BC.cnv.2k.60) %in% Her2.n); cnv <- TCGA.BC.cnv.2k.60[,c(1:3,Her2.j)]; cnv.m <- cnv[,c(4:ncol(cnv))]; cnv.m[cnv.m > 2] <- 2; cnv.m[cnv.m < -2] <- -2; cnv <- cbind(cnv[,1:3], cnv.m); Her2.j <- which(colnames(TCGA.BC.gene.exp.2k.60) %in% Her2.n); gene.exp <- TCGA.BC.gene.exp.2k.60[,c(1:3,Her2.j)]; colors <- rainbow(10, alpha=0.5); #png("OmicCircos4vignette10.png", 600, 600); par(mar=c(2, 2, 2, 2)); plot(c(1,800), c(1,800), type="n", axes=FALSE, xlab="", ylab="", main=""); zoom <- c(1, 22, 939245.5, 154143883, 0, 180); circos(R=400, cir="hg18", W=4, type="chr", print.chr.lab=TRUE, scale=TRUE, zoom=zoom); circos(R=300, cir="hg18", W=100, mapping=gene.exp, col.v=4, type="heatmap2", cluster=TRUE, col.bar=TRUE, lwd=0.01, zoom=zoom); circos(R=220, cir="hg18", W=80, mapping=cnv, col.v=4, type="ml3", B=FALSE, lwd=1, cutoff=0, zoom=zoom); circos(R=140, cir="hg18", W=80, mapping=pvalue, col.v=4, type="l", B=TRUE, lwd=1, col=colors[1], zoom=zoom); circos(R=130, cir="hg18", W=10, mapping=TCGA.BC.fus, type="link", lwd=2, zoom=zoom); ## zoom in links by using the hightlight functions ## highlight the.col1=rainbow(10, alpha=0.5)[1]; highlight <- c(140, 400, 11, 282412.5, 11, 133770314.5, the.col1, the.col1); circos(R=110, cir="hg18", W=40, mapping=highlight, type="hl", lwd=2, zoom=zoom); the.col2=rainbow(10, alpha=0.5)[6]; highlight <- c(140, 400, 17, 739525, 17, 78385909, the.col2, the.col2); circos(R=110, cir="hg18", W=40, mapping=highlight, type="hl", lwd=2, zoom=zoom); ## highlight link highlight.link1 <- c(400, 400, 140, 376.8544, 384.0021, 450, 540.5); circos(cir="hg18", mapping=highlight.link1, type="highlight.link", col=the.col1, lwd=1); highlight.link2 <- c(400, 400, 140, 419.1154, 423.3032, 543, 627); circos(cir="hg18", mapping=highlight.link2, type="highlight.link", col=the.col2, lwd=1); ## zoom in chromosome 11 zoom <- c(11, 11, 282412.5, 133770314.5, 180, 270); circos(R=400, cir="hg18", W=4, type="chr", print.chr.lab=TRUE, scale=TRUE, zoom=zoom); circos(R=300, cir="hg18", W=100, mapping=gene.exp, col.v=4, type="heatmap2", cluster=TRUE, lwd=0.01, zoom=zoom); circos(R=220, cir="hg18", W=80, mapping=cnv, col.v=4, type="ml3", B=FALSE, lwd=1, cutoff=0, zoom=zoom); circos(R=140, cir="hg18", W=80, mapping=pvalue, col.v=4, type="l", B=TRUE, lwd=1, col=colors[1], zoom=zoom); ## zoom in chromosome 17 zoom <- c(17, 17, 739525, 78385909, 274, 356); circos(R=400, cir="hg18", W=4, type="chr", print.chr.lab=TRUE, scale=TRUE, zoom=zoom); circos(R=300, cir="hg18", W=100, mapping=gene.exp, col.v=4, type="heatmap2", cluster=TRUE, lwd=0.01, zoom=zoom); circos(R=220, cir="hg18", W=80, mapping=cnv, col.v=4, type="ml3", B=FALSE, lwd=1, cutoff=0, zoom=zoom); circos(R=140, cir="hg18", W=80, mapping=pvalue, col.v=4, type="l", B=TRUE, lwd=1, col=colors[1], zoom=zoom); #dev.off()
library(OmicCircos); options(stringsAsFactors = FALSE); data("TCGA.BC.fus"); data("TCGA.BC.cnv.2k.60"); data("TCGA.BC.gene.exp.2k.60"); data("TCGA.BC.sample60"); ## gene expression data for PCA exp.m <- TCGA.BC.gene.exp.2k.60[,c(4:ncol(TCGA.BC.gene.exp.2k.60))]; cnv <- TCGA.BC.cnv.2k.60; ## PCA type.n <- unique(TCGA.BC.sample60[,2]); colors <- rainbow(length(type.n), alpha=0.5); pca.col <- rep(NA, nrow(TCGA.BC.sample60)); for (i in 1:length(type.n)){ n <- type.n[i]; n.i <- which(TCGA.BC.sample60[,2] == n); n.n <- TCGA.BC.sample60[n.i,1]; g.i <- which(colnames(exp.m) %in% n.n); pca.col[g.i] <- colors[i]; } exp.m <- na.omit(exp.m); pca.out <- prcomp(t(exp.m), scale = TRUE); ## subtype cnv cnv.i <- c(); for (i in 1:length(type.n)){ n <- type.n[i]; n.i <- which(TCGA.BC.sample60[,2] == n); n.n <- TCGA.BC.sample60[n.i,1]; cnv.i <- which(colnames(cnv) %in% n.n); } ## main #png("OmicCircos4vignette8.png", 600, 600); par(mar=c(5, 5, 5, 5)); plot(c(1,800), c(1,800), type="n", axes=FALSE, xlab="", ylab="", main=""); legend(680,800, c("Basal","Her2","LumA","LumB"), pch=19, col=colors[c(2,4,1,3)], cex=0.5, title ="Gene Expression (PCA)", box.col="white"); legend(5,800, c("1 Basal", "2 Her2", "3 LumA", "4 LumB", "(center)"), cex=0.5, title ="CNV (OmicCircos)", box.col="white"); circos(xc=400, yc=400, R=390, cir="hg18", W=4, type="chr", print.chr.lab=TRUE, scale=TRUE); R.v <- 330; for (i in 1:length(type.n)){ n <- type.n[i]; n.i <- which(TCGA.BC.sample60[,2] == n); n.n <- TCGA.BC.sample60[n.i,1]; cnv.i <- which(colnames(cnv) %in% n.n); cnv.v <- cnv[,cnv.i]; cnv.v[cnv.v > 2] <- 2; cnv.v[cnv.v < -2] <- -2; cnv.m <- cbind(cnv[,c(1:3)], cnv.v); if (i%%2==1){ circos(xc=400, yc=400, R=R.v, cir="hg18", W=60, mapping=cnv.m, col.v=4, type="ml3", B=TRUE, lwd=1, cutoff=0, scale=TRUE); } else { circos(xc=400, yc=400, R=R.v, cir="hg18", W=60, mapping=cnv.m, col.v=4, type="ml3", B=FALSE, lwd=1, cutoff=0, scale=TRUE); } R.v <- R.v - 60; } points(pca.out$x[,1]*6+410, pca.out$x[,2]*6+400, pch=19, col=pca.col, cex=2); #dev.off()