OmicCircos

MikuHatsune2017-09-08

読んだ。
Cancer Inform. 2014 Jan 16;13:13-20.
 
オミックス研究では、各染色体でのCNVや遺伝子発現状態、各種統計量の定量値や分布などをずらずらっと描きたいが、横に長く描くといけてないと誰かが思ったのだろうか、丸く描くやり方がある。
これをcircos plot と呼ぶが、5年くらい前にこの図を論文で見たときに描いてみたいとおもったときに、Rでよさそうなパッケージがなかったような気がするが、出来てた。
 

library(OmicCircos);
options(stringsAsFactors = FALSE);

data("TCGA.PAM50_genefu_hg18");
data("TCGA.BC.fus");
data("TCGA.BC.cnv.2k.60");
data("TCGA.BC.gene.exp.2k.60");
data("TCGA.BC.sample60");
data("TCGA.BC_Her2_cnv_exp");

#transform p-value
pvalue <- -1 * log10(TCGA.BC_Her2_cnv_exp[,5]);
pvalue <- cbind(TCGA.BC_Her2_cnv_exp[,c(1:3)], pvalue);

Her2.i <- which(TCGA.BC.sample60[,2] == "Her2");
Her2.n <- TCGA.BC.sample60[Her2.i,1];

Her2.j <- which(colnames(TCGA.BC.cnv.2k.60) %in% Her2.n);
cnv    <- TCGA.BC.cnv.2k.60[,c(1:3,Her2.j)]; 
cnv.m  <- cnv[,c(4:ncol(cnv))];
cnv.m[cnv.m >  2] <- 2;
cnv.m[cnv.m < -2] <- -2;
cnv <- cbind(cnv[,1:3], cnv.m);

Her2.j   <- which(colnames(TCGA.BC.gene.exp.2k.60) %in% Her2.n);
gene.exp <- TCGA.BC.gene.exp.2k.60[,c(1:3,Her2.j)]; 

colors <- rainbow(10, alpha=0.5);

#png("OmicCircos4vignette10.png", 600, 600);
par(mar=c(2, 2, 2, 2));

plot(c(1,800), c(1,800), type="n", axes=FALSE, xlab="", ylab="", main="");

zoom <- c(1, 22, 939245.5, 154143883, 0, 180);
circos(R=400, cir="hg18", W=4,   type="chr", print.chr.lab=TRUE, scale=TRUE, zoom=zoom);
circos(R=300, cir="hg18", W=100, mapping=gene.exp, col.v=4,  type="heatmap2", cluster=TRUE, col.bar=TRUE, lwd=0.01, zoom=zoom);
circos(R=220, cir="hg18", W=80,  mapping=cnv,      col.v=4,   type="ml3", B=FALSE, lwd=1, cutoff=0, zoom=zoom);
circos(R=140, cir="hg18", W=80,  mapping=pvalue,   col.v=4,    type="l",   B=TRUE, lwd=1, col=colors[1], zoom=zoom);
circos(R=130, cir="hg18", W=10,  mapping=TCGA.BC.fus, type="link", lwd=2, zoom=zoom);

## zoom in links by using the hightlight functions 
## highlight
the.col1=rainbow(10, alpha=0.5)[1];

highlight <- c(140, 400, 11, 282412.5, 11, 133770314.5, the.col1, the.col1);
circos(R=110, cir="hg18", W=40, mapping=highlight, type="hl", lwd=2, zoom=zoom);
the.col2=rainbow(10, alpha=0.5)[6];
highlight <- c(140, 400, 17, 739525, 17, 78385909, the.col2, the.col2);
circos(R=110, cir="hg18", W=40, mapping=highlight, type="hl", lwd=2, zoom=zoom);
## highlight link
highlight.link1 <- c(400, 400, 140, 376.8544, 384.0021, 450, 540.5);
circos(cir="hg18", mapping=highlight.link1, type="highlight.link", col=the.col1, lwd=1);
highlight.link2 <- c(400, 400, 140, 419.1154, 423.3032, 543, 627);
circos(cir="hg18", mapping=highlight.link2, type="highlight.link", col=the.col2, lwd=1);

## zoom in chromosome 11
zoom <- c(11, 11, 282412.5, 133770314.5, 180, 270);
circos(R=400, cir="hg18", W=4,   type="chr", print.chr.lab=TRUE, scale=TRUE, zoom=zoom);
circos(R=300, cir="hg18", W=100, mapping=gene.exp, col.v=4,  type="heatmap2", cluster=TRUE, lwd=0.01, zoom=zoom);
circos(R=220, cir="hg18", W=80,  mapping=cnv,      col.v=4,   type="ml3", B=FALSE, lwd=1, cutoff=0, zoom=zoom);
circos(R=140, cir="hg18", W=80,  mapping=pvalue,   col.v=4,    type="l",   B=TRUE, lwd=1, col=colors[1], zoom=zoom);

## zoom in chromosome 17
zoom <- c(17, 17, 739525, 78385909, 274, 356);
circos(R=400, cir="hg18", W=4,   type="chr", print.chr.lab=TRUE, scale=TRUE, zoom=zoom);
circos(R=300, cir="hg18", W=100, mapping=gene.exp, col.v=4,  type="heatmap2", cluster=TRUE, lwd=0.01, zoom=zoom);
circos(R=220, cir="hg18", W=80,  mapping=cnv,      col.v=4,   type="ml3", B=FALSE, lwd=1, cutoff=0, zoom=zoom);
circos(R=140, cir="hg18", W=80,  mapping=pvalue,   col.v=4,    type="l",   B=TRUE, lwd=1, col=colors[1], zoom=zoom);

#dev.off()


 

library(OmicCircos);
options(stringsAsFactors = FALSE);

data("TCGA.BC.fus");
data("TCGA.BC.cnv.2k.60");
data("TCGA.BC.gene.exp.2k.60");
data("TCGA.BC.sample60");

## gene expression data for PCA
exp.m <- TCGA.BC.gene.exp.2k.60[,c(4:ncol(TCGA.BC.gene.exp.2k.60))];

cnv   <- TCGA.BC.cnv.2k.60;

## PCA
type.n  <- unique(TCGA.BC.sample60[,2]);
colors  <- rainbow(length(type.n), alpha=0.5);

pca.col <- rep(NA, nrow(TCGA.BC.sample60));
for (i in 1:length(type.n)){
  n   <- type.n[i];
  n.i <- which(TCGA.BC.sample60[,2] == n);
  n.n <- TCGA.BC.sample60[n.i,1];
  g.i <- which(colnames(exp.m) %in% n.n);
  pca.col[g.i] <- colors[i];
}

exp.m   <- na.omit(exp.m);
pca.out <- prcomp(t(exp.m), scale = TRUE);

## subtype cnv
cnv.i <- c();
for (i in 1:length(type.n)){
  n     <- type.n[i];
  n.i   <- which(TCGA.BC.sample60[,2] == n);
  n.n   <- TCGA.BC.sample60[n.i,1];
  cnv.i <- which(colnames(cnv) %in% n.n);
}

## main
#png("OmicCircos4vignette8.png", 600, 600);
par(mar=c(5, 5, 5, 5));

plot(c(1,800), c(1,800), type="n", axes=FALSE, xlab="", ylab="", main="");

legend(680,800, c("Basal","Her2","LumA","LumB"), pch=19, col=colors[c(2,4,1,3)], cex=0.5, 
     title ="Gene Expression (PCA)", box.col="white");

legend(5,800, c("1 Basal", "2 Her2", "3 LumA", "4 LumB", "(center)"), cex=0.5, 
     title ="CNV (OmicCircos)", box.col="white");

circos(xc=400, yc=400, R=390, cir="hg18", W=4,  type="chr", print.chr.lab=TRUE, scale=TRUE);
R.v <- 330;
for (i in 1:length(type.n)){
  n     <- type.n[i];
  n.i   <- which(TCGA.BC.sample60[,2] == n);
  n.n   <- TCGA.BC.sample60[n.i,1];
  cnv.i <- which(colnames(cnv) %in% n.n);
  cnv.v <- cnv[,cnv.i];
  cnv.v[cnv.v > 2]  <- 2;
  cnv.v[cnv.v < -2] <- -2;
  cnv.m <- cbind(cnv[,c(1:3)], cnv.v);
  if (i%%2==1){
    circos(xc=400, yc=400, R=R.v, cir="hg18", W=60, mapping=cnv.m, col.v=4,  
           type="ml3", B=TRUE, lwd=1, cutoff=0, scale=TRUE);
  } else {
    circos(xc=400, yc=400, R=R.v, cir="hg18", W=60, mapping=cnv.m, col.v=4,  
           type="ml3", B=FALSE, lwd=1, cutoff=0, scale=TRUE);
  }
  R.v <- R.v - 60;
}

points(pca.out$x[,1]*6+410, pca.out$x[,2]*6+400, pch=19, col=pca.col, cex=2);

#dev.off()