Isotonic regressionの高速化 - 驚異のアニヲタ社会復帰の予備

MikuHatsune2012-03-25

Isotonic regressionを扱っている。
制約条件行列の作成で、行列が大きくなると時間がかかるので、for loopをなんとか回避して関数の高速化を行った。

library(quadprog)
#for loopの関数
f1 <- function(Mat, multiple=3){
	N <- nrow(Mat) # 観測用量点の数
	M <- ncol(Mat) # 用量反応曲線の数

	# デフォルトはtriplicateとする
	ws <- rep(multiple, N*M)
	Dmat <- diag(ws)
	dvec <- ws*c(Mat)
	
	Q <- NULL
	for(j in 1:M){
		for(i in 1:(N-1)){
		
			st <- (j-1)*N + i
			end <- st+1
			tmp <- rep(0, N*M)
			tmp[st] <- 1
			tmp[end] <- -1
			Q <- rbind(Q, tmp)
		}
	}
	for(i in 2:M){
		for(j in 1:N){
			st <- j
			end <- N*(i-1)+j
			tmp <- rep(0, N*M)
			tmp[st] <- 1
			tmp[end] <- -1
			Q <- rbind(Q, tmp)
		}
	}
	Amat2 <- Q
	Amat <- t(Amat2)
	bvec <- rep(0, length(Amat[1, ]))
	# Dmat,dvec,Amat,bvecはそのまま
	# meqは制約条件のうち、等式である数。今回は0
	
	out1<-solve.QP(Dmat, dvec, Amat, bvec=bvec, meq=0)
	estMat <- matrix(out1$solution, N, M)
	obsMat <- matrix(out1$unconstrained.solution, N, M)
	
	modifMat <- matrix(out1$solution, N, M)
	modifMat[modifMat < 0] <- 0
	modifMat[modifMat > 100] <- 100
	
	Matlist <- list(estMat, obsMat, modifMat)
	names(Matlist) <- c("estimation", "observation", "modified")
	for(i in 1:length(Matlist)){
		colnames(Matlist[[i]]) <- colnames(Mat)
	}

	return(Matlist)
}

#ベクトル処理によりfor loopを回避
f2 <- function(Mat, multiple=3){
	N <- nrow(Mat) # 観測用量点の数
	M <- ncol(Mat) # 用量反応曲線の数

	# デフォルトはtriplicateとする
	ws <- rep(multiple, N*M)
	Dmat <- diag(ws)
	dvec <- ws*c(Mat)
	
	#最初のQ
	Qvec1 <- numeric((N*M) * (M*(N - 1)))
	Qvec1_1 <- (1:(M*(N - 1))) + rep(0:(M - 1), each=N - 1) + (N*M)*(0:(M*(N - 1) - 1))
	Qvec1[Qvec1_1] <- 1
	Qvec1[Qvec1_1 + 1] <- -1
	Q1 <- matrix(Qvec1, nc=N*M, byrow=TRUE)
	
	#次のQ
	Qvec2 <- numeric(N*(N*(M - 1)))
	Qvec2_1 <- N*(0:(N - 1)) + 1:N + rep((N*N)*(0:(M - 2)), each=N)
	Qvec2[Qvec2_1] <- 1
	Q2 <- matrix(Qvec2, nc=N, byrow=TRUE)
	
	Q3 <- diag(-1, N*(M - 1))
	
	Qq <- rbind(Q1, cbind(Q2, Q3))
	
	Amat2 <- Qq
	Amat <- t(Amat2)
	bvec <- rep(0, length(Amat[1, ]))
	# Dmat,dvec,Amat,bvecはそのまま
	# meqは制約条件のうち、等式である数。今回は0
	
	out1<-solve.QP(Dmat, dvec, Amat, bvec=bvec, meq=0)
	estMat <- matrix(out1$solution, N, M)
	obsMat <- matrix(out1$unconstrained.solution, N, M)
	
	modifMat <- matrix(out1$solution, N, M)
	modifMat[modifMat < 0] <- 0
	modifMat[modifMat > 100] <- 100
	
	Matlist <- list(estMat, obsMat, modifMat)
	names(Matlist) <- c("estimation", "observation", "modified")
	for(i in 1:length(Matlist)){
		colnames(Matlist[[i]]) <- colnames(Mat)
	}

	return(Matlist)
}

rows <- c(5:30, 50, 100, 300, 500) #行の大きさ
cols <- 4 #列の大きさ

timeres <- matrix(0, length(rows), 2)
dimnames(timeres) <- list(rows, c("conv", "new"))
for(i in 1:length(rows)){
	data <- matrix(runif(rows[i]*cols), nc=cols)
	timeres[i, 1] <- system.time(f1(data))[3]
	timeres[i, 2] <- system.time(f2(data))[3]
	print(rows[i])
}

par(mfrow=c(1, 2))
matplot(rows, timeres, type="l", log="", lwd=2, xlab="num of matrix rows", ylab="elapsed time")
legend(min(rows), max(timeres), colnames(timeres), col=1:2, lty=1:2, pch=1:2, lwd=2)
matplot(rows, timeres, type="l", log="xy", lwd=2, xlab="num of matrix rows", ylab="elapsed time")
legend(min(rows), max(timeres), colnames(timeres), col=1:2, lty=1:2, pch=1:2, lwd=2)

ベクトル処理のほうが速い。しかし、ベクトルでも指数関数的に処理時間が増えていくようだ。