Rの高速化

Rの高速化というものを見つけたのでやってみる。
PCスペックは
OS: Ubuntu 12.04 64-bit
CPU: Intel Core i5-2540M 2.60GHz * 4
メモリ: 12GB
R: 2.14.1
で行った。
 
Cでは0.16秒
Pythonでは3.66秒かかった。
他の言語は使っていないのでパスする。
Rでは

            [,1]  [,2]    [,3] [,4]  [,5]
for       30.137 0.028  30.277    0 0.000
vector     0.304 0.156   0.464    0 0.000
apply    163.795 0.712 165.106    0 0.000
ff        24.094 1.152  94.267    0 0.124
compiler  36.698 0.080  36.951    0 0.000
core 2    67.940 1.616 156.543    0 0.032
core 3    49.747 1.328 122.340    0 0.100
core 4    48.567 1.616 120.034    0 0.184

という結果になった。

###C
#include <stdio.h> 
#include <time.h> 
#define num_steps 10000000

int main(void){
	clock_t t1, t2; 
		t1 = clock();
		int i; 
		double x, pi, sum =0.0, step;
		step = 1.0 / (double) num_steps;
		
		for(i = 1; i <= num_steps; i ++){
			x = (i - 0.5) * step;
			sum = sum + 4.0 / (1.0 + x * x);
		}
		pi = step * sum;
		printf("%f\n", pi);
		t2 = clock();
	printf("%f\n", (double)(t2 - t1) / CLOCKS_PER_SEC);
		return(0);
}

#compileのやり方を知らない
#上のスクリプトをhoge.cに保存
#以下、ターミナルで
gcc -o hoge hoge.c
./hoge

###Python
import sys
import time
before = time.time()
num_steps = 10000000
sum = 0
step = 1.0/num_steps
for i in range(1,num_steps):
	x = (i-0.5)*step
	sum += 4.0/(1.0 + x*x)
	

pi = step*sum
print pi
print "\n"
after = time.time()
print "RunningTime=", after-before, "s"

###Perl
$t1 = (times)[0];
$num_steps = 10000000;
$step = 1.0 / $num_steps;
	for($i = 1; $i <= $num_steps; $i ++){
		$x = ($i - 0.5) * $step;
		$sum = $sum + 4.0 / (1.0 + $x * $x);
	}
	$pi = $step * $sum;
	print $pi;
	print "\n";

$t2 = (times)[0];
$t3 = $t2 - $t1;
print "$t3 秒 \n";

###Ruby
require "benchmark"
num_steps = 10000000
step = 1.0 / num_steps
sum = 0
puts Benchmark::CAPTION
puts Benchmark.measure{
	for i in 1 ..num_steps
		x = (i - 0.5) * step
		sum = sum + 4.0 / (1.0 + x * x)
	end

pi = step * sum
puts pi
}

###R
timeres <- matrix(0, 4 + max_core, 5)
max_core <- 4
rownames(timeres) <- c("for", "vector", "apply", "ff", "compiler", paste("core", 2:max_core))

#for loopで書く
before <- proc.time()
num_steps <- 10000000
step <- 1.0 / num_steps
summ <- 0
for(i in 1: num_steps){
	x <- (i - 0.5) * step
	summ <- summ + 4.0 / (1.0 + x * x)
}
pi <- step * summ
print(pi)
after <- proc.time()
timeres["for", ] <- after - before


#ベクトル化
before <- proc.time()
num_steps <- 1:10000000
step <- 1.0 / 10000000
x <- (num_steps - 0.5) * step
pi <- sum((4.0 / (1.0 + x * x))* step)
print(pi)
after <- proc.time()
timeres["vector", ] <- after - before

#apply
before <- proc.time()
num_steps <- 1:10000000
step <- 1.0 / 10000000
menseki <- function(A){
	x <- (A - 0.5) * step
	y <- 4.0 / (1.0 + x * x)
	return(y * step)
}
pi <- sum(sapply(num_steps,menseki))
print(pi)
after <- proc.time()
timeres["apply", ] <- after - before

#並列化
for(core in 2:max_core){
	before <- proc.time()
	library(snow)
	#library(Rmpi)
	cl <- makeCluster(core, type="SOCK")
	num_steps <- 1:10000000
	step <- 1.0 / 10000000
	clusterExport(cl, "step")
	menseki <- function(A){
		x <- (A - 0.5) * step
		y <- 4.0 / (1.0 + x * x)
		return(y * step)
	}
	pi <- sum(parSapply(cl, num_steps ,menseki))
	print(pi)
	after <- proc.time()
	timeres[paste("core", core), ] <- after - before
	stopCluster(cl)
}

#ff
before <- proc.time()
library(ff)
library(snowfall)
sfInit(parallel=TRUE, cpus=max_core, type="SOCK")
num_steps <-ff(vmode="integer", 1:10000000, length=10000000)
step <- 1.0 / 10000000
sfLibrary(ff)
sfExport("step")
menseki <- function(A){
	x <- (A - 0.5) * step
	y <- 4.0 / (1.0 + x * x)
	return(y * step)
}
pi <- sum(sfSapply(num_steps, menseki))
print(pi)
after <- proc.time()
timeres["ff", ] <- after - before


#compiler
list(.Code, list(7L, GETVAR.OP, 1L, LDCONST.OP, 2L, SUB.OP, 3L, GETVAR.OP, 4L, MUL.OP, 
	5L, SETVAR.OP, 6L, POP.OP, LDCONST.OP, 7L, LDCONST.OP, 8L, GETVAR.OP, 6L, 
	GETVAR.OP, 6L, MUL.OP, 9L, ADD.OP, 10L, DIV.OP, 11L, SETVAR.OP, 12L, POP.OP, GETVAR.OP, 
	12L, GETVAR.OP, 4L, MUL.OP, 13L, RETURN.OP), list({
	x <- (A - 0.5) * step
	y <- 4/(1 + x * x )
	return( y * step )}, A, 0.5, A - 0.5, step, ( A - 0.5) * step, x, 4, 1, x * x, 1 + x * x, 4/(1 + x * x ), y, y * step)) 

before <- proc.time()
library("compiler")
num_steps <- 1:10000000
step <- 1.0 / 10000000
menseki <- function(A){
	x <- (A - 0.5) * step
	y <- 4.0 / (1.0 + x * x)
	return(y * step)
}
menseki2 <- cmpfun(menseki)
pi <- sum(sapply(num_steps, menseki2))
print(pi)
after <- proc.time()
timeres["compiler", ] <- after - before