for
# Walk through the integers 1, 2, 3 and print the square of each one.
for (number in seq_len(3)) {
  print(number * number)
}
## [1] 1
## [1] 4
## [1] 9
For consistency, I’ve changed the code above to use the same variable name as the rest of the examples:
# Same loop as before, but with the index named `i`: print i squared for
# i = 1, 2, 3.
for (i in seq_len(3)) {
  print(i * i)
}
## [1] 1
## [1] 4
## [1] 9
lapply
fun_lappy <- function(x){
  # Given a value x, build one vector holding x, x squared and x cubed
  # (in that order) and return it.
  squared <- x^2
  cubed <- x^3
  c(x, squared, cubed)
}
fun_lappy(3) # returns: 3, 3^2 = 9, 3^3 = 27
## [1] 3 9 27
1:3
## [1] 1 2 3
lapply(X = 1:3, FUN = fun_lappy)
## [[1]]
## [1] 1 1 1
##
## [[2]]
## [1] 2 4 8
##
## [[3]]
## [1] 3 9 27
foreach do
library(foreach)
foreach(i = 1:3) %do% {
i * i
}
## [[1]]
## [1] 1
##
## [[2]]
## [1] 4
##
## [[3]]
## [1] 9
# capture the output of foreach into a variable
foreach_list <- foreach(i = 1:3) %do% {
i * i
}
# getting things out of a list needs double square brackets
foreach_list[[1]]
## [1] 1
foreach_list[[2]]
## [1] 4
foreach_list[[3]]
## [1] 9
Appending things to a vector in a regular for loop
Sometimes you want to loop through something and add the output to a vector
or list. You can do this using a regular `for` loop, but this is usually
when people will advise you not to use loops in R. This is especially
the case with `cbind` and `rbind`, when you have to
append values to a dataframe using a loop.
# Start from an empty vector (c() with no arguments is NULL) and grow it by
# one element on every pass through the loop.
vector <- c()
for(i in 1:3){
sq <- i * i
print(sq)
# c(vector, sq) builds a new, one-element-longer vector and rebinds `vector`
# to it; this is the append-inside-a-loop pattern being demonstrated.
vector <- c(vector, sq)
}
## [1] 1
## [1] 4
## [1] 9
vector
## [1] 1 4 9
foreach dopar
library(doParallel)
## Loading required package: iterators
## Loading required package: parallel
foreach_list <- foreach(i = 1:3) %dopar% {
i * i
}
## Warning: executing %dopar% sequentially: no parallel backend registered
# create clusters
# makeCluster(4) requests a cluster of 4 workers; registerDoParallel(cl)
# registers it as the backend for %dopar%. Without a registered backend,
# %dopar% runs sequentially and emits a warning saying so.
cl <- makeCluster(4)
registerDoParallel(cl)
# same as above
# Identical loop body to the earlier %dopar% call; the only difference is that
# a backend is now registered. The results are collected into foreach_list.
foreach_list <- foreach(i = 1:3) %dopar% {
i * i
}
foreach_list
## [[1]]
## [1] 1
##
## [[2]]
## [1] 4
##
## [[3]]
## [1] 9
# close clusters
stopCluster(cl)
registerDoSEQ()
parallel apply family
1:3
## [1] 1 2 3
lapply(X = 1:3, FUN = fun_lappy)
## [[1]]
## [1] 1 1 1
##
## [[2]]
## [1] 2 4 8
##
## [[3]]
## [1] 3 9 27
cl <- makeCluster(4)
registerDoParallel(cl)
parSapply(cl = cl, X = 1:3, FUN = fun_lappy)
## [,1] [,2] [,3]
## [1,] 1 2 3
## [2,] 1 4 9
## [3,] 1 8 27
parLapply(cl = cl, X = 1:3, fun = fun_lappy)
## [[1]]
## [1] 1 1 1
##
## [[2]]
## [1] 2 4 8
##
## [[3]]
## [1] 3 9 27
stopCluster(cl)
registerDoSEQ()
Benchmark with small values
library(microbenchmark)
# Input for every benchmarked expression: the integers 1 to 1000.
vector <- c(1:1000)
# Accumulator used by the regular for loop below.
# NOTE(review): `empty` is created once here and never reset inside the timed
# expression. If microbenchmark evaluates the expression in this environment,
# each of the 10 repetitions appends another 1000 values to an already-grown
# vector, which would inflate the later timings for the for loop. TODO confirm.
empty <- c()
# Cluster used by the %dopar%, parSapply and parLapply cases.
cl <- makeCluster(4)
registerDoParallel(cl)
# separate each thing you want to time, with a comma
microbenchmark(
# apply
sapply(X = vector, FUN = function(x){x * x}),
lapply(X = vector, FUN = function(x){x * x}),
# regular for loop
for(i in vector){
empty <- c(empty, i * i)
},
# foreach loop
foreach_list <- foreach(i = vector) %do% {
i * i
},
# foreach loop with parallel backend
foreach_list <- foreach(i = vector) %dopar% {
i * i
},
# parallel sapply
parSapply(cl, vector, function(x){x * x}),
# parallel lapply
parLapply(cl = cl, X = vector, fun = function(x){x * x}),
# only test each loop 10 times, instead of default value
times = 10)
## Unit: microseconds
## expr
## sapply(X = vector, FUN = function(x) { x * x })
## lapply(X = vector, FUN = function(x) { x * x })
## for (i in vector) { empty <- c(empty, i * i) }
## foreach_list <- foreach(i = vector) %do% { i * i }
## foreach_list <- foreach(i = vector) %dopar% { i * i }
## parSapply(cl, vector, function(x) { x * x })
## parLapply(cl = cl, X = vector, fun = function(x) { x * x })
## min lq mean median uq max neval
## 742.861 868.628 956.4620 903.1240 1097.101 1303.277 10
## 495.295 595.710 765.4985 659.1065 930.858 1193.816 10
## 1562.566 8733.582 16339.6369 17027.2665 22505.735 30457.564 10
## 239166.682 245262.464 247710.1623 247072.6320 248961.145 261592.290 10
## 346741.386 364317.662 379288.0742 369792.7685 380320.741 452426.713 10
## 40031.085 40296.467 40880.2206 40837.6970 41370.241 42223.376 10
## 38770.815 39704.909 40254.9182 40162.3770 40530.919 42081.794 10
stopCluster(cl)
registerDoSEQ()
Benchmark with larger values
# Print a one-line "<label> took<sep> <value> <units>" message for a timed step.
#
# Arguments:
#   str_what_did_you_time  label for the thing that was timed (e.g. 'sapply')
#   diff_time              a difftime, e.g. Sys.time() - start_time
#   sep                    text placed directly after "took"; defaults to ':'
#                          so the default output is "<label> took: <value> <units>"
#
# Returns whatever cat() returns (NULL, invisibly); the function is called for
# its printed side effect.
#
# Fix: `sep` used to be accepted but ignored because the ':' was hard-coded in
# the format string. It is now honoured; output with the default is unchanged.
print_difftime_prompt <- function(str_what_did_you_time, diff_time, sep=':'){
  # unclass() drops the difftime class but keeps its attributes, so the
  # elapsed value and its 'units' attribute can be read as plain values.
  raw_diff <- unclass(diff_time)
  parse_time <- raw_diff[1]
  parse_units <- attr(raw_diff, 'units')
  prompt_string <- sprintf('%s took%s %s %s', str_what_did_you_time, sep, parse_time, parse_units)
  # cat() joins its arguments with a space, so the line ends in " \n".
  cat(prompt_string, '\n')
}
vector <- c(1:100000)
cl <- makeCluster(4)
registerDoParallel(cl)
# sapply
strt <- Sys.time()
output <- sapply(X = vector, FUN = function(x){x})
print_difftime_prompt('sapply', Sys.time() - strt)
## sapply took: 0.155578851699829 secs
rm(output)
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 506772 27.1 899071 48.1 874656 46.8
## Vcells 821585 6.3 1467557 11.2 1451443 11.1
# lapply
strt <- Sys.time()
output <- lapply(X = vector, FUN = function(x){x})
print_difftime_prompt('lapply', Sys.time() - strt)
## lapply took: 0.0575652122497559 secs
rm(output)
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 506821 27.1 899071 48.1 899071 48.1
## Vcells 821649 6.3 1467557 11.2 1451443 11.1
# regular for loop
# Time how long it takes to copy `vector` into `fill` one element at a time.
# `fill` starts empty and is extended with c() on every iteration, so a new,
# longer vector is built each time through the loop.
fill <- c()
strt <- Sys.time()
for(i in vector){
fill <- c(fill, i)
}
# Elapsed wall-clock time since `strt`, printed with the helper defined above.
print_difftime_prompt('regular for loop', Sys.time() - strt)
## regular for loop took: 17.0807673931122 secs
# foreach loop
strt <- Sys.time()
foreach_list <- foreach(i = vector) %do% {
i
}
print_difftime_prompt('foreach do', Sys.time() - strt)
## foreach do took: 44.5841858386993 secs
rm(foreach_list)
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 802903 42.9 1476915 78.9 1476915 78.9
## Vcells 1067737 8.2 2128632 16.3 2128595 16.3
# foreach loop with parallel backend
strt <- Sys.time()
foreach_list <- foreach(i = vector) %dopar% {
i
}
print_difftime_prompt('foreach dopar', Sys.time() - strt)
## foreach dopar took: 1.18145899772644 mins
rm(foreach_list)
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 802952 42.9 1835812 98.1 1835812 98.1
## Vcells 1067798 8.2 2932173 22.4 2932173 22.4
# parallel sapply
strt <- Sys.time()
output <- parSapply(cl, vector, function(x){x})
print_difftime_prompt('parSapply', Sys.time() - strt)
## parSapply took: 0.325727939605713 secs
rm(output)
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 803004 42.9 1835812 98.1 1835812 98.1
## Vcells 1067867 8.2 2932173 22.4 2932173 22.4
# parallel lapply
strt <- Sys.time()
output <- parLapply(cl = cl, X = vector, fun = function(x){x})
print_difftime_prompt('parLapply', Sys.time() - strt)
## parLapply took: 0.216223001480103 secs
rm(output)
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 803050 42.9 1835812 98.1 1835812 98.1
## Vcells 1067926 8.2 2932173 22.4 2932173 22.4
stopCluster(cl)
registerDoSEQ()
list to df
library(plyr)
foreach_list <- foreach(i = 1:3) %do% {
i * i
}
foreach_list
## [[1]]
## [1] 1
##
## [[2]]
## [1] 4
##
## [[3]]
## [1] 9
ldply(foreach_list, .fun = data.frame)
## X..1L.. X..2L.. X..3L..
## 1 1 NA NA
## 2 NA 4 NA
## 3 NA NA 9
as.data.frame(foreach_list)
## X1L X4L X9L
## 1 1 4 9