for

# Walk over 1, 2, 3 and print each value multiplied by itself.
for (number in seq_len(3)) {
  print(number * number)
}
## [1] 1
## [1] 4
## [1] 9

For consistency, here is the same loop again, rewritten to use the variable name `i` that the rest of the examples use.

# Same loop as before, with the iteration variable named `i`.
for (i in seq_len(3)) {
  print(i * i)
}
## [1] 1
## [1] 4
## [1] 9

lapply

# Return the input followed by its square and its cube, combined with c().
fun_lappy <- function(x) {
  squared <- x^2
  cubed <- x^3
  c(x, squared, cubed)
}
# a single call gives the value, its square and its cube: 3, 9, 27
fun_lappy(3)
## [1]  3  9 27
# the sequence that lapply() is about to walk over
1:3
## [1] 1 2 3
# call fun_lappy once per element; lapply() collects the results in a list
lapply(1:3, fun_lappy)
## [[1]]
## [1] 1 1 1
##
## [[2]]
## [1] 2 4 8
##
## [[3]]
## [1]  3  9 27

foreach do

library(foreach)

# Evaluate the body once for each value of i (1, 2, 3) using %do%.
# The result is not assigned, so it is printed: a list with one element
# per iteration (see the output below).
foreach(i = 1:3) %do% {
  i * i
}
## [[1]]
## [1] 1
##
## [[2]]
## [1] 4
##
## [[3]]
## [1] 9
# capture the output of foreach into a variable
foreach_list <- foreach(i = 1:3) %do% {
  i * i
}

# getting things out of a list needs double square brackets
# ([[k]] extracts the k-th element itself rather than a one-element list)
foreach_list[[1]]
## [1] 1
foreach_list[[2]]
## [1] 4
foreach_list[[3]]
## [1] 9

Appending things to a vector in a regular for loop

Sometimes you want to loop through something and add each result to a vector or list. You can do this with a regular for loop, but this is exactly the situation in which people advise against using loops in R, because the whole object is copied every time it grows. The problem is even worse when you use cbind or rbind to append values to a data frame inside a loop.

# Start from an empty (NULL) result and grow it by one element per iteration.
vector <- NULL
for (i in seq_len(3)) {
  sq <- i * i
  print(sq)
  # each append builds a new, longer vector out of the old one plus `sq`
  vector <- append(vector, sq)
}
## [1] 1
## [1] 4
## [1] 9
# show the accumulated squares
vector
## [1] 1 4 9

foreach dopar

library(doParallel)
## Loading required package: iterators
## Loading required package: parallel
# First attempt: %dopar% is used before any parallel backend has been
# registered, so it runs sequentially and emits the warning shown below.
foreach_list <- foreach(i = 1:3) %dopar% {
  i * i
}
## Warning: executing %dopar% sequentially: no parallel backend registered
# create clusters
# (a cluster of 4 workers, registered as the backend used by %dopar%)
cl <- makeCluster(4)
registerDoParallel(cl)

# same as above
# (same loop, now with the cluster registered, so no warning is recorded)
foreach_list <- foreach(i = 1:3) %dopar% {
  i * i
}
foreach_list
## [[1]]
## [1] 1
##
## [[2]]
## [1] 4
##
## [[3]]
## [1] 9
# close clusters
# (stop the workers, then switch foreach back to its sequential backend)
stopCluster(cl)
registerDoSEQ()

parallel apply family

# Serial baseline: the input sequence and the plain lapply() result,
# for comparison with the parallel versions further down.
1:3
## [1] 1 2 3
lapply(X = 1:3, FUN = fun_lappy)
## [[1]]
## [1] 1 1 1
##
## [[2]]
## [1] 2 4 8
##
## [[3]]
## [1]  3  9 27
# Start a 4-worker cluster for the parallel apply calls.
# NOTE(review): parSapply()/parLapply() below receive the cluster directly
# through `cl`; registerDoParallel() is presumably only needed for
# foreach's %dopar% - confirm whether it is required here.
cl <- makeCluster(4)
registerDoParallel(cl)

# Parallel sapply: the recorded result is a matrix with one column per
# input value (rows: value, square, cube).
parSapply(cl = cl, X = 1:3, FUN = fun_lappy)
##      [,1] [,2] [,3]
## [1,]    1    2    3
## [2,]    1    4    9
## [3,]    1    8   27
# Parallel lapply: same list shape as the serial lapply() above.
# Note the argument is spelled `fun` here, not `FUN`.
parLapply(cl = cl, X = 1:3, fun = fun_lappy)
## [[1]]
## [1] 1 1 1
##
## [[2]]
## [1] 2 4 8
##
## [[3]]
## [1]  3  9 27
# Shut the workers down and restore foreach's sequential backend.
stopCluster(cl)
registerDoSEQ()

Benchmark with small values

library(microbenchmark)
# Input for the small benchmark: the integers 1..1000.
vector <- c(1:1000)
empty <- c()

# 4-worker cluster, registered so that %dopar% below runs on it; the same
# cluster object is handed directly to parSapply()/parLapply().
cl <- makeCluster(4)
registerDoParallel(cl)

# separate each thing you want to time, with a comma
microbenchmark(
  # apply
  sapply(X = vector, FUN = function(x){x * x}),
  lapply(X = vector, FUN = function(x){x * x}),

  # regular for loop
  # The accumulator is reset inside the timed expression. `empty` lives in
  # the calling environment, so without the reset it kept growing across
  # repetitions and every later repetition appended to a longer vector,
  # inflating this row's timings.
  {
    empty <- c()
    for(i in vector){
      empty <- c(empty, i * i)
    }
  },
  # foreach loop
  foreach_list <- foreach(i = vector) %do% {
    i * i
  },

  # foreach loop with parallel backend
  foreach_list <- foreach(i = vector) %dopar% {
    i * i
  },

  # parallel sapply
  parSapply(cl, vector, function(x){x * x}),

  # parallel lapply
  parLapply(cl = cl, X = vector, fun = function(x){x * x}),

  # only test each loop 10 times, instead of default value
  times = 10)
# NOTE: the output below was recorded before the accumulator reset was added
# to the for-loop expression (its label and timings reflect the old,
# ever-growing `empty`); re-run the benchmark to refresh it.
## Unit: microseconds
##                                                             expr
##              sapply(X = vector, FUN = function(x) {     x * x })
##              lapply(X = vector, FUN = function(x) {     x * x })
##               for (i in vector) {     empty <- c(empty, i * i) }
##           foreach_list <- foreach(i = vector) %do% {     i * i }
##        foreach_list <- foreach(i = vector) %dopar% {     i * i }
##                 parSapply(cl, vector, function(x) {     x * x })
##  parLapply(cl = cl, X = vector, fun = function(x) {     x * x })
##         min         lq        mean      median         uq        max neval
##     742.861    868.628    956.4620    903.1240   1097.101   1303.277    10
##     495.295    595.710    765.4985    659.1065    930.858   1193.816    10
##    1562.566   8733.582  16339.6369  17027.2665  22505.735  30457.564    10
##  239166.682 245262.464 247710.1623 247072.6320 248961.145 261592.290    10
##  346741.386 364317.662 379288.0742 369792.7685 380320.741 452426.713    10
##   40031.085  40296.467  40880.2206  40837.6970  41370.241  42223.376    10
##   38770.815  39704.909  40254.9182  40162.3770  40530.919  42081.794    10
# Shut the workers down and restore foreach's sequential backend.
stopCluster(cl)
registerDoSEQ()

Benchmark with larger values

# Print a one-line timing message of the form
#   "<label> took<sep> <value> <units>"
#
# Args:
#   str_what_did_you_time: label describing what was timed.
#   diff_time: elapsed time carrying a 'units' attribute (for example the
#     result of Sys.time() - start); its first element is the value printed.
#   sep: text placed directly after "took"; defaults to ':'.
#
# Returns:
#   The value of cat() (invisible NULL); called for its printed output.
print_difftime_prompt <- function(str_what_did_you_time, diff_time, sep=':'){
    # unclass() drops the class but keeps the attributes, so both the raw
    # number and its units can be read from the same object
    time_fields <- unclass(diff_time)
    parse_time <- time_fields[1]
    parse_units <- attr(time_fields, 'units')
    # `sep` used to be accepted but ignored (the ':' was hard-coded in the
    # format string); with the default value the output is unchanged
    prompt_string <- sprintf('%s took%s %s %s',
                             str_what_did_you_time, sep, parse_time, parse_units)
    cat(prompt_string, '\n')
}

# Larger benchmark: 100000 inputs, each approach timed once with Sys.time().
# Every timed function here simply returns its input.
vector <- c(1:100000)

# 4-worker cluster shared by the %dopar%, parSapply and parLapply sections.
cl <- makeCluster(4)
registerDoParallel(cl)

# sapply
strt <- Sys.time()
output <- sapply(X = vector, FUN = function(x){x})
print_difftime_prompt('sapply', Sys.time() - strt)
## sapply took: 0.155578851699829 secs
# drop the result and run the garbage collector before the next measurement
rm(output)
gc()
##          used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 506772 27.1     899071 48.1   874656 46.8
## Vcells 821585  6.3    1467557 11.2  1451443 11.1
# lapply
strt <- Sys.time()
output <- lapply(X = vector, FUN = function(x){x})
print_difftime_prompt('lapply', Sys.time() - strt)
## lapply took: 0.0575652122497559 secs
rm(output)
gc()
##          used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 506821 27.1     899071 48.1   899071 48.1
## Vcells 821649  6.3    1467557 11.2  1451443 11.1
# regular for loop
# `fill` is grown by one element per iteration with c(), i.e. the
# append-in-a-loop pattern; its initialisation is outside the timed region.
# NOTE(review): unlike the other sections, `fill` is not removed and gc()
# is not called after this measurement.
fill <- c()
strt <- Sys.time()
for(i in vector){
  fill <- c(fill, i)
}
print_difftime_prompt('regular for loop', Sys.time() - strt)
## regular for loop took: 17.0807673931122 secs
# foreach loop
strt <- Sys.time()
foreach_list <- foreach(i = vector) %do% {
  i
}
print_difftime_prompt('foreach do', Sys.time() - strt)
## foreach do took: 44.5841858386993 secs
rm(foreach_list)
gc()
##           used (Mb) gc trigger (Mb) max used (Mb)
## Ncells  802903 42.9    1476915 78.9  1476915 78.9
## Vcells 1067737  8.2    2128632 16.3  2128595 16.3
# foreach loop with parallel backend
strt <- Sys.time()
foreach_list <- foreach(i = vector) %dopar% {
  i
}
print_difftime_prompt('foreach dopar', Sys.time() - strt)
# note: this result is reported in minutes, the others in seconds
## foreach dopar took: 1.18145899772644 mins
rm(foreach_list)
gc()
##           used (Mb) gc trigger (Mb) max used (Mb)
## Ncells  802952 42.9    1835812 98.1  1835812 98.1
## Vcells 1067798  8.2    2932173 22.4  2932173 22.4
# parallel sapply
strt <- Sys.time()
output <- parSapply(cl, vector, function(x){x})
print_difftime_prompt('parSapply', Sys.time() - strt)
## parSapply took: 0.325727939605713 secs
rm(output)
gc()
##           used (Mb) gc trigger (Mb) max used (Mb)
## Ncells  803004 42.9    1835812 98.1  1835812 98.1
## Vcells 1067867  8.2    2932173 22.4  2932173 22.4
# parallel lapply
strt <- Sys.time()
output <- parLapply(cl = cl, X = vector, fun = function(x){x})
print_difftime_prompt('parLapply', Sys.time() - strt)
## parLapply took: 0.216223001480103 secs
rm(output)
gc()
##           used (Mb) gc trigger (Mb) max used (Mb)
## Ncells  803050 42.9    1835812 98.1  1835812 98.1
## Vcells 1067926  8.2    2932173 22.4  2932173 22.4
# Shut the workers down and restore foreach's sequential backend.
stopCluster(cl)
registerDoSEQ()

list to df

library(plyr)
# Rebuild the three-element list of squares used in the earlier sections.
foreach_list <- foreach(i = 1:3) %do% {
  i * i
}
foreach_list
## [[1]]
## [1] 1
##
## [[2]]
## [1] 4
##
## [[3]]
## [1] 9
# Option 1: plyr::ldply() with data.frame applied to each list element.
# In the recorded output every element lands in its own column, with NA
# in the other rows.
# NOTE(review): presumably a single value column was intended - confirm.
ldply(foreach_list, .fun = data.frame)
##   X..1L.. X..2L.. X..3L..
## 1       1      NA      NA
## 2      NA       4      NA
## 3      NA      NA       9
# Option 2: as.data.frame() on the list; the recorded output is a single
# row with one column per list element.
as.data.frame(foreach_list)
##   X1L X4L X9L
## 1   1   4   9