用R中列表中的行有效地填充二维矩阵

debugcn 发表于 Dev

皮埃尔

我有一个二维矩阵列表。每个矩阵都使用函数填充fillMatrices。此函数将多个个体添加到矩阵中的每一天 0 并更新列a_M,b_M和c_M。个体数量来自初始矩阵ind。该代码有效，但是当列表中的矩阵数量很大时它很慢。例如 n = 10000：

user  system elapsed 
3.73    0.83    4.55

如果可能，我想将经过的时间减少到 <= 1 秒，并将 n 增加到 720000 个矩阵。因此，我正在寻找仅优化第 3 部分的方法。这是代码：

    ###############################################
    ###############################################
    ## Section 3
    ## Run the function "fillMatrices"
    indexTime <- 1
    dt_t_1 <- do.call(rbind, lapply(list_matrices, function(x) x[1,]))
    dt_t <- fillMatrices(dt_t_1 = dt_t_1, species = c("a_M", "b_M", "c_M"), maxDuration = 5, matrixColumns = col_mat)

    ## Fill the matrices within the list
    system.time(for(i in 1:n){
    list_matrices[[i]][indexTime + 1,] <- dt_t[,i]
    })

    ## test <- list_matrices[[1]]

第 1 节的代码用于初始化矩阵，该函数fillMatrices可以在第 2 节中找到。在我的示例中，该函数用于填充一个物种的矩阵。实际上，该函数通过更改参数用于 3 个物种（即应用 3 次）species = c("a_M", "b_M", "c_M")。如何加速我的代码？任何建议将不胜感激。

以下是第 1 节和第 2 节的代码：

rm(list=ls(all=TRUE))
library(ff)
library(dplyr)
set.seed(12345)

## Define the number of individuals
n <- 10000

###############################################
###############################################
## Section 1
## Build the list of 2D matrices
v_date <- as.vector(outer(c(paste(seq(0, 1, by = 1), "day", sep="_"), paste(seq(2, 5, by = 1), "days", sep="_")), c("a_M", "b_M", "c_M"), paste, sep="|"))
col_mat <- c("year", "day", "time", "ID", "died", v_date)
list_matrices <- list()
for(i in 1:n){
  print(i)
  list_matrices[[i]] <- ff(-999, dim=c(3650, length(col_mat)), dimnames=list(NULL, col_mat), vmode="double", overwrite = TRUE)
}
## test <- list_matrices[[1]]
## dim(list_matrices[[1]])

## Fill the first row of each matrix
for(i in 1:n){
  print(i)
  list_matrices[[i]][1,] <- c(1, 1, 1, i-1, 0, rep(0, length(v_date)))
}
## test <- list_matrices[[2]]

## Build the matrix "individual"
ind <- as.matrix(data.frame(year = rep(1, n), day = rep(1, n), time = rep(1, n), died = rep(0, n), ID = (seq(1, n, 1))- 1, a_M = sample(1:10, n, replace = T), b_M = sample(1:10, n, replace = T), c_M = sample(1:10, n, replace = T)))
## print(ind)

###############################################
###############################################
## Section 2
## Function to convert a data frame into a matrix
convertDFToMat <- function(x){
  mat <- as.matrix(x[,-1])
  ifelse(is(x[,1], "data.frame"), rownames(mat) <- pull(x[,1]), rownames(mat) <- x[,1])
  ## Convert character matrix into numeric matrix
  mat <- apply(mat, 2, as.numeric)

  return(mat)
}

## Define the function that is used to fill the matrices within the list
fillMatrices <- function(dt_t_1, species, maxDuration, matrixColumns){

  ## Format data
  dt <- as.data.frame(dt_t_1) %>% 
    reshape::melt(id = c("ID")) %>% 
    arrange(ID) %>%
    dplyr::mutate_all(as.character)
  ## summary(dt)

  ## Break out the variable "variable" into different columns, with one row for each individual-day
  dt_reshape_filter_1 <- dt %>%
    dplyr::filter(!variable %in% c("year", "day", "time", "ID", "died")) %>%
    dplyr::mutate(day = variable %>% gsub(pattern = "\\_.*", replacement = "", x = .), col  = variable %>% gsub(pattern = ".*\\|", replacement = "", x = .)) %>%
    dplyr::select(-variable) %>%
    tidyr::spread(col, value) %>%
    dplyr::mutate_all(as.numeric) %>%
    dplyr::arrange(ID, day)
  ## summary(dt_reshape_filter_1)

  ## Apply requested transformations and build the data frame
  dt_transform <- dt_reshape_filter_1 %>% 
    dplyr::rename_at(vars(species), ~ c("a", "b", "c")) %>%
    dplyr::mutate(day = day + 1) %>% 
    dplyr::filter(day < maxDuration + 1) %>% 
    dplyr::bind_rows(tibble(ID = ind[,c("ID")], day = 0, a = ind[,c("a_M")], b = ind[,c("b_M")])) %>%
    dplyr::mutate(c = a + b) %>%
    dplyr::rename_at(vars("a", "b", "c"), ~ species) %>%
    dplyr::arrange(ID, day)
  ## summary(dt_transform)

  ## Take different columns of the data frame and gather them into a single column
  dt_gather <- dt_transform %>% 
    tidyr::gather(variable, value, species) %>% 
    dplyr::mutate(day = if_else(day > 1, paste0(day, "_days"), paste0(day, "_day"))) %>% 
    tidyr::unite(variable, c("day", "variable"), sep = "|") %>%
    dplyr::rename(var2 = ID) %>%
    dplyr::mutate_all(as.character)
  ## summary(dt_gather)

  ## Add the other columns in the data frame and convert the resulting data frame into a matrix
  dt_reshape_filter_2 <- dt %>%
    dplyr::rename(var2 = ID) %>%
    dplyr::filter(variable %in% c("year", "day", "time", "ID", "died")) %>%
    tidyr::spread(variable, value) %>%
    dplyr::arrange(as.numeric(var2)) %>%
    dplyr::mutate(year = ind[,c("year")], 
                  day = ind[,c("day")], 
                  time = ind[,c("time")],
                  ID = ind[,c("ID")],
                  died = ind[,c("died")]) %>%
    tidyr::gather(variable, value, c(year, day, time, ID, died)) %>%
    dplyr::arrange(as.numeric(var2)) %>%
    dplyr::mutate_all(as.character)
  ## summary(dt_reshape_filter_2)

  ## Build the output matrix         
  dt_bind <- bind_rows(dt_reshape_filter_2, dt_gather) %>%
    tidyr::spread(var2, value) %>%
    dplyr::arrange(match(variable, matrixColumns)) %>%
    dplyr::select("variable", as.character(ind[,c("ID")]))
  ## summary(dt_bind)
  dt_mat <- convertDFToMat(dt_bind)
  ## summary(dt_mat)

  return(dt_mat)

}

油菜

制作 3D 数组而不是 2D 矩阵列表为您提供更多选择

library(ff)
library(dplyr)
set.seed(12345)

## Define the number of individuals
n <- 10000L
n_row <- 3650L

#array way:
v_date <- as.vector(outer(c(paste(seq(0, 1, by = 1), "day", sep="_"), paste(seq(2, 5, by = 1), "days", sep="_")), c("a_M", "b_M", "c_M"), paste, sep="|"))
col_mat <- c("year", "day", "time", "ID", "died", v_date)

arr1 <- ff(-999L, dim = c(n_row, length(col_mat), n), dimnames = list(NULL, col_mat, NULL))

## Fill the first row of each matrix slice
arr1[1, , ] <- c(1L, 1L, 1L, NA, 0L, rep(0L, length(v_date)))
arr1[1, 4, ] <- seq_len(n)-1L

## Build the matrix "individual"
ind <- as.matrix(data.frame(year = rep(1L, n), day = rep(1L, n), time = rep(1L, n), died = rep(0L, n), ID = (seq(1L, n, 1L))- 1L, a_M = sample(1L:10L, n, replace = T), b_M = sample(1L:10L, n, replace = T), c_M = sample(1L:10L, n, replace = T)))

##fill the matrix
indexTime <- 1L
dt_t <- fillMatrices(dt_t_1 = t(arr1[1, ,]), species = c("a_M", "b_M", "c_M"), maxDuration = 5, matrixColumns = col_mat)

## reassign
system.time(
  arr1[indexTime + 1, ,] <- dt_t
)

   user  system elapsed 
   0.05    0.70    0.7

# for comparison

#> system.time(for(i in 1:n){
#+   list_matrices[[i]][indexTime + 1,] <- dt_t[,i]
#+ })
#   user  system elapsed 
#   4.75    1.08    5.90

据我所知，它给我的结果与您原来的方法相同，但速度要快得多。

本文收集自互联网，转载请注明来源。

如有侵权，请联系[email protected] 删除。

编辑于2021-07-28

我来说两句

0条评论

登录后参与评论

来自分类Dev

Related 相关文章

文章

用R中列表中的行有效地填充二维矩阵

用R中列表中的行有效地填充二维矩阵

如何有效地在 Javascript 中删除二维数组中的空列

有效地对C中的二维数组的列进行排序

如何有效地将矩阵的每一行与R中列表的每个部分进行比较？

有效地用相同的对象填充列表

有效地找到两个二维numpy数组的行相交

如何有效地散布一个numpy的二维数组

有效地找到两个二维numpy数组的行相交

有效地将值添加到二维数组

有效地移动二维 numpy 子阵列

使用StaticArrays有效地填充矩阵中的块

如何有效地更改矩阵/嵌套列表中的条目？

如何用概率列表有效地计算二项式概率？在R中

有效地重塑R中的非标准伪编码矩阵或表

R中的二维数组列表上的矩阵乘法

有效地填充稀疏矩阵

如何有效地搜索列表中的项目？

有效地从列表中删除重复项

如何有效地搜索列表中的项目？

用标记有效地替换字符串列表中的数字

如何有效地在二维上合并两个3d数组？

如何有效地在二维上合并两个3d数组？

如何从二维 numpy 数组有效地生成 0 和 1 的掩码数组？

R：有效地从向量构造矩阵并将其填充为特定条件

使用对列表有效地选择SQL Server中的行？

如何有效地在新行上打印列表中的项目？

有效地将向量解压缩到二进制矩阵Octave中

有效地连接大矩阵的长列表

有效地使用R中的集合

在 r 中更有效地重塑？