除了使用R base函数以外，是否有一种有效的方法来获得“ pmax”？

debugcn 发表于 Dev

费尔南多·布里托·洛佩斯

我想使用Rcpp创建一个函数，该函数可能优于R base的pmax函数。我还尝试处理Rcpp函数内部的缺失值，这可能不是一个好主意。所有向量都必须具有一些缺失值，并且它们都是正值。这就是我将缺少的代码重新编码为-1的原因，因此，如果所有值都丢失，则最大值不存在时，我可以将其重新添加。

这是我的第一次尝试，但尚未成功：

library("benchr")
library("Rcpp")

Pmax <- function(...) {
  argd_list <- list(...)
  cppFunction("
  NumericVector cpp_pmax(List args) {
    List args0 = args[0];
    int n_arg = args.length();
    int n_vec = args0.length();
    NumericVector out(n_vec);
    out = args[0];
    for (int i = 1; i < n_arg; ++i) {
        NumericVector pa(n_vec);
        pa = args[i];
        for (int j = 0; j < n_vec; ++j) {
          if (R_IsNA(out[j])) {
            out[j] = -1;
          }
          if (R_IsNA(pa[j])) {
            pa[j] = -1;
          }
          out[j] = std::max(out[j], pa[j]);
        }
    }
    for (int j = 0; j < n_vec; ++j) {
      if (out[j] == -1) {
        out[j] = NA_REAL;
      }
    }
    return out;
  }
")
  output <- cpp_pmax(argd_list)
  return(output)
}


n <- 200000
x1 <- sample(0:1, n, replace = TRUE)
y1 <- sample(0:1, n, replace = TRUE)
z1 <- sample(0:1, n, replace = TRUE)
x1[sample(1:n, 90)]<-NA
y1[sample(1:n, 60)]<-NA
z1[sample(1:n, 70)]<-NA

pm1 <- Pmax(x1, y1, z1)
pm2 <- pmax(x1, y1, z1, na.rm = TRUE)

all(pm1 == pm2)

benchr::benchmark(pmax(x1, y1, z1, na.rm = TRUE),
                  Pmax(x1, y1, z1))

Benchmark summary:
  Time units : milliseconds 
expr                           n.eval   min lw.qu median  mean up.qu   max total relative
pmax(x1, y1, z1, na.rm = TRUE)    100  1.34  1.37   1.39  1.44  1.46  1.74   144     1.00
Pmax(x1, y1, z1)                  100 13.30 13.50  13.80 19.90 15.70 67.50  1990     9.88

编辑：

我删除了一些循环，只是在Rcpp之外用NA代替了-1，它加快了一点，但仍不胜过R base pmax。

尽管Rcpp :: pmax是一个不错的实现，但它仅处理两个向量，不确定是否可以处理缺失的值。缺少价位时，我得到了不同的结果。

第二次尝试是：

Pmax1 <- function(...) {
  args_list <- list(...)
  cppFunction("
  NumericVector cpp_pmax(List args) {
    List args0 = args[0];
    int n_arg = args.length();
    int n_vec = args0.length();
    NumericVector out(n_vec);
    out = args[0];
    for (int i = 1; i < n_arg; ++i) {
        NumericVector pa(n_vec);
        pa = args[i];
        for (int j = 0; j < n_vec; ++j) {
          if (R_IsNA(out[j])) {
            out[j] = -1;
          }
          if (R_IsNA(pa[j])) {
            pa[j] = -1;
          }
          out[j] = std::max(out[j], pa[j]);
        }
    }
    return out;
  }
")
  output <- cpp_pmax(args_list)
  output[output == -1] <- NA
  return(output)
}

Pmax2 <- function(...) {
  args_list <- list(...)
  cppFunction("
  NumericVector cpp_pmax(List args) {
    NumericVector out = args[0];
    int n_arg = args.length();
    int n_vec = out.length();
    for (int j = 0; j < n_vec; ++j) {
      if (NumericVector::is_na(out[j])) out[j] = -1;
    }
    for (int i = 1; i < n_arg; ++i) {
      NumericVector pa = args[i];
      for (int j = 0; j < n_vec; ++j) {
        if (NumericVector::is_na(pa[j])) pa[j] = -1;
        out[j] = std::max(out[j], pa[j]);
      }
    }
    return out;
  }
")
  output <- cpp_pmax(args_list)
  output[output == -1] <- NA
  return(output)
}

n <- 200000
x <- sample(0:5, n, replace = TRUE)
y <- sample(0:5, n, replace = TRUE)
z <- sample(0:5, n, replace = TRUE)
w <- sample(0:5, n, replace = TRUE)
x[sample(1:n, 900)]<-NA
y[sample(1:n, 600)]<-NA
z[sample(1:n, 700)]<-NA
z[sample(1:n, 800)]<-NA

benchr::benchmark(pmax(x,  y, z, w, na.rm = TRUE),
                  Pmax1(x,  y, z, w),
                  Pmax2(x, y, z, w))

Benchmark summary:
  Time units : milliseconds 
                          expr n.eval   min lw.qu median  mean up.qu  max total relative
pmax(x, y, z, w, na.rm = TRUE)    100  2.38  2.43   2.46  2.46  2.48  2.6   246     1.00
Pmax1(x, y, z, w)                 100 16.00 16.90  17.20 19.40 17.70 61.2  1940     6.98
Pmax2(x, y, z, w)                 100  9.44  9.74   9.90 11.30 10.10 45.6  1130     4.02

有谁知道如何使其比R base pmax更快？

想法是在Rcpp函数内部都具有一个通用函数来处理不同数量的向量。

基于@DirkEddelbuettel和@Cole的更新

感谢您帮助优化代码。受@DirkEddelbuettel和@Cole的启发，我只添加了Rcpp :: pmax即可删除其中一个循环，它也有助于加快循环速度。

library("bench")
library("Rcpp")

cppFunction("
  IntegerVector cpp_pmax1(List args) {
    IntegerVector tmp = args[0];
    IntegerVector out = clone(tmp);
    int n_arg = args.length();
    int n_vec = out.length();
    for (int i = 1; i < n_arg; ++i) {
      IntegerVector pa = args[i];
      for (int j = 0; j < n_vec; ++j) {
        if (pa[j] > out[j]) out[j] = pa[j];
      }
    }
    return out;
  }
")

cppFunction("
  IntegerVector cpp_pmax2(List args) {
    IntegerVector tmp = args[0];
    IntegerVector out = clone(tmp);
    int n_arg = args.length();
    int n_vec = out.length();
    for (int i = 1; i < n_arg; ++i) {
      IntegerVector pa = args[i];
      out = pmax(out, pa);
    }
    return out;
  }
")

Pmax1 <- function(...) {
  cpp_pmax1(list(...))
}


Pmax2 <- function(...) {
  cpp_pmax2(list(...))
}


n <- 200000
x <- sample(0:5, n, replace = TRUE)
y <- sample(0:5, n, replace = TRUE)
z <- sample(0:5, n, replace = TRUE)
w <- sample(0:5, n, replace = TRUE)
k <- sample(0:5, n, replace = TRUE)
x[sample(1:n, 900)] <- NA
y[sample(1:n, 600)] <- NA
z[sample(1:n, 700)] <- NA
w[sample(1:n, 800)] <- NA
k[sample(1:n, 800)] <- NA

pm0 <- pmax(x,  y, z, w, k, na.rm = TRUE)
pm1 <- Pmax1(x, y, z, w, k)
pm2 <- Pmax2(x, y, z, w, k)

benchr::benchmark(pmax(x,  y, z, w, k, na.rm = TRUE),
                  Pmax1(x, y, z, w, k),
                  Pmax2(x, y, z, w, k))


Benchmark summary:
  Time units : microseconds 
                             expr n.eval  min lw.qu median mean up.qu  max  total relative
pmax(x, y, z, w, k, na.rm = TRUE)    100 2880  2900   2920 3050  3080 8870 305000     5.10
Pmax1(x, y, z, w, k)                 100 2150  2180   2200 2310  2350 8060 231000     3.85
Pmax2(x, y, z, w, k)                 100  527   558    572  812   719 7870  81200     1.00

谢谢！

油菜

从bench::mark发现中可以看出，似乎存在一些内存分配问题。

bench::mark(pmax(x,  y, z, w, na.rm = TRUE),
            Pmax2(x, y, z, w))

## # A tibble: 2 x 13
##   expression                         min  median `itr/sec` mem_alloc
##   <bch:expr>                     <bch:t> <bch:t>     <dbl> <bch:byt>
## 1 pmax(x, y, z, w, na.rm = TRUE)  5.79ms  6.28ms     157.    781.3KB
## 2 Pmax2(x, y, z, w)              39.56ms 54.48ms      19.7    9.18MB

记忆胁迫

与base相比，内存分配是其10倍pmax()。您的rcpp是相对简单的，因此这暗示着某种强制。在查看样本数据时，您正在将整数向量发送到数字签名。这产生了昂贵的强制。让我们更新期望IntegerVector的签名和代码。我只是将所有内容从NumericVector更改IntegerVector为。

  expression                         min  median `itr/sec` mem_alloc
  <bch:expr>                     <bch:t> <bch:t>     <dbl> <bch:byt>
1 pmax(x, y, z, w, na.rm = TRUE)  1.89ms  2.33ms     438.    781.3KB
2 Pmax2_int(x, y, z, w)          37.42ms 49.88ms      17.6    2.32MB

重新编译

OP代码包含cppFunction在较大的功能代码中。除非我们需要在每个循环中重新编译它，否则我们可以编译然后从R中调用已编译的代码。这是此数据集大小最大的性能提升。

cppFunction("
  IntegerVector cpp_pmax_pre(List args) {
    IntegerVector out = args[0];
    int n_arg = args.length();
    int n_vec = out.length();
    for (int j = 0; j < n_vec; ++j) {
      if (IntegerVector::is_na(out[j])) out[j] = -1;
    }
    for (int i = 1; i < n_arg; ++i) {
      IntegerVector pa = args[i];
      for (int j = 0; j < n_vec; ++j) {
        if (IntegerVector::is_na(pa[j])) pa[j] = -1;
        out[j] = std::max(out[j], pa[j]);
      }
    }
    return out;
  }
")

Pmax2_int_pre <- function(...) {
  args_list <- list(...)
  output <- cpp_pmax_pre(args_list)
  output[output == -1] <- NA
  return(output)
}

bench::mark(pmax(x,  y, z, w, na.rm = TRUE),
            Pmax2_int_pre(x, y, z, w))

## # A tibble: 2 x 13
##   expression                        min median `itr/sec` mem_alloc
##   <bch:expr>                     <bch:> <bch:>     <dbl> <bch:byt>
## 1 pmax(x, y, z, w, na.rm = TRUE) 2.31ms 2.42ms      397.   781.3KB
## 2 Pmax2_int_pre(x, y, z, w)      2.48ms 3.55ms      270.    2.29MB

奖金-NA_INTEGER

根据Rcpp for Everyone，该值NA_INTEGER应等于-2147483648的最低整数值。使用此方法，我们可以删除NA的替换，因为在处理int数据类型时可以直接与NA进行比较。

在此实现过程中，我还发现了上一部分的问题-我们需要克隆初始参数，以免意外地通过引用更改它。尽管如此，我们仍然比base稍快pmax()。

cppFunction("
  IntegerVector cpp_pmax_last(List args) {
    IntegerVector tmp = args[0];
    IntegerVector out = clone(tmp);
    int n_arg = args.length();
    int n_vec = out.length();
    for (int i = 1; i < n_arg; ++i) {
      IntegerVector pa = args[i];
      for (int j = 0; j < n_vec; ++j) {
        if (pa[j] > out[j]) out[j] = pa[j];
      }
    }
    return out;
  }
")

Pmax2_last <- function(...) {
  cpp_pmax_last(list(...))
}

bench::mark(pmax(x,  y, z, w, na.rm = TRUE),
            Pmax2_last(x, y, z, w),
)

## # A tibble: 2 x 13
##   expression                        min median `itr/sec` mem_alloc `gc/sec`
##   <bch:expr>                     <bch:> <bch:>     <dbl> <bch:byt>    <dbl>
## 1 pmax(x, y, z, w, na.rm = TRUE) 5.98ms 6.36ms      154.     781KB        0
## 2 Pmax2_last(x, y, z, w)         5.09ms 5.46ms      177.     784KB        0

本文收集自互联网，转载请注明来源。

如有侵权，请联系[email protected] 删除。

编辑于2021-04-7

我来说两句

0条评论

登录后参与评论

来自分类Dev

Related 相关文章

文章