我敢肯定这个问题有一个简单的答案,但我似乎找不到正确的写法。我有一个文件列表和一个字符串列表,我想把每个文件的内容读入一个以对应字符串命名的数据框。然后,我想在同一个循环内对这些数据框执行其他操作。我还需要保留每个数据框,以供下游工作使用。这是我的代码:
# Question's original attempt (it does not work as intended): read each file
# into a data frame named data_<sample> and rename its columns in one loop.
samples <- c('fc14','g14','fc18','g18','fc21','g21')
# Keep only the sample names that contain "fc"
fc_samples <- grep("fc", samples, value=TRUE)
fc_files <- c('fc14_g14_full_annot_uniq.txt','fc18_g18_full_annot_uniq.txt','fc21_g21_full_annot_uniq.txt')
# make dataframes
# NOTE: fc_n is set to 1 at the top of the loop body, so it is reset on every
# iteration and fc_samples[fc_n] is always the first sample name.
for (file in fc_files)
{ fc_n <- 1
g_n <- 1
print(file);
# THE BIT THAT DOESN'T WORK
# Builds the variable name "data_<sample>" and assigns the table read from
# the current file to it.
assign(paste("data", fc_samples[fc_n], sep='_'), read.table(file,sep = "\t", header=T));
# HERE I EXPECT THE TOP OF MY DF TO BE PRINTED BUT IT ISN'T
# NOTE: inside a for loop R does not auto-print the value of an expression;
# an explicit print() is needed to see the output.
head(data_fc14);
# I TRY THIS INSTEAD
do.call("<-",list(paste("data", fc_samples[fc_n], sep='_'), read.table(file,sep = "\t", header=T)))
# I TRY TO PRINT THE DF AGAIN BUT STILL NO LUCK
# NOTE: paste() returns the variable name as a character string, so head()
# receives that string rather than the data frame it names.
head(paste("data", fc_samples[fc_n], sep='_'))
# FIRST DOWNSTREAM THING I WOULD LIKE TO DO,
# WON'T WORK UNTIL I SOLVE THE DF ASSIGNMENT ISSUE
# NOTE: names() is again applied to the string produced by paste(), and the
# continuation line below is commented out, which leaves the statement that
# starts on the next line unterminated.
names(paste("data", fc_samples[fc_n], sep='_'))[names(paste("data", fc_samples[fc_n], sep='_'))==c('SAMPLE_fc','CHROM_fc','START_fc','REF_fc','ALT_fc','REGION_fc','DP_fc','FREQ_fc','GENE_fc','AFFECTS_fc','dbSNP_fc',
# 'NOVEL_fc')] <- c('SAMPLE','CHROM','START','REF','ALT','REGION','DP','FREQ','GENE','AFFECTS','dbSNP','NOVEL')
# ITERATE TO THE NEXT FILE
fc_n <- fc_n+1
}
修改您的代码:
# Sample identifiers; only the ones containing "fc" are used below
samples <- c("fc14", "g14", "fc18", "g18", "fc21", "g21")
fc_samples <- samples[grepl("fc", samples)]

# Paths of the dummy example files, all inside the "example-data" directory
fc_files <- file.path(
  "example-data",
  c(
    "fc14_g14_full_annot_uniq.txt",
    "fc18_g18_full_annot_uniq.txt",
    "fc21_g21_full_annot_uniq.txt"
  )
)

# Reproducible dummy table: 7 rows of random normal values in 12 columns
# whose names carry the "_fc" suffix
set.seed(123)
dummy_df <- as.data.frame(replicate(12, rnorm(7)))
names(dummy_df) <- c(
  "SAMPLE_fc", "CHROM_fc", "START_fc", "REF_fc", "ALT_fc", "REGION_fc",
  "DP_fc", "FREQ_fc", "GENE_fc", "AFFECTS_fc", "dbSNP_fc", "NOVEL_fc"
)

# Create the output directory if it is missing, then write the same dummy
# table, tab-separated, to every example path
if (!dir.exists("./example-data")) {
  dir.create("example-data")
}
invisible(lapply(fc_files, function(path) {
  write.table(dummy_df, file = path, sep = "\t")
}))
# Read every example file into its own data frame, rename the "_fc" columns,
# and store the result in the current environment as data_<sample>
# (data_fc14, data_fc18, data_fc21).
#
# The files are indexed with seq_along() instead of a hand-maintained counter:
# a counter initialised inside the loop body is reset on every pass, so every
# file would end up stored under the first sample name.

# Column names as they appear in the files, and their replacements
# (the two vectors are aligned position by position)
fc_col_names <- c(
  "SAMPLE_fc", "CHROM_fc", "START_fc", "REF_fc", "ALT_fc", "REGION_fc",
  "DP_fc", "FREQ_fc", "GENE_fc", "AFFECTS_fc", "dbSNP_fc", "NOVEL_fc"
)
clean_col_names <- c(
  "SAMPLE", "CHROM", "START", "REF", "ALT", "REGION", "DP", "FREQ",
  "GENE", "AFFECTS", "dbSNP", "NOVEL"
)

# Each file needs exactly one sample name to build its variable name from
stopifnot(length(fc_files) == length(fc_samples))

for (fc_n in seq_along(fc_files)) {
  f <- read.table(fc_files[fc_n], sep = "\t", header = TRUE)

  # Rename by looking each column up in the mapping, so the result does not
  # depend on the column order or on every listed column being present
  # (an element-wise `==` against the full vector would)
  pos <- match(names(f), fc_col_names)
  names(f)[!is.na(pos)] <- clean_col_names[pos[!is.na(pos)]]

  # Store the finished data frame once, under its constructed name
  assign(paste("data", fc_samples[fc_n], sep = "_"), f)
}
一种“更优雅”的方式:
不推荐使用 `assign()`,它不被认为是最佳实践;更好的做法是使用列表。
不过有时确实有充分的理由使用它,我自己偶尔也会用。
# For the '%>%' pipe
library(magrittr)
# Build one named list of data frames instead of separate data_* variables.
# The "fc" sample ids are named after themselves so that the list elements
# can be addressed as data$fc14, data$fc18, ...
data <-
  grep("fc", samples, value = TRUE) %>%
  setNames(., .) %>%
  lapply(function(sample_id) {
    # Find the file path(s) matching this sample id, read the table, then
    # drop the first "_fc" from every column name
    path <- grep(sample_id, fc_files, value = TRUE)
    tbl <- read.table(path, sep = "\t", header = TRUE)
    names(tbl) <- sub("_fc", "", names(tbl))
    tbl
  })
# Compare each element of the list with the data_<sample> variable created by
# the loop; every comparison prints TRUE when the two are identical
for (sample_id in c("fc14", "fc18", "fc21")) {
  print(identical(get(paste("data", sample_id, sep = "_")), data[[sample_id]]))
}
# [1] TRUE
# [1] TRUE
# [1] TRUE

# Clean up: remove the example directory and print unlink()'s status code
cleanup_status <- unlink("example-data", recursive = TRUE)
print(cleanup_status)
本文收集自互联网,转载请注明来源。
如有侵权,请联系[email protected] 删除。
我来说两句