# 问题
课题需要用到ExplorATE的R软件包进行数据处理,在使用作者的test data测试运行函数mk.reference()时,发生了报错如下
Error in { : task 30 failed -
> "the condition has length > 1"
# 我所运行的代码
Lp.references <- ExplorATE::mk.reference(
RepMask = "RM_lp.out",
gff3 = "geneModel_lp.gff3",
anot = "blastAnot_lp.outfmt6",
cleanTEsProt = T,
featureSum = T,
outdir = "out_lp",
rm.cotrans = T,
overlapping = T,
trme = "trme_lp.fa",
stranded = F,
by = "classRep",
threads = 4,
rule = c(80,80,80),
over.res= "HS",
annot_by = "transcripts"
)
该函数的源代码
mk.reference <- function(RepMask,overlapping=F, by="classRep", trme, threads=1, annot_by="transcripts", rule=c(80,80,80), best.by="total_repeat_length", bedtools="bedtools", outdir, over.res="HS",custom.lengths=NULL, ...){
if(overlapping==T){
RM <- ovlp.res(RepMask=RepMask, threads=threads, outdir=outdir,over.res=over.res,...)
}else{
RM <- RepMask
}
RM <- RM[RM$classRep%!in%c("Unknown", "rRNA", "Satellite", "Simple_repeat","Low_complexity","RNA","scRNA","snRNA","srpRNA", "tRNA","Other"),]
if(annot_by=="transcripts"){
message("applying the Wicker-like rule ...")
WRout <- Wickerlike.rule(RM,by=by, rule = rule, best.by = best.by)
write.table(unique(WRout$SeqID),"targetTEs.txt", quote = F, row.names = F, col.names=F)
message("making references from transcripts annotations ...")
system(paste0("awk", ' \'BEGIN{while((getline<"targetTEs.txt")>0)l[">"$1]=1}/^>/{f=l[$1]}f\' ',trme," > targetTEs.fa"))
Ref.salmon <- data.frame(seqID=WRout$SeqID,repID=WRout[,4])
} else {
if(annot_by=="fragments"){
message("applying the Wicker-like rule ...")
WRout <- Wickerlike.rule(RM,by=by, rule = rule)
BED <- WRout[,1:4]
message("making references from fragments annotations ...")
write.table(BED,"WRout.bed", quote = F, row.names = F, col.names=F, sep="\t")
system("cat WRout.bed | sort -k1,1 -k2,2n > RM_sort.bed")
system(paste(bedtools,"merge -i RM_sort.bed -c 4 -o collapse > RM_sort_merged.bed"))
system(paste(bedtools,"getfasta -fi",trme,"-bed RM_sort_merged.bed -fo targetTEs.fa"))
Ref.salmon <- data.frame(seqID=paste0(BED[,1],":",BED[,2],"-",BED[,3]),repID=BED[,4])
} else {
message("error in annot_by argument")
}
}
message("taking decoy sequences")
allIDfasta <- system(paste("grep '>'",trme,"|sed 's/>//' | awk '{print $1}'"),intern = T)
decoys <- allIDfasta[allIDfasta%!in%unique(WRout$SeqID)]
write.table(decoys,"decoys.temp", col.names = F, row.names = F, quote = F)
system(paste0("awk", ' \'BEGIN{while((getline<"decoys.temp")>0)l[">"$1]=1}/^>/{f=l[$1]}f\' ',trme," > decoy.fa"))
message("making trmeSalmon.fasta file")
system("cat targetTEs.fa decoy.fa > trmeSalmon.fasta")
if(annot_by=="transcripts"){
system("rm targetTEs.fa decoy.fa decoys.temp")
}
if(annot_by=="fragments"){
system(paste("rm WRout.bed targetTEs.fa RM_sort_merged.bed RM_sort.bed decoy.fa decoys.temp", paste0(trme,".fai")))
}
message("writing files in the output directory...")
system(paste("mv trmeSalmon.fasta",outdir))
write.table(Ref.salmon,paste0(outdir,"/references.csv"), col.names = F, row.names = F, quote = F, sep = ";")
write.table(decoys,paste0(outdir,"/decoys.txt"), col.names = F, row.names = F, quote = F)
message(paste("The reference.csv, decoys.txt and trmeSalmon.fasta files are in", outdir, "directory"))
Ref.salmon
}
查询网络上的教程好像只提到在if语句中可能出现这种报错,将if语句改为ifelse语句即可。
我将包中mk.reference的源代码中的if语句全部改为了ifelse的语句结构,并在单独运行修改后的函数源代码时没有出现报错。
我修改后的函数代码:
mk.reference <- function(RepMask,overlapping=F, by="classRep", trme, threads=1, annot_by="transcripts", rule=c(80,80,80), best.by="total_repeat_length", bedtools="bedtools", outdir, over.res="HS",custom.lengths=NULL, ...){
ifelse(overlapping==T,
(RM <- ovlp.res(RepMask=RepMask, threads=threads, outdir=outdir,over.res=over.res,...)),
(RM <- RepMask))
RM <- RM[RM$classRep%!in%c("Unknown", "rRNA", "Satellite", "Simple_repeat","Low_complexity","RNA","scRNA","snRNA","srpRNA", "tRNA","Other"),]
ifelse(annot_by=="transcripts",
{message("applying the Wicker-like rule ...")
WRout <- Wickerlike.rule(RM,by=by, rule = rule, best.by = best.by)
write.table(unique(WRout$SeqID),"targetTEs.txt", quote = F, row.names = F, col.names=F)
message("making references from transcripts annotations ...")
system(paste0("awk", ' \'BEGIN{while((getline<"targetTEs.txt")>0)l[">"$1]=1}/^>/{f=l[$1]}f\' ',trme," > targetTEs.fa"))
Ref.salmon <- data.frame(seqID=WRout$SeqID,repID=WRout[,4])},
ifelse(annot_by=="fragments",
{message("applying the Wicker-like rule ...")
WRout <- Wickerlike.rule(RM,by=by, rule = rule)
BED <- WRout[,1:4]
message("making references from fragments annotations ...")
write.table(BED,"WRout.bed", quote = F, row.names = F, col.names=F, sep="\t")
system("cat WRout.bed | sort -k1,1 -k2,2n > RM_sort.bed")
system(paste(bedtools,"merge -i RM_sort.bed -c 4 -o collapse > RM_sort_merged.bed"))
system(paste(bedtools,"getfasta -fi",trme,"-bed RM_sort_merged.bed -fo targetTEs.fa"))
Ref.salmon <- data.frame(seqID=paste0(BED[,1],":",BED[,2],"-",BED[,3]),repID=BED[,4])},
message("error in annot_by argument")))
message("taking decoy sequences")
allIDfasta <- system(paste("grep '>'",trme,"|sed 's/>//' | awk '{print $1}'"),intern = T)
decoys <- allIDfasta[allIDfasta%!in%unique(WRout$SeqID)]
write.table(decoys,"decoys.temp", col.names = F, row.names = F, quote = F)
system(paste0("awk", ' \'BEGIN{while((getline<"decoys.temp")>0)l[">"$1]=1}/^>/{f=l[$1]}f\' ',trme," > decoy.fa"))
message("making trmeSalmon.fasta file")
system("cat targetTEs.fa decoy.fa > trmeSalmon.fasta")
ifelse(annot_by=="transcripts",system("rm targetTEs.fa decoy.fa decoys.temp"))
ifelse(annot_by=="fragments",system(paste("rm WRout.bed targetTEs.fa RM_sort_merged.bed RM_sort.bed decoy.fa decoys.temp", paste0(trme,".fai"))))
message("writing files in the output directory...")
system(paste("mv trmeSalmon.fasta",outdir))
write.table(Ref.salmon,paste0(outdir,"/references.csv"), col.names = F, row.names = F, quote = F, sep = ";")
write.table(decoys,paste0(outdir,"/decoys.txt"), col.names = F, row.names = F, quote = F)
message(paste("The reference.csv, decoys.txt and trmeSalmon.fasta files are in", outdir, "directory"))
Ref.salmon
}
随即,我也通过new project-build-clean and install的方法重装了修改后的包,然而再度运行时依然出现同样的报错。
希望有人可以指出报错原因与修改方法,或是指出我修改的代码是否有问题。如果需要我提供别的信息以诊断也请留言,感谢!