下面的代码从文件 split_1.fasta 中选取一段序列(第421至480位),并存于 selected_split_1.fasta 文件中。
如何写一个循环,从 split_1.fasta 至 split_68.fasta 中分别选取第421至480位,并分别存于 selected_split_1.fasta 至 selected_split_68.fasta 文件中?
from Bio import SeqIO
# Extract bases 421..480 (1-based, inclusive) from every record in
# split_1.fasta and write them, one FASTA record each, to
# selected_split_1.fasta.
#
# The input is opened first so that a missing input file raises before the
# output file is created/truncated; both handles are closed by the `with`
# statement even if parsing fails part-way through.
with open("split_1.fasta", "r") as fin, open("selected_split_1.fasta", "w") as f:
    for seq_record in SeqIO.parse(fin, "fasta"):
        f.write(">" + str(seq_record.id) + "\n")
        # Python slices are 0-based and end-exclusive, so 1-based positions
        # 421..480 (60 bases) correspond to [420:480]. The previous [421:480]
        # dropped base 421 and returned only 59 bases.
        f.write(str(seq_record.seq[420:480]) + "\n")
from Bio import SeqIO
# For each of split_1.fasta .. split_68.fasta, extract bases 421..480
# (1-based, inclusive) from every record and write them to the matching
# selected_split_<n>.fasta file.
#
# range(1, 69) is used because the upper bound of range() is exclusive;
# range(1, 68) would stop at file 67.
for xx in range(1, 69):
    # Single underscore, matching the split_<n>.fasta naming used for the
    # single-file version of this script.
    xn = "split_" + str(xx) + ".fasta"
    yn = "selected_split_" + str(xx) + ".fasta"
    # Open the input first so a missing input raises before the output file
    # is created; `with` closes both handles even on error.
    with open(xn, "r") as fin, open(yn, "w") as f:
        for seq_record in SeqIO.parse(fin, "fasta"):
            f.write(">" + str(seq_record.id) + "\n")
            # 0-based, end-exclusive slice: [420:480] is 1-based 421..480
            # (60 bases). The previous [421:480] returned only 59 bases.
            f.write(str(seq_record.seq[420:480]) + "\n")
# For split_1.fasta .. split_68.fasta: read the whole file as a list of text
# lines and copy the lines at list indices 421..480 (at most 60 lines) into
# selected_split_<n>.fasta. Note this slices *lines of the file*, not
# positions within a sequence.
for index in range(1, 69):
    source_name = "split_%s.fasta" % index
    target_name = "selected_" + source_name

    with open(source_name, 'r') as source:
        all_lines = source.readlines()

    wanted_lines = all_lines[421:481]
    with open(target_name, 'w') as target:
        target.writelines(wanted_lines)
# -*- coding:utf-8 -*-
import os
from Bio import SeqIO
# root_dir is the directory whose files are read.
root_dir = r"C:\Users\2350586L\PycharmProjects\split\splitE"
# Every selected subsequence is collected into this single output file.
with open("FANCE1020_1080.fasta", "w") as f:
    # Visit the directory entries one by one; sorted() makes the record
    # order in the output reproducible (os.listdir order is arbitrary).
    for entry in sorted(os.listdir(root_dir)):
        file_name = os.path.join(root_dir, entry)
        # Skip sub-directories: SeqIO.parse can only read regular files.
        if not os.path.isfile(file_name):
            continue
        # SeqIO.parse opens and reads the file itself, so no separate file
        # handle is needed here.
        for seq_record in SeqIO.parse(file_name, "fasta"):
            f.write(">" + str(seq_record.id) + "\n")
            # 0-based, end-exclusive slice [1020:1080], i.e. 1-based bases
            # 1021..1080 (60 bases).
            f.write(str(seq_record.seq[1020:1080]) + "\n")
print("FINISHED")