这段代码用来计算去重后剩余蛋白质序列的氨基酸含量。但经过几次验证,它总是会把部分重复的蛋白质序列也计算进去。这段代码有什么问题吗?或者有没有更好的写法?
# Compute the amino-acid composition of one protein, but only when its title
# is flagged in %need_comp_p. Appends one record to @all_sta of the form
#   ["name", <title>, "len", <residue count>, <fraction per entry of @AA>...]
# where each fraction is formatted with four decimals, in @AA order.
#
# NOTE(review): this block does no de-duplication itself; the only filter is
# the lookup by $one_title. If duplicated sequences still show up, check how
# %need_comp_p is filled (e.g. identical sequences under different titles,
# or titles that differ only by a trailing newline) -- TODO confirm upstream.
if ($need_comp_p{$one_title}) {
    # Work on a private copy so the caller's $one_seq is left untouched
    # (the previous in-place s///  erased the sequence as a side effect).
    my $residues = defined $one_seq ? $one_seq : '';

    # Drop newlines and other whitespace so they are not counted as residues
    # in the length used as the denominator.
    $residues =~ s/\s+//g;
    my $one_sequence_len = length $residues;

    my @protein_m = ();    # per-amino-acid fractions for this protein
    for my $m (@AA) {
        # Count case-insensitive occurrences without modifying the string;
        # \Q...\E keeps symbols such as '*' from being read as regex syntax.
        my $m_AA_count = () = $residues =~ /\Q$m\E/ig;

        # Guard against an empty sequence (would otherwise divide by zero).
        my $m_AA_p = $one_sequence_len ? $m_AA_count / $one_sequence_len : 0;
        push @protein_m, sprintf("%.4f", $m_AA_p);
    }
    push @all_sta, ["name", "$one_title", "len", "$one_sequence_len", @protein_m];
}
# Summary report. The number of records collected in @all_sta is obtained by
# evaluating the array in scalar context. $j is defined outside this fragment
# (presumably the running count of sequences read -- TODO confirm).
my $calculat_p_number = scalar(@all_sta);
print "total protein number in the sequence file: $j\n";
print "the calculated protein number:$calculat_p_number\n";