将缺少 PE 头和 MZ 头的 PE 文件反汇编为 asm 文件。
参考下面这篇文章
手动添加PE头和MZ头:
# Counting semaphore with 20 permits: the launcher loop takes one permit per
# worker thread it starts and each worker hands it back when it finishes.
empty = threading.Semaphore(20)
def fix_header(fp, ha):
    """Write a copy of the file at *fp* with its MZ and PE signatures restored.

    The first two bytes are replaced with ``MZ``.  The 4-byte little-endian
    value stored at offset 0x3C is taken as the position of the PE signature,
    and the four bytes at that position are replaced with ``PE`` followed by
    two NUL bytes.  The offset is not validated against the file size.

    The result is written to ``<prefix><ha>`` where ``<prefix>`` is the last
    of the module-level names ``black``, ``white``, ``test`` whose path
    exists.  These names are defined outside this block; since the output
    path is built by plain concatenation they presumably end with a path
    separator -- TODO confirm.

    The caller acquires one permit of the module-level semaphore ``empty``
    before starting this function in a thread; the permit is always returned
    here, even when reading or writing fails, so a bad sample cannot stall
    the launcher loop.

    Args:
        fp: Path of the header-stripped input file.
        ha: File name appended to the chosen output prefix.

    Raises:
        FileNotFoundError: If none of ``black``, ``white``, ``test`` exists.
        OSError: If the input cannot be read or the output cannot be written.
    """
    try:
        with open(fp, 'rb') as f:
            data = f.read()

        # Bytes 0x3C..0x3F hold the offset at which the PE signature belongs.
        e_lfnew = data[0x3C: 0x40]
        offset = int.from_bytes(e_lfnew, byteorder='little', signed=True)
        new_data = b"MZ" + data[2:offset] + b"PE\0\0" + data[offset + 4:]

        # Later entries deliberately override earlier ones, matching the
        # original chain of independent ``if`` statements.
        new_path = None
        for prefix in (black, white, test):
            if os.path.exists(prefix):
                new_path = '{0}{1}'.format(prefix, ha)
        if new_path is None:
            raise FileNotFoundError(
                "no output location exists for {0!r}".format(ha))

        with open(new_path, 'wb') as f:
            f.write(new_data)
    finally:
        # Always give the permit back, otherwise each failure permanently
        # reduces the number of workers that can run.
        empty.release()
# Start one fix_header worker thread per (path, file name) pair.  The
# semaphore `empty` is acquired before each start and released by the worker,
# which bounds the number of workers running at the same time.
table = []
with tqdm(total=len(train_black_path), ncols=80, desc="fix") as pbar:
    for fp, ha in zip(train_black_path, train_black_filenames):
        empty.acquire()
        t = threading.Thread(target=fix_header, args=(fp, ha), daemon=True)
        t.start()
        table.append(t)
        pbar.update(1)

# The workers are daemon threads, which are stopped abruptly when the
# interpreter exits.  Wait for all of them so the last files are fully
# written before the script continues or ends.
for t in table:
    t.join()
批量反汇编为asm文件:
import sys
import os
import subprocess
import pickle
import pandas as pd
from tqdm import tqdm
import threading
# Name of the IDC script handed to IDA through its -S switch.
idcScriptFileName = "batchmod.idc"

# Locations of the two IDA text-mode executables.  Raw strings are used
# because "\I" and "\i" are not valid escape sequences: the plain literals
# produce the same text today but trigger a SyntaxWarning on Python 3.12+.
# (The former module-level `global` statements had no effect and were dropped.)
ida32tFilePath = r"D:\IDA7.5\IDA7.5\idat.exe"
ida64tFilePath = r"D:\IDA7.5\IDA7.5\idat64.exe"
# Folder that holds the pickled sample list; the trailing slash matters
# because the file name is appended by plain concatenation.
data_dir = "D:/00 malware data/Datacon/datacon_miner/cfg/"

# NOTE(review): pickle.load can execute code embedded in the file, so this
# must only ever be pointed at a pickle produced by a trusted source.
with open(data_dir + 'train_black_path.pkl', mode='rb') as f:
    train_black_path = pickle.load(f)
def to_asm(fp):
    """Run the 32-bit text-mode IDA on one sample with the batch IDC script.

    The command is ``<ida32tFilePath> -A -c -S<idcScriptFileName> <fp>``.
    It is passed to the operating system as an argument list rather than a
    single shell string, so a sample path that contains spaces stays one
    argument.  The command line is printed after the run, as before.

    The caller acquires one permit of the module-level semaphore ``empty``
    before starting this function in a thread; the permit is always returned
    here, even if launching IDA fails.

    Args:
        fp: Path of the sample; carriage returns and newlines are stripped.
    """
    try:
        fp = fp.replace('\n', '').replace('\r', '')
        cmd = [ida32tFilePath, "-A", "-c", "-S" + idcScriptFileName, fp]
        try:
            # Blocks until this IDA process exits; the exit status is
            # ignored, matching the previous os.system call.
            subprocess.run(cmd)
        except OSError as exc:
            # os.system never raised when the executable was missing; keep
            # the run best-effort but say why nothing happened.
            print("could not start IDA: {0}".format(exc))
        print(" ".join(cmd))
    finally:
        # Always free the slot so one failure cannot stall the launcher.
        empty.release()
# At most 20 IDA runs at a time: a permit is taken before each worker thread
# is started and returned by to_asm when that run is over.
empty = threading.Semaphore(value=20)
asm_threads = []
with tqdm(total=len(train_black_path), ncols=80, desc="to_asm_black") as pbar:
    for fp in train_black_path:
        empty.acquire()
        t = threading.Thread(target=to_asm, args=(fp, ), daemon=True)
        t.start()
        asm_threads.append(t)
        pbar.update(1)

# Daemon threads are stopped abruptly at interpreter exit, so wait for the
# runs that are still in flight instead of abandoning them.
for t in asm_threads:
    t.join()
batchmod.idc
//by obaby
#include <idc.idc>
// Batch entry point: after auto-analysis, writes two files next to the
// database: a listing ("...asm") and a hex dump of the bytes ("...bytes").
// IDA is terminated at the end with exit code 0, or with exit code 1 when
// one of the output files cannot be created.
static main()
{
    // turn on coagulation of data in the final pass of analysis
    SetShortPrm(INF_AF2, GetShortPrm(INF_AF2) | AF2_DODATA);
    Message("Waiting for the end of the auto analysis...\n");
    Wait();
    Message("\n\n------ Creating the output file.... --------\n");

    // Output names: the database path with its last three characters
    // (presumably the "idb"/"i64" extension) replaced by the new extension.
    auto path = GetIdbPath()[0:-3] + "asm";
    auto byteFilePath = GetIdbPath()[0:-3] + "bytes";

    // A failed fopen yields 0.  Stop with a non-zero exit code instead of
    // continuing with an invalid handle, which would leave the script
    // aborted before Exit() is reached.
    auto file = fopen(path,"w");
    if (file == 0) {
        Message("Cannot create %s\n", path);
        Exit(1);
    }
    auto byteFile = fopen(byteFilePath,"w");
    if (byteFile == 0) {
        fclose(file);
        Message("Cannot create %s\n", byteFilePath);
        Exit(1);
    }

    // OFILE_LST produces the listing format; it is stored under the
    // ".asm" name on purpose.
    GenerateFile(OFILE_LST,file,0,-1,0);

    // Hex dump: two upper-case hex digits per byte, 16 bytes per line,
    // single spaces between the bytes of a line.
    auto addr = MinEA();
    auto i=0;
    for( addr; addr != BADADDR; addr = NextAddr(addr) ){
        fprintf(byteFile,"%02X",IdbByte(addr));
        if (i == 15){
            fprintf(byteFile,"\n");
            i=0;
        } else {
            fprintf(byteFile," ");
            i++;
        }
    }

    fclose(file);
    fclose(byteFile);
    Message("All done, exiting...\n");
    Exit(0); // exit to OS, error code 0 - success
}
正常运行,无报错
将文件 batchmod.idc 放在 IDA 的 idc 目录下,并同时放在 Python 代码的同级目录下。
在 cmd 中运行时,命令执行后直接回到下一行提示符,没有报错。
生成有.text等关键字的asm文件,类似于kaggle数据集中的asm文件