When running prediction with a TensorFlow-based model, CPU usage is too high. How can I reduce the program's CPU usage?

 As shown in the screenshot, this runs on a CPU server under Linux. During prediction with a TensorFlow-based model, CPU usage climbs as high as 700%, which severely affects the other programs on the server. How can I change the program to lower its CPU usage?

 def test(self, height, width, input_path, output_path,checkpoint_path):
        imgsName = sorted(os.listdir(input_path)) # list all images in the folder
        H, W = height, width
        inp_chns = 3 if self.args.model == 'color' else 1
        self.batch_size = 1 if self.args.model == 'color' else 1
        model_name = "deblur.model"
        ckpt_name = model_name + '-' + '15000'
        tf.reset_default_graph()
        graph = tf.get_default_graph() 
        inputs = tf.placeholder(shape=[self.batch_size, H, W, inp_chns], dtype=tf.float32) # input placeholder
        outputs = self.generator(inputs, reuse=False) # build the computation graph
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=2)
        sess = tf.Session(graph=graph, config=tf.ConfigProto(device_count={"CPU": 1}, allow_soft_placement=True, inter_op_parallelism_threads=1, intra_op_parallelism_threads=1, use_per_session_threads=True)) # configure the session
        saver.restore(sess, os.path.join(checkpoint_path, 'B5678-1-60-noise7', ckpt_name)) # load the trained model
        for imgName in imgsName: # process the images listed above, one by one
            blur = cv2.imread(os.path.join(input_path, imgName), -1) # read the image
            h, w = blur.shape
            x=h//512
            #print(x)
            y=w//512
            #print(y)
            if x>y:
                blur = np.pad(blur, ((0, ((x+1)*512 - h)), (0, ((x+1)*512 - w))), 'edge') # pad the image to a multiple of 512*512 so it can be tiled
                after_deblur = np.zeros((((x+1)*512), ((x+1)*512))) # empty array of the same size
            if x <= y:
                blur = np.pad(blur, ((0, ((y+1)*512 - h)), (0, ((y+1)*512 - w))), 'edge') # pad the image to a multiple of 512*512 so it can be tiled
                after_deblur = np.zeros((((y+1)*512), ((y+1)*512))) # empty array of the same size
            # split the image into 512*512 tiles and feed them through the network one by one
            starttotal = time.time()
            for ii in range(x+1):
                for jj in range(y+1):
                    blurPad = blur[ii * 512:(ii + 1) * 512, jj * 512:(jj + 1) * 512] # crop the 512*512 tiles in order
                    blurPad = np.expand_dims(blurPad, -1)
                    blurPad = np.expand_dims(blurPad, 0)
                    if self.args.model != 'color':
                        blurPad = np.transpose(blurPad, (3, 1, 2, 0))
                    start = time.time()
                    deblur = sess.run(outputs, feed_dict={inputs: blurPad / 4095.0}) # run the tile through the graph with sess.run
                    duration = time.time() - start
                    res = deblur[-1]
                    res = np.clip(res, a_min=0, a_max=1)
                    if self.args.model != 'color':
                        res = np.transpose(res, (3, 1, 2, 0))
                    res = res[0, :, :, :] * 4095.0
                    res = (res.astype(np.uint16))
                    res = np.squeeze(res)
                    after_deblur = (after_deblur.astype(np.uint16))
                    after_deblur[ii * 512:(ii + 1) * 512, jj * 512:(jj + 1) * 512] = res # write the result into the matching position of the empty array
            durationtotal = time.time() - starttotal
            print('total time use %4.3fs' % (durationtotal))
            #print(after_deblur.shape)
            after_deblur = after_deblur[:h, :w] 
            after_deblur = np.clip(after_deblur, a_min=0, a_max=4095)
            #print(after_deblur.shape)
            imtiff = Image.fromarray(after_deblur)
            imtiff.save(os.path.join(output_path, imgName)) # write out the image
        sess.close()
        del sess

 

OP, see whether these two approaches reduce memory for you. Also, the sort you wrote, imgsName = sorted(os.listdir(input_path)), does not order the files the way you probably intend (it sorts names as strings rather than by number); a numeric sort is in the code below. Consider as well whether you need the sort at all; if not, dropping it saves a bit of memory too.

# Method 1
def test(self, height, width, input_path, output_path,checkpoint_path):
    #imgsName = sorted(os.listdir(input_path)) # list all images in the folder
    
    from glob import glob
    img_paths = glob(os.path.join(input_path, "*"))  # or "*.jpg"
    img_paths.sort(key=lambda x: int(os.path.basename(x).split(".")[0]))  # numeric sort; assumes file names like "1.tif", "2.tif", ...
    imgsName = iter(tuple(img_paths))  # full paths, so no os.path.join(input_path, imgName) is needed when reading
    
    H, W = height, width
    inp_chns = 3 if self.args.model == 'color' else 1
    self.batch_size = 1 if self.args.model == 'color' else 1
    model_name = "deblur.model"
    ckpt_name = model_name + '-' + '15000'
    tf.reset_default_graph()
    graph = tf.get_default_graph() 
    inputs = tf.placeholder(shape=[self.batch_size, H, W, inp_chns], dtype=tf.float32) # input placeholder
    outputs = self.generator(inputs, reuse=False) # build the computation graph
    saver = tf.train.Saver(tf.global_variables(), max_to_keep=2)
    sess = tf.Session(graph=graph, config=tf.ConfigProto(device_count={"CPU": 1}, allow_soft_placement=True, inter_op_parallelism_threads=1, intra_op_parallelism_threads=1, use_per_session_threads=True)) # configure the session
    saver.restore(sess, os.path.join(checkpoint_path, 'B5678-1-60-noise7', ckpt_name)) # load the trained model
    for imgName in imgsName: # process the images listed above, one by one
        blur = cv2.imread(imgName, -1) # read the image (imgName is already a full path here)
        h, w = blur.shape
        x=h//512
        #print(x)
        y=w//512
        #print(y)
        if x>y:
            blur = np.pad(blur, ((0, ((x+1)*512 - h)), (0, ((x+1)*512 - w))), 'edge') # pad the image to a multiple of 512*512 so it can be tiled
            after_deblur = np.zeros((((x+1)*512), ((x+1)*512))) # empty array of the same size
        if x <= y:
            blur = np.pad(blur, ((0, ((y+1)*512 - h)), (0, ((y+1)*512 - w))), 'edge') # pad the image to a multiple of 512*512 so it can be tiled
            after_deblur = np.zeros((((y+1)*512), ((y+1)*512))) # empty array of the same size
        # split the image into 512*512 tiles and feed them through the network one by one
        starttotal = time.time()
        for ii in range(x+1):
            for jj in range(y+1):
                blurPad = blur[ii * 512:(ii + 1) * 512, jj * 512:(jj + 1) * 512] # crop the 512*512 tiles in order
                blurPad = np.expand_dims(blurPad, -1)
                blurPad = np.expand_dims(blurPad, 0)
                if self.args.model != 'color':
                    blurPad = np.transpose(blurPad, (3, 1, 2, 0))
                start = time.time()
                deblur = sess.run(outputs, feed_dict={inputs: blurPad / 4095.0}) # run the tile through the graph with sess.run
                duration = time.time() - start
                res = deblur[-1]
                res = np.clip(res, a_min=0, a_max=1)
                if self.args.model != 'color':
                    res = np.transpose(res, (3, 1, 2, 0))
                res = res[0, :, :, :] * 4095.0
                res = (res.astype(np.uint16))
                res = np.squeeze(res)
                after_deblur = (after_deblur.astype(np.uint16))
                after_deblur[ii * 512:(ii + 1) * 512, jj * 512:(jj + 1) * 512] = res # write the result into the matching position of the empty array
        durationtotal = time.time() - starttotal
        print('total time use %4.3fs' % (durationtotal))
        #print(after_deblur.shape)
        after_deblur = after_deblur[:h, :w] 
        after_deblur = np.clip(after_deblur, a_min=0, a_max=4095)
        #print(after_deblur.shape)
        imtiff = Image.fromarray(after_deblur)
        imtiff.save(os.path.join(output_path, os.path.basename(imgName))) # write out the image (imgName is a full path here, so keep only the file name)
    sess.close()
    del sess
    
# Method 2
def test(self, height, width, input_path, output_path,checkpoint_path):
    
    #imgsName = sorted(os.listdir(input_path)) # list all images in the folder
    
    input_image = os.listdir(input_path)
    input_image.sort(key=lambda x: int(x.split(".")[0]))  # numeric sort; assumes file names like "1.tif", "2.tif", ...
    imgsName = iter(tuple(input_image))
    
    H, W = height, width
    inp_chns = 3 if self.args.model == 'color' else 1
    self.batch_size = 1 if self.args.model == 'color' else 1
    model_name = "deblur.model"
    ckpt_name = model_name + '-' + '15000'
    tf.reset_default_graph()
    graph = tf.get_default_graph() 
    inputs = tf.placeholder(shape=[self.batch_size, H, W, inp_chns], dtype=tf.float32) # input placeholder
    outputs = self.generator(inputs, reuse=False) # build the computation graph
    saver = tf.train.Saver(tf.global_variables(), max_to_keep=2)
    sess = tf.Session(graph=graph, config=tf.ConfigProto(device_count={"CPU": 1}, allow_soft_placement=True, inter_op_parallelism_threads=1, intra_op_parallelism_threads=1, use_per_session_threads=True)) # configure the session
    saver.restore(sess, os.path.join(checkpoint_path, 'B5678-1-60-noise7', ckpt_name)) # load the trained model
    for imgName in imgsName: # process the images listed above, one by one
        blur = cv2.imread(os.path.join(input_path, imgName), -1) # read the image
        h, w = blur.shape
        x=h//512
        #print(x)
        y=w//512
        #print(y)
        if x>y:
            blur = np.pad(blur, ((0, ((x+1)*512 - h)), (0, ((x+1)*512 - w))), 'edge') # pad the image to a multiple of 512*512 so it can be tiled
            after_deblur = np.zeros((((x+1)*512), ((x+1)*512))) # empty array of the same size
        if x <= y:
            blur = np.pad(blur, ((0, ((y+1)*512 - h)), (0, ((y+1)*512 - w))), 'edge') # pad the image to a multiple of 512*512 so it can be tiled
            after_deblur = np.zeros((((y+1)*512), ((y+1)*512))) # empty array of the same size
        # split the image into 512*512 tiles and feed them through the network one by one
        starttotal = time.time()
        for ii in range(x+1):
            for jj in range(y+1):
                blurPad = blur[ii * 512:(ii + 1) * 512, jj * 512:(jj + 1) * 512] # crop the 512*512 tiles in order
                blurPad = np.expand_dims(blurPad, -1)
                blurPad = np.expand_dims(blurPad, 0)
                if self.args.model != 'color':
                    blurPad = np.transpose(blurPad, (3, 1, 2, 0))
                start = time.time()
                deblur = sess.run(outputs, feed_dict={inputs: blurPad / 4095.0}) # run the tile through the graph with sess.run
                duration = time.time() - start
                res = deblur[-1]
                res = np.clip(res, a_min=0, a_max=1)
                if self.args.model != 'color':
                    res = np.transpose(res, (3, 1, 2, 0))
                res = res[0, :, :, :] * 4095.0
                res = (res.astype(np.uint16))
                res = np.squeeze(res)
                after_deblur = (after_deblur.astype(np.uint16))
                after_deblur[ii * 512:(ii + 1) * 512, jj * 512:(jj + 1) * 512] = res # write the result into the matching position of the empty array
        durationtotal = time.time() - starttotal
        print('total time use %4.3fs' % (durationtotal))
        #print(after_deblur.shape)
        after_deblur = after_deblur[:h, :w] 
        after_deblur = np.clip(after_deblur, a_min=0, a_max=4095)
        #print(after_deblur.shape)
        imtiff = Image.fromarray(after_deblur)
        imtiff.save(os.path.join(output_path, imgName)) # write out the image
    sess.close()
    del sess

 

with tf.Session(config=tf.ConfigProto(
    device_count={"CPU": 12},
    inter_op_parallelism_threads=1,
    intra_op_parallelism_threads=1,
    # gpu_options=gpu_options,  # only if a GPU is used and gpu_options has been defined
)) as sess:

When you create the Session, you can try setting the following parameters in ConfigProto:

  1. device_count tells the tf Session the maximum number of CPUs it may use; if your machine has many CPUs, you can raise this value.
  2. inter_op_parallelism_threads and intra_op_parallelism_threads control how many threads the session uses to run operations in parallel (between ops and within a single op); smaller values mean fewer threads and therefore fewer CPU cores being used. A value of 0 lets TF pick a suitable number automatically.

If you have an Intel CPU, you can also build TensorFlow with Intel's MKL to improve training efficiency.
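If you do end up on an MKL-enabled build, a minimal sketch of taming its thread usage might look like the following. The environment variables are OpenMP/MKL knobs rather than TensorFlow APIs, and the specific values are only assumptions to start from:

import os

# Set these before importing tensorflow so the MKL/OpenMP runtime picks them up.
os.environ["OMP_NUM_THREADS"] = "2"    # cap the OpenMP worker threads used by MKL kernels
os.environ["KMP_BLOCKTIME"] = "0"      # let idle threads sleep instead of spinning on the CPU
os.environ["KMP_AFFINITY"] = "granularity=fine,compact,1,0"  # pin threads to cores

import tensorflow as tf

config = tf.ConfigProto(
    inter_op_parallelism_threads=1,  # run ops one at a time
    intra_op_parallelism_threads=2,  # match OMP_NUM_THREADS
)
sess = tf.Session(config=config)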

How many cores does your server have? If 700% is already a serious problem, is it only an 8-core box? If so, just configure the program to use fewer cores.

1. Change the code to process the data in smaller batches; the effect is limited.

2. Use a GPU, if you have one.

3. Use the nice command to lower the process's priority on the server (see the sketch after this list).

4. Set device_count to tell the tf Session the maximum number of CPUs it may use:

    with tf.Session(config=tf.ConfigProto(device_count={"CPU": 6}))
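A minimal sketch of options 3 and 4 done from inside the script instead of the shell. os.nice and os.sched_setaffinity are standard-library calls (sched_setaffinity is Linux-only), and the niceness value and core IDs {0, 1} are placeholders for whatever fits your server:

import os
import tensorflow as tf

os.nice(10)                      # lower this process's scheduling priority, like running it under `nice -n 10`
os.sched_setaffinity(0, {0, 1})  # restrict this process to CPU cores 0 and 1 (Linux only)

config = tf.ConfigProto(
    device_count={"CPU": 1},          # upper bound on CPU devices the session may use
    inter_op_parallelism_threads=1,   # one op at a time
    intra_op_parallelism_threads=2,   # at most two threads inside an op
)
sess = tf.Session(config=config)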

 

https://blog.csdn.net/bill20100829/article/details/115016611

OP, does the folder contain a lot of files?

Also, in this part the same index products are computed over and over; you can hoist them into a few variables and avoid the redundant multiplications in every iteration. With three nested loops the time and space cost is already high, so every bit saved per iteration helps; a sketch follows below.
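A minimal sketch of that hoisting, keeping the variable names from the original code; everything else is only illustrative restructuring:

for ii in range(x + 1):
    r0, r1 = ii * 512, (ii + 1) * 512           # row range computed once per outer iteration
    for jj in range(y + 1):
        c0, c1 = jj * 512, (jj + 1) * 512       # column range computed once per tile
        blurPad = blur[r0:r1, c0:c1]            # crop the tile with the precomputed indices
        # ... preprocess and run the tile through sess.run as before ...
        after_deblur[r0:r1, c0:c1] = res        # reuse the same indices when writing the result back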

Do you have the complete code? I'd like to take a look.

700% just means 7 threads are in use, and your machine surely has more than 7 threads, so it should be fine. My machine has 6 cores / 12 threads; when I run big-data jobs I often use 10 threads (around 1000% CPU) and the remaining 2 threads can still handle other work. Give it a try.

I can get this sorted out for you directly.