mtcnn和facenet实现部分遮挡下的人脸识别

问题遇到的现象和发生背景

问题相关代码，请勿粘贴截图

运行结果及报错内容

我的解答思路和尝试过的方法

我想要达到的结果

对于这些代码我还不太清楚，希望可以帮我用中文详细注释一下代码，谢谢。
``

from facenet import MTCNN, InceptionResnetV1
from torchvision.transforms import functional as F
import numpy as np
import matplotlib.pyplot as plt
import cv2
from PIL import Image
from scipy.spatial.distance import canberra
from sklearn.preprocessing import Normalizer
import torch
import tkinter as tk
from tkinter.filedialog import askopenfilename
from PIL import Image, ImageTk

max_w_h = (700, 500)

def ShowImg(img, img_labels):
    rgba_img = cv2.cvtColor(img, cv2.COLOR_BGR2RGBA)
    show_img = Image.fromarray(rgba_img)
    show_img = ImageTk.PhotoImage(show_img)
    img_labels.config(image=show_img)
    img_labels.image = show_img

def Resize(img):
    h, w, _ = img.shape
    ratio = h / w
    if max_w_h[0] * ratio > max_w_h[1]:
        return cv2.resize(img, (int(max_w_h[1] / ratio), int(max_w_h[1])))
    else:
        return cv2.resize(img, (int(max_w_h[0]), int(max_w_h[0] * ratio)))

gpu = False
device = torch.device('cuda:0' if gpu & torch.cuda.is_available() else 'cpu')
print('Running on device: {}'.format(device))

# loading the facenet model
facerec = InceptionResnetV1(pretrained='vggface2').eval().to(device)

# Loading the mtcnn model
mtcnn = MTCNN(margin=30, device=device)
l2_encoder = Normalizer(norm='l2')

# load trained faces
data = np.load('data.npz')
trainx_embed, trainy = data['a'], data['b']

def calculate_distance(embedding, known_faces, known_labels):
    store = dict()
    for i in known_labels:
        if i not in store:
            store[i] = []
    for i in range(known_faces.shape[0]):
        store[known_labels[i]].append(canberra(embedding, known_faces[i]))
    for i in store.keys():
        store[i] = sum(store[i]) / len(store[i])
    dist = min(store.values())
    for i in store:
        if store[i] == dist:
            return (dist, i)

def run(imgp):
    try:
        img = Image.open(imgp)
        _, img_cropped, box = mtcnn(img)
        face_array = np.array(img_cropped)
        face_pixels = F.to_tensor(np.float32(face_array))
        face_pixels = (face_pixels - 127.5) / 128.0
        feas = facerec(face_pixels.unsqueeze(0).to(device))
        feas = feas.cpu().detach().numpy()
        distance, label = calculate_distance(feas, trainx_embed, trainy)
        print(label)
        # plot
        image = cv2.imread(imgp)
        if distance > 350:  # 368
            label = "UNKNOWN"
        p1, p2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3]))
        thickness = max(round(sum(image.shape) / 2 * 0.003), 2)
        cv2.rectangle(image, p1, p2, (0, 255, 255), thickness=thickness, lineType=cv2.LINE_AA)
        tf = max(thickness - 1, 1)  # font thickness
        w, h = cv2.getTextSize(label, 0, fontScale=thickness / 3, thickness=tf)[0]  # text width, height
        outside = p1[1] - h - 3 >= 0  # label fits outside box
        cv2.putText(image, label, (p1[0], p1[1] - 2 if outside else p1[1] + h + 2), 0, thickness / 3, (0, 255, 255),
                    thickness=tf, lineType=cv2.LINE_AA)
        data_img = Resize(image)
        ShowImg(data_img, img_l)
        img_l.update()
    except:
        print('Can not detect face!')

def select_img():
    path = askopenfilename()
    try:
        run(path)
    except:
        pass

if __name__ == '__main__':
    root = tk.Tk()
    root.title("Face Detection Demo")
    sc_width = root.winfo_screenwidth()
    sc_height = root.winfo_screenheight()
    WID = 600  # 窗口宽
    HEI = 600  # 窗口高
    root.geometry('%dx%d+%d+%d' % (WID, HEI, (sc_width - WID) / 2, (sc_height - HEI) / 2 - 30))
    type = ('宋体', "12", 'normal')

    up_frame = tk.Frame(root)
    tk.Button(up_frame, text='加载图片', command=select_img, font=type).pack(padx=1, side=tk.LEFT)
    tk.Button(up_frame, text='退出系统', command=root.destroy, font=type).pack(padx=1, side=tk.LEFT)
    up_frame.pack(pady=1, anchor='n')

    img_frame = tk.Frame(root)
    img_l = tk.Label(img_frame)
    img_l.pack()
    img_frame.pack(pady=20)
    root.mainloop()

对应代码块后面#写了注释

from facenet import MTCNN, InceptionResnetV1             #导入各种需要用到的包
from torchvision.transforms import functional as F
import numpy as np
import matplotlib.pyplot as plt
import cv2
from PIL import Image
from scipy.spatial.distance import canberra
from sklearn.preprocessing import Normalizer
import torch
import tkinter as tk
from tkinter.filedialog import askopenfilename
from PIL import Image, ImageTk
 
max_w_h = (700, 500)
 
def ShowImg(img, img_labels):      #定义展示图片的函数
    rgba_img = cv2.cvtColor(img, cv2.COLOR_BGR2RGBA)
    show_img = Image.fromarray(rgba_img)
    show_img = ImageTk.PhotoImage(show_img)
    img_labels.config(image=show_img)
    img_labels.image = show_img
 
def Resize(img):                    #定义图片改变大小的函数
    h, w, _ = img.shape
    ratio = h / w
    if max_w_h[0] * ratio > max_w_h[1]:
        return cv2.resize(img, (int(max_w_h[1] / ratio), int(max_w_h[1])))
    else:
        return cv2.resize(img, (int(max_w_h[0]), int(max_w_h[0] * ratio)))
 
gpu = False
device = torch.device('cuda:0' if gpu & torch.cuda.is_available() else 'cpu')
print('Running on device: {}'.format(device))     #如果gpu可用就用gpu，否则使用CPU
 
# loading the facenet model
facerec = InceptionResnetV1(pretrained='vggface2').eval().to(device)    #加载facenet模型
 
# Loading the mtcnn model
mtcnn = MTCNN(margin=30, device=device)     #加载mtcnn模型
l2_encoder = Normalizer(norm='l2')
 
# load trained faces
data = np.load('data.npz')      #加载训练的数据
trainx_embed, trainy = data['a'], data['b']
 
def calculate_distance(embedding, known_faces, known_labels):  #定义计算距离的函数
    store = dict()
    for i in known_labels:
        if i not in store:
            store[i] = []
    for i in range(known_faces.shape[0]):
        store[known_labels[i]].append(canberra(embedding, known_faces[i]))
    for i in store.keys():
        store[i] = sum(store[i]) / len(store[i])
    dist = min(store.values())
    for i in store:
        if store[i] == dist:
            return (dist, i)
 
def run(imgp):        #定义运行的过程
    try:
        img = Image.open(imgp)
        _, img_cropped, box = mtcnn(img)
        face_array = np.array(img_cropped)
        face_pixels = F.to_tensor(np.float32(face_array))
        face_pixels = (face_pixels - 127.5) / 128.0
        feas = facerec(face_pixels.unsqueeze(0).to(device))
        feas = feas.cpu().detach().numpy()
        distance, label = calculate_distance(feas, trainx_embed, trainy)
        print(label)
        # plot
        image = cv2.imread(imgp)
        if distance > 350:  # 368
            label = "UNKNOWN"
        p1, p2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3]))
        thickness = max(round(sum(image.shape) / 2 * 0.003), 2)
        cv2.rectangle(image, p1, p2, (0, 255, 255), thickness=thickness, lineType=cv2.LINE_AA)
        tf = max(thickness - 1, 1)  # font thickness
        w, h = cv2.getTextSize(label, 0, fontScale=thickness / 3, thickness=tf)[0]  # text width, height
        outside = p1[1] - h - 3 >= 0  # label fits outside box
        cv2.putText(image, label, (p1[0], p1[1] - 2 if outside else p1[1] + h + 2), 0, thickness / 3, (0, 255, 255),
                    thickness=tf, lineType=cv2.LINE_AA)
        data_img = Resize(image)
        ShowImg(data_img, img_l)
        img_l.update()
    except:
        print('Can not detect face!')
 
def select_img():     #定义选择图片的函数
    path = askopenfilename()
    try:
        run(path)
    except:
        pass
 
if __name__ == '__main__':#主函数，程序入口
    root = tk.Tk()#创建tk窗体
    root.title("Face Detection Demo")#写好标题
    sc_width = root.winfo_screenwidth()#宽
    sc_height = root.winfo_screenheight()#高
    WID = 600  # 窗口宽
    HEI = 600  # 窗口高
    root.geometry('%dx%d+%d+%d' % (WID, HEI, (sc_width - WID) / 2, (sc_height - HEI) / 2 - 30))
    type = ('宋体', "12", 'normal')#字体
 
    up_frame = tk.Frame(root)
    tk.Button(up_frame, text='加载图片', command=select_img, font=type).pack(padx=1, side=tk.LEFT)#创建按钮
    tk.Button(up_frame, text='退出系统', command=root.destroy, font=type).pack(padx=1, side=tk.LEFT)
    up_frame.pack(pady=1, anchor='n')
 
    img_frame = tk.Frame(root)
    img_l = tk.Label(img_frame)
    img_l.pack()
    img_frame.pack(pady=20)
    root.mainloop()

您好，我是有问必答小助手，您的问题已经有小伙伴帮您解答，感谢您对有问必答的支持与关注！
PS：问答VIP年卡【限时加赠：IT技术图书免费领】，了解详情>>> https://vip.csdn.net/askvip?utm_source=1146287632