大手子们带带我!Python批量提取PDF简历的信息,提取关键词,并写入到Excel中

问题

如何用Python批量提取PDF简历的信息,提取所需信息,并写入到Excel中?

提取「姓名」、「学校」、「学历」、「电话」、「邮箱」这5项信息

已经写好的代码如下:
import re
import pdfplumber
import xlwt
import glob

path = 'D:\Desktop\Sample\*.pdf'
list1 = glob.glob(path)    # 目录下所有PDF文件以list形式储存
def collect_single_info(path):
    print(path)  # 查看文件路径及文件名,调试用,正式代码可删
    with pdfplumber.open(path) as pdf:
        for page in pdf.pages:
            first_page = pdf.pages[0]
            # print(first_page.extract_text())  # 查看简历正文首页,调试用,正式代码可删
            try:
                school = re.findall('\S+大学', first_page.extract_text())[0]  # 提取学校名称
                str_school = str(school)
                print(str_school)
            except Exception as error:
                str_school = str('未能获取学校名称,需人工复核')
                print(str_school, error)

            if '博士' in first_page.extract_text():                           # 提取学历
                education = re.findall('博士', first_page.extract_text())[0]  # 先判断是否为博士
                str_education = str(education)
                print(str_education[2:-2])

            elif '硕士' in first_page.extract_text():
                education = re.findall('硕士', first_page.extract_text())[0]  # 其次判断是否为硕士
                str_education = str(education)
                print(str_education)

            elif '本科' in first_page.extract_text():
                education = re.findall('本科', first_page.extract_text())[0]  # 最后判断是否为本科
                str_education = str(education)
                print(str_education)

            else:
                str_education = str('学历未知,需人工复核简历')                   # 如简历中完全没有注明学历,则提示人工复核
                print(str_education)

            try:
                # phone_number = re.findall(r"(?<=\D)1[34789]\d{9}", first_page.extract_text())[0]  # 提取电话
                # https://baike.baidu.com/item/%E6%89%8B%E6%9C%BA%E5%8F%B7%E7%A0%81/1417348 参考国内号段规则
                phone_number = re.findall(r"1[35678]\d{9}", first_page.extract_text())[0]
                # 198、199开头的手机号易与80后、90后的身份证号部分重合,故而此处直接转为人工复核
                str_phone_number = str(phone_number)

            except Exception as error:
                str_phone_number = str('未能获取电话,需人工复核')
                print(str_phone_number)

            try:
                mail = re.findall('\w+@\w+.com', first_page.extract_text())[0]  # 提取邮箱
                str_mail = str(mail)
                print(str_mail)
            except Exception as error:
                print('未能获取邮箱,需人工复核')

            break

# collect_info(list1[19])   # 输出「单个提取」结果


def collect_multi_info(path):
    table = []
    for table in list1:
        collect_single_info(table)

collect_multi_info(path)    # 输出「全部提取」结果

wb = xlwt.Workbook()

sheet1 = wb.add_sheet('Sheet1')
sheet1.write(0, 0, '姓名')    # 固定表头
sheet1.write(0, 1, '学校')    # 固定表头
sheet1.write(0, 2, '学历')    # 固定表头
sheet1.write(0, 3, '电话')    # 固定表头
sheet1.write(0, 4, '邮箱')    # 固定表头
sheet1.write(1, 0, str_name)   # 姓名,后续依次填在下面
sheet1.write(1, 1, str_school)  # 学校,同上
sheet1.write(1, 2, str_education)  # 学历,同上
sheet1.write(1, 3, str_phone_number) # 电话,同上
sheet1.write(1, 4, str_mail)  # 邮箱,同上

wb.save("D:\\Desktop\\Sample\\候选人信息.xls")
print("[System:信息提取完毕!]")

遇到的问题

0)不知道怎么把全部PDF内容提取出来之后写到一个excel里
1)姓名没办法提取出来,想到的办法是「提取文件名第一个-前的内容」,但不会写代码……

我想要达到的结果
姓名学校学历电话邮箱
张三中国XX大学硕士137****1234zhangsan@qq.com
李四华南XX大学本科159****5678lisi@163.com