哈工大pyltp使用

python3.10环境
已安装pyltp0.4.0,LTP3.4.0,通过依存句法分析抽取语义三元组,代码如下

import os
from pyltp import Segmentor, Postagger, Parser, NamedEntityRecognizer, SementicRoleLabeller
class LtpParser:
    """Wrap the pyltp models and expose dependency-parse / SRL helpers.

    NOTE(review): this is the version that produced the reported
    ``AttributeError: 'tuple' object has no attribute 'head'``; the
    attribute-style access below (``.head``, ``.relation``) therefore does
    not match what ``Parser.parse`` returned in that run.
    """
    def __init__(self):
        """Load all five pyltp models from a hard-coded local directory."""
        # Directory holding the model files; fixed path, not configurable.
        LTP_DIR = "D:/ltp/ltp_data_v3.4.0"
        self.segmentor = Segmentor(os.path.join(LTP_DIR, "cws.model"))
        self.postagger = Postagger(os.path.join(LTP_DIR, "pos.model"))
        self.parser = Parser(os.path.join(LTP_DIR, "parser.model"))
        # Loaded here but not referenced by any other method of this class.
        self.recognizer = NamedEntityRecognizer(os.path.join(LTP_DIR, "ner.model"))
        self.labeller = SementicRoleLabeller(os.path.join(LTP_DIR, 'pisrl_win.model'))

    # The bare string below is a section label meaning "semantic role labelling".
    '''语义角色标注'''
    def format_labelrole(self, words, postags):
        """Run semantic role labelling and return the roles as nested dicts.

        Args:
            words: Segmented words of one sentence.
            postags: POS tags, one per word.

        Returns:
            A dict keyed by ``role.index``; each value maps an argument name
            to ``[name, range.start, range.end]``.
        """
        # The sentence is parsed again here instead of reusing the caller's arcs.
        arcs = self.parser.parse(words, postags)
        roles = self.labeller.label(words, postags, arcs)
        roles_dict = {}
        for role in roles:
            # If two arguments of one role share a name, the later one wins.
            roles_dict[role.index] = {arg.name:[arg.name,arg.range.start, arg.range.end] for arg in role.arguments}
        return roles_dict

    # The bare string below is a section label meaning "dependency parsing:
    # keep, for every word, a dict of its dependency children".
    '''句法分析---为句子中的每个词语维护一个保存句法依存儿子节点的字典'''
    def build_parse_child_dict(self, words, postags, arcs):
        """Build per-word child dictionaries and a flat dependency table.

        Args:
            words: Segmented words of one sentence.
            postags: POS tags, one per word.
            arcs: Dependency arcs, one per word; each element is accessed
                through ``.head`` and ``.relation``.

        Returns:
            A pair ``(child_dict_list, format_parse_list)``.
            ``child_dict_list[i]`` maps a relation name to the indices of the
            arcs whose head is word ``i``. Each row of ``format_parse_list``
            is ``[relation, word, word_index, postag, head_word,
            head_index, head_postag]``.
        """
        child_dict_list = []
        format_parse_list = []
        for index in range(len(words)):
            child_dict = dict()
            # Scan every arc for each word (quadratic in sentence length).
            for arc_index in range(len(arcs)):
                # NOTE(review): the reported traceback fails on this line
                # because arcs[arc_index] was a plain tuple in that run.
                if arcs[arc_index].head == index+1:   # arc head indices start at 1
                    if arcs[arc_index].relation in child_dict:
                        child_dict[arcs[arc_index].relation].append(arc_index)
                    else:
                        child_dict[arcs[arc_index].relation] = []
                        child_dict[arcs[arc_index].relation].append(arc_index)
            child_dict_list.append(child_dict)
        rely_id = [arc.head for arc in arcs]  # head id of every arc
        relation = [arc.relation for arc in arcs]  # relation label of every arc
        heads = ['Root' if id == 0 else words[id - 1] for id in rely_id]  # head word for every arc
        for i in range(len(words)):
            # Example row: ['ATT', '***', 0, 'nh', '总理', 1, 'n']
            # For the root arc (head id 0) the head index is -1, so
            # postags[-1] yields the LAST word's tag in the final slot.
            a = [relation[i], words[i], i, postags[i], heads[i], rely_id[i]-1, postags[rely_id[i]-1]]
            format_parse_list.append(a)

        return child_dict_list, format_parse_list

    # The bare string below is a section label meaning "parser entry point".
    '''parser主函数'''
    def parser_main(self, sentence):
        """Segment, tag and parse one sentence, then collect all results.

        Args:
            sentence: Raw sentence text.

        Returns:
            ``(words, postags, child_dict_list, roles_dict, format_parse_list)``.
        """
        words = list(self.segmentor.segment(sentence))
        postags = list(self.postagger.postag(words))
        arcs = self.parser.parse(words, postags)
        child_dict_list, format_parse_list = self.build_parse_child_dict(words, postags, arcs)
        roles_dict = self.format_labelrole(words, postags)
        return words, postags, child_dict_list, roles_dict, format_parse_list


if __name__ == '__main__':
    # Demo run: load the models, analyse one sample sentence and print results.
    parse = LtpParser()
    sentence = '中国是一个自由、和平的国家'
    words, postags, child_dict_list, roles_dict, format_parse_list = parse.parser_main(sentence)
    # Each sequence is printed together with its length.
    print(words, len(words))
    print(postags, len(postags))
    print(child_dict_list, len(child_dict_list))
    print(roles_dict)
    print(format_parse_list, len(format_parse_list))

出现错误

Traceback (most recent call last):
  File "C:\Users\Lenovo\Desktop\科研项目、投稿论文材料\数字记忆知识图谱\哈工大.py", line 58, in <module>
    words, postags, child_dict_list, roles_dict, format_parse_list = parse.parser_main(sentence)
  File "C:\Users\Lenovo\Desktop\科研项目、投稿论文材料\数字记忆知识图谱\哈工大.py", line 50, in parser_main
    child_dict_list, format_parse_list = self.build_parse_child_dict(words, postags, arcs)
  File "C:\Users\Lenovo\Desktop\科研项目、投稿论文材料\数字记忆知识图谱\哈工大.py", line 28, in build_parse_child_dict
    if arcs[arc_index].head == index+1:   #arcs的索引从1开始
AttributeError: 'tuple' object has no attribute 'head'

请问是什么原因??

arcs = self.parser.parse(words, postags)


改成



arcs, *_ = self.parser.parse(words, postags)

问题出在 build_parse_child_dict 函数中的 arcs 变量。错误信息指出 'tuple' object has no attribute 'head',说明 arcs 变量是一个元组对象,而不是一个期望的对象类型。 根据代码,arcs 是由 self.parser.parse(words, postags) 返回的结果。这个方法返回一个元组,包含了句子中每个词语的依存句法分析结果。然而,在代码中没有对返回的元组进行解包,导致将整个元组作为 arcs,进而导致错误。

要解决这个问题,你需要将 arcs 解包为单独的变量。在 parser_main 函数中,

错误信息已经说明了原因:元组类型没有 head 这个属性,出错位置就是 arcs[arc_index].head == index+1 这一行。您需要先检查 arcs 是什么类型的数据,再检查 arcs[arc_index] 取出的数据是什么格式。根据错误提示,arcs[arc_index] 取出的已经是一个元组,而元组没有 head 这个属性。因此您需要再检查这一块的代码,看取值方式是否用错了。

错误信息提示了tuple对象没有head属性,但是你在代码中调用了该属性,所以报错了,检查一下相应位置的代码

基于new bing的分析:

import os
from pyltp import Segmentor, Postagger, Parser, NamedEntityRecognizer, SementicRoleLabeller


class LtpParser:
    """Wrap the pyltp models and expose dependency-parse / SRL helpers.

    The helpers accept both result shapes pyltp has produced:

    * plain tuples -- ``(head, relation)`` per arc and
      ``(index, [(name, (start, end)), ...])`` per role, which is what makes
      attribute access fail with ``'tuple' object has no attribute 'head'``;
    * legacy objects exposing ``.head`` / ``.relation`` and
      ``.index`` / ``.arguments`` (with ``arg.name`` and ``arg.range``).
    """

    def __init__(self, ltp_dir="D:/ltp/ltp_data_v3.4.0"):
        """Load the pyltp models.

        Args:
            ltp_dir: Directory containing ``cws.model``, ``pos.model``,
                ``parser.model``, ``ner.model`` and ``pisrl_win.model``.
                Defaults to the path the original code hard-coded.
        """
        self.segmentor = Segmentor(os.path.join(ltp_dir, "cws.model"))
        self.postagger = Postagger(os.path.join(ltp_dir, "pos.model"))
        self.parser = Parser(os.path.join(ltp_dir, "parser.model"))
        self.recognizer = NamedEntityRecognizer(os.path.join(ltp_dir, "ner.model"))
        self.labeller = SementicRoleLabeller(os.path.join(ltp_dir, 'pisrl_win.model'))

    @staticmethod
    def _arc_pair(arc):
        """Return ``(head, relation)`` for an Arc-like object or a 2-tuple."""
        if hasattr(arc, "head"):
            return arc.head, arc.relation
        head, relation = arc
        return head, relation

    @staticmethod
    def _role_parts(role):
        """Return ``(index, [(name, start, end), ...])`` for one SRL role."""
        # Test for ``arguments`` rather than ``index``: every tuple has an
        # ``index`` *method*, so that name cannot tell the two shapes apart.
        if hasattr(role, "arguments"):
            return role.index, [
                (arg.name, arg.range.start, arg.range.end)
                for arg in role.arguments
            ]
        index, arguments = role
        return index, [(name, start, end) for name, (start, end) in arguments]

    def format_labelrole(self, words, postags, arcs=None):
        """Run semantic role labelling and return the roles as nested dicts.

        Args:
            words: Segmented words of one sentence.
            postags: POS tags, one per word.
            arcs: Optional dependency-parse result to reuse, exactly as
                returned by the parser. When omitted the sentence is parsed
                here, as before.

        Returns:
            A dict keyed by the predicate index; each value maps an argument
            name to ``[name, start, end]``.
        """
        if arcs is None:
            arcs = self.parser.parse(words, postags)
        roles = self.labeller.label(words, postags, arcs)
        roles_dict = {}
        for role in roles:
            index, arguments = self._role_parts(role)
            roles_dict[index] = {
                name: [name, start, end] for name, start, end in arguments
            }
        return roles_dict

    def build_parse_child_dict(self, words, postags, arcs):
        """Build per-word child dictionaries and a flat dependency table.

        Args:
            words: Segmented words of one sentence.
            postags: POS tags, one per word.
            arcs: Dependency arcs, one per word, as tuples or Arc objects.
                Head ids are 1-based; 0 marks the root.

        Returns:
            A pair ``(child_dict_list, format_parse_list)``.
            ``child_dict_list[i]`` maps a relation name to the indices of the
            words whose head is word ``i``. Each row of
            ``format_parse_list`` is ``[relation, word, word_index, postag,
            head_word, head_index, head_postag]``.
        """
        pairs = [self._arc_pair(arc) for arc in arcs]

        # One pass over the arcs instead of rescanning them for every word.
        child_dict_list = [{} for _ in words]
        for arc_index, (head, relation) in enumerate(pairs):
            if 1 <= head <= len(words):
                child_dict_list[head - 1].setdefault(relation, []).append(arc_index)

        format_parse_list = []
        for i, word in enumerate(words):
            head, relation = pairs[i]
            head_word = 'Root' if head == 0 else words[head - 1]
            # For the root arc head - 1 is -1, so the last slot holds the
            # final word's tag (negative indexing). Kept unchanged so that
            # existing consumers of this table see the same rows.
            format_parse_list.append(
                [relation, word, i, postags[i], head_word, head - 1, postags[head - 1]]
            )

        return child_dict_list, format_parse_list

    def parser_main(self, sentence):
        """Segment, tag and parse one sentence, then collect all results.

        Args:
            sentence: Raw sentence text.

        Returns:
            ``(words, postags, child_dict_list, roles_dict, format_parse_list)``.
        """
        words = list(self.segmentor.segment(sentence))
        postags = list(self.postagger.postag(words))
        # Keep the parser's own result object: it is handed to the labeller
        # untouched, and the sentence is parsed only once.
        arcs = self.parser.parse(words, postags)
        child_dict_list, format_parse_list = self.build_parse_child_dict(words, postags, arcs)
        roles_dict = self.format_labelrole(words, postags, arcs)
        return words, postags, child_dict_list, roles_dict, format_parse_list


if __name__ == '__main__':
    # Demo run: analyse one sample sentence and dump every result.
    parse = LtpParser()
    sentence = '中国是一个自由、和平的国家'
    results = parse.parser_main(sentence)
    words, postags, child_dict_list, roles_dict, format_parse_list = results
    # The per-word sequences are printed together with their length.
    for sized in (words, postags, child_dict_list):
        print(sized, len(sized))
    print(roles_dict)
    print(format_parse_list, len(format_parse_list))


来自GPT的回答:

根据你提供的代码和错误信息,问题出现在build_parse_child_dict函数中的if arcs[arc_index].head == index+1:这一行。

从报错信息看,arcs[arc_index] 本身就是一个元组:在 pyltp 0.4.0 中,parse 函数返回的是一个列表,其中每个词对应一个 (head, relation) 二元组,而不再是带有 head、relation 属性的 Arc 对象。

因此,你需要修改build_parse_child_dict函数中的代码,先把每条依存弧按 (head, relation) 取出,再使用 head 和 relation,而不是访问 .head / .relation 属性。修改后的代码如下:

python
Copy code
def build_parse_child_dict(self, words, postags, arcs):
    """Build per-word child dictionaries and a flat dependency table.

    ``arcs`` is the parser result for the sentence: one entry per word,
    either a plain ``(head, relation)`` tuple or a legacy object exposing
    ``.head`` and ``.relation``. Indexing ``arcs[0]`` is not a way to reach
    "the Arc list": it is simply the first word's arc.

    Args:
        words: Segmented words of one sentence.
        postags: POS tags, one per word.
        arcs: Dependency arcs, one per word. Head ids are 1-based; 0 marks
            the root.

    Returns:
        A pair ``(child_dict_list, format_parse_list)``.
        ``child_dict_list[i]`` maps a relation name to the indices of the
        words whose head is word ``i``. Each row of ``format_parse_list`` is
        ``[relation, word, word_index, postag, head_word, head_index,
        head_postag]``.
    """
    # Normalise every arc to a (head, relation) pair up front.
    pairs = [
        (arc.head, arc.relation) if hasattr(arc, "head") else tuple(arc)
        for arc in arcs
    ]

    # One pass over the arcs instead of rescanning them for every word.
    child_dict_list = [{} for _ in words]
    for arc_index, (head, relation) in enumerate(pairs):
        if 1 <= head <= len(words):  # head ids start at 1
            child_dict_list[head - 1].setdefault(relation, []).append(arc_index)

    format_parse_list = []
    for i, word in enumerate(words):
        head, relation = pairs[i]
        head_word = 'Root' if head == 0 else words[head - 1]
        # Example row: ['ATT', '***', 0, 'nh', '总理', 1, 'n']
        # For the root arc head - 1 is -1, so the last slot holds the final
        # word's tag (negative indexing); kept as in the original table.
        format_parse_list.append(
            [relation, word, i, postags[i], head_word, head - 1, postags[head - 1]]
        )

    return child_dict_list, format_parse_list
通过上述修改,应该可以解决你遇到的错误。