python3.10环境
已安装pyltp0.4.0,LTP3.4.0,通过依存句法分析抽取语义三元组,代码如下
import os
from pyltp import Segmentor, Postagger, Parser, NamedEntityRecognizer, SementicRoleLabeller
class LtpParser:
def __init__(self):
LTP_DIR = "D:/ltp/ltp_data_v3.4.0"
self.segmentor = Segmentor(os.path.join(LTP_DIR, "cws.model"))
self.postagger = Postagger(os.path.join(LTP_DIR, "pos.model"))
self.parser = Parser(os.path.join(LTP_DIR, "parser.model"))
self.recognizer = NamedEntityRecognizer(os.path.join(LTP_DIR, "ner.model"))
self.labeller = SementicRoleLabeller(os.path.join(LTP_DIR, 'pisrl_win.model'))
'''语义角色标注'''
def format_labelrole(self, words, postags):
arcs = self.parser.parse(words, postags)
roles = self.labeller.label(words, postags, arcs)
roles_dict = {}
for role in roles:
roles_dict[role.index] = {arg.name:[arg.name,arg.range.start, arg.range.end] for arg in role.arguments}
return roles_dict
'''句法分析---为句子中的每个词语维护一个保存句法依存儿子节点的字典'''
def build_parse_child_dict(self, words, postags, arcs):
child_dict_list = []
format_parse_list = []
for index in range(len(words)):
child_dict = dict()
for arc_index in range(len(arcs)):
if arcs[arc_index].head == index+1: #arcs的索引从1开始
if arcs[arc_index].relation in child_dict:
child_dict[arcs[arc_index].relation].append(arc_index)
else:
child_dict[arcs[arc_index].relation] = []
child_dict[arcs[arc_index].relation].append(arc_index)
child_dict_list.append(child_dict)
rely_id = [arc.head for arc in arcs] # 提取依存父节点id
relation = [arc.relation for arc in arcs] # 提取依存关系
heads = ['Root' if id == 0 else words[id - 1] for id in rely_id] # 匹配依存父节点词语
for i in range(len(words)):
# ['ATT', '***', 0, 'nh', '总理', 1, 'n']
a = [relation[i], words[i], i, postags[i], heads[i], rely_id[i]-1, postags[rely_id[i]-1]]
format_parse_list.append(a)
return child_dict_list, format_parse_list
'''parser主函数'''
def parser_main(self, sentence):
words = list(self.segmentor.segment(sentence))
postags = list(self.postagger.postag(words))
arcs = self.parser.parse(words, postags)
child_dict_list, format_parse_list = self.build_parse_child_dict(words, postags, arcs)
roles_dict = self.format_labelrole(words, postags)
return words, postags, child_dict_list, roles_dict, format_parse_list
if __name__ == '__main__':
parse = LtpParser()
sentence = '中国是一个自由、和平的国家'
words, postags, child_dict_list, roles_dict, format_parse_list = parse.parser_main(sentence)
print(words, len(words))
print(postags, len(postags))
print(child_dict_list, len(child_dict_list))
print(roles_dict)
print(format_parse_list, len(format_parse_list))
出现错误
Traceback (most recent call last):
File "C:\Users\Lenovo\Desktop\科研项目、投稿论文材料\数字记忆知识图谱\哈工大.py", line 58, in <module>
words, postags, child_dict_list, roles_dict, format_parse_list = parse.parser_main(sentence)
File "C:\Users\Lenovo\Desktop\科研项目、投稿论文材料\数字记忆知识图谱\哈工大.py", line 50, in parser_main
child_dict_list, format_parse_list = self.build_parse_child_dict(words, postags, arcs)
File "C:\Users\Lenovo\Desktop\科研项目、投稿论文材料\数字记忆知识图谱\哈工大.py", line 28, in build_parse_child_dict
if arcs[arc_index].head == index+1: #arcs的索引从1开始
AttributeError: 'tuple' object has no attribute 'head'
请问是什么原因??
把
arcs = self.parser.parse(words, postags)
改成
arcs, *_ = self.parser.parse(words, postags)
问题出在 build_parse_child_dict 函数中的 arcs 变量。错误信息指出 'tuple' object has no attribute 'head',说明 arcs 变量是一个元组对象,而不是一个期望的对象类型。 根据代码,arcs 是由 self.parser.parse(words, postags) 返回的结果。这个方法返回一个元组,包含了句子中每个词语的依存句法分析结果。然而,在代码中没有对返回的元组进行解包,导致将整个元组作为 arcs,进而导致错误。
要解决这个问题,你需要将 arcs 解包为单独的变量。在 parser_main 函数中,
错误已经提示您了,元组类型没有head这个属性,错误位置就是在arcs[arc_index].head == index+1 那您要检查下这个arcs是什么类型的数据,其次arcs[arc_index]取值后的数据是什么样的格式,根据错误提示来说,arcs[arc_index]取值后已经是一个元组类型的数据了,而元组类型的数据没有head这个属性值。因此您需要再检查下这一块的代码,看下取值方法是否使用错误了。
错误信息提示了tuple对象没有head属性,但是你在代码中调用了该属性,所以报错了,检查一下相应位置的代码
基于new bing的分析:
import os
from pyltp import Segmentor, Postagger, Parser, NamedEntityRecognizer, SementicRoleLabeller
class LtpParser:
def __init__(self):
LTP_DIR = "D:/ltp/ltp_data_v3.4.0"
self.segmentor = Segmentor(os.path.join(LTP_DIR, "cws.model"))
self.postagger = Postagger(os.path.join(LTP_DIR, "pos.model"))
self.parser = Parser(os.path.join(LTP_DIR, "parser.model"))
self.recognizer = NamedEntityRecognizer(os.path.join(LTP_DIR, "ner.model"))
self.labeller = SementicRoleLabeller(os.path.join(LTP_DIR, 'pisrl_win.model'))
'''语义角色标注'''
def format_labelrole(self, words, postags):
arcs = self.parser.parse(words, postags)
roles = self.labeller.label(words, postags, arcs)
roles_dict = {}
for role in roles:
roles_dict[role.index] = {arg.name:[arg.name,arg.range.start, arg.range.end] for arg in role.arguments}
return roles_dict
'''句法分析---为句子中的每个词语维护一个保存句法依存儿子节点的字典'''
def build_parse_child_dict(self, words, postags, arcs):
child_dict_list = []
format_parse_list = []
for index in range(len(words)):
child_dict = dict()
for arc_index in range(len(arcs)):
if arcs[arc_index].head == index + 1:
relation = arcs[arc_index].relation
if relation in child_dict:
child_dict[relation].append(arc_index)
else:
child_dict[relation] = []
child_dict[relation].append(arc_index)
child_dict_list.append(child_dict)
rely_id = [arc.head for arc in arcs]
relation = [arc.relation for arc in arcs]
heads = ['Root' if id == 0 else words[id - 1] for id in rely_id]
for i in range(len(words)):
a = [relation[i], words[i], i, postags[i], heads[i], rely_id[i] - 1, postags[rely_id[i] - 1]]
format_parse_list.append(a)
return child_dict_list, format_parse_list
'''parser主函数'''
def parser_main(self, sentence):
words = list(self.segmentor.segment(sentence))
postags = list(self.postagger.postag(words))
arcs = list(self.parser.parse(words, postags))
child_dict_list, format_parse_list = self.build_parse_child_dict(words, postags, arcs)
roles_dict = self.format_labelrole(words, postags)
return words, postags, child_dict_list, roles_dict, format_parse_list
if __name__ == '__main__':
parse = LtpParser()
sentence = '中国是一个自由、和平的国家'
words, postags, child_dict_list, roles_dict, format_parse_list = parse.parser_main(sentence)
print(words, len(words))
print(postags, len(postags))
print(child_dict_list, len(child_dict_list))
print(roles_dict)
print(format_parse_list, len(format_parse_list))
来自GPT的回答:
根据你提供的代码和错误信息,问题出现在build_parse_child_dict函数中的if arcs[arc_index].head == index+1:这一行。
根据pyltp库的文档,在LTP 3.4.0版本中,parse函数返回的是一个包含两个元素的元组,其中第一个元素是一个列表,包含了pyltp的Arc对象,第二个元素是一个整数,表示句子的长度。
因此,你需要修改build_parse_child_dict函数中的代码,将arcs改为arcs[0],以获取Arc对象列表。修改后的代码如下:
python
Copy code
def build_parse_child_dict(self, words, postags, arcs):
child_dict_list = []
format_parse_list = []
for index in range(len(words)):
child_dict = dict()
for arc_index in range(len(arcs[0])):
if arcs[0][arc_index].head == index + 1: # arcs的索引从1开始
if arcs[0][arc_index].relation in child_dict:
child_dict[arcs[0][arc_index].relation].append(arc_index)
else:
child_dict[arcs[0][arc_index].relation] = []
child_dict[arcs[0][arc_index].relation].append(arc_index)
child_dict_list.append(child_dict)
rely_id = [arc.head for arc in arcs[0]] # 提取依存父节点id
relation = [arc.relation for arc in arcs[0]] # 提取依存关系
heads = ['Root' if id == 0 else words[id - 1] for id in rely_id] # 匹配依存父节点词语
for i in range(len(words)):
# ['ATT', '***', 0, 'nh', '总理', 1, 'n']
a = [relation[i], words[i], i, postags[i], heads[i], rely_id[i] - 1, postags[rely_id[i] - 1]]
format_parse_list.append(a)
return child_dict_list, format_parse_list
通过上述修改,应该可以解决你遇到的错误。