Python 统计各 HTML 标签的个数

统计所处理的所有网页文件中各个 HTML 标签的出现次数,
并在屏幕上分行显示出现次数最多的三个标签及其出现次数。

img


底下的函数怎么写呢?

img


写得不太好👉👈,集合里标签的类型有重复,该怎么修改呢?
谢谢!


import re
from collections import Counter, defaultdict

def count_tags(filenames, top_n=3):
    """Count HTML elements by tag name across files and print the most common.

    Every opening (or self-closing) tag adds one occurrence to its
    lower-cased tag name, so ``<a href="x">``, ``<a href="y">`` and ``<A>``
    are all counted under ``a``.  Closing tags (``</a>``), comments
    (``<!-- ... -->``) and declarations (``<!DOCTYPE ...>``) are not counted,
    which means each element is counted once rather than twice.

    This is a regex scan, not a real HTML parser: text that merely looks
    like a tag (for example inside a comment or a ``<script>`` body) is
    counted as well.

    Args:
        filenames: Iterable of paths to the HTML files to scan.
        top_n: Number of most frequent tags to print and return.

    Returns:
        A list of ``(tag_name, count)`` tuples ordered from most to least
        frequent; tags with equal counts keep first-seen order.

    Raises:
        OSError: If one of the files cannot be opened or read.
    """
    tag_counts = Counter()

    # Capture only the tag NAME: "<", an ASCII letter, then everything up to
    # the first whitespace, "/" or ">".  Attributes are consumed but ignored,
    # so differently-attributed tags no longer become distinct keys.
    # "</...>" and "<!...>" never match because "<" must be followed by a
    # letter.
    pattern = re.compile(r'<([A-Za-z][^\s/>]*)[^>]*>')

    for filename in filenames:
        # Be explicit about the encoding instead of relying on the locale
        # default; undecodable bytes are replaced rather than aborting the
        # whole count, since they cannot affect ASCII tag names.
        with open(filename, 'r', encoding='utf-8', errors='replace') as f:
            contents = f.read()

        # HTML tag names are case-insensitive, so normalise before counting.
        tag_counts.update(name.lower() for name in pattern.findall(contents))

    top_tags = tag_counts.most_common(top_n)

    # One "tag: count" line per entry, most frequent first.
    for tag, count in top_tags:
        print(f'{tag}: {count}')

    return top_tags

if __name__ == '__main__':
    # Demo run: report the most common tags found in two sample pages.
    sample_files = ['file1.html', 'file2.html']
    count_tags(sample_files)