import re
from collections import Counter
# Report basic statistics for MO.txt: character count, word count, the three
# most frequent words, the 2nd letter of the 11th word from the end, and all
# numbers in the text ordered from largest to smallest.

# Decode explicitly as UTF-8 so the result does not depend on the platform's
# default encoding.
with open('MO.txt', 'r', encoding='utf-8') as f:
    content = f.read()

char_count = len(content)
word_list = content.split()  # whitespace-delimited tokens
word_count = len(word_list)
top_three = Counter(word_list).most_common(3)  # [(word, occurrences), ...]

# Guard both lookups: with fewer than 11 words, or a one-character word in
# that position, the result is '' instead of an IndexError.
word = word_list[-11] if word_count >= 11 else ''
second_letter = word[1:2]

# Digit runs are matched anywhere in the text (also inside words) and ordered
# by numeric value; the list items themselves remain strings.
numbers = re.findall(r'\d+', content)
sorted_numbers = sorted(numbers, key=int, reverse=True)

print("文件共有", char_count, "个字符")
print("文件共有", word_count, "个单词")
print("文章中重复出现次数最多的 3 个单词是:", top_three)
print("文章倒数第 11 个单词的第 2 个字母是:", second_letter)
print("文件中出现的数字按数值从大到小输出:", sorted_numbers)
# The key question: once the file contents have been read, how should they be split up and stored?
def getdata(filename):
    """Read rows of whitespace-separated integers from a text file.

    Each non-blank line becomes one row: the integers parsed from the line,
    followed by three summary values -- the row's maximum, minimum and sum.

    Args:
        filename: Path of the text file to read.

    Returns:
        A list of rows (lists of ints) in file order.

    Raises:
        ValueError: If a token on a line is not a valid integer.
    """
    res = []  # one list per non-blank input line
    # The context manager closes the file even if parsing fails part-way.
    with open(filename, 'r') as linedata:
        for line in linedata:
            values = [int(s) for s in line.split()]
            if not values:
                # Blank line: max()/min() would raise on an empty row.
                continue
            # All three statistics are computed from the parsed values only,
            # so the sum is not inflated by the appended max and min.
            res.append(values + [max(values), min(values), sum(values)])
    return res
# Load the sample data and print every row on its own line.
filename = 'testdata.txt'
data = getdata(filename)
if data:
    # One print call with a newline separator emits the same lines as
    # printing the rows one by one; the guard avoids a stray empty line
    # when there are no rows.
    print(*data, sep='\n')
# 首先,读取文件并计算文件中字符总数
def count_chars(file_path):
    """Return the number of characters in the UTF-8 text file at *file_path*.

    The file is read in text mode, so the count is in decoded characters
    (after newline translation), not in bytes.
    """
    with open(file_path, mode='r', encoding='utf-8') as handle:
        return len(handle.read())
# 接下来,读取文件并按空格分词,得到所有单词并计算单词总数和出现频率
def count_words(file_path):
    """Collect word statistics for the UTF-8 text file at *file_path*.

    The text is split on whitespace. Every token counts towards the total,
    but tokens for which ``str.isdigit()`` is true are left out of the
    frequency table.

    Returns:
        A 4-tuple ``(words_cnt, cnt_dict, top_words, second_letter)``:
        the total number of tokens; a dict mapping each non-numeric token
        to its number of occurrences; up to three tokens with the highest
        counts, most frequent first; and the 2nd character of the 11th
        token from the end ('' when there are fewer than 11 tokens or that
        token has fewer than 2 characters).
    """
    with open(file_path, mode='r', encoding='utf-8') as handle:
        tokens = handle.read().split()

    # Frequency table of the non-numeric tokens, in first-seen order.
    cnt_dict = {}
    for token in tokens:
        if not token.isdigit():
            cnt_dict[token] = cnt_dict.get(token, 0) + 1

    # sorted() is stable, so tokens with equal counts keep first-seen order.
    top_words = sorted(cnt_dict, key=cnt_dict.get, reverse=True)[:3]

    # The slice yields '' for a one-character token instead of raising.
    second_letter = tokens[-11][1:2] if len(tokens) >= 11 else ''

    return len(tokens), cnt_dict, top_words, second_letter
# 最后,找出文件中的数字并按数值从大到小排列输出
def find_numbers(file_path):
    """Return the distinct whole numbers in a UTF-8 text file, largest first.

    Only whitespace-delimited tokens that consist entirely of decimal digits
    count as numbers; digits embedded in other text, signs and decimal
    points are not recognised.

    Args:
        file_path: Path of the text file to scan.

    Returns:
        A list of unique ints sorted in descending order.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        tokens = f.read().split()
    # str.isdecimal() rather than str.isdigit(): isdigit() is also true for
    # characters such as '²' or '①', which int() cannot parse and would
    # therefore raise ValueError.
    unique_numbers = {int(token) for token in tokens if token.isdecimal()}
    return sorted(unique_numbers, reverse=True)
# 测试代码
file_path = 'MO.txt'

# The character count is reported before the word statistics are computed.
char_cnt = count_chars(file_path)
print('字符总数:', char_cnt)

# word_freq (the full frequency table) is kept but not printed.
word_cnt, word_freq, top_words, second_letter = count_words(file_path)
for label, value in (
    ('单词总数:', word_cnt),
    ('出现频率最高的三个单词:', top_words),
    ('倒数第11个单词的第2个字母:', second_letter),
):
    print(label, value)

num_list = find_numbers(file_path)
print('数字列表:', num_list)