import re
class Node:
    """A single binary-search-tree node holding one word and its count.

    Attributes:
        word: The key stored in this node.
        count: How many times ``word`` has been inserted (starts at 1).
        left: Child node for keys that compare less than ``word``, or None.
        right: Child node for keys that compare greater than ``word``, or None.
    """

    # One node is created per unique word, so skip the per-instance __dict__.
    __slots__ = ("word", "count", "left", "right")

    def __init__(self, word):
        self.word = word
        self.count = 1
        self.left = None
        self.right = None

    def __repr__(self):
        return f"{type(self).__name__}(word={self.word!r}, count={self.count!r})"
class WordBST:
    """Unbalanced binary search tree that counts how often each word occurs.

    Keys are ordered with the ``<`` operator. The tree does no rebalancing,
    so its depth depends on insertion order; every method is therefore
    written iteratively so that a very deep (e.g. sorted-input) tree cannot
    exhaust the interpreter's recursion limit.
    """

    def __init__(self):
        # Root node of the tree; None while the tree is empty.
        self.root = None

    def insert(self, word):
        """Add one occurrence of ``word``.

        Creates a new node with count 1 if the word is not present yet,
        otherwise increments the count of the existing node.
        """
        if self.root is None:
            self.root = Node(word)
            return
        cur = self.root
        while True:
            if word == cur.word:
                cur.count += 1
                return
            if word < cur.word:
                if cur.left is None:
                    cur.left = Node(word)
                    return
                cur = cur.left
            else:
                if cur.right is None:
                    cur.right = Node(word)
                    return
                cur = cur.right

    def search(self, word):
        """Return the stored count for ``word``, or 0 if it is not in the tree."""
        cur = self.root
        while cur is not None:
            if word == cur.word:
                return cur.count
            cur = cur.left if word < cur.word else cur.right
        return 0

    def inorder(self, node, res):
        """Append ``(word, count)`` pairs of the subtree at ``node`` to ``res``.

        Pairs are appended in ascending key order (left subtree, node, right
        subtree). ``node`` may be None, in which case ``res`` is left
        untouched. Returns None; the result is delivered through ``res``.
        """
        # Explicit stack instead of recursion: the tree is unbalanced, so a
        # recursive walk raises RecursionError once the depth passes the
        # interpreter limit (about 1000 frames by default).
        stack = []
        cur = node
        while stack or cur is not None:
            # Descend to the leftmost unvisited node, remembering the path.
            while cur is not None:
                stack.append(cur)
                cur = cur.left
            cur = stack.pop()
            res.append((cur.word, cur.count))
            cur = cur.right
# Input file used when no path is given on the command line.
DEFAULT_PATH = r"C:\Users\aaaaa\Desktop\warandpeace.txt"


def main(path=DEFAULT_PATH):
    """Count word frequencies in the text file at ``path`` and print a report.

    The file is read as UTF-8 and lower-cased, split into words, and every
    word is inserted into a WordBST. The report prints the counts of three
    fixed words, the number of unique words, the single most frequent word
    and the ten most frequent words.

    Raises:
        OSError: If the file cannot be opened (not caught here on purpose).
    """
    with open(path, encoding="utf-8") as f:
        text = f.read().lower()
    # \w+ on a str pattern matches runs of Unicode letters, digits and
    # underscores, so numbers are counted as words too.
    words = re.findall(r'\w+', text)

    tree = WordBST()
    for w in words:
        tree.insert(w)

    # In-order walk yields (word, count) pairs sorted by word.
    all_words = []
    tree.inorder(tree.root, all_words)

    print("пьер:", tree.search("пьер"))
    print("наташа:", tree.search("наташа"))
    print("андрей:", tree.search("андрей"))
    print("уникальных слов:", len(all_words))

    # max() returns the first maximal item, so ties resolve to the
    # alphabetically first word; an empty input falls back to ("", 0).
    max_word, max_count = max(
        all_words, key=lambda item: item[1], default=("", 0)
    )
    print("самое частое:", max_word, max_count)

    # sorted() is stable, so words with equal counts keep alphabetical order.
    top10 = sorted(all_words, key=lambda x: x[1], reverse=True)[:10]
    print("топ-10:", top10)


if __name__ == "__main__":
    import sys

    # Optional first CLI argument overrides the default input file.
    main(sys.argv[1] if len(sys.argv) > 1 else DEFAULT_PATH)