# Загрузка данных
import tkinter as tk
from tkinter import ttk, scrolledtext
import numpy as np
# косинусное сходство (не меняется)
def cosine_similarity(vec_a: np.ndarray, vec_b: np.ndarray) -> float:
    """Return the cosine of the angle between ``vec_a`` and ``vec_b``.

    The value is the dot product divided by the product of the Euclidean
    norms.  When either vector has zero norm the function returns ``0.0``
    instead of dividing by zero.
    """
    magnitudes = (np.linalg.norm(vec_a), np.linalg.norm(vec_b))
    # Each norm is tested on its own (not their product) so that two tiny but
    # non-zero norms are not mistaken for a zero vector.
    if any(magnitude == 0 for magnitude in magnitudes):
        return 0.0
    scale = magnitudes[0] * magnitudes[1]
    return float(np.dot(vec_a, vec_b) / scale)
# Shared random generator with a fixed seed, so the synthetic vectors built
# from it are the same on every run.
_RNG = np.random.default_rng(42)

# Vocabulary listing, one group of four related words per source line.
_WORDS = (
    "king queen man woman "
    "paris france berlin germany "
    "cat dog puppy kitten "
    "hot cold warm cool "
    "big small large tiny "
    "fast slow quick sluggish "
    "happy sad joyful melancholy "
    "doctor nurse hospital patient "
    "apple orange fruit banana "
    "computer keyboard mouse screen"
).split()

# Number of components in every synthetic word vector.
DIM = 64
# искусственные векторы для слов (используется только во втором задании, но оставляем)
def _make_vectors():
    """Build the synthetic, unit-length word vectors.

    Fourteen random unit "axis" directions are drawn first.  Every word is
    then a weighted sum of one or two axes plus a small random noise vector,
    and the result is normalised to unit length.

    The order of the random draws (all axes first, then one noise vector per
    word in table order) determines the resulting numbers, so the tables
    below must not be reordered.

    Returns:
        dict mapping each word to its normalised ``DIM``-dimensional vector,
        in table order.
    """
    axis_names = (
        "gender", "royal", "country", "city", "animal", "young", "temp",
        "size", "speed", "mood", "medical", "food", "tech", "periph",
    )
    axes = {}
    for axis_name in axis_names:
        direction = _RNG.standard_normal(DIM)
        axes[axis_name] = direction / np.linalg.norm(direction)

    # (word, ((weight, axis), ...)) -- terms are summed left to right.
    recipes = (
        ("king", ((2.0, "royal"), (1.8, "gender"))),
        ("queen", ((2.0, "royal"), (-1.8, "gender"))),
        ("man", ((1.8, "gender"),)),
        ("woman", ((-1.8, "gender"),)),
        ("france", ((2.0, "country"),)),
        ("germany", ((2.0, "country"),)),
        ("paris", ((2.0, "country"), (2.0, "city"))),
        ("berlin", ((2.0, "country"), (2.0, "city"))),
        ("cat", ((2.0, "animal"),)),
        ("dog", ((2.0, "animal"),)),
        ("kitten", ((2.0, "animal"), (2.0, "young"))),
        ("puppy", ((2.0, "animal"), (2.0, "young"))),
        ("hot", ((2.0, "temp"),)),
        ("cold", ((-2.0, "temp"),)),
        ("warm", ((1.0, "temp"),)),
        ("cool", ((-1.0, "temp"),)),
        ("big", ((2.0, "size"),)),
        ("small", ((-2.0, "size"),)),
        ("large", ((1.8, "size"),)),
        ("tiny", ((-1.8, "size"),)),
        ("fast", ((2.0, "speed"),)),
        ("slow", ((-2.0, "speed"),)),
        ("quick", ((1.8, "speed"),)),
        ("sluggish", ((-1.8, "speed"),)),
        ("happy", ((2.0, "mood"),)),
        ("sad", ((-2.0, "mood"),)),
        ("joyful", ((1.8, "mood"),)),
        ("melancholy", ((-1.8, "mood"),)),
        ("doctor", ((2.0, "medical"), (0.5, "gender"))),
        ("nurse", ((2.0, "medical"), (-0.5, "gender"))),
        ("hospital", ((2.0, "medical"),)),
        ("patient", ((1.0, "medical"),)),
        ("apple", ((2.0, "food"),)),
        ("orange", ((2.0, "food"),)),
        ("fruit", ((2.0, "food"),)),
        ("banana", ((2.0, "food"),)),
        ("computer", ((2.0, "tech"),)),
        ("keyboard", ((2.0, "tech"), (2.0, "periph"))),
        ("mouse", ((2.0, "tech"), (2.0, "periph"))),
        ("screen", ((2.0, "tech"), (1.0, "periph"))),
    )

    raw = {}
    for word, terms in recipes:
        (lead_weight, lead_axis), *other_terms = terms
        # Start from the first term (not from zeros) and add the rest in order.
        combined = lead_weight * axes[lead_axis]
        for weight, axis_name in other_terms:
            combined = combined + weight * axes[axis_name]
        # One noise draw per word, taken after the axis terms are combined.
        raw[word] = combined + _RNG.standard_normal(DIM) * 0.05

    return {word: vec / np.linalg.norm(vec) for word, vec in raw.items()}
# Word -> vector table, built once when the module is loaded.
VECTORS = _make_vectors()
# Vocabulary words in the table's insertion order.
VOCAB = [*VECTORS]
# поиск аналогий (задание 2, не меняется)
def find_analogy(word_a: str, word_b: str, word_c: str, topn: int = 5):
    """Rank vocabulary words by similarity to ``vec(A) - vec(B) + vec(C)``.

    Args:
        word_a, word_b, word_c: keys of ``VECTORS``; an unknown word raises
            ``KeyError``.
        topn: maximum number of results to return.

    Returns:
        A list of ``(word, cosine_similarity)`` pairs, best match first.
        ``word_a`` and ``word_b`` are left out of the candidates unless one
        of them is the same word as ``word_c``.
    """
    query = VECTORS[word_a] - VECTORS[word_b] + VECTORS[word_c]
    length = np.linalg.norm(query)
    if length > 0:
        query = query / length

    skipped = {word_a.lower(), word_b.lower()}
    skipped.discard(word_c.lower())

    ranked = sorted(
        (
            (candidate, cosine_similarity(query, vector))
            for candidate, vector in VECTORS.items()
            if candidate.lower() not in skipped
        ),
        key=lambda pair: pair[1],
        reverse=True,
    )
    return ranked[:topn]
def _verify():
    """Print the top analogy result for three reference queries.

    Each line shows the query, the best match, and whether it equals the
    expected word.
    """
    expectations = {
        ("king", "man", "woman"): "queen",
        ("paris", "france", "germany"): "berlin",
        ("cat", "kitten", "puppy"): "dog",
    }
    for (a, b, c), expected in expectations.items():
        top = find_analogy(a, b, c, topn=1)[0][0]
        if top == expected:
            status = "OK"
        else:
            status = f"FAIL (got {top})"
        print(f" {a} - {b} + {c} = {top} [{status}]")


# Runs the self-check (and prints its report) as soon as the module is loaded.
_verify()
# ---------- стили ----------
# Window and panel backgrounds share the same light grey.
BG = PANEL = "#f5f5f5"
# Background of cards, entries and non-accent buttons.
CARD = "#ffffff"
# Accent colours: primary accent and the colour of the "good" text tag.
ACCENT, ACCENT2 = "#4a7cff", "#2e8b57"
# Default and de-emphasised text colours.
TEXT, MUTED = "#222222", "#666666"
# Outline colour of entries, buttons and result boxes.
BORDER = "#d0d0d0"
# Status colours.
RED, AMBER, GREEN = "#cc4444", "#cc8800", "#228822"
def make_entry(parent, placeholder="", width=18, **kwargs):
    """Create a flat ``tk.Entry`` with optional placeholder (hint) text.

    The placeholder is shown in the muted colour while the field is empty,
    removed when the field gains focus, and restored when the field loses
    focus while still empty.  Extra keyword arguments are passed on to
    ``tk.Entry``.
    """
    entry = tk.Entry(
        parent,
        width=width,
        bg=CARD,
        fg=TEXT,
        insertbackground=ACCENT,
        relief="flat",
        highlightthickness=1,
        highlightbackground=BORDER,
        highlightcolor=ACCENT,
        font=("Consolas", 11),
        **kwargs,
    )

    def _hide_hint(_event):
        # Only wipe the field when it still contains the hint text.
        if entry.get() == placeholder:
            entry.delete(0, tk.END)
            entry.configure(fg=TEXT)

    def _restore_hint(_event):
        if entry.get():
            return
        entry.insert(0, placeholder)
        entry.configure(fg=MUTED)

    if placeholder:
        entry.insert(0, placeholder)
        entry.configure(fg=MUTED)

    for sequence, handler in (("<FocusIn>", _hide_hint), ("<FocusOut>", _restore_hint)):
        entry.bind(sequence, handler)
    return entry
def make_btn(parent, text, cmd, accent=False):
    """Create a flat bold button; ``accent=True`` gives the accent colour scheme."""
    if accent:
        background, foreground = ACCENT, "#FFFFFF"
    else:
        background, foreground = CARD, TEXT
    return tk.Button(
        parent,
        text=text,
        command=cmd,
        bg=background,
        fg=foreground,
        activebackground=ACCENT,
        activeforeground="#fff",
        relief="flat",
        padx=14,
        pady=6,
        font=("Segoe UI", 10, "bold"),
        cursor="hand2",
        highlightthickness=1,
        highlightbackground=BORDER,
    )
def make_label(parent, text, size=11, colour=None, bold=False):
    """Create a label on the panel background; ``colour`` defaults to ``TEXT``."""
    weight = ("bold",) if bold else ()
    font = ("Segoe UI", size) + weight
    foreground = colour if colour else TEXT
    return tk.Label(parent, text=text, bg=PANEL, fg=foreground, font=font)
def section_card(parent):
    """Create a padded frame in the card colour, used to group one section."""
    return tk.Frame(parent, bg=CARD, padx=18, pady=14)
def result_box(parent, height=10):
    """Create a read-only scrolled text box with the colour tags used for output.

    The widget starts in the disabled state; use ``write`` and ``clear_box``
    to change its contents.
    """
    box = scrolledtext.ScrolledText(
        parent,
        height=height,
        bg=PANEL,
        fg=TEXT,
        font=("Consolas", 10),
        relief="flat",
        wrap=tk.WORD,
        state=tk.DISABLED,
        highlightthickness=1,
        highlightbackground=BORDER,
        insertbackground=ACCENT,
        padx=10,
        pady=8,
    )
    bold_mono = ("Consolas", 10, "bold")
    # Tags are registered in this fixed order.
    tag_styles = (
        ("header", {"foreground": ACCENT, "font": bold_mono}),
        ("good", {"foreground": ACCENT2}),
        ("amber", {"foreground": AMBER}),
        ("muted", {"foreground": MUTED}),
        ("error", {"foreground": RED}),
        ("eq", {"foreground": ACCENT, "font": bold_mono}),
    )
    for tag_name, options in tag_styles:
        box.tag_config(tag_name, **options)
    return box
def write(box, text, tag=None):
    """Append ``text`` to a read-only text box, optionally styled with ``tag``.

    The box is made editable for the insertion, scrolled to the end, and
    disabled again.
    """
    box.configure(state=tk.NORMAL)
    tags = (tag,) if tag else ()
    box.insert(tk.END, text, *tags)
    box.see(tk.END)
    box.configure(state=tk.DISABLED)
def clear_box(box):
    """Remove all text from a read-only text box, leaving it disabled."""
    # The widget only accepts edits while its state is NORMAL.
    box.config(state=tk.NORMAL)
    box.delete("1.0", tk.END)
    box.config(state=tk.DISABLED)
def bar(sim: float, width=20) -> str:
    """Render a similarity in ``[-1, 1]`` as a text bar of exactly ``width`` cells.

    ``-1`` maps to an empty bar and ``+1`` to a full one.  Values outside the
    range are clamped, and NaN is drawn as an empty bar, so the function never
    raises for a float input and the bar never exceeds ``width``.

    Args:
        sim: similarity value, nominally between -1 and 1.
        width: total number of cells in the bar.

    Returns:
        ``"█"`` repeated for the filled part followed by ``"░"`` for the rest.
    """
    if sim != sim:
        # NaN is the only value not equal to itself; int(NaN) would raise.
        filled = 0
    else:
        # Clamp the fraction before scaling so infinities and out-of-range
        # values cannot overflow the bar or fail the int() conversion.
        fraction = min(1.0, max(0.0, (sim + 1) / 2))
        filled = int(fraction * width)
    return "█" * filled + "░" * (width - filled)
def sim_colour(sim: float) -> str:
    """Map a similarity to a text tag: ``good`` (>= 0.7), ``amber`` (>= 0.3) or ``muted``."""
    thresholds = ((0.7, "good"), (0.3, "amber"))
    for lower_bound, tag in thresholds:
        if sim >= lower_bound:
            return tag
    return "muted"
# ---------- ЗАДАНИЕ 1: Сравнение векторов по координатам ----------
class SimilarityTab(tk.Frame):
    """Task 1 tab: cosine similarity of two vectors entered by coordinates.

    The tab holds three cards: built-in checks of ``cosine_similarity``, a
    form that compares two user-supplied vectors, and a table of sample pairs.
    """

    # Sample vector pairs for a quick demonstration:
    # (vector A, vector B, description).
    VECTOR_EXAMPLES = [
        ("[1, 0, 0]", "[0, 1, 0]", "ортогональные (cos=0)"),
        ("[1, 0, 0]", "[1, 0, 0]", "одинаковые (cos=1)"),
        ("[1, 0, 0]", "[-1, 0, 0]", "противоположные (cos=-1)"),
        ("[2, 3, 4]", "[1, 2, 3]", "произвольные 3D"),
    ]

    # Hint texts shown in the untouched input fields; they are not valid input.
    PLACEHOLDER_A = "1, 0, 0, ..."
    PLACEHOLDER_B = "0, 1, 0, ..."

    def __init__(self, parent):
        super().__init__(parent, bg=PANEL)
        self._build()

    def _build(self):
        """Create the header and the three cards of the tab."""
        hdr = tk.Frame(self, bg=PANEL, pady=6)
        hdr.pack(fill=tk.X, padx=16)
        make_label(hdr, "Косинусное сходство векторов", 14, ACCENT, bold=True).pack(side=tk.LEFT)
        make_label(hdr, " — Task 1 (по координатам)", 14, MUTED).pack(side=tk.LEFT)

        # Card 1: built-in checks of cosine_similarity().
        c1 = section_card(self)
        c1.pack(fill=tk.X, padx=16, pady=(4, 8))
        make_label(c1, "Тесты функции cosine_similarity()", 10, MUTED).pack(anchor=tk.W, pady=(0, 6))
        make_btn(c1, "▶ Запустить 3 теста", self._run_tests, accent=True).pack(anchor=tk.W)
        self.test_box = result_box(c1, height=6)
        self.test_box.pack(fill=tk.X, pady=(8, 0))

        # Card 2: compare two vectors typed in by coordinates.
        c2 = section_card(self)
        c2.pack(fill=tk.X, padx=16, pady=(0, 8))
        make_label(c2, "Сравнить два вектора по координатам", 10, MUTED).pack(anchor=tk.W, pady=(0, 6))

        # Input field for vector A.
        row_a = tk.Frame(c2, bg=CARD)
        row_a.pack(anchor=tk.W, fill=tk.X, pady=2)
        make_label(row_a, "Вектор A:", 10, bold=True).pack(side=tk.LEFT, padx=(0, 8))
        self.vec_a_entry = make_entry(row_a, self.PLACEHOLDER_A, width=40)
        self.vec_a_entry.pack(side=tk.LEFT, fill=tk.X, expand=True)

        # Input field for vector B.
        row_b = tk.Frame(c2, bg=CARD)
        row_b.pack(anchor=tk.W, fill=tk.X, pady=2)
        make_label(row_b, "Вектор B:", 10, bold=True).pack(side=tk.LEFT, padx=(0, 8))
        self.vec_b_entry = make_entry(row_b, self.PLACEHOLDER_B, width=40)
        self.vec_b_entry.pack(side=tk.LEFT, fill=tk.X, expand=True)

        # Compute button.
        btn_row = tk.Frame(c2, bg=CARD)
        btn_row.pack(anchor=tk.W, pady=(8, 4))
        make_btn(btn_row, "Вычислить сходство", self._calc_vector_similarity, accent=True).pack(side=tk.LEFT)

        # Label that displays the result or an error message.
        self.vector_result = tk.Label(c2, text="", bg=CARD, fg=ACCENT2, font=("Consolas", 11), anchor="w")
        self.vector_result.pack(anchor=tk.W, pady=(8, 0), fill=tk.X)

        # Input format hint.
        make_label(c2, "Формат: числа через запятую, пробелы не важны (например: 1, -0.5, 3.2)", 9, MUTED).pack(anchor=tk.W, pady=(4, 0))

        # Card 3: table of sample vector pairs.
        c3 = section_card(self)
        c3.pack(fill=tk.X, padx=16, pady=(0, 16))
        make_label(c3, "Примеры векторов", 10, MUTED).pack(anchor=tk.W, pady=(0, 6))
        make_btn(c3, "▶ Показать примеры", self._show_vector_examples, accent=True).pack(anchor=tk.W)
        self.example_box = result_box(c3, height=10)
        self.example_box.pack(fill=tk.X, pady=(8, 0))

    def _parse_vector(self, text: str) -> np.ndarray:
        """Convert a string such as ``'1, 2, 3'`` or ``'[1, 2, 3]'`` to a vector.

        Numbers may be separated by commas and/or whitespace.  One matching
        pair of surrounding ``[]`` or ``()`` is accepted and ignored, which is
        the notation used by ``VECTOR_EXAMPLES``.

        Raises:
            ValueError: if the text is empty, contains no numbers, or contains
                a token that ``float`` cannot parse.
        """
        text = text.strip()
        if len(text) >= 2 and (text[0], text[-1]) in (("[", "]"), ("(", ")")):
            text = text[1:-1].strip()
        if not text:
            raise ValueError("Пустая строка")
        parts = text.replace(',', ' ').split()
        if not parts:
            raise ValueError("Не удалось разобрать числа")
        numbers = [float(p) for p in parts]
        return np.array(numbers)

    def _calc_vector_similarity(self):
        """Compute the cosine similarity of the two entered vectors and show it.

        Input problems (empty field, unparsable text, different dimensions,
        non-finite result) are reported in the result label instead of raising
        inside the Tk callback.
        """
        vec_text_a = self.vec_a_entry.get().strip()
        vec_text_b = self.vec_b_entry.get().strip()
        # An untouched field still holds its hint text; treat it as empty.
        if vec_text_a == self.PLACEHOLDER_A:
            vec_text_a = ""
        if vec_text_b == self.PLACEHOLDER_B:
            vec_text_b = ""
        try:
            vec_a = self._parse_vector(vec_text_a)
            vec_b = self._parse_vector(vec_text_b)
        except ValueError as e:
            self.vector_result.config(text=f" Ошибка ввода: {e}", fg=RED)
            return
        # np.dot raises for 1-D vectors of different length, so check first.
        if vec_a.size != vec_b.size:
            self.vector_result.config(
                text=f" Размерности не совпадают: dim(A)={vec_a.size}, dim(B)={vec_b.size}",
                fg=RED,
            )
            return
        sim = cosine_similarity(vec_a, vec_b)
        # inf/nan components or overflow give a NaN/inf similarity that cannot
        # be coloured or drawn as a bar.
        if not np.isfinite(sim):
            self.vector_result.config(
                text=" Результат не определён: значения слишком велики или не являются числами",
                fg=RED,
            )
            return
        col = ACCENT2 if sim >= 0.7 else (AMBER if sim >= 0.3 else MUTED)
        # Extra information: the dimensions of both vectors.
        info = f" dim(A)={vec_a.size}, dim(B)={vec_b.size} | "
        self.vector_result.config(
            text=f"{info}cos(θ) = {sim:+.6f} {bar(sim, 20)}",
            fg=col
        )

    def _run_tests(self):
        """Run three fixed checks of ``cosine_similarity`` and print a report."""
        clear_box(self.test_box)
        write(self.test_box, "═" * 52 + "\n", "header")
        write(self.test_box, " UNIT TESTS cosine_similarity(a, b)\n", "header")
        write(self.test_box, "═" * 52 + "\n", "header")
        test_cases = [
            (np.array([1, 0, 0]), np.array([1, 0, 0]), 1.0, "идентичные векторы"),
            (np.array([1, 0, 0]), np.array([-1, 0, 0]), -1.0, "противоположные векторы"),
            (np.array([1, 0, 0]), np.array([0, 1, 0]), 0.0, "ортогональные векторы"),
        ]
        all_ok = True
        for i, (a, b, expected, label) in enumerate(test_cases, 1):
            got = cosine_similarity(a, b)
            passed = abs(got - expected) < 1e-6
            if not passed:
                all_ok = False
            write(self.test_box, f"\nТест {i}: {label}\n", "muted")
            write(self.test_box, f" Ожидается : {expected:+.6f}\n")
            write(self.test_box, f" Получено : {got:+.6f}\n")
            write(self.test_box, f" {'PASS' if passed else 'FAIL'}\n", "good" if passed else "error")
        write(self.test_box, "\n" + "─" * 52 + "\n", "muted")
        summary = " ВСЕ ТЕСТЫ ПРОЙДЕНЫ\n" if all_ok else " ЕСТЬ ОШИБКИ ✗\n"
        write(self.test_box, summary, "good" if all_ok else "error")

    def _show_vector_examples(self):
        """Show a table of the sample vector pairs with their cosine similarity."""
        clear_box(self.example_box)
        write(self.example_box, "═" * 64 + "\n", "header")
        write(self.example_box, f" {'Вектор A':<25} {'Вектор B':<25} {'Сходство':>10} Визуализация\n", "header")
        write(self.example_box, "═" * 64 + "\n", "header")
        for vec_a_str, vec_b_str, _desc in self.VECTOR_EXAMPLES:
            try:
                a = self._parse_vector(vec_a_str)
                b = self._parse_vector(vec_b_str)
                sim = cosine_similarity(a, b)
            except ValueError:
                # Only parsing/shape problems are expected here; anything else
                # should surface instead of being reported as a parse error.
                write(self.example_box, f" {vec_a_str:<25} {vec_b_str:<25} (ошибка парсинга)\n", "error")
            else:
                tag = sim_colour(sim)
                write(self.example_box, f" {vec_a_str:<25} {vec_b_str:<25} ")
                write(self.example_box, f"{sim:>+.4f} {bar(sim, 16)}\n", tag)
        write(self.example_box, "\n" + "═" * 64 + "\n", "header")
        write(self.example_box, " Цвета: ", "muted")
        write(self.example_box, "■ высокое ", "good")
        write(self.example_box, "■ среднее ", "amber")
        write(self.example_box, "■ низкое\n", "muted")
        write(self.example_box, " Формат ввода: числа через запятую, например: 0.5, -1, 2.3\n", "muted")
# ---------- ЗАДАНИЕ 2 (без изменений) ----------
class AnalogiesTab(tk.Frame):
    """Task 2 tab: word analogies of the form ``A − B + C = ?``.

    Offers a form for a custom analogy and a button that runs the preset
    analogies, each with its expected answer.
    """

    # Preset analogies: (A, B, C, expected answer).
    PRESETS = [
        ("king", "man", "woman", "queen"),
        ("paris", "france", "germany", "berlin"),
        ("cat", "kitten", "puppy", "dog"),
        ("big", "large", "small", "tiny"),
        ("happy", "joyful", "sad", "melancholy"),
    ]

    def __init__(self, parent):
        super().__init__(parent, bg=PANEL)
        self._build()

    def _build(self):
        """Create the header, the custom-analogy card and the presets card."""
        self._build_header()
        self._build_custom_card()
        self._build_presets_card()

    def _build_header(self):
        """Create the title row and the formula banner."""
        title_row = tk.Frame(self, bg=PANEL, pady=6)
        title_row.pack(fill=tk.X, padx=16)
        make_label(title_row, "Поиск аналогий", 14, ACCENT, bold=True).pack(side=tk.LEFT)
        make_label(title_row, " — Task 2", 14, MUTED).pack(side=tk.LEFT)

        banner = tk.Frame(self, bg=CARD, padx=18, pady=12)
        banner.pack(fill=tk.X, padx=16, pady=(4, 8))
        make_label(banner, "A − B + C = ?", 15, ACCENT, bold=True).pack(side=tk.LEFT)
        make_label(banner, " пример: king − man + woman ≈ queen", 11, MUTED).pack(side=tk.LEFT)

    def _build_custom_card(self):
        """Create the three word fields, the search button and the result box."""
        card = section_card(self)
        card.pack(fill=tk.X, padx=16, pady=(0, 8))
        make_label(card, "Своя аналогия", 10, MUTED).pack(anchor=tk.W, pady=(0, 6))

        line = tk.Frame(card, bg=CARD)
        line.pack(anchor=tk.W)

        # Field A has no operator in front of it.
        self.eA = make_entry(line, "A", width=12)
        self.eA.pack(side=tk.LEFT, padx=2)

        make_label(line, " − ", 13, MUTED, bold=True).pack(side=tk.LEFT)
        self.eB = make_entry(line, "B", width=12)
        self.eB.pack(side=tk.LEFT, padx=2)

        make_label(line, " + ", 13, MUTED, bold=True).pack(side=tk.LEFT)
        self.eC = make_entry(line, "C", width=12)
        self.eC.pack(side=tk.LEFT, padx=2)

        make_label(line, " = ?", 13, ACCENT, bold=True).pack(side=tk.LEFT, padx=4)
        make_btn(line, "Найти", self._calc_analogy, accent=True).pack(side=tk.LEFT, padx=(8, 0))

        self.analogy_box = result_box(card, height=8)
        self.analogy_box.pack(fill=tk.X, pady=(8, 0))

        vocabulary_line = "Словарь: " + ", ".join(VOCAB)
        make_label(card, vocabulary_line, 9, MUTED).pack(anchor=tk.W, pady=(4, 0))

    def _build_presets_card(self):
        """Create the button and the output box for the preset analogies."""
        card = section_card(self)
        card.pack(fill=tk.X, padx=16, pady=(0, 16))
        make_label(card, "5 классических аналогий", 10, MUTED).pack(anchor=tk.W, pady=(0, 6))
        make_btn(card, "▶ Запустить все 5 примеров", self._run_presets, accent=True).pack(anchor=tk.W)
        self.preset_box = result_box(card, height=18)
        self.preset_box.pack(fill=tk.X, pady=(8, 0))

    def _get_analogy_input(self):
        """Return the three entered words, lower-cased and stripped.

        A field that still shows its placeholder letter yields ``""``.
        """
        words = []
        for entry, hint in ((self.eA, "A"), (self.eB, "B"), (self.eC, "C")):
            value = entry.get().strip().lower()
            words.append("" if value == hint.lower() else value)
        return tuple(words)

    def _calc_analogy(self):
        """Solve the entered analogy and list the ranked candidates."""
        a, b, c = self._get_analogy_input()
        box = self.analogy_box
        clear_box(box)

        # Report the first word that is not in the vocabulary and stop.
        unknown = next((w for w in (a, b, c) if w not in VECTORS), None)
        if unknown is not None:
            write(box, f" Слово «{unknown}» не найдено!\n", "error")
            return

        matches = find_analogy(a, b, c)
        write(box, " Запрос: ", "muted")
        write(box, f"{a} − {b} + {c} = ?\n", "eq")
        write(box, "─" * 46 + "\n", "muted")
        for position, (word, sim) in enumerate(matches, 1):
            if position == 1:
                tag, prefix = "good", " ★ "
            else:
                tag, prefix = sim_colour(sim), f" {position}. "
            write(box, f"{prefix}{word:<14} ", tag)
            write(box, f"sim={sim:+.4f} {bar(sim, 16)}\n", tag)

    def _run_presets(self):
        """Run every preset analogy and print its top three candidates.

        The best candidate is marked with ✓ or ✗ depending on whether it
        matches the expected answer.
        """
        box = self.preset_box
        rule = "═" * 60 + "\n"
        clear_box(box)
        write(box, rule, "header")
        write(box, " АНАЛОГИИ: A − B + C ≈ ?\n", "header")
        write(box, rule, "header")

        for idx, (a, b, c, expected) in enumerate(self.PRESETS, 1):
            results = find_analogy(a, b, c, topn=3)
            best_word = results[0][0]
            correct = best_word.lower() == expected.lower()

            write(box, f"\n [{idx}] ", "muted")
            write(box, f"{a} − {b} + {c}", "eq")
            write(box, f" = ? (ожидается: {expected})\n", "muted")
            write(box, " " + "─" * 50 + "\n", "muted")

            for position, (word, sim) in enumerate(results, 1):
                is_top = position == 1
                tag = "good" if is_top else "muted"
                mark = "★" if is_top else f"{position}."
                write(box, f" {mark} {word:<14} ", tag)
                write(box, f"cos={sim:+.4f} {bar(sim, 14)}", tag)
                # Only the top row carries a verdict; other rows just end the line.
                if not is_top:
                    write(box, "\n", tag)
                elif correct:
                    write(box, " ✓\n", "good")
                else:
                    write(box, " ✗\n", "error")

        write(box, "\n" + rule, "header")
        write(box, " Косинусная мера = dot(target, candidate)\n", "muted")
        write(box, " target = vec(A) − vec(B) + vec(C)\n", "muted")
# ---------- ГЛАВНОЕ ОКНО (без изменений) ----------
class App(tk.Tk):
    """Main window: a two-tab notebook (similarity, analogies) and a status bar."""

    def __init__(self):
        super().__init__()
        self.title("Косинусное сходство и аналогии")
        self.geometry("760x620")
        self.minsize(700, 600)
        self.configure(bg=BG)
        self._build()

    def _build(self):
        """Create the title bar, the notebook with scrollable tabs and the status bar."""
        title_bar = tk.Frame(self, bg=BG, pady=10)
        title_bar.pack(fill=tk.X, padx=20)
        tk.Label(title_bar, text="Задание 1",
                 font=("Segoe UI", 10), bg=BG, fg=MUTED).pack(side=tk.RIGHT)
        sep = tk.Frame(self, bg=BORDER, height=1)
        sep.pack(fill=tk.X)

        style = ttk.Style(self)
        style.theme_use("clam")
        style.configure("Custom.TNotebook", background=BG, borderwidth=0, tabmargins=0)
        style.configure("Custom.TNotebook.Tab",
                        background=PANEL, foreground=MUTED,
                        padding=[20, 8], borderwidth=0,
                        font=("Segoe UI", 11))
        style.map("Custom.TNotebook.Tab",
                  background=[("selected", CARD)],
                  foreground=[("selected", ACCENT)])

        nb = ttk.Notebook(self, style="Custom.TNotebook")
        nb.pack(fill=tk.BOTH, expand=True)
        sim_frame = tk.Frame(nb, bg=PANEL)
        ana_frame = tk.Frame(nb, bg=PANEL)
        nb.add(sim_frame, text=" Задание 1 - Сходство ")
        nb.add(ana_frame, text=" Задание 2 - Аналогии ")

        # Widget path of each notebook page -> its scrollable canvas, so the
        # wheel handler can find the canvas of the tab that is currently shown.
        canvases = {}
        for frame, TabClass in [(sim_frame, SimilarityTab), (ana_frame, AnalogiesTab)]:
            canvas = tk.Canvas(frame, bg=PANEL, highlightthickness=0)
            scroll = ttk.Scrollbar(frame, orient=tk.VERTICAL, command=canvas.yview)
            scroll.pack(side=tk.RIGHT, fill=tk.Y)
            canvas.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
            canvas.configure(yscrollcommand=scroll.set)
            inner = TabClass(canvas)
            win = canvas.create_window((0, 0), window=inner, anchor="nw")

            # Defaults bind this iteration's canvas/window to the callback.
            def _on_configure(e, c=canvas, w=win):
                # Keep the embedded tab as wide as the canvas and refresh the
                # scrollable area.
                c.itemconfig(w, width=c.winfo_width())
                c.configure(scrollregion=c.bbox("all"))

            canvas.bind("<Configure>", _on_configure)
            inner.bind("<Configure>", lambda e, c=canvas: c.configure(scrollregion=c.bbox("all")))
            canvases[str(frame)] = canvas

        def _on_mousewheel(event):
            # nb.select() returns the widget path of the selected page.
            target = canvases.get(nb.select())
            if target is not None:
                target.yview_scroll(-1 * (event.delta // 120), "units")

        # bind_all is application-wide, so a per-canvas bind_all would keep
        # only the last handler and always scroll the last tab.  Register one
        # handler that scrolls whichever tab is selected.
        self.bind_all("<MouseWheel>", _on_mousewheel)

        status = tk.Frame(self, bg=CARD, height=28)
        status.pack(fill=tk.X, side=tk.BOTTOM)
        status.pack_propagate(False)
        tk.Label(status,
                 text=f" Словарь: {len(VOCAB)} слов · Размерность: {DIM}D · NumPy {np.__version__}",
                 bg=CARD, fg=MUTED, font=("Consolas", 9), anchor="w").pack(side=tk.LEFT, padx=8)
if __name__ == "__main__":
    # Create the main window and enter the Tk event loop only when the file
    # is executed as a script.
    app = App()
    app.mainloop()