Pastein: import re from collections import Counter text = "Python's a popular programming language. It's widely used in 'web' and data analysis." # Собственный список стоп-слов

Загрузка данных

import re
from collections import Counter

text = "Python's a popular programming language. It's widely used in 'web' and data analysis."

# Custom English stop-word list.
# Punctuation (apostrophes included) is replaced by spaces before the text is
# tokenized, so a contraction such as "it's" arrives here as "it" + "s"; the
# bare 's' entry is what filters the leftover suffix, while the "it's" entry
# itself cannot match any token produced below.
custom_stopwords = {
    'a', 'an', 'the', 'and', 'or', 'but',
    'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by',
    'is', 'are', 'was', 'were', 'be', 'been', 'being',
    'have', 'has', 'had', 'having',
    'do', 'does', 'did', 'doing',
    'it', 'its', "it's", 's',
}

# Lowercase the text, then turn every character that is neither a word
# character nor whitespace into a single space.
_non_word_char = re.compile(r"[^\w\s]")
text_clean = _non_word_char.sub(" ", text.lower())
words = text_clean.split()

# Drop stop words while keeping the remaining tokens in their original order.
filtered_words = [token for token in words if token not in custom_stopwords]

# Count occurrences and report the five most frequent tokens.
word_counts = Counter(filtered_words)
top_five = word_counts.most_common(5)
print("Топ-5 самых частотных слов:")
for word, count in top_five:
    print(f"  {word}: {count}")













from collections import Counter

# Input text for the step-by-step walkthrough.
text = "Data science is an interdisciplinary field that uses scientific methods."

# Stop words to exclude from the frequency count.
stop_words = {'is', 'an', 'that', 'uses'}

print("Исходный текст:")
print(f"'{text}'\n")

# Step 1: normalize case.
text_lower = text.lower()
print(f"1. Приведение к нижнему регистру:\n'{text_lower}'\n")

# Step 2: strip punctuation without regular expressions.
# Each listed punctuation character is mapped to a space (not deleted) so
# that words separated only by punctuation do not get glued together.
punctuation = '.,!?;:()"\''
punctuation_to_space = str.maketrans(dict.fromkeys(punctuation, ' '))
text_clean = text_lower.translate(punctuation_to_space)

print(f"2. После удаления знаков препинания:\n'{text_clean}'\n")

# Step 3: split on whitespace; runs of spaces collapse automatically.
words = text_clean.split()
print(f"3. Список всех слов:\n{words}\n")

# Step 4: remove stop words, preserving token order.
filtered_words = [token for token in words if token not in stop_words]
print(f"4. Слова после удаления стоп-слов {stop_words}:\n{filtered_words}\n")

# Step 5: tally how often each remaining word occurs.
word_counts = Counter(filtered_words)
print(f"5. Частота слов:\n{dict(word_counts)}\n")

# Step 6: show the five most frequent words.
print("6. Топ-5 самых частотных слов:")
top_five = word_counts.most_common(5)
for word, count in top_five:
    print(f"   '{word}': {count} раз(а)")

Больше возможностей при регистрации: