# Загрузка данных


import re
from html import unescape
from urllib.parse import urlsplit

import requests

def extract_headings(markup, level):
    """Return the text of every ``<hN>`` heading found in *markup*.

    Args:
        markup: HTML source to scan.
        level: Heading level ``N`` (1 for ``<h1>``, 2 for ``<h2>``, ...).

    Returns:
        Heading texts in document order. Nested tags are removed, HTML
        entities are decoded and runs of whitespace are collapsed to a
        single space, so the result is text rather than raw inner HTML.
    """
    pattern = rf"<h{level}[^>]*>(.*?)</h{level}>"
    headings = []
    for inner in re.findall(pattern, markup, flags=re.I | re.S):
        # Strip tags before decoding entities so an escaped "&lt;b&gt;"
        # survives as literal text instead of being removed as a tag.
        text = unescape(re.sub(r"<[^>]+>", "", inner))
        headings.append(" ".join(text.split()))
    return headings


def extract_emails(markup):
    """Return the unique e-mail addresses in *markup*, in first-seen order."""
    found = re.findall(
        r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b",
        markup,
    )
    # dict.fromkeys removes duplicates while keeping insertion order.
    return list(dict.fromkeys(found))


def extract_external_links(markup, own_domain):
    """Return unique absolute http(s) links that point outside *own_domain*.

    Args:
        markup: HTML source to scan for ``href`` attributes.
        own_domain: Host name of the site itself, e.g. ``"tproger.ru"``.
            Links to this host or any of its subdomains are skipped.

    Returns:
        External URLs in first-seen order, with HTML entities in the
        attribute value (such as ``&amp;``) decoded.
    """
    own_domain = own_domain.lower()
    external = {}
    hrefs = re.findall(r'href=["\'](https?://[^"\']+)["\']', markup, flags=re.I)
    for raw in hrefs:
        link = unescape(raw)
        try:
            host = urlsplit(link).hostname or ""
        except ValueError:
            # Malformed authority (e.g. broken IPv6 brackets): the host is
            # unknown, so it cannot be matched against the site's own domain.
            host = ""
        # Compare the host only: a substring test on the whole URL would
        # also drop external links that merely mention the site in their
        # path or query string (share buttons, redirects).
        if host == own_domain or host.endswith("." + own_domain):
            continue
        external[link] = None
    return list(external)


# Fetch and report only when executed as a script (or notebook cell), so that
# importing this module does not trigger network access or printing.
if __name__ == "__main__":
    url = "https://tproger.ru/articles/top-55-kursov-python--onlajn-obuchenie-dlya-razrabotchikov-s-nulya-besplatno-i-platno"
    # Without a timeout requests waits indefinitely on a stalled server.
    response = requests.get(url, timeout=10)
    # Fail loudly on 4xx/5xx instead of parsing an error page.
    response.raise_for_status()
    html = response.text

    h1 = extract_headings(html, 1)
    h2 = extract_headings(html, 2)
    emails = extract_emails(html)
    links = extract_external_links(html, "tproger.ru")

    print("ЗАГОЛОВКИ H1:")
    print(h1)
    print("ЗАГОЛОВКИ H2:")
    print(h2)
    print("EMAIL-АДРЕСА")
    print(emails)
    print("ВНЕШНИЕ ССЫЛКИ:")
    print(links)