# Загрузка данных


import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Cap how many columns and rows pandas renders when a DataFrame is printed.
pd.options.display.max_columns = 10
pd.options.display.max_rows = 15

# =========================
# TASK 1
# =========================
# Fixed seed so every run draws the same synthetic dataset.
np.random.seed(42)

n_rows = 100

# The random columns are drawn one after another in this exact order; the
# order matters because every draw advances the shared global random state.
synthetic_columns = {
    'date': pd.date_range('2024-01-01', periods=n_rows, freq='D'),
}
synthetic_columns['product'] = np.random.choice(['A', 'B', 'C', 'D'], n_rows)
synthetic_columns['category'] = np.random.choice(
    ['Electronics', 'Clothing', 'Food', 'Books'], n_rows
)
synthetic_columns['sales'] = np.random.randint(100, 1000, n_rows)
synthetic_columns['price'] = np.random.uniform(10, 200, n_rows).round(2)
synthetic_columns['rating'] = np.random.uniform(1, 5, n_rows).round(1)

df = pd.DataFrame(synthetic_columns)

# Quick look at the result: first rows, then dtypes / non-null counts.
print("=== Исходные данные ===")
print(df.head())
df.info()

# =========================
# TASK 2
# =========================
# Index the frame by date so rows can be selected with date strings.
df_time = df.set_index('date')

print("\n=== Срез дат ===")
# Three selections in turn: a date range (label slices include both ends),
# a whole month via a partial date string, and a single day.
for date_selector in (slice('2024-01-10', '2024-01-20'), '2024-02', '2024-03-15'):
    print(df_time.loc[date_selector])

# Move the date index back into an ordinary column.
df_reset = df_time.reset_index()

# =========================
# TASK 3
# =========================
# Two-level row index: category on the outer level, product on the inner one.
df_multi = df.set_index(['category', 'product'])

# Lexsorted copy used only for the full-tuple lookup below. set_index() keeps
# the rows in their original (unsorted) order, and looking up a key deeper
# than the index's lexsort depth makes pandas emit a PerformanceWarning.
# NOTE(review): the selected rows are expected to keep their relative order
# because the index sort is stable - confirm if exact row order matters.
df_multi_lexsorted = df_multi.sort_index()

print("\n=== MultiIndex ===")
# All rows of one outer-level label.
print(df_multi.loc['Electronics'].head())
# Rows matching one (category, product) pair.
print(df_multi_lexsorted.loc[('Electronics', 'A')].head())
# Cross-section on the inner level only.
print(df_multi.xs('B', level='product').head())

# Same data with the two index levels exchanged / ordered by product first.
df_swapped = df_multi.swaplevel()
df_sorted_multi = df_multi.sort_index(level='product')

# =========================
# TASK 4
# =========================
# Convert the category column to the categorical dtype.
df['category'] = df['category'].astype('category')

# Ratings rounded to whole numbers, stored as an ordered categorical
# (1 < 2 < 3 < 4 < 5).
# NOTE: Series.round() rounds exact halves to the nearest even value
# (e.g. 2.5 -> 2, 3.5 -> 4).
ratings = pd.Categorical(df['rating'].round(),
                         categories=[1, 2, 3, 4, 5],
                         ordered=True)
df['rating_cat'] = ratings

# Replace the English category labels with their Russian equivalents.
df['category'] = df['category'].cat.rename_categories({
    'Electronics': 'Электроника',
    'Clothing': 'Одежда',
    'Food': 'Еда',
    'Books': 'Книги'
})

# Demonstration of adding and then removing a category; the column ends up
# with the same set of categories it started with.
df['category'] = df['category'].cat.add_categories(['Toys'])
df['category'] = df['category'].cat.remove_categories(['Toys'])

print("\n=== Категории ===")
print(df['category'].value_counts())
# observed=False is spelled out: grouping by a categorical column without it
# relies on a deprecated implicit default (FutureWarning in recent pandas),
# and stating it keeps the result stable when that default changes.
print(df.groupby('category', observed=False)['sales'].mean())

# =========================
# TASK 5
# =========================
numeric_columns = ['sales', 'price', 'rating']

print("\n=== Статистика ===")
print(df[numeric_columns].describe())

sales_column = df['sales']
print("Среднее:", sales_column.mean())
print("Медиана:", sales_column.median())

# Date-ordered copy of the data with a running total of sales.
df_sorted = df.sort_values('date').assign(
    cum_sales=lambda frame: frame['sales'].cumsum()
)

# Rank of every row by its sales value (default ranking method).
df['sales_rank'] = sales_column.rank()

# =========================
# TASK 6
# =========================
print("\n=== Корреляция ===")
# Pairwise correlation of the numeric columns, computed once and used for
# both the printed table and the heatmap.
corr_matrix = df[['sales', 'price', 'rating']].corr()
print(corr_matrix)

plt.figure(figsize=(6, 4))
sns.heatmap(corr_matrix, annot=True)
plt.title("Корреляция")
plt.show()

# =========================
# TASK 7
# =========================
# Split prices into four equal-width intervals with descriptive labels.
df['price_bin'] = pd.cut(df['price'], bins=4,
                         labels=['Низкая', 'Средняя', 'Высокая', 'Очень высокая'])

# Split sales into four quantile-based groups.
df['sales_quantile'] = pd.qcut(df['sales'], q=4,
                               labels=['Q1', 'Q2', 'Q3', 'Q4'])

print("\n=== Бины ===")
print(df['price_bin'].value_counts())
# pd.cut returns a categorical column; observed=False is spelled out because
# grouping by a categorical without it relies on a deprecated implicit
# default (FutureWarning in recent pandas).
print(df.groupby('price_bin', observed=False)['sales'].mean())

# =========================
# TASK 8
# =========================
# Date-indexed, chronologically ordered view of the data.
df_ts = df.set_index('date').sort_index()

daily_sales = df_ts['sales']
# Change versus the previous row, expressed in percent.
df_ts['pct_change'] = daily_sales.pct_change().mul(100)
# Mean over a window of 7 consecutive rows (NaN until the window is full).
df_ts['rolling_mean_7'] = daily_sales.rolling(window=7).mean()

plt.figure(figsize=(10, 5))
for column_name, legend_label in (('sales', 'Продажи'),
                                  ('rolling_mean_7', 'Среднее 7 дней')):
    plt.plot(df_ts[column_name], label=legend_label)
plt.legend()
plt.grid()
plt.show()

# =========================
# TASK 9
# =========================
print("\n=== GroupBy ===")
# Per-product summary: several statistics for sales, one each for price
# and rating.
print(df.groupby('product').agg({
    'sales': ['mean', 'sum', 'count'],
    'price': 'mean',
    'rating': 'median'
}))

# Named aggregation per (category, product) pair. observed=False is spelled
# out: if 'category' is a categorical column at this point, grouping without
# it relies on a deprecated implicit default (FutureWarning in recent pandas).
agg = df.groupby(['category', 'product'], observed=False).agg(
    total_sales=('sales', 'sum'),
    avg_price=('price', 'mean'),
    count=('sales', 'size')
)

print(agg.head())

# =========================
# TASK 10
# =========================
# Total sales per category (rows) and product (columns); combinations with
# no rows are shown as 0. observed=False is spelled out: if 'category' is a
# categorical column at this point, pivoting without it relies on a
# deprecated implicit default (FutureWarning in recent pandas).
pivot = pd.pivot_table(df,
                       values='sales',
                       index='category',
                       columns='product',
                       aggfunc='sum',
                       fill_value=0,
                       observed=False)

print("\n=== Pivot ===")
print(pivot)

# Grouped bar chart: one group of bars per category, one bar per product.
pivot.plot(kind='bar', figsize=(10, 5))
plt.title("Продажи по категориям")
plt.show()

# =========================
# TASK 11 (REAL DATA)
# =========================
# Ten consecutive daily expense records, 2026-04-01 through 2026-04-10.
data = {
    'date': [f'2026-04-{day:02d}' for day in range(1, 11)],
    'category': [
        'Еда', 'Транспорт', 'Еда', 'Развлечения', 'Связь',
        'Еда', 'Транспорт', 'Еда', 'Развлечения', 'Еда',
    ],
    'amount': [12, 3, 15, 20, 5, 10, 4, 18, 25, 14],
    'payment': [
        'карта', 'наличные', 'карта', 'карта', 'карта',
        'наличные', 'карта', 'карта', 'наличные', 'карта',
    ],
}

# Parse the date strings and use them as the row index.
df_my = (
    pd.DataFrame(data)
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .set_index('date')
)

# Amount scaled by a fixed factor of 4.5.
# NOTE(review): the column name suggests a conversion to PLN - confirm the
# source currency and the rate.
df_my['amount_pln'] = df_my['amount'].mul(4.5)

print("\n=== Мои данные ===")
amount_by_category = df_my.groupby('category')['amount']
print(amount_by_category.agg(['sum', 'mean']))

# amount_pln is a constant multiple of amount, so this correlation is 1 by
# construction.
print("\nКорреляция:")
print(df_my[['amount', 'amount_pln']].corr())

# Bar chart of the amounts, one bar per row of df_my.
df_my['amount'].plot.bar(title="Мои расходы")
plt.show()