# Загрузка данных
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Limit how much of a DataFrame pandas prints: at most 10 columns and
# 15 rows before the output is truncated.
pd.options.display.max_columns = 10
pd.options.display.max_rows = 15
# =========================
# TASK 1: build a synthetic sales dataset
# =========================
# Seed NumPy's global generator so every run draws the same sample.
np.random.seed(42)
n_rows = 100

# The random columns are drawn in a fixed order (product, category, sales,
# price, rating); changing the order would change the seeded values.
columns = {}
columns['date'] = pd.date_range('2024-01-01', periods=n_rows, freq='D')
columns['product'] = np.random.choice(['A', 'B', 'C', 'D'], n_rows)
columns['category'] = np.random.choice(
    ['Electronics', 'Clothing', 'Food', 'Books'], n_rows
)
columns['sales'] = np.random.randint(100, 1000, n_rows)
columns['price'] = np.random.uniform(10, 200, n_rows).round(2)
columns['rating'] = np.random.uniform(1, 5, n_rows).round(1)
df = pd.DataFrame(columns)

# Quick look at the result: first rows plus dtypes / non-null counts.
print("=== Исходные данные ===")
print(df.head(5))
df.info()
# =========================
# TASK 2: selecting rows by date
# =========================
# Use the date column as the index so rows can be addressed by date strings.
df_time = df.set_index('date')
print("\n=== Срез дат ===")
# Three selections, printed in turn: a range of days, the month of
# February 2024, and the single day 2024-03-15.
date_selectors = (
    slice('2024-01-10', '2024-01-20'),
    '2024-02',
    '2024-03-15',
)
for selector in date_selectors:
    print(df_time.loc[selector])
# Turn the date index back into an ordinary column.
df_reset = df_time.reset_index()
# =========================
# TASK 3: MultiIndex
# =========================
# Build a two-level (category, product) index and sort it immediately.
# Looking up a full tuple key on an unsorted, non-unique MultiIndex makes
# pandas emit PerformanceWarning ("indexing past lexsort depth").
df_multi = df.set_index(['category', 'product']).sort_index()
print("\n=== MultiIndex ===")
# All rows of one category (selection on the first level only).
print(df_multi.loc['Electronics'].head())
# Rows for one exact (category, product) pair.
print(df_multi.loc[('Electronics', 'A')].head())
# Cross-section: product 'B' across every category.
print(df_multi.xs('B', level='product').head())
# Same data with the level order flipped to (product, category).
df_swapped = df_multi.swaplevel()
# Same data ordered by the product level.
df_sorted_multi = df_multi.sort_index(level='product')
# =========================
# TASK 4: categorical data
# =========================
df['category'] = df['category'].astype('category')
# Bucket ratings into an ordered 1..5 scale. The rounded values are cast to
# int so they match the integer category labels exactly rather than relying
# on float/int coercion.
# NOTE: Series.round() rounds halves to the nearest even number
# (2.5 -> 2, 3.5 -> 4).
ratings = pd.Categorical(
    df['rating'].round().astype(int),
    categories=[1, 2, 3, 4, 5],
    ordered=True,
)
df['rating_cat'] = ratings
# Replace the English category labels with Russian ones.
df['category'] = df['category'].cat.rename_categories({
    'Electronics': 'Электроника',
    'Clothing': 'Одежда',
    'Food': 'Еда',
    'Books': 'Книги',
})
# Demonstrate add/remove: 'Toys' is added and removed again, so the final
# set of categories is the same as before these two lines.
df['category'] = df['category'].cat.add_categories(['Toys'])
df['category'] = df['category'].cat.remove_categories(['Toys'])
print("\n=== Категории ===")
print(df['category'].value_counts())
# `observed` is passed explicitly: grouping by a categorical column without
# it raises a FutureWarning in recent pandas because the default is changing.
# False keeps the behaviour this script has always had.
print(df.groupby('category', observed=False)['sales'].mean())
# =========================
# TASK 5: descriptive statistics
# =========================
print("\n=== Статистика ===")
numeric_cols = ['sales', 'price', 'rating']
print(df[numeric_cols].describe())
sales_col = df['sales']
print("Среднее:", sales_col.mean())
print("Медиана:", sales_col.median())
# Running total of sales in date order, kept in a separate frame so `df`
# itself does not gain the cumulative column.
df_sorted = df.sort_values('date').assign(
    cum_sales=lambda frame: frame['sales'].cumsum()
)
# Rank every row by its sales value (pandas' default ranking settings).
df['sales_rank'] = sales_col.rank()
# =========================
# TASK 6: correlation
# =========================
print("\n=== Корреляция ===")
# Compute the correlation matrix once and reuse it for the printout and
# for the heatmap.
corr_matrix = df[['sales', 'price', 'rating']].corr()
print(corr_matrix)
plt.figure(figsize=(6, 4))
# annot=True writes each coefficient inside its cell.
sns.heatmap(corr_matrix, annot=True)
plt.title("Корреляция")
plt.show()
# =========================
# TASK 7: binning
# =========================
# Four equal-width price intervals, labelled from low to very high.
df['price_bin'] = pd.cut(
    df['price'],
    bins=4,
    labels=['Низкая', 'Средняя', 'Высокая', 'Очень высокая'],
)
# Four quantile-based sales groups (quartiles).
df['sales_quantile'] = pd.qcut(
    df['sales'],
    q=4,
    labels=['Q1', 'Q2', 'Q3', 'Q4'],
)
print("\n=== Бины ===")
print(df['price_bin'].value_counts())
# `price_bin` is categorical, so `observed` is passed explicitly: relying on
# the default raises a FutureWarning in recent pandas. False keeps every
# bin in the result, as before.
print(df.groupby('price_bin', observed=False)['sales'].mean())
# =========================
# TASK 8: time series
# =========================
# Date-indexed, chronologically sorted copy used for the series calculations.
df_ts = df.set_index('date').sort_index()
daily_sales = df_ts['sales']
# Row-to-row change in percent (the first row has no predecessor -> NaN).
df_ts['pct_change'] = daily_sales.pct_change().mul(100)
# Mean over a sliding window of 7 rows (NaN until the window is full).
df_ts['rolling_mean_7'] = daily_sales.rolling(window=7).mean()

# Raw sales and the smoothed series on one set of axes.
_, ax = plt.subplots(figsize=(10, 5))
ax.plot(df_ts['sales'], label='Продажи')
ax.plot(df_ts['rolling_mean_7'], label='Среднее 7 дней')
ax.legend()
ax.grid()
plt.show()
# =========================
# TASK 9: GroupBy aggregation
# =========================
print("\n=== GroupBy ===")
# Per-product summary with a different set of aggregations per column.
print(df.groupby('product').agg({
    'sales': ['mean', 'sum', 'count'],
    'price': 'mean',
    'rating': 'median',
}))
# Named aggregation per (category, product) pair. `observed` is passed
# explicitly so that a categorical `category` column does not raise the
# FutureWarning about the changing default; False keeps the existing
# behaviour.
agg = df.groupby(['category', 'product'], observed=False).agg(
    total_sales=('sales', 'sum'),
    avg_price=('price', 'mean'),
    count=('sales', 'size'),
)
print(agg.head())
# =========================
# TASK 10: pivot table
# =========================
# Total sales per category (rows) and product (columns); combinations with
# no data show 0 instead of NaN. `observed` is passed explicitly because a
# categorical `category` column otherwise raises a FutureWarning about the
# changing default in recent pandas; False keeps the existing behaviour.
pivot = pd.pivot_table(
    df,
    values='sales',
    index='category',
    columns='product',
    aggfunc='sum',
    fill_value=0,
    observed=False,
)
print("\n=== Pivot ===")
print(pivot)
# Grouped bars: one group per category, one bar per product.
pivot.plot(kind='bar', figsize=(10, 5))
plt.title("Продажи по категориям")
plt.show()
# =========================
# TASK 11 (REAL DATA)
# =========================
# One tuple per expense: (date, category, amount, payment method).
expense_rows = [
    ('2026-04-01', 'Еда', 12, 'карта'),
    ('2026-04-02', 'Транспорт', 3, 'наличные'),
    ('2026-04-03', 'Еда', 15, 'карта'),
    ('2026-04-04', 'Развлечения', 20, 'карта'),
    ('2026-04-05', 'Связь', 5, 'карта'),
    ('2026-04-06', 'Еда', 10, 'наличные'),
    ('2026-04-07', 'Транспорт', 4, 'карта'),
    ('2026-04-08', 'Еда', 18, 'карта'),
    ('2026-04-09', 'Развлечения', 25, 'наличные'),
    ('2026-04-10', 'Еда', 14, 'карта'),
]
# Transpose the records into the column-oriented dict used to build the frame.
data = {
    column: list(values)
    for column, values in zip(
        ('date', 'category', 'amount', 'payment'), zip(*expense_rows)
    )
}
# Parse the date strings and use them as the index.
df_my = (
    pd.DataFrame(data)
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .set_index('date')
)
# Amount scaled by a fixed factor of 4.5 (the column name suggests a
# conversion to PLN; confirm the source currency and rate).
df_my = df_my.assign(amount_pln=df_my['amount'] * 4.5)
print("\n=== Мои данные ===")
print(df_my.groupby('category')['amount'].agg(['sum', 'mean']))
print("\nКорреляция:")
# NOTE: amount_pln is amount times a constant, so this correlation is 1.0
# by construction.
print(df_my[['amount', 'amount_pln']].corr())
# Bar chart of the individual expenses. pandas labels bar ticks with the raw
# index values, which for a DatetimeIndex are full timestamps
# ("2026-04-01 00:00:00"), so the ticks are relabelled with plain dates.
ax = df_my['amount'].plot(kind='bar', title="Мои расходы")
ax.set_xticklabels(df_my.index.strftime('%Y-%m-%d'))
# Leave room for the rotated tick labels.
plt.tight_layout()
plt.show()