Загрузка данных


import duckdb
from sqlmodel import SQLModel, Field, Session, create_engine, select
from typing import Optional


class CarSale(SQLModel, table=True):
    id: Optional[int] = Field(default=None, primary_key=True)
    name: str
    year: int
    selling_price: int
    km_driven: int
    fuel: str
    seller_type: str
    transmission: str
    owner: str
    mileage: Optional[float] = None 
    engine: Optional[int] = None    
    max_power: Optional[float] = None 
    torque: Optional[str] = None 
    seats: Optional[int] = None
    
sqlite_url = "sqlite:///cars.db"
engine_db = create_engine(sqlite_url)

def init_cars_database(csv_path: str):
    SQLModel.metadata.create_all(engine_db)
    con = duckdb.connect()
    
    query = f"""
        SELECT 
            trim(name),
            CAST(year AS INTEGER),
            CAST(selling_price AS INTEGER),
            CAST(km_driven AS INTEGER),
            trim(fuel),
            trim(seller_type),
            trim(transmission),
            trim(owner),
            CAST(NULLIF(regexp_extract(mileage, '([0-9.]+)'), '') AS FLOAT),
            CAST(NULLIF(regexp_extract(engine, '([0-9]+)'), '') AS INTEGER),
            CAST(NULLIF(regexp_extract(max_power, '([0-9.]+)'), '') AS FLOAT),
            trim(torque),
            CAST(seats AS INTEGER)
        FROM read_csv_auto('{csv_path}')
    """

    raw_data = con.execute(query).fetchall()
    
    with Session(engine_db) as session:
        for row in raw_data:
            car = CarSale(
                name=row[0],
                year=row[1],
                selling_price=row[2],
                km_driven=row[3],
                fuel=row[4],
                seller_type=row[5],
                transmission=row[6],
                owner=row[7],
                mileage=row[8],
                engine=row[9],
                max_power=row[10],
                torque=row[11],
                seats=row[12]
            )
            session.add(car)
        session.commit()
        
if __name__ == "__main__":
    init_cars_database("Car.csv")