import duckdb
from sqlmodel import SQLModel, Field, Session, create_engine, select
from typing import Optional
class CarSale(SQLModel, table=True):
id: Optional[int] = Field(default=None, primary_key=True)
name: str
year: int
selling_price: int
km_driven: int
fuel: str
seller_type: str
transmission: str
owner: str
mileage: Optional[float] = None
engine: Optional[int] = None
max_power: Optional[float] = None
torque: Optional[str] = None
seats: Optional[int] = None
sqlite_url = "sqlite:///cars.db"
engine_db = create_engine(sqlite_url)
def init_cars_database(csv_path: str):
SQLModel.metadata.create_all(engine_db)
con = duckdb.connect()
query = f"""
SELECT
trim(name),
CAST(year AS INTEGER),
CAST(selling_price AS INTEGER),
CAST(km_driven AS INTEGER),
trim(fuel),
trim(seller_type),
trim(transmission),
trim(owner),
CAST(NULLIF(regexp_extract(mileage, '([0-9.]+)'), '') AS FLOAT),
CAST(NULLIF(regexp_extract(engine, '([0-9]+)'), '') AS INTEGER),
CAST(NULLIF(regexp_extract(max_power, '([0-9.]+)'), '') AS FLOAT),
trim(torque),
CAST(seats AS INTEGER)
FROM read_csv_auto('{csv_path}')
"""
raw_data = con.execute(query).fetchall()
with Session(engine_db) as session:
for row in raw_data:
car = CarSale(
name=row[0],
year=row[1],
selling_price=row[2],
km_driven=row[3],
fuel=row[4],
seller_type=row[5],
transmission=row[6],
owner=row[7],
mileage=row[8],
engine=row[9],
max_power=row[10],
torque=row[11],
seats=row[12]
)
session.add(car)
session.commit()
if __name__ == "__main__":
init_cars_database("Car.csv")