#!/usr/bin/env python3
"""
Find assets that exist in a system export but are missing from a filled Excel file.
Default comparison:
system export column: Host.Fqdn
filled Excel column: Hostname
Examples:
python compare_assets.py system.xlsx filled.xlsx
python compare_assets.py system.xlsx filled.xlsx -o missing_assets.xlsx
python compare_assets.py system.xlsx filled.xlsx --short-host
"""
from __future__ import annotations
import argparse
import sys
from pathlib import Path
import pandas as pd
DEFAULT_SYSTEM_KEY = "Host.Fqdn"
DEFAULT_FILLED_KEY = "Hostname"
def normalize_host(value: object, short_host: bool = False) -> str:
if pd.isna(value):
return ""
text = str(value).strip().lower().rstrip(".")
if short_host:
text = text.split(".", 1)[0]
return text
def read_excel(path: Path, sheet: str | int | None) -> pd.DataFrame:
if not path.exists():
raise FileNotFoundError(f"File not found: {path}")
sheet_name: str | int = 0 if sheet is None else sheet
return pd.read_excel(path, sheet_name=sheet_name, dtype=str)
def require_column(df: pd.DataFrame, column: str, file_label: str) -> None:
if column not in df.columns:
available = ", ".join(map(str, df.columns))
raise ValueError(
f'Column "{column}" was not found in {file_label}.\n'
f"Available columns: {available}"
)
def build_missing_report(
system_df: pd.DataFrame,
filled_df: pd.DataFrame,
system_key: str,
filled_key: str,
short_host: bool,
) -> tuple[pd.DataFrame, pd.DataFrame]:
require_column(system_df, system_key, "system export")
require_column(filled_df, filled_key, "filled Excel")
system = system_df.copy()
filled = filled_df.copy()
system["_compare_key"] = system[system_key].map(lambda value: normalize_host(value, short_host))
filled["_compare_key"] = filled[filled_key].map(lambda value: normalize_host(value, short_host))
filled_keys = set(filled["_compare_key"])
filled_keys.discard("")
missing = system[
(system["_compare_key"] != "") & (~system["_compare_key"].isin(filled_keys))
].copy()
summary = pd.DataFrame(
[
["System rows", len(system_df)],
["Filled Excel rows", len(filled_df)],
["Unique system keys", system["_compare_key"].replace("", pd.NA).dropna().nunique()],
["Unique filled keys", filled["_compare_key"].replace("", pd.NA).dropna().nunique()],
["Missing rows", len(missing)],
["Comparison mode", "short host" if short_host else "full FQDN"],
["System key column", system_key],
["Filled key column", filled_key],
],
columns=["Metric", "Value"],
)
missing.insert(0, "CompareKey", missing["_compare_key"])
missing = missing.drop(columns=["_compare_key"])
return missing, summary
def save_report(missing: pd.DataFrame, summary: pd.DataFrame, output_path: Path) -> None:
output_path.parent.mkdir(parents=True, exist_ok=True)