Загрузка данных


#!/usr/bin/env python3
"""
Find assets that exist in a system export but are missing from a filled Excel file.

Default comparison:
  system export column: Host.Fqdn
  filled Excel column:  Hostname

Examples:
  python compare_assets.py system.xlsx filled.xlsx
  python compare_assets.py system.xlsx filled.xlsx -o missing_assets.xlsx
  python compare_assets.py system.xlsx filled.xlsx --short-host
"""

from __future__ import annotations

import argparse
import sys
from pathlib import Path

import pandas as pd


DEFAULT_SYSTEM_KEY = "Host.Fqdn"
DEFAULT_FILLED_KEY = "Hostname"


def normalize_host(value: object, short_host: bool = False) -> str:
    if pd.isna(value):
        return ""

    text = str(value).strip().lower().rstrip(".")
    if short_host:
        text = text.split(".", 1)[0]
    return text


def read_excel(path: Path, sheet: str | int | None) -> pd.DataFrame:
    if not path.exists():
        raise FileNotFoundError(f"File not found: {path}")

    sheet_name: str | int = 0 if sheet is None else sheet
    return pd.read_excel(path, sheet_name=sheet_name, dtype=str)


def require_column(df: pd.DataFrame, column: str, file_label: str) -> None:
    if column not in df.columns:
        available = ", ".join(map(str, df.columns))
        raise ValueError(
            f'Column "{column}" was not found in {file_label}.\n'
            f"Available columns: {available}"
        )


def build_missing_report(
    system_df: pd.DataFrame,
    filled_df: pd.DataFrame,
    system_key: str,
    filled_key: str,
    short_host: bool,
) -> tuple[pd.DataFrame, pd.DataFrame]:
    require_column(system_df, system_key, "system export")
    require_column(filled_df, filled_key, "filled Excel")

    system = system_df.copy()
    filled = filled_df.copy()

    system["_compare_key"] = system[system_key].map(lambda value: normalize_host(value, short_host))
    filled["_compare_key"] = filled[filled_key].map(lambda value: normalize_host(value, short_host))

    filled_keys = set(filled["_compare_key"])
    filled_keys.discard("")

    missing = system[
        (system["_compare_key"] != "") & (~system["_compare_key"].isin(filled_keys))
    ].copy()

    summary = pd.DataFrame(
        [
            ["System rows", len(system_df)],
            ["Filled Excel rows", len(filled_df)],
            ["Unique system keys", system["_compare_key"].replace("", pd.NA).dropna().nunique()],
            ["Unique filled keys", filled["_compare_key"].replace("", pd.NA).dropna().nunique()],
            ["Missing rows", len(missing)],
            ["Comparison mode", "short host" if short_host else "full FQDN"],
            ["System key column", system_key],
            ["Filled key column", filled_key],
        ],
        columns=["Metric", "Value"],
    )

    missing.insert(0, "CompareKey", missing["_compare_key"])
    missing = missing.drop(columns=["_compare_key"])

    return missing, summary


def save_report(missing: pd.DataFrame, summary: pd.DataFrame, output_path: Path) -> None:
    output_path.parent.mkdir(parents=True, exist_ok=True)