Source code for hed.schema.schema_validation.compliance_summary

"""Summary report for HED schema compliance checking."""

from hed.schema.hed_schema_constants import HedSectionKey

# Section display names for readable output
_SECTION_DISPLAY_NAMES = {
    HedSectionKey.Tags: "Tags",
    HedSectionKey.UnitClasses: "Unit Classes",
    HedSectionKey.Units: "Units",
    HedSectionKey.UnitModifiers: "Unit Modifiers",
    HedSectionKey.ValueClasses: "Value Classes",
    HedSectionKey.Attributes: "Attributes",
    HedSectionKey.Properties: "Properties",
}



[docs]
class ComplianceSummary:
    """Tracks what was checked during schema compliance validation and the results.

    This provides a structured report of all checks performed, how many entries
    were examined, and how many issues were found per check category.

    Use ``get_summary()`` for a human-readable text report, or access
    ``check_results`` directly for programmatic use.
    """

    def __init__(self, schema_name="", schema_version=""):
        """Initialize a ComplianceSummary.

        Parameters:
            schema_name (str): Display name for the schema being checked.
            schema_version (str): The schema version string.
        """
        self.schema_name = schema_name
        self.schema_version = schema_version
        self.check_results = []
        self._current_check = None


[docs]
    def start_check(self, check_name, description=""):
        """Begin tracking a new compliance check.

        Parameters:
            check_name (str): Short identifier for the check (e.g. "prerelease_version").
            description (str): Human-readable description of what this check validates.
        """
        self._current_check = {
            "name": check_name,
            "description": description,
            "sections_checked": {},
            "entries_checked": 0,
            "entries_skipped": 0,
            "issue_count": 0,
            "sub_checks": [],
        }
        self.check_results.append(self._current_check)



[docs]
    def record_section(self, section_key, entries_checked, entries_skipped=0):
        """Record that a section was examined during the current check.

        Parameters:
            section_key (HedSectionKey or str): The section that was checked.
            entries_checked (int): Number of entries examined in this section.
            entries_skipped (int): Number of entries skipped (e.g. deprecated).
        """
        if self._current_check is None:
            return
        key = str(section_key)
        self._current_check["sections_checked"][key] = {
            "entries_checked": entries_checked,
            "entries_skipped": entries_skipped,
        }
        self._current_check["entries_checked"] += entries_checked
        self._current_check["entries_skipped"] += entries_skipped



[docs]
    def add_sub_check(self, sub_check_name):
        """Record a named sub-check within the current check.

        Parameters:
            sub_check_name (str): Name of the sub-check (e.g. an attribute validator name).
        """
        if self._current_check is None:
            return
        if sub_check_name not in self._current_check["sub_checks"]:
            self._current_check["sub_checks"].append(sub_check_name)



[docs]
    def record_issues(self, issue_count):
        """Record issues found during the current check.

        Parameters:
            issue_count (int): Number of issues found.
        """
        if self._current_check is None:
            return
        self._current_check["issue_count"] += issue_count


    @property
    def total_issues(self):
        """Return total issues across all checks.

        Returns:
            int: Total number of issues found.
        """
        return sum(c["issue_count"] for c in self.check_results)

    @property
    def total_entries_checked(self):
        """Return total entries checked across all checks.

        Returns:
            int: Total number of entries examined.
        """
        return sum(c["entries_checked"] for c in self.check_results)


[docs]
    def get_summary(self, verbose=True):
        """Return a human-readable summary of all compliance checks.

        Parameters:
            verbose (bool): If True, include per-section breakdowns and sub-check lists.

        Returns:
            str: Formatted multi-line summary report.
        """
        lines = []
        lines.append("=" * 70)
        lines.append("HED Schema Compliance Report")
        lines.append("=" * 70)
        if self.schema_name:
            lines.append(f"Schema: {self.schema_name}")
        if self.schema_version:
            lines.append(f"Version: {self.schema_version}")
        lines.append(f"Total issues found: {self.total_issues}")
        lines.append("")

        for i, check in enumerate(self.check_results, 1):
            status = "PASS" if check["issue_count"] == 0 else f"FAIL ({check['issue_count']} issues)"
            lines.append(f"{i}. [{status}] {check['name']}")
            if check["description"]:
                lines.append(f"   {check['description']}")

            if verbose:
                if check["entries_checked"] > 0 or check["entries_skipped"] > 0:
                    parts = [f"{check['entries_checked']} entries checked"]
                    if check["entries_skipped"] > 0:
                        parts.append(f"{check['entries_skipped']} skipped")
                    lines.append(f"   ({', '.join(parts)})")

                if check["sections_checked"] and verbose:
                    for section_str, info in check["sections_checked"].items():
                        display_name = section_str
                        # Try to get a nice display name
                        for sk, dn in _SECTION_DISPLAY_NAMES.items():
                            if str(sk) == section_str:
                                display_name = dn
                                break
                        skip_note = f", {info['entries_skipped']} skipped" if info["entries_skipped"] else ""
                        lines.append(f"     - {display_name}: {info['entries_checked']} checked{skip_note}")

                if check["sub_checks"]:
                    lines.append("   Sub-checks performed:")
                    for sc in check["sub_checks"]:
                        lines.append(f"     - {sc}")
            lines.append("")

        # Summary of what is NOT checked
        lines.append("-" * 70)
        lines.append("Known gaps (not currently checked):")
        lines.append("  - BoolRange attribute validation")
        lines.append("  - Missing descriptions on entries")
        lines.append("  - SuggestedTag/RelatedTag existence (8.3+ schemas)")
        lines.append("  - Unit class must have at least one unit")
        lines.append("  - DefaultUnits must be in the tag's own unit classes")
        lines.append("  - HedID uniqueness across entries")
        lines.append("  - HedID completeness (all entries should have IDs)")
        lines.append("  - Attributes must have exactly one range type")
        lines.append("  - Attributes must have at least one domain")
        lines.append("  - Reserved tag semantics")
        lines.append("  - Prologue/epilogue existence for released schemas")
        lines.append("  - StringRange value validation")
        lines.append("=" * 70)
        return "\n".join(lines)


    def __str__(self):
        return self.get_summary(verbose=False)