Source code for hed.tools.analysis.column_name_summary

"""Summarize the unique column names in a dataset."""

import json


[docs] class ColumnNameSummary: """Summarize the unique column names in a dataset.""" def __init__(self, name=""): self.name = name self.file_dict = {} self.unique_headers = []
[docs] def update(self, name, columns): """Update the summary based on columns associated with a file. Parameters: name (str): File name associated with the columns. columns (list): List of file names. """ position = self.update_headers(columns) if name not in self.file_dict: self.file_dict[name] = position elif name in self.file_dict and position != self.file_dict[name]: raise ValueError( "FileHasChangedColumnNames", f"{name}: Summary has conflicting column names " + f"Current: {str(columns)} Previous: {str(self.unique_headers[self.file_dict[name]])}", )
[docs] def update_headers(self, column_names): """Update the unique combinations of column names. Parameters: column_names (list): List of column names to update. """ for index, item in enumerate(self.unique_headers): if item == column_names: return index self.unique_headers.append(column_names) return len(self.unique_headers) - 1
[docs] def get_summary(self, as_json=False): """Return summary as an object or in JSON. Parameters: as_json (bool): If False (the default), return the underlying summary object, otherwise transform to JSON. """ patterns = [[] for _ in self.unique_headers] for key, value in self.file_dict.items(): patterns[value].append(key) column_headers = [] for index in range(len(patterns)): column_headers.append({"Column names": self.unique_headers[index], "Files": patterns[index]}) summary = {"Summary name": self.name, "Columns": column_headers, "Number files": len(self.file_dict)} if as_json: return json.dumps(summary, indent=4) else: return summary