Source code for hedweb.sidecar_operations

"""
Performs operations on JSON sidecars, such as validation, conversion, extraction, and merging with spreadsheets.
"""

import json

from hed import schema as hedschema
from hed.errors import ErrorHandler, HedFileError, get_printable_issue_string
from hed.models import df_util
from hed.tools.analysis.annotation_util import df_to_hed, hed_to_df, merge_hed_dict

from hedweb.base_operations import BaseOperations
from hedweb.constants import base_constants as bc
from hedweb.constants import file_constants as fc
from hedweb.web_util import generate_filename, get_schema_versions



[docs]
class SidecarOperations(BaseOperations):
    """Class to perform operations on sidecars.

    The operation to run is selected by the ``command`` attribute and executed through ``process``,
    which dispatches to validation, short/long form conversion, spreadsheet extraction, or
    merging of a spreadsheet into the sidecar.
    """


[docs]
    def __init__(self, arguments=None):
        """Create a SidecarOperations object with default settings, optionally loaded from arguments.

        Parameters:
             arguments (dict or None): Dictionary with parameters extracted from form or service

        """
        # Inputs that the operations work on.
        self.sidecar = None
        self.spreadsheet = None
        self.schema = None

        # What to do and the options controlling how it is done.
        self.command = None
        self.spreadsheet_type = fc.TSV_EXTENSION
        self.include_description_tags = False
        self.check_for_warnings = False

        # Nothing to load: keep the defaults set above.
        if not arguments:
            return
        self.set_input_from_dict(arguments)



[docs]
    def process(self) -> dict:
        """Perform the requested action for the sidecar.

        The inputs are checked first (command, then sidecar, then schema) and the command is then
        dispatched to the matching ``sidecar_*`` method.

        Returns:
            dict: A dictionary of results in standard form.

        Raises:
            HedFileError: If the command is missing or is not a recognized sidecar command.
            HedFileError: If the sidecar is missing for any command other than merge.
            HedFileError: If the schema is missing or is not a HedSchema/HedSchemaGroup for a command that needs it.
            HedFileError: If a required spreadsheet is not found or cannot be loaded.

        """
        if not self.command:
            raise HedFileError("MissingCommand", "Command is missing", "")

        # Merge is the only command allowed to run without a sidecar (sidecar_merge then starts from
        # an empty one). The test must compare the requested command with the merge constant;
        # testing the constant's own truthiness would never trigger this error.
        if not self.sidecar and self.command != bc.COMMAND_MERGE_SPREADSHEET:
            raise HedFileError(
                "MissingSidecarFile",
                "Please give a valid JSON sidecar file to process",
                "",
            )

        # Extract and merge do not use the schema; every other command requires a loaded one.
        schema_free_commands = (bc.COMMAND_EXTRACT_SPREADSHEET, bc.COMMAND_MERGE_SPREADSHEET)
        if self.command not in schema_free_commands:
            schema_types = (hedschema.hed_schema.HedSchema, hedschema.HedSchemaGroup)
            if not self.schema or not isinstance(self.schema, schema_types):
                raise HedFileError("BadHedSchema", "Please provide a valid HedSchema", "")

        if self.command == bc.COMMAND_VALIDATE:
            return self.sidecar_validate()
        if self.command in (bc.COMMAND_TO_SHORT, bc.COMMAND_TO_LONG):
            return self.sidecar_convert()
        if self.command == bc.COMMAND_EXTRACT_SPREADSHEET:
            return self.sidecar_extract()
        if self.command == bc.COMMAND_MERGE_SPREADSHEET:
            return self.sidecar_merge()
        raise HedFileError(
            "UnknownProcessingMethod",
            f"Command {self.command} is missing or invalid",
            "",
        )



[docs]
    def sidecar_convert(self) -> dict:
        """Rewrite the HED strings of the sidecar in long form or in short form.

        The sidecar is validated first with warning checks turned off. If that validation reports
        issues, its result dictionary is returned and no conversion is done.

        Returns:
            dict:  A downloadable response dictionary

        Notes:
            The direction comes from self.command: bc.COMMAND_TO_LONG converts to long form,
            any other command converts to short form.

        """
        self.check_for_warnings = False
        validation = self.sidecar_validate()
        if validation[bc.MSG_CATEGORY] == "warning":
            return validation

        sidecar_name = self.sidecar.name
        tag_form = "long_tag" if self.command == bc.COMMAND_TO_LONG else "short_tag"

        # Convert column by column, writing the converted strings back into the sidecar.
        for column in self.sidecar:
            strings = column.get_hed_strings()
            if not strings.empty:
                df_util.convert_to_form(strings, self.schema, tag_form)
                column.set_hed_strings(strings)

        output_name = generate_filename(
            sidecar_name,
            name_suffix=f"_{tag_form}",
            extension=".json",
            append_datetime=True,
        )
        converted = self.sidecar.get_as_json_string()
        return {
            bc.COMMAND: self.command,
            bc.COMMAND_TARGET: "sidecar",
            "data": converted,
            "output_display_name": output_name,
            bc.SCHEMA_VERSION: get_schema_versions(self.schema),
            "msg_category": "success",
            "msg": f"Sidecar file {sidecar_name} was successfully converted",
        }



[docs]
    def sidecar_extract(self) -> dict:
        """Extract the HED portion of the JSON sidecar as tab-separated spreadsheet text.

        Returns:
            dict: A downloadable response dictionary whose data is the extracted spreadsheet as a TSV string

        """
        # Round-trip through JSON text to get a plain dictionary for hed_to_df.
        sidecar_dict = json.loads(self.sidecar.get_as_json_string())
        tsv_text = hed_to_df(sidecar_dict).to_csv(None, sep="\t", index=False, header=True)

        sidecar_name = self.sidecar.name
        output_name = generate_filename(
            sidecar_name,
            name_suffix="_extracted",
            extension=".tsv",
            append_datetime=True,
        )
        return {
            bc.COMMAND: bc.COMMAND_EXTRACT_SPREADSHEET,
            bc.COMMAND_TARGET: "sidecar",
            "data": tsv_text,
            "output_display_name": output_name,
            "msg_category": "success",
            "msg": f"JSON sidecar {sidecar_name} was successfully extracted",
        }



[docs]
    def sidecar_merge(self) -> dict:
        """Merge the HED content of an edited spreadsheet into the JSON sidecar.

        When no sidecar has been provided, the spreadsheet is merged into an empty dictionary
        and the result is named after "empty_sidecar".

        Returns:
            dict: A downloadable response dictionary whose data is the merged sidecar as a JSON string

        Raises:
            HedFileError: If the spreadsheet is not provided.

        Notes: The allowed option for merge is:
            include_description_tags (bool): Passed to df_to_hed as its description_tag argument.

        """
        if not self.spreadsheet:
            raise HedFileError("MissingSpreadsheet", "Cannot merge spreadsheet with sidecar", "")

        spreadsheet_hed = df_to_hed(self.spreadsheet.dataframe, description_tag=self.include_description_tags)

        # Start from an empty target and replace it with the sidecar contents when one was given.
        merged = {}
        sidecar_name = "empty_sidecar"
        if self.sidecar:
            merged = json.loads(self.sidecar.get_as_json_string())
            sidecar_name = self.sidecar.name

        # merge_hed_dict updates the target dictionary in place.
        merge_hed_dict(merged, spreadsheet_hed)

        merged_json = json.dumps(merged, indent=4)
        output_name = generate_filename(
            sidecar_name,
            name_suffix="_merged_with_spreadsheet",
            extension=".json",
            append_datetime=True,
        )
        return {
            bc.COMMAND: self.command,
            bc.COMMAND_TARGET: "sidecar",
            "data": merged_json,
            "output_display_name": output_name,
            "msg_category": "success",
            "msg": f"JSON sidecar {sidecar_name} was successfully merged",
        }



[docs]
    def sidecar_validate(self) -> dict:
        """Validate the sidecar against the schema and report the outcome in a dictionary.

        Returns:
            dict: A dictionary of response values in standard form. Its message category is
                "warning" with the printable issues as data when validation found issues,
                otherwise "success" with empty data.

        Notes:  The allowed option for validate is:
            check_for_warnings (bool): If True, check for warnings as well as errors.

        """
        sidecar_name = self.sidecar.name
        handler = ErrorHandler(check_for_warnings=self.check_for_warnings)
        issues = self.sidecar.validate(self.schema, name=sidecar_name, error_handler=handler)

        # Assume a clean result; the values are replaced below when issues were reported.
        report = ""
        output_name = sidecar_name
        outcome = "success"
        message = f"JSON file {sidecar_name} had no validation issues"
        if issues:
            report = get_printable_issue_string(issues, f"JSON dictionary {sidecar_name} validation issues")
            output_name = generate_filename(
                sidecar_name,
                name_suffix="validation_issues",
                extension=".txt",
                append_datetime=True,
            )
            outcome = "warning"
            message = f"JSON sidecar {sidecar_name} had validation issues"

        return {
            bc.COMMAND: self.command,
            bc.COMMAND_TARGET: "sidecar",
            "data": report,
            "output_display_name": output_name,
            bc.SCHEMA_VERSION: get_schema_versions(self.schema),
            bc.MSG_CATEGORY: outcome,
            bc.MSG: message,
        }