Source code for hedweb.process_form

"""
Handles processing of web form posts in a standardized way.
"""

import json
import os
import tempfile

# Import for type hints
from typing import TYPE_CHECKING

import hed.schema as hs
from hed.errors import HedFileError
from hed.models.hed_string import HedString
from hed.models.sidecar import Sidecar
from hed.models.spreadsheet_input import SpreadsheetInput
from hed.models.tabular_input import TabularInput
from hed.schema import from_string, load_schema_version
from werkzeug.datastructures import FileStorage
from werkzeug.utils import secure_filename

from hedweb.columns import create_column_selections, get_tag_columns
from hedweb.constants import base_constants as bc
from hedweb.constants import file_constants as fc
from hedweb.web_util import (
    form_has_file,
    form_has_option,
    form_has_url,
    get_parsed_name,
)

if TYPE_CHECKING:
    from hed.schema import HedSchema


[docs] class ProcessForm:
[docs] @staticmethod def get_input_from_form(request) -> dict: """Get a dictionary of input from a service request. Parameters: request (Request): A Request object containing user data for the service request. Returns: dict: A dictionary containing input arguments for calling the service request. """ arguments = { bc.REQUEST_TYPE: bc.FROM_FORM, bc.COMMAND: request.form.get(bc.COMMAND_OPTION, ""), bc.APPEND_ASSEMBLED: form_has_option( request.form, bc.APPEND_ASSEMBLED, "on" ), bc.CHECK_FOR_WARNINGS: form_has_option( request.form, bc.CHECK_FOR_WARNINGS, "on" ), bc.EXPAND_DEFS: form_has_option(request.form, bc.EXPAND_DEFS, "on"), bc.INCLUDE_CONTEXT: form_has_option(request.form, bc.INCLUDE_CONTEXT, "on"), bc.INCLUDE_DESCRIPTION_TAGS: form_has_option( request.form, bc.INCLUDE_DESCRIPTION_TAGS, "on" ), bc.INCLUDE_SUMMARIES: form_has_option( request.form, bc.INCLUDE_SUMMARIES, "on" ), bc.LIMIT_ERRORS: form_has_option(request.form, bc.LIMIT_ERRORS, "on"), bc.REMOVE_TYPES_ON: form_has_option(request.form, bc.REMOVE_TYPES_ON, "on"), bc.REPLACE_DEFS: form_has_option(request.form, bc.REPLACE_DEFS, "on"), bc.SHOW_DETAILS: form_has_option(request.form, bc.SHOW_DETAILS, "on"), bc.SPREADSHEET_TYPE: fc.TSV_EXTENSION, } value, skip = create_column_selections(request.form) arguments[bc.COLUMNS_SKIP] = skip arguments[bc.COLUMNS_VALUE] = value arguments[bc.TAG_COLUMNS] = get_tag_columns(request.form) ProcessForm.set_schema_from_request(arguments, request) ProcessForm.set_json_files(arguments, request) ProcessForm.set_queries(arguments, request) ProcessForm.set_input_objects(arguments, request) return arguments
[docs] @staticmethod def set_input_objects(arguments, request): """Extract and set input objects from the request form data. This method processes uploaded files and form data to create appropriate input objects (TabularInput, HedString, SpreadsheetInput) and adds them to the arguments dictionary. Parameters: arguments (dict): Dictionary to store the extracted input objects. request (Request): A Request object containing form data and uploaded files. """ if bc.EVENTS_FILE in request.files and request.files[bc.EVENTS_FILE]: f = request.files[bc.EVENTS_FILE] arguments[bc.EVENTS] = TabularInput( file=f, sidecar=arguments.get(bc.SIDECAR, None), name=secure_filename(f.filename), ) if bc.STRING_INPUT in request.form and request.form[bc.STRING_INPUT]: arguments[bc.STRING_LIST] = [ HedString(request.form[bc.STRING_INPUT], arguments[bc.SCHEMA]) ] if ( bc.SPREADSHEET_FILE in request.files and request.files[bc.SPREADSHEET_FILE].filename ): arguments[bc.WORKSHEET] = request.form.get(bc.WORKSHEET_NAME, None) filename = request.files[bc.SPREADSHEET_FILE].filename file_ext = os.path.splitext(filename)[1] if file_ext.lower() in fc.EXCEL_FILE_EXTENSIONS: arguments[bc.SPREADSHEET_TYPE] = fc.EXCEL_EXTENSION arguments[bc.SPREADSHEET] = SpreadsheetInput( file=request.files[bc.SPREADSHEET_FILE], file_type=fc.EXCEL_EXTENSION, worksheet_name=arguments[bc.WORKSHEET], tag_columns=arguments[bc.TAG_COLUMNS], has_column_names=True, name=filename, ) elif file_ext.lower() in fc.TEXT_FILE_EXTENSIONS: arguments[bc.SPREADSHEET_TYPE] = fc.TSV_EXTENSION arguments[bc.SPREADSHEET] = SpreadsheetInput( file=request.files[bc.SPREADSHEET_FILE], file_type=fc.TSV_EXTENSION, worksheet_name=arguments[bc.WORKSHEET], tag_columns=arguments[bc.TAG_COLUMNS], has_column_names=True, name=filename, )
[docs] @staticmethod def set_json_files(arguments, request): """Extract and set JSON files from the request form data. This method processes uploaded JSON files (sidecars, remodel files, definition files) and creates appropriate objects from them, adding them to the arguments dictionary. Parameters: arguments (dict): Dictionary to store the extracted JSON file objects. request (Request): A Request object containing form data and uploaded files. """ if bc.SIDECAR_FILE in request.files and request.files[bc.SIDECAR_FILE]: f = request.files[bc.SIDECAR_FILE] arguments[bc.SIDECAR] = Sidecar(files=f, name=secure_filename(f.filename)) if bc.REMODEL_FILE in request.files and request.files[bc.REMODEL_FILE]: f = request.files[bc.REMODEL_FILE] name = secure_filename(f.filename) arguments[bc.REMODEL_OPERATIONS] = { "name": name, "operations": json.load(f), } if bc.DEFINITION_FILE in request.files and request.files[bc.DEFINITION_FILE]: f = request.files[bc.DEFINITION_FILE] sidecar = Sidecar(files=f, name=secure_filename(f.filename)) arguments[bc.DEFINITIONS] = sidecar.get_def_dict( arguments[bc.SCHEMA], extra_def_dicts=None )
[docs] @staticmethod def set_queries(arguments, request): """Update arguments with lists of string queries Parameters: arguments (dict): A dictionary with the extracted parameters that are to be processed. request (Request): A Request object containing form data. """ arguments[bc.QUERY_NAMES] = None if bc.QUERY_INPUT in request.form and request.form[bc.QUERY_INPUT]: arguments[bc.QUERIES] = [request.form[bc.QUERY_INPUT]] else: arguments[bc.QUERIES] = None
[docs] @staticmethod def set_schema_from_request(arguments, request): """Create a HedSchema object from form pull-down box and set schema in arguments. Parameters: arguments (dict): Dictionary of parameters to which the schema will be added. request (Request): A Request object containing form data. """ if form_has_option(request.form, bc.SCHEMA_VERSION): ProcessForm.set_schema_from_version(arguments, request) return # The schemas section only if form_has_option( request.form, bc.SCHEMA_UPLOAD_OPTIONS, bc.SCHEMA_FILE_OPTION ) and form_has_file(request.files, bc.SCHEMA_FILE, fc.SCHEMA_EXTENSIONS): arguments[bc.SCHEMA] = ProcessForm.get_schema(request.files[bc.SCHEMA_FILE]) elif form_has_option( request.form, bc.SCHEMA_UPLOAD_OPTIONS, bc.SCHEMA_URL_OPTION ) and form_has_url(request.form, bc.SCHEMA_URL, fc.SCHEMA_EXTENSIONS): arguments[bc.SCHEMA] = ProcessForm.get_schema(request.values[bc.SCHEMA_URL]) elif ( form_has_option( request.form, bc.SCHEMA_UPLOAD_OPTIONS, bc.SCHEMA_FOLDER_OPTION ) and "schema_folder[]" in request.files ): ProcessForm.set_tsv_schema( arguments, request, "schema_folder[]", bc.SCHEMA1 ) if form_has_option( request.form, bc.SECOND_SCHEMA_UPLOAD_OPTIONS, bc.SECOND_SCHEMA_FILE_OPTION ) and form_has_file(request.files, bc.SECOND_SCHEMA_FILE, fc.SCHEMA_EXTENSIONS): arguments[bc.SCHEMA2] = ProcessForm.get_schema( request.files[bc.SECOND_SCHEMA_FILE] ) elif form_has_option( request.form, bc.SECOND_SCHEMA_UPLOAD_OPTIONS, bc.SECOND_SCHEMA_URL_OPTION ) and form_has_url(request.form, bc.SECOND_SCHEMA_URL, fc.SCHEMA_EXTENSIONS): arguments[bc.SCHEMA2] = ProcessForm.get_schema( request.values[bc.SECOND_SCHEMA_URL] ) elif ( form_has_option( request.form, bc.SECOND_SCHEMA_UPLOAD_OPTIONS, bc.SECOND_SCHEMA_FOLDER_OPTION, ) and "second_schema_folder[]" in request.files ): ProcessForm.set_tsv_schema( arguments, request, "second_schema_folder[]", bc.SCHEMA2 )
[docs] @staticmethod def set_tsv_schema(arguments, request, files_key, schema_key): """Set the schema in arguments from a folder of TSV files. This method handles uploaded TSV schema files that are uploaded as a folder structure, saves them to a temporary directory, and loads the schema from the appropriate file. Parameters: arguments (dict): Dictionary of parameters to which the schema will be added. request (Request): A Request object containing form data. files_key (str): The key in the request.files dictionary that contains the TSV files. schema_key (str): The key in the arguments dictionary where the schema will be stored. """ files = request.files.getlist(files_key) with tempfile.TemporaryDirectory() as tmpdir: rel_path = "" for file in files: rel_path = file.filename # Preserves webkitRelativePath from the client save_path = os.path.join(tmpdir, rel_path) # Create any needed subdirectories os.makedirs(os.path.dirname(save_path), exist_ok=True) # Save the file to the temp directory file.save(save_path) filename = os.path.splitext(os.path.basename(rel_path))[0] dir_name = os.path.dirname(rel_path) base_name = filename.rsplit("_", 1)[0] save_path = os.path.join(tmpdir, dir_name, base_name + ".tsv") arguments[schema_key] = hs.load_schema(save_path, name=base_name) return
[docs] @staticmethod def set_schema_from_version(arguments, request): """Set the schema field in arguments from a version string or uploaded file. This method handles schema selection from a version dropdown or from an uploaded schema file when "other" version is selected. Parameters: arguments (dict): Dictionary of parameters to which the schema will be added. request (Request): A Request object containing form data. """ if request.form[bc.SCHEMA_VERSION] != bc.OTHER_VERSION_OPTION: arguments[bc.SCHEMA] = load_schema_version(request.form[bc.SCHEMA_VERSION]) elif form_has_file(request.files, bc.SCHEMA_PATH): f = request.files[bc.SCHEMA_PATH] arguments[bc.SCHEMA] = from_string( f.read(fc.BYTE_LIMIT).decode("utf-8"), schema_format=secure_filename(f.filename), ) else: arguments[bc.SCHEMA] = None
[docs] @staticmethod def get_schema(schema_input=None, version=None, as_xml_string=None) -> "HedSchema": """Return a HedSchema object from the given parameters. Parameters: schema_input (str or FileStorage or None): Input url or file. version (str or None): A schema version string to load, e.g. "8.2.0" or "score_1.1.0". as_xml_string (str or None): A schema in xml string format. Returns: HedSchema: A HedSchema object loaded from the input. Raises: HedFileError: The schema can't be loaded for some reason. """ if isinstance(schema_input, FileStorage): name, extension = get_parsed_name(secure_filename(schema_input.filename)) hed_schema = hs.from_string( schema_input.read(fc.BYTE_LIMIT).decode("utf-8"), schema_format=extension, name=name, ) elif isinstance(schema_input, str): name, extension = get_parsed_name(schema_input, is_url=True) hed_schema = hs.load_schema(schema_input, name=name) elif isinstance(version, str): return hs.load_schema_version(version) elif isinstance(as_xml_string, str): return hs.from_string(as_xml_string, schema_format=".xml") else: raise HedFileError("SCHEMA_NOT_FOUND", "Must provide a loadable schema", "") return hed_schema