siml.preprocessing package

Subpackages

Submodules

siml.preprocessing.converter module

class siml.preprocessing.converter.DefaultFilterFunction

Bases: IFilterFunction

class siml.preprocessing.converter.DefaultLoadFunction(file_type: str, read_npy: bool, read_res: bool, skip_femio: bool, time_series: bool, conversion_function: IConvertFunction | None = None)

Bases: ILoadFunction

class siml.preprocessing.converter.DefaultSaveFunction(main_setting: MainSetting, write_ucd: bool, to_first_order: bool, *, user_save_function: ISaveFunction | None = None)

Bases: ISaveFunction

class siml.preprocessing.converter.IConvertFunction

Bases: object

class siml.preprocessing.converter.IFilterFunction

Bases: object

class siml.preprocessing.converter.ILoadFunction

Bases: object

class siml.preprocessing.converter.ISaveFunction

Bases: object

class siml.preprocessing.converter.RawConverter(main_setting: MainSetting, *, recursive: bool = True, conversion_function: IConvertFunction | None = None, filter_function: IFilterFunction | None = None, load_function: ILoadFunction | None = None, save_function: ISaveFunction | None = None, force_renew: bool = False, read_npy: bool = False, write_ucd: bool = True, read_res: bool = True, max_process: int | None = None, to_first_order: bool = False)

Bases: object

convert(raw_directory: Path | None = None, *, return_results: bool = False) dict[str, Optional[tuple[dict, femio.fem_data.FEMData]]]

Perform conversion.

Parameters:
  • raw_directory (pathlib.Path, optional) – Raw data directory name. If not given, self.setting.data.raw is used instead.

  • return_results (bool, optional) – If True, save results, dump files, and return the converted results (see Returns).

Returns:

Each key is a path to a raw directory. If return_results is False, each value is None. If return_results is True, each value holds the converted values (per the signature, a tuple of a dict and a femio.fem_data.FEMData object).

Return type:

dict[str, Optional[tuple[dict, femio.fem_data.FEMData]]]

convert_single_data(raw_path: Path, *, output_directory: Path | None = None, raise_when_overwrite: bool = False, return_results: bool = False) dict[str, Optional[tuple[dict, femio.fem_data.FEMData]]]

Convert single directory.

Parameters:
  • raw_path (pathlib.Path) – Input data path of raw data.

  • output_directory (pathlib.Path, optional) – If given, this path is used as the output directory.

  • raise_when_overwrite (bool, optional) – If True, raise when the output directory exists. The default is False.

Returns:

Each key is a path to a raw directory. If return_results is False, each value is None. If return_results is True, each value holds the converted values (per the signature, a tuple of a dict and a femio.fem_data.FEMData object).

Return type:

dict[str, Optional[tuple[dict, femio.fem_data.FEMData]]]

classmethod read_settings(settings_yaml, **args)
class siml.preprocessing.converter.SingleDataConverter(setting: ConversionSetting, raw_path: Path, load_function: ILoadFunction, filter_function: IFilterFunction, *, save_function: ISaveFunction | None = None, output_directory: Path | None = None, raise_when_overwrite: bool = False, force_renew: bool = False, return_results: bool = False)

Bases: object

property output_directory: Path
run() tuple[dict, femio.fem_data.FEMData] | None
siml.preprocessing.converter.save_dict_data(output_directory: ~pathlib.Path, dict_data: dict[str, numpy.ndarray], *, dtype=<class 'numpy.float32'>, encrypt_key=None, finished_file='converted', save_dtype_dict: ~typing.Dict | None = None) None

Save dict_data.

Parameters:
  • output_directory (pathlib.Path) – Output directory path.

  • dict_data (dict) – Data dictionary to be saved.

  • dtype (type, optional) – Data type to be saved.

  • encrypt_key (bytes, optional) – Data for encryption.

Return type:

None

siml.preprocessing.scalers_composition module

class siml.preprocessing.scalers_composition.ScalersComposition(variable_name_to_scalers: dict[str, str], scalers_dict: dict[str, siml.preprocessing.siml_scalers.scaler_wrapper.SimlScalerWrapper], max_process: int | None = None, decrypt_key: bytes | None = None)

Bases: object

REGISTERED_KEY: Final[str] = 'variable_name_to_scalers'
classmethod create_from_dict(preprocess_dict: dict, max_process: int | None = None, key: bytes | None = None) ScalersComposition
classmethod create_from_file(converter_parameters_pkl: Path, max_process: int | None = None, key: bytes | None = None) ScalersComposition
get_dumped_object() dict
get_scaler(variable_name: str, allow_missing: bool = False) SimlScalerWrapper
get_scaler_names(group_id: int | None = None) list[str]
get_variable_names(group_id: int | None = None) list[str]
inverse_transform(variable_name: str, data: ndarray | coo_matrix | csr_matrix | csc_matrix) ndarray | coo_matrix | csr_matrix | csc_matrix
inverse_transform_dict(dict_data: dict[str, Union[numpy.ndarray, scipy.sparse._coo.coo_matrix, scipy.sparse._csr.csr_matrix, scipy.sparse._csc.csc_matrix]]) dict[str, Union[numpy.ndarray, scipy.sparse._coo.coo_matrix, scipy.sparse._csr.csr_matrix, scipy.sparse._csc.csc_matrix]]
lazy_partial_fit(scaler_name_to_files: dict[str, list[siml.path_like_objects.siml_files.interface.ISimlNumpyFile]]) None
transform(variable_name: str, data: ndarray | coo_matrix | csr_matrix | csc_matrix) ndarray | coo_matrix | csr_matrix | csc_matrix
transform_dict(dict_data: dict[str, Union[numpy.ndarray, scipy.sparse._coo.coo_matrix, scipy.sparse._csr.csr_matrix, scipy.sparse._csc.csc_matrix]]) dict[str, Union[numpy.ndarray, scipy.sparse._coo.coo_matrix, scipy.sparse._csr.csr_matrix, scipy.sparse._csc.csc_matrix]]
transform_file(variable_name: str, siml_file: ISimlNumpyFile) ndarray | coo_matrix | csr_matrix | csc_matrix

siml.preprocessing.scaling_converter module

class siml.preprocessing.scaling_converter.Config

Bases: object

arbitrary_types_allowed = True
frozen = True
init = True
class siml.preprocessing.scaling_converter.PreprocessInnerSettings(preprocess_dict: 'dict', interim_directories: 'list[pathlib.Path]', preprocessed_root: 'pathlib.Path', recursive: 'bool' = True, REQUIRED_FILE_NAMES: 'Optional[list[str]]' = None, FINISHED_FILE: 'str' = 'preprocessed', PREPROCESSORS_PKL_NAME: 'str' = 'preprocessors.pkl', cached_interim_directories: 'Optional[list[SimlDirectory]]' = None)

Bases: object

FINISHED_FILE: str = 'preprocessed'
PREPROCESSORS_PKL_NAME: str = 'preprocessors.pkl'
REQUIRED_FILE_NAMES: list[str] | None = None
cached_interim_directories: list[siml.path_like_objects.siml_directory.SimlDirectory] | None = None
collect_interim_directories() list[siml.path_like_objects.siml_directory.SimlDirectory]
classmethod default_list_check(v)
get_default_preprocessors_pkl_path() Path
get_output_directory(data_directory: Path) Path
get_scaler_fitting_files(variable_name: str) list[siml.path_like_objects.siml_files.interface.ISimlNumpyFile]
interim_directories: list[pathlib.Path]
preprocess_dict: dict
preprocessed_root: Path
recursive: bool = True
class siml.preprocessing.scaling_converter.ScalingConverter(main_setting: MainSetting, *, force_renew: bool = False, save_func: IScalingSaveFunction | None = None, max_process: int | None = None, allow_missing: bool = False, recursive: bool = True, scalers: ScalersComposition | None = None)

Bases: object

This is a facade class for the scaling process.

fit_transform(group_id: int | None = None) None

This function consists of the following three steps:
  • Determine the parameters of the scalers by reading data files lazily
  • Transform the interim data and save the results
  • Save the parameter file

Parameters:

group_id (int, optional) – ID specifying which chunk (group) of variables to preprocess. Useful when a MemoryError occurs if all variables are preprocessed on one node. If not specified, all variables are processed.

Return type:

None

inverse_transform(dict_data: dict[str, Union[numpy.ndarray, scipy.sparse._coo.coo_matrix, scipy.sparse._csr.csr_matrix, scipy.sparse._csc.csc_matrix]]) dict[str, Union[numpy.ndarray, scipy.sparse._coo.coo_matrix, scipy.sparse._csr.csr_matrix, scipy.sparse._csc.csc_matrix]]
lazy_fit_all(*, group_id: int | None = None) None

Determine preprocessing parameters by reading data files lazily.

Parameters:

group_id (int, optional) – ID specifying which chunk (group) of variables to preprocess. Useful when a MemoryError occurs if all variables are preprocessed on one node. If not specified, all variables are processed.

Return type:

None

classmethod read_pkl(main_setting: MainSetting, converter_parameters_pkl: Path, key: bytes | None = None)
classmethod read_settings(settings_yaml: Path, **args)
save() None

Save the parameters of the scaling converters.

transform_interim(*, group_id: int | None = None) None

Apply the scaling process to the data in the interim directories and save the results in the preprocessed directory.

Parameters:

group_id (int, optional) – ID specifying which chunk (group) of variables to preprocess. Useful when a MemoryError occurs if all variables are preprocessed on one node. If not specified, all variables are processed.

Return type:

None

Module contents