siml package

Subpackages

Submodules

siml.config module

siml.data_parallel module

class siml.data_parallel.DataParallel(module, device_ids=None, output_device=None, dim=0)

Bases: DataParallel

scatter(inputs, kwargs, device_ids)
training: bool
siml.data_parallel.scatter_core(inputs, target_gpus, dim=0)
siml.data_parallel.scatter_kwargs(inputs, kwargs, target_gpus, dim=0)

siml.datasets module

class siml.datasets.BaseDataset(x_variable_names, y_variable_names, directories, *, supports=None, num_workers=0, allow_no_data=False, recursive=True, decrypt_key=None, required_file_names=None, **kwargs)

Bases: Dataset

class siml.datasets.CollateFunctionGenerator(*, time_series=False, dict_input=False, dict_output=False, use_support=False, element_wise=False, data_parallel=False, input_time_series_keys=None, output_time_series_keys=None, input_time_slices=None, output_time_slices=None)

Bases: object

class siml.datasets.DataDict

Bases: dict

property device
to(device)
class siml.datasets.ElementWiseDataset(x_variable_names, y_variable_names, directories, *, supports=None, num_workers=0, allow_no_data=False, **kwargs)

Bases: BaseDataset

class siml.datasets.LazyDataset(x_variable_names, y_variable_names, directories, *, supports=None, num_workers=0, allow_no_data=False, recursive=True, decrypt_key=None, required_file_names=None, **kwargs)

Bases: BaseDataset

class siml.datasets.OnMemoryDataset(x_variable_names, y_variable_names, directories, *, supports=None, num_workers=0, allow_no_data=False, **kwargs)

Bases: BaseDataset

class siml.datasets.PreprocessDataset(*args, **kwargs)

Bases: BaseDataset

class siml.datasets.SimplifiedDataset(x_variable_names, y_variable_names, raw_dict_x, supports: list[str] | None = None, *, answer_raw_dict_y=None, num_workers: int = 0, directories: list[pathlib.Path] | None = None, **kwargs)

Bases: BaseDataset

siml.datasets.convert_sparse_info(sparse_info, device=None, non_blocking=False)
siml.datasets.convert_sparse_tensor(sparse_info, device=None, non_blocking=False, merge=False)

Convert sparse info to sparse torch.Tensor objects.

Parameters:
  • sparse_info (list[list[dict[str, torch.Tensor]]]) – Sparse data containing row, col, values, and size in COO format.

  • non_blocking (bool, optional) – Dummy parameter to have unified interface with ignite.utils.convert_tensor.

  • merge (bool, optional) – If True, create one large sparse tensor merged along the diagonal (block diagonal).

Returns:

sparse_tensors

Return type:

numpy.ndarray[torch.Tensor]
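
A minimal usage sketch: build sparse_info by hand and convert it. The key names (row, col, values, size) follow the docstring above; the exact nesting is an assumption based on the signature, and in practice these dicts are usually produced by pad_sparse below.

    import torch
    from siml import datasets

    # One sparse matrix in COO form: a 3x3 matrix with two nonzero entries.
    single = {
        "row": torch.tensor([0, 2]),
        "col": torch.tensor([1, 2]),
        "values": torch.tensor([1.0, 2.0]),
        "size": torch.tensor([3, 3]),
    }
    sparse_info = [[single]]  # nested per the docstring: list[list[dict]]

    sparse_tensors = datasets.convert_sparse_tensor(sparse_info)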

siml.datasets.merge_sparse_tensors(stripped_sparse_info, *, return_coo=True)

Merge sparse tensors.

Parameters:
  • stripped_sparse_info (list[dict[str, torch.Tensor]]) – Sparse data containing row, col, values, and size in COO format.

  • return_coo (bool) – If True, return torch.sparse_coo_tensor. Otherwise, return a sparse info dict. The default is True.

Returns:

merged_sparse_tensor

Return type:

torch.Tensor

siml.datasets.pad_sparse(sparse, length=None)

Pad sparse matrix.

Parameters:
  • sparse (scipy.sparse.coo_matrix) –

  • length (int) –

Returns:

padded_sparse – NOTE: For now, a dict is returned due to the lack of DataLoader support for sparse tensors (https://github.com/pytorch/pytorch/issues/20248). The dict is converted to a sparse tensor when prepare_batch is called.

Return type:

dict
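
A short usage sketch, assuming pad_sparse accepts a scipy COO matrix and returns the dict-based sparse info described in convert_sparse_tensor above.

    import numpy as np
    import scipy.sparse as sp
    from siml import datasets

    # 2x2 adjacency-like matrix with two nonzero entries.
    adj = sp.coo_matrix(
        (np.array([1.0, 1.0]), (np.array([0, 1]), np.array([1, 0]))),
        shape=(2, 2))
    sparse_info = datasets.pad_sparse(adj, length=4)  # pad 2x2 up to 4x4
    print(sparse_info)  # dict with row, col, values, size (per the docstring)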

siml.inferer module

class siml.inferer.Inferer(main_setting: MainSetting, *, scalers: ScalersComposition | None = None, model_path: Path | None = None, converter_parameters_pkl: Path | None = None, load_function: ILoadFunction | None = None, data_addition_function: IFEMDataAdditionFunction | None = None, save_function: IInfererSaveFunction | None = None, user_loss_function_dic: dict[str, Callable[[torch.Tensor, torch.Tensor], torch.Tensor]] | None = None, decrypt_key: bytes | None = None)

Bases: object

deploy(output_directory: Path, encrypt_key: bytes | None = None)

Deploy model information.

Parameters:
  • output_directory (pathlib.Path) – Output directory path.

  • encrypt_key (bytes, optional) – Key to encrypt model data. If not fed, the model data will not be encrypted.

classmethod from_model_directory(model_directory: Path, converter_parameters_pkl: Path | None = None, model_select_method: str = 'best', decrypt_key: bytes | None = None, infer_epoch: int | None = None, main_setting: MainSetting | None = None, **kwargs)

Load model data from a deployed directory.

Parameters:
  • model_directory (str or pathlib.Path) – Model directory created with Inferer.deploy().

  • model_path (Optional[pathlib.Path], optional) – If fed, overwrite the path to the model file, by default None

  • converter_parameters_pkl (Optional[pathlib.Path], optional) – If fed, overwrite the path to the pkl file of scaling parameters, by default None

  • decrypt_key (bytes, optional) – Key to decrypt model data. If not fed, and the data is encrypted, ValueError is raised.

  • model_select_method (str, optional) – Method name used to select the model, by default 'best'.

  • infer_epoch (int, optional) – If fed, the model corresponding to infer_epoch is used.

  • main_setting (setting.MainSetting, optional) – If fed, use it as settings. If not fed, main settings are loaded from model_directory.

Returns:

Inferer object

Return type:

siml.Inferer

infer(*, data_directories: list[pathlib.Path] | None = None, output_directory_base: Path | None = None, output_all: bool = False, save_summary: bool | None = True)

Perform inference.

Parameters:
  • data_directories (list[pathlib.Path], optional) – List of data directories. Data is searched recursively. The default is an empty list.

  • output_directory_base (pathlib.Path, optional) – If fed, overwrite self.setting.inferer.output_directory_base

  • output_all (bool, optional, default False) – If True, return all results, including non-preprocessed predicted data

  • save_summary (bool, optional, default True) – If True, save summary information

Returns:

inference_results

Inference results contains:
  • dict_x: input variables

  • dict_y: inferred variables

  • dict_answer: answer variables (None if not found)

  • loss: Loss value (scaled)

  • raw_loss: Loss in a raw scale

  • fem_data: FEMData object

  • output_directory: Output directory path

  • data_directory: Input directory path

  • inference_time: Inference time

Return type:

list[Dict]
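
A hedged end-to-end sketch: load a trained model and run inference over preprocessed data directories. All paths (model directory, pkl file, data directory) are placeholders to be replaced with your own.

    from pathlib import Path
    from siml.inferer import Inferer

    inferer = Inferer.from_model_directory(
        Path("models/my_model"),  # placeholder model directory
        converter_parameters_pkl=Path(
            "data/preprocessed/preprocessors.pkl"),  # placeholder pkl path
        model_select_method="best")
    results = inferer.infer(
        data_directories=[Path("data/preprocessed/test")],
        save_summary=True)
    for result in results:  # each result is a dict as described above
        print(result["data_directory"], result["loss"])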

infer_dataset(preprocess_dataset: PreprocessDataset, output_directory_base: Path | None = None, save_summary: bool | None = True) list[dict]

Perform inference on a preprocessed dataset.

Parameters:
  • preprocess_dataset (datasets.PreprocessDataset) – dataset of preprocessed data

  • output_directory_base (Optional[pathlib.Path], optional) – base output directory, by default None

  • save_summary (Optional[bool], optional) – If True, save summary information, by default True

Returns:

inference_results

Inference results contains:
  • dict_x: input variables

  • dict_y: inferred variables

  • dict_answer: answer variables (None if not found)

  • loss: Loss value (scaled)

  • raw_loss: Loss in a raw scale

  • fem_data: FEMData object

  • output_directory: Output directory path

  • data_directory: Input directory path

  • inference_time: Inference time

Return type:

list[Dict]

infer_dict_data(scaled_dict_x: dict, *, data_directory: Path | None = None, scaled_dict_answer: dict | None = None, save_summary: bool | None = True, base_fem_data: FEMData | None = None)

Infer with dictionary data.

Parameters:
  • scaled_dict_x (dict) – Dict of scaled x data.

  • data_directory (pathlib.Path, optional) – path to directory of simulation files

  • scaled_dict_answer (dict, optional) – Dict of scaled answer y data.

  • save_summary (bool, default True) – If True, save summary information of inference

  • base_fem_data (femio.FEMData, optional) – If fed, inference results are registered to base_fem_data and saved as a file.

Returns:

inference_result

Inference results contains:
  • dict_x: input and answer variables

  • dict_y: inferred variables

  • loss: Loss value (scaled)

  • raw_loss: Loss in a raw scale

  • fem_data: FEMData object

  • output_directory: Output directory path

  • data_directory: Input directory path

  • inference_time: Inference time

Return type:

Dict
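
A minimal sketch of dictionary-based inference, assuming a model whose single input variable is named "x" (the variable name, array shape, and model path are placeholders that must match your settings).

    from pathlib import Path
    import numpy as np
    from siml.inferer import Inferer

    inferer = Inferer.from_model_directory(Path("models/my_model"))
    scaled_dict_x = {"x": np.random.rand(10, 3).astype(np.float32)}
    result = inferer.infer_dict_data(scaled_dict_x, save_summary=False)
    print(result["dict_y"].keys(), result["inference_time"])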

infer_parameter_study(model, data_directories, *, n_interpolation=100, converter_parameters_pkl=None)

Infer while performing a parameter study. The parameter study is done on data generated by interpolating the input data_directories.

Parameters:
  • model (pathlib.Path or io.BufferedIOBase, optional) – Model directory, file path, or buffer. If not fed, TrainerSetting.pretrain_directory will be used.

  • data_directories (list[pathlib.Path]) – List of data directories.

  • n_interpolation (int, optional) – The number of points used for interpolation.

Returns:

  • interpolated_input_dict (dict) – Input data dict generated by interpolation.

  • output_dict (dict) – Output data dict generated by inference.

classmethod read_settings_file(settings_yaml: Path, model_path: Path | None = None, converter_parameters_pkl: Path | None = None, **kwargs) Inferer

Read settings.yaml to generate Inferer object.

Parameters:
  • settings_yaml (pathlib.Path) – Path to the YAML settings file.

  • model_path (Optional[pathlib.Path], optional) – If fed, overwrite the path to the model file, by default None

  • converter_parameters_pkl (Optional[pathlib.Path], optional) – If fed, overwrite the path to the pkl file of scaling parameters, by default None

Returns:

Inferer object

Return type:

Inferer

class siml.inferer.WholeInferProcessor(main_setting: MainSetting, model_path: Path | None = None, converter_parameters_pkl: Path | None = None, conversion_function: IConvertFunction | None = None, load_function: ILoadFunction | None = None, data_addition_function: IFEMDataAdditionFunction | None = None, save_function: IInfererSaveFunction | None = None, user_loss_function_dic: dict[str, Callable[[torch.Tensor, torch.Tensor], torch.Tensor]] | None = None)

Bases: object

run(data_directories: list[pathlib.Path] | Path, output_directory_base: Path | None = None, perform_preprocess: bool = True, save_summary: bool | None = True) dict

Run the whole inference process.

Parameters:
  • data_directories (Union[list[pathlib.Path], pathlib.Path]) – Paths to data.

  • output_directory_base (Optional[pathlib.Path], optional) – Path to the parent directory of output cases, by default None

  • perform_preprocess (bool, optional) – If True, perform preprocessing and scaling, by default True

  • save_summary (Optional[bool], optional) – If True, save summary information, by default True

Returns:

dictionary of results

Return type:

dict
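
A hedged sketch wrapping conversion, scaling, and inference in one call; the settings file, model path, and data directory are placeholders.

    from pathlib import Path
    from siml.inferer import WholeInferProcessor
    from siml.setting import MainSetting

    main_setting = MainSetting.read_settings_yaml(Path("settings.yaml"))
    processor = WholeInferProcessor(
        main_setting, model_path=Path("models/my_model"))
    results = processor.run(
        Path("data/raw/test"),  # raw data; preprocessed on the fly
        perform_preprocess=True)
    print(results.keys())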

run_dict_data(raw_dict_x: dict, *, answer_raw_dict_y: dict | None = None, perform_preprocess: bool = True) dict

Run inference directly on dictionary data.

Parameters:
  • raw_dict_x (dict) – Dict of raw x data.

  • answer_raw_dict_y (Optional[dict], optional) – Dict of raw answer y data, by default None

  • perform_preprocess (bool, optional) – If True, perform scaling, by default True

Returns:

dictionary of results

Return type:

dict

siml.mains module

siml.mains.convert_raw_data(add_argument=None, conversion_function=None, filter_function=None, load_function=None, **kwargs)

siml.optimize module

class siml.optimize.Objective(main_setting, output_base)

Bases: object

DICT_DTYPE = {'float': <class 'float'>, 'int': <class 'int'>}
class siml.optimize.Study(main_setting, db_setting=None, step_by_step=False)

Bases: object

callback_exit(study, frozen_trial)
callback_print(study, frozen_trial)
perform_study()

Perform hyperparameter search study.

Parameters:

None

Return type:

None

siml.prepost module

Module for preprocessing.

siml.prepost.analyze_data_directories(data_directories, x_names, f_names, *, n_split=10, n_bin=20, out_directory=None, ref_index=0, plot=True, symmetric=False, magnitude_range=1.0)

Analyze data f_names on a grid over x_names.

Parameters:
  • data_directories (list[pathlib.Path]) – List of data directories.

  • x_names (list[str]) – Names of x variables.

  • f_names (list[str]) – Names of f variables.

  • n_split (int, optional) – The number of splits of the x space.

  • n_bin (int, optional) – The number of bins used to draw histograms.

  • out_directory (pathlib.Path, optional) – Output directory path. By default no output is written.

  • ref_index (int, optional) – Reference data directory index to analyze data.

  • plot (bool, optional) – If True, plot data by grid.

  • symmetric (bool, optional) – If True, make the plot range symmetric.

  • magnitude_range (float, optional) – Factor multiplied to the plot range.

siml.prepost.concatenate_preprocessed_data(preprocessed_base_directories, output_directory_base, variable_names, *, ratios=(0.9, 0.05, 0.05), overwrite=False, finished_file='preprocessed')

Concatenate preprocessed data in the element direction.

NOTE: It may lead to data leakage, so it is intended for research use only.

Parameters:
  • preprocessed_base_directories (pathlib.Path or list[pathlib.Path]) – Base directory name of preprocessed data.

  • output_directory_base (pathlib.Path) – Base directory of output. Inside of it, train, validation, and test directories will be created.

  • variable_names (list[str]) – Variable names to be concatenated.

  • ratios (list[float], optional) – Ratio to split data.

  • overwrite (bool, optional) – If True, overwrite output data.

siml.prepost.normalize_adjacency_matrix(adj)

Symmetrically normalize adjacency matrix.

Parameters:

adj (scipy.sparse.coo_matrix) – Adjacency matrix in COO expression.

Returns:

normalized_adj – Normalized adjacency matrix in COO expression.

Return type:

scipy.sparse.coo_matrix
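
For reference, the standard symmetric normalization D^(-1/2) A D^(-1/2), written out with scipy. Whether normalize_adjacency_matrix adds self-loops or otherwise deviates from this formula is not stated in the docstring, so treat this as an illustrative sketch rather than the exact implementation.

    import numpy as np
    import scipy.sparse as sp

    def symmetric_normalize(adj: sp.coo_matrix) -> sp.coo_matrix:
        # Degree of each node, then D^(-1/2) with a guard for isolated nodes.
        degrees = np.asarray(adj.sum(axis=1)).ravel()
        with np.errstate(divide="ignore"):
            inv_sqrt = 1.0 / np.sqrt(degrees)
        inv_sqrt[~np.isfinite(inv_sqrt)] = 0.0
        d_inv_sqrt = sp.diags(inv_sqrt)
        return (d_inv_sqrt @ adj @ d_inv_sqrt).tocoo()

    adj = sp.coo_matrix(
        np.array([[0., 1., 0.], [1., 0., 1.], [0., 1., 0.]]))
    print(symmetric_normalize(adj).toarray())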

siml.prepost.split_data_arrays(xs, fs, *, n_split=10, ref_index=0)

Split data fs with regard to grids of xs.

Parameters:
  • xs (list[numpy.ndarray]) – n_sample-length list containing (n_element, dim_x)-shaped ndarrays.

  • fs (list[numpy.ndarray]) – n_sample-length list containing (n_element, dim_f)-shaped ndarrays.

  • n_split (int, optional) – The number of splits of the x space.

siml.setting module

class siml.setting.BlockSetting(name: str = 'Block', is_first: bool = False, is_last: bool = False, type: str = None, destinations: list = <factory>, residual: bool = False, reference_block_name: str = None, activation_after_residual: bool = True, allow_linear_residual: bool = False, bias: bool = True, input_slice: slice = slice(0, None, 1), input_indices: list = None, input_keys: list = None, input_names: list = None, output_key: str = None, support_input_index: int = None, support_input_indices: list = None, nodes: list = <factory>, kernel_sizes: list = None, activations: list = <factory>, dropouts: list = None, device: int = None, coeff: float = None, time_series: bool = False, no_grad: bool = False, weight_norm: bool = False, losses: list = <factory>, clip_grad_value: float = None, clip_grad_norm: float = None, optional: dict = <factory>, hidden_nodes: int = None, hidden_layers: int = None, hidden_activation: str = 'relu', output_activation: str = 'identity', input_dropout: float = 0.0, hidden_dropout: float = 0.0, output_dropout: float = 0.0)

Bases: TypedDataClass

activation_after_residual: bool = True
activations: list[str]
allow_linear_residual: bool = False
bias: bool = True
clip_grad_norm: float = None
clip_grad_value: float = None
coeff: float = None
destinations: list[str]
device: int = None
dropouts: list[float] = None
hidden_activation: str = 'relu'
hidden_dropout: float = 0.0
hidden_layers: int = None
hidden_nodes: int = None
input_dropout: float = 0.0
input_indices: list[int] = None
input_keys: list[str] = None
input_names: list[str] = None
input_slice: slice = slice(0, None, 1)
is_first: bool = False
is_last: bool = False
kernel_sizes: list[int] = None
property loss_names
losses: list[dict]
name: str = 'Block'
no_grad: bool = False
nodes: list[int]
optional: dict
output_activation: str = 'identity'
output_dropout: float = 0.0
output_key: str = None
reference_block_name: str = None
residual: bool = False
support_input_index: int = None
support_input_indices: list[int] = None
time_series: bool = False
type: str = None
weight_norm: bool = False
class siml.setting.CollectionVariableSetting(variables: Union[list[siml.setting.VariableSetting], dict[str, list[siml.setting.VariableSetting]]] = <factory>, super_post_init: bool = True)

Bases: TypedDataClass

collect_values(key, *, default=None)
property dims
get_time_series_keys()
property is_dict
property length
property names
strip()
super_post_init: bool = True
property time_series
property time_slice
to_dict()
variables: list[siml.setting.VariableSetting] | dict[str, list[siml.setting.VariableSetting]]
class siml.setting.ConversionSetting(mandatory_variables: list[str] = <factory>, optional_variables: list[str] = <factory>, mandatory: list[str] = <factory>, optional: list[str] = <factory>, output_base_directory: ~pathlib.Path | str | None = None, finished_file: str = 'converted', file_type: str = 'fistr', required_file_names: list[str] = <factory>, skip_femio: bool = False, time_series: bool = False, save_femio: bool = False, skip_save: bool = False, max_process: int = 1000)

Bases: TypedDataClass

Dataclass for raw data converter.

Parameters:
  • mandatory_variables (list[str]) – Mandatory variable names. If any of them are not found, ValueError is raised.

  • mandatory (list[str]) – An alias of mandatory_variables.

  • optional_variables (list[str]) – Optional variable names. If any of them are not found, they are ignored.

  • optional (list[str]) – An alias of optional_variables.

  • output_base_directory (str or pathlib.Path, optional) – Output base directory for the converted raw data. By default, ‘data/interim’ is the output base directory, so ‘data/interim/aaa/bbb’ directory is the output directory for ‘data/raw/aaa/bbb’ directory.

  • finished_file (str, optional) – File name to indicate that the conversion is finished.

  • file_type (str, optional) – File type to be read.

  • required_file_names (list[str], optional) – Required file names.

  • skip_femio (bool, optional) – If True, skip femio.FEMData reading process. Useful for user-defined data format such as csv, h5, etc.

  • time_series (bool, optional) – If True, make femio parse time series data.

  • save_femio (bool, optional) – If True, save femio data in the interim directories.

  • skip_save (bool, optional) – If True, skip SiML’s default saving function.

  • max_process (int, optional) – Maximum number of processes.

file_type: str = 'fistr'
finished_file: str = 'converted'
mandatory: list[str]
mandatory_variables: list[str]
max_process: int = 1000
optional: list[str]
optional_variables: list[str]
output_base_directory: Path | str | None = None
classmethod read_settings_yaml(settings_yaml)
required_file_names: list[str]
save_femio: bool = False
property should_load_mandatory_variables: bool
skip_femio: bool = False
skip_save: bool = False
time_series: bool = False
class siml.setting.DBSetting(servername: str = '', username: str = '', password: str = '', use_sqlite: bool = False)

Bases: TypedDataClass

password: str = ''
servername: str = ''
use_sqlite: bool = False
username: str = ''
class siml.setting.DataSetting(raw: list = <factory>, interim: list = <factory>, preprocessed: list = <factory>, inferred: list = <factory>, train: list = <factory>, validation: list = <factory>, develop: list = <factory>, test: list = <factory>, pad: bool = False, encrypt_key: bytes = None)

Bases: TypedDataClass

develop: list[pathlib.Path]
encrypt_key: bytes = None
inferred: list[pathlib.Path]
property inferred_root
interim: list[pathlib.Path]
property interim_root
pad: bool = False
preprocessed: list[pathlib.Path]
property preprocessed_root
raw: list[pathlib.Path]
property raw_root
test: list[pathlib.Path]
train: list[pathlib.Path]
validation: list[pathlib.Path]
class siml.setting.GroupSetting(blocks: list, name: str = 'GROUP', inputs: siml.setting.CollectionVariableSetting = <factory>, support_inputs: list = None, outputs: siml.setting.CollectionVariableSetting = <factory>, repeat: int = 1, convergence_threshold: float = None, mode: str = 'simple', debug: bool = False, time_series_length: int = None, optional: dict = <factory>)

Bases: TypedDataClass

blocks: list[siml.setting.BlockSetting]
convergence_threshold: float = None
debug: bool = False
property input_dims
property input_length
property input_names
inputs: CollectionVariableSetting
mode: str = 'simple'
name: str = 'GROUP'
optional: dict
property output_dims
property output_length
property output_names
outputs: CollectionVariableSetting
repeat: int = 1
support_inputs: list[str] = None
time_series_length: int = None
class siml.setting.InfererSetting(model: ~pathlib.Path = None, save: bool = True, overwrite: bool = False, output_directory: ~pathlib.Path = None, output_directory_base: ~pathlib.Path = PosixPath('data/inferred'), data_directories: list[pathlib.Path] = <factory>, write_simulation: bool = False, write_npy: bool = True, write_yaml: bool = True, write_simulation_base: ~pathlib.Path = None, write_simulation_stem: ~pathlib.Path = None, read_simulation_type: str = 'fistr', write_simulation_type: str = 'fistr', converter_parameters_pkl: ~pathlib.Path = None, convert_to_order1: bool = False, accomodate_length: int = 0, perform_preprocess: bool = False, perform_inverse: bool = True, return_all_results: bool = True, model_key: bytes = None, gpu_id: int = -1, less_output: bool = False, skip_fem_data_creation: bool = False, infer_epoch: int = None)

Bases: TypedDataClass

model: pathlib.Path, optional

Model directory, file path, or buffer. If not fed, TrainerSetting.pretrain_directory will be used.

save: bool, optional

If True, save inference results.

output_directory: pathlib.Path, optional

Output directory path. If fed, output the data in the specified directory. When this is fed, output_directory_base has no effect.

output_directory_base: pathlib.Path, optional

Output directory base name. If not fed, data/inferred will be the default output directory base.

data_directories: list[pathlib.Path], optional

Data directories to infer.

write_simulation: bool, optional

If True, write simulation data file(s) based on the inference.

write_npy: bool, optional

If True, write npy files of inferences.

write_yaml: bool, optional

If True, write yaml file used to make inference.

write_simulation_base: pathlib.Path, optional

Base of simulation data to be used for the write_simulation option. If not fed, try to find it from the input directories.

read_simulation_type: str, optional

Simulation file type to read.

write_simulation_type: str, optional

Simulation file type to write.

converter_parameters_pkl: pathlib.Path, optional

Pickle file of converter parameters. If not fed, DataSetting.preprocessed_root is used.

perform_preprocess: bool, optional

If True, perform preprocess.

accomodate_length: int

If specified, duplicate initial state to initialize RNN state.

overwrite: bool

If True, overwrite output.

return_all_results: bool

If True, return all inference results. Set False if the inference data is too large to fit into the memory available.

model_key: bytes

If fed, decrypt model file with the key.

gpu_id: int, optional

GPU ID. Specify a non-negative value to use a GPU; -1 means CPU.

less_output: bool, optional

If True, output fewer variables in the FEMData object.

skip_fem_data_creation: bool, optional

If True, skip fem_data object creation.

accomodate_length: int = 0
convert_to_order1: bool = False
converter_parameters_pkl: Path = None
data_directories: list[pathlib.Path]
gpu_id: int = -1
infer_epoch: int = None
less_output: bool = False
model: Path = None
model_key: bytes = None
output_directory: Path = None
output_directory_base: Path = PosixPath('data/inferred')
overwrite: bool = False
perform_inverse: bool = True
perform_preprocess: bool = False
read_simulation_type: str = 'fistr'
return_all_results: bool = True
save: bool = True
skip_fem_data_creation: bool = False
write_npy: bool = True
write_simulation: bool = False
write_simulation_base: Path = None
write_simulation_stem: Path = None
write_simulation_type: str = 'fistr'
write_yaml: bool = True
class siml.setting.Iter(value)

Bases: Enum

An enumeration.

MULTIPROCESS = 'multiprocess'
MULTITHREAD = 'multithread'
SERIAL = 'serial'
class siml.setting.MainSetting(data: siml.setting.DataSetting = <factory>, conversion: siml.setting.ConversionSetting = <factory>, preprocess: dict = <factory>, trainer: siml.setting.TrainerSetting = <factory>, inferer: siml.setting.InfererSetting = <factory>, model: siml.setting.ModelSetting = <factory>, optuna: siml.setting.OptunaSetting = <factory>, study: siml.setting.StudySetting = <factory>, replace_preprocessed: bool = False, misc: dict = <factory>)

Bases: object

conversion: ConversionSetting
data: DataSetting
get_crypt_key()
inferer: InfererSetting
misc: dict
model: ModelSetting
optuna: OptunaSetting
preprocess: dict
classmethod read_dict_settings(dict_settings, *, name=None, replace_preprocessed=False)
classmethod read_settings_yaml(settings_yaml: Path, replace_preprocessed=False, *, decrypt_key: bytes | None = None)
replace_preprocessed: bool = False
study: StudySetting
trainer: TrainerSetting
update_with_dict(new_dict)
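
A minimal sketch of loading settings, following the signatures listed above. The YAML path and the dict contents are placeholders that must match your own configuration.

    from pathlib import Path
    from siml.setting import MainSetting

    # From a YAML file.
    main_setting = MainSetting.read_settings_yaml(Path("settings.yaml"))
    print(main_setting.trainer.n_epoch, main_setting.trainer.batch_size)

    # From a plain dict (assumed to accept partial settings with defaults).
    other_setting = MainSetting.read_dict_settings(
        {"trainer": {"n_epoch": 10, "batch_size": 2}})
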
class siml.setting.ModelSetting(setting=None, blocks=None, groups=None)

Bases: TypedDataClass

blocks: list[siml.setting.BlockSetting]
groups: list[siml.setting.GroupSetting] = None
class siml.setting.OptimizerSetting(lr: float = 0.001, betas: Tuple = (0.9, 0.999), eps: float = 1e-08, weight_decay: float = 0)

Bases: TypedDataClass

betas: Tuple = (0.9, 0.999)
eps: float = 1e-08
lr: float = 0.001
weight_decay: float = 0
class siml.setting.OptunaSetting(n_trial: int = 100, output_base_directory: pathlib.Path = PosixPath('models/optuna'), hyperparameters: list = <factory>, setting: dict = <factory>)

Bases: TypedDataClass

hyperparameters: list[dict]
n_trial: int = 100
output_base_directory: Path = PosixPath('models/optuna')
setting: dict
class siml.setting.PreprocessSetting(preprocess: dict = <factory>)

Bases: object

preprocess: dict
classmethod read_settings_yaml(settings_yaml)
class siml.setting.StudySetting(root_directory: pathlib.Path = None, type: str = 'learning_curve', relative_develop_size_linspace: Tuple = <factory>, n_fold: int = 10, unit_error: str = '-', plot_validation: bool = False, x_from_zero: bool = False, y_from_zero: bool = False, x_logscale: bool = False, y_logscale: bool = False, scale_loss: bool = False)

Bases: TypedDataClass

n_fold: int = 10
plot_validation: bool = False
relative_develop_size_linspace: Tuple
root_directory: Path = None
scale_loss: bool = False
type: str = 'learning_curve'
unit_error: str = '-'
x_from_zero: bool = False
x_logscale: bool = False
y_from_zero: bool = False
y_logscale: bool = False
class siml.setting.TrainerSetting(inputs: ~siml.setting.CollectionVariableSetting = <factory>, support_input: str = None, support_inputs: list[str] = None, outputs: ~siml.setting.CollectionVariableSetting = <factory>, output_directory_base: ~pathlib.Path = PosixPath('models'), output_directory: ~pathlib.Path = None, name: str = 'default', suffix: str = None, batch_size: int = 1, validation_batch_size: int = None, n_epoch: int = 100, validation_directories: list[pathlib.Path] = <factory>, restart_directory: ~pathlib.Path = None, pretrain_directory: ~pathlib.Path = None, loss_function: str | dict = 'mse', loss_weights: dict[str, float] = None, optimizer: str = 'adam', compute_accuracy: bool = False, model_key: bytes = None, gpu_id: int = -1, log_trigger_epoch: int = 1, stop_trigger_epoch: int = 10, patience: int = 3, optuna_trial: ~optuna.trial._trial.Trial = None, prune: bool = False, snapshot_choise_method: str = 'best', seed: int = 0, element_wise: bool = False, simplified_model: bool = False, time_series: bool = False, element_batch_size: int = -1, validation_element_batch_size: int = None, use_siml_updater: bool = True, iterator: ~siml.setting.Iter = Iter.SERIAL, optimizer_setting: dict = <factory>, lazy: bool = True, num_workers: int = None, display_mergin: int = 4, non_blocking: bool = True, clip_grad_value: float = None, clip_grad_norm: float = None, recursive: bool = True, state_dict_strict: bool = True, train_data_shuffle: bool = True, data_parallel: bool = False, model_parallel: bool = False, draw_network: bool = True, output_stats: bool = False, split_ratio: dict = <factory>, figure_format: str = 'pdf', pseudo_batch_size: int = 0, debug_dataset: bool = False, time_series_split: list[int] = None, time_series_split_evaluation: list[int] = None, loss_slice: slice = <factory>)

Bases: TypedDataClass

inputs: siml.setting.CollectionVariableSetting

Variable settings of inputs.

outputs: siml.setting.CollectionVariableSetting

Variable settings of outputs.

train_directories: list[str] or pathlib.Path

Training data directories.

output_directory_base: str or pathlib.Path

Output directory base name.

output_directory: str or pathlib.Path

Output directory name.

validation_directories: list[str] or pathlib.Path, optional

Validation data directories.

restart_directory: str or pathlib.Path, optional

Directory name to be used for restarting.

pretrain_directory: str or pathlib.Path, optional

Pretrained directory name.

loss_function: str or dict, optional

Loss function to be used for training.

optimizer: str, optional

Optimizer to be used for training.

compute_accuracy: bool, optional

If True, compute accuracy.

name: str

The name of the study.

suffix: str

Suffix to be added to the name.

batch_size: int, optional

Batch size for train dataset.

validation_batch_size: int, optional

Batch size for validation dataset.

n_epoch: int, optional

The number of epochs.

model_key: bytes

If fed, decrypt model file with the key.

gpu_id: int, optional

GPU ID. Specify a non-negative value to use a GPU; -1 means CPU.

log_trigger_epoch: int, optional

The interval of logging of training. It is used for logging, plotting, and saving snapshots.

stop_trigger_epoch: int, optional

The interval to check if training should be stopped. It is used for early stopping and pruning.

optuna_trial: optuna.Trial, optional

Trial object used to perform optuna hyperparameter tuning.

prune: bool, optional

If True and optuna_trial is given, pruning will be performed.

seed: int, optional

Random seed.

element_wise: bool, optional

If True, concatenate data to force element-wise training (so no graph information can be used). With this option, element_batch_size is used as the trainer's batch size, since the training is element-wise.

element_batch_size: int, optional

If positive, split one mesh into element_batch_size chunks and perform multiple updates per mesh. When element_wise is True, element_batch_size is the batch size in the usual sense.

validation_element_batch_size: int, optional

element_batch_size for validation dataset.

simplified_model: bool, optional

If True, regard the target simulation as simplified simulation (so-called “1D simulation”), which focuses on only a few inputs and outputs. The behavior of the trainer will be similar to that with element_wise = True.

time_series: bool, optional

If True, regard the data as time series. In that case, the data shape will be [seq, batch, element, feature] instead of the default [batch, element, feature] shape.

lazy: bool, optional

If True, load data lazily.

num_workers: int, optional

The number of workers to load data.

display_mergin: int, optional

non_blocking: bool [True]

If True and this copy is between CPU and GPU, the copy may occur asynchronously with respect to the host. For other cases, this argument has no effect.

data_parallel: bool [False]

If True, perform data parallel on GPUs.

model_parallel: bool [False]

If True, perform model parallel on GPUs.

draw_network: bool [True]

If True, draw the network (requires graphviz).

output_stats: bool [False]

If True, output statistics of training (e.g., means of weights, gradients, ...).

split_ratio: dict[str, float]

If fed, split the data into train, validation, and test at the beginning of the training. Should be {‘validation’: float, ‘test’: float} dict.

figure_format: str

The format of the figure. The default is ‘pdf’.

clip_grad_value: float

If fed, apply gradient clipping by value.

clip_grad_norm: float

If fed, apply gradient clipping with norm.

recursive: bool

If True, search data recursively.

time_series_split: list[int]

If fed, split time series with [start, step, length].

loss_slice: slice

Slice to be applied to loss computation.

state_dict_strict: bool

It will be passed to torch.nn.Module.load_state_dict.

batch_size: int = 1
clip_grad_norm: float = None
clip_grad_value: float = None
compute_accuracy: bool = False
data_parallel: bool = False
debug_dataset: bool = False
determine_batch_sizes() tuple[int, int]
determine_element_wise() bool
display_mergin: int = 4
draw_network: bool = True
element_batch_size: int = -1
element_wise: bool = False
figure_format: str = 'pdf'
get_input_time_series_keys() list[str]
get_output_time_series_keys() list[str]
gpu_id: int = -1
property input_dims
property input_is_dict
property input_length
property input_names
property input_names_list
inputs: CollectionVariableSetting
iterator: Iter = 'serial'
lazy: bool = True
log_trigger_epoch: int = 1
loss_function: str | dict = 'mse'
loss_slice: slice
loss_weights: dict[str, float] = None
model_key: bytes = None
model_parallel: bool = False
n_epoch: int = 100
name: str = 'default'
non_blocking: bool = True
num_workers: int = None
optimizer: str = 'adam'
optimizer_setting: dict
optuna_trial: Trial = None
property output_dims
output_directory: Path = None
output_directory_base: Path = PosixPath('models')
property output_is_dict
property output_length
property output_names
property output_names_list
property output_skips
output_stats: bool = False
outputs: CollectionVariableSetting
property overwrite_restart_mode
patience: int = 3
pretrain_directory: Path = None
prune: bool = False
pseudo_batch_size: int = 0
recursive: bool = True
restart_directory: Path = None
seed: int = 0
simplified_model: bool = False
snapshot_choise_method: str = 'best'
split_ratio: dict
state_dict_strict: bool = True
stop_trigger_epoch: int = 10
suffix: str = None
support_input: str = None
support_inputs: list[str] = None
time_series: bool = False
time_series_split: list[int] = None
time_series_split_evaluation: list[int] = None
train_data_shuffle: bool = True
update_output_directory(*, id_=None, base=None)
update_time_series(variables)
use_siml_updater: bool = True
validation_batch_size: int = None
validation_directories: list[pathlib.Path]
validation_element_batch_size: int = None
property variable_information
class siml.setting.TypedDataClass

Bases: object

convert()

Convert all fields according to their type definitions.

classmethod read_settings_yaml(settings_yaml)
to_dict()
validate()
class siml.setting.VariableSetting(name: str = 'variable', dim: int = 1, shape: list[int] = <factory>, skip: bool = False, time_series: bool = False, time_slice: slice = <factory>)

Bases: TypedDataClass

name: str

The name of the variable.

dim: int

The number of features of the variable. For higher-order tensor variables, it should be the dimension of the last index.

shape: list[int]

The shape of the tensor.

skip: bool

If True, skip the variable for loss computation or convergence computation.

time_series: bool

If True, regard it as a time series.

time_slice: list[int]

Slice for time series.

dim: int = 1
get(key, default=None)
name: str = 'variable'
shape: list[int]
skip: bool = False
time_series: bool = False
time_slice: slice
siml.setting.dump_yaml(data_class, stream)

Write the YAML representation of the specified dataclass object to a stream.

Parameters:
  • data_class (dataclasses.dataclass) – DataClass object to write.

  • stream (File or stream) – Stream to write.

siml.setting.write_yaml(data_class, file_name, *, overwrite=False, key=None)

Write YAML file of the specified dataclass object.

Parameters:
  • data_class (dataclasses.dataclass) – DataClass object to write.

  • file_name (str or pathlib.Path) – YAML file name to write.

  • overwrite (bool, optional) – If True, overwrite file.

  • key (bytes) – Key for encryption.
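
A short serialization sketch following the two signatures above; the settings file names are placeholders.

    from pathlib import Path
    from siml import setting

    main_setting = setting.MainSetting.read_settings_yaml(
        Path("settings.yaml"))
    # write_yaml takes a file name and manages the file itself.
    setting.write_yaml(main_setting, "exported_settings.yml", overwrite=True)
    # dump_yaml writes to an already-open stream instead.
    with open("stream_settings.yml", "w") as stream:
        setting.dump_yaml(main_setting, stream)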

siml.study module

class siml.study.Status(value)

Bases: Enum

An enumeration.

ERROR = 'ERROR'
FINISHED = 'FINISHED'
NOT_YET = 'NOT_YET'
RUNNING = 'RUNNING'
class siml.study.Study(settings)

Bases: object

initialize_log_file()
initialize_study_setting()
plot_study(allow_nan=False)
run()
run_single(condition)

siml.trainer module

class siml.trainer.Trainer(main_settings: MainSetting, *, optuna_trial=None, user_loss_function_dic: dict[str, Callable[[torch.Tensor, torch.Tensor], torch.Tensor]] | None = None)

Bases: object

evaluate(evaluate_test: bool = False, load_best_model: bool = False) tuple[ignite.engine.engine.State, Optional[ignite.engine.engine.State], Optional[ignite.engine.engine.State]]

Evaluate model performance

Parameters:
  • evaluate_test (bool, optional) – If True, evaluation by test dataset is performed, by default False

  • load_best_model (bool, optional) – If True, best model is used to evaluate, by default False

Returns:

ignite State objects for the train, validation, and test datasets

Return type:

tuple[State, Union[State, None], Union[State, None]]

train(draw_model: bool = True) float

Start training

Parameters:

draw_model (bool, optional) – If True, output a figure of the model, by default True

Returns:

loss for validation data

Return type:

float
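
A hedged training sketch following the Trainer signatures above; the settings file is a placeholder and must define the data, model, and trainer sections.

    from pathlib import Path
    from siml.setting import MainSetting
    from siml.trainer import Trainer

    main_setting = MainSetting.read_settings_yaml(Path("settings.yaml"))
    trainer = Trainer(main_setting)
    validation_loss = trainer.train()
    train_state, validation_state, test_state = trainer.evaluate(
        evaluate_test=True, load_best_model=True)
    print(f"validation loss: {validation_loss}")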

siml.util module

class siml.util.VariableMask(skips, dims, is_dict=None, *, invert=False)

Bases: object

siml.util.cat_time_series(x, time_series_keys)
siml.util.collect_data_directories(base_directory, *, required_file_names=None, allow_no_data=False, pattern=None, inverse_pattern=None, toplevel=True, print_state=False)

Collect data directories recursively from the base directory.

Parameters:
  • base_directory (pathlib.Path) – Base directory to search directory from.

  • required_file_names (list[str]) – If given, return only directories which have required files.

  • pattern (str) – If given, return only directories which match the pattern.

  • inverse_pattern (str, optional) – If given, return only files which DO NOT match the pattern.

  • print_state (bool, optional) – If True, print state of the search

Returns:

found_directories – All found directories.

Return type:

list[pathlib.Path]
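
A usage sketch, assuming finished preprocessed directories are marked by a file named "preprocessed" (the marker file name is an assumption; use whatever flag file your pipeline writes, cf. finished_file in siml.prepost).

    from pathlib import Path
    from siml import util

    directories = util.collect_data_directories(
        Path("data/preprocessed"),  # placeholder base directory
        required_file_names=["preprocessed"],
        print_state=True)
    print(len(directories))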

siml.util.collect_files(directories, required_file_names, *, pattern=None, allow_no_data=False, inverse_pattern=None)

Collect data files recursively from the given directories.

Parameters:
  • directories (pathlib.Path or list[pathlib.Path]) – Directories to search files from.

  • required_file_names (list[str]) – File names.

  • pattern (str, optional) – If given, return only files which match the pattern.

  • inverse_pattern (str, optional) – If given, return only files which DO NOT match the pattern.

Returns:

collected_files

Return type:

list[pathlib.Path]

siml.util.concatenate_variable(variables)
siml.util.date_string()
siml.util.decrypt_file(key, file_name, return_stringio=False)

Decrypt data file.

Parameters:
  • key (bytes) – Key for decryption.

  • file_name (str or pathlib.Path) – File path of the encrypted data.

  • return_stringio (bool, optional) – If True, return io.StringIO instead of io.BytesIO.

Returns:

decrypted_data

Return type:

io.BytesIO or io.StringIO

siml.util.determine_max_process(max_process=None)

Determine maximum number of processes.

Parameters:

max_process (int, optional) – Input maximum process.

Returns:

resultant_max_process

Return type:

int

siml.util.directory_have_files(directory, files)
siml.util.encrypt_file(key, file_path, binary)

Encrypt data and then save to a file.

Parameters:
  • key (bytes) – Key for encription.

  • file_path (str or pathlib.Path) – File path to save.

  • binary (io.BytesIO) – Data content.

siml.util.files_exist(directory, file_names)

Check if files exist in the specified directory.

Parameters:
  • directory (pathlib.Path) –

  • file_names (list[str]) –

Returns:

files_exist – True if all files exist. Otherwise False.

Return type:

bool

siml.util.files_match(file_names, required_file_names)

Check if file names match.

Parameters:
  • file_names (list[str]) –

  • required_file_names (list[str]) –

Returns:

files_match – True if all files match. Otherwise False.

Return type:

bool

siml.util.get_top_directory() Path

Return path of the top-level directory of the working tree

Returns:

path of the top-level directory of the working tree

Return type:

Path

siml.util.load_variable(data_directory: Path, file_basename: str, *, allow_missing: bool = False, check_nan: bool = False, decrypt_key: bytes | None = None) ndarray | coo_matrix

Load variable data.

Parameters:
  • data_directory (pathlib.Path) – Directory path of the data.

  • file_basename (str) – File base name without extension.

  • allow_missing (bool, optional) – If True, return None when the corresponding file is missing. Otherwise, raise ValueError.

  • decrypt_key (bytes, optional) – If fed, it is used to decrypt the file.

Returns:

data

Return type:

numpy.ndarray or scipy.sparse.coo_matrix

siml.util.load_yaml(source)

Load YAML source.

Parameters:

source (File-like object or str or pathlib.Path) –

Returns:

dict_data – YAML contents.

Return type:

dict

siml.util.load_yaml_file(file_name)

Load YAML file.

Parameters:

file_name (str or pathlib.Path) – YAML file name.

Returns:

dict_data – YAML contents.

Return type:

dict

siml.util.pad_array(array, n)

Pad array to the size n.

Parameters:
  • array (numpy.ndarray or scipy.sparse.coo_matrix) – Input array of size (m, f1, f2, …) for numpy.ndarray or (m, m) for scipy.sparse.coo_matrix.

  • n (int) – Size after padding. n should be equal to or larger than m.

Returns:

padded_array – Padded array of size (n, f1, f2, …) for numpy.ndarray or (n, n) for scipy.sparse.coo_matrix.

Return type:

numpy.ndarray or scipy.sparse.coo_matrix
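
A small sketch covering both documented input types; that the padded region is filled with zeros is an assumption, not stated in the docstring.

    import numpy as np
    import scipy.sparse as sp
    from siml import util

    dense = np.ones((3, 2), dtype=np.float32)
    padded_dense = util.pad_array(dense, 5)    # expected shape: (5, 2)
    sparse = sp.coo_matrix(np.eye(3))
    padded_sparse = util.pad_array(sparse, 5)  # expected shape: (5, 5)
    print(padded_dense.shape, padded_sparse.shape)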

siml.util.save_variable(output_directory, file_basename, data, *, dtype=<class 'numpy.float32'>, encrypt_key=None)

Save variable data.

Parameters:
  • output_directory (pathlib.Path) – Save directory path.

  • file_basename (str) – Save file base name without extension.

  • data (np.ndarray or scipy.sparse.coo_matrix) – Data to be saved.

  • dtype (type, optional) – Data type to be saved.

  • encrypt_key (bytes, optional) – Key for encryption.

Return type:

None

siml.util.split_data(list_directories, *, validation=0.1, test=0.1, shuffle=True)

Split list of data directories into train, validation, and test.

Parameters:
  • list_directories (list[pathlib.Path]) – List of data directories.

  • validation (float, optional) – The ratio of the validation dataset size.

  • test (float, optional) – The ratio of the test dataset size.

  • shuffle (bool, optional) – If True, shuffle list_directories.

Returns:

  • train_directories (list[pathlib.Path])

  • validation_directories (list[pathlib.Path])

  • test_directories (list[pathlib.Path])
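
A minimal sketch combining collect_data_directories and split_data; the base path and marker file name are placeholders.

    from pathlib import Path
    from siml import util

    directories = util.collect_data_directories(
        Path("data/preprocessed"), required_file_names=["preprocessed"])
    train_dirs, validation_dirs, test_dirs = util.split_data(
        directories, validation=0.1, test=0.1, shuffle=True)
    print(len(train_dirs), len(validation_dirs), len(test_dirs))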

Module contents

SiML

siml.get_version()