High-Level API for Scaling, Training and Prediction

In this section, we use the high-level API to run the whole machine learning workflow: scaling, training and prediction.

First, we prepare dummy data. This dummy data corresponds to feature values extracted from simulation data.

import pathlib
import random
import shutil

import numpy as np
import scipy.sparse as sp


def prepare_sample_interim_files():
    """Create a small, reproducible set of dummy interim files under ``out/``.

    Any existing ``out`` directory is removed first. Five case directories
    (``out/interim/case_0`` .. ``out/interim/case_4``) are then written.
    Case ``i`` has ``100 * (i + 1)`` nodes and contains:

    * ``nodal_initial_u.npy`` and ``nodal_last_u.npy``: float32 arrays of
      shape ``(n_nodes, 3, 1)``. Both files hold the same values.
    * ``nodal_nadj.npz`` and ``nodal_{x,y,z}_grad_hop1.npz``: random sparse
      ``(n_nodes, n_nodes)`` float32 matrices with density 0.1, saved in
      COO format.
    * an empty ``converted`` marker file.
    """
    np.random.seed(0)
    random.seed(0)
    # Seed the Generator used for the sparse matrices as well. An unseeded
    # default_rng() draws from OS entropy, so seeding only the legacy global
    # state above would leave the .npz files different on every run.
    rng = np.random.default_rng(0)

    output_directory = pathlib.Path("out")
    if output_directory.exists():
        shutil.rmtree(output_directory)

    base_interim_dir = output_directory / "interim"
    base_interim_dir.mkdir(parents=True)

    n_cases = 5
    dtype = np.float32
    sparse_array_names = (
        "nodal_nadj",
        "nodal_x_grad_hop1",
        "nodal_y_grad_hop1",
        "nodal_z_grad_hop1",
    )
    for i in range(n_cases):
        n_nodes = 100 * (i + 1)
        interim_dir = base_interim_dir / f"case_{i}"
        interim_dir.mkdir()

        nodal_initial_u = np.random.rand(n_nodes, 3, 1).astype(dtype)
        np.save(interim_dir / "nodal_initial_u.npy", nodal_initial_u)
        # The "last" field is written as a copy of the initial field.
        np.save(interim_dir / "nodal_last_u.npy", nodal_initial_u)

        for name in sparse_array_names:
            arr = sp.random(n_nodes, n_nodes, density=0.1, random_state=rng)
            # save_npz appends the ".npz" suffix to the given path.
            sp.save_npz(interim_dir / name, arr.tocoo().astype(dtype))

        (interim_dir / "converted").touch()


# Generate the dummy interim files under ./out (any existing ./out is removed).
prepare_sample_interim_files()

The setting file for scaling and training can be downloaded from data.yml. We now perform the scaling process on the data prepared above.

from phlower.services.preprocessing import PhlowerScalingService
from phlower.settings import PhlowerSetting

# Load the setting file used for both scaling and training.
setting = PhlowerSetting.read_yaml("sample_data/e2e/setting.yml")

# Run the scaling service over the five generated cases; the output base
# directory for the scaled data is out/preprocessed.
interim_root = pathlib.Path("out/interim")
scaler = PhlowerScalingService.from_setting(setting)
scaler.fit_transform_all(
    interim_data_directories=[interim_root / f"case_{i}" for i in range(5)],
    output_base_directory=pathlib.Path("out/preprocessed"),
)

Next, we train a model using the preprocessed data.

from phlower.services.trainer import PhlowerTrainer

# Cases 0-2 are used for training and cases 3-4 for validation; the
# trainer's output directory is out/model.
preprocessed_root = pathlib.Path("out/preprocessed")
trainer = PhlowerTrainer.from_setting(setting)

loss = trainer.train(
    train_directories=[preprocessed_root / f"case_{i}" for i in (0, 1, 2)],
    validation_directories=[preprocessed_root / f"case_{i}" for i in (3, 4)],
    output_directory=pathlib.Path("out/model"),
)
-------  Start Training -------
Device: cpu
torch.cuda.is_available()=False
Log every 1 epoch
Output directory: out/model
-------------------------------

epoch    train_loss    validation_loss    details_tr/nodal_last_u    details_val/nodal_last_u    elapsed_time

  0%|                                                     | 0/3 [00:00<?, ?it/s]


  0%|                                                     | 0/3 [00:00<?, ?it/s]


  0%|                                                     | 0/2 [00:00<?, ?it/s]

0        3.60635e-01   3.49816e-01        3.60635e-01                3.49816e-01                 0.04

  0%|                                                     | 0/3 [00:00<?, ?it/s]


  0%|                                                     | 0/3 [00:00<?, ?it/s]


  0%|                                                     | 0/2 [00:00<?, ?it/s]

1        3.52956e-01   3.42368e-01        3.52956e-01                3.42368e-01                 0.08

  0%|                                                     | 0/3 [00:00<?, ?it/s]


  0%|                                                     | 0/3 [00:00<?, ?it/s]


  0%|                                                     | 0/2 [00:00<?, ?it/s]

2        3.45437e-01   3.35074e-01        3.45437e-01                3.35074e-01                 0.12

  0%|                                                     | 0/3 [00:00<?, ?it/s]


  0%|                                                     | 0/3 [00:00<?, ?it/s]


  0%|                                                     | 0/2 [00:00<?, ?it/s]

3        3.38075e-01   3.27933e-01        3.38075e-01                3.27933e-01                 0.16

  0%|                                                     | 0/3 [00:00<?, ?it/s]


  0%|                                                     | 0/3 [00:00<?, ?it/s]


  0%|                                                     | 0/2 [00:00<?, ?it/s]

4        3.30867e-01   3.20941e-01        3.30867e-01                3.20941e-01                 0.20

  0%|                                                     | 0/3 [00:00<?, ?it/s]


  0%|                                                     | 0/3 [00:00<?, ?it/s]


  0%|                                                     | 0/2 [00:00<?, ?it/s]

5        3.23809e-01   3.14095e-01        3.23809e-01                3.14095e-01                 0.23

  0%|                                                     | 0/3 [00:00<?, ?it/s]


  0%|                                                     | 0/3 [00:00<?, ?it/s]


  0%|                                                     | 0/2 [00:00<?, ?it/s]

6        3.16899e-01   3.07392e-01        3.16899e-01                3.07392e-01                 0.27

  0%|                                                     | 0/3 [00:00<?, ?it/s]


  0%|                                                     | 0/3 [00:00<?, ?it/s]


  0%|                                                     | 0/2 [00:00<?, ?it/s]

7        3.10133e-01   3.00829e-01        3.10133e-01                3.00829e-01                 0.31

  0%|                                                     | 0/3 [00:00<?, ?it/s]


  0%|                                                     | 0/3 [00:00<?, ?it/s]


  0%|                                                     | 0/2 [00:00<?, ?it/s]

8        3.03509e-01   2.94404e-01        3.03509e-01                2.94404e-01                 0.35

  0%|                                                     | 0/3 [00:00<?, ?it/s]


  0%|                                                     | 0/3 [00:00<?, ?it/s]


  0%|                                                     | 0/2 [00:00<?, ?it/s]

9        2.97024e-01   2.88113e-01        2.97024e-01                2.88113e-01                 0.39

The train function returns a float which corresponds to the last training loss. Let’s print it.

# Show the value returned by trainer.train().
print(loss)
0.29843243956565857

Finally, we perform prediction using the trained model. The setting file for prediction can be downloaded from data.yml.

The output below shows that the physical dimension of the predicted variable is also handled properly.

from phlower.services.predictor import PhlowerPredictor

# Load the prediction setting; this rebinds `setting` from the training one.
setting = PhlowerSetting.read_yaml("sample_data/e2e/predict.yml")

# Build a predictor from the model directory written by the training step.
predictor = PhlowerPredictor(
    model_directory=pathlib.Path("out/model"),
    predict_setting=setting.prediction,
)

preprocessed_directories = [pathlib.Path("out/preprocessed/case_3")]

# Inverse scaling is disabled for this run. For every predicted variable,
# print its name together with its physical dimension.
results = predictor.predict(
    preprocessed_directories, perform_inverse_scaling=False
)
for result in results:
    predicted = result.prediction_data
    for key in predicted.keys():
        print(f"{key}: {predicted[key].dimension}")
nodal_last_u: PhlowerDimensionTensor(T: -1.0, L: 1.0, M: 0.0, I: 0.0, Theta: 0.0, N: 0.0, J: 0.0)

Total running time of the script: (0 minutes 4.136 seconds)

Gallery generated by Sphinx-Gallery