
Optimize Neural Network with Optuna

    Authored by Christof Kaufmann

    The network contains a residual block (the Add layer). The optimization is run separately with 2, 3 and 4 fully connected layers inside the residual block. Each layer can have:

    • an activation function: ReLU, tanh or snake (from tensorflow-addons),
    • a kernel initializer: He normal, He uniform, Glorot normal or Glorot uniform, and
    • a number of neurons between 10 and 70.

    50 trials are run for each number of layers, i.e. 150 trials in total. Each trial tries one parameter combination and trains the model 5 times with at most 400 epochs each. A trial's score is the test MSE averaged over these 5 runs, where early stopping (monitoring the training loss) restores the weights of the best epoch before evaluation. Unfortunately, the trials are not parallelized here, but see the sketch below.
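    Optuna itself can distribute trials across several processes when the study lives in a shared storage. Here is a minimal sketch of that idea, using the objective defined in opt.py below; the study name and SQLite file are made up for illustration:

    import optuna

    # all workers started with the same study_name and storage attach to one study
    study = optuna.create_study(study_name='ann-2-layers',       # hypothetical name
                                storage='sqlite:///optuna.db',   # hypothetical database file
                                direction='minimize',
                                load_if_exists=True)
    study.optimize(lambda trial: objective(trial, layers=2), n_trials=50)

    Whether this pays off depends on the hardware, since each worker trains its own TensorFlow models.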

    Prerequisites:

    mamba install optuna plotly   # or: conda install -c conda-forge optuna plotly
    pip install tensorflow-addons

    The usual packages such as TensorFlow and NumPy are required as well.

    opt.py
    #%% imports
    import glob
    import optuna
    import pickle
    import numpy as np
    import tensorflow as tf
    from tensorflow.keras.layers import Input, Dense, Add, Activation
    import tensorflow_addons as tfa
    import plotly.express as px    # not used directly, but the Optuna visualizations below require plotly
    from IPython.display import display
    
    
    #%% data: discontinuous staircase
    x_train = np.random.rand(10000) * 10
    x_test = np.sort(np.random.rand(10000)) * 10
    y_train = np.floor(x_train)
    y_test = np.floor(x_test)
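    # note: since the skip connection passes x through, the dense layers only
    # have to learn the residual y - x = floor(x) - x, a sawtooth in (-1, 0]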
    
    
    #%% ANN model: residual block with dense layers inside
    def make_ann(neurons, activations, initializers):
        # build model
        ann_in = Input(shape=(1,))
        ann_res = ann_in
        for num, act, init in zip(neurons, activations, initializers):
            ann_res = Dense(num,
                            activation=act if act != 'snake' else tfa.activations.snake,
                            kernel_initializer=init)(ann_res)
    
        ann_res = Dense(1, activation='linear')(ann_res)
        ann_out = Add()([ann_in, ann_res])
        ann = tf.keras.models.Model(ann_in, ann_out)
        ann.compile(optimizer='adam', loss='mse')
    
        return ann
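
    # example usage (sketch): a residual block with two hidden layers
    # ann = make_ann([32, 32], ['relu', 'snake'], ['he_normal', 'glorot_uniform'])
    # ann.summary()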
    
    def assess_ann(ann):
        # train and eval model
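        # note: early stopping monitors the training loss (there is no
        # validation split) and restores the weights of the best epoch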
        early_stop = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=40, restore_best_weights=True)
        ann.fit(x_train, y_train, batch_size=100, epochs=400, callbacks=[early_stop], verbose=False)
        mse = ann.evaluate(x_test, y_test, verbose=False)
        return mse
    
    
    #%% load studies from files
    studies = {}
    paths = glob.glob("study-*.pkl")
    for p in paths:
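        # filename pattern is "study-<layers>.pkl", e.g. "study-3.pkl" -> 3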
        layers = int(p[6:-4])
        with open(p, 'rb') as f:
            studies[layers] = pickle.load(f)
    
    
    #%% Optuna
    def objective(trial, layers=2):
        # let the sampler suggest hyperparameters for each layer
        neurons, activations, initializers = [], [], []
        for l in range(layers):
            neurons.append(trial.suggest_int(f"neurons {l}", 10, 70))
            activations.append(trial.suggest_categorical(f"activation {l}", ['tanh', 'relu', 'snake']))
            initializers.append(trial.suggest_categorical(f"init {l}", ['he_uniform', 'he_normal', 'glorot_uniform', 'glorot_normal']))
    
        # train 5x and return the average of the mean squared errors for optimization
        scores = []
        for _ in range(5):
            ann = make_ann(neurons, activations, initializers)
            mse = assess_ann(ann)
            scores.append(mse)
    
        return np.mean(scores)
    
    # optimize ANNs with 2, 3 and 4 layers
    if not studies: # check if we loaded studies from files
        for layers in range(2, 5):
            study = optuna.create_study(direction='minimize')
            studies[layers] = study
            study.optimize(lambda trial: objective(trial, layers), n_trials=50)
    
            # save studies into files for later use or script mode
            with open(f'study-{layers}.pkl', 'wb') as f:
                pickle.dump(study, f)
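
        # note: pickles are tied to the Optuna/Python versions that wrote them;
        # Optuna's RDB storage would be a more portable way to persist studies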
    
    
    #%% plots
    def list_best_trials(study):
        # list best parameters
        res = study.trials_dataframe()
        res = res.sort_values('value').dropna().drop(columns=['number', 'datetime_start', 'datetime_complete', 'duration', 'state'])
        display(res.head(15))
    
    def plot_trials(study):
        # plots (require plotly and jupyter or VSCode)
        fig = optuna.visualization.plot_parallel_coordinate(study)
        fig.show()
        fig = optuna.visualization.plot_param_importances(study)
        fig.show()
        fig = optuna.visualization.plot_contour(study, params=["activation 0", "activation 1"])
        fig.show()
    
    for layers in range(2, 5):
        list_best_trials(studies[layers])
        plot_trials(studies[layers])
    
    fig = optuna.visualization.plot_contour(studies[2], params=["neurons 0", "neurons 1"])
    fig.show()