Skip to content
Snippets Groups Projects
Select Git revision
  • 03834d5e6d391116eed6d3ca3acad5a61a798183
  • main default protected
  • lsander-main-patch-33579
3 results

random_search.py

Blame
  • Forked from Frederic Aust / Vorhersage der Verkehrslage CVH
    Source project has a limited visibility.
    random_search.py 1.96 KiB
    
    from sklearn.model_selection import train_test_split
    import numpy as np
    import pandas as pd
    from sklearn.ensemble import RandomForestRegressor
    from sklearn.metrics import r2_score,mean_squared_error,mean_absolute_percentage_error
    from sklearn.model_selection import GridSearchCV# Create the parameter grid based on the results of random search 
    from sklearn.model_selection import RandomizedSearchCV# Number of trees in random forest
    
    # Load the numeric table from data.csv, skipping its single header row.
    dataset = np.genfromtxt('data.csv', delimiter=',', skip_header=1)
    # Column 3 is the regression target; columns 0-2 and 4-9 are the features.
    feature_columns = [0, 1, 2, 4, 5, 6, 7, 8, 9]
    x = dataset[:, feature_columns]
    y = dataset[:, 3]
    # Hold out 25% of the rows; the fixed seed keeps the split repeatable.
    x_train, x_test, y_train, y_test = train_test_split(
        x,
        y,
        test_size=0.25,
        random_state=42,
    )
    
    # Candidate hyperparameter values sampled by the randomized search.
    # Number of trees in random forest: 200, 400, ..., 1000
    n_estimators = list(range(200, 1001, 200))
    # Number of features to consider at every split.
    # 1.0 means "all features", which is what the legacy 'auto' option meant
    # for regressors; 'auto' itself was deprecated in scikit-learn 1.1 and
    # removed in 1.3, so candidates using it would fail on current versions.
    max_features = [1.0, 'sqrt']
    # Maximum number of levels in tree: 10, 20, ..., 110, plus None (unlimited)
    max_depth = [*range(10, 111, 10), None]
    # Minimum number of samples required to split a node
    min_samples_split = [2, 5, 10]
    # Minimum number of samples required at each leaf node
    min_samples_leaf = [1, 2, 4]
    # Method of selecting samples for training each tree
    bootstrap = [True, False]
    # Create the random grid
    random_grid = {
        'n_estimators': n_estimators,
        'max_features': max_features,
        'max_depth': max_depth,
        'min_samples_split': min_samples_split,
        'min_samples_leaf': min_samples_leaf,
        'bootstrap': bootstrap,
    }
    print(random_grid)
    # Use the random grid to search for the best hyperparameters.
    # The base model is seeded so repeated runs build the same forests; the
    # search's own random_state only fixes which candidates get sampled.
    rf = RandomForestRegressor(random_state=42)
    # Random search of parameters: sample 100 combinations from the grid,
    # score each with 3-fold cross-validation, and use all available cores.
    rf_random = RandomizedSearchCV(
        estimator=rf,
        param_distributions=random_grid,
        n_iter=100,
        cv=3,
        verbose=2,
        random_state=42,
        n_jobs=-1,
    )
    # Fit the random search model
    rf_random.fit(x_train, y_train)
    # Report the best parameter combination found by the search.
    print(rf_random.best_params_)