I updated Neuraxle to the latest version (3.4).
I noticed that the whole `auto_ml.py` module was redone. I checked the documentation, but there is nothing about it. On Git, it seems the `RandomSearch()` method was replaced a long time ago by the `AutoML()` method. However, the parameters are different.
Does anybody know how I can plug the Boston Housing example pipeline into the automatic parameter search in the latest Neuraxle version (3.4)?
import numpy as np
from sklearn.cluster import KMeans
from sklearn.datasets import load_boston
from sklearn.decomposition import PCA, FastICA
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.linear_model import Ridge
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from neuraxle.hyperparams.distributions import RandInt, LogUniform, Boolean
from neuraxle.hyperparams.space import HyperparameterSpace
from neuraxle.metaopt.auto_ml import RandomSearch
from neuraxle.metaopt.random import KFoldCrossValidationWrapper
from neuraxle.pipeline import Pipeline
from neuraxle.steps.numpy import NumpyTranspose
from neuraxle.steps.sklearn import SKLearnWrapper
from neuraxle.union import AddFeatures, ModelStacking
def main():
    """Meta-fit a feature-union + model-stacking pipeline on Boston Housing.

    Loads the Boston Housing data, shuffles it with a fixed seed, and holds
    out the last 25% as a test set. A pipeline made of a PCA/FastICA feature
    union followed by a stacked regressor (gradient boosting + k-means, judged
    by a ridge regression) is then tuned with ``RandomSearch`` (10 iterations,
    10-fold cross-validation scored by R2). Finally the R2 score of the tuned
    pipeline is printed for both the train and the test split.
    """
    boston = load_boston()
    X, y = shuffle(boston.data, boston.target, random_state=13)
    X = X.astype(np.float32)
    # shuffle=False: the data was already shuffled above with a fixed seed.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, shuffle=False)

    # Note that the hyperparameter spaces are defined here during the pipeline definition, but it could be already set
    # within the classes at their definition if using custom classes, or also it could be defined after declaring the
    # pipeline using a flat dict or a nested dict.
    p = Pipeline([
        AddFeatures([
            SKLearnWrapper(
                PCA(n_components=2),
                HyperparameterSpace({"n_components": RandInt(1, 3)})
            ),
            SKLearnWrapper(
                FastICA(n_components=2),
                HyperparameterSpace({"n_components": RandInt(1, 3)})
            ),
        ]),
        ModelStacking([
            SKLearnWrapper(
                GradientBoostingRegressor(),
                HyperparameterSpace({
                    "n_estimators": RandInt(50, 600), "max_depth": RandInt(1, 10),
                    "learning_rate": LogUniform(0.07, 0.7)
                })
            ),
            SKLearnWrapper(
                KMeans(),
                HyperparameterSpace({"n_clusters": RandInt(5, 10)})
            ),
        ],
            joiner=NumpyTranspose(),
            judge=SKLearnWrapper(
                Ridge(),
                HyperparameterSpace({"alpha": LogUniform(0.7, 1.4), "fit_intercept": Boolean()})
            ),
        )
    ])

    print("Meta-fitting on train:")
    # NOTE(review): this uses the RandomSearch / meta_fit API; newer Neuraxle
    # releases reportedly replace it with AutoML, whose parameters differ --
    # confirm against the installed version.
    p = p.meta_fit(X_train, y_train, metastep=RandomSearch(
        n_iter=10,
        higher_score_is_better=True,
        validation_technique=KFoldCrossValidationWrapper(scoring_function=r2_score, k_fold=10)
    ))
    # Here is an alternative way to do it, more "pipeliney":
    # p = RandomSearch(
    #     p,
    #     n_iter=15,
    #     higher_score_is_better=True,
    #     validation_technique=KFoldCrossValidationWrapper(scoring_function=r2_score, k_fold=3)
    # ).fit(X_train, y_train)

    print("")
    print("Transforming train and test:")
    y_train_predicted = p.predict(X_train)
    y_test_predicted = p.predict(X_test)

    print("")
    print("Evaluating transformed train:")
    # r2_score expects (y_true, y_pred); R2 is not symmetric, so the ground
    # truth must come first for the reported score to be meaningful.
    score_transform = r2_score(y_train, y_train_predicted)
    print('R2 regression score:', score_transform)

    print("")
    print("Evaluating transformed test:")
    score_test = r2_score(y_test, y_test_predicted)
    print('R2 regression score:', score_test)
# Run the example only when executed as a script, not when imported.
if __name__ == "__main__":
    main()