optimize_run_pipeline_logger.py
from niaaml import Pipeline
from niaaml.classifiers import MultiLayerPerceptron
from niaaml.preprocessing.feature_selection import VarianceThreshold
from niaaml.preprocessing.feature_transform import Normalizer
from niaaml.data import CSVDataReader
from niaaml.logger import Logger
import os
import numpy
import pandas
"""
This example presents how to use the Pipeline class with logging individually. You may use this if you want to test out a specific classification pipeline.
"""
# prepare the data reader using a CSV file
data_reader = CSVDataReader(
    src=os.path.dirname(os.path.abspath(__file__)) + "/example_files/dataset.csv",
    has_header=False,
    contains_classes=True,
)
# prepare a Logger instance
# verbose=True enables more detailed output; output_file is the log file's name
# if output_file is None, no log file is created
logger = Logger(verbose=True, output_file="output.log")
# instantiate a Pipeline object
pipeline = Pipeline(
    feature_selection_algorithm=VarianceThreshold(),
    feature_transform_algorithm=Normalizer(),
    classifier=MultiLayerPerceptron(),
    logger=logger,
)
# run the pipeline optimization process
# optimize() returns the fitness value and, along the way, sets the best parameters
# for the classifier, feature selection algorithm and feature transform algorithm
pipeline.optimize(
    data_reader.get_x(),
    data_reader.get_y(),
    10,
    50,
    "ParticleSwarmAlgorithm",
    "Accuracy",
)
# run the pipeline on dummy data
# you could run the pipeline before the optimization process, but the predictions would be
# meaningless because nothing in the pipeline has been fitted to the given dataset yet
predicted = pipeline.run(
    pandas.DataFrame(
        numpy.random.uniform(
            low=0.0, high=15.0, size=(30, data_reader.get_x().shape[1])
        )
    )
)
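# quick way to inspect the result (assumption for illustration: run() returns the predicted
# class labels for the 30 generated samples as an array-like object)
print(predicted)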
# the pipeline variable contains a Pipeline object that can be used for further classification,
# exported as an object (which can later be loaded and reused) or exported as a text file
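# a minimal sketch of what the comment above describes; the method names (export, export_text,
# Pipeline.load) follow NiaAML's documented export/load helpers and are an assumption here,
# so adjust them if they differ in your NiaAML version:
#
# pipeline.export("pipeline.ppln")                    # save the Pipeline object for later reuse
# pipeline.export_text("pipeline.txt")                # save a human-readable description of the pipeline
# loaded_pipeline = Pipeline.load("pipeline.ppln")    # load the saved object and classify with it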