From 36f9d7e098646e34bee511a5b1e57fc6ad304c05 Mon Sep 17 00:00:00 2001 From: valentin Date: Sat, 4 Apr 2020 22:28:14 +0200 Subject: [PATCH] Pipelines regroup --- 04_detect_from_oneline_csv.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/04_detect_from_oneline_csv.py b/04_detect_from_oneline_csv.py index 1a0c448..e05af31 100644 --- a/04_detect_from_oneline_csv.py +++ b/04_detect_from_oneline_csv.py @@ -66,13 +66,16 @@ class ManualFeatureSelector(TransformerMixin): # =-=-=-=-=-=-= Pipelines =-=-=-=-=-=-= # - This pipeline select usefull features - -pipeline_features_selection = Pipeline([ - ('features_remap', ManualFeatureSelector()), -]) +#pipeline_features_selection = Pipeline([ +# ('features_remap', ManualFeatureSelector()), +#]) # - This pipeline use the imputer and scales the values - +# -- Merged the feature-selection step into this pipeline; seems to work for now -- +# -- TODO: if no issues appear, remove the commented-out pipeline above -- pipeline = Pipeline([ + ('features_remap', ManualFeatureSelector()), ('imputer', SimpleImputer(strategy="median")), ('std_scaler', StandardScaler()), ]) @@ -95,7 +98,8 @@ saved_model = joblib.load("models/malware_classifier_1.pkl") # - Use np.array([['info1', 'info2', 'infox']]) to build the 'line' variable - def predict_one_line(model,line): - X_unknown = full_pipeline(line) + #X_unknown = full_pipeline(line) + X_unknown = pipeline.fit_transform(line) X_unknown_columns = ['Characteristics', 'DllCharacteristics', 'SectionsMaxEntropy', 'MajorSubsystemVersion', 'Machine', 'Subsystem', 'ResourcesMaxEntropy', 'ResourcesMinEntropy', 'VersionInformationSize', ' MajorOperatingSystemVersion', 'ResourcesMeanEntropy', 'SectionsMeanEntropy'] X_unknown = pd.DataFrame(X_unknown, columns=X_unknown_columns) ans = model.predict(X_unknown)