From 36f9d7e098646e34bee511a5b1e57fc6ad304c05 Mon Sep 17 00:00:00 2001
From: valentin <poubelle@romanet.fr>
Date: Sat, 4 Apr 2020 22:28:14 +0200
Subject: [PATCH] Pipelines regroup

---
 04_detect_from_oneline_csv.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/04_detect_from_oneline_csv.py b/04_detect_from_oneline_csv.py
index 1a0c448..e05af31 100644
--- a/04_detect_from_oneline_csv.py
+++ b/04_detect_from_oneline_csv.py
@@ -66,13 +66,16 @@ class ManualFeatureSelector(TransformerMixin):
 # =-=-=-=-=-=-= Pipelines =-=-=-=-=-=-=  
 
 # - This pipeline select usefull features -
-pipeline_features_selection = Pipeline([
-	('features_remap', ManualFeatureSelector()), 
-])
+#pipeline_features_selection = Pipeline([
+#	('features_remap', ManualFeatureSelector()), 
+#])
 
 
 # - This pipeline use the imputer and scales the values - 
+# -- Merged the feature-selection step into this pipeline (previously two separate pipelines); seems to work so far -- 
+# -- TODO: if no issues show up, remove the commented-out pipeline_features_selection above -- 
 pipeline = Pipeline([
+	('features_remap', ManualFeatureSelector()), 
 	('imputer', SimpleImputer(strategy="median")),
 	('std_scaler', StandardScaler()),
 ]) 
@@ -95,7 +98,8 @@ saved_model = joblib.load("models/malware_classifier_1.pkl")
 # - Use np.array([['info1', 'info2', 'infox']]) to build the 'line' variable -
  
 def predict_one_line(model,line): 
-	X_unknown = full_pipeline(line) 
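+	#X_unknown = full_pipeline(line)  # old call, kept for reference. NOTE(review): fit_transform() below re-fits the imputer and scaler on this single input -- confirm that transform() on an already-fitted pipeline is not what is wanted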
+	X_unknown = pipeline.fit_transform(line) 
 	X_unknown_columns = ['Characteristics', 'DllCharacteristics', 'SectionsMaxEntropy',     'MajorSubsystemVersion', 'Machine', 'Subsystem', 'ResourcesMaxEntropy', 'ResourcesMinEntropy', 'VersionInformationSize', '    MajorOperatingSystemVersion', 'ResourcesMeanEntropy', 'SectionsMeanEntropy']  
 	X_unknown = pd.DataFrame(X_unknown, columns=X_unknown_columns) 
 	ans = model.predict(X_unknown)