Pipelines regroup

2020-04-04 22:28:14 +02:00 · 2020-04-04 22:28:14 +02:00 · 36f9d7e098
commit 36f9d7e098
parent a1bdae4dab
1 changed files with 8 additions and 4 deletions
--- a/04_detect_from_oneline_csv.py
+++ b/04_detect_from_oneline_csv.py
@ -66,13 +66,16 @@ class ManualFeatureSelector(TransformerMixin):
 # =-=-=-=-=-=-= Pipelines =-=-=-=-=-=-=  
 # - This pipeline selects useful features -
-pipeline_features_selection = Pipeline([
+#pipeline_features_selection = Pipeline([
-	('features_remap', ManualFeatureSelector()), 
+#	('features_remap', ManualFeatureSelector()), 
-])
+#])
 # - This pipeline selects the features, uses the imputer (median) and scales the values -
 # -- Both pipelines were regrouped into this single one; it seems to work for now ... --
 # -- TODO: if no issues show up, remove the commented-out pipeline above --
 pipeline = Pipeline([
 	('features_remap', ManualFeatureSelector()), 
 	('imputer', SimpleImputer(strategy="median")),
 	('std_scaler', StandardScaler()),
 ]) 
@ -95,7 +98,8 @@ saved_model = joblib.load("models/malware_classifier_1.pkl")
 # - Use np.array([['info1', 'info2', 'infox']]) to build the 'line' variable -
 def predict_one_line(model,line): 
-	X_unknown = full_pipeline(line) 
+	#X_unknown = full_pipeline(line) 
 	X_unknown = pipeline.fit_transform(line) 
 	X_unknown_columns = ['Characteristics', 'DllCharacteristics', 'SectionsMaxEntropy',     'MajorSubsystemVersion', 'Machine', 'Subsystem', 'ResourcesMaxEntropy', 'ResourcesMinEntropy', 'VersionInformationSize', '    MajorOperatingSystemVersion', 'ResourcesMeanEntropy', 'SectionsMeanEntropy']  
 	X_unknown = pd.DataFrame(X_unknown, columns=X_unknown_columns) 
 	ans = model.predict(X_unknown)