diff --git a/04_detect_from_oneline_csv.py b/04_detect_from_oneline_csv.py
index 1a0c448..e05af31 100644
--- a/04_detect_from_oneline_csv.py
+++ b/04_detect_from_oneline_csv.py
@@ -66,13 +66,16 @@ class ManualFeatureSelector(TransformerMixin):
 # =-=-=-=-=-=-= Pipelines =-=-=-=-=-=-=
 
 # - This pipeline select usefull features -
-pipeline_features_selection = Pipeline([
-    ('features_remap', ManualFeatureSelector()),
-])
+#pipeline_features_selection = Pipeline([
+#    ('features_remap', ManualFeatureSelector()),
+#])
 
 
 # - This pipeline use the imputer and scales the values -
+# -- Tried to regroup both pipelines, seems to work for now ... --
+# -- TODO If no issues, don't forget to remove the upper commented pipeline
 pipeline = Pipeline([
+    ('features_remap', ManualFeatureSelector()),
     ('imputer', SimpleImputer(strategy="median")),
     ('std_scaler', StandardScaler()),
 ])
@@ -95,7 +98,8 @@ saved_model = joblib.load("models/malware_classifier_1.pkl")
 
 # - Use np.array([['info1', 'info2', 'infox']]) to build the 'line' variable -
 def predict_one_line(model,line):
-    X_unknown = full_pipeline(line)
+    #X_unknown = full_pipeline(line)
+    X_unknown = pipeline.fit_transform(line)
     X_unknown_columns = ['Characteristics', 'DllCharacteristics', 'SectionsMaxEntropy', 'MajorSubsystemVersion', 'Machine', 'Subsystem', 'ResourcesMaxEntropy', 'ResourcesMinEntropy', 'VersionInformationSize', ' MajorOperatingSystemVersion', 'ResourcesMeanEntropy', 'SectionsMeanEntropy']
     X_unknown = pd.DataFrame(X_unknown, columns=X_unknown_columns)
     ans = model.predict(X_unknown)