Skip to content

Commit 02cd688

Browse files
authored
Merge pull request #143 from Giskard-AI/task/GSK-177_Enron_Unit_test
Task/gsk 177 enron unit test
2 parents ff98bfa + 9015ab8 commit 02cd688

File tree

5 files changed

+13634
-266
lines changed

5 files changed

+13634
-266
lines changed

‎giskard-ml-worker/ml_worker/testing/performance_tests.py‎

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,9 @@ def _test_classification_score(self, score_fn, gsk_dataset: GiskardDataset, mode
7474
dataframe = gsk_dataset.df.reset_index(drop=True)
7575
prediction = model.run_predict(dataframe).raw_prediction
7676
labels_mapping = {model.classification_labels[i]: i for i in range(len(model.classification_labels))}
77+
78+
if gsk_dataset.target not in dataframe:
79+
raise ValueError("Target Column is not available")
7780
actual_target = dataframe[gsk_dataset.target].map(labels_mapping)
7881
if is_binary_classification:
7982
metric = score_fn(actual_target, prediction)
@@ -82,24 +85,24 @@ def _test_classification_score(self, score_fn, gsk_dataset: GiskardDataset, mode
8285
output_df_sample = dataframe.loc[actual_target != prediction]
8386

8487
return RawSingleTestResult(
85-
actual_slices_size=[len(gsk_dataset)],
88+
actual_slices_size=[len(dataframe)],
8689
metric=metric,
8790
passed=metric >= threshold,
8891
output_df=output_df_sample
8992
)
9093

91-
def _test_accuracy_score(self, score_fn, gsk_dataset: GiskardDataset, model: GiskardModel, threshold=1):
94+
def _test_accuracy_score(self, gsk_dataset: GiskardDataset, model: GiskardModel, threshold=1):
9295
dataframe = gsk_dataset.df.reset_index(drop=True)
9396
prediction = model.run_predict(dataframe).raw_prediction
9497
labels_mapping = {model.classification_labels[i]: i for i in range(len(model.classification_labels))}
9598
actual_target = dataframe[gsk_dataset.target].map(labels_mapping)
9699

97-
metric = score_fn(actual_target, prediction)
100+
metric = accuracy_score(actual_target, prediction)
98101

99102
output_df_sample = dataframe.loc[actual_target != prediction]
100103

101104
return RawSingleTestResult(
102-
actual_slices_size=[len(gsk_dataset)],
105+
actual_slices_size=[len(dataframe)],
103106
metric=metric,
104107
passed=metric >= threshold,
105108
output_df=output_df_sample
@@ -175,7 +178,7 @@ def test_accuracy(self, actual_slice: GiskardDataset, model: GiskardModel, thres
175178
output_df:
176179
Dataframe containing all the incorrect rows of the given data slice
177180
"""
178-
results = self._test_accuracy_score(accuracy_score, actual_slice, model, threshold)
181+
results = self._test_accuracy_score(actual_slice, model, threshold)
179182
transformed_results = self.transform_results(results)
180183

181184
return transformed_results
@@ -585,8 +588,7 @@ def test_diff_reference_actual_accuracy(self, reference_slice, actual_slice, mod
585588
output_df:
586589
Dataframe containing all the incorrect rows of the given actual dataset
587590
"""
588-
partial_accuracy = partial(self._test_classification_score, accuracy_score)
589-
return self._test_diff_reference_actual(partial_accuracy, model, reference_slice, actual_slice, threshold)
591+
return self._test_diff_reference_actual(self._test_accuracy_score, model, reference_slice, actual_slice, threshold)
590592

591593
def test_diff_rmse(self, actual_slice, reference_slice, model, threshold=0.1):
592594
"""
Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
import logging
2+
import time
3+
4+
import pandas as pd
5+
import pytest
6+
from sklearn import model_selection
7+
from sklearn.compose import ColumnTransformer
8+
from sklearn.impute import SimpleImputer
9+
from sklearn.linear_model import LogisticRegression
10+
from sklearn.pipeline import Pipeline
11+
from sklearn.preprocessing import OneHotEncoder
12+
from sklearn.preprocessing import StandardScaler
13+
from sklearn.feature_extraction.text import CountVectorizer
14+
from sklearn.feature_extraction.text import TfidfTransformer
15+
from nltk.corpus import stopwords
16+
from string import punctuation
17+
18+
from ml_worker.core.giskard_dataset import GiskardDataset
19+
from ml_worker.core.model import GiskardModel
20+
from test import path
21+
22+
# Giskard feature type of every model input column, grouped by type.
# Insertion order is significant: list(input_types) is used as the model's
# feature_names further down in this module.
input_types = {
    **dict.fromkeys(("Subject", "Content"), "text"),
    **dict.fromkeys(("Week_day", "Month"), "category"),
    **dict.fromkeys(("Hour", "Nb_of_forwarded_msg", "Year"), "numeric"),
}
31+
32+
33+
@pytest.fixture()
def enron_data() -> GiskardDataset:
    """Pytest fixture: load the Enron CSV and wrap it as a GiskardDataset.

    The CSV is read from ``test_data/enron_data.csv`` (resolved through the
    ``path`` helper); the ``Target`` column is declared as the dataset target
    and ``input_types`` as its feature types.
    """
    logging.info("Fetching Enron Data")
    csv_location = path('test_data/enron_data.csv')
    enron_frame = pd.read_csv(csv_location)
    return GiskardDataset(df=enron_frame, target='Target', feature_types=input_types)
41+
42+
43+
@pytest.fixture()
def enron_test_data(enron_data):
    """Pytest fixture: the Enron dataset without its ``Target`` column.

    Builds a new GiskardDataset from a copy of ``enron_data.df`` with the
    ``Target`` column dropped and no target declared.
    """
    features_only = pd.DataFrame(enron_data.df).drop(columns=['Target'])
    return GiskardDataset(df=features_only, feature_types=input_types, target=None)
50+
51+
52+
@pytest.fixture()
def enron_model(enron_data) -> GiskardModel:
    """Pytest fixture: train a logistic-regression pipeline on the Enron data.

    Numeric columns are median-imputed and standardised, categorical columns
    are imputed with the constant ``'missing'`` and one-hot encoded, and the
    ``Content`` column goes through a bag-of-words + TF-IDF pipeline. The
    classifier is fitted on a stratified 80/20 split; the held-out score and
    the elapsed time since the fixture started are logged.

    Args:
        enron_data: dataset fixture whose ``df`` holds the feature columns
            and a ``Target`` column.

    Returns:
        A classification GiskardModel whose prediction function is the fitted
        pipeline's ``predict_proba`` and whose labels are ``clf.classes_``.
    """
    start = time.time()

    # CountVectorizer documents ``stop_words`` as 'english', a list or None;
    # releases with parameter validation reject a set, so hand over a list.
    # Sorting only makes the list order reproducible.
    stoplist = sorted(set(stopwords.words('english') + list(punctuation)))

    columns_to_scale = [name for name, kind in input_types.items() if kind == "numeric"]
    columns_to_encode = [name for name, kind in input_types.items() if kind == "category"]

    numeric_transformer = Pipeline([
        ('imputer', SimpleImputer(strategy='median')),
        ('scaler', StandardScaler()),
    ])

    # NOTE(review): newer scikit-learn releases rename ``sparse`` to
    # ``sparse_output``; left unchanged to match the version this test was
    # written against - confirm against the pinned dependency.
    categorical_transformer = Pipeline([
        ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
        ('onehot', OneHotEncoder(handle_unknown='ignore', sparse=False)),
    ])

    text_transformer = Pipeline([
        ('vect', CountVectorizer(stop_words=stoplist)),
        ('tfidf', TfidfTransformer()),
    ])

    # Only "Content" is vectorised; "Subject" is not listed in any transformer.
    preprocessor = ColumnTransformer(
        transformers=[
            ('num', numeric_transformer, columns_to_scale),
            ('cat', categorical_transformer, columns_to_encode),
            ('text_Mail', text_transformer, "Content"),
        ]
    )
    clf = Pipeline(steps=[
        ('preprocessor', preprocessor),
        ('classifier', LogisticRegression(max_iter=100)),
    ])

    Y = enron_data.df['Target']
    X = enron_data.df.drop(columns="Target")
    X_train, X_test, Y_train, Y_test = model_selection.train_test_split(  # NOSONAR
        X, Y,
        test_size=0.20,
        random_state=30,
        stratify=Y,
    )
    clf.fit(X_train, Y_train)

    train_time = time.time() - start
    model_score = clf.score(X_test, Y_test)
    logging.info(f"Trained model with score: {model_score} in {round(train_time * 1000)} ms")

    return GiskardModel(
        prediction_function=clf.predict_proba,
        model_type='classification',
        feature_names=list(input_types),
        classification_threshold=0.5,
        classification_labels=clf.classes_
    )

0 commit comments

Comments
 (0)