Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
6c590d6
chore(add): Base class for numerical perturbation detector
Kranium2002 Oct 13, 2024
4e21ed9
fix: minor issues with base class
Kranium2002 Oct 17, 2024
f51e560
add: detector file with default values
Kranium2002 Oct 17, 2024
13d543f
add: mock models for test
Kranium2002 Oct 17, 2024
fe0e2b0
add: tests for numerical perturbation detector
Kranium2002 Oct 17, 2024
a44bca4
Merge branch 'main' into main
henchaves Oct 18, 2024
d1b240b
Merge branch 'main' into main
kevinmessiaen Oct 29, 2024
f9cceda
Merge branch 'main' into main
henchaves Oct 31, 2024
ae9323c
Format files
henchaves Oct 31, 2024
dd04ec7
Merge branch 'main' into main
henchaves Nov 4, 2024
ce3f39f
Merge branch 'main' into main
mattbit Nov 15, 2024
32ff226
Merge branch 'Giskard-AI:main' into main
Kranium2002 Nov 22, 2024
df252f3
fix: check datatype using column_types
Kranium2002 Nov 23, 2024
41bcef7
fix: add metric and metric_value
Kranium2002 Nov 23, 2024
272ee33
fix: add domain and deviation for scan widget
Kranium2002 Nov 23, 2024
6c8b9ae
fix: add transformation_fn
Kranium2002 Nov 23, 2024
264c8d3
Merge branch 'Giskard-AI:main' into main
Kranium2002 Nov 23, 2024
74321ab
fix: tests
Kranium2002 Dec 1, 2024
4eee617
fix: base detector and meta data
Kranium2002 Dec 1, 2024
4973df2
fix: default transformations
Kranium2002 Dec 1, 2024
8e5c8f5
create: Transformations using TransformationFunction Base Class
Kranium2002 Dec 1, 2024
c620bf3
Merge branch 'main' into main
henchaves Dec 16, 2024
c466512
Merge branch 'main' into main
henchaves Jan 6, 2025
312c02d
Format files
henchaves Jan 6, 2025
f30c7b2
Move BaseNumericalPerturbationDetector to base_detector.py
henchaves Jan 6, 2025
eada50e
Create BasePerturbationDetector
henchaves Jan 6, 2025
6afbd2f
Fix import
henchaves Jan 6, 2025
f251bba
Remove params from _get_default_transformations
henchaves Jan 6, 2025
04356b0
Update _get_default_transformations from NumericalPerturbationDetector
henchaves Jan 6, 2025
52b1ae0
Create a base PerturbationFunction class
henchaves Jan 6, 2025
8628c4b
Fix Sonar issue
henchaves Jan 7, 2025
d58e38c
Add type hints to text_transformations.py methods
henchaves Jan 7, 2025
a395c68
Update import
henchaves Jan 7, 2025
dcbb88c
Merge branch 'main' into feature/gsk-3948-add-numerical-perturbation-…
kevinmessiaen Jan 29, 2025
9d05258
Merge branch 'main' into feature/gsk-3948-add-numerical-perturbation-…
kevinmessiaen Jan 30, 2025
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next commit
chore(add): Base class for numerical perturbation detector
  • Loading branch information
Kranium2002 committed Oct 13, 2024
commit 6c590d6cd282c12840206858a4a8f272e19358ee
136 changes: 136 additions & 0 deletions giskard/scanner/robustness/base_numerical_detector.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
from typing import Optional, Sequence

import numpy as np
import pandas as pd

from ...datasets.base import Dataset
from ...models.base import BaseModel
from ..issues import Issue, IssueLevel, Robustness
from ..logger import logger
from ..registry import Detector


class BaseNumericalPerturbationDetector:
    """Base class for metamorphic detectors based on numerical feature perturbations.

    For every numerical feature, a sample of the dataset is taken, the feature is increased by
    ``perturbation_fraction`` of its own value, and the model predictions before and after the
    perturbation are compared. A feature whose fail rate reaches the threshold yields an issue.
    """

    _issue_group = Robustness

    # Fallback values used when the matching constructor argument is left unset.
    _DEFAULT_THRESHOLD = 0.05
    _DEFAULT_OUTPUT_SENSITIVITY = 0.05
    _DEFAULT_MAX_SAMPLES = 1000

    def __init__(
        self,
        perturbation_fraction: float = 0.01,
        threshold: Optional[float] = None,
        num_samples: Optional[int] = None,
        output_sensitivity: Optional[float] = None,
    ):
        """
        Parameters
        ----------
        perturbation_fraction: float
            Fractional perturbation to apply to numerical features (default is 1% change).
        threshold: Optional[float]
            The threshold for the fail rate, defined as the proportion of samples for which the model
            prediction has changed. If the fail rate is greater than or equal to the threshold, an
            issue is created. Defaults to 0.05 when not provided.
        num_samples: Optional[int]
            The maximum number of samples to use for testing. If not provided, at most 1000 samples
            are used. The value is always capped at the dataset size.
        output_sensitivity: Optional[float]
            For regression models, the maximum relative change in prediction considered acceptable.
            Defaults to 0.05 when not provided.
        """
        self.perturbation_fraction = perturbation_fraction
        self.threshold = threshold
        self.num_samples = num_samples
        self.output_sensitivity = output_sensitivity

    def run(self, model: BaseModel, dataset: Dataset, features: Sequence[str]) -> Sequence[Issue]:
        """Run the numerical perturbation detector.

        Parameters
        ----------
        model: BaseModel
            The model whose predictions are compared before and after perturbation.
        dataset: Dataset
            The dataset providing the rows to perturb (read through ``dataset.df``).
        features: Sequence[str]
            Column names to consider. Only the numerical, non-boolean ones are tested.

        Returns
        -------
        Sequence[Issue]
            The issues found, at most one per tested feature.
        """
        numerical_features = [f for f in features if self._is_perturbable(dataset.df[f])]

        # Arguments are passed separately so the message is only formatted if the record is emitted.
        # Unset (``None``) settings are logged as -1.
        logger.info(
            "%s: Running numerical perturbation detector with threshold=%.3f, "
            "perturbation_fraction=%.3f, output_sensitivity=%.3f, num_samples=%d",
            self.__class__.__name__,
            -1 if self.threshold is None else self.threshold,
            self.perturbation_fraction,
            -1 if self.output_sensitivity is None else self.output_sensitivity,
            -1 if self.num_samples is None else self.num_samples,
        )

        issues = []
        for feature in numerical_features:
            issues.extend(self._detect_issues(model, dataset, feature))

        return issues

    @staticmethod
    def _is_perturbable(column: pd.Series) -> bool:
        """Tell whether a column holds numbers that can be meaningfully scaled."""
        # pandas reports boolean columns as numeric; scaling a flag by a fraction is meaningless.
        return pd.api.types.is_numeric_dtype(column) and not pd.api.types.is_bool_dtype(column)

    @staticmethod
    def _relative_change(original, perturbed) -> np.ndarray:
        """Return ``|perturbed - original| / |original|``, with unchanged predictions mapped to 0."""
        original = np.asarray(original, dtype=float)
        perturbed = np.asarray(perturbed, dtype=float)
        delta = perturbed - original
        # A zero reference prediction would otherwise emit warnings and yield NaN for 0/0.
        with np.errstate(divide="ignore", invalid="ignore"):
            rel_delta = np.abs(delta / original)
        # An identical prediction is a zero change even when the reference is 0;
        # a changed prediction over a zero reference stays infinite and therefore fails.
        return np.where(delta == 0, 0.0, rel_delta)

    def _detect_issues(
        self,
        model: BaseModel,
        dataset: Dataset,
        feature: str,
    ) -> Sequence[Issue]:
        """Perturb a single feature and report an issue if predictions change too often.

        Parameters
        ----------
        model: BaseModel
            The model under test.
        dataset: Dataset
            The dataset to sample rows from.
        feature: str
            Name of the numerical column to perturb.

        Returns
        -------
        Sequence[Issue]
            A list with one issue when the fail rate reaches the threshold, otherwise an empty list.

        Raises
        ------
        NotImplementedError
            If the model is neither a classification nor a regression model.
        """
        # ``is None`` checks so that an explicit 0 is honoured instead of replaced by the default.
        threshold = self._DEFAULT_THRESHOLD if self.threshold is None else self.threshold
        output_sensitivity = (
            self._DEFAULT_OUTPUT_SENSITIVITY if self.output_sensitivity is None else self.output_sensitivity
        )
        # Cap at the dataset size: sampling more rows than available raises a ValueError.
        num_samples = min(self.num_samples or self._DEFAULT_MAX_SAMPLES, len(dataset.df))
        if num_samples == 0:
            # Empty dataset: nothing to test, and the fail rate would be undefined.
            return []

        # Sample first, then perturb only the sampled rows. This avoids copying the whole frame
        # and keeps original and perturbed rows aligned even if index labels are not unique.
        original_data = dataset.df.sample(n=num_samples, random_state=42)
        perturbed_data = original_data.copy()
        perturbed_data[feature] = original_data[feature] + original_data[feature] * self.perturbation_fraction

        # Calculate predictions before and after perturbation.
        # NOTE(review): keyword arguments are used so target/column_types cannot land in the wrong
        # positional slot of the Dataset constructor — confirm against the Dataset signature.
        original_pred = model.predict(
            Dataset(df=original_data, target=dataset.target, column_types=dataset.column_types)
        )
        perturbed_pred = model.predict(
            Dataset(df=perturbed_data, target=dataset.target, column_types=dataset.column_types)
        )

        if model.is_classification:
            passed = np.asarray(original_pred.raw_prediction) == np.asarray(perturbed_pred.raw_prediction)
        elif model.is_regression:
            rel_delta = self._relative_change(original_pred.raw_prediction, perturbed_pred.raw_prediction)
            passed = rel_delta < output_sensitivity
        else:
            raise NotImplementedError("Only classification and regression models are supported.")

        # Plain Python numbers keep the issue metadata free of NumPy scalar types.
        fail_rate = float(1 - passed.mean())

        logger.info("Testing `%s` perturbation\tFail rate: %.3f", feature, fail_rate)

        if fail_rate < threshold:
            return []

        # Severity: major when the fail rate is at least twice the threshold.
        issue_level = IssueLevel.MAJOR if fail_rate >= 2 * threshold else IssueLevel.MEDIUM

        # Issue description
        desc = (
            "When the feature `%s` is perturbed by %.2f%%, the model changes its prediction in %.2f%% of cases."
            % (feature, self.perturbation_fraction * 100, fail_rate * 100)
        )

        failed_size = int((~passed).sum())
        slice_size = len(passed)

        issue = Issue(
            model,
            dataset,
            group=self._issue_group,
            level=issue_level,
            description=desc,
            features=[feature],
            meta={
                "feature": feature,
                "perturbation_fraction": self.perturbation_fraction,
                "fail_rate": fail_rate,
                "failed_size": failed_size,
                "slice_size": slice_size,
                "threshold": threshold,
                "output_sensitivity": output_sensitivity,
            },
            importance=fail_rate,
        )

        return [issue]
Empty file.
Loading