|
| 1 | +from collections import Counter |
| 2 | +import math, random |
| 3 | + |
| 4 | +# |
| 5 | +# data splitting |
| 6 | +# |
| 7 | + |
def split_data(data, prob):
    """Randomly partition data into two lists of relative size [prob, 1 - prob].

    Each row is assigned independently: it lands in the first list when a
    fresh random.random() draw is below prob, otherwise in the second.
    Row order within each list follows the order of iteration over data.
    Returns a 2-tuple (first_list, second_list).
    """
    selected, remainder = [], []
    for item in data:
        if random.random() < prob:
            selected.append(item)
        else:
            remainder.append(item)
    return selected, remainder
| 14 | + |
def train_test_split(x, y, test_pct):
    """Split paired inputs x and outputs y into train and test portions.

    Corresponding elements of x and y are paired up, the pairs are randomly
    partitioned by split_data (each pair goes to the training side with
    probability 1 - test_pct), and each partition is then separated back
    into its x values and y values.

    Returns (x_train, x_test, y_train, y_test), each a tuple. A partition
    that received no pairs yields two empty tuples rather than raising.
    """
    pairs = zip(x, y)  # pair corresponding values
    train, test = split_data(pairs, 1 - test_pct)  # split the dataset of pairs

    # zip(*[]) produces nothing, so unpacking it into two names would raise
    # ValueError; fall back to explicit empty tuples for an empty partition.
    x_train, y_train = zip(*train) if train else ((), ())
    x_test, y_test = zip(*test) if test else ((), ())
    return x_train, x_test, y_train, y_test
| 21 | + |
| 22 | +# |
| 23 | +# correctness |
| 24 | +# |
| 25 | + |
def accuracy(tp, fp, fn, tn):
    """Return the fraction of all predictions that were correct.

    tp, fp, fn, tn are the counts of true positives, false positives,
    false negatives and true negatives. Correct predictions are tp + tn.
    """
    return (tp + tn) / (tp + fp + fn + tn)
| 30 | + |
def precision(tp, fp, fn, tn):
    """Return the fraction of positive predictions that were correct.

    Only tp and fp are used; fn and tn are accepted so that all the
    correctness metrics share the same signature.
    """
    predicted_positive = tp + fp
    return tp / predicted_positive
| 33 | + |
def recall(tp, fp, fn, tn):
    """Return the fraction of actual positives that were identified.

    Only tp and fn are used; fp and tn are accepted so that all the
    correctness metrics share the same signature.
    """
    actual_positive = tp + fn
    return tp / actual_positive
| 36 | + |
def f1_score(tp, fp, fn, tn):
    """Return the F1 score: the harmonic mean of precision and recall.

    tp, fp, fn, tn are the counts of true positives, false positives,
    false negatives and true negatives. Returns 0.0 when precision and
    recall are both zero. A ZeroDivisionError from precision() or recall()
    (when tp + fp or tp + fn is zero) still propagates unchanged.
    """
    p = precision(tp, fp, fn, tn)
    r = recall(tp, fp, fn, tn)

    # With precision and recall both zero the harmonic-mean formula would
    # divide by zero, yet the score is well defined: it is simply 0.
    if p + r == 0:
        return 0.0

    return 2 * p * r / (p + r)
| 42 | + |
if __name__ == "__main__":

    # Demo: evaluate every metric on one fixed confusion matrix (tp, fp, fn, tn).
    counts = (70, 4930, 13930, 981070)

    print("accuracy(70, 4930, 13930, 981070)", accuracy(*counts))
    print("precision(70, 4930, 13930, 981070)", precision(*counts))
    print("recall(70, 4930, 13930, 981070)", recall(*counts))
    print("f1_score(70, 4930, 13930, 981070)", f1_score(*counts))
0 commit comments