Skip to content

Commit 13fa770

Browse files
committed
ML
1 parent ba4c28f commit 13fa770

File tree

2 files changed

+52
-2
lines changed

2 files changed

+52
-2
lines changed

‎code-python3/README.md‎

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,14 +40,16 @@ with `.items()`
4040

4141
## binary mode for CSVs
4242

43-
Binary mode for CSVs. In Python 2 you would open CSV files in binary mode to
43+
In Python 2 it was best practice to open CSV files in binary mode to
4444
make sure you dealt properly with Windows line endings:
4545

4646
```
4747
f = open("some.csv", "rb")
4848
```
4949

50-
In Python 3 you open them in text mode and just specify the line ending types:
50+
In Python 3 that doesn't work, because the `csv` module expects decoded text
51+
rather than raw bytes. Instead you need to open them in text mode with an
52+
explicit encoding and pass `newline=''` so `csv` handles line endings itself:
5153

5254
```
5355
f = open("some.csv", 'r', encoding='utf8', newline='')

‎code-python3/machine_learning.py‎

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
from collections import Counter
2+
import math, random
3+
4+
#
5+
# data splitting
6+
#
7+
8+
def split_data(data, prob):
    """Randomly partition data into two lists of rows.

    For each row, in order, one random.random() value is drawn; the row
    goes to the first list when that value is below prob, and to the
    second list otherwise.

    Returns a tuple (first, second) of lists.
    """
    chosen, remaining = [], []
    for row in data:
        bucket = chosen if random.random() < prob else remaining
        bucket.append(row)
    return chosen, remaining
14+
15+
def train_test_split(x, y, test_pct):
    """Randomly split paired inputs and outputs into train and test sets.

    x and y are zipped into (x_i, y_i) pairs, the pairs are partitioned
    with split_data using probability 1 - test_pct for the training
    side, and each partition is then un-zipped again.

    Returns a tuple (x_train, x_test, y_train, y_test). Each element is a
    tuple; a partition that received no rows yields empty tuples instead
    of raising ValueError on unpacking.
    """
    data = zip(x, y)                              # pair corresponding values
    train, test = split_data(data, 1 - test_pct)  # split the dataset of pairs
    # zip(*rows) produces nothing for an empty list, so guard the unpacking.
    x_train, y_train = zip(*train) if train else ((), ())
    x_test, y_test = zip(*test) if test else ((), ())
    return x_train, x_test, y_train, y_test
21+
22+
#
23+
# correctness
24+
#
25+
26+
def accuracy(tp, fp, fn, tn):
    """Return the fraction of all predictions that were correct.

    Arguments are the confusion-matrix counts: true positives, false
    positives, false negatives and true negatives.
    """
    return (tp + tn) / (tp + fp + fn + tn)
30+
31+
def precision(tp, fp, fn, tn):
    """Return the fraction of positive predictions that were correct.

    Only tp and fp are used; fn and tn are accepted so that every metric
    shares the same signature.
    """
    predicted_positive = tp + fp
    return tp / predicted_positive
33+
34+
def recall(tp, fp, fn, tn):
    """Return the fraction of actual positives that were identified.

    Only tp and fn are used; fp and tn are accepted so that every metric
    shares the same signature.
    """
    actual_positive = tp + fn
    return tp / actual_positive
36+
37+
def f1_score(tp, fp, fn, tn):
    """Return the F1 score, the harmonic mean of precision and recall.

    Arguments are the confusion-matrix counts: true positives, false
    positives, false negatives and true negatives (tn is unused but kept
    so that every metric shares the same signature).

    Raises ZeroDivisionError only when tp, fp and fn are all zero, in
    which case the score is undefined.
    """
    # With p = tp / (tp + fp) and r = tp / (tp + fn), the harmonic mean
    # 2 * p * r / (p + r) simplifies to the ratio below. Unlike the
    # two-step form, it stays defined (and equals 0) when tp == 0 but
    # some errors were made, instead of dividing by p + r == 0.
    return 2 * tp / (2 * tp + fp + fn)
42+
43+
if __name__ == "__main__":

    # Worked example: confusion-matrix counts in (tp, fp, fn, tn) order.
    counts = (70, 4930, 13930, 981070)

    # Print each metric as "<name>(<args>) <value>".
    for metric in (accuracy, precision, recall, f1_score):
        print(metric.__name__ + "(70, 4930, 13930, 981070)", metric(*counts))

0 commit comments

Comments
 (0)