montecarlo1
diff --git a/‎code-python3/README.md‎
Lines changed: 4 additions & 2 deletions b/‎code-python3/README.md‎
Lines changed: 4 additions & 2 deletions
diff --git a/‎code-python3/machine_learning.py‎
Lines changed: 48 additions & 0 deletions b/‎code-python3/machine_learning.py‎
Lines changed: 48 additions & 0 deletions
@@ -40,14 +40,16 @@ with `.items()`
 
 ## binary mode for CSVs
 
-Binary mode for CSVs. In Python 2 you would open CSV files in binary mode to
+In Python 2 it was best practice to open CSV files in binary mode to
 make sure you dealt properly with Windows line endings:
 
 ```
 f = open("some.csv", "rb")
 ```
 
-In Python 3 you open them in text mode and just specify the line ending types:
+In Python 3 that doesn't work, because the `csv` module expects text (`str`)
+rather than raw bytes. Instead, open the file in text mode with an explicit
+encoding and pass `newline=''` so that the `csv` module handles line endings itself:
 
 ```
 f = open("some.csv", 'r', encoding='utf8', newline='')
 
@@ -0,0 +1,48 @@
+from collections import Counter
+import math, random
+
+#
+# data splitting
+#
+
+def split_data(data, prob):
+    """Randomly partition data into two lists.
+
+    Each row lands in the first list when a fresh random.random() draw is
+    below prob, otherwise in the second. Returns (first, second).
+    """
+    chosen, remainder = [], []
+    for item in data:
+        if random.random() < prob:
+            chosen.append(item)
+        else:
+            remainder.append(item)
+    return chosen, remainder
+
+def train_test_split(x, y, test_pct):
+    """Randomly split paired inputs x and outputs y into train and test sets.
+
+    Each (x_i, y_i) pair is routed by split_data: it goes to the training set
+    with probability 1 - test_pct and to the test set otherwise, so the split
+    sizes are random rather than exact.
+
+    Returns (x_train, x_test, y_train, y_test), each a tuple. A partition
+    that receives no pairs yields two empty tuples instead of raising.
+    """
+    data = zip(x, y)                              # pair corresponding values
+    train, test = split_data(data, 1 - test_pct)  # split the dataset of pairs
+    # zip(*[]) yields nothing, so unpacking it into two names would raise
+    # ValueError; fall back to empty tuples when a side is empty.
+    x_train, y_train = zip(*train) if train else ((), ())
+    x_test, y_test = zip(*test) if test else ((), ())
+    return x_train, x_test, y_train, y_test
+
+#
+# correctness
+#
+
+def accuracy(tp, fp, fn, tn):
+    """Return the fraction of all counted outcomes that were correct.
+
+    Correct outcomes are tp + tn; the denominator is the sum of all four
+    counts.
+    """
+    return (tp + tn) / (tp + fp + fn + tn)
+
+def precision(tp, fp, fn, tn):
+    """Return tp as a fraction of tp + fp (fn and tn are unused)."""
+    predicted_positive = tp + fp
+    return tp / predicted_positive
+
+def recall(tp, fp, fn, tn):
+    """Return tp as a fraction of tp + fn (fp and tn are unused)."""
+    actual_positive = tp + fn
+    return tp / actual_positive
+
+def f1_score(tp, fp, fn, tn):
+    """Return the F1 score: the harmonic mean of precision and recall.
+
+    Takes the same four counts as precision() and recall() and delegates to
+    them. When both come back as zero (tp == 0 with nonzero fp and fn) the
+    result is 0.0 instead of a ZeroDivisionError from the 0 / 0 harmonic mean.
+    precision() and recall() may still raise ZeroDivisionError when their own
+    denominators (tp + fp, tp + fn) are zero.
+    """
+    p = precision(tp, fp, fn, tn)
+    r = recall(tp, fp, fn, tn)
+
+    # Both components are zero: the harmonic mean is taken to be 0.
+    if p + r == 0:
+        return 0.0
+    return 2 * p * r / (p + r)
+
+if __name__ == "__main__":
+
+    # Demo: evaluate every metric on one fixed set of (tp, fp, fn, tn) counts.
+    counts = (70, 4930, 13930, 981070)
+
+    print("accuracy(70, 4930, 13930, 981070)", accuracy(*counts))
+    print("precision(70, 4930, 13930, 981070)", precision(*counts))
+    print("recall(70, 4930, 13930, 981070)", recall(*counts))
+    print("f1_score(70, 4930, 13930, 981070)", f1_score(*counts))