
Commit 07b0aa1

more laziness
1 parent 2d4063d commit 07b0aa1

2 files changed: 371 additions & 0 deletions

code-python3/decision_trees.py

Lines changed: 154 additions & 0 deletions
@@ -0,0 +1,154 @@
from __future__ import division
from collections import Counter, defaultdict
from functools import partial
import math, random

def entropy(class_probabilities):
    """given a list of class probabilities, compute the entropy"""
    return sum(-p * math.log(p, 2) for p in class_probabilities if p)

def class_probabilities(labels):
    total_count = len(labels)
    return [count / total_count
            for count in Counter(labels).values()]

def data_entropy(labeled_data):
    labels = [label for _, label in labeled_data]
    probabilities = class_probabilities(labels)
    return entropy(probabilities)

def partition_entropy(subsets):
    """find the entropy from this partition of data into subsets"""
    total_count = sum(len(subset) for subset in subsets)

    return sum(data_entropy(subset) * len(subset) / total_count
               for subset in subsets)
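# quick sanity check (added commentary, not in the original commit):
# a 50/50 split has the maximum entropy of 1 bit, a pure class has none
#   entropy([0.5, 0.5])   # == 1.0
#   entropy([1.0])        # == 0.0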
def group_by(items, key_fn):
    """returns a defaultdict(list), where each input item
    is in the list whose key is key_fn(item)"""
    groups = defaultdict(list)
    for item in items:
        key = key_fn(item)
        groups[key].append(item)
    return groups

def partition_by(inputs, attribute):
    """returns a dict of inputs partitioned by the attribute;
    each input is a pair (attribute_dict, label)"""
    return group_by(inputs, lambda x: x[0][attribute])

def partition_entropy_by(inputs, attribute):
    """computes the entropy corresponding to the given partition"""
    partitions = partition_by(inputs, attribute)
    return partition_entropy(partitions.values())

def classify(tree, input):
    """classify the input using the given decision tree"""

    # if this is a leaf node, return its value
    if tree in [True, False]:
        return tree

    # otherwise find the correct subtree
    attribute, subtree_dict = tree

    subtree_key = input.get(attribute)  # None if input is missing attribute

    if subtree_key not in subtree_dict: # if no subtree for key,
        subtree_key = None              # we'll use the None subtree

    subtree = subtree_dict[subtree_key] # choose the appropriate subtree
    return classify(subtree, input)     # and use it to classify the input
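# for reference (added commentary, not in the original commit): a non-leaf
# tree is a pair (attribute, subtree_dict) whose None key is the default
# branch for unseen attribute values; e.g. the 'Junior' branch of the tree
# built below comes out as
#   ('phd', {'no': True, 'yes': False, None: True})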
def build_tree_id3(inputs, split_candidates=None):

    # if this is our first pass,
    # all keys of the first input are split candidates
    if split_candidates is None:
        split_candidates = inputs[0][0].keys()

    # count Trues and Falses in the inputs
    num_inputs = len(inputs)
    num_trues = len([label for item, label in inputs if label])
    num_falses = num_inputs - num_trues

    if num_trues == 0:                  # if only Falses are left
        return False                    # return a "False" leaf

    if num_falses == 0:                 # if only Trues are left
        return True                     # return a "True" leaf

    if not split_candidates:            # if no split candidates left
        return num_trues >= num_falses  # return the majority leaf

    # otherwise, split on the best attribute
    best_attribute = min(split_candidates,
                         key=partial(partition_entropy_by, inputs))

    partitions = partition_by(inputs, best_attribute)
    new_candidates = [a for a in split_candidates
                      if a != best_attribute]

    # recursively build the subtrees
    subtrees = { attribute : build_tree_id3(subset, new_candidates)
                 for attribute, subset in partitions.items() }

    subtrees[None] = num_trues > num_falses # default case

    return (best_attribute, subtrees)
def forest_classify(trees, input):
    votes = [classify(tree, input) for tree in trees]
    vote_counts = Counter(votes)
    return vote_counts.most_common(1)[0][0]
if __name__ == "__main__":

    inputs = [
        ({'level':'Senior','lang':'Java','tweets':'no','phd':'no'},    False),
        ({'level':'Senior','lang':'Java','tweets':'no','phd':'yes'},   False),
        ({'level':'Mid','lang':'Python','tweets':'no','phd':'no'},     True),
        ({'level':'Junior','lang':'Python','tweets':'no','phd':'no'},  True),
        ({'level':'Junior','lang':'R','tweets':'yes','phd':'no'},      True),
        ({'level':'Junior','lang':'R','tweets':'yes','phd':'yes'},     False),
        ({'level':'Mid','lang':'R','tweets':'yes','phd':'yes'},        True),
        ({'level':'Senior','lang':'Python','tweets':'no','phd':'no'},  False),
        ({'level':'Senior','lang':'R','tweets':'yes','phd':'no'},      True),
        ({'level':'Junior','lang':'Python','tweets':'yes','phd':'no'}, True),
        ({'level':'Senior','lang':'Python','tweets':'yes','phd':'yes'},True),
        ({'level':'Mid','lang':'Python','tweets':'no','phd':'yes'},    True),
        ({'level':'Mid','lang':'Java','tweets':'yes','phd':'no'},      True),
        ({'level':'Junior','lang':'Python','tweets':'no','phd':'yes'}, False)
    ]

    for key in ['level','lang','tweets','phd']:
        print(key, partition_entropy_by(inputs, key))
    print()

    senior_inputs = [(input, label)
                     for input, label in inputs if input["level"] == "Senior"]

    for key in ['lang', 'tweets', 'phd']:
        print(key, partition_entropy_by(senior_inputs, key))
    print()

    print("building the tree")
    tree = build_tree_id3(inputs)
    print(tree)

    print("Junior / Java / tweets / no phd", classify(tree,
        { "level" : "Junior",
          "lang" : "Java",
          "tweets" : "yes",
          "phd" : "no"} ))

    print("Junior / Java / tweets / phd", classify(tree,
        { "level" : "Junior",
          "lang" : "Java",
          "tweets" : "yes",
          "phd" : "yes"} ))

    print("Intern", classify(tree, { "level" : "Intern" } ))
    print("Senior", classify(tree, { "level" : "Senior" } ))

code-python3/neural_networks.py

Lines changed: 217 additions & 0 deletions
@@ -0,0 +1,217 @@
from __future__ import division
from collections import Counter
from functools import partial
from linear_algebra import dot
import math, random
import matplotlib
import matplotlib.pyplot as plt

def step_function(x):
    return 1 if x >= 0 else 0

def perceptron_output(weights, bias, x):
    """returns 1 if the perceptron 'fires', 0 if not"""
    return step_function(dot(weights, x) + bias)

def sigmoid(t):
    return 1 / (1 + math.exp(-t))

def neuron_output(weights, inputs):
    return sigmoid(dot(weights, inputs))
def feed_forward(neural_network, input_vector):
    """takes in a neural network (represented as a list of lists of lists
    of weights) and returns the output from forward-propagating the input"""

    outputs = []

    for layer in neural_network:

        input_with_bias = input_vector + [1]             # add a bias input
        output = [neuron_output(neuron, input_with_bias) # compute the output
                  for neuron in layer]                   # for this layer
        outputs.append(output)                           # and remember it

        # the input to the next layer is the output of this one
        input_vector = output

    return outputs
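# sanity check (added commentary, not in the original commit): with
# hand-picked weights feed_forward computes XOR -- the hidden neurons act
# as AND and OR gates, and the output neuron fires for "OR but not AND":
#   xor_network = [[[20., 20., -30],    # "and" neuron
#                   [20., 20., -10]],   # "or" neuron
#                  [[-60., 60., -30]]]  # "or but not and" neuron
#   for x in [0, 1]:
#       for y in [0, 1]:
#           print(x, y, round(feed_forward(xor_network, [x, y])[-1][0]))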
def backpropagate(network, input_vector, target):

    hidden_outputs, outputs = feed_forward(network, input_vector)

    # the output * (1 - output) is from the derivative of sigmoid
    output_deltas = [output * (1 - output) * (output - target[i])
                     for i, output in enumerate(outputs)]

    # adjust weights for output layer (network[-1])
    for i, output_neuron in enumerate(network[-1]):
        for j, hidden_output in enumerate(hidden_outputs + [1]):
            output_neuron[j] -= output_deltas[i] * hidden_output

    # back-propagate errors to hidden layer
    hidden_deltas = [hidden_output * (1 - hidden_output) *
                     dot(output_deltas, [n[i] for n in network[-1]])
                     for i, hidden_output in enumerate(hidden_outputs)]

    # adjust weights for hidden layer (network[0])
    for i, hidden_neuron in enumerate(network[0]):
        for j, input in enumerate(input_vector + [1]):
            hidden_neuron[j] -= hidden_deltas[i] * input
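# note (added commentary, not in the original commit): the updates above
# are one step of gradient descent on squared error with a learning rate
# of 1; output * (1 - output) is sigmoid'(t) written in terms of the
# neuron's output, which is why no separate derivative function is needed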
def patch(x, y, hatch, color):
    """return a matplotlib 'patch' object with the specified
    location, crosshatch pattern, and color"""
    return matplotlib.patches.Rectangle((x - 0.5, y - 0.5), 1, 1,
                                        hatch=hatch, fill=False, color=color)

def show_weights(neuron_idx):
    weights = network[0][neuron_idx]
    abs_weights = [abs(weight) for weight in weights] # in Python 3, map
                                                      # returns a lazy iterator
                                                      # that can't be sliced,
                                                      # so build a list

    grid = [abs_weights[row:(row+5)]      # turn the weights into a 5x5 grid
            for row in range(0, 25, 5)]   # [weights[0:5], ..., weights[20:25]]

    ax = plt.gca()                        # to use hatching, we'll need the axis

    ax.imshow(grid,                       # here same as plt.imshow
              cmap=matplotlib.cm.binary,  # use white-black color scale
              interpolation='none')       # plot blocks as blocks

    # cross-hatch the negative weights
    for i in range(5):                    # row
        for j in range(5):                # column
            if weights[5*i + j] < 0:      # row i, column j = weights[5*i + j]
                # add black and white hatches, so visible whether dark or light
                ax.add_patch(patch(j, i, '/', "white"))
                ax.add_patch(patch(j, i, '\\', "black"))
    plt.show()
if __name__ == "__main__":

    raw_digits = [
          """11111
             1...1
             1...1
             1...1
             11111""",

          """..1..
             ..1..
             ..1..
             ..1..
             ..1..""",

          """11111
             ....1
             11111
             1....
             11111""",

          """11111
             ....1
             11111
             ....1
             11111""",

          """1...1
             1...1
             11111
             ....1
             ....1""",

          """11111
             1....
             11111
             ....1
             11111""",

          """11111
             1....
             11111
             1...1
             11111""",

          """11111
             ....1
             ....1
             ....1
             ....1""",

          """11111
             1...1
             11111
             1...1
             11111""",

          """11111
             1...1
             11111
             ....1
             11111"""]
    def make_digit(raw_digit):
        return [1 if c == '1' else 0
                for row in raw_digit.split("\n")
                for c in row.strip()]

    inputs = list(map(make_digit, raw_digits))

    targets = [[1 if i == j else 0 for i in range(10)]
               for j in range(10)]

    random.seed(0)    # to get repeatable results
    input_size = 25   # each input is a vector of length 25
    num_hidden = 5    # we'll have 5 neurons in the hidden layer
    output_size = 10  # we need 10 outputs for each input

    # each hidden neuron has one weight per input, plus a bias weight
    hidden_layer = [[random.random() for __ in range(input_size + 1)]
                    for __ in range(num_hidden)]

    # each output neuron has one weight per hidden neuron, plus a bias weight
    output_layer = [[random.random() for __ in range(num_hidden + 1)]
                    for __ in range(output_size)]

    # the network starts out with random weights
    network = [hidden_layer, output_layer]

    # 10,000 iterations seems enough to converge
    for __ in range(10000):
        for input_vector, target_vector in zip(inputs, targets):
            backpropagate(network, input_vector, target_vector)

    def predict(input):
        return feed_forward(network, input)[-1]

    for i, input in enumerate(inputs):
        outputs = predict(input)
        print(i, [round(p, 2) for p in outputs])
print(""".@@@.
194+
...@@
195+
..@@.
196+
...@@
197+
.@@@.""")
198+
print([round(x, 2) for x in
199+
predict( [0,1,1,1,0, # .@@@.
200+
0,0,0,1,1, # ...@@
201+
0,0,1,1,0, # ..@@.
202+
0,0,0,1,1, # ...@@
203+
0,1,1,1,0])]) # .@@@.
204+
print()
205+
206+
print(""".@@@.
207+
@..@@
208+
.@@@.
209+
@..@@
210+
.@@@.""")
211+
print([round(x, 2) for x in
212+
predict( [0,1,1,1,0, # .@@@.
213+
1,0,0,1,1, # @..@@
214+
0,1,1,1,0, # .@@@.
215+
1,0,0,1,1, # @..@@
216+
0,1,1,1,0])]) # .@@@.
217+
print()
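
One loose end: show_weights is defined but never called in the demo above. Presumably it is meant to be run after training to visualize a hidden neuron's 5x5 weight grid, e.g. (illustrative call, not in the commit):

    show_weights(0)  # plot the first hidden neuron's weights, hatching negatives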
