-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathutilities.py
31 lines (22 loc) · 899 Bytes
/
utilities.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
from __future__ import division
from collections import Counter
import random
import numpy as np
def shuffle_in_unison(a, b):
    """Shuffle two equal-length sequences in place, keeping pairs aligned.

    Captures the global NumPy RNG state once, then shuffles each sequence
    after restoring that state, so both shuffles consume the identical
    sequence of random draws and produce the identical permutation —
    a[i] and b[i] still correspond after the call.
    """
    saved_state = np.random.get_state()
    for seq in (a, b):
        # Rewind the RNG so this shuffle replays the same draws.
        np.random.set_state(saved_state)
        np.random.shuffle(seq)
def entropy(Y):
    """Return the Shannon entropy (natural log) of the sample Y.

    Y is any iterable of hashable labels; the entropy measures the
    uncertainty of drawing one element at random. An all-identical
    sample gives 0.0; an empty sample also yields 0.0 (-0.0).
    """
    total = len(Y)
    counts = Counter(Y)
    # Sum p*ln(p) over the empirical distribution; 0.0 start keeps the
    # return a float even when Y is empty.
    return -sum(
        (count / total) * np.log(count / total)
        for count in counts.values()
    ) if counts else -0.0
def information_gain(y, y_true, y_false):
    """Return the entropy reduction from splitting labels y into two groups.

    y is the parent label sample; y_true / y_false are the two child
    samples produced by the split. The result is the parent entropy
    minus the size-weighted mean of the children's entropies.
    """
    parent_entropy = entropy(y)
    weighted_children = (
        len(y_true) * entropy(y_true) + len(y_false) * entropy(y_false)
    ) / len(y)
    return parent_entropy - weighted_children