-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathclassification_with_scikit_learn_local.py
72 lines (53 loc) · 2.05 KB
/
classification_with_scikit_learn_local.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
#file name: classification_with_scikit_learn.py
# Section 1: obtain the data file through the Google Colab helpers and parse
# it into a pandas DataFrame named `df`.
import io

import pandas as pd
from google.colab import drive
from google.colab import files

# Prompt for a file upload; the result is indexed by uploaded file name and
# each entry holds that file's raw bytes.
uploaded = files.upload()
"""Load the dataset into a dataframe:"""
# Google Drive is mounted as well, although nothing in this script reads
# from /gdrive.
drive.mount("/gdrive")

# The upload has to contain exactly this file name, otherwise the lookup
# below raises KeyError.
raw_bytes = uploaded['Tunnel_1_4Linear8Sensors9ClassesCappedRange8Fast_10.txt']
# The file is decoded as UTF-8 and parsed as tab-separated text.
df = pd.read_csv(io.StringIO(raw_bytes.decode('utf-8')), sep='\t')
# Bare expression, presumably left over from a notebook cell where it shows
# the frame; as a plain script statement it has no effect.
df
"""Split the dataset into training and testing:"""
from sklearn.model_selection import train_test_split

# Every column except "Class" is used as a feature; "Class" is the target.
feature_mask = df.columns != "Class"
train_X, val_X, train_y, val_y = train_test_split(
    df.loc[:, feature_mask],
    df.Class,
    random_state=0,  # fixed seed so the split is the same on every run
)
# Show the training features first, then the training labels.
for part in (train_X, train_y):
    print(part)
"""Learn using a knn-Classifier:"""
import numpy as np
from sklearn.neighbors import KNeighborsClassifier

# Fit a 5-nearest-neighbours classifier on the training split
# (fit() returns the fitted estimator itself).
neigh = KNeighborsClassifier(n_neighbors=5).fit(train_X, train_y)
"""Classify using the knn-Classifier:"""
predictions = neigh.predict(val_X)
print("Type of predictions: ", type(predictions))
print(predictions)
print("Type of val_y: ", type(val_y))
# Element-wise comparison of predicted and true validation labels;
# computed once and reused for the accuracy below.
hits = predictions == val_y
print(hits)
"""Check how good the prediction is:"""
# Accuracy = number of matching labels divided by the number of predictions.
accuracy = sum(hits) / len(predictions)
print(accuracy)
"""Use cross-validation"""
from sklearn.model_selection import cross_val_score
from sklearn.utils import shuffle
from sklearn.neighbors import KNeighborsClassifier

# 10-fold cross-validation of a 5-nearest-neighbours classifier on the
# complete data set.
classifier = KNeighborsClassifier(n_neighbors=5)
# With an integer `cv`, cross_val_score does not shuffle the rows itself, so
# they are shuffled up front. The seed is fixed (matching the
# random_state=0 used for the train/validation split) so that the printed
# scores are reproducible from run to run.
shuffled = shuffle(df, random_state=0)
score = cross_val_score(
    classifier,
    shuffled.loc[:, shuffled.columns != "Class"],  # all feature columns
    shuffled.Class,                                # target labels
    cv=10,
)
# One score per fold.
print(score)
from sklearn.model_selection import cross_val_score
from sklearn.utils import shuffle
from sklearn.neighbors import KNeighborsClassifier
import pandas as pd
import numpy as np

# Sweep the number of neighbours k = 1..49 using 10-fold cross-validation on
# the training split only; the validation split is not touched here.
# Features and labels are glued together so that they are shuffled jointly.
# The seed is fixed so every run (and every k) sees the same row order.
shuffled = shuffle(pd.concat([train_X, train_y], axis=1), random_state=0)
# Feature/label selection does not depend on k, so it is done once up front.
sweep_X = shuffled.loc[:, shuffled.columns != "Class"]
sweep_y = shuffled.Class
for k in range(1, 50):
    classifier = KNeighborsClassifier(n_neighbors=k)
    score = cross_val_score(classifier, sweep_X, sweep_y, cv=10)
    # Print one line per k: the median of the 10 fold scores.
    print(np.median(score))
# Final model: a 1-nearest-neighbour classifier trained on the whole training
# split (k=1 was presumably picked from the sweep output; confirm), then
# scored on the held-out validation split.
knn = KNeighborsClassifier(n_neighbors=1)
knn.fit(train_X, train_y)
predictions = knn.predict(val_X)
# Element-wise match between predicted and true validation labels.
correct = predictions == val_y
# Accuracy = number of matching labels divided by the number of predictions.
accuracy = sum(correct) / len(predictions)
print(accuracy)