sklearn_clustering.py ADDED
@@ -0,0 +1,35 @@
+ from sklearn.datasets import make_blobs
+ from sklearn.cluster import KMeans
+ import matplotlib.pyplot as plt
+
+ # Generate 150 two-dimensional points in three well-separated blobs.
+ X, y = make_blobs(n_samples=150, n_features=2, centers=3, cluster_std=0.5, shuffle=True, random_state=0)
+
+ # plt.scatter(X[:, 0], X[:, 1], c='white', marker='o', edgecolors='black', s=50)
+ # plt.grid()
+ # plt.show()
+
+ # k-means with random initialization; the best of 10 runs is kept.
+ km = KMeans(n_clusters=3, init='random', n_init=10, max_iter=300, tol=1e-04, random_state=0)
+ y_km = km.fit_predict(X)
+ print(y_km)
+
+ # plt.scatter(X[y_km == 0, 0], X[y_km == 0, 1], s=50, c='lightgreen', marker='s', edgecolor='black', label='cluster 1')
+ # plt.scatter(X[y_km == 1, 0], X[y_km == 1, 1], s=50, c='orange', marker='o', edgecolor='black', label='cluster 2')
+ # plt.scatter(X[y_km == 2, 0], X[y_km == 2, 1], s=50, c='lightblue', marker='v', edgecolor='black', label='cluster 3')
+ # plt.scatter(km.cluster_centers_[:, 0], km.cluster_centers_[:, 1], s=250, marker='*', c='red', edgecolors='black', label='centroids')
+ # plt.legend(scatterpoints=1)
+ # plt.grid()
+ # plt.show()
+
+ # Elbow method: plot within-cluster SSE (inertia) for k = 1..10.
+ distortions = []
+ for i in range(1, 11):
+     km = KMeans(n_clusters=i, init='k-means++', n_init=10, max_iter=300, random_state=0)
+     km.fit(X)
+     distortions.append(km.inertia_)
+
+ plt.plot(range(1, 11), distortions, marker='o')
+ plt.xlabel('Number of clusters')
+ plt.ylabel('Distortion')
+ plt.show()
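The elbow plot above picks k by eye. A complementary check, not in the original script, is the silhouette score; a minimal sketch, assuming the X and the KMeans import from the script above:

from sklearn.metrics import silhouette_score
# Silhouette ranges from -1 to 1; higher means tighter, better-separated clusters.
for k in range(2, 7):
    labels = KMeans(n_clusters=k, init='k-means++', n_init=10, random_state=0).fit_predict(X)
    print(k, silhouette_score(X, labels))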
sklearn_clustering2.py ADDED
@@ -0,0 +1,35 @@
+ from sklearn.datasets import make_blobs
+ from sklearn.cluster import KMeans
+ import matplotlib.pyplot as plt
+
+ # Generate 150 two-dimensional points in three well-separated blobs.
+ X, y = make_blobs(n_samples=150, n_features=2, centers=3, cluster_std=0.5, shuffle=True, random_state=0)
+
+ # plt.scatter(X[:, 0], X[:, 1], c='white', marker='o', edgecolors='black', s=50)
+ # plt.grid()
+ # plt.show()
+
+ # k-means with random initialization; the best of 10 runs is kept.
+ km = KMeans(n_clusters=3, init='random', n_init=10, max_iter=300, tol=1e-04, random_state=0)
+ y_km = km.fit_predict(X)
+ print(y_km)
+
+ # plt.scatter(X[y_km == 0, 0], X[y_km == 0, 1], s=50, c='lightgreen', marker='s', edgecolor='black', label='cluster 1')
+ # plt.scatter(X[y_km == 1, 0], X[y_km == 1, 1], s=50, c='orange', marker='o', edgecolor='black', label='cluster 2')
+ # plt.scatter(X[y_km == 2, 0], X[y_km == 2, 1], s=50, c='lightblue', marker='v', edgecolor='black', label='cluster 3')
+ # plt.scatter(km.cluster_centers_[:, 0], km.cluster_centers_[:, 1], s=250, marker='*', c='red', edgecolors='black', label='centroids')
+ # plt.legend(scatterpoints=1)
+ # plt.grid()
+ # plt.show()
+
+ # Elbow method: plot within-cluster SSE (inertia) for k = 1..10.
+ distortions = []
+ for i in range(1, 11):
+     km = KMeans(n_clusters=i, init='k-means++', n_init=10, max_iter=300, random_state=0)
+     km.fit(X)
+     distortions.append(km.inertia_)
+
+ plt.plot(range(1, 11), distortions, marker='o')
+ plt.xlabel('Number of clusters')
+ plt.ylabel('Distortion')
+ plt.show()
sklearn_linear_regression.py ADDED
@@ -0,0 +1,11 @@
+ import numpy as np
+ from sklearn import linear_model
+
+ # Fit a one-feature linear regression to six points.
+ X = np.array([[3.04], [3.64], [4.61], [5.57], [6.74], [7.77]])
+ Y = np.array([0.91, 1.01, 1.09, 1.11, 1.20, 1.30])
+ reg = linear_model.LinearRegression()
+ reg.fit(X, Y)
+ print(reg.coef_)           # slope
+ print(reg.intercept_)      # intercept
+ print(reg.predict([[5]]))  # predicted y at x = 5
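As a quick sanity check, not in the original script, the same line can be recovered with NumPy's closed-form polynomial fit; a minimal sketch, appended to the script above:

# Cross-check: a degree-1 polyfit should reproduce reg.coef_ and reg.intercept_.
slope, intercept = np.polyfit(X.ravel(), Y, 1)
print(slope, intercept)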
sklearn_train_bostonHousing.py ADDED
@@ -0,0 +1,124 @@
+ from sklearn.datasets import load_boston  # removed in scikit-learn 1.2; requires an older release
+ from sklearn.model_selection import train_test_split
+ from sklearn.linear_model import LinearRegression
+ from sklearn.neighbors import KNeighborsRegressor
+ import matplotlib.pyplot as plt
+ import numpy as np
+
+ boston = load_boston()
+ x = boston.data
+ y = boston.target
+ # print(x.shape)
+ # print(y.shape)
+ # print(x)
+
+ # Histogram of house prices.
+ # plt.figure(figsize=(4, 3))
+ # plt.hist(y)
+ # plt.xlabel('price ($1000s)')
+ # plt.ylabel('count')
+ # plt.tight_layout()
+ # plt.show()
+
+ # Scatter each feature against price.
+ # for index, feature_name in enumerate(boston.feature_names):
+ #     plt.figure(figsize=(4, 3))
+ #     plt.scatter(x[:, index], y)
+ #     plt.ylabel('Price', size=15)
+ #     plt.xlabel(feature_name, size=15)
+ #     plt.tight_layout()
+ #     plt.show()
+
+ x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)
+
+ # Linear regression baseline.
+ # linear = LinearRegression()
+ # linear.fit(x_train, y_train)
+ # linear_predicted = linear.predict(x_test)
+ # plt.figure(figsize=(4, 3))
+ # plt.suptitle('Linear Regression')
+ # plt.scatter(y_test, linear_predicted)
+ # plt.plot([0, 50], [0, 50], '--k')
+ # plt.axis('tight')
+ # plt.xlabel('True price ($1000s)')
+ # plt.ylabel('Predicted price ($1000s)')
+ # plt.tight_layout()
+ # plt.show()
+ # print("Linear RMS: %r " % np.sqrt(np.mean((linear_predicted - y_test) ** 2)))
+ # print("Linear intercept: ")
+ # print(linear.intercept_)
+ # print("Linear coefficient: ")
+ # print(linear.coef_)
+
+ # k-nearest-neighbors regression.
+ # neigh = KNeighborsRegressor(n_neighbors=2)
+ # neigh.fit(x_train, y_train)
+ # neigh_predicted = neigh.predict(x_test)
+ # plt.figure(figsize=(4, 3))
+ # plt.suptitle('KNN')
+ # plt.scatter(y_test, neigh_predicted)
+ # plt.plot([0, 50], [0, 50], '--k')
+ # plt.axis('tight')
+ # plt.xlabel('True price ($1000s)')
+ # plt.ylabel('Predicted price ($1000s)')
+ # plt.tight_layout()
+ # plt.show()
+ # print("KNN RMS: %r " % np.sqrt(np.mean((neigh_predicted - y_test) ** 2)))
+
+ # Decision tree regression.
+ # from sklearn import tree
+ # tree = tree.DecisionTreeRegressor()
+ # tree.fit(x_train, y_train)
+ # print('Decision Tree Feature Importance: ')
+ # print(tree.feature_importances_)
+ # tree_predicted = tree.predict(x_test)
+ # plt.figure(figsize=(4, 3))
+ # plt.suptitle('Decision Tree')
+ # plt.scatter(y_test, tree_predicted)
+ # plt.plot([0, 50], [0, 50], '--k')
+ # plt.axis('tight')
+ # plt.xlabel('True price ($1000s)')
+ # plt.ylabel('Predicted price ($1000s)')
+ # plt.tight_layout()
+ # plt.show()
+ # print("Decision Tree RMS: %r " % np.sqrt(np.mean((tree_predicted - y_test) ** 2)))
+
+ # Random forest regression.
+ # from sklearn.ensemble import RandomForestRegressor
+ # forest = RandomForestRegressor(max_depth=2, random_state=0)
+ # forest.fit(x_train, y_train)
+ # print('Random Forest Feature Importance')
+ # print(forest.feature_importances_)
+ # forest_predicted = forest.predict(x_test)
+ # plt.figure(figsize=(4, 3))
+ # plt.suptitle('Random Forest')
+ # plt.scatter(y_test, forest_predicted)
+ # plt.plot([0, 50], [0, 50], '--k')
+ # plt.axis('tight')
+ # plt.xlabel('True price ($1000s)')
+ # plt.ylabel('Predicted price ($1000s)')
+ # plt.tight_layout()
+ # plt.show()
+ # print("Forest RMS: %r " % np.sqrt(np.mean((forest_predicted - y_test) ** 2)))
+
+ # Cross-validated classification on the digits dataset.
+ from sklearn import datasets
+ from sklearn.model_selection import cross_val_score
+
+ digits = datasets.load_digits()
+ x = digits.data
+ y = digits.target
+
+ from sklearn.linear_model import Perceptron
+ perceptron_model = Perceptron(tol=1e-3, random_state=0)
+ perceptron_scores = cross_val_score(perceptron_model, x, y, cv=10)
+ print('Perceptron avg performance: ')
+ print(np.mean(perceptron_scores))
+
+ from sklearn.neighbors import KNeighborsClassifier
+ neigh = KNeighborsClassifier(n_neighbors=3)
+ neigh_scores = cross_val_score(neigh, x, y, cv=10)
+ print('KNN avg performance: ')
+ print(np.mean(neigh_scores))
+
+ # the same for decision tree and random forest (see the sketch after this file)
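The closing comment above invites the same cross-validation for tree-based models; a minimal sketch under that assumption, reusing x, y, np, and cross_val_score from the script:

from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

# Same 10-fold protocol as the perceptron and KNN runs above.
tree_scores = cross_val_score(DecisionTreeClassifier(random_state=0), x, y, cv=10)
print('Decision tree avg performance: ')
print(np.mean(tree_scores))

forest_scores = cross_val_score(RandomForestClassifier(random_state=0), x, y, cv=10)
print('Random forest avg performance: ')
print(np.mean(forest_scores))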
sklearn_train_digit.py ADDED
@@ -0,0 +1,27 @@
+ from sklearn import datasets
+ import matplotlib.pyplot as plt
+
+ iris = datasets.load_iris()  # loaded but not used below
+ digits = datasets.load_digits()
+
+ # Show the first 100 digit images in a 10x10 grid, labeled in green.
+ fig = plt.figure(figsize=(8, 8))
+ fig.subplots_adjust(left=0, right=1, bottom=0, top=1, hspace=0.05, wspace=0.05)
+ for i in range(100):
+     ax = fig.add_subplot(10, 10, i + 1, xticks=[], yticks=[])
+     ax.imshow(digits.images[i], cmap=plt.cm.binary, interpolation='nearest')
+     ax.text(0, 7, str(digits.target[i]), color='green')
+ plt.show()
+
+ x = digits.data
+ y = digits.target
+
+ from sklearn.model_selection import train_test_split
+ xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.2, random_state=0)
+
+ from sklearn.linear_model import Perceptron
+ perceptron_model = Perceptron(tol=1e-3, random_state=0)
+ perceptron_model.fit(xtrain, ytrain)
+ perceptron_prediction = perceptron_model.predict(xtest)
+
+ from sklearn import metrics
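The file stops right after importing metrics; a minimal continuation, assuming the intent was to score the perceptron on the held-out digits:

# Accuracy and per-class precision/recall on the 20% test split.
print(metrics.accuracy_score(ytest, perceptron_prediction))
print(metrics.classification_report(ytest, perceptron_prediction))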