Upload 5 files

Browse files

Files changed (5) hide show

sklearn_clustering.py +28 -0
sklearn_clustering2.py +28 -0
sklearn_linear_regression.py +9 -0
sklearn_train_bostonHousing.py +114 -0
sklearn_train_digit.py +21 -0

sklearn_clustering.py ADDED Viewed

	@@ -0,0 +1,28 @@

+from sklearn.datasets import make_blobs
+X ,y = make_blobs(n_samples=150, n_features=2, centers=3, cluster_std= 0.5, shuffle= True, random_state= 0)  # 150 synthetic 2-feature samples around 3 centers; fixed seed
+import matplotlib.pyplot as plt
+#plt.scatter(X[:,0], X[:,1], c='white', marker='o', edgecolors='black', s=50)
+#plt.grid()
+#plt.show()
+from sklearn.cluster import KMeans
+km = KMeans(n_clusters=3, init='random', n_init=10, max_iter=300, tol=1e-04, random_state=0)  # baseline model: k=3 with random centroid initialisation
+y_km = km.fit_predict(X)  # fit, then get the cluster index assigned to each sample
+print(y_km)
+#plt.scatter(X[y_km == 0, 0], X[y_km == 0, 1], s=50, c='lightgreen',marker='s', edgecolor='black',label='cluster 1')
+#plt.scatter(X[y_km == 1, 0], X[y_km == 1, 1], s=50, c='orange',marker='o', edgecolor='black',label='cluster 2')
+#plt.scatter(X[y_km == 2, 0], X[y_km == 2, 1], s=50, c='lightblue',marker='v', edgecolor='black',label='cluster 3')
+#plt.scatter(km.cluster_centers_[:,0],km.cluster_centers_[:,1], s=250, marker='*', c='red', edgecolors='black', label = 'centroids')
+#plt.legend(scatterpoints=1)
+#plt.grid()
+#plt.show()
+distortions = []  # collects km.inertia_ for each candidate cluster count
+for i in range(1,11):  # try k = 1..10
+    km = KMeans(n_clusters=i, init='k-means++', n_init=10, max_iter=300, random_state = 0)  # rebinds km, replacing the k=3 model fitted above
+    km.fit(X)
+    distortions.append(km.inertia_)
+plt.plot(range(1,11), distortions, marker = 'o')  # distortion as a function of the number of clusters
+plt.xlabel('Number of clusters')
+plt.ylabel('Distortion')
+plt.show()

sklearn_clustering2.py ADDED Viewed

	@@ -0,0 +1,28 @@

+from sklearn.datasets import make_blobs
+X ,y = make_blobs(n_samples=150, n_features=2, centers=3, cluster_std= 0.5, shuffle= True, random_state= 0)  # 150 synthetic 2-feature samples around 3 centers; fixed seed
+import matplotlib.pyplot as plt
+#plt.scatter(X[:,0], X[:,1], c='white', marker='o', edgecolors='black', s=50)
+#plt.grid()
+#plt.show()
+from sklearn.cluster import KMeans
+km = KMeans(n_clusters=3, init='random', n_init=10, max_iter=300, tol=1e-04, random_state=0)  # baseline model: k=3 with random centroid initialisation
+y_km = km.fit_predict(X)  # fit, then get the cluster index assigned to each sample
+print(y_km)
+#plt.scatter(X[y_km == 0, 0], X[y_km == 0, 1], s=50, c='lightgreen',marker='s', edgecolor='black',label='cluster 1')
+#plt.scatter(X[y_km == 1, 0], X[y_km == 1, 1], s=50, c='orange',marker='o', edgecolor='black',label='cluster 2')
+#plt.scatter(X[y_km == 2, 0], X[y_km == 2, 1], s=50, c='lightblue',marker='v', edgecolor='black',label='cluster 3')
+#plt.scatter(km.cluster_centers_[:,0],km.cluster_centers_[:,1], s=250, marker='*', c='red', edgecolors='black', label = 'centroids')
+#plt.legend(scatterpoints=1)
+#plt.grid()
+#plt.show()
+distortions = []  # collects km.inertia_ for each candidate cluster count
+for i in range(1,11):  # try k = 1..10
+    km = KMeans(n_clusters=i, init='k-means++', n_init=10, max_iter=300, random_state = 0)  # rebinds km, replacing the k=3 model fitted above
+    km.fit(X)
+    distortions.append(km.inertia_)
+plt.plot(range(1,11), distortions, marker = 'o')  # distortion as a function of the number of clusters
+plt.xlabel('Number of clusters')
+plt.ylabel('Distortion')
+plt.show()

sklearn_linear_regression.py ADDED Viewed

	@@ -0,0 +1,9 @@

+import numpy as np
+from sklearn import linear_model
+reg = linear_model.LinearRegression()
+X = np.array([[3.04],[3.64],[4.61],[5.57],[6.74],[7.77]])  # six samples, one feature each (column layout)
+Y = np.array([0.91,1.01,1.09,1.11,1.20,1.30])  # one target value per sample
+reg.fit(X,Y)
+print(reg.coef_)  # fitted coefficient for the single feature
+print(reg.intercept_)  # fitted intercept
+print(reg.predict([[5]]))  # prediction for one new sample whose feature value is 5

sklearn_train_bostonHousing.py ADDED Viewed

	@@ -0,0 +1,114 @@

+from sklearn.datasets import load_boston  # NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in 1.2, so this import fails on current releases — confirm the pinned version or switch datasets
+from sklearn.model_selection import train_test_split
+from sklearn.linear_model import LinearRegression
+from sklearn.neighbors import KNeighborsRegressor
+import matplotlib.pyplot as plt
+import numpy as np
+boston = load_boston()
+x = boston.data  # feature matrix
+y = boston.target  # regression target; the commented-out plots below label it as price in $1000s
+# print(x.shape)
+# print(y.shape)
+# print(x)
+# plt.figure(figsize=(4,3))
+# plt.hist(y)
+# plt.xlabel('price($1000s)')
+# plt.ylabel('count')
+# plt.tight_layout()
+# plt.show()
+# for index, feature_name in enumerate(boston.feature_names):
+#     plt.figure(figsize=(4,3))
+#     plt.scatter(x[:, index], y)
+#     plt.ylabel('Price', size=15)
+#     plt.xlabel(feature_name, size=15)
+#     plt.tight_layout()
+# plt.show()
+x_train, x_test, y_train, y_test = train_test_split(x,y,test_size=0.2, random_state = 0)  # hold out 20% for testing with a fixed seed; only the commented-out models below consume these arrays
+# linear = LinearRegression()
+# linear.fit(x_train, y_train)
+# linear_predicted = linear.predict(x_test)
+# plt.figure(figsize =(4,3))
+# plt.suptitle('Linear Regression')
+# plt.scatter(y_test,linear_predicted)
+# plt.plot([0,50],[0,50], '--k')
+# plt.axis('tight')
+# plt.xlabel('True price($1000s)')
+# plt.ylabel('Predicted price($1000s)')
+# plt.tight_layout()
+# plt.show()
+# print("Linear RMS: %r " % np.sqrt(np.mean((linear_predicted - y_test) ** 2)))
+# print("Linear intercept: ")
+# print(linear.intercept_)
+# print("Linear coefficient: ")
+# print(linear.coef_)
+# neigh = KNeighborsRegressor(n_neighbors=2)
+# neigh.fit(x_train, y_train)
+# neigh_predicted = neigh.predict(x_test)
+# plt.figure(figsize=(4,3))
+# plt.suptitle('KNN')
+# plt.scatter(y_test, neigh_predicted)
+# plt.plot([0,50],[0,50],'--k')
+# plt.axis('tight')
+# plt.xlabel('True price ($1000s)')
+# plt.ylabel('Predicted price ($1000s)')
+# plt.tight_layout()
+# plt.show()
+# print("KNN RMS: %r " % np.sqrt(np.mean((neigh_predicted - y_test) ** 2)))
+# from sklearn import tree
+# tree = tree.DecisionTreeRegressor()
+# tree.fit(x_train, y_train)
+# print('Decision Tree Feature Importance: ')
+# print(tree.feature_importances_)
+# tree_predicted = tree.predict(x_test)
+# plt.figure(figsize=(4, 3))
+# plt.suptitle('Decision Tree')
+# plt.scatter(y_test, tree_predicted)
+# plt.plot([0,50],[0,50], '--k')
+# plt.axis('tight')
+# plt.xlabel('True price ($1000s)')
+# plt.ylabel('Predicted price ($1000s)')
+# plt.tight_layout()
+# plt.show()
+# print("Decision Tree RMS: %r " % np.sqrt(np.mean((tree_predicted - y_test) ** 2)))
+# from sklearn.ensemble import RandomForestRegressor
+# forest = RandomForestRegressor(max_depth=2, random_state=0)
+# forest.fit(x_train, y_train)
+# print('Random Forest Feature Importance')
+# print(forest.feature_importances_)
+# forest_predicted = forest.predict(x_test)
+# plt.figure(figsize=(4,3))
+# plt.suptitle('Random Forest')
+# plt.scatter(y_test, forest_predicted)
+# plt.plot([0,50],[0,50],'--k')
+# plt.axis('tight')
+# plt.xlabel('True price ($1000s)')
+# plt.ylabel('Predicted price ($1000s)')
+# plt.tight_layout()
+# plt.show()
+# print("Forest RMS: %r " % np.sqrt(np.mean((forest_predicted - y_test) ** 2)))
+from sklearn import datasets
+from sklearn.model_selection import cross_val_score
+import numpy as np
+digits = datasets.load_digits()
+x = digits.data
+y = digits.target
+from sklearn.linear_model import Perceptron
+perceptron_model = Perceptron(tol=1e-3, random_state=0)
+Perceptron_scores = cross_val_score(perceptron_model, x,y, cv=10)
+print('Perceptron avg performance: ')
+print(np.mean(perceptron_model))
+from sklearn.neighbors import KNeighborsClassifier
+neigh = KNeighborsClassifier(n_neighbors=3)
+neigh_scores = cross_val_score(neigh, x,y, cv=10)
+print('KNN avg performance: ')
+print(np.mean(neigh))
+# The same cross-validation can be repeated for a decision tree and a random forest.

sklearn_train_digit.py ADDED Viewed

	@@ -0,0 +1,21 @@

+from sklearn import datasets
+import matplotlib.pyplot as plt
+iris = datasets.load_iris()  # NOTE(review): iris is not used in the visible lines — confirm whether it can be dropped
+digits = datasets.load_digits()
+fig = plt.figure(figsize=(8,8))
+fig.subplots_adjust(left=0,right=1,bottom=0,top=1,hspace=0.05,wspace=0.05)  # use the full figure area with small gaps between cells
+for i in range(100):  # draw the first 100 digit images on a 10x10 grid
+    ax = fig.add_subplot(10,10,i+1,xticks=[],yticks=[])  # subplot positions are 1-based; axis ticks hidden
+    ax.imshow(digits.images[i],cmap=plt.cm.binary,interpolation='nearest')
+    ax.text(0,7,str(digits.target[i]),color='green')  # overlay the true label in green
+plt.show()
+x= digits.data
+y= digits.target
+from sklearn.model_selection import train_test_split
+xtrain,xtest,ytrain,ytest = train_test_split(x,y,test_size=0.2,random_state=0)  # hold out 20% for testing with a fixed seed
+from sklearn.linear_model import Perceptron
+perceptron_model = Perceptron(tol=1e-3,random_state=0)
+perceptron_model.fit(xtrain,ytrain)
+perceptron_prediction=perceptron_model.predict(xtest)  # predicted labels for the held-out samples
+from sklearn import metrics  # NOTE(review): metrics is not used in the visible lines; the evaluation step may be missing from this view