# Classification walkthrough based on:
# https://stackabuse.com/classification-in-python-with-scikit-learn-and-pandas/
# NOTE(review): every model below is fit AND scored on the full dataset;
# a proper train/test split is still needed for honest validation.
from datetime import datetime
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# "%matplotlib inline" is IPython magic and a syntax error in a plain .py
# file; invoke it programmatically only when actually running under IPython.
try:
    get_ipython().run_line_magic("matplotlib", "inline")
except NameError:
    pass  # plain Python interpreter: default matplotlib backend is fine

import warnings
warnings.filterwarnings('ignore')  # deliberate: silence pandas/sklearn chatter
# A basic framework for machine-learning classification prediction:
# 1. Read the data — this example has 50 sensors and 220,320 rows. Clean the
#    data, fill gaps, and convert types.
# 2. Feed the data into several classic algorithms to obtain a prediction
#    model and a fit score.
# 3. The real work lies in improving data quality and verifying practical
#    usefulness. The historical data must at least be split in two so the
#    reported match rate actually reflects reality.
# Load the pump sensor data; first row is the header.
df = pd.read_csv('23-pump-sensor.csv', sep=',', header=0)

# sensor_15 is entirely NaN — its value_counts() came back empty:
#   Series([], Name: count, dtype: int64)
# so drop it together with the first two bookkeeping columns
# (row index and timestamp).
df = df.drop([df.columns[0], df.columns[1], 'sensor_15'], axis=1)

# Target distribution (df['machine_status'].value_counts()):
#   NORMAL      205836
#   RECOVERING   14477
#   BROKEN           7
# Collapse to binary: 0 = NORMAL, 1 = RECOVERING/BROKEN (any fault state).
# Assign the result back instead of the deprecated chained inplace=True call,
# which triggers pandas' chained-assignment warning and stops working in 3.x.
df['machine_status'] = df['machine_status'].replace(
    ['NORMAL', 'RECOVERING', 'BROKEN'], [0, 1, 1]
)

# Impute missing sensor readings with the per-column mean.
# (All columns are numeric at this point: sensors are floats and
# machine_status was just mapped to 0/1.)
dfmean = df.mean(axis=0)
df = df.fillna(dfmean)
df.head()  # observed: 5 rows x 52 columns (51 sensors + machine_status)
import sklearn as sk
from sklearn.linear_model import LogisticRegression

# Features = all sensor columns; target = last column (machine_status).
y = df.iloc[:, -1]
X = df.iloc[:, :-1]

# multi_class='ovr' is deprecated in scikit-learn and redundant here: the
# target is binary, and the default ('auto') resolves to the same one-vs-rest
# behavior for binary problems, so results are unchanged.
# NOTE(review): the score (~0.9959) is measured on the training data itself,
# so it is optimistic — a held-out split is needed for a real estimate.
LR = LogisticRegression(random_state=0, solver='lbfgs').fit(X, y)
round(LR.score(X, y), 4)      # observed: 0.9959

LR.predict(X.iloc[-1:, :])    # observed: array([0]) — last row predicted NORMAL
X.iloc[-1:, :]                # last feature row (1 row x 51 columns)
y.iloc[-1]                    # observed: 0 (true label is NORMAL)
from sklearn import svm

# Linear SVM on the raw (unscaled) features.
# NOTE(review): LinearSVC is sensitive to feature scale; a StandardScaler in
# a Pipeline would likely help convergence — confirm before trusting scores.
SVM = svm.LinearSVC()
SVM.fit(X, y)
SVM.predict(X.iloc[-1:, :])
round(SVM.score(X, y), 4)   # observed: 0.9965 (again on training data)
from sklearn.ensemble import RandomForestClassifier

# Shallow random forest (100 trees, depth 2) as a third baseline.
RF = RandomForestClassifier(n_estimators=100, max_depth=2, random_state=0)
RF.fit(X, y)
RF.predict(X.iloc[-1:, :])   # observed: array([0])
round(RF.score(X, y), 4)     # observed: 0.9946 (training data)
# The pasted notebook repeated the predict call; the bare "array([0])" output
# line that followed it would raise NameError if executed as code, so both
# are kept here only as a record:
RF.predict(X.iloc[-1:, :])   # observed: array([0])
from sklearn.neural_network import MLPClassifier

# Small MLP (hidden layers of 5 and 2 units) with the lbfgs solver.
# NOTE(review): neural nets also expect scaled inputs; with raw sensor values
# and a tiny network, the training-set score below is hard to interpret.
NN = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(5, 2), random_state=1)
NN.fit(X, y)
NN.predict(X.iloc[-1:, :])
round(NN.score(X, y), 4)   # observed: 0.9938 (training data)