找回密码
 立即注册
查看: 219|回复: 0

【数据挖掘】基于卷积神经网络的非侵入式负荷分解(NILM)Python实现

[复制链接]
发表于 2022-6-28 08:00 | 显示全部楼层 |阅读模式
本方法主要利用基于卷积神经网络的非侵入式负荷分解方法实现住宅设备的识别,输入数据为在设备运行时获得的瞬态功率信号数据。训练卷积神经网络使用数据为开源数据REDD(1Hz),具体实现原理请参考文献下载链接。只供学习参考,Python实现代码如下:
1 第一部分:数据可视化
  1. import pandas as pd
  2. import matplotlib.pyplot as plt
  3. import numpy as np
  4. data = pd.read_csv("data.csv")
  5. #print(data)
  6. n = int(data.max(axis = 0, skipna = True)[1]) + 1  # gets the number of readings
  7. h = int(data.max(axis = 0, skipna = True)[0]) # gets the number of houses
  8. houses = np.zeros((n,5,h))  # time, tv inst power, aggr power, filtered tv , on/off
  9. for i in range(h):
  10.     houses[:,0:3,i] =  data[data['House']==i+1  ].values[:,1:]   # data is now seperated by house
  11. # visulise data
  12. plt.figure()
  13. plt.suptitle('Tv power and agg power seperated ')
  14. plt.subplot(211)
  15. for i in range(h):
  16.     plt.plot(houses[:,0,i],houses[:,1,i], label = 'House ' + str(i+1) )
  17. plt.subplot(212)
  18. for i in range(h):
  19.     plt.plot(houses[:,0,i],houses[:,2,i], label = 'House ' + str(i+1) )
  20. plt.legend()
  21. # plt.show()
  22. plt.figure()
  23. plt.suptitle('Normalised data, shown by house')
  24. for i in range(h):
  25.     plt.subplot(3,1,i+1)
  26.     plt.plot(houses[:,0,i],houses[:,1,i]/np.average(houses[:,1,i]) )
  27.     plt.plot(houses[:, 0, i], houses[:, 2, i]/np.average( houses[:, 2, i]) )
  28. plt.legend()
  29. plt.show()
复制代码
实验结果:




过滤并标准化电视即时功率
  1. from scipy.signal import *
  2. houses[:,3,:] = savgol_filter(houses[:, 1, :] / np.average(houses[:, 1, :],axis=0), 11, 2,axis=0)
  3. thres = 1.15  # the threshold for seperation
  4. plt.figure()
  5. plt.suptitle('Filtered inst power, and determined state')
  6. for i in range(h):
  7.     houses[ np.argwhere( houses[:,3,i] >= thres ) ,4,i  ] +=1   # makes the 4th row qual to 1 if the filtered result is higher then the threshold
  8.     plt.subplot(3, 1, i + 1)
  9.     plt.plot(  houses[:, 0, i],houses[:, 3, i]  ) # plot filtered curve
  10.     plt.plot(houses[:, 0, i], houses[:, 4, i]) # plot on state
  11.     plt.plot(houses[:, 0, i], houses[:, 1, i] / np.average(houses[:, 1, i])) # plot normalised curve
  12. plt.show()
复制代码
实验结果:


2 卷积神经网络实现负荷识别
  1. #总功率问题
  2. # - 存在非常大的峰
  3. # - 其他电器出现的一些周期性峰值,这些在每个房子中的频率不同
  4. # - 噪声/峰值平坦度与一号房的电视电源使用量相似
  5. # 想法
  6. # - 针对开/关状态测试/检查
  7. # - 测试/检查状态改变的时间
  8. # - 使用 CNN/ANN/MLP 进行分类
  9. # - 输入应该是整个时间,还是 N 个时间步长的夹头
  10. # - N 时间步长意味着它可以推广到不同的测试输入大小
  11. # - 然后应该建立这 N 个大输入的训练集和测试集,每一张幻灯片
  12. # - 允许在输入中打开状态的位置进行概括
  13. # - 输出N 1/0(或开启的概率,然后是阈值)每个时间步对应的输出
复制代码
  1. import numpy as np
  2. from keras.layers import Dense
  3. from keras.layers import Flatten
  4. from keras.layers import Dropout
  5. from keras.models import Sequential
  6. from keras.layers.convolutional import Conv1D
  7. from keras.layers.convolutional import MaxPooling1D
  8. def sep_data(houses, T=0.8):
  9.     # seperate the house data into windowed sections winin each house
  10.     # split into training (T%) and test set, no validation set is being used
  11.     # record the class as a one hot vector of if the last state is on/off
  12.     window_len = 20
  13.     #at size 20 the classifier was classfiing the periodic sections as the tv being on in house one #filter sized 6,3 wights 1.8,1
  14.     #at length 30 a similar thing happened, filters 6,3 weights at 3:1, not as accurate as ^
  15.     # at 10, with 3,3, stride and 2.2:1 similar problems
  16.     ntrain = int((n-window_len)*T) # amount of data from each house to train on
  17.     train = np.zeros((ntrain*h,window_len,1,2))                         # [#windows x samples per window x 1(as only one input feature) x 2 (for rata, and weights)
  18.     test = np.zeros( ( (n-window_len-ntrain)*h,window_len,1,2)  )
  19.     for j in range(h): #each house
  20.         for i in range(n-window_len):
  21.             train_index = i+j*ntrain ## which row in the reformatted array is being filled
  22.             test_index = i-ntrain + j*(n-window_len-ntrain)
  23.             if i<ntrain:  #part of training set
  24.                 train[train_index,:,:,0 ] =np.reshape(houses[i:i+window_len,2,j] , (window_len,1))     # window up the aggregated power
  25.                 #train[train_index, :,:,1] = np.reshape(houses[i:i + window_len,4, j], (window_len,1)) # no longer used, was used when every state in window was a class
  26.                 train[train_index, 0 , :, 1] = houses[i + window_len, 4, j]                            # identify state of last step in window
  27.                 train[train_index, 1, :, 1] = -(train[train_index, 0 , :, 1] -1)                       # one hot encode the category,cat 0 = on, cat 1 = off
  28.             else: # test set
  29.                 test[test_index , :,:, 0] =np.reshape( houses[i:i + window_len, 2, j], (window_len,1))
  30.                 #test[test_index), :,:, 1] = np.reshape(houses[i:i + window_len, 4, j], (window_len,1))
  31.                 test[test_index, 0, :, 1] = houses[i + window_len, 4, j]
  32.                 test[test_index, 1, :, 1] = -(test[test_index, 0, :, 1] - 1)
  33.     #return train[:,:,:,0],train[:,:,0,1],test[:,:,:,0],test[:,:,0,1]
  34.     # how uneven is the data? this could be a problem when training, as a large group can overpower and always be predicted
  35.     wratio = np.sum(train[:,1,:,1])/np.sum(train[:,0,:,1])
  36.     print(wratio)
  37.     return train[:,:,:,0],train[:,0:2,0,1],test[:,:,:,0],test[:,0:2,0,1]
  38. # 训练与评估CNN模型
  39. def run_model(trainx, trainy, testx, testy):
  40.     verbose, epochs, batch_size = 0, 10, 64
  41.     n_timesteps, n_features, n_outputs = trainx.shape[1], trainx.shape[2], trainy.shape[1]
  42.     # 使用 keras 构建卷积神经网络
  43.     #卷积应该有希望识别窗口时间序列数据中的特征,以指示最终时间状态
  44.     model = Sequential()
  45.     model.add(Conv1D(filters=120, kernel_size=6, activation='sigmoid', input_shape=(n_timesteps, n_features)))
  46.     model.add(Conv1D(filters=120, kernel_size=3, activation='sigmoid'))
  47.     model.add(Dropout(0.5)) #这一层有助于规范化,并希望减少对训练集的过度拟合
  48.     model.add(MaxPooling1D(pool_size=2))
  49.     model.add(Flatten()) # 展平
  50.     model.add(Dense(100, activation='sigmoid')) #全连接层
  51.     model.add(Dense(n_outputs, activation='softmax'))  # 在这里使用 softmax 来预测每个类中的概率
  52.     #当每个时间步被预测为开或关时使用,sigmoid函数可以作为多个类,并且需要不同的损失度量
  53.     model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
  54.     # 编译模型
  55.     model.summary()
  56.     model.fit(trainx, trainy, epochs=epochs, batch_size=batch_size, verbose=verbose, class_weight = {0:2.2,1:1 }) # 实施类权重以修复类不平衡,并停止对所有状态进行预测
  57.     # 评估模型
  58.     pr = model.predict(testx)
  59.     pt = model.predict(trainx)
  60.     plt.figure()
  61.     ax1 = plt.subplot(211)
  62.     ax1.title.set_text('Training set')
  63.     ax1.plot(np.round(pt[:,0]), label = 'Predicted class')
  64.     ax1.plot(trainy[:,0], label ='class (1=on) ' )
  65.     ax1.plot(pt[:, 0], label='Class probability')
  66.     ax2 = plt.subplot(212)
  67.     ax2.title.set_text('Test set')
  68.     ax2.plot(np.round(pr[:, 0]), label = 'Predicted class')
  69.     ax2.plot(testy[:, 0],label ='class (1=on) ' )
  70.     ax2.plot(pr[:, 0],label='Class probability')
  71.     ax2.legend(loc='upper center', bbox_to_anchor=(0.5, -0.05), fancybox=True, shadow=True, ncol=3)
  72.     plt.show()
  73.     loss1, accuracy = model.evaluate(testx, testy, batch_size=batch_size, verbose=0)
  74.     loss2, at = model.evaluate(trainx, trainy, batch_size=batch_size, verbose=0)
  75.     print('Training accuracy: %.3f ' % at)
  76.     print('Training loss: %.3f ' % loss2)
  77.     print('Test accuracy: %.3f' % accuracy)
  78.     print('Test loss: %.3f' % loss1)
  79.     return accuracy,model
  80. #模型评估
  81. def model_variability(scores):
  82.     print(scores)
  83.     m, s = np.mean(scores), np.std(scores)
  84.     print('Accuracy: %.3f%% (+/-%.3f)' % (m, s))
  85. #将数据重新格式化为训练集和测试集。
  86. # 数据正在加窗,最后一次的开/关状态记录在一个热向量中
  87. trainx, trainy, testx, testy = sep_data(houses)
  88. repeats = 3
  89. scores = list()
  90. for r in range(repeats):
  91.     score,model = run_model(trainx, trainy, testx, testy)
  92.     score *=100.0
  93.     print('>#%d: %.3f' % (r + 1, score))
  94.     scores.append(score)
  95. model_variability(scores)
复制代码




数据:下载

本帖子中包含更多资源

您需要 登录 才可以下载或查看,没有账号?立即注册

×
懒得打字嘛,点击右侧快捷回复 【右侧内容,后台自定义】
您需要登录后才可以回帖 登录 | 立即注册

本版积分规则

小黑屋|手机版|Unity开发者联盟 ( 粤ICP备20003399号 )

GMT+8, 2024-11-26 05:43 , Processed in 0.065034 second(s), 23 queries .

Powered by Discuz! X3.5 Licensed

© 2001-2024 Discuz! Team.

快速回复 返回顶部 返回列表