找回密码
 立即注册
查看: 225|回复: 0

【NILM教程】REDD数据处理代码

[复制链接]
发表于 2022-6-28 08:23 | 显示全部楼层 |阅读模式
REDD 数据预处理 Python 程序
# -*- coding: utf-8 -*-
import argparse
import os
import time

import matplotlib.pyplot as plt
import pandas as pd
  6. params_appliance ={'microwave':{'windowlength':599,'on_power_threshold':200,'max_on_power':3969,'mean':500,'std':800,'s2s_length':128,'houses':[1,2,3],'channels':[11,6,16],'train_build':[2,3],'test_build':1},'fridge':{'windowlength':599,'on_power_threshold':50,'max_on_power':3323,'mean':200,'std':400,'s2s_length':512,'houses':[1,2,3],'channels':[5,9,7],'train_build':[2,3],'test_build':1},'dishwasher':{'windowlength':599,'on_power_threshold':10,'max_on_power':3964,'mean':700,'std':1000,'s2s_length':1536,'houses':[1,2,3],'channels':[6,10,9],'train_build':[2,3],'test_build':1},'washingmachine':{'windowlength':599,'on_power_threshold':20,'max_on_power':3999,'mean':400,'std':700,'s2s_length':2000,'houses':[1,2,3],# 'channels': [19, 7, 13],'channels':[20,7,13],'train_build':[2,3],'test_build':1}}# APPLIANCE_NAME = 'washingmachine'# APPLIANCE_NAME = 'fridge'# APPLIANCE_NAME = 'dishwasher'
  7. APPLIANCE_NAME ='microwave'
  8. aggregate_mean =522
  9. aggregate_std =814
  10. start_time = time.time()
  11. data_dir ='redd/low_freq'
  12. sample_seconds =8
  13. validation_percent =15
  14. nrows =None
  15. appliance_names =["microwave","fridge","dishwasher","washingmachine"]for appliance_name in appliance_names:print('--'*20)print('\n'+ appliance_name)
  16.     train = pd.DataFrame(columns=['aggregate', appliance_name])
  17.     save_path ='created_data/REDD/{}/'.format(appliance_name)ifnot os.path.exists(save_path):#如果路径不存在
  18.         os.makedirs(save_path)for h in params_appliance[appliance_name]['houses']:print('    '+ data_dir +'/house_'+str(h)+'/'+'channel_'+str(params_appliance[appliance_name]['channels'][params_appliance[appliance_name]['houses'].index(h)])+'.dat')# read data
  19.         mains1_df = pd.read_table(data_dir +'/'+'house_'+str(h)+'/'+'channel_'+str(1)+'.dat',
  20.                                       sep="\s+",
  21.                                       nrows=nrows,
  22.                                       usecols=[0,1],
  23.                                       names=['time','mains1'],
  24.                                       dtype={'time':str},)
  25.    
  26.         mains2_df = pd.read_table(data_dir +'/'+'house_'+str(h)+'/'+'channel_'+str(2)+'.dat',
  27.                                       sep="\s+",
  28.                                       nrows=nrows,
  29.                                       usecols=[0,1],
  30.                                       names=['time','mains2'],
  31.                                       dtype={'time':str},)
  32.         app_df = pd.read_table(data_dir +'/'+'house_'+str(h)+'/'+'channel_'+str(params_appliance[appliance_name]['channels'][params_appliance[appliance_name]['houses'].index(h)])+'.dat',
  33.                                    sep="\s+",
  34.                                    nrows=nrows,
  35.                                    usecols=[0,1],
  36.                                    names=['time', appliance_name],
  37.                                    dtype={'time':str},)
  38.    
  39.    
  40.         mains1_df['time']= pd.to_datetime(mains1_df['time'], unit='s')
  41.         mains2_df['time']= pd.to_datetime(mains2_df['time'], unit='s')
  42.    
  43.         mains1_df.set_index('time', inplace=True)
  44.         mains2_df.set_index('time', inplace=True)
  45.    
  46.         mains_df = mains1_df.join(mains2_df, how='outer')
  47.    
  48.         mains_df['aggregate']= mains_df.iloc[:].sum(axis=1)#resample = mains_df.resample(str(sample_seconds) + 'S').mean()
  49.    
  50.         mains_df.reset_index(inplace=True)# deleting original separate mainsdel mains_df['mains1'], mains_df['mains2']
  51.    
  52.         app_df['time']= pd.to_datetime(app_df['time'], unit='s')
  53.         mains_df.set_index('time', inplace=True)
  54.         app_df.set_index('time', inplace=True)
  55.    
  56.         df_align = mains_df.join(app_df, how='outer'). \
  57.                 resample(str(sample_seconds)+'S').mean().fillna(method='backfill', limit=1)
  58.         df_align = df_align.dropna()
  59.    
  60.         df_align.reset_index(inplace=True)#print(df_align.count())# df_align['OVER 5 MINS'] = (df_align['time'].diff()).dt.seconds > 9# df_align.plot()# plt.plot(df_align['OVER 5 MINS'])# plt.show()del mains1_df, mains2_df, mains_df, app_df, df_align['time']
  61.    
  62.         mains = df_align['aggregate'].values
  63.         app_data = df_align[appliance_name].values
  64.             # plt.plot(np.arange(0, len(mains)), mains, app_data)# plt.show()# if debug:#         # plot the dtaset#     print("df_align:")#     print(df_align.head())#     plt.plot(df_align['aggregate'].values)#     plt.plot(df_align[appliance_name].values)#     plt.show()# Normilization
  65.         mean = params_appliance[appliance_name]['mean']
  66.         std = params_appliance[appliance_name]['std']
  67.    
  68.         df_align['aggregate']=(df_align['aggregate']- aggregate_mean)/ aggregate_std
  69.         df_align[appliance_name]=(df_align[appliance_name]- mean)/ std
  70.    
  71.         if h == params_appliance[appliance_name]['test_build']:# Test CSV
  72.             df_align.to_csv(save_path + appliance_name +'_test_.csv', mode='a', index=False, header=False)print("    Size of test set is {:.4f} M rows.".format(len(df_align)/10**6))continue
  73.    
  74.         train = train.append(df_align, ignore_index=True)del df_align
  75.    
  76.         # Validation CSV
  77.     val_len =int((len(train)/100)*validation_percent)
  78.     val = train.tail(val_len)
  79.     val.reset_index(drop=True, inplace=True)
  80.     train.drop(train.index[-val_len:], inplace=True)
  81.     val.to_csv(save_path + appliance_name +'_validation_'+'.csv', mode='a', index=False, header=False)# Training CSV
  82.     train.to_csv(save_path + appliance_name +'_training_.csv', mode='a', index=False, header=False)
复制代码
数据下载链接: 非入侵负荷分解数据集下载
【1】本程序仅针对 REDD 的低频(low_freq)数据集。
懒得打字嘛,点击右侧快捷回复 【右侧内容,后台自定义】
您需要登录后才可以回帖 登录 | 立即注册

本版积分规则

小黑屋|手机版|Unity开发者联盟 ( 粤ICP备20003399号 )

GMT+8, 2024-11-26 05:55 , Processed in 0.089965 second(s), 25 queries .

Powered by Discuz! X3.5 Licensed

© 2001-2024 Discuz! Team.

快速回复 返回顶部 返回列表