验证可能是数据集存在问题,准备大改

This commit is contained in:
MuJ 2024-01-09 20:08:48 +08:00
parent 31f4fbc323
commit 1aa5d08034
1 changed files with 76 additions and 14 deletions

88
main.py
View File

@ -1,14 +1,20 @@
import pandas as pd import pandas as pd
import numpy as np import numpy as np
import os
import warnings
warnings.filterwarnings('ignore')
import tensorflow as tf import tensorflow as tf
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from keras import regularizers from keras import regularizers
from keras.callbacks import TensorBoard, LearningRateScheduler from keras.callbacks import TensorBoard, LearningRateScheduler
from keras.layers import Dropout from keras.layers import Dropout
from sklearn.preprocessing import MinMaxScaler from sklearn.preprocessing import MinMaxScaler
from tqdm import tqdm
def data_read(data_address): def data_read(data_address):
df = pd.read_csv(data_address) df = pd.read_csv(data_address)
label_mapping = {label: idx for idx, label_mapping = {label: idx for idx,
label in enumerate(df['Problem'].unique())} label in enumerate(df['Problem'].unique())}
@ -20,8 +26,6 @@ def data_read(data_address):
X = np.array(df['Voltage']) X = np.array(df['Voltage'])
Y = np.array(df['Problem']) Y = np.array(df['Problem'])
X = tf.nn.relu(X)
# 转换为时间序列数据格式 # 转换为时间序列数据格式
time_steps = 34 time_steps = 34
X_series, Y_series = [], [] X_series, Y_series = [], []
@ -32,20 +36,72 @@ def data_read(data_address):
return np.array(X_series), np.array(Y_series) return np.array(X_series), np.array(Y_series)
# 编写绘图函数,画出训练集电压数据
def plant_for_voltage(x_train_new, x_train_orginal, gamma, learning_rate, y_train, different_location):
    """Save line plots of perturbed and original voltage windows side by side.

    For every index ``i`` in ``different_location`` that is a multiple of 100,
    two PNG files are written to
    ``Liu/picture/gamma{gamma} learningrate{learning_rate}/``:

    * ``X_train_new_{i}.png`` -- plotted from ``x_train_new[i]``
    * ``X_train_{i}.png``     -- plotted from ``x_train_orginal[i]``

    Args:
        x_train_new: Indexable collection of perturbed samples.
        x_train_orginal: Indexable collection of the unmodified samples,
            indexed the same way as ``x_train_new``.
        gamma: Value shown in the plot title and used in the directory name.
        learning_rate: Value shown in the plot title and used in the
            directory name.
        y_train: Indexable collection of labels; ``y_train[i]`` is shown in
            the title.
        different_location: Iterable of sample indices to consider. Repeated
            indices are plotted only once, since a repeat would just
            overwrite the same files.

    Returns:
        None. The function's only effect is writing image files.
    """
    # NOTE(review): the source this was recovered from had lost its
    # indentation; it is assumed that BOTH plots (perturbed and original) sit
    # under the ``i % 100 == 0`` guard -- confirm against the real file.
    # dict.fromkeys de-duplicates while keeping first-seen order.
    selected = [i for i in dict.fromkeys(different_location) if i % 100 == 0]
    if not selected:
        # Nothing to plot: do not create an empty output directory.
        return

    out_dir = f'Liu/picture/gamma{gamma} learningrate{learning_rate}'
    os.makedirs(out_dir, exist_ok=True)

    for i in selected:
        title = f'gamma:{gamma},learning_rate:{learning_rate},Y:{y_train[i]}'
        _save_voltage_curve(x_train_new[i], title, f'{out_dir}/X_train_new_{i}.png')
        _save_voltage_curve(x_train_orginal[i], title, f'{out_dir}/X_train_{i}.png')


def _save_voltage_curve(series, title, path):
    """Plot one voltage window against its time-step index and save it to ``path``."""
    fig = plt.figure()
    try:
        # One x position per sample in the window (previously hard-coded to 34).
        plt.plot(list(range(len(series))), series)
        plt.title(title)
        plt.xlabel('time_step')
        plt.ylabel('voltage')
        plt.savefig(path)
    finally:
        # Close this exact figure. The old close()-then-clf() sequence made
        # pyplot create a fresh empty figure that was never released.
        plt.close(fig)
def if_diff(a, b):
    """Return the first-axis indices at which ``a`` and ``b`` differ.

    The arrays are compared element-wise with ``!=``. The result holds one
    entry per differing element (not per row), so a row that differs in
    several positions appears several times. Entries follow the order in
    which ``np.nonzero`` reports them.

    Args:
        a: Array-like with at least one dimension.
        b: Array-like compared against ``a`` position by position.

    Returns:
        list[int]: First-axis index of every differing element; empty when
        the inputs are equal everywhere.
    """
    mismatch = np.asarray(a) != np.asarray(b)
    # Only the first-axis coordinate was ever used by the original loop;
    # taking it directly also makes 1-D inputs work instead of failing on a
    # missing second coordinate.
    return np.nonzero(mismatch)[0].tolist()
X_train, Y_train = data_read( X_train, Y_train = data_read(
'Liu\data\VOLTAGE-QUALITY-CLASSIFICATION-MODEL--main\Voltage Quality.csv') 'Liu\data\VOLTAGE-QUALITY-CLASSIFICATION-MODEL--main\Voltage Quality.csv')
X_test, Y_test = data_read( X_test, Y_test = data_read(
'Liu/data/VOLTAGE-QUALITY-CLASSIFICATION-MODEL--main/Voltage Quality Test.csv') 'Liu/data/VOLTAGE-QUALITY-CLASSIFICATION-MODEL--main/Voltage Quality Test.csv')
# 归一化 # 初始化归一化模型
sc = MinMaxScaler(feature_range=(0, 1)) sc = MinMaxScaler(feature_range=(-1, 1))
X_train = sc.fit_transform(X_train) X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test) X_test = sc.transform(X_test)
X_train.reshape(-1, 34, 1) X_train.reshape(-1, 34, 1)
X_test.reshape(-1, 34, 1) X_test.reshape(-1, 34, 1)
np.random.seed(7) np.random.seed(7)
np.random.shuffle(X_train) np.random.shuffle(X_train)
np.random.seed(7) np.random.seed(7)
@ -66,7 +122,7 @@ model = tf.keras.models.Sequential([
tf.keras.layers.SimpleRNN(100), tf.keras.layers.SimpleRNN(100),
Dropout(0.2), Dropout(0.2),
tf.keras.layers.Dense(n_classes, activation='relu', tf.keras.layers.Dense(n_classes, activation='relu',
kernel_regularizer=regularizers.l2(0.3)) # 适应多分类 ) # kernel_regularizer=regularizers.l2(0.3)
]) ])
# 编译模型 # 编译模型
@ -94,6 +150,7 @@ initial_weights = model.get_weights()
log_dir = "logs/fit" log_dir = "logs/fit"
tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1) tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)
# 制作扰动数据 # 制作扰动数据
# 转换X_train和Y_train为TensorFlow张量 # 转换X_train和Y_train为TensorFlow张量
@ -119,7 +176,7 @@ accuracy_per_gamma = {}
flattened_gradients = tf.reshape(gradients, [-1]) flattened_gradients = tf.reshape(gradients, [-1])
# 选择最大的γ * |X|个梯度 # 选择最大的γ * |X|个梯度
for gamma in [0.05, 0.1, 0.2, 0.4]: for gamma in tqdm([0.6, 0.7, 0.8, 0.9, 0.99]):
num_gradients_to_select = int( num_gradients_to_select = int(
gamma * tf.size(flattened_gradients, out_type=tf.dtypes.float32)) gamma * tf.size(flattened_gradients, out_type=tf.dtypes.float32))
top_gradients_indices = tf.argsort(flattened_gradients, direction='DESCENDING')[ top_gradients_indices = tf.argsort(flattened_gradients, direction='DESCENDING')[
@ -143,24 +200,29 @@ for gamma in [0.05, 0.1, 0.2, 0.4]:
# 创建准确率列表 # 创建准确率列表
accuracy_list = [] accuracy_list = []
for learning_rate in [0.1, 0.2, 0.3, 0.4, 0.5]: for learning_rate in tqdm([0.1, 0.2, 0.3, 0.4, 0.5]):
# 应用学习率到梯度 # 应用学习率到梯度
scaled_gradients = learning_rate * updated_gradients scaled_gradients = (learning_rate * 100) * updated_gradients
# 使用缩放后的梯度更新X_train_tensor # 使用缩放后的梯度更新X_train_tensor
X_train_updated = tf.add(X_train_tensor, scaled_gradients) X_train_updated = tf.add(X_train_tensor, scaled_gradients)
X_train_updated = X_train_updated.numpy() X_train_updated = X_train_updated.numpy()
list_diff = if_diff(X_train, X_train_updated)
# 显示扰动数据和原始数据的可视化图像
# plant_for_voltage(X_train_updated, X_train, gamma, learning_rate, Y_train, list_diff)
# Reset model weights to initial weights # Reset model weights to initial weights
model.set_weights(initial_weights) model.set_weights(initial_weights)
# 训练模型,添加 TensorBoard 回调 # 训练模型,添加 TensorBoard 回调
history = model.fit(X_train_updated, Y_train, epochs=1500, history = model.fit(X_train_updated, Y_train, epochs=500,
batch_size=32, callbacks=[tensorboard_callback, lr_scheduler]) batch_size=32, callbacks=[tensorboard_callback, lr_scheduler], verbose=0)
# 评估模型 # 评估模型
loss, accuracy = model.evaluate(X_test, Y_test) loss, accuracy = model.evaluate(X_test, Y_test)
print(f"Accuracy gamma: {gamma},learning:{learning_rate}", accuracy)
# 记录准确率 # 记录准确率
accuracy_list.append(accuracy) accuracy_list.append(accuracy)
@ -177,7 +239,7 @@ learning_rates = [0.1, 0.2, 0.3, 0.4, 0.5]
gammas = [0.05, 0.1, 0.2, 0.4] gammas = [0.05, 0.1, 0.2, 0.4]
# 创建图像 # 创建图像
last_plt = plt.figure(figsize=(10, 6)) plt.figure(figsize=(10, 6))
# 为每个gamma值绘制曲线 # 为每个gamma值绘制曲线
for gamma in gammas: for gamma in gammas: