Liu/main.py
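"""Train an LSTM voltage-quality classifier, then run a gradient-guided
perturbation experiment: the training windows whose input gradients have
the largest norms (a fraction gamma of the data) are perturbed at several
step sizes, the model is retrained on the perturbed data, and the test
accuracy is plotted for each gamma."""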

import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
def data_read(data_address):
    df = pd.read_csv(data_address)
    # Map each problem label to an integer class index
    label_mapping = {label: idx for idx,
                     label in enumerate(df['Problem'].unique())}
    df['Problem'] = df['Problem'].map(label_mapping)
    df.replace([np.inf, -np.inf], np.nan, inplace=True)
    df.dropna(inplace=True)
    X = np.array(df['Voltage'])
    Y = np.array(df['Problem'])
    # Min-max normalization
    X = (X - np.min(X)) / (np.max(X) - np.min(X))
    # Convert to a sliding-window time-series format
    time_steps = 34
    X_series, Y_series = [], []
    for i in range(0, len(X) - time_steps):
        X_series.append(X[i:(i + time_steps)])
        Y_series.append(Y[i + time_steps - 1])
    return np.array(X_series).reshape(-1, time_steps, 1), np.array(Y_series)
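# Each returned sample is a window of time_steps consecutive normalized
# voltage readings, labelled with the class of the window's last reading:
# X has shape (num_windows, 34, 1) and Y has shape (num_windows,).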
X_train, Y_train = data_read(
    'Liu/data/VOLTAGE-QUALITY-CLASSIFICATION-MODEL--main/Voltage Quality.csv')
X_test, Y_test = data_read(
    'Liu/data/VOLTAGE-QUALITY-CLASSIFICATION-MODEL--main/Voltage Quality Test.csv')
# Get the number of classes
n_classes = len(np.unique(Y_train))
# Build the model
model = tf.keras.models.Sequential([
    tf.keras.layers.LSTM(50, return_sequences=True, input_shape=(34, 1)),
    tf.keras.layers.LSTM(50),
    tf.keras.layers.Dense(n_classes, activation='softmax')  # softmax head for multi-class output
])
# Compile the model
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy', metrics=['accuracy'])
# Train the model
model.fit(X_train, Y_train, epochs=3000, validation_split=0.2)
# Evaluate the model on the held-out test set
loss, accuracy = model.evaluate(X_test, Y_test)
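# Log the clean-data baseline so it can be compared with the perturbed runs
print(f'Baseline test loss: {loss:.4f}, accuracy: {accuracy:.4f}')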
# Craft the perturbed data
# Define the loss function; the model ends in a softmax, so it outputs
# probabilities rather than logits, and from_logits must stay False (the default)
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()
# Convert X_train and Y_train to TensorFlow tensors
X_train_tensor = tf.convert_to_tensor(X_train, dtype=tf.float32)
Y_train_tensor = tf.convert_to_tensor(Y_train, dtype=tf.int32)
# Use tf.GradientTape to compute gradients
with tf.GradientTape() as tape:
    # Make sure the tape watches the input tensor
    tape.watch(X_train_tensor)
    # Forward pass: compute the predictions
    predictions = model(X_train_tensor)
    # Compute the loss
    loss = loss_fn(Y_train_tensor, predictions)
# Gradient of the loss with respect to the input X
gradients = tape.gradient(loss, X_train_tensor)
# L2 norm of each sample's gradient, reduced over the time and feature axes
gradient_magnitudes = tf.norm(gradients, axis=[1, 2])
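# Shape check: gradients is (num_windows, 34, 1), so gradient_magnitudes is
# (num_windows,), one magnitude per training window; the top-gamma selection
# below sorts on these per-window magnitudes.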
# Dictionary of accuracies for each gamma
accuracy_per_gamma = {}
# Select the gamma * |X| samples with the largest gradients
for gamma in [0.05, 0.1, 0.2, 0.4]:  # candidate values for gamma
    num_gradients_to_select = int(
        gamma * tf.size(gradient_magnitudes, out_type=tf.dtypes.float32))
    # Indices of the largest gradient magnitudes
    # (direction='DESCENDING' already sorts from largest to smallest)
    top_gradients_indices = tf.argsort(gradient_magnitudes, direction='DESCENDING')[
        :num_gradients_to_select]
    # Set A: the indices of all selected largest gradients
    A = top_gradients_indices.numpy()
    # All sample indices
    all_indices = tf.range(start=0, limit=tf.size(
        gradient_magnitudes, out_type=tf.dtypes.int32), dtype=tf.int32)
    # Boolean mask that is True for indices in set A and False elsewhere
    mask = tf.reduce_any(tf.equal(tf.expand_dims(
        all_indices, 1), tf.reshape(A, (1, -1))), axis=1)
    # Keep the gradients of the samples in A and zero out all the others;
    # the mask is expanded to (num_windows, 1, 1) so it broadcasts over the
    # time and feature axes
    updated_gradients = tf.where(
        mask[:, tf.newaxis, tf.newaxis], gradients, tf.zeros_like(gradients))
    # Accuracy list for this gamma
    accuracy_list = []
    for learning_rate in [0.1, 0.2, 0.3, 0.4, 0.5]:
        # Perturb the inputs: step along the masked gradient, scaled by the
        # learning rate
        X_train_updated = X_train_tensor - learning_rate * updated_gradients
        # Rebuild the model with freshly initialized weights (assumption:
        # each (gamma, learning_rate) run is meant to be independent)
        model = tf.keras.models.clone_model(model)
        # Compile the model
        model.compile(
            optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
        # Train the model on the perturbed data
        model.fit(X_train_updated, Y_train, epochs=3000, validation_split=0.2)
        # Evaluate the model on the clean test set
        loss, accuracy = model.evaluate(X_test, Y_test)
        # Record the accuracy
        accuracy_list.append(accuracy)
    # Record the accuracies for this gamma
    accuracy_per_gamma[gamma] = accuracy_list
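# accuracy_per_gamma now maps each gamma to five test accuracies, one per
# perturbation step size, e.g. {0.05: [acc_at_0.1, ..., acc_at_0.5], ...}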
# The perturbation step sizes that were sampled
learning_rates = [0.1, 0.2, 0.3, 0.4, 0.5]
# The different gamma values
gammas = [0.05, 0.1, 0.2, 0.4]
# Create the figure
plt.figure(figsize=(10, 6))
# Draw one curve per gamma value
for gamma in gammas:
    plt.plot(learning_rates, accuracy_per_gamma[gamma],
             marker='o', label=f'Gamma={gamma}')
# Add the title and axis labels
plt.title('Accuracy vs Learning Rate for Different Gammas')
plt.xlabel('Learning Rate')
plt.ylabel('Accuracy')
plt.legend()
# Show the figure
plt.show()