Liu/main.py


import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
def data_read(data_address):
    """Load the CSV, encode labels, normalize the voltage signal, and window it into sequences."""
    df = pd.read_csv(data_address)
    # Map each problem label to an integer class index.
    label_mapping = {label: idx for idx,
                     label in enumerate(df['Problem'].unique())}
    df['Problem'] = df['Problem'].map(label_mapping)
    df.replace([np.inf, -np.inf], np.nan, inplace=True)
    df.dropna(inplace=True)
    X = np.array(df['Voltage'])
    Y = np.array(df['Problem'])
    # Min-max normalization.
    X = (X - np.min(X)) / (np.max(X) - np.min(X))
    # Convert to a time-series (sliding-window) format.
    time_steps = 34
    X_series, Y_series = [], []
    for i in range(0, len(X) - time_steps):
        X_series.append(X[i:(i + time_steps)])
        Y_series.append(Y[i + time_steps - 1])
    return np.array(X_series).reshape(-1, time_steps, 1), np.array(Y_series)
X_train, Y_train = data_read(
    'Liu/data/VOLTAGE-QUALITY-CLASSIFICATION-MODEL--main/Voltage Quality.csv')
X_test, Y_test = data_read(
    'Liu/data/VOLTAGE-QUALITY-CLASSIFICATION-MODEL--main/Voltage Quality Test.csv')
# Number of classes.
n_classes = len(np.unique(Y_train))
# Build the model.
model = tf.keras.models.Sequential([
    tf.keras.layers.LSTM(50, return_sequences=True, input_shape=(34, 1)),
    tf.keras.layers.LSTM(50),
    tf.keras.layers.Dense(n_classes, activation='softmax')  # softmax head for multi-class output
])
# Compile the model.
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy', metrics=['accuracy'])
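# Print the layer-by-layer architecture for reference; model.summary() is a
# standard Keras call and does not change training behaviour.
model.summary()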
# Train the model.
model.fit(X_train, Y_train, epochs=3000, validation_split=0.2)
# Evaluate the baseline model on the clean test set.
loss, accuracy = model.evaluate(X_test, Y_test)
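# Report the clean baseline so the perturbed runs below have a reference point.
print(f'Baseline test loss: {loss:.4f}, test accuracy: {accuracy:.4f}')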
# Craft the perturbed data.
# Loss function for the gradient computation; the model ends in a softmax,
# so from_logits is left at its default of False.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()
# Convert X_train and Y_train to TensorFlow tensors.
X_train_tensor = tf.convert_to_tensor(X_train, dtype=tf.float32)
Y_train_tensor = tf.convert_to_tensor(Y_train, dtype=tf.int32)
# Use tf.GradientTape to compute gradients with respect to the input.
with tf.GradientTape() as tape:
    # Make sure the tape watches the input tensor.
    tape.watch(X_train_tensor)
    # Forward pass: compute the predictions.
    predictions = model(X_train_tensor)
    # Compute the loss.
    loss = loss_fn(Y_train_tensor, predictions)
# Gradient of the loss with respect to the input X.
gradients = tape.gradient(loss, X_train_tensor)
# L2 norm of the gradient for each input window, shape (num_samples,).
gradient_magnitudes = tf.norm(gradients, axis=[1, 2])
# Accuracy results for each gamma.
accuracy_per_gamma = {}
# Select the gamma * |X| samples with the largest gradients.
for gamma in [0.05, 0.1, 0.2, 0.4]:
    num_gradients_to_select = int(gamma * int(tf.size(gradient_magnitudes)))
    # Indices of the largest gradient magnitudes, in descending order.
    top_gradients_indices = tf.argsort(
        gradient_magnitudes, direction='DESCENDING')[:num_gradients_to_select]
    # Set A: the indices of the selected largest gradients.
    A = top_gradients_indices.numpy()
    # Start from a copy of the original gradients.
    updated_gradients = tf.identity(gradients)
    # All sample indices.
    all_indices = tf.range(tf.size(gradient_magnitudes, out_type=tf.int32))
    # Boolean mask that is True for indices in set A, False otherwise.
    mask = tf.reduce_any(tf.equal(tf.expand_dims(all_indices, 1),
                                  tf.reshape(A, (1, -1))), axis=1)
    # Broadcast the per-sample mask over the time and feature axes, then
    # zero out the gradients of every sample outside set A.
    mask = tf.reshape(mask, (-1, 1, 1))
    updated_gradients = tf.where(
        mask, updated_gradients, tf.zeros_like(updated_gradients))
    # Accuracy for each learning rate at this gamma.
    accuracy_list = []
    for learning_rate in [0.1, 0.2, 0.3, 0.4, 0.5]:
        # Perturb the inputs along the masked gradient, scaled by the step size.
        X_train_updated = X_train_tensor - learning_rate * updated_gradients
        # Re-compile the model (note that the trained weights carry over).
        model.compile(
            optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
        # Train the model on the perturbed data.
        model.fit(X_train_updated, Y_train, epochs=3000, validation_split=0.2)
        # Evaluate the model on the clean test set.
        loss, accuracy = model.evaluate(X_test, Y_test)
        # Record the accuracy.
        accuracy_list.append(accuracy)
    # Record the accuracies for this gamma.
    accuracy_per_gamma[gamma] = accuracy_list
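    # Log this gamma's results as the sweep progresses.
    print(f'gamma={gamma}: accuracy per learning rate {accuracy_list}')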
# Learning-rate samples.
learning_rates = [0.1, 0.2, 0.3, 0.4, 0.5]
# The gamma values used above.
gammas = [0.05, 0.1, 0.2, 0.4]
# Create the figure.
plt.figure(figsize=(10, 6))
# Plot one curve per gamma value.
for gamma in gammas:
    plt.plot(learning_rates, accuracy_per_gamma[gamma],
             marker='o', label=f'Gamma={gamma}')
# Title and axis labels.
plt.title('Accuracy vs Learning Rate for Different Gammas')
plt.xlabel('Learning Rate')
plt.ylabel('Accuracy')
plt.legend()
# Display the figure.
plt.show()