import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt


def data_read(data_address):
    df = pd.read_csv(data_address)
    # Map each problem label to an integer class index
    label_mapping = {label: idx for idx, label in enumerate(df['Problem'].unique())}
    df['Problem'] = df['Problem'].map(label_mapping)
    df.replace([np.inf, -np.inf], np.nan, inplace=True)
    df.dropna(inplace=True)
    X = np.array(df['Voltage'])
    Y = np.array(df['Problem'])

    # Min-max normalization
    X = (X - np.min(X)) / (np.max(X) - np.min(X))

    # Convert to a sliding-window time-series format
    time_steps = 34
    X_series, Y_series = [], []
    for i in range(0, len(X) - time_steps):
        X_series.append(X[i:(i + time_steps)])
        Y_series.append(Y[i + time_steps - 1])
    return np.array(X_series).reshape(-1, time_steps, 1), np.array(Y_series)


X_train, Y_train = data_read(
    'Liu/data/VOLTAGE-QUALITY-CLASSIFICATION-MODEL--main/Voltage Quality.csv')
X_test, Y_test = data_read(
    'Liu/data/VOLTAGE-QUALITY-CLASSIFICATION-MODEL--main/Voltage Quality Test.csv')

# Number of classes
n_classes = len(np.unique(Y_train))

# Build the model
model = tf.keras.models.Sequential([
    tf.keras.layers.LSTM(50, return_sequences=True, input_shape=(34, 1)),
    tf.keras.layers.LSTM(50),
    tf.keras.layers.Dense(n_classes, activation='softmax')  # multi-class output
])

# Compile the model
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Train the model
model.fit(X_train, Y_train, epochs=10, validation_split=0.2)

# Evaluate the model
loss, accuracy = model.evaluate(X_test, Y_test)

print(X_train.shape)

# Build the perturbed data
# Loss function (the model output is already softmax, so from_logits stays False)
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()

# Convert X_train and Y_train to TensorFlow tensors
X_train_tensor = tf.convert_to_tensor(X_train, dtype=tf.float32)
Y_train_tensor = tf.convert_to_tensor(Y_train, dtype=tf.int32)

# Use tf.GradientTape to compute gradients with respect to the input
with tf.GradientTape() as tape:
    # Make sure the tape watches the input tensor
    tape.watch(X_train_tensor)
    # Forward pass: compute predictions
    predictions = model(X_train_tensor)
    # Compute the loss
    loss = loss_fn(Y_train_tensor, predictions)

# Gradient of the loss with respect to the input X
gradients = tape.gradient(loss, X_train_tensor)

# L2 norm of the gradient for each input
gradient_magnitudes = tf.norm(gradients, axis=1)

# Accuracy results for each gamma
accuracy_per_gamma = {}

# Flatten the gradients
flattened_gradients = tf.reshape(gradients, [-1])

# Select the largest gamma * |X| gradients
for gamma in [0.05, 0.1, 0.2, 0.4]:
    num_gradients_to_select = int(gamma * tf.size(flattened_gradients, out_type=tf.dtypes.float32))
    top_gradients_indices = tf.argsort(flattened_gradients, direction='DESCENDING')[:num_gradients_to_select]

    # Start from a copy of the original gradients
    updated_gradients = tf.identity(flattened_gradients)

    # Boolean mask: False at the selected top gradients, True everywhere else
    mask = tf.ones_like(updated_gradients, dtype=bool)
    mask = tf.tensor_scatter_nd_update(mask,
                                       tf.expand_dims(top_gradients_indices, 1),
                                       tf.zeros_like(top_gradients_indices, dtype=bool))

    # Zero out every gradient except the selected ones
    updated_gradients = tf.where(mask, tf.zeros_like(updated_gradients), updated_gradients)

    # Restore the original gradient shape
    updated_gradients = tf.reshape(updated_gradients, tf.shape(gradients))

    # Accuracy for each learning rate
    accuracy_list = []
    for learning_rate in [0.1, 0.2, 0.3, 0.4, 0.5]:
        # Scale the gradients by the learning rate
        scaled_gradients = learning_rate * updated_gradients

        # Perturb X_train_tensor with the scaled gradients
        X_train_updated = X_train_tensor - scaled_gradients
        X_train_updated = tf.reshape(X_train_updated, (-1, 34, 1))
        X_train_updated = X_train_updated.numpy()

        # Compile the model
        model.compile(optimizer='adam',
                      loss='sparse_categorical_crossentropy',
                      metrics=['accuracy'])

        # Train the model on the perturbed data
        model.fit(X_train_updated, Y_train, epochs=1500, validation_split=0.2)

        # Evaluate the model
        loss, accuracy = model.evaluate(X_test, Y_test)

        # Record the accuracy
        accuracy_list.append(accuracy)

    # Record the accuracies for this gamma
    accuracy_per_gamma[gamma] = accuracy_list

# Learning rate samples
learning_rates = [0.1, 0.2, 0.3, 0.4, 0.5]

# Gamma values
gammas = [0.05, 0.1, 0.2, 0.4]

# Create the figure
plt.figure(figsize=(10, 6))

# Plot one curve per gamma value
for gamma in gammas:
    plt.plot(learning_rates, accuracy_per_gamma[gamma], marker='o', label=f'Gamma={gamma}')

# Title and axis labels
plt.title('Accuracy vs Learning Rate for Different Gammas')
plt.xlabel('Learning Rate')
plt.ylabel('Accuracy')
plt.legend()

# Show the figure
plt.show()