深度学习2.0-10.tensorflow的高阶操作之张量的限幅

科技2025-09-16 114

文章目录

1.张量的限幅
  1.tf.clip_by_value-根据值来裁剪
  2.tf.nn.relu-将矩阵中每行小于0的值置0
  3.tf.clip_by_norm-根据范数裁剪
  4.tf.clip_by_global_norm-等比例裁剪

1.张量的限幅

1.tf.clip_by_value-根据值来裁剪

2.tf.nn.relu-将矩阵中每行小于0的值置0

3.tf.clip_by_norm-根据范数裁剪

4.tf.clip_by_global_norm-等比例裁剪

Gradient clipping-梯度裁剪例子

# Gradient-clipping demo: train a 3-layer MLP on MNIST with a hand-written
# forward pass and print every gradient norm before and after global-norm
# clipping.

# Silence TensorFlow's informational C++ log output. This must be set before
# TensorFlow is imported to take effect.
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import datasets, layers, optimizers

# List every physical GPU and let TensorFlow grow its memory usage on demand.
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)
print(tf.__version__)

# Only the training split is used; the test split is discarded.
(x, y), _ = datasets.mnist.load_data()
# NOTE(review): dividing by 50 (rather than normalizing to [0, 1]) presumably
# keeps the inputs large on purpose so that gradients are big enough for the
# clipping below to be visible -- confirm against the accompanying article.
x = tf.convert_to_tensor(x, dtype=tf.float32) / 50.
y = tf.convert_to_tensor(y)
y = tf.one_hot(y, depth=10)
print('x:', x.shape, 'y:', y.shape)

# Batches of 128 samples, with the whole dataset repeated 30 times.
train_db = tf.data.Dataset.from_tensor_slices((x, y)).batch(128).repeat(30)
# Peek at one batch to show the per-batch shapes (rebinds module-level x, y).
x, y = next(iter(train_db))
print('sample:', x.shape, y.shape)


def main():
    """Train a 784-512-256-10 MLP on ``train_db`` with clipped gradients.

    For every batch the function computes a mean-squared-error loss between
    the one-hot labels and the network output, prints the norm of each
    gradient, clips all gradients jointly with
    ``tf.clip_by_global_norm(grads, 15)``, prints the norms again, and applies
    an SGD update. The loss is printed every 100 steps.

    Returns:
        None. All results are reported via ``print``.
    """
    # 784 => 512
    w1 = tf.Variable(tf.random.truncated_normal([784, 512], stddev=0.1))
    b1 = tf.Variable(tf.zeros([512]))
    # 512 => 256
    w2 = tf.Variable(tf.random.truncated_normal([512, 256], stddev=0.1))
    b2 = tf.Variable(tf.zeros([256]))
    # 256 => 10
    w3 = tf.Variable(tf.random.truncated_normal([256, 10], stddev=0.1))
    b3 = tf.Variable(tf.zeros([10]))

    # Single source of truth for the trainable parameters so that the order
    # used for gradient computation and for the update cannot diverge.
    params = [w1, b1, w2, b2, w3, b3]

    # `learning_rate` is the supported keyword; the old `lr` alias is
    # deprecated and rejected by newer Keras releases.
    optimizer = optimizers.SGD(learning_rate=0.01)

    for step, (x, y) in enumerate(train_db):
        # [b, 28, 28] => [b, 784]
        x = tf.reshape(x, (-1, 784))

        with tf.GradientTape() as tape:
            # layer 1
            h1 = x @ w1 + b1
            h1 = tf.nn.relu(h1)
            # layer 2
            h2 = h1 @ w2 + b2
            h2 = tf.nn.relu(h2)
            # output layer (linear, no activation)
            out = h2 @ w3 + b3

            # compute loss
            # [b, 10] - [b, 10]
            loss = tf.square(y - out)
            # [b, 10] => [b]
            loss = tf.reduce_mean(loss, axis=1)
            # [b] => scalar
            loss = tf.reduce_mean(loss)

        # compute gradients
        grads = tape.gradient(loss, params)

        print('==before==')
        for g in grads:
            print(tf.norm(g))

        # Clip all trainable parameters' gradients jointly by their global
        # norm; the second return value (the global norm) is not needed.
        grads, _ = tf.clip_by_global_norm(grads, 15)

        print('==after==')
        for g in grads:
            print(tf.norm(g))

        # update: w' = w - lr * grad
        optimizer.apply_gradients(zip(grads, params))

        if step % 100 == 0:
            print(step, 'loss:', float(loss))


if __name__ == '__main__':
    main()

Processed: 0.024, SQL: 9