#MSE Gradient
import tensorflow as tf

x = tf.random.normal([2,4]) # input: 2 samples, shape [b,4]
w = tf.random.normal([4,3]) # project 4 dims down to 3 ==> 3 output classes
b = tf.zeros([3])
y = tf.constant([2,0])

with tf.GradientTape() as tape:
    tape.watch([w,b]) # w and b are plain tensors, so the tape must watch them explicitly
    prob = tf.nn.softmax(x@w+b, axis=1)
    loss = tf.reduce_mean(tf.losses.MSE(tf.one_hot(y,depth=3), prob))
grads = tape.gradient(loss, [w,b])
print(grads[0])
# tf.Tensor(
# [[-0.05369104  0.07968411 -0.02599307]
#  [-0.0535683  -0.0183023   0.0718706 ]
#  [-0.0789809   0.02822249  0.05075841]
#  [ 0.08340039 -0.03885455 -0.04454585]], shape=(4, 3), dtype=float32)
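
The same gradient can be taken without tape.watch; as a minimal sketch, wrapping w and b in tf.Variable makes the tape track them automatically, which is the more common pattern in practice:

x = tf.random.normal([2,4])
w = tf.Variable(tf.random.normal([4,3])) # variables are watched by the tape automatically
b = tf.Variable(tf.zeros([3]))
y = tf.constant([2,0])

with tf.GradientTape() as tape:
    prob = tf.nn.softmax(x@w+b, axis=1)
    loss = tf.reduce_mean(tf.losses.MSE(tf.one_hot(y,depth=3), prob))
grads = tape.gradient(loss, [w,b])
print(grads[0].shape, grads[1].shape) # (4, 3) (3,)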
#Crossentropy Gradient
x = tf.random.normal([2,4]) # input: 2 samples, shape [b,4]
w = tf.random.normal([4,3]) # project 4 dims down to 3 ==> 3 output classes
b = tf.zeros([3])
y = tf.constant([2,0])

with tf.GradientTape() as tape:
    tape.watch([w,b])
    logits = x@w+b # pass raw logits; from_logits=True applies softmax internally (numerically stabler)
    loss = tf.reduce_mean(tf.losses.categorical_crossentropy(tf.one_hot(y,depth=3), logits, from_logits=True))
grads = tape.gradient(loss, [w,b])
print(grads)
# [<tf.Tensor: shape=(4, 3), dtype=float32, numpy=...>, <tf.Tensor: shape=(3,), dtype=float32, numpy=...>]
print(grads[1]) # gradient w.r.t. b, shape (3,)
# tf.Tensor([-0.44427255  0.6398322  -0.1955596 ], shape=(3,), dtype=float32)
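
For softmax cross-entropy computed from logits, the gradient with respect to the logits has the closed form (softmax(logits) - one_hot(y)) / batch_size. A quick sketch that checks the tape against this formula (variable names here are illustrative):

x = tf.random.normal([2,4])
w = tf.random.normal([4,3])
b = tf.zeros([3])
y_onehot = tf.one_hot(tf.constant([2,0]), depth=3)

with tf.GradientTape() as tape:
    logits = x@w+b
    tape.watch(logits) # differentiate w.r.t. the logits themselves
    loss = tf.reduce_mean(tf.losses.categorical_crossentropy(y_onehot, logits, from_logits=True))
dlogits = tape.gradient(loss, logits)

analytic = (tf.nn.softmax(logits, axis=1) - y_onehot) / 2.0 # batch size is 2
print(float(tf.reduce_max(tf.abs(dlogits - analytic)))) # ~0, up to float32 precision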
#Single-output perceptron gradient
x = tf.random.normal([1,3])
w = tf.ones([3,1])
b = tf.ones([1])
y = tf.constant([1])

with tf.GradientTape() as tape:
    tape.watch([w,b])
    logits = tf.sigmoid(x@w+b)
    loss = tf.reduce_mean(tf.losses.MSE(y, logits)) # sigmoid output is already in 0~1, so no one_hot encoding is needed
grads = tape.gradient(loss, [w,b])
print(grads)
# [<tf.Tensor: shape=(3, 1), dtype=float32, numpy=...>, <tf.Tensor: shape=(1,), dtype=float32, numpy=...>]
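
The tape result can be checked by hand here: with o = sigmoid(x@w+b) and a scalar target y, the chain rule gives dL/dw = 2(o-y)*o*(1-o)*x^T and dL/db = 2(o-y)*o*(1-o). A sketch of that check, assuming a float target so the subtraction is well-typed:

x = tf.random.normal([1,3])
w = tf.ones([3,1])
b = tf.ones([1])
y = tf.constant([1.]) # float target for the manual check

with tf.GradientTape() as tape:
    tape.watch([w,b])
    o = tf.sigmoid(x@w+b)
    loss = tf.reduce_mean(tf.losses.MSE(y, o))
dw, db = tape.gradient(loss, [w,b])

delta = 2.0 * (o - y) * o * (1.0 - o) # dL/dz for z = x@w+b, shape [1,1]
print(float(tf.reduce_max(tf.abs(dw - tf.transpose(x) @ delta)))) # ~0
print(float(tf.reduce_max(tf.abs(db - tf.reshape(delta, [1]))))) # ~0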
#Multi-output perceptron gradient
x = tf.random.normal([2,4])
w = tf.ones([4,3])
b = tf.ones([3])
y = tf.constant([2,0])

with tf.GradientTape() as tape:
    tape.watch([w,b])
    prob = tf.nn.softmax(x@w+b, axis=1)
    loss = tf.reduce_mean(tf.losses.MSE(tf.one_hot(y,depth=3), prob))
grads = tape.gradient(loss, [w,b])
print(grads[0])
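
Putting the pieces together, a minimal sketch of a full gradient-descent step, assuming tf.Variable parameters and the Keras SGD optimizer (names and learning rate are illustrative):

x = tf.random.normal([2,4])
w = tf.Variable(tf.random.normal([4,3]))
b = tf.Variable(tf.zeros([3]))
y = tf.constant([2,0])
optimizer = tf.keras.optimizers.SGD(learning_rate=0.1)

for step in range(5):
    with tf.GradientTape() as tape:
        logits = x@w+b
        loss = tf.reduce_mean(tf.losses.categorical_crossentropy(
            tf.one_hot(y, depth=3), logits, from_logits=True))
    grads = tape.gradient(loss, [w,b])
    optimizer.apply_gradients(zip(grads, [w,b])) # updates w and b in place
    print(step, float(loss)) # loss decreases on this fixed batch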