Code:
# Build the (state, action) input; the forward pass itself moves into
# train_step below so that it is recorded on the GradientTape
state_action = np.append(old_field.flatten(), next_number).reshape(1, -1)
next_q = target(np.append(game.field.flatten(), action).reshape(1, -1))
nextState = game.field
# no max? theoretically it should take the maximum over next actions
target_q_values = reward + 0.9 * next_q
# loss = tf.convert_to_tensor((target_q_values - current) ** 2)
loss = train_step(state_action, target_q_values)
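For reference, the DQN/Bellman target is r + gamma * max over a' of Q_target(s', a'). With a network that scores one (state, action) pair at a time, taking the max means scoring every legal next action with the target network. A rough sketch, reusing target, game, and reward from the post; possible_actions is a hypothetical list of legal moves, not something in the original code:

next_qs = tf.concat(
    [target(np.append(game.field.flatten(), a).reshape(1, -1))
     for a in possible_actions],  # hypothetical list of legal next actions
    axis=0)
target_q_values = reward + 0.9 * tf.reduce_max(next_qs)  # r + gamma * max_a' Q_target(s', a')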
Train_step:
@tf.function
def train_step(state_action, target_q_values):
    with tf.GradientTape() as tape:
        # the forward pass and the loss must be computed inside the tape,
        # otherwise tape.gradient returns None for every variable
        current = model(state_action)
        loss = mse(target_q_values, current)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    return loss
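For anyone reproducing this, a minimal end-to-end sketch of the surrounding setup. The layer sizes, the 4x4 field, Adam, and the dummy transition are my assumptions; the post does not show how model, target, optimizer, or mse are defined:

import numpy as np
import tensorflow as tf

def build_q_net():
    # Q(s, a): input is the flattened field plus one action scalar, output is one Q-value
    return tf.keras.Sequential([
        tf.keras.layers.Dense(64, activation="relu"),
        tf.keras.layers.Dense(1),
    ])

model = build_q_net()       # online network (assumed architecture)
target = build_q_net()      # target network, same architecture
optimizer = tf.keras.optimizers.Adam(1e-3)
mse = tf.keras.losses.MeanSquaredError()

@tf.function
def train_step(state_action, target_q_values):
    with tf.GradientTape() as tape:
        current = model(state_action)          # forward pass inside the tape
        loss = mse(target_q_values, current)
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    return loss

# Dummy transition, just to exercise one update (assumed 4x4 field)
old_field = np.random.rand(4, 4).astype(np.float32)
state_action = np.append(old_field.flatten(), 1.0).reshape(1, -1).astype(np.float32)
model(state_action)                            # build both nets so weights exist
target(state_action)
target.set_weights(model.get_weights())        # periodic target-network sync
print(float(train_step(state_action, tf.constant([[1.0]]))))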
submitted by /u/Striking-Warning9533