actor.learn(s, a, td_error)
def learn(self, s, a, td):
    """Feed one (s, a, td) sample to the session and return the evaluated ``exp_v``.

    Args:
        s: Array indexed as ``s[np.newaxis, :]``, so it must have at least
            one axis. A new leading axis of length 1 is added before it is
            fed under the key ``self.s``.
        a: Value fed unchanged under the key ``self.a``.
        td: Value fed unchanged under the key ``self.td_error``.

    Returns:
        The second element of what ``self.sess.run`` returns for the fetch
        list ``[self.train_op, self.exp_v]``, i.e. the evaluated ``exp_v``.
    """
    # NOTE(review): presumably `s` is a single 1-D observation and the graph
    # expects a leading batch axis -- confirm against where self.s is defined.
    batched_s = s[np.newaxis, :]
    feed_dict = {self.s: batched_s, self.a: a, self.td_error: td}
    # The train_op result is discarded; only exp_v is handed back.
    _, exp_v = self.sess.run([self.train_op, self.exp_v], feed_dict)
    return exp_v
输入变量的数值:
步进:
s = s[np.newaxis, :] 在 s 的最前面增加一个维度: [] ----> [[]]
步进:
feed_dict = {self.s: s, self.a: a, self.td_error: td}