forked from lfd/quantum-rl
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathwrappers.py
More file actions
76 lines (48 loc) · 2.1 KB
/
wrappers.py
File metadata and controls
76 lines (48 loc) · 2.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import numpy as np
import tensorflow as tf
import gym
class ToDoubleTensor(gym.ObservationWrapper):
    """Observation wrapper that hands observations on as float64 tensors."""

    def observation(self, obs):
        """Return ``obs`` converted to a TensorFlow tensor of dtype ``tf.float64``."""
        as_double = tf.convert_to_tensor(obs, dtype=tf.float64)
        return as_double
class ScaledDirectionalEncodingCP(gym.ObservationWrapper):
    """Encode a 4-element observation as angles (scaled + directional).

    Slots 0 and 2 are rescaled linearly to ``[0, 2*pi]``; slots 1 and 3 are
    reduced to their direction only (``pi`` when positive, ``0`` otherwise).
    Presumably slots 1 and 3 are the cart velocity and the pole angular
    velocity of a CartPole environment -- confirm against the wrapped env.
    """

    def observation(self, obs):
        """Encode ``obs`` in place and return it.

        Args:
            obs: Indexable, mutable observation with at least four entries.
                It is modified in place.

        Returns:
            The same ``obs`` object holding the encoded values.
        """
        ## Scaled Encoding
        # Scale cart position (range [-4.8, 4.8]) to range [0, 2pi]
        obs[0] = ((obs[0] + 4.8) / 9.6) * 2 * np.pi
        # Scale pole angle (range [-0.418, 0.418]) to range [0, 2pi]
        obs[2] = ((obs[2] + 0.418) / 0.836) * 2 * np.pi

        ## Directional Encoding
        obs[1] = np.pi if obs[1] > 0 else 0
        # Each slot must be judged by its own sign: obs[1] has already been
        # overwritten above, so testing it here would just copy slot 1's
        # encoding into slot 3.
        obs[3] = np.pi if obs[3] > 0 else 0

        return obs
class ScaledDirectionalEncodingBJ(gym.ObservationWrapper):
    """Encode a 3-element observation as angles (scaled + directional)."""

    def observation(self, obs):
        """Return ``(sum_angle, dealer_angle, ace_angle)`` computed from ``obs``.

        ``obs[0]`` and ``obs[1]`` are rescaled linearly to ``[0, 2*pi]``;
        ``obs[2]`` becomes ``pi`` when truthy and ``0`` otherwise.
        """
        # Player's current sum (range [4, 21]) mapped onto [0, 2pi].
        sum_angle = ((obs[0] - 4) / 17) * 2 * np.pi

        # Dealer's single showing card (range [1, 10]) mapped onto [0, 2pi].
        dealer_angle = ((obs[1] - 1) / 9) * 2 * np.pi

        # The third entry is a flag: encode it as a half turn or no turn.
        if obs[2]:
            ace_angle = np.pi
        else:
            ace_angle = 0

        return (sum_angle, dealer_angle, ace_angle)
class ScaledContEncodingCP(gym.ObservationWrapper):
    """Encode a 4-element observation as angles (scaled + continuous)."""

    def observation(self, obs):
        """Encode ``obs`` in place and return the same object.

        Slots 0 and 2 are rescaled linearly to ``[0, 2*pi]``; slots 1 and 3
        are passed through ``arctan``.
        """
        # Scaled encoding: cart position (range [-4.8, 4.8]) and pole angle
        # (range [-0.418, 0.418]) are each mapped onto [0, 2pi].
        cart_position, pole_angle = obs[0], obs[2]
        obs[0] = ((cart_position + 4.8) / 9.6) * 2 * np.pi
        obs[2] = ((pole_angle + 0.418) / 0.836) * 2 * np.pi

        # Continuous encoding for the remaining two slots.
        for slot in (1, 3):
            obs[slot] = np.arctan(obs[slot])

        return obs
class ContDirectionalEncodingBJ(gym.ObservationWrapper):
    """Encode a 3-element observation as angles (continuous + directional)."""

    def observation(self, obs):
        """Return ``(sum_angle, dealer_angle, ace_angle)`` computed from ``obs``.

        The first two entries are passed through ``arctan``; the third
        becomes ``pi`` when truthy and ``0`` otherwise.
        """
        sum_angle = np.arctan(obs[0])
        dealer_angle = np.arctan(obs[1])

        # The third entry is a flag: encode it as a half turn or no turn.
        if obs[2]:
            ace_angle = np.pi
        else:
            ace_angle = 0

        return (sum_angle, dealer_angle, ace_angle)
class ContinousEncoding(gym.ObservationWrapper):
    """Continuous encoding: apply ``arctan`` to the whole observation."""

    def observation(self, obs):
        """Return the element-wise ``arctan`` of ``obs``."""
        squashed = np.arctan(obs)
        return squashed