quantum-rl/wrappers.py at master · ZhengtongYan/quantum-rl · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import numpy as np
import tensorflow as tf
import gym

class ToDoubleTensor(gym.ObservationWrapper):
    """Observation wrapper that turns each observation into a float64 tensor."""

    def observation(self, obs):
        """Return ``obs`` converted to a TensorFlow tensor of dtype ``tf.float64``."""
        as_double = tf.convert_to_tensor(obs, dtype=tf.float64)
        return as_double

class ScaledDirectionalEncodingCP(gym.ObservationWrapper):
    """Encode a four-element observation as angles.

    Elements 0 and 2 (cart position and pole angle) are rescaled linearly
    onto [0, 2*pi]. Elements 1 and 3 are reduced to their direction: ``pi``
    when the value is strictly positive, ``0`` otherwise.
    """

    def observation(self, obs):
        """Return the angle-encoded version of ``obs``.

        Args:
            obs: Mutable, indexable observation with at least four numeric
                entries that provides a ``copy()`` method (e.g. a NumPy
                array).

        Returns:
            A copy of ``obs`` with all four entries replaced by their
            encoded values; the object passed in is left unmodified.
        """
        # Work on a copy so the caller's observation is not changed in place.
        encoded = obs.copy()

        ## Scaled Encoding
        # Scale cart position (range [-4.8, 4.8]) to range [0, 2pi]
        encoded[0] = ((encoded[0] + 4.8) / 9.6) * 2 * np.pi

        # Scale pole angle (range [-0.418, 0.418]) to range [0, 2pi]
        encoded[2] = ((encoded[2] + 0.418) / 0.836) * 2 * np.pi

        ## Directional Encoding
        # Each entry is encoded from its OWN sign. The previous version tested
        # index 1 (already overwritten with pi/0) when encoding index 3, so
        # the value at index 3 was never taken into account.
        encoded[1] = np.pi if encoded[1] > 0 else 0
        encoded[3] = np.pi if encoded[3] > 0 else 0

        return encoded

class ScaledDirectionalEncodingBJ(gym.ObservationWrapper):
    """Encode a three-element observation as a tuple of angles.

    The first two entries are rescaled linearly, the third is mapped to
    ``pi`` or ``0`` depending on its truthiness.
    """

    def observation(self, obs):
        """Return ``(curr_sum, dealer, usable)`` computed from ``obs``."""
        # Player's current sum: shift by 4, divide by 17 (range [4, 21]),
        # then stretch the resulting fraction to [0, 2pi].
        sum_fraction = (obs[0] - 4) / 17
        sum_angle = sum_fraction * 2 * np.pi

        # Dealer's showing card: shift by 1, divide by 9 (range [1, 10]),
        # then stretch the resulting fraction to [0, 2pi].
        dealer_fraction = (obs[1] - 1) / 9
        dealer_angle = dealer_fraction * 2 * np.pi

        # Flag entry: pi when truthy, 0 otherwise.
        if obs[2]:
            ace_angle = np.pi
        else:
            ace_angle = 0

        return (sum_angle, dealer_angle, ace_angle)

class ScaledContEncodingCP(gym.ObservationWrapper):
    """Encode a four-element observation as angles, in place.

    Entries 0 and 2 (cart position and pole angle) are rescaled linearly
    onto [0, 2*pi]; entries 1 and 3 are passed through ``np.arctan``.
    """

    def observation(self, obs):
        """Overwrite the four entries of ``obs`` with their encodings and return it."""
        # Scaled encoding, described as (index, offset, span):
        #   cart position, range [-4.8, 4.8]     -> [0, 2pi]
        #   pole angle,    range [-0.418, 0.418] -> [0, 2pi]
        for idx, offset, span in ((0, 4.8, 9.6), (2, 0.418, 0.836)):
            obs[idx] = ((obs[idx] + offset) / span) * 2 * np.pi

        # Continuous encoding of the remaining two entries.
        for idx in (1, 3):
            obs[idx] = np.arctan(obs[idx])

        return obs

class ContDirectionalEncodingBJ(gym.ObservationWrapper):
    """Encode a three-element observation as a tuple of angles.

    The first two entries go through ``np.arctan``; the third is mapped to
    ``pi`` or ``0`` depending on its truthiness.
    """

    def observation(self, obs):
        """Return ``(curr_sum, dealer, usable)`` computed from ``obs``."""
        sum_angle = np.arctan(obs[0])
        dealer_angle = np.arctan(obs[1])

        # Flag entry: pi when truthy, 0 otherwise.
        if obs[2]:
            ace_angle = np.pi
        else:
            ace_angle = 0

        return (sum_angle, dealer_angle, ace_angle)

class ContinousEncoding(gym.ObservationWrapper):
    """Observation wrapper that applies ``np.arctan`` to the whole observation."""

    def observation(self, obs):
        """Return the arctangent of ``obs`` as computed by ``np.arctan``."""
        encoded = np.arctan(obs)
        return encoded