Safe Haskell | None |
---|---|
Language | Haskell2010 |
Proximal Policy Optimization Algorithm Defaults
Synopsis
- algorithm :: Algorithm
- verbose :: Bool
- bufferType :: BufferType
- numEpisodes :: Int
- numSteps :: Int
- numEpochs :: Int
- numIterations :: Int
- earlyStop :: Tensor
- batchSize :: Int
- rngSeed :: Int
- maxTime :: Float
- aceId :: String
- aceBackend :: String
- aceVariant :: Int
- actionSpace :: ActionSpace
- rewardScale :: Float
- ε :: Float
- δ :: Tensor
- γ :: Tensor
- τ :: Tensor
- wInit :: Float
- η :: Tensor
- β1 :: Float
- β2 :: Float
Documentation
bufferType :: BufferType Source #
Replay Buffer Type
numEpisodes :: Int Source #
Number of episodes to play
numIterations :: Int Source #
Number of iterations
aceBackend :: String Source #
PDK/Technology backend of the ACE Environment
aceVariant :: Int Source #
ACE Environment variant
actionSpace :: ActionSpace Source #
Type of action space: Discrete or Continuous
rewardScale :: Float Source #
Scaling factor used for reward centering