Safe Haskell | None |
---|---|
Language | Haskell2010 |
Twin Delayed Deep Deterministic Policy Gradient Algorithm Defaults
Synopsis
- algorithm :: Algorithm
- verbose :: Bool
- numEpisodes :: Int
- numIterations :: Int
- numSteps :: Int
- randomEpisode :: Int
- numEpochs :: Int
- earlyStop :: Tensor
- batchSize :: Int
- rngSeed :: Int
- aceId :: String
- aceBackend :: String
- aceVariant :: Int
- actionLow :: Float
- actionHigh :: Float
- d :: Int
- c :: Float
- γ :: Tensor
- τ :: Tensor
- decayPeriod :: Int
- σMin :: Float
- σMax :: Float
- σEval :: Tensor
- σAct :: Tensor
- σClip :: Float
- wInit :: Float
- ηφ :: Tensor
- ηθ :: Tensor
- β1 :: Float
- β2 :: Float
- negativeSlope :: Float
- bufferType :: BufferType
- bufferSize :: Int
- warmupPeriode :: Int
- stateClip :: Float
- strategy :: Strategy
- k :: Int
- relTol :: Tensor
Documentation
numEpisodes :: Int Source #
Number of episodes to play
numIterations :: Int Source #
Horizon T, i.e. the number of iterations
randomEpisode :: Int Source #
Perform random exploration every n episodes
aceBackend :: String Source #
PDK/Technology backend of the ACE Environment
aceVariant :: Int Source #
ACE Environment variant
actionHigh :: Float Source #
Action space upper bound
decayPeriod :: Int Source #
Decay Period
negativeSlope :: Float Source #
Negative slope of the Leaky ReLU activation
bufferType :: BufferType Source #
Replay Buffer Type
bufferSize :: Int Source #
Replay Buffer Size
warmupPeriode :: Int Source #
Initial sample-collection period