edelwace-0.1.0.0: HaskTorch Reinforcement Learning Agents for GACE
Safe HaskellNone
LanguageHaskell2010

TD3.Defaults

Description

Twin Delayed Deep Deterministic Policy Gradient Algorithm Defaults

Synopsis

Documentation

algorithm :: Algorithm Source #

Algorithm ID

verbose :: Bool Source #

Print verbose debug output

numEpisodes :: Int Source #

Number of episodes to play

numSteps :: Int Source #

Number of Steps to take with policy

randomEpisode :: Int Source #

Random Exploration every n Episodes

numEpochs :: Int Source #

Number of epochs to train

earlyStop :: Tensor Source #

Early stop criterion

batchSize :: Int Source #

Mini batch of N transitions

rngSeed :: Int Source #

Random seed for reproducibility

aceId :: String Source #

ACE Identifier of the Environment

aceBackend :: String Source #

PDK/Technology backend of the ACE Environment

aceVariant :: Int Source #

ACE Environment variant

actionLow :: Float Source #

Action space lower bound

actionHigh :: Float Source #

Action space upper bound

d :: Int Source #

Policy and Target Update Delay

c :: Float Source #

Noise clipping

γ :: Tensor Source #

Discount Factor

τ :: Tensor Source #

Soft Update coefficient (sometimes "polyak") of the target networks τ ∈ [0,1]

decayPeriod :: Int Source #

Decay Period

σMin :: Float Source #

Noise Clipping Minimum

σMax :: Float Source #

Noise Clipping Maximum

σEval :: Tensor Source #

Evaluation Noise standard deviation (σ~)

σAct :: Tensor Source #

Action Noise standard deviation

wInit :: Float Source #

Initial weights

ηφ :: Tensor Source #

Actor Learning Rate

ηθ :: Tensor Source #

Critic Learning Rate

β1 :: Float Source #

Betas

β2 :: Float Source #

Betas

negativeSlope :: Float Source #

Leaky ReLU Slope

bufferType :: BufferType Source #

Replay Buffer Type

bufferSize :: Int Source #

Replay Buffer Size

warmupPeriode :: Int Source #

Initial sample collecting period

stateClip :: Float Source #

Range for clipping scaled states

strategy :: Strategy Source #

Target Sampling Strategy

k :: Int Source #

Number of Additional Targets to sample

relTol :: Tensor Source #

Error Tolerance for Target / Reward Calculation