Safe Haskell	None
Language	Haskell2010

HyperParameters.Defaults

Description

Default hyperparameters for the Twin Delayed Deep Deterministic Policy Gradient (TD3) algorithm

Synopsis

Documentation

numEpisodes :: Int Source #

Number of episodes to play

horizonT :: Int Source #

Maximum Number of Steps per Episode

numEpochs :: Int Source #

Number of epochs to train

batchSize :: Int Source #

Mini batch size

rngSeed :: Int Source #

Random seed for reproducibility

actionLow :: Float Source #

Action space lower bound

actionHigh :: Float Source #

Action space upper bound

d :: Int Source #

Policy and Target Update Delay

c :: Float Source #

Noise clipping

γ :: Tensor Source #

Discount Factor

τ :: Tensor Source #

Soft update coefficient (sometimes called the "polyak" coefficient) of the target networks, τ ∈ [0,1]

decayPeriod :: Int Source #

Decay Period

σMin :: Float Source #

Noise Clipping Minimum

σMax :: Float Source #

Noise Clipping Maximum

σEval :: Tensor Source #

Evaluation Noise standard deviation (σ̃)

σAct :: Tensor Source #

Action Noise standard deviation

σClip :: Float Source #

Noise Clipping

hidDim :: Int Source #

Number of units per hidden layer

wInit :: Float Source #

Initial weights

ηφ :: Tensor Source #

Actor Learning Rate

ηθ :: Tensor Source #

Critic Learning Rate

β1 :: Float Source #

Adam hyperparameter β1

β2 :: Float Source #

Adam hyperparameter β2

negativeSlope :: Float Source #

Leaky ReLU Slope

bufferSize :: Int Source #

Replay Buffer Size

explFreq :: Int Source #

Frequency of Random Exploration Episodes

evalFreq :: Int Source #

Frequency of Evaluation Episodes

k :: Int Source #

Number of Additional Targets to sample