edelwace-0.1.0.0: HaskTorch Reinforcement Learning Agents for GACE
Safe HaskellNone
LanguageHaskell2010

PPO.Defaults

Description

Proximal Policy Optimization Algorithm Defaults

Synopsis

Documentation

algorithm :: Algorithm Source #

Algorithm ID

verbose :: Bool Source #

Print verbose debug output

bufferType :: BufferType Source #

Replay Buffer Type

numEpisodes :: Int Source #

Number of episodes to play

numSteps :: Int Source #

Number of steps to take in the environment

numEpochs :: Int Source #

How many gradient update steps

numIterations :: Int Source #

Number of iterations

earlyStop :: Tensor Source #

Early stop criterion

batchSize :: Int Source #

Size of the batches during epoch

rngSeed :: Int Source #

Random seed for reproducibility

maxTime :: Float Source #

Maximum time to cut off

aceId :: String Source #

ACE Identifier of the Environment

aceBackend :: String Source #

PDK/Technology backend of the ACE Environment

aceVariant :: Int Source #

ACE Environment variant

actionSpace :: ActionSpace Source #

Discrete or Continuous action space

rewardScale :: Float Source #

Scale for reward centering

ε :: Float Source #

Factor for clipping

δ :: Tensor Source #

Factor in loss function

γ :: Tensor Source #

Discount Factor

τ :: Tensor Source #

Advantage Factor

wInit :: Float Source #

Initial weights

η :: Tensor Source #

Learning Rate

β1 :: Float Source #

Betas

β2 :: Float Source #

Betas