edelwace-0.1.0.0: HaskTorch Reinforcement Learning Agents for GACE

Safe Haskell	None
Language	Haskell2010

SAC.Defaults

Description

Soft Actor Critic Algorithm Defaults

Synopsis

Documentation

algorithm :: Algorithm Source #

Algorithm ID

verbose :: Bool Source #

Print verbose debug output

bufferType :: BufferType Source #

Replay Buffer Type

numSteps :: Int Source #

Number of steps to take in the environment

numEpochs :: Int Source #

Number of gradient update steps

numIterations :: Int Source #

Total Number of iterations, depends on bufferType.

earlyStop :: Tensor Source #

Early stop criterion

minReward :: Float Source #

Reward Lower Bound

batchSize :: Int Source #

Size of the batches during epoch

rngSeed :: Int Source #

Random seed for reproducibility

maxTime :: Float Source #

Maximum time before cut-off

aceId :: String Source #

ACE Identifier of the Environment

aceBackend :: String Source #

PDK/Technology backend of the ACE Environment

aceVariant :: Int Source #

ACE Environment variant

γ :: Tensor Source #

Discount Factor

τ :: Tensor Source #

Smoothing Coefficient

εNoise :: Tensor Source #

Action Noise

αLearned :: Bool Source #

Whether the temperature coefficient is fixed or learned (see αInit)

αInit :: Tensor Source #

Temperature Coefficient

σMin :: Float Source #

Lower Variance Clipping

σMax :: Float Source #

Upper Variance Clipping

rewardScale :: Tensor Source #

Reward Scaling Factor

ρ :: Tensor Source #

Reward Scaling Factor

d :: Int Source #

Update Step frequency

εConst :: Tensor Source #

Priority update factor

wInit :: Float Source #

Initial weights

ηπ :: Tensor Source #

Learning Rate for Actor / Policy

ηq :: Tensor Source #

Learning Rate for Critic(s)

ηα :: Tensor Source #

Learning Rate for Alpha

β1 :: Float Source #

First beta coefficient (β1)

β2 :: Float Source #

Second beta coefficient (β2)

bufferSize :: Int Source #

Maximum size of Replay Buffer

αStart :: Float Source #

Powerlaw Exponent

βStart :: Float Source #

Weight Exponent

βFrames :: Int Source #

Weight Exponent Delay

η0 :: Float Source #

Initial η

ηT :: Float Source #

Final η

cMin :: Int Source #

Minimum Sampling Range

strategy :: Strategy Source #

Target Sampling Strategy

k :: Int Source #

Number of Additional Targets to sample

relTol :: Tensor Source #

Error Tolerance for Target / Reward Calculation