Safe Haskell | None |
---|---|
Language | Haskell2010 |
Utility and Helper functions for EDELWACE
Synopsis
- data Algorithm
- data ReplayMemory
- data Strategy
- data Mode
- data Args = Args {}
- type Transition = (Tensor, Tensor, Tensor, Tensor, Tensor)
- successRate :: Tensor -> Float
- range :: Int -> [Int]
- fst' :: (a, b, c) -> a
- delete' :: Ord a => [a] -> Set a -> Set a
- splits :: [Int] -> [[Int]]
- fst3 :: (a, b, c) -> a
- both :: (a -> b) -> (a, a) -> (b, b)
- uncurry3 :: (a -> b -> c -> d) -> (a, b, c) -> d
- lookup' :: Ord k => [k] -> Map k a -> Maybe [a]
- tmap :: (Tensor -> Tensor) -> Transition -> Transition
- (//) :: Integral a => a -> a -> a
- (%) :: Integral a => a -> a -> a
- currentTimeStamp :: String -> IO String
- currentTimeStamp' :: IO String
- createModelArchiveDir :: String -> IO String
- createModelArchiveDir' :: String -> String -> String -> String -> String -> String -> IO String
- saveOptim :: Adam -> FilePath -> IO ()
- loadOptim :: Int -> Float -> Float -> FilePath -> IO Adam
- weightLimit :: Linear -> Float
- data Initializer
- initWeights :: Initializer -> Float -> Float -> [Int] -> IO IndependentTensor
- weightInit :: Initializer -> Float -> Float -> Linear -> IO Linear
- weightInit' :: Initializer -> Float -> Float -> Linear -> IO Linear
- weightInitUniform :: Float -> Float -> Linear -> IO Linear
- weightInitUniform' :: Linear -> IO Linear
- weightInitNormal :: Float -> Float -> Linear -> IO Linear
- weightInitNormal' :: Linear -> IO Linear
- softUpdate :: Tensor -> Tensor -> Tensor -> Tensor
- softSync :: Parameterized f => Tensor -> f -> f -> IO f
- copySync :: Parameterized f => f -> f -> f
Documentation
data Algorithm Source #

Available Algorithms
Constructor | Description
---|---
TD3 | Twin Delayed Deep Deterministic Policy Gradient
SAC | Soft Actor Critic
PPO | Proximal Policy Optimization
data ReplayMemory Source #
Available Replay Buffer Types
Constructor | Description
---|---
RPB | Vanilla Replay Buffer
PER | Prioritized Experience Replay
MEM | PPO-style Replay Memory
ERE | Emphasizing Recent Experience
HER | Hindsight Experience Replay
data Strategy Source #

Hindsight Experience Replay Strategies for choosing Goals
Constructor | Description
---|---
Final | Only final states are additional targets
Random | Replay with k random states encountered so far during training
Episode | Replay with k random states from the same episode
Future | Replay with k random states from the same episode, observed after the transition
Instances
- Eq Strategy Source #
- Read Strategy Source #
- Show Strategy Source #
- Generic Strategy Source #
- ToJSON Strategy Source #
- FromJSON Strategy Source #
- type Rep Strategy Source #, defined in Lib:

  type Rep Strategy = D1 ('MetaData "Strategy" "Lib" "acid-0.1.0.0-Kasfqr97FKi1MHFsFLmEZd" 'False) ((C1 ('MetaCons "Final" 'PrefixI 'False) (U1 :: Type -> Type) :+: C1 ('MetaCons "Random" 'PrefixI 'False) (U1 :: Type -> Type)) :+: (C1 ('MetaCons "Episode" 'PrefixI 'False) (U1 :: Type -> Type) :+: C1 ('MetaCons "Future" 'PrefixI 'False) (U1 :: Type -> Type)))
data Args Source #

Command Line Arguments

Constructor | Description
---|---
Args | Command line arguments record
type Transition = (Tensor, Tensor, Tensor, Tensor, Tensor) Source #
Type Alias for Transition Tuple (state, action, reward, state', done)
successRate :: Tensor -> Float Source #
Calculate the success rate given a 1D Boolean done Tensor
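For orientation, a minimal sketch of how such a rate can be computed with Hasktorch; the name successRate' and the exact definition below are assumptions, not necessarily this module's code:

```haskell
import Torch (Tensor, asTensor, asValue, mean, toDType)
import qualified Torch.DType as D

-- Cast the Boolean tensor to Float and take the mean,
-- i.e. the fraction of True (done) entries.
successRate' :: Tensor -> Float
successRate' = asValue . mean . toDType D.Float

-- >>> successRate' (asTensor [True, False, True, True])
-- 0.75
```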
tmap :: (Tensor -> Tensor) -> Transition -> Transition Source #
Map a Tensor function over all five elements of a transition tuple
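Hypothetical usage, given this module's Transition and tmap and assuming Hasktorch's toDevice:

```haskell
import Torch (Device (..), DeviceType (..), toDevice)

-- Move every tensor of a transition to the CPU in one go.
toCpu :: Transition -> Transition
toCpu = tmap (toDevice (Device CPU 0))
```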
currentTimeStamp' :: IO String Source #
Current Timestamp with default formatting: "%Y%m%d-%H%M%S"
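A sketch of how such a timestamp can be produced with Data.Time (an assumed equivalent, not necessarily the actual definition):

```haskell
import Data.Time.Clock (getCurrentTime)
import Data.Time.Format (defaultTimeLocale, formatTime)

-- Format the current time with the given format string;
-- "%Y%m%d-%H%M%S" yields e.g. "20240131-235959".
timeStamp :: String -> IO String
timeStamp fmt = formatTime defaultTimeLocale fmt <$> getCurrentTime
```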
createModelArchiveDir :: String -> IO String Source #
Create a model archive directory for the given algorithm
createModelArchiveDir' :: String -> String -> String -> String -> String -> String -> IO String Source #
Create a model archive directory for the given algorithm, ACE ID, and backend
weightLimit :: Linear -> Float Source #
Calculate weight Limits based on Layer Dimensions
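A plausible definition, in the spirit of the DDPG/TD3 reference implementations, is the fan-in based limit 1 / sqrt(fanIn); the sketch below is an assumption, and the actual limit may differ:

```haskell
import Torch (shape, toDependent)
import Torch.NN (Linear (..))

-- 1 / sqrt(fanIn), where fanIn is the input dimension of the
-- weight matrix, i.e. the last component of its shape.
weightLimit' :: Linear -> Float
weightLimit' layer = 1.0 / sqrt (fromIntegral fanIn)
  where
    fanIn = last . shape . toDependent . weight $ layer
```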
data Initializer Source #
Type of weight initialization
Constructor | Description
---|---
Normal | Normally distributed weights
Uniform | Uniformly distributed weights
XavierNormal | Using T.xavierNormal
XavierUniform | Using T.xavierUniform
KaimingNormal | Using T.kaimingNormal
KaimingUniform | Using T.kaimingUniform
Dirac | Dirac delta initialization
Eye | Identity matrix
Ones | All ones
Zeros | All zeros
Constant | Constant value
initWeights :: Initializer -> Float -> Float -> [Int] -> IO IndependentTensor Source #
Weights for a layer given limits and dimensions.
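As an illustration, a sketch of the Uniform case only (the helper name initUniform is hypothetical, and the other Initializer cases are omitted): sample in [0, 1), rescale to [lo, hi), and wrap the result as an IndependentTensor.

```haskell
import Torch (IndependentTensor, addScalar, makeIndependent, mulScalar, randIO')

-- Scale a [0, 1) sample to [lo, hi) and detach it into an
-- IndependentTensor suitable as a layer parameter.
initUniform :: Float -> Float -> [Int] -> IO IndependentTensor
initUniform lo hi dims = randIO' dims >>= makeIndependent . scale
  where
    scale = addScalar lo . mulScalar (hi - lo)
```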
weightInit :: Initializer -> Float -> Float -> Linear -> IO Linear Source #
Initialize Weights of Linear Layer
weightInit' :: Initializer -> Float -> Float -> Linear -> IO Linear Source #
Initialize Weights and Bias of Linear Layer
weightInitUniform :: Float -> Float -> Linear -> IO Linear Source #
Initialize weights uniformly given upper and lower bounds
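Hypothetical usage, re-initializing a freshly sampled layer (LinearSpec and sample are Hasktorch's standard layer construction; the lo/hi argument order is assumed):

```haskell
import Torch.NN (Linear, LinearSpec (..), sample)

-- Build a fresh 4 -> 64 layer, then re-initialize its
-- weights uniformly in [-0.1, 0.1].
newLayer :: IO Linear
newLayer = sample (LinearSpec 4 64) >>= weightInitUniform (-0.1) 0.1
```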
weightInitNormal :: Float -> Float -> Linear -> IO Linear Source #
Initialize weights normally given mean and standard deviation
softUpdate :: Tensor -> Tensor -> Tensor -> Tensor Source #
Softly update parameters from Online Net to Target Net
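This is the usual Polyak averaging θ' ← τ · θ + (1 − τ) · θ'. A minimal sketch, assuming Hasktorch's Num instance for Tensor:

```haskell
import Torch (Tensor, onesLike)

-- Blend online parameters θ into target parameters θ' with factor τ.
polyak :: Tensor -> Tensor -> Tensor -> Tensor
polyak tau online target = tau * online + (onesLike tau - tau) * target
```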
softSync :: Parameterized f => Tensor -> f -> f -> IO f Source #
Softly copy parameters from Online Net to Target Net
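A sketch of how this could be realized via flattenParameters / replaceParameters, reusing softUpdate from above (assumed structure; the actual implementation may differ):

```haskell
import Torch (Tensor, makeIndependent, toDependent)
import Torch.NN (Parameterized, flattenParameters, replaceParameters)

-- Apply softUpdate to every parameter pair and rebuild the target net.
softSyncSketch :: Parameterized f => Tensor -> f -> f -> IO f
softSyncSketch tau online target = do
    let params  = map toDependent (flattenParameters online)
        params' = map toDependent (flattenParameters target)
        blended = zipWith (softUpdate tau) params params'
    replaceParameters target <$> mapM makeIndependent blended
```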
copySync :: Parameterized f => f -> f -> f Source #
Hard Copy of Parameter from one net to the other