| Safe Haskell | None |
|---|---|
| Language | Haskell2010 |
Lib
Description
Utility and Helper functions for EDELWACE
Synopsis
- data Algorithm
- data ReplayMemory
- data Strategy
- data Mode
- data Args = Args {}
- type Transition = (Tensor, Tensor, Tensor, Tensor, Tensor)
- successRate :: Tensor -> Float
- range :: Int -> [Int]
- fst' :: (a, b, c) -> a
- delete' :: Ord a => [a] -> Set a -> Set a
- splits :: [Int] -> [[Int]]
- fst3 :: (a, b, c) -> a
- both :: (a -> b) -> (a, a) -> (b, b)
- uncurry3 :: (a -> b -> c -> d) -> (a, b, c) -> d
- lookup' :: Ord k => [k] -> Map k a -> Maybe [a]
- tmap :: (Tensor -> Tensor) -> Transition -> Transition
- (//) :: Integral a => a -> a -> a
- (%) :: Integral a => a -> a -> a
- currentTimeStamp :: String -> IO String
- currentTimeStamp' :: IO String
- createModelArchiveDir :: String -> IO String
- createModelArchiveDir' :: String -> String -> String -> String -> String -> String -> IO String
- saveOptim :: Adam -> FilePath -> IO ()
- loadOptim :: Int -> Float -> Float -> FilePath -> IO Adam
- weightLimit :: Linear -> Float
- data Initializer
- initWeights :: Initializer -> Float -> Float -> [Int] -> IO IndependentTensor
- weightInit :: Initializer -> Float -> Float -> Linear -> IO Linear
- weightInit' :: Initializer -> Float -> Float -> Linear -> IO Linear
- weightInitUniform :: Float -> Float -> Linear -> IO Linear
- weightInitUniform' :: Linear -> IO Linear
- weightInitNormal :: Float -> Float -> Linear -> IO Linear
- weightInitNormal' :: Linear -> IO Linear
- softUpdate :: Tensor -> Tensor -> Tensor -> Tensor
- softSync :: Parameterized f => Tensor -> f -> f -> IO f
- copySync :: Parameterized f => f -> f -> f
Documentation
data Algorithm Source #
Available Algorithms
Constructors
| TD3 | Twin Delayed Deep Deterministic Policy Gradient |
| SAC | Soft Actor Critic |
| PPO | Proximal Policy Optimization |
data ReplayMemory Source #
Available Replay Buffer Types
Constructors
| RPB | Vanilla Replay Buffer |
| PER | Prioritized Experience Replay |
| MEM | PPO-style Replay Memory |
| ERE | Emphasizing Recent Experience |
| HER | Hindsight Experience Replay |
Instances
data Strategy Source #
Hindsight Experience Replay strategies for choosing Goals (a sketch of how they differ follows the instance list)
Constructors
| Final | Only Final States are additional targets |
| Random | Replay with k random states encountered so far during training |
| Episode | Replay with k random states from the same episode |
| Future | Replay with k random states from the same episode, observed after the transition |
Instances
| Eq Strategy Source # | |
| Read Strategy Source # | |
| Show Strategy Source # | |
| Generic Strategy Source # | |
| ToJSON Strategy Source # | |
| FromJSON Strategy Source # | |
| type Rep Strategy Source # | |
Defined in Lib type Rep Strategy = D1 ('MetaData "Strategy" "Lib" "acid-0.1.0.0-Kasfqr97FKi1MHFsFLmEZd" 'False) ((C1 ('MetaCons "Final" 'PrefixI 'False) (U1 :: Type -> Type) :+: C1 ('MetaCons "Random" 'PrefixI 'False) (U1 :: Type -> Type)) :+: (C1 ('MetaCons "Episode" 'PrefixI 'False) (U1 :: Type -> Type) :+: C1 ('MetaCons "Future" 'PrefixI 'False) (U1 :: Type -> Type))) | |
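A minimal sketch of how these strategies differ, using a hypothetical helper that lists which state indices of an episode are eligible as replacement goals (the buffer would additionally sample k of them at random; names and behaviour are illustrative, not the library's implementation):

```haskell
-- Eligible goal indices for the transition at step t of an episode with
-- steps [0 .. n - 1]. HER's Random strategy samples across the whole
-- buffer; it is shown episode-local here purely for illustration.
eligibleGoals :: Strategy -> Int -> Int -> [Int]
eligibleGoals Final   _ n = [n - 1]        -- only the final state
eligibleGoals Episode _ n = [0 .. n - 1]   -- any state of the same episode
eligibleGoals Future  t n = [t .. n - 1]   -- states observed after step t
eligibleGoals Random  _ n = [0 .. n - 1]   -- see note above
```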
data Args Source #
Command Line Arguments
Constructors
| Args | |
type Transition = (Tensor, Tensor, Tensor, Tensor, Tensor) Source #
Type Alias for Transition Tuple (state, action, reward, state', done)
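For illustration, a single Transition could be assembled from plain lists (arbitrary values; assumes the Lib exports documented here are in scope):

```haskell
import qualified Torch as T

exampleTransition :: Transition
exampleTransition =
    ( T.asTensor [0.1, 0.2 :: Float]  -- state
    , T.asTensor [0.5 :: Float]       -- action
    , T.asTensor [1.0 :: Float]       -- reward
    , T.asTensor [0.3, 0.4 :: Float]  -- next state s'
    , T.asTensor [False]              -- done flag
    )
```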
successRate :: Tensor -> Float Source #
Calculate success rate given 1D Boolean done Tensor
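A minimal sketch of such a computation, assuming True marks a successful episode (not necessarily the actual implementation):

```haskell
import qualified Torch as T

-- Cast the Boolean done Tensor to Float, take the mean, read the scalar back.
successRateSketch :: T.Tensor -> Float
successRateSketch = T.asValue . T.mean . T.toType T.Float
```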
tmap :: (Tensor -> Tensor) -> Transition -> Transition Source #
Map an appropriate function over a transition tuple
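Illustrative usage: cast every Tensor of a Transition in one call (any Tensor -> Tensor function works the same way):

```haskell
import qualified Torch as T

castTransition :: Transition -> Transition
castTransition = tmap (T.toType T.Float)
```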
currentTimeStamp' :: IO String Source #
Current Timestamp with default formatting: "%Y%m%d-%H%M%S"
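One way such a timestamp could be produced with the time package (a sketch, not necessarily the actual implementation):

```haskell
import Data.Time.Clock  (getCurrentTime)
import Data.Time.Format (defaultTimeLocale, formatTime)

-- Format the current UTC time with the documented default pattern.
timeStampSketch :: IO String
timeStampSketch = formatTime defaultTimeLocale "%Y%m%d-%H%M%S" <$> getCurrentTime
```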
createModelArchiveDir :: String -> IO String Source #
Create a model archive directory for the given algorithm
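A hypothetical sketch of such a helper, assuming a "./models" base path and a timestamped sub-directory per run (the real path layout may differ):

```haskell
import System.Directory (createDirectoryIfMissing)

archiveDirSketch :: String -> IO String
archiveDirSketch algorithm = do
    timeStamp <- currentTimeStamp'
    let path = "./models/" ++ algorithm ++ "/" ++ timeStamp
    createDirectoryIfMissing True path  -- also creates missing parents
    pure path
```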
createModelArchiveDir' :: String -> String -> String -> String -> String -> String -> IO String Source #
Create a model archive directory for the given algorithm, ACE ID, and backend
weightLimit :: Linear -> Float Source #
Calculate weight Limits based on Layer Dimensions
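A sketch of a fan-in based limit, 1 / sqrt(fanIn), as commonly used for DDPG/TD3 style initialization; whether weightLimit uses exactly this formula is an assumption:

```haskell
import qualified Torch    as T
import qualified Torch.NN as NN

weightLimitSketch :: NN.Linear -> Float
weightLimitSketch layer = 1.0 / sqrt fanIn
  where
    -- Linear weights have shape [outFeatures, inFeatures]
    fanIn = fromIntegral . last . T.shape . T.toDependent . NN.weight $ layer
```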
data Initializer Source #
Type of weight initialization
Constructors
| Normal | Normally distributed weights |
| Uniform | Uniformly distributed weights |
| XavierNormal | Using T.xavierNormal |
| XavierUniform | Using T.xavierUniform |
| KaimingNormal | Using T.kaimingNormal |
| KaimingUniform | Using T.kaimingUniform |
| Dirac | Dirac delta initialization |
| Eye | Identity matrix initialization |
| Ones | All-ones weights |
| Zeros | All-zeros weights |
| Constant | Constant-valued weights |
initWeights :: Initializer -> Float -> Float -> [Int] -> IO IndependentTensor Source #
Weights for a layer given limits and dimensions.
weightInit :: Initializer -> Float -> Float -> Linear -> IO Linear Source #
Initialize Weights of Linear Layer
weightInit' :: Initializer -> Float -> Float -> Linear -> IO Linear Source #
Initialize Weights and Bias of Linear Layer
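Illustrative usage, assuming the two Float arguments act as lower and upper limit for a Uniform initializer:

```haskell
import qualified Torch.NN as NN

initExample :: IO NN.Linear
initExample = do
    layer <- NN.sample (NN.LinearSpec 4 2)  -- 4 inputs, 2 outputs
    weightInit' Uniform (-0.1) 0.1 layer    -- re-initialize weights and bias
```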
weightInitUniform :: Float -> Float -> Linear -> IO Linear Source #
Initialize weights uniformly given upper and lower bounds
weightInitNormal :: Float -> Float -> Linear -> IO Linear Source #
Initialize weights normally given mean and standard deviation
softUpdate :: Tensor -> Tensor -> Tensor -> Tensor Source #
Softly update parameters from Online Net to Target Net
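This corresponds to the usual Polyak update θ' ← τ·θ + (1 − τ)·θ'; a sketch, assuming the arguments are (τ, online parameter, target parameter):

```haskell
import qualified Torch as T

softUpdateSketch :: T.Tensor -> T.Tensor -> T.Tensor -> T.Tensor
softUpdateSketch tau online target =
    tau * online + (T.onesLike tau - tau) * target
```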
softSync :: Parameterized f => Tensor -> f -> f -> IO f Source #
Softly copy parameters from Online Net to Target Net
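A sketch of how such a sync could be composed from softUpdate over all flattened parameters (argument order of softUpdate is assumed as above):

```haskell
import qualified Torch    as T
import qualified Torch.NN as NN

softSyncSketch :: NN.Parameterized f => T.Tensor -> f -> f -> IO f
softSyncSketch tau online target = do
    let online' = map T.toDependent (NN.flattenParameters online)
        target' = map T.toDependent (NN.flattenParameters target)
        blended = zipWith (softUpdate tau) online' target'
    params <- mapM T.makeIndependent blended
    pure $ NN.replaceParameters target params
```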
copySync :: Parameterized f => f -> f -> f Source #
Hard copy of parameters from one net to the other