acid-0.1.0.0: Artificial Circuit Designer
Safe HaskellNone
LanguageHaskell2010

RPB

Description

General Replay Buffer Types and TypeClasses

Synopsis

Documentation

class Functor b => ReplayBuffer b where Source #

Replay Buffer Interface

Methods

size :: b Tensor -> Int Source #

Return size of current buffer

push :: Int -> b Tensor -> b Tensor -> b Tensor Source #

Push one buffer into another

lookUp :: [Int] -> b Tensor -> b Tensor Source #

Look up the given list of indices

sampleIO :: Int -> b Tensor -> IO (b Tensor) Source #

Take n Random Samples

asTuple :: b Tensor -> (Tensor, Tensor, Tensor, Tensor, Tensor) Source #

Return the Tuple: (s, a, r, s', d) for training

collectExperience :: Agent a => Params -> CircusUrl -> Tracker -> Int -> a -> IO (b Tensor) Source #

Collect Experiences in Buffer

Instances

Instances details
ReplayBuffer Buffer Source #

Vanilla Replay Buffer implements ReplayBuffer

Instance details

Defined in RPB

ReplayBuffer Buffer Source #

Hindsight Experience Replay Buffer implements ReplayBuffer

Instance details

Defined in RPB.HER

randomBatches :: ReplayBuffer b => Int -> Int -> b Tensor -> IO [Transition] Source #

Generate a list of uniformly sampled minibatches

data Buffer a Source #

Vanilla Replay Buffer

Constructors

Buffer 

Fields

Instances

Instances details
Functor Buffer Source #

Vanilla Replay Buffer implements Functor

Instance details

Defined in RPB

Methods

fmap :: (a -> b) -> Buffer a -> Buffer b #

(<$) :: a -> Buffer b -> Buffer a #

Applicative Buffer Source #

This instance is badly defined and exists only so that liftA2 can be used.

Instance details

Defined in RPB

Methods

pure :: a -> Buffer a #

(<*>) :: Buffer (a -> b) -> Buffer a -> Buffer b #

liftA2 :: (a -> b -> c) -> Buffer a -> Buffer b -> Buffer c #

(*>) :: Buffer a -> Buffer b -> Buffer b #

(<*) :: Buffer a -> Buffer b -> Buffer a #

ReplayBuffer Buffer Source #

Vanilla Replay Buffer implements ReplayBuffer

Instance details

Defined in RPB

Eq a => Eq (Buffer a) Source # 
Instance details

Defined in RPB

Methods

(==) :: Buffer a -> Buffer a -> Bool #

(/=) :: Buffer a -> Buffer a -> Bool #

Show a => Show (Buffer a) Source # 
Instance details

Defined in RPB

Methods

showsPrec :: Int -> Buffer a -> ShowS #

show :: Buffer a -> String #

showList :: [Buffer a] -> ShowS #

empty :: Buffer Tensor Source #

Create a new, empty Buffer on the CPU

size' :: Buffer Tensor -> Int Source #

How many Trajectories are currently stored in memory

drop :: Int -> Buffer Tensor -> Buffer Tensor Source #

Drop number of entries from the beginning of the Buffer

push' :: Int -> Buffer Tensor -> Buffer Tensor -> Buffer Tensor Source #

Push one buffer into another one

lookUp' :: [Int] -> Buffer Tensor -> Buffer Tensor Source #

Get the given indices from Buffer

sampleIO' :: Int -> Buffer Tensor -> IO (Buffer Tensor) Source #

Take n random samples from Buffer

asTuple' :: Buffer Tensor -> Transition Source #

Return (State, Action, Reward, Next State, Done) Tuple

collectStep :: Agent a => Params -> CircusUrl -> Tracker -> Int -> Int -> a -> Tensor -> Buffer Tensor -> IO (Buffer Tensor) Source #

Evaluate Policy for T steps and return experience Buffer

collectExperience' :: Agent a => Params -> CircusUrl -> Tracker -> Int -> a -> IO (Buffer Tensor) Source #

Collect experience for a given number of steps