hasktorch-gradually-typed-0.2.0.0: experimental project for hasktorch
Safe Haskell: Safe-Inferred
Language: Haskell2010

Torch.GraduallyTyped.NN.Transformer.GTransformer

Documentation

data GTransformer (posEnc :: Type) (relPosEnc :: Type) (initialLayerNorm :: Type) (initialDropout :: Type) (stack :: Type) (finalLayerNorm :: Type) (finalDropout :: Type) where Source #

Generic transformer. It can be specialized to either an encoder or a decoder; see the sketch after the parameter list below.

  • posEnc: an absolute positional encoding layer as used by, e.g., BERT.
  • relPosEnc: a relative positional encoding layer as used by, e.g., T5.
  • initialLayerNorm: a layer normalization layer for the embeddings.
  • initialDropout: a dropout layer for the embeddings.
  • stack: a stack of transformer blocks.
  • finalLayerNorm: the final layer normalization layer.
  • finalDropout: the final dropout layer.
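
As a minimal sketch (not part of the library), the HasForward instances below instantiate the unused positional-encoding slot to (), which suggests the following illustrative type aliases (the alias names are assumptions):

    -- A transformer using relative positional encoding leaves the
    -- absolute slot at ():
    type RelPosTransformer relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout =
      GTransformer () relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout

    -- A transformer using absolute positional encoding leaves the
    -- relative slot at ():
    type AbsPosTransformer posEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout =
      GTransformer posEnc () initialLayerNorm initialDropout stack finalLayerNorm finalDropout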

Constructors

GTransformer
  :: forall posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout.
     { tPosEnc :: posEnc                       -- absolute positional encoding
     , tRelPosEnc :: relPosEnc                 -- relative positional encoding
     , tInitialLayerNorm :: initialLayerNorm   -- initial layer norm
     , tInitialDropout :: initialDropout       -- initial dropout
     , tStack :: stack                         -- transformer block stack
     , tFinalLayerNorm :: finalLayerNorm       -- final layer norm
     , tFinalDropout :: finalDropout           -- final dropout
     } -> GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout

Instances

Generic (GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout) Source # 
Instance details

Defined in Torch.GraduallyTyped.NN.Transformer.GTransformer

Associated Types

type Rep (GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout) :: Type -> Type Source #

Methods

from :: GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> Rep (GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout) x Source #

to :: Rep (GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout) x -> GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout Source #

(Show posEnc, Show relPosEnc, Show initialLayerNorm, Show initialDropout, Show stack, Show finalLayerNorm, Show finalDropout) => Show (GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout) Source # 
Instance details

Defined in Torch.GraduallyTyped.NN.Transformer.GTransformer

Methods

showsPrec :: Int -> GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> ShowS Source #

show :: GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> String Source #

showList :: [GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout] -> ShowS Source #

(Eq posEnc, Eq relPosEnc, Eq initialLayerNorm, Eq initialDropout, Eq stack, Eq finalLayerNorm, Eq finalDropout) => Eq (GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout) Source # 
Instance details

Defined in Torch.GraduallyTyped.NN.Transformer.GTransformer

Methods

(==) :: GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> Bool Source #

(/=) :: GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> Bool Source #

(Ord posEnc, Ord relPosEnc, Ord initialLayerNorm, Ord initialDropout, Ord stack, Ord finalLayerNorm, Ord finalDropout) => Ord (GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout) Source # 
Instance details

Defined in Torch.GraduallyTyped.NN.Transformer.GTransformer

Methods

compare :: GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> Ordering Source #

(<) :: GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> Bool Source #

(<=) :: GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> Bool Source #

(>) :: GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> Bool Source #

(>=) :: GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> Bool Source #

max :: GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout Source #

min :: GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout Source #

(HasStateDict posEnc, HasStateDict relPosEnc, HasStateDict initialLayerNorm, HasStateDict initialDropout, HasStateDict stack, HasStateDict finalLayerNorm, HasStateDict finalDropout) => HasStateDict (GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout) Source # 
Instance details

Defined in Torch.GraduallyTyped.NN.Transformer.GTransformer

Methods

fromStateDict :: (MonadIO m, MonadThrow m, MonadState StateDict m) => ModelSpec (GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout) -> StateDictKey -> m (GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout) Source #

toStateDict :: (MonadThrow m, MonadState StateDict m) => StateDictKey -> GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> m () Source #
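
A hedged usage sketch for this instance, assuming a ModelSpec value spec and an already populated StateDict value stateDict. The helper names and the "encoder." key prefix are illustrative only; the string literal assumes OverloadedStrings and a textual StateDictKey.

    {-# LANGUAGE OverloadedStrings #-}

    import Control.Monad.State (evalStateT, execStateT)

    -- Load a GTransformer from a state dictionary under an assumed key prefix.
    loadEncoder spec stateDict = evalStateT (fromStateDict spec "encoder.") stateDict

    -- Serialize a GTransformer into an existing state dictionary.
    saveEncoder model stateDict = execStateT (toStateDict "encoder." model) stateDict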

(HasInitialize posEnc generatorDevice posEnc' generatorDevice0, HasInitialize relPosEnc generatorDevice0 relPosEnc' generatorDevice1, HasInitialize initialLayerNorm generatorDevice1 initialLayerNorm' generatorDevice2, HasInitialize initialDropout generatorDevice2 initialDropout' generatorDevice3, HasInitialize stack generatorDevice3 stack' generatorDevice4, HasInitialize finalLayerNorm generatorDevice4 finalLayerNorm' generatorDevice5, HasInitialize finalDropout generatorDevice5 finalDropout' generatorOutputDevice) => HasInitialize (GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout) generatorDevice (GTransformer posEnc' relPosEnc' initialLayerNorm' initialDropout' stack' finalLayerNorm' finalDropout') generatorOutputDevice Source # 
Instance details

Defined in Torch.GraduallyTyped.NN.Transformer.GTransformer

Methods

initialize :: MonadThrow m => ModelSpec (GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout) -> Generator generatorDevice -> m (GTransformer posEnc' relPosEnc' initialLayerNorm' initialDropout' stack' finalLayerNorm' finalDropout', Generator generatorOutputDevice) Source #

(HasForward initialLayerNorm (Tensor inputGradient inputLayout inputDevice inputDataType inputShape) generatorDevice tensor0 generatorDevice0, HasForward initialDropout tensor0 generatorDevice0 tensor1 generatorDevice1, HasForward relPosEnc (Tensor relPosGradient relPosLayout relPosDevice relPosDataType relPosShape) generatorDevice1 (Tensor relPosEncGradient relPosEncLayout relPosEncDevice relPosEncDataType relPosEncShape) generatorDevice2, HasForward stack (tensor1, Tensor (relPosEncGradient <|> attentionMaskGradient) (relPosEncLayout <+> attentionMaskLayout) (relPosEncDevice <+> attentionMaskDevice) (relPosEncDataType <+> attentionMaskDataType) (BroadcastShapesF doubleTransposedRelPosEncShape unsqueezedAttentionMaskShape)) generatorDevice2 tensor3 generatorDevice3, transposedRelPosEncShape ~ TransposeF ('SelectDim ('ByIndex 2 :: By Symbol Natural)) ('SelectDim ('ByIndex 3 :: By Symbol Natural)) relPosEncShape, Catch transposedRelPosEncShape, doubleTransposedRelPosEncShape ~ TransposeF ('SelectDim ('ByIndex 1 :: By Symbol Natural)) ('SelectDim ('ByIndex 2 :: By Symbol Natural)) transposedRelPosEncShape, Catch doubleTransposedRelPosEncShape, unsqueezedAttentionMaskShape ~ UnsqueezeF ('SelectDim ('ByIndex 1 :: By Symbol Natural)) attentionMaskShape, Catch unsqueezedAttentionMaskShape, Catch (BroadcastShapesF doubleTransposedRelPosEncShape unsqueezedAttentionMaskShape), HasForward finalLayerNorm tensor3 generatorDevice3 tensor4 generatorDevice4, HasForward finalDropout tensor4 generatorDevice4 output generatorOutputDevice) => HasForward (GTransformer () relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout) (Tensor inputGradient inputLayout inputDevice inputDataType inputShape, Tensor relPosGradient relPosLayout relPosDevice relPosDataType relPosShape, Tensor attentionMaskGradient attentionMaskLayout attentionMaskDevice attentionMaskDataType attentionMaskShape) generatorDevice output generatorOutputDevice Source #

HasForward instance for GTransformer in an encoder configuration with relative positional encoding rather than absolute positional encoding.

     ┌───────┐  ┌────────┐  ┌───────────────┐
     │ input │  │ relPos │  │ attentionMask │
     └───┬───┘  └───┬────┘  └───────┬───────┘
         │          │               │
         │          ▼               │
         │     tRelPosEnc           │
         │          ▼               │
         │      transpose           │
         │          ▼               ▼
         │      transpose       unsqueeze
         ▼          │               │
(tInitialLayerNorm) │               │
         ▼          └─────►add◄─────┘
 (tInitialDropout)          │
         ▼                  │
      tStack◄───────────────┘
         ▼
 (tFinalLayerNorm)
         ▼
  (tFinalDropout)
         │
         ▼
    ┌────────┐
    │ output │
    └────────┘
Instance details

Defined in Torch.GraduallyTyped.NN.Transformer.GTransformer

Methods

forward :: MonadThrow m => GTransformer () relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> (Tensor inputGradient inputLayout inputDevice inputDataType inputShape, Tensor relPosGradient relPosLayout relPosDevice relPosDataType relPosShape, Tensor attentionMaskGradient attentionMaskLayout attentionMaskDevice attentionMaskDataType attentionMaskShape) -> Generator generatorDevice -> m (output, Generator generatorOutputDevice) Source #
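
A hedged usage sketch for this instance. The encoder value, the three input tensors, and the generator are assumed to be constructed elsewhere (e.g. via initialize and the library's tensor creation functions):

    -- Run an encoder with relative positional encoding. All arguments are
    -- assumed to satisfy the shape constraints listed above.
    runEncoder encoder input relPos attentionMask g =
      forward encoder (input, relPos, attentionMask) g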

(HasForward posEnc (Tensor posGradient posLayout posDevice posDataType posShape) generatorDevice (Tensor posEncGradient posEncLayout posEncDevice posEncDataType posEncShape) generatorDevice0, HasForward initialLayerNorm (Tensor (inputGradient <|> posEncGradient) (inputLayout <+> posEncLayout) (inputDevice <+> posEncDevice) (inputDataType <+> posEncDataType) (BroadcastShapesF inputShape posEncShape)) generatorDevice0 tensor1 generatorDevice1, Catch (BroadcastShapesF inputShape posEncShape), HasForward initialDropout tensor1 generatorDevice1 tensor2 generatorDevice2, HasForward stack (tensor2, Tensor attentionMaskGradient attentionMaskLayout attentionMaskDevice attentionMaskDataType (UnsqueezeF ('SelectDim ('ByIndex 1 :: By Symbol Natural)) attentionMaskShape)) generatorDevice2 tensor3 generatorDevice3, Catch (UnsqueezeF ('SelectDim ('ByIndex 1 :: By Symbol Natural)) attentionMaskShape), HasForward finalLayerNorm tensor3 generatorDevice3 tensor4 generatorDevice4, HasForward finalDropout tensor4 generatorDevice4 output generatorOutputDevice) => HasForward (GTransformer posEnc () initialLayerNorm initialDropout stack finalLayerNorm finalDropout) (Tensor inputGradient inputLayout inputDevice inputDataType inputShape, Tensor posGradient posLayout posDevice posDataType posShape, Tensor attentionMaskGradient attentionMaskLayout attentionMaskDevice attentionMaskDataType attentionMaskShape) generatorDevice output generatorOutputDevice Source #

HasForward instance for GTransformer in an encoder configuration with absolute positional encoding rather than relative positional encoding.

┌───────┐  ┌─────┐  ┌───────────────┐
│ input │  │ pos │  │ attentionMask │
└───┬───┘  └─────┘  └───────┬───────┘
    │         │             │
    │         ▼             │
    │      tPosEnc          │
    │         │             │
    └──►add◄──┘             │
         │                  │
         ▼                  │
(tInitialLayerNorm)         │
         ▼                  ▼
 (tInitialDropout)     unsqueeze
         ▼                  │
      tStack◄───────────────┘
         ▼
 (tFinalLayerNorm)
         ▼
  (tFinalDropout)
         │
         ▼
    ┌────────┐
    │ output │
    └────────┘
Instance details

Defined in Torch.GraduallyTyped.NN.Transformer.GTransformer

Methods

forward :: MonadThrow m => GTransformer posEnc () initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> (Tensor inputGradient inputLayout inputDevice inputDataType inputShape, Tensor posGradient posLayout posDevice posDataType posShape, Tensor attentionMaskGradient attentionMaskLayout attentionMaskDevice attentionMaskDataType attentionMaskShape) -> Generator generatorDevice -> m (output, Generator generatorOutputDevice) Source #

(HasForward initialLayerNorm (Tensor decoderInputGradient decoderInputLayout decoderInputDevice decoderInputDataType decoderInputShape) generatorDevice tensor0 generatorDevice0, HasForward initialDropout tensor0 generatorDevice0 tensor1 generatorDevice1, HasForward relPosEnc (Tensor decoderRelPosGradient decoderRelPosLayout decoderRelPosDevice decoderRelPosDataType decoderRelPosShape) generatorDevice1 (Tensor decoderRelPosEncGradient decoderRelPosEncLayout decoderRelPosEncDevice decoderRelPosEncDataType decoderRelPosEncShape) generatorDevice2, HasForward stack (tensor1, Tensor encoderOutputGradient encoderOutputLayout encoderOutputDevice encoderOutputDataType encoderOutputShape, Tensor (decoderRelPosEncGradient <|> decoderAttentionMaskGradient) (decoderRelPosEncLayout <+> decoderAttentionMaskLayout) (decoderRelPosEncDevice <+> decoderAttentionMaskDevice) (decoderRelPosEncDataType <+> decoderAttentionMaskDataType) (BroadcastShapesF doubleTransposedDecoderRelPosEncShape unsqueezedDecoderAttentionMaskShape), Tensor crossAttentionMaskGradient crossAttentionMaskLayout crossAttentionMaskDevice crossAttentionMaskDataType unsqueezedCrossAttentionMaskShape) generatorDevice2 tensor3 generatorDevice3, transposedDecoderRelPosEncShape ~ TransposeF ('SelectDim ('ByIndex 2 :: By Symbol Natural)) ('SelectDim ('ByIndex 3 :: By Symbol Natural)) decoderRelPosEncShape, Catch transposedDecoderRelPosEncShape, doubleTransposedDecoderRelPosEncShape ~ TransposeF ('SelectDim ('ByIndex 1 :: By Symbol Natural)) ('SelectDim ('ByIndex 2 :: By Symbol Natural)) transposedDecoderRelPosEncShape, Catch doubleTransposedDecoderRelPosEncShape, unsqueezedDecoderAttentionMaskShape ~ UnsqueezeF ('SelectDim ('ByIndex 1 :: By Symbol Natural)) decoderAttentionMaskShape, Catch unsqueezedDecoderAttentionMaskShape, unsqueezedCrossAttentionMaskShape ~ UnsqueezeF ('SelectDim ('ByIndex 1 :: By Symbol Natural)) crossAttentionMaskShape, Catch unsqueezedCrossAttentionMaskShape, Catch (BroadcastShapesF doubleTransposedDecoderRelPosEncShape unsqueezedDecoderAttentionMaskShape), HasForward finalLayerNorm tensor3 generatorDevice3 tensor4 generatorDevice4, HasForward finalDropout tensor4 generatorDevice4 output generatorOutputDevice) => HasForward (GTransformer () relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout) (Tensor decoderInputGradient decoderInputLayout decoderInputDevice decoderInputDataType decoderInputShape, Tensor encoderOutputGradient encoderOutputLayout encoderOutputDevice encoderOutputDataType encoderOutputShape, Tensor decoderRelPosGradient decoderRelPosLayout decoderRelPosDevice decoderRelPosDataType decoderRelPosShape, Tensor decoderAttentionMaskGradient decoderAttentionMaskLayout decoderAttentionMaskDevice decoderAttentionMaskDataType decoderAttentionMaskShape, Tensor crossAttentionMaskGradient crossAttentionMaskLayout crossAttentionMaskDevice crossAttentionMaskDataType crossAttentionMaskShape) generatorDevice output generatorOutputDevice Source #

HasForward instance for GTransformer in a decoder configuration with relative positional encoding rather than absolute positional encoding.

  ┌──────────────┐  ┌───────────────┐  ┌───────────────┐  ┌──────────────────────┐  ┌────────────────────┐
  │ decoderInput │  │ encoderOutput │  │ decoderRelPos │  │ decoderAttentionMask │  │ crossAttentionMask │
  └──────┬───────┘  └───────┬───────┘  └───────┬───────┘  └──────────┬───────────┘  └─────────┬──────────┘
         │                  │                  │                     │                        │
         │                  │                  ▼                     │                        │
         │                  │              tRelPosEnc                │                        │
         │                  │                  ▼                     │                        │
         │                  │              transpose                 │                        │
         │                  │                  ▼                     ▼                        ▼
         │                  │              transpose             unsqueeze                unsqueeze
         ▼                  │                  │                     │                        │
(tInitialLayerNorm)         │                  │                     │                        │
         ▼                  │                  └────────►add◄────────┘                        │
 (tInitialDropout)          │                             │                                   │
         ▼                  │                             │                                   │
      tStack◄───────────────┘◄────────────────────────────┘◄──────────────────────────────────┘
         ▼
 (tFinalLayerNorm)
         ▼
  (tFinalDropout)
         │
         ▼
    ┌────────┐
    │ output │
    └────────┘
Instance details

Defined in Torch.GraduallyTyped.NN.Transformer.GTransformer

Methods

forward :: MonadThrow m => GTransformer () relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> (Tensor decoderInputGradient decoderInputLayout decoderInputDevice decoderInputDataType decoderInputShape, Tensor encoderOutputGradient encoderOutputLayout encoderOutputDevice encoderOutputDataType encoderOutputShape, Tensor decoderRelPosGradient decoderRelPosLayout decoderRelPosDevice decoderRelPosDataType decoderRelPosShape, Tensor decoderAttentionMaskGradient decoderAttentionMaskLayout decoderAttentionMaskDevice decoderAttentionMaskDataType decoderAttentionMaskShape, Tensor crossAttentionMaskGradient crossAttentionMaskLayout crossAttentionMaskDevice crossAttentionMaskDataType crossAttentionMaskShape) -> Generator generatorDevice -> m (output, Generator generatorOutputDevice) Source #
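
A hedged usage sketch for this instance. The decoder value, the five input tensors, and the generator are assumed to be constructed elsewhere:

    -- Run a decoder with relative positional encoding against a previously
    -- computed encoder output. All arguments are assumed to satisfy the
    -- shape constraints listed above.
    runDecoder decoder decoderInput encoderOutput decoderRelPos decoderAttentionMask crossAttentionMask g =
      forward decoder (decoderInput, encoderOutput, decoderRelPos, decoderAttentionMask, crossAttentionMask) g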

(HasForward posEnc (Tensor decoderPosGradient decoderPosLayout decoderPosDevice decoderPosDataType decoderPosShape) generatorDevice (Tensor decoderPosEncGradient decoderPosEncLayout decoderPosEncDevice decoderPosEncDataType decoderPosEncShape) generatorDevice0, HasForward initialLayerNorm (Tensor (decoderInputGradient <|> decoderPosEncGradient) (decoderInputLayout <+> decoderPosEncLayout) (decoderInputDevice <+> decoderPosEncDevice) (decoderInputDataType <+> decoderPosEncDataType) (BroadcastShapesF decoderInputShape decoderPosEncShape)) generatorDevice0 tensor1 generatorDevice1, HasForward initialDropout tensor1 generatorDevice1 tensor2 generatorDevice2, HasForward stack (tensor2, Tensor encoderOutputGradient encoderOutputLayout encoderOutputDevice encoderOutputDataType encoderOutputShape, Tensor decoderAttentionMaskGradient decoderAttentionMaskLayout decoderAttentionMaskDevice decoderAttentionMaskDataType (UnsqueezeF ('SelectDim ('ByIndex 1 :: By Symbol Natural)) decoderAttentionMaskShape), Tensor crossAttentionMaskGradient crossAttentionMaskLayout crossAttentionMaskDevice crossAttentionMaskDataType (UnsqueezeF ('SelectDim ('ByIndex 1 :: By Symbol Natural)) crossAttentionMaskShape)) generatorDevice2 tensor3 generatorDevice3, Catch (UnsqueezeF ('SelectDim ('ByIndex 1 :: By Symbol Natural)) decoderAttentionMaskShape), Catch (UnsqueezeF ('SelectDim ('ByIndex 1 :: By Symbol Natural)) crossAttentionMaskShape), Catch (BroadcastShapesF decoderInputShape decoderPosEncShape), HasForward finalLayerNorm tensor3 generatorDevice3 tensor4 generatorDevice4, HasForward finalDropout tensor4 generatorDevice4 output generatorOutputDevice) => HasForward (GTransformer posEnc () initialLayerNorm initialDropout stack finalLayerNorm finalDropout) (Tensor decoderInputGradient decoderInputLayout decoderInputDevice decoderInputDataType decoderInputShape, Tensor encoderOutputGradient encoderOutputLayout encoderOutputDevice encoderOutputDataType encoderOutputShape, Tensor decoderPosGradient decoderPosLayout decoderPosDevice decoderPosDataType decoderPosShape, Tensor decoderAttentionMaskGradient decoderAttentionMaskLayout decoderAttentionMaskDevice decoderAttentionMaskDataType decoderAttentionMaskShape, Tensor crossAttentionMaskGradient crossAttentionMaskLayout crossAttentionMaskDevice crossAttentionMaskDataType crossAttentionMaskShape) generatorDevice output generatorOutputDevice Source #

HasForward instance for GTransformer in a decoder configuration with absolute positional encoding rather than relative positional encoding.

┌──────────────┐  ┌────────────┐  ┌───────────────┐  ┌──────────────────────┐  ┌────────────────────┐
│ decoderInput │  │ decoderPos │  │ encoderOutput │  │ decoderAttentionMask │  │ crossAttentionMask │
└──────┬───────┘  └──────┬─────┘  └───────┬───────┘  └──────────┬───────────┘  └──────────┬─────────┘
       │                 │                │                     │                         │
       │                 ▼                │                     │                         │
       │              tPosEnc             │                     │                         │
       │                 │                │                     │                         │
       └──────►add◄──────┘                │                     │                         │
                │                         │                     │                         │
                ▼                         │                     │                         │
       (tInitialLayerNorm)                │                     │                         │
                ▼                         │                     ▼                         ▼
        (tInitialDropout)                 │                 unsqueeze                 unsqueeze
                ▼                         │                     │                         │
             tStack◄──────────────────────┘◄────────────────────┘◄────────────────────────┘
                ▼
        (tFinalLayerNorm)
                ▼
         (tFinalDropout)
                │
                ▼
           ┌────────┐
           │ output │
           └────────┘
Instance details

Defined in Torch.GraduallyTyped.NN.Transformer.GTransformer

Methods

forward :: MonadThrow m => GTransformer posEnc () initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> (Tensor decoderInputGradient decoderInputLayout decoderInputDevice decoderInputDataType decoderInputShape, Tensor encoderOutputGradient encoderOutputLayout encoderOutputDevice encoderOutputDataType encoderOutputShape, Tensor decoderPosGradient decoderPosLayout decoderPosDevice decoderPosDataType decoderPosShape, Tensor decoderAttentionMaskGradient decoderAttentionMaskLayout decoderAttentionMaskDevice decoderAttentionMaskDataType decoderAttentionMaskShape, Tensor crossAttentionMaskGradient crossAttentionMaskLayout crossAttentionMaskDevice crossAttentionMaskDataType crossAttentionMaskShape) -> Generator generatorDevice -> m (output, Generator generatorOutputDevice) Source #

type Rep (GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout) Source # 
Instance details

Defined in Torch.GraduallyTyped.NN.Transformer.GTransformer

type Rep (GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout) = D1 ('MetaData "GTransformer" "Torch.GraduallyTyped.NN.Transformer.GTransformer" "hasktorch-gradually-typed-0.2.0.0-1KV1aIPzzbp6JpSr37tC1K" 'False) (C1 ('MetaCons "GTransformer" 'PrefixI 'True) ((S1 ('MetaSel ('Just "tPosEnc") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 posEnc) :*: (S1 ('MetaSel ('Just "tRelPosEnc") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 relPosEnc) :*: S1 ('MetaSel ('Just "tInitialLayerNorm") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 initialLayerNorm))) :*: ((S1 ('MetaSel ('Just "tInitialDropout") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 initialDropout) :*: S1 ('MetaSel ('Just "tStack") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 stack)) :*: (S1 ('MetaSel ('Just "tFinalLayerNorm") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 finalLayerNorm) :*: S1 ('MetaSel ('Just "tFinalDropout") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 finalDropout)))))
type ModelSpec (GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout) Source # 
Instance details

Defined in Torch.GraduallyTyped.NN.Transformer.GTransformer

type ModelSpec (GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout) = GTransformer (ModelSpec posEnc) (ModelSpec relPosEnc) (ModelSpec initialLayerNorm) (ModelSpec initialDropout) (ModelSpec stack) (ModelSpec finalLayerNorm) (ModelSpec finalDropout)

type family TransformerEncoderF (style :: TransformerStyle) (numLayers :: Nat) (gradient :: Gradient RequiresGradient) (device :: Device (DeviceType Nat)) (dataType :: DataType DType) (headDim :: Dim (Name Symbol) (Size Nat)) (headEmbedDim :: Dim (Name Symbol) (Size Nat)) (embedDim :: Dim (Name Symbol) (Size Nat)) (inputEmbedDim :: Dim (Name Symbol) (Size Nat)) (ffnDim :: Dim (Name Symbol) (Size Nat)) (posEncDim :: Dim (Name Symbol) (Size Nat)) (hasDropout :: HasDropout) :: Type where ... Source #

Specifies a transformer in an encoder configuration, assembled from the TE* component type families below.

Equations

TransformerEncoderF style numLayers gradient device dataType headDim headEmbedDim embedDim inputEmbedDim ffnDim posEncDim hasDropout = GTransformer (TEPosEncF style gradient device dataType inputEmbedDim posEncDim) (TERelPosEncF style gradient device dataType headDim posEncDim) (TEInitialLayerNormF style gradient device dataType inputEmbedDim) (TEInitialDropoutF style hasDropout) (TEStackF style numLayers gradient device dataType headDim headEmbedDim embedDim inputEmbedDim ffnDim hasDropout) (TEFinalLayerNormF style gradient device dataType inputEmbedDim) (TEFinalDropoutF style hasDropout) 
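
For illustration, a concrete encoder type can be named by applying TransformerEncoderF at the type level. The promoted constructor names ('Gradient, 'WithGradient, 'DataType, 'Float, 'Dim, 'Name, 'Size, 'WithDropout) and the sizes below are assumptions for a small T5-style configuration, not values taken from this page:

    -- Hypothetical type alias; dimensions are illustrative only.
    type SmallT5Encoder device =
      TransformerEncoderF 'T5 6
        ('Gradient 'WithGradient)       -- parameters require gradients
        device                          -- computational device
        ('DataType 'Float)              -- parameter data type
        ('Dim ('Name "*") ('Size 8))    -- headDim
        ('Dim ('Name "*") ('Size 64))   -- headEmbedDim
        ('Dim ('Name "*") ('Size 512))  -- embedDim
        ('Dim ('Name "*") ('Size 512))  -- inputEmbedDim
        ('Dim ('Name "*") ('Size 2048)) -- ffnDim
        ('Dim ('Name "*") ('Size 32))   -- posEncDim
        'WithDropout                    -- include dropout layers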

type family TEPosEncF (style :: TransformerStyle) (gradient :: Gradient RequiresGradient) (device :: Device (DeviceType Nat)) (dataType :: DataType DType) (inputEmbedDim :: Dim (Name Symbol) (Size Nat)) (posEncDim :: Dim (Name Symbol) (Size Nat)) :: Type where ... Source #

Specifies the absolute positional encoding layer of a transformer encoder.

Equations

TEPosEncF 'T5 _ _ _ _ _ = () 
TEPosEncF 'ByT5 gradient device dataType inputEmbedDim posEncDim = TEPosEncF 'T5 gradient device dataType inputEmbedDim posEncDim 
TEPosEncF 'BART gradient device dataType inputEmbedDim posEncDim = NamedModel (Embedding gradient ('Layout 'Dense) device dataType posEncDim inputEmbedDim 'Nothing) 
TEPosEncF 'MBART gradient device dataType inputEmbedDim posEncDim = TEPosEncF 'BART gradient device dataType inputEmbedDim posEncDim 
TEPosEncF 'Pegasus gradient device dataType inputEmbedDim posEncDim = TEPosEncF 'BART gradient device dataType inputEmbedDim posEncDim 
TEPosEncF 'BERT gradient device dataType inputEmbedDim posEncDim = NamedModel (Embedding gradient ('Layout 'Dense) device dataType posEncDim inputEmbedDim 'Nothing) 
TEPosEncF 'RoBERTa gradient device dataType inputEmbedDim posEncDim = TEPosEncF 'BERT gradient device dataType inputEmbedDim posEncDim 

type family TERelPosEncF (style :: TransformerStyle) (gradient :: Gradient RequiresGradient) (device :: Device (DeviceType Nat)) (dataType :: DataType DType) (headDim :: Dim (Name Symbol) (Size Nat)) (posEncDim :: Dim (Name Symbol) (Size Nat)) :: Type where ... Source #

Specifies the relative positional encoding layer of a transformer encoder.

Equations

TERelPosEncF 'T5 gradient device dataType headDim posEncDim = NamedModel (Embedding gradient ('Layout 'Dense) device dataType posEncDim headDim 'Nothing) 
TERelPosEncF 'ByT5 gradient device dataType headDim posEncDim = TERelPosEncF 'T5 gradient device dataType headDim posEncDim 
TERelPosEncF 'BART _ _ _ _ _ = () 
TERelPosEncF 'MBART gradient device dataType headDim posEncDim = TERelPosEncF 'BART gradient device dataType headDim posEncDim 
TERelPosEncF 'Pegasus gradient device dataType headDim posEncDim = TERelPosEncF 'BART gradient device dataType headDim posEncDim 
TERelPosEncF 'BERT _ _ _ _ _ = () 
TERelPosEncF 'RoBERTa gradient device dataType headDim posEncDim = TERelPosEncF 'BERT gradient device dataType headDim posEncDim 

type family TEInitialLayerNormF (style :: TransformerStyle) (gradient :: Gradient RequiresGradient) (device :: Device (DeviceType Nat)) (dataType :: DataType DType) (inputEmbedDim :: Dim (Name Symbol) (Size Nat)) :: Type where ... Source #

Specifies the initial layer normalization layer of a transformer encoder.

Equations

TEInitialLayerNormF 'T5 _ _ _ _ = () 
TEInitialLayerNormF 'ByT5 gradient device dataType inputEmbedDim = TEInitialLayerNormF 'T5 gradient device dataType inputEmbedDim 
TEInitialLayerNormF 'BART gradient device dataType inputEmbedDim = NamedModel (LayerNorm 'WithBias gradient device dataType ('Shape '[inputEmbedDim])) 
TEInitialLayerNormF 'MBART gradient device dataType inputEmbedDim = TEInitialLayerNormF 'BART gradient device dataType inputEmbedDim 
TEInitialLayerNormF 'Pegasus _ _ _ _ = () 
TEInitialLayerNormF 'BERT gradient device dataType inputEmbedDim = NamedModel (LayerNorm 'WithBias gradient device dataType ('Shape '[inputEmbedDim])) 
TEInitialLayerNormF 'RoBERTa gradient device dataType inputEmbedDim = TEInitialLayerNormF 'BERT gradient device dataType inputEmbedDim 

type family TEStackF (style :: TransformerStyle) (numLayers :: Nat) (gradient :: Gradient RequiresGradient) (device :: Device (DeviceType Nat)) (dataType :: DataType DType) (headDim :: Dim (Name Symbol) (Size Nat)) (headEmbedDim :: Dim (Name Symbol) (Size Nat)) (embedDim :: Dim (Name Symbol) (Size Nat)) (inputEmbedDim :: Dim (Name Symbol) (Size Nat)) (ffnDim :: Dim (Name Symbol) (Size Nat)) (hasDropout :: HasDropout) :: Type where ... Source #

Specifies the transformer block stack of a transformer encoder.

Equations

TEStackF style numLayers gradient device dataType headDim headEmbedDim embedDim inputEmbedDim ffnDim hasDropout = NamedModel (EncoderStackF style numLayers gradient device dataType headDim headEmbedDim embedDim inputEmbedDim ffnDim hasDropout) 

type family TEFinalLayerNormF (style :: TransformerStyle) (gradient :: Gradient RequiresGradient) (device :: Device (DeviceType Nat)) (dataType :: DataType DType) (inputEmbedDim :: Dim (Name Symbol) (Size Nat)) :: Type where ... Source #

Specifies the final layer normalization layer of a transformer encoder.

Equations

TEFinalLayerNormF 'T5 gradient device dataType inputEmbedDim = NamedModel (LayerNorm 'WithoutBias gradient device dataType ('Shape '[inputEmbedDim])) 
TEFinalLayerNormF 'ByT5 gradient device dataType inputEmbedDim = TEFinalLayerNormF 'T5 gradient device dataType inputEmbedDim 
TEFinalLayerNormF 'BART _ _ _ _ = () 
TEFinalLayerNormF 'MBART gradient device dataType inputEmbedDim = TEFinalLayerNormF 'BART gradient device dataType inputEmbedDim 
TEFinalLayerNormF 'Pegasus gradient device dataType inputEmbedDim = NamedModel (LayerNorm 'WithBias gradient device dataType ('Shape '[inputEmbedDim])) 
TEFinalLayerNormF 'BERT _ _ _ _ = () 
TEFinalLayerNormF 'RoBERTa gradient device dataType inputEmbedDim = TEFinalLayerNormF 'BERT gradient device dataType inputEmbedDim 

type family TEFinalDropoutF (style :: TransformerStyle) (hasDropout :: HasDropout) :: Type where ... Source #

Specifies the final dropout layer of a transformer encoder.

transformerEncoderSpec :: forall style numLayers gradient device dataType headDim headEmbedDim embedDim inputEmbedDim ffnDim posEncDim hasDropout. STransformerStyle style -> SNat numLayers -> SGradient gradient -> SDevice device -> SDataType dataType -> SDim headDim -> SDim headEmbedDim -> SDim embedDim -> SDim inputEmbedDim -> SDim ffnDim -> SDim posEncDim -> SHasDropout hasDropout -> Double -> Double -> ModelSpec (TransformerEncoderF style numLayers gradient device dataType headDim headEmbedDim embedDim inputEmbedDim ffnDim posEncDim hasDropout) Source #

Specifies the parameters of a transformer in an encoder configuration. A usage sketch follows the parameter list.

  • style: the style of the transformer stack, e.g. ST5, SByT5, etc.
  • numLayers: the number of transformer blocks in the stack.
  • gradient: whether to compute the gradient of the stack's parameters.
  • device: the computational device on which the stack is allocated.
  • dataType: the data type of the stack's parameters.
  • headDim: the dimension of all transformer heads in the stack.
  • headEmbedDim: the dimension of the transformer head embeddings.
  • embedDim: the dimension of the transformer embeddings.
  • inputEmbedDim: the dimension of the transformer query embeddings.
  • ffnDim: the dimension of the feed-forward network.
  • posEncDim: the dimension of the positional encoding.
  • hasDropout: whether the transformer includes dropout layers.
  • dropoutP: the dropout rate.
  • eps: the epsilon value for numerical stability of the layer normalization.
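
A hedged sketch of the call shape, combined with initialize from the HasInitialize instance above. All singleton arguments and the generator are assumed to be constructed by the caller; only the argument order and the two Double parameters are taken from the signature above.

    -- Build a ModelSpec with transformerEncoderSpec and turn it into a model.
    mkEncoder style numLayers gradient device dataType
              headDim headEmbedDim embedDim inputEmbedDim ffnDim posEncDim
              hasDropout generator =
      let dropoutP = 0.1   -- dropout rate
          eps      = 1e-6  -- layer-norm epsilon
          spec = transformerEncoderSpec style numLayers gradient device dataType
                   headDim headEmbedDim embedDim inputEmbedDim ffnDim posEncDim
                   hasDropout dropoutP eps
       in initialize spec generator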

type family TransformerDecoderF (style :: TransformerStyle) (numLayers :: Nat) (gradient :: Gradient RequiresGradient) (device :: Device (DeviceType Nat)) (dataType :: DataType DType) (headDim :: Dim (Name Symbol) (Size Nat)) (headEmbedDim :: Dim (Name Symbol) (Size Nat)) (embedDim :: Dim (Name Symbol) (Size Nat)) (decoderInputEmbedDim :: Dim (Name Symbol) (Size Nat)) (encoderOutputEmbedDim :: Dim (Name Symbol) (Size Nat)) (ffnDim :: Dim (Name Symbol) (Size Nat)) (posEncDim :: Dim (Name Symbol) (Size Nat)) (hasDropout :: HasDropout) :: Type where ... Source #

Specifies a transformer in a decoder configuration, assembled from the TD* component type families below.

Equations

TransformerDecoderF style numLayers gradient device dataType headDim headEmbedDim embedDim decoderInputEmbedDim encoderOutputEmbedDim ffnDim posEncDim hasDropout = GTransformer (TDPosEncF style gradient device dataType decoderInputEmbedDim posEncDim) (TDRelPosEncF style gradient device dataType headDim posEncDim) (TDInitialLayerNormF style gradient device dataType decoderInputEmbedDim) (TDInitialDropoutF style hasDropout) (TDStackF style numLayers gradient device dataType headDim headEmbedDim embedDim decoderInputEmbedDim encoderOutputEmbedDim ffnDim hasDropout) (TDFinalLayerNormF style gradient device dataType decoderInputEmbedDim) (TDFinalDropoutF style hasDropout) 

type family TDPosEncF (style :: TransformerStyle) (gradient :: Gradient RequiresGradient) (device :: Device (DeviceType Nat)) (dataType :: DataType DType) (inputEmbedDim :: Dim (Name Symbol) (Size Nat)) (posEncDim :: Dim (Name Symbol) (Size Nat)) :: Type where ... Source #

Specifies the absolute positional encoding layer of a transformer decoder.

Equations

TDPosEncF 'T5 _ _ _ _ _ = () 
TDPosEncF 'ByT5 gradient device dataType inputEmbedDim posEncDim = TDPosEncF 'T5 gradient device dataType inputEmbedDim posEncDim 
TDPosEncF 'BART gradient device dataType inputEmbedDim posEncDim = NamedModel (Embedding gradient ('Layout 'Dense) device dataType posEncDim inputEmbedDim 'Nothing) 
TDPosEncF 'MBART gradient device dataType inputEmbedDim posEncDim = TDPosEncF 'BART gradient device dataType inputEmbedDim posEncDim 
TDPosEncF 'Pegasus gradient device dataType inputEmbedDim posEncDim = TDPosEncF 'BART gradient device dataType inputEmbedDim posEncDim 

type family TDRelPosEncF (style :: TransformerStyle) (gradient :: Gradient RequiresGradient) (device :: Device (DeviceType Nat)) (dataType :: DataType DType) (headDim :: Dim (Name Symbol) (Size Nat)) (posEncDim :: Dim (Name Symbol) (Size Nat)) :: Type where ... Source #

Specifies the relative positional encoding layer of a transformer decoder.

Equations

TDRelPosEncF 'T5 gradient device dataType headDim posEncDim = NamedModel (Embedding gradient ('Layout 'Dense) device dataType posEncDim headDim 'Nothing) 
TDRelPosEncF 'ByT5 gradient device dataType headDim posEncDim = TDRelPosEncF 'T5 gradient device dataType headDim posEncDim 
TDRelPosEncF 'BART _ _ _ _ _ = () 
TDRelPosEncF 'MBART gradient device dataType headDim posEncDim = TDRelPosEncF 'BART gradient device dataType headDim posEncDim 
TDRelPosEncF 'Pegasus gradient device dataType headDim posEncDim = TDRelPosEncF 'BART gradient device dataType headDim posEncDim 

type family TDInitialLayerNormF (style :: TransformerStyle) (gradient :: Gradient RequiresGradient) (device :: Device (DeviceType Nat)) (dataType :: DataType DType) (inputEmbedDim :: Dim (Name Symbol) (Size Nat)) :: Type where ... Source #

Specifies the initial layer normalization layer of a transformer decoder.

Equations

TDInitialLayerNormF 'T5 _ _ _ _ = () 
TDInitialLayerNormF 'ByT5 gradient device dataType inputEmbedDim = TDInitialLayerNormF 'T5 gradient device dataType inputEmbedDim 
TDInitialLayerNormF 'BART gradient device dataType inputEmbedDim = NamedModel (LayerNorm 'WithBias gradient device dataType ('Shape '[inputEmbedDim])) 
TDInitialLayerNormF 'MBART gradient device dataType inputEmbedDim = TDInitialLayerNormF 'BART gradient device dataType inputEmbedDim 
TDInitialLayerNormF 'Pegasus _ _ _ _ = () 

type family TDStackF (style :: TransformerStyle) (numLayers :: Nat) (gradient :: Gradient RequiresGradient) (device :: Device (DeviceType Nat)) (dataType :: DataType DType) (headDim :: Dim (Name Symbol) (Size Nat)) (headEmbedDim :: Dim (Name Symbol) (Size Nat)) (embedDim :: Dim (Name Symbol) (Size Nat)) (decoderInputEmbedDim :: Dim (Name Symbol) (Size Nat)) (encoderOutputEmbedDim :: Dim (Name Symbol) (Size Nat)) (ffnDim :: Dim (Name Symbol) (Size Nat)) (hasDropout :: HasDropout) :: Type where ... Source #

Specifies the transformer block stack of a transformer decoder.

Equations

TDStackF style numLayers gradient device dataType headDim headEmbedDim embedDim decoderInputEmbedDim encoderOutputEmbedDim ffnDim hasDropout = NamedModel (DecoderStackF style numLayers gradient device dataType headDim headEmbedDim embedDim decoderInputEmbedDim encoderOutputEmbedDim ffnDim hasDropout) 

type family TDFinalLayerNormF (style :: TransformerStyle) (gradient :: Gradient RequiresGradient) (device :: Device (DeviceType Nat)) (dataType :: DataType DType) (inputEmbedDim :: Dim (Name Symbol) (Size Nat)) :: Type where ... Source #

Specifies the final layer normalization layer of a transformer decoder.

Equations

TDFinalLayerNormF 'T5 gradient device dataType inputEmbedDim = NamedModel (LayerNorm 'WithoutBias gradient device dataType ('Shape '[inputEmbedDim])) 
TDFinalLayerNormF 'ByT5 gradient device dataType inputEmbedDim = TDFinalLayerNormF 'T5 gradient device dataType inputEmbedDim 
TDFinalLayerNormF 'BART _ _ _ _ = () 
TDFinalLayerNormF 'MBART gradient device dataType inputEmbedDim = TDFinalLayerNormF 'BART gradient device dataType inputEmbedDim 
TDFinalLayerNormF 'Pegasus gradient device dataType inputEmbedDim = NamedModel (LayerNorm 'WithBias gradient device dataType ('Shape '[inputEmbedDim])) 

type family TDFinalDropoutF (style :: TransformerStyle) (hasDropout :: HasDropout) :: Type where ... Source #

Specifies the final dropout layer of a transformer decoder.

transformerDecoderSpec :: forall style numLayers gradient device dataType headDim headEmbedDim embedDim decoderInputEmbedDim encoderOutputEmbedDim ffnDim posEncDim hasDropout. STransformerStyle style -> SNat numLayers -> SGradient gradient -> SDevice device -> SDataType dataType -> SDim headDim -> SDim headEmbedDim -> SDim embedDim -> SDim decoderInputEmbedDim -> SDim encoderOutputEmbedDim -> SDim ffnDim -> SDim posEncDim -> SHasDropout hasDropout -> Double -> Double -> ModelSpec (TransformerDecoderF style numLayers gradient device dataType headDim headEmbedDim embedDim decoderInputEmbedDim encoderOutputEmbedDim ffnDim posEncDim hasDropout) Source #

Specifies the parameters of a transformer in a decoder configuration. A usage sketch follows the parameter list.

  • style: the style of the transformer stack, e.g. ST5, SByT5, etc.
  • numLayers: the number of transformer blocks in the stack.
  • gradient: whether to compute the gradient of the stack's parameters.
  • device: the computational device on which the stack is allocated.
  • dataType: the data type of the stack's parameters.
  • headDim: the dimension of all transformer heads in the stack.
  • headEmbedDim: the dimension of the transformer head embeddings.
  • embedDim: the dimension of the transformer embeddings.
  • decoderInputEmbedDim: the dimension of the decoder input embeddings.
  • encoderOutputEmbedDim: the dimension of the encoder output embeddings.
  • ffnDim: the dimension of the feed-forward network.
  • posEncDim: the dimension of the positional encoding.
  • hasDropout: whether the transformer includes dropout layers.
  • dropoutP: the dropout rate.
  • eps: the epsilon value for numerical stability of the layer normalization.
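
A hedged sketch of the call shape, analogous to the encoder case. All singleton arguments and the generator are assumed to be constructed by the caller:

    -- Build a ModelSpec with transformerDecoderSpec and turn it into a model.
    mkDecoder style numLayers gradient device dataType
              headDim headEmbedDim embedDim decoderInputEmbedDim encoderOutputEmbedDim
              ffnDim posEncDim hasDropout generator =
      let dropoutP = 0.1   -- dropout rate
          eps      = 1e-6  -- layer-norm epsilon
          spec = transformerDecoderSpec style numLayers gradient device dataType
                   headDim headEmbedDim embedDim decoderInputEmbedDim encoderOutputEmbedDim
                   ffnDim posEncDim hasDropout dropoutP eps
       in initialize spec generator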