Safe Haskell	Safe-Inferred
Language	Haskell2010

Torch.GraduallyTyped.NN.Transformer.GTransformer

Synopsis

data GTransformer (posEnc :: Type) (relPosEnc :: Type) (initialLayerNorm :: Type) (initialDropout :: Type) (stack :: Type) (finalLayerNorm :: Type) (finalDropout :: Type) where
- GTransformer :: forall posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout. {..} -> GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout
type family TransformerEncoderF (style :: TransformerStyle) (numLayers :: Nat) (gradient :: Gradient RequiresGradient) (device :: Device (DeviceType Nat)) (dataType :: DataType DType) (headDim :: Dim (Name Symbol) (Size Nat)) (headEmbedDim :: Dim (Name Symbol) (Size Nat)) (embedDim :: Dim (Name Symbol) (Size Nat)) (inputEmbedDim :: Dim (Name Symbol) (Size Nat)) (ffnDim :: Dim (Name Symbol) (Size Nat)) (posEncDim :: Dim (Name Symbol) (Size Nat)) (hasDropout :: HasDropout) :: Type where ...
type family TEPosEncF (style :: TransformerStyle) (gradient :: Gradient RequiresGradient) (device :: Device (DeviceType Nat)) (dataType :: DataType DType) (inputEmbedDim :: Dim (Name Symbol) (Size Nat)) (posEncDim :: Dim (Name Symbol) (Size Nat)) :: Type where ...
type family TERelPosEncF (style :: TransformerStyle) (gradient :: Gradient RequiresGradient) (device :: Device (DeviceType Nat)) (dataType :: DataType DType) (headDim :: Dim (Name Symbol) (Size Nat)) (posEncDim :: Dim (Name Symbol) (Size Nat)) :: Type where ...
type family TEInitialLayerNormF (style :: TransformerStyle) (gradient :: Gradient RequiresGradient) (device :: Device (DeviceType Nat)) (dataType :: DataType DType) (inputEmbedDim :: Dim (Name Symbol) (Size Nat)) :: Type where ...
type family TEInitialDropoutF (style :: TransformerStyle) (hasDropout :: HasDropout) :: Type where ...
type family TEStackF (style :: TransformerStyle) (numLayers :: Nat) (gradient :: Gradient RequiresGradient) (device :: Device (DeviceType Nat)) (dataType :: DataType DType) (headDim :: Dim (Name Symbol) (Size Nat)) (headEmbedDim :: Dim (Name Symbol) (Size Nat)) (embedDim :: Dim (Name Symbol) (Size Nat)) (inputEmbedDim :: Dim (Name Symbol) (Size Nat)) (ffnDim :: Dim (Name Symbol) (Size Nat)) (hasDropout :: HasDropout) :: Type where ...
type family TEFinalLayerNormF (style :: TransformerStyle) (gradient :: Gradient RequiresGradient) (device :: Device (DeviceType Nat)) (dataType :: DataType DType) (inputEmbedDim :: Dim (Name Symbol) (Size Nat)) :: Type where ...
type family TEFinalDropoutF (style :: TransformerStyle) (hasDropout :: HasDropout) :: Type where ...
transformerEncoderSpec :: forall style numLayers gradient device dataType headDim headEmbedDim embedDim inputEmbedDim ffnDim posEncDim hasDropout. STransformerStyle style -> SNat numLayers -> SGradient gradient -> SDevice device -> SDataType dataType -> SDim headDim -> SDim headEmbedDim -> SDim embedDim -> SDim inputEmbedDim -> SDim ffnDim -> SDim posEncDim -> SHasDropout hasDropout -> Double -> Double -> ModelSpec (TransformerEncoderF style numLayers gradient device dataType headDim headEmbedDim embedDim inputEmbedDim ffnDim posEncDim hasDropout)
type family TransformerDecoderF (style :: TransformerStyle) (numLayers :: Nat) (gradient :: Gradient RequiresGradient) (device :: Device (DeviceType Nat)) (dataType :: DataType DType) (headDim :: Dim (Name Symbol) (Size Nat)) (headEmbedDim :: Dim (Name Symbol) (Size Nat)) (embedDim :: Dim (Name Symbol) (Size Nat)) (decoderInputEmbedDim :: Dim (Name Symbol) (Size Nat)) (encoderOutputEmbedDim :: Dim (Name Symbol) (Size Nat)) (ffnDim :: Dim (Name Symbol) (Size Nat)) (posEncDim :: Dim (Name Symbol) (Size Nat)) (hasDropout :: HasDropout) :: Type where ...
type family TDPosEncF (style :: TransformerStyle) (gradient :: Gradient RequiresGradient) (device :: Device (DeviceType Nat)) (dataType :: DataType DType) (inputEmbedDim :: Dim (Name Symbol) (Size Nat)) (posEncDim :: Dim (Name Symbol) (Size Nat)) :: Type where ...
type family TDRelPosEncF (style :: TransformerStyle) (gradient :: Gradient RequiresGradient) (device :: Device (DeviceType Nat)) (dataType :: DataType DType) (headDim :: Dim (Name Symbol) (Size Nat)) (posEncDim :: Dim (Name Symbol) (Size Nat)) :: Type where ...
type family TDInitialLayerNormF (style :: TransformerStyle) (gradient :: Gradient RequiresGradient) (device :: Device (DeviceType Nat)) (dataType :: DataType DType) (inputEmbedDim :: Dim (Name Symbol) (Size Nat)) :: Type where ...
type family TDInitialDropoutF (style :: TransformerStyle) (hasDropout :: HasDropout) :: Type where ...
type family TDStackF (style :: TransformerStyle) (numLayers :: Nat) (gradient :: Gradient RequiresGradient) (device :: Device (DeviceType Nat)) (dataType :: DataType DType) (headDim :: Dim (Name Symbol) (Size Nat)) (headEmbedDim :: Dim (Name Symbol) (Size Nat)) (embedDim :: Dim (Name Symbol) (Size Nat)) (decoderInputEmbedDim :: Dim (Name Symbol) (Size Nat)) (encoderOutputEmbedDim :: Dim (Name Symbol) (Size Nat)) (ffnDim :: Dim (Name Symbol) (Size Nat)) (hasDropout :: HasDropout) :: Type where ...
type family TDFinalLayerNormF (style :: TransformerStyle) (gradient :: Gradient RequiresGradient) (device :: Device (DeviceType Nat)) (dataType :: DataType DType) (inputEmbedDim :: Dim (Name Symbol) (Size Nat)) :: Type where ...
type family TDFinalDropoutF (style :: TransformerStyle) (hasDropout :: HasDropout) :: Type where ...
transformerDecoderSpec :: forall style numLayers gradient device dataType headDim headEmbedDim embedDim decoderInputEmbedDim encoderOutputEmbedDim ffnDim posEncDim hasDropout. STransformerStyle style -> SNat numLayers -> SGradient gradient -> SDevice device -> SDataType dataType -> SDim headDim -> SDim headEmbedDim -> SDim embedDim -> SDim decoderInputEmbedDim -> SDim encoderOutputEmbedDim -> SDim ffnDim -> SDim posEncDim -> SHasDropout hasDropout -> Double -> Double -> ModelSpec (TransformerDecoderF style numLayers gradient device dataType headDim headEmbedDim embedDim decoderInputEmbedDim encoderOutputEmbedDim ffnDim posEncDim hasDropout)

Documentation

data GTransformer (posEnc :: Type) (relPosEnc :: Type) (initialLayerNorm :: Type) (initialDropout :: Type) (stack :: Type) (finalLayerNorm :: Type) (finalDropout :: Type) where Source #

Generic transformer. It can be specialized to either an encoder or a decoder, depending on the types chosen for its components:

posEnc: an absolute positional encoding layer as used by, e.g., BERT.
relPosEnc: a relative positional encoding layer as used by, e.g., T5.
initialLayerNorm: a layer normalization layer for the embeddings.
initialDropout: a dropout layer for the embeddings.
stack: a stack of transformer blocks.
finalLayerNorm: the final layer normalization layer.
finalDropout: the final dropout layer.

Constructors

GTransformer

Fields

:: forall posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout. { tPosEnc :: posEnc
absolute positional encoding
, tRelPosEnc :: relPosEnc
relative positional encoding
, tInitialLayerNorm :: initialLayerNorm
initial layer norm
, tInitialDropout :: initialDropout
initial dropout
, tStack :: stack
transformer block stack
, tFinalLayerNorm :: finalLayerNorm
final layer norm
, tFinalDropout :: finalDropout
final dropout
} -> GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout

Instances

Instances details

Generic (GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout) Source #
Instance details Defined in Torch.GraduallyTyped.NN.Transformer.GTransformer Associated Types type Rep (GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout) :: Type -> Type Source # Methods from :: GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> Rep (GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout) x Source # to :: Rep (GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout) x -> GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout Source #
(Show posEnc, Show relPosEnc, Show initialLayerNorm, Show initialDropout, Show stack, Show finalLayerNorm, Show finalDropout) => Show (GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout) Source #
Instance details Defined in Torch.GraduallyTyped.NN.Transformer.GTransformer Methods showsPrec :: Int -> GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> ShowS Source # show :: GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> String Source # showList :: [GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout] -> ShowS Source #
(Eq posEnc, Eq relPosEnc, Eq initialLayerNorm, Eq initialDropout, Eq stack, Eq finalLayerNorm, Eq finalDropout) => Eq (GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout) Source #
Instance details Defined in Torch.GraduallyTyped.NN.Transformer.GTransformer Methods (==) :: GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> Bool Source # (/=) :: GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> Bool Source #
(Ord posEnc, Ord relPosEnc, Ord initialLayerNorm, Ord initialDropout, Ord stack, Ord finalLayerNorm, Ord finalDropout) => Ord (GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout) Source #
Instance details Defined in Torch.GraduallyTyped.NN.Transformer.GTransformer Methods compare :: GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> Ordering Source # (<) :: GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> Bool Source # (<=) :: GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> Bool Source # (>) :: GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> Bool Source # (>=) :: GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> Bool Source # max :: GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout Source # min :: GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout Source #
(HasStateDict posEnc, HasStateDict relPosEnc, HasStateDict initialLayerNorm, HasStateDict initialDropout, HasStateDict stack, HasStateDict finalLayerNorm, HasStateDict finalDropout) => HasStateDict (GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout) Source #
Instance details Defined in Torch.GraduallyTyped.NN.Transformer.GTransformer Methods fromStateDict :: (MonadIO m, MonadThrow m, MonadState StateDict m) => ModelSpec (GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout) -> StateDictKey -> m (GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout) Source # toStateDict :: (MonadThrow m, MonadState StateDict m) => StateDictKey -> GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> m () Source #
(HasInitialize posEnc generatorDevice posEnc' generatorDevice0, HasInitialize relPosEnc generatorDevice0 relPosEnc' generatorDevice1, HasInitialize initialLayerNorm generatorDevice1 initialLayerNorm' generatorDevice2, HasInitialize initialDropout generatorDevice2 initialDropout' generatorDevice3, HasInitialize stack generatorDevice3 stack' generatorDevice4, HasInitialize finalLayerNorm generatorDevice4 finalLayerNorm' generatorDevice5, HasInitialize finalDropout generatorDevice5 finalDropout' generatorOutputDevice) => HasInitialize (GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout) generatorDevice (GTransformer posEnc' relPosEnc' initialLayerNorm' initialDropout' stack' finalLayerNorm' finalDropout') generatorOutputDevice Source #
Instance details Defined in Torch.GraduallyTyped.NN.Transformer.GTransformer Methods initialize :: MonadThrow m => ModelSpec (GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout) -> Generator generatorDevice -> m (GTransformer posEnc' relPosEnc' initialLayerNorm' initialDropout' stack' finalLayerNorm' finalDropout', Generator generatorOutputDevice) Source #
(HasForward initialLayerNorm (Tensor inputGradient inputLayout inputDevice inputDataType inputShape) generatorDevice tensor0 generatorDevice0, HasForward initialDropout tensor0 generatorDevice0 tensor1 generatorDevice1, HasForward relPosEnc (Tensor relPosGradient relPosLayout relPosDevice relPosDataType relPosShape) generatorDevice1 (Tensor relPosEncGradient relPosEncLayout relPosEncDevice relPosEncDataType relPosEncShape) generatorDevice2, HasForward stack (tensor1, Tensor (relPosEncGradient <|> attentionMaskGradient) (relPosEncLayout <+> attentionMaskLayout) (relPosEncDevice <+> attentionMaskDevice) (relPosEncDataType <+> attentionMaskDataType) (BroadcastShapesF doubleTransposedRelPosEncShape unsqueezedAttentionMaskShape)) generatorDevice2 tensor3 generatorDevice3, transposedRelPosEncShape ~ TransposeF ('SelectDim ('ByIndex 2 :: By Symbol Natural)) ('SelectDim ('ByIndex 3 :: By Symbol Natural)) relPosEncShape, Catch transposedRelPosEncShape, doubleTransposedRelPosEncShape ~ TransposeF ('SelectDim ('ByIndex 1 :: By Symbol Natural)) ('SelectDim ('ByIndex 2 :: By Symbol Natural)) transposedRelPosEncShape, Catch doubleTransposedRelPosEncShape, unsqueezedAttentionMaskShape ~ UnsqueezeF ('SelectDim ('ByIndex 1 :: By Symbol Natural)) attentionMaskShape, Catch unsqueezedAttentionMaskShape, Catch (BroadcastShapesF doubleTransposedRelPosEncShape unsqueezedAttentionMaskShape), HasForward finalLayerNorm tensor3 generatorDevice3 tensor4 generatorDevice4, HasForward finalDropout tensor4 generatorDevice4 output generatorOutputDevice) => HasForward (GTransformer () relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout) (Tensor inputGradient inputLayout inputDevice inputDataType inputShape, Tensor relPosGradient relPosLayout relPosDevice relPosDataType relPosShape, Tensor attentionMaskGradient attentionMaskLayout attentionMaskDevice attentionMaskDataType attentionMaskShape) generatorDevice output generatorOutputDevice Source #	`HasForward` instance for `GTransformer` in an encoder configuration with relative positional encoding rather than absolute positional encoding.

      ┌───────┐  ┌────────┐  ┌───────────────┐
      │ input │  │ relPos │  │ attentionMask │
      └───┬───┘  └───┬────┘  └───────┬───────┘
          │          │               │
          │          ▼               │
          │     tRelPosEnc           │
          │          ▼               │
          │      transpose           │
          │          ▼               ▼
          │      transpose       unsqueeze
          ▼          │               │
 (tInitialLayerNorm) │               │
          ▼          └─────►add◄─────┘
  (tInitialDropout)          │
          ▼                  │
       tStack◄───────────────┘
          ▼
  (tFinalLayerNorm)
          ▼
   (tFinalDropout)
          │
          ▼
      ┌────────┐
      │ output │
      └────────┘
Instance details Defined in Torch.GraduallyTyped.NN.Transformer.GTransformer Methods forward :: MonadThrow m => GTransformer () relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> (Tensor inputGradient inputLayout inputDevice inputDataType inputShape, Tensor relPosGradient relPosLayout relPosDevice relPosDataType relPosShape, Tensor attentionMaskGradient attentionMaskLayout attentionMaskDevice attentionMaskDataType attentionMaskShape) -> Generator generatorDevice -> m (output, Generator generatorOutputDevice) Source #
(HasForward posEnc (Tensor posGradient posLayout posDevice posDataType posShape) generatorDevice (Tensor posEncGradient posEncLayout posEncDevice posEncDataType posEncShape) generatorDevice0, HasForward initialLayerNorm (Tensor (inputGradient <|> posEncGradient) (inputLayout <+> posEncLayout) (inputDevice <+> posEncDevice) (inputDataType <+> posEncDataType) (BroadcastShapesF inputShape posEncShape)) generatorDevice0 tensor1 generatorDevice1, Catch (BroadcastShapesF inputShape posEncShape), HasForward initialDropout tensor1 generatorDevice1 tensor2 generatorDevice2, HasForward stack (tensor2, Tensor attentionMaskGradient attentionMaskLayout attentionMaskDevice attentionMaskDataType (UnsqueezeF ('SelectDim ('ByIndex 1 :: By Symbol Natural)) attentionMaskShape)) generatorDevice2 tensor3 generatorDevice3, Catch (UnsqueezeF ('SelectDim ('ByIndex 1 :: By Symbol Natural)) attentionMaskShape), HasForward finalLayerNorm tensor3 generatorDevice3 tensor4 generatorDevice4, HasForward finalDropout tensor4 generatorDevice4 output generatorOutputDevice) => HasForward (GTransformer posEnc () initialLayerNorm initialDropout stack finalLayerNorm finalDropout) (Tensor inputGradient inputLayout inputDevice inputDataType inputShape, Tensor posGradient posLayout posDevice posDataType posShape, Tensor attentionMaskGradient attentionMaskLayout attentionMaskDevice attentionMaskDataType attentionMaskShape) generatorDevice output generatorOutputDevice Source #	`HasForward` instance for `GTransformer` in an encoder configuration with absolute positional encoding rather than relative positional encoding.

 ┌───────┐  ┌─────┐  ┌───────────────┐
 │ input │  │ pos │  │ attentionMask │
 └───┬───┘  └──┬──┘  └───────┬───────┘
     │         │             │
     │         ▼             │
     │      tPosEnc          │
     │         │             │
     └──►add◄──┘             │
          │                  │
          ▼                  │
 (tInitialLayerNorm)         │
          ▼                  ▼
  (tInitialDropout)      unsqueeze
          ▼                  │
       tStack◄───────────────┘
          ▼
  (tFinalLayerNorm)
          ▼
   (tFinalDropout)
          │
          ▼
      ┌────────┐
      │ output │
      └────────┘
Instance details Defined in Torch.GraduallyTyped.NN.Transformer.GTransformer Methods forward :: MonadThrow m => GTransformer posEnc () initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> (Tensor inputGradient inputLayout inputDevice inputDataType inputShape, Tensor posGradient posLayout posDevice posDataType posShape, Tensor attentionMaskGradient attentionMaskLayout attentionMaskDevice attentionMaskDataType attentionMaskShape) -> Generator generatorDevice -> m (output, Generator generatorOutputDevice) Source #
(HasForward initialLayerNorm (Tensor decoderInputGradient decoderInputLayout decoderInputDevice decoderInputDataType decoderInputShape) generatorDevice tensor0 generatorDevice0, HasForward initialDropout tensor0 generatorDevice0 tensor1 generatorDevice1, HasForward relPosEnc (Tensor decoderRelPosGradient decoderRelPosLayout decoderRelPosDevice decoderRelPosDataType decoderRelPosShape) generatorDevice1 (Tensor decoderRelPosEncGradient decoderRelPosEncLayout decoderRelPosEncDevice decoderRelPosEncDataType decoderRelPosEncShape) generatorDevice2, HasForward stack (tensor1, Tensor encoderOutputGradient encoderOutputLayout encoderOutputDevice encoderOutputDataType encoderOutputShape, Tensor (decoderRelPosEncGradient <|> decoderAttentionMaskGradient) (decoderRelPosEncLayout <+> decoderAttentionMaskLayout) (decoderRelPosEncDevice <+> decoderAttentionMaskDevice) (decoderRelPosEncDataType <+> decoderAttentionMaskDataType) (BroadcastShapesF doubleTransposedDecoderRelPosEncShape unsqueezedDecoderAttentionMaskShape), Tensor crossAttentionMaskGradient crossAttentionMaskLayout crossAttentionMaskDevice crossAttentionMaskDataType unsqueezedCrossAttentionMaskShape) generatorDevice2 tensor3 generatorDevice3, transposedDecoderRelPosEncShape ~ TransposeF ('SelectDim ('ByIndex 2 :: By Symbol Natural)) ('SelectDim ('ByIndex 3 :: By Symbol Natural)) decoderRelPosEncShape, Catch transposedDecoderRelPosEncShape, doubleTransposedDecoderRelPosEncShape ~ TransposeF ('SelectDim ('ByIndex 1 :: By Symbol Natural)) ('SelectDim ('ByIndex 2 :: By Symbol Natural)) transposedDecoderRelPosEncShape, Catch doubleTransposedDecoderRelPosEncShape, unsqueezedDecoderAttentionMaskShape ~ UnsqueezeF ('SelectDim ('ByIndex 1 :: By Symbol Natural)) decoderAttentionMaskShape, Catch unsqueezedDecoderAttentionMaskShape, unsqueezedCrossAttentionMaskShape ~ UnsqueezeF ('SelectDim ('ByIndex 1 :: By Symbol Natural)) crossAttentionMaskShape, Catch unsqueezedCrossAttentionMaskShape, Catch (BroadcastShapesF doubleTransposedDecoderRelPosEncShape unsqueezedDecoderAttentionMaskShape), HasForward finalLayerNorm tensor3 generatorDevice3 tensor4 generatorDevice4, HasForward finalDropout tensor4 generatorDevice4 output generatorOutputDevice) => HasForward (GTransformer () relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout) (Tensor decoderInputGradient decoderInputLayout decoderInputDevice decoderInputDataType decoderInputShape, Tensor encoderOutputGradient encoderOutputLayout encoderOutputDevice encoderOutputDataType encoderOutputShape, Tensor decoderRelPosGradient decoderRelPosLayout decoderRelPosDevice decoderRelPosDataType decoderRelPosShape, Tensor decoderAttentionMaskGradient decoderAttentionMaskLayout decoderAttentionMaskDevice decoderAttentionMaskDataType decoderAttentionMaskShape, Tensor crossAttentionMaskGradient crossAttentionMaskLayout crossAttentionMaskDevice crossAttentionMaskDataType crossAttentionMaskShape) generatorDevice output generatorOutputDevice Source #	`HasForward` instance for `GTransformer` in a decoder configuration with relative positional encoding rather than absolute positional encoding.

   ┌──────────────┐  ┌───────────────┐  ┌───────────────┐  ┌──────────────────────┐  ┌────────────────────┐
   │ decoderInput │  │ encoderOutput │  │ decoderRelPos │  │ decoderAttentionMask │  │ crossAttentionMask │
   └──────┬───────┘  └───────┬───────┘  └───────┬───────┘  └──────────┬───────────┘  └─────────┬──────────┘
          │                  │                  │                     │                        │
          │                  │                  ▼                     │                        │
          │                  │             tRelPosEnc                 │                        │
          │                  │                  ▼                     │                        │
          │                  │              transpose                 │                        │
          │                  │                  ▼                     ▼                        ▼
          │                  │              transpose             unsqueeze                unsqueeze
          ▼                  │                  │                     │                        │
 (tInitialLayerNorm)         │                  │                     │                        │
          ▼                  │                  └────────►add◄────────┘                        │
  (tInitialDropout)          │                             │                                   │
          ▼                  │                             │                                   │
       tStack◄───────────────┘◄────────────────────────────┘◄──────────────────────────────────┘
          ▼
  (tFinalLayerNorm)
          ▼
   (tFinalDropout)
          │
          ▼
      ┌────────┐
      │ output │
      └────────┘
Instance details Defined in Torch.GraduallyTyped.NN.Transformer.GTransformer Methods forward :: MonadThrow m => GTransformer () relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> (Tensor decoderInputGradient decoderInputLayout decoderInputDevice decoderInputDataType decoderInputShape, Tensor encoderOutputGradient encoderOutputLayout encoderOutputDevice encoderOutputDataType encoderOutputShape, Tensor decoderRelPosGradient decoderRelPosLayout decoderRelPosDevice decoderRelPosDataType decoderRelPosShape, Tensor decoderAttentionMaskGradient decoderAttentionMaskLayout decoderAttentionMaskDevice decoderAttentionMaskDataType decoderAttentionMaskShape, Tensor crossAttentionMaskGradient crossAttentionMaskLayout crossAttentionMaskDevice crossAttentionMaskDataType crossAttentionMaskShape) -> Generator generatorDevice -> m (output, Generator generatorOutputDevice) Source #
(HasForward posEnc (Tensor decoderPosGradient decoderPosLayout decoderPosDevice decoderPosDataType decoderPosShape) generatorDevice (Tensor decoderPosEncGradient decoderPosEncLayout decoderPosEncDevice decoderPosEncDataType decoderPosEncShape) generatorDevice0, HasForward initialLayerNorm (Tensor (decoderInputGradient <|> decoderPosEncGradient) (decoderInputLayout <+> decoderPosEncLayout) (decoderInputDevice <+> decoderPosEncDevice) (decoderInputDataType <+> decoderPosEncDataType) (BroadcastShapesF decoderInputShape decoderPosEncShape)) generatorDevice0 tensor1 generatorDevice1, HasForward initialDropout tensor1 generatorDevice1 tensor2 generatorDevice2, HasForward stack (tensor2, Tensor encoderOutputGradient encoderOutputLayout encoderOutputDevice encoderOutputDataType encoderOutputShape, Tensor decoderAttentionMaskGradient decoderAttentionMaskLayout decoderAttentionMaskDevice decoderAttentionMaskDataType (UnsqueezeF ('SelectDim ('ByIndex 1 :: By Symbol Natural)) decoderAttentionMaskShape), Tensor crossAttentionMaskGradient crossAttentionMaskLayout crossAttentionMaskDevice crossAttentionMaskDataType (UnsqueezeF ('SelectDim ('ByIndex 1 :: By Symbol Natural)) crossAttentionMaskShape)) generatorDevice2 tensor3 generatorDevice3, Catch (UnsqueezeF ('SelectDim ('ByIndex 1 :: By Symbol Natural)) decoderAttentionMaskShape), Catch (UnsqueezeF ('SelectDim ('ByIndex 1 :: By Symbol Natural)) crossAttentionMaskShape), Catch (BroadcastShapesF decoderInputShape decoderPosEncShape), HasForward finalLayerNorm tensor3 generatorDevice3 tensor4 generatorDevice4, HasForward finalDropout tensor4 generatorDevice4 output generatorOutputDevice) => HasForward (GTransformer posEnc () initialLayerNorm initialDropout stack finalLayerNorm finalDropout) (Tensor decoderInputGradient decoderInputLayout decoderInputDevice decoderInputDataType decoderInputShape, Tensor encoderOutputGradient encoderOutputLayout encoderOutputDevice encoderOutputDataType encoderOutputShape, Tensor decoderPosGradient decoderPosLayout decoderPosDevice decoderPosDataType decoderPosShape, Tensor decoderAttentionMaskGradient decoderAttentionMaskLayout decoderAttentionMaskDevice decoderAttentionMaskDataType decoderAttentionMaskShape, Tensor crossAttentionMaskGradient crossAttentionMaskLayout crossAttentionMaskDevice crossAttentionMaskDataType crossAttentionMaskShape) generatorDevice output generatorOutputDevice Source #	`HasForward` instance for `GTransformer` in a decoder configuration with absolute positional encoding rather than relative positional encoding.

 ┌──────────────┐  ┌────────────┐  ┌───────────────┐  ┌──────────────────────┐  ┌────────────────────┐
 │ decoderInput │  │ decoderPos │  │ encoderOutput │  │ decoderAttentionMask │  │ crossAttentionMask │
 └──────┬───────┘  └──────┬─────┘  └───────┬───────┘  └──────────┬───────────┘  └──────────┬─────────┘
        │                 │                │                     │                         │
        │                 ▼                │                     │                         │
        │              tPosEnc             │                     │                         │
        │                 │                │                     │                         │
        └──────►add◄──────┘                │                     │                         │
                 │                         │                     │                         │
                 ▼                         │                     │                         │
        (tInitialLayerNorm)                │                     │                         │
                 ▼                         │                     ▼                         ▼
         (tInitialDropout)                 │                 unsqueeze                 unsqueeze
                 ▼                         │                     │                         │
              tStack◄──────────────────────┘◄────────────────────┘◄────────────────────────┘
                 ▼
         (tFinalLayerNorm)
                 ▼
          (tFinalDropout)
                 │
                 ▼
             ┌────────┐
             │ output │
             └────────┘
Instance details Defined in Torch.GraduallyTyped.NN.Transformer.GTransformer Methods forward :: MonadThrow m => GTransformer posEnc () initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> (Tensor decoderInputGradient decoderInputLayout decoderInputDevice decoderInputDataType decoderInputShape, Tensor encoderOutputGradient encoderOutputLayout encoderOutputDevice encoderOutputDataType encoderOutputShape, Tensor decoderPosGradient decoderPosLayout decoderPosDevice decoderPosDataType decoderPosShape, Tensor decoderAttentionMaskGradient decoderAttentionMaskLayout decoderAttentionMaskDevice decoderAttentionMaskDataType decoderAttentionMaskShape, Tensor crossAttentionMaskGradient crossAttentionMaskLayout crossAttentionMaskDevice crossAttentionMaskDataType crossAttentionMaskShape) -> Generator generatorDevice -> m (output, Generator generatorOutputDevice) Source #
type Rep (GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout) Source #
Instance details Defined in Torch.GraduallyTyped.NN.Transformer.GTransformer type Rep (GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout) = D1 ('MetaData "GTransformer" "Torch.GraduallyTyped.NN.Transformer.GTransformer" "hasktorch-gradually-typed-0.2.0.0-1KV1aIPzzbp6JpSr37tC1K" 'False) (C1 ('MetaCons "GTransformer" 'PrefixI 'True) ((S1 ('MetaSel ('Just "tPosEnc") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 posEnc) :: (S1 ('MetaSel ('Just "tRelPosEnc") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 relPosEnc) :: S1 ('MetaSel ('Just "tInitialLayerNorm") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 initialLayerNorm))) :: ((S1 ('MetaSel ('Just "tInitialDropout") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 initialDropout) :: S1 ('MetaSel ('Just "tStack") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 stack)) :: (S1 ('MetaSel ('Just "tFinalLayerNorm") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 finalLayerNorm) :: S1 ('MetaSel ('Just "tFinalDropout") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 finalDropout)))))
type ModelSpec (GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout) Source #
Instance details Defined in Torch.GraduallyTyped.NN.Transformer.GTransformer type ModelSpec (GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout) = GTransformer (ModelSpec posEnc) (ModelSpec relPosEnc) (ModelSpec initialLayerNorm) (ModelSpec initialDropout) (ModelSpec stack) (ModelSpec finalLayerNorm) (ModelSpec finalDropout)

type family TransformerEncoderF (style :: TransformerStyle) (numLayers :: Nat) (gradient :: Gradient RequiresGradient) (device :: Device (DeviceType Nat)) (dataType :: DataType DType) (headDim :: Dim (Name Symbol) (Size Nat)) (headEmbedDim :: Dim (Name Symbol) (Size Nat)) (embedDim :: Dim (Name Symbol) (Size Nat)) (inputEmbedDim :: Dim (Name Symbol) (Size Nat)) (ffnDim :: Dim (Name Symbol) (Size Nat)) (posEncDim :: Dim (Name Symbol) (Size Nat)) (hasDropout :: HasDropout) :: Type where ... Source #

Equations

TransformerEncoderF style numLayers gradient device dataType headDim headEmbedDim embedDim inputEmbedDim ffnDim posEncDim hasDropout = GTransformer (TEPosEncF style gradient device dataType inputEmbedDim posEncDim) (TERelPosEncF style gradient device dataType headDim posEncDim) (TEInitialLayerNormF style gradient device dataType inputEmbedDim) (TEInitialDropoutF style hasDropout) (TEStackF style numLayers gradient device dataType headDim headEmbedDim embedDim inputEmbedDim ffnDim hasDropout) (TEFinalLayerNormF style gradient device dataType inputEmbedDim) (TEFinalDropoutF style hasDropout)

type family TEPosEncF (style :: TransformerStyle) (gradient :: Gradient RequiresGradient) (device :: Device (DeviceType Nat)) (dataType :: DataType DType) (inputEmbedDim :: Dim (Name Symbol) (Size Nat)) (posEncDim :: Dim (Name Symbol) (Size Nat)) :: Type where ... Source #

Specifies the absolute positional encoding layer of a transformer encoder.

Equations

TEPosEncF 'T5 _ _ _ _ _ = ()
TEPosEncF 'ByT5 gradient device dataType inputEmbedDim posEncDim = TEPosEncF 'T5 gradient device dataType inputEmbedDim posEncDim
TEPosEncF 'BART gradient device dataType inputEmbedDim posEncDim = NamedModel (Embedding gradient ('Layout 'Dense) device dataType posEncDim inputEmbedDim 'Nothing)
TEPosEncF 'MBART gradient device dataType inputEmbedDim posEncDim = TEPosEncF 'BART gradient device dataType inputEmbedDim posEncDim
TEPosEncF 'Pegasus gradient device dataType inputEmbedDim posEncDim = TEPosEncF 'BART gradient device dataType inputEmbedDim posEncDim
TEPosEncF 'BERT gradient device dataType inputEmbedDim posEncDim = NamedModel (Embedding gradient ('Layout 'Dense) device dataType posEncDim inputEmbedDim 'Nothing)
TEPosEncF 'RoBERTa gradient device dataType inputEmbedDim posEncDim = TEPosEncF 'BERT gradient device dataType inputEmbedDim posEncDim

type family TERelPosEncF (style :: TransformerStyle) (gradient :: Gradient RequiresGradient) (device :: Device (DeviceType Nat)) (dataType :: DataType DType) (headDim :: Dim (Name Symbol) (Size Nat)) (posEncDim :: Dim (Name Symbol) (Size Nat)) :: Type where ... Source #

Specifies the relative positional encoding layer of a transformer encoder.

Equations

TERelPosEncF 'T5 gradient device dataType headDim posEncDim = NamedModel (Embedding gradient ('Layout 'Dense) device dataType posEncDim headDim 'Nothing)
TERelPosEncF 'ByT5 gradient device dataType headDim posEncDim = TERelPosEncF 'T5 gradient device dataType headDim posEncDim
TERelPosEncF 'BART _ _ _ _ _ = ()
TERelPosEncF 'MBART gradient device dataType headDim posEncDim = TERelPosEncF 'BART gradient device dataType headDim posEncDim
TERelPosEncF 'Pegasus gradient device dataType headDim posEncDim = TERelPosEncF 'BART gradient device dataType headDim posEncDim
TERelPosEncF 'BERT _ _ _ _ _ = ()
TERelPosEncF 'RoBERTa gradient device dataType headDim posEncDim = TERelPosEncF 'BERT gradient device dataType headDim posEncDim

type family TEInitialLayerNormF (style :: TransformerStyle) (gradient :: Gradient RequiresGradient) (device :: Device (DeviceType Nat)) (dataType :: DataType DType) (inputEmbedDim :: Dim (Name Symbol) (Size Nat)) :: Type where ... Source #

Specifies the initial layer normalization layer of a transformer encoder.

Equations

TEInitialLayerNormF 'T5 _ _ _ _ = ()
TEInitialLayerNormF 'ByT5 gradient device dataType inputEmbedDim = TEInitialLayerNormF 'T5 gradient device dataType inputEmbedDim
TEInitialLayerNormF 'BART gradient device dataType inputEmbedDim = NamedModel (LayerNorm 'WithBias gradient device dataType ('Shape '[inputEmbedDim]))
TEInitialLayerNormF 'MBART gradient device dataType inputEmbedDim = TEInitialLayerNormF 'BART gradient device dataType inputEmbedDim
TEInitialLayerNormF 'Pegasus _ _ _ _ = ()
TEInitialLayerNormF 'BERT gradient device dataType inputEmbedDim = NamedModel (LayerNorm 'WithBias gradient device dataType ('Shape '[inputEmbedDim]))
TEInitialLayerNormF 'RoBERTa gradient device dataType inputEmbedDim = TEInitialLayerNormF 'BERT gradient device dataType inputEmbedDim

type family TEInitialDropoutF (style :: TransformerStyle) (hasDropout :: HasDropout) :: Type where ... Source #

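Specifies the initial dropout layer of a transformer encoder.

The equations of this family are listed further below, after the documentation of transformerDecoderSpec.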

type family TEStackF (style :: TransformerStyle) (numLayers :: Nat) (gradient :: Gradient RequiresGradient) (device :: Device (DeviceType Nat)) (dataType :: DataType DType) (headDim :: Dim (Name Symbol) (Size Nat)) (headEmbedDim :: Dim (Name Symbol) (Size Nat)) (embedDim :: Dim (Name Symbol) (Size Nat)) (inputEmbedDim :: Dim (Name Symbol) (Size Nat)) (ffnDim :: Dim (Name Symbol) (Size Nat)) (hasDropout :: HasDropout) :: Type where ... Source #

Specifies the transformer block stack of a transformer encoder.

Equations

TEStackF style numLayers gradient device dataType headDim headEmbedDim embedDim inputEmbedDim ffnDim hasDropout = NamedModel (EncoderStackF style numLayers gradient device dataType headDim headEmbedDim embedDim inputEmbedDim ffnDim hasDropout)

type family TEFinalLayerNormF (style :: TransformerStyle) (gradient :: Gradient RequiresGradient) (device :: Device (DeviceType Nat)) (dataType :: DataType DType) (inputEmbedDim :: Dim (Name Symbol) (Size Nat)) :: Type where ... Source #

Specifies the final layer normalization layer of a transformer encoder.

Equations

TEFinalLayerNormF 'T5 gradient device dataType inputEmbedDim = NamedModel (LayerNorm 'WithoutBias gradient device dataType ('Shape '[inputEmbedDim]))
TEFinalLayerNormF 'ByT5 gradient device dataType inputEmbedDim = TEFinalLayerNormF 'T5 gradient device dataType inputEmbedDim
TEFinalLayerNormF 'BART _ _ _ _ = ()
TEFinalLayerNormF 'MBART gradient device dataType inputEmbedDim = TEFinalLayerNormF 'BART gradient device dataType inputEmbedDim
TEFinalLayerNormF 'Pegasus gradient device dataType inputEmbedDim = NamedModel (LayerNorm 'WithBias gradient device dataType ('Shape '[inputEmbedDim]))
TEFinalLayerNormF 'BERT _ _ _ _ = ()
TEFinalLayerNormF 'RoBERTa gradient device dataType inputEmbedDim = TEFinalLayerNormF 'BERT gradient device dataType inputEmbedDim

type family TEFinalDropoutF (style :: TransformerStyle) (hasDropout :: HasDropout) :: Type where ... Source #

Specifies the final dropout layer of a transformer encoder.

Equations

TEFinalDropoutF 'T5 'WithDropout = Dropout
TEFinalDropoutF 'ByT5 'WithDropout = Dropout
TEFinalDropoutF 'BART _ = ()
TEFinalDropoutF 'MBART _ = ()
TEFinalDropoutF 'Pegasus _ = ()
TEFinalDropoutF 'BERT _ = ()
TEFinalDropoutF 'RoBERTa _ = ()
TEFinalDropoutF _ 'WithoutDropout = ()

transformerEncoderSpec :: forall style numLayers gradient device dataType headDim headEmbedDim embedDim inputEmbedDim ffnDim posEncDim hasDropout. STransformerStyle style -> SNat numLayers -> SGradient gradient -> SDevice device -> SDataType dataType -> SDim headDim -> SDim headEmbedDim -> SDim embedDim -> SDim inputEmbedDim -> SDim ffnDim -> SDim posEncDim -> SHasDropout hasDropout -> Double -> Double -> ModelSpec (TransformerEncoderF style numLayers gradient device dataType headDim headEmbedDim embedDim inputEmbedDim ffnDim posEncDim hasDropout) Source #

Specifies the parameters of a transformer in an encoder configuration.

style: the style of the transformer stack, e.g. ST5, SByT5, etc.
numLayers: the number of layers in the transformer stack.
gradient: whether to compute the gradient of the stack's parameters.
device: the computational device on which the stack is allocated.
dataType: the data type of the stack's parameters.
headDim: the dimension of all transformer heads in the stack.
headEmbedDim: the dimension of the transformer head embeddings.
embedDim: the dimension of the transformer embeddings.
inputEmbedDim: the dimension of the transformer query embeddings.
ffnDim: the dimension of the feed-forward network.
posEncDim: the dimension of the positional encoding.
dropoutP: the dropout rate.
eps: the epsilon value for numerical stability of the layer normalization.

type family TransformerDecoderF (style :: TransformerStyle) (numLayers :: Nat) (gradient :: Gradient RequiresGradient) (device :: Device (DeviceType Nat)) (dataType :: DataType DType) (headDim :: Dim (Name Symbol) (Size Nat)) (headEmbedDim :: Dim (Name Symbol) (Size Nat)) (embedDim :: Dim (Name Symbol) (Size Nat)) (decoderInputEmbedDim :: Dim (Name Symbol) (Size Nat)) (encoderOutputEmbedDim :: Dim (Name Symbol) (Size Nat)) (ffnDim :: Dim (Name Symbol) (Size Nat)) (posEncDim :: Dim (Name Symbol) (Size Nat)) (hasDropout :: HasDropout) :: Type where ... Source #

Equations

TransformerDecoderF style numLayers gradient device dataType headDim headEmbedDim embedDim decoderInputEmbedDim encoderOutputEmbedDim ffnDim posEncDim hasDropout = GTransformer (TDPosEncF style gradient device dataType decoderInputEmbedDim posEncDim) (TDRelPosEncF style gradient device dataType headDim posEncDim) (TDInitialLayerNormF style gradient device dataType decoderInputEmbedDim) (TDInitialDropoutF style hasDropout) (TDStackF style numLayers gradient device dataType headDim headEmbedDim embedDim decoderInputEmbedDim encoderOutputEmbedDim ffnDim hasDropout) (TDFinalLayerNormF style gradient device dataType decoderInputEmbedDim) (TDFinalDropoutF style hasDropout)

type family TDPosEncF (style :: TransformerStyle) (gradient :: Gradient RequiresGradient) (device :: Device (DeviceType Nat)) (dataType :: DataType DType) (inputEmbedDim :: Dim (Name Symbol) (Size Nat)) (posEncDim :: Dim (Name Symbol) (Size Nat)) :: Type where ... Source #

Specifies the absolute positional encoding layer of a transformer decoder.

Equations

TDPosEncF 'T5 _ _ _ _ _ = ()
TDPosEncF 'ByT5 gradient device dataType inputEmbedDim posEncDim = TDPosEncF 'T5 gradient device dataType inputEmbedDim posEncDim
TDPosEncF 'BART gradient device dataType inputEmbedDim posEncDim = NamedModel (Embedding gradient ('Layout 'Dense) device dataType posEncDim inputEmbedDim 'Nothing)
TDPosEncF 'MBART gradient device dataType inputEmbedDim posEncDim = TDPosEncF 'BART gradient device dataType inputEmbedDim posEncDim
TDPosEncF 'Pegasus gradient device dataType inputEmbedDim posEncDim = TDPosEncF 'BART gradient device dataType inputEmbedDim posEncDim

type family TDRelPosEncF (style :: TransformerStyle) (gradient :: Gradient RequiresGradient) (device :: Device (DeviceType Nat)) (dataType :: DataType DType) (headDim :: Dim (Name Symbol) (Size Nat)) (posEncDim :: Dim (Name Symbol) (Size Nat)) :: Type where ... Source #

Specifies the relative positional encoding layer of a transformer decoder.

Equations

TDRelPosEncF 'T5 gradient device dataType headDim posEncDim = NamedModel (Embedding gradient ('Layout 'Dense) device dataType posEncDim headDim 'Nothing)
TDRelPosEncF 'ByT5 gradient device dataType headDim posEncDim = TDRelPosEncF 'T5 gradient device dataType headDim posEncDim
TDRelPosEncF 'BART _ _ _ _ _ = ()
TDRelPosEncF 'MBART gradient device dataType headDim posEncDim = TDRelPosEncF 'BART gradient device dataType headDim posEncDim
TDRelPosEncF 'Pegasus gradient device dataType headDim posEncDim = TDRelPosEncF 'BART gradient device dataType headDim posEncDim

type family TDInitialLayerNormF (style :: TransformerStyle) (gradient :: Gradient RequiresGradient) (device :: Device (DeviceType Nat)) (dataType :: DataType DType) (inputEmbedDim :: Dim (Name Symbol) (Size Nat)) :: Type where ... Source #

Specifies the initial layer normalization layer of a transformer decoder.

Equations

TDInitialLayerNormF 'T5 _ _ _ _ = ()
TDInitialLayerNormF 'ByT5 gradient device dataType inputEmbedDim = TDInitialLayerNormF 'T5 gradient device dataType inputEmbedDim
TDInitialLayerNormF 'BART gradient device dataType inputEmbedDim = NamedModel (LayerNorm 'WithBias gradient device dataType ('Shape '[inputEmbedDim]))
TDInitialLayerNormF 'MBART gradient device dataType inputEmbedDim = TDInitialLayerNormF 'BART gradient device dataType inputEmbedDim
TDInitialLayerNormF 'Pegasus _ _ _ _ = ()

type family TDInitialDropoutF (style :: TransformerStyle) (hasDropout :: HasDropout) :: Type where ... Source #

Specifies the initial dropout layer of a transformer decoder.

Equations

TDInitialDropoutF 'T5 'WithDropout = Dropout
TDInitialDropoutF 'ByT5 'WithDropout = Dropout
TDInitialDropoutF 'BART 'WithDropout = Dropout
TDInitialDropoutF 'MBART 'WithDropout = Dropout
TDInitialDropoutF 'Pegasus 'WithDropout = Dropout
TDInitialDropoutF _ 'WithoutDropout = ()

type family TDStackF (style :: TransformerStyle) (numLayers :: Nat) (gradient :: Gradient RequiresGradient) (device :: Device (DeviceType Nat)) (dataType :: DataType DType) (headDim :: Dim (Name Symbol) (Size Nat)) (headEmbedDim :: Dim (Name Symbol) (Size Nat)) (embedDim :: Dim (Name Symbol) (Size Nat)) (decoderInputEmbedDim :: Dim (Name Symbol) (Size Nat)) (encoderOutputEmbedDim :: Dim (Name Symbol) (Size Nat)) (ffnDim :: Dim (Name Symbol) (Size Nat)) (hasDropout :: HasDropout) :: Type where ... Source #

Specifies the transformer block stack of a transformer decoder.

Equations

TDStackF style numLayers gradient device dataType headDim headEmbedDim embedDim decoderInputEmbedDim encoderOutputEmbedDim ffnDim hasDropout = NamedModel (DecoderStackF style numLayers gradient device dataType headDim headEmbedDim embedDim decoderInputEmbedDim encoderOutputEmbedDim ffnDim hasDropout)

type family TDFinalLayerNormF (style :: TransformerStyle) (gradient :: Gradient RequiresGradient) (device :: Device (DeviceType Nat)) (dataType :: DataType DType) (inputEmbedDim :: Dim (Name Symbol) (Size Nat)) :: Type where ... Source #

Specifies the final layer normalization layer of a transformer decoder.

Equations

TDFinalLayerNormF 'T5 gradient device dataType inputEmbedDim = NamedModel (LayerNorm 'WithoutBias gradient device dataType ('Shape '[inputEmbedDim]))
TDFinalLayerNormF 'ByT5 gradient device dataType inputEmbedDim = TDFinalLayerNormF 'T5 gradient device dataType inputEmbedDim
TDFinalLayerNormF 'BART _ _ _ _ = ()
TDFinalLayerNormF 'MBART gradient device dataType inputEmbedDim = TDFinalLayerNormF 'BART gradient device dataType inputEmbedDim
TDFinalLayerNormF 'Pegasus gradient device dataType inputEmbedDim = NamedModel (LayerNorm 'WithBias gradient device dataType ('Shape '[inputEmbedDim]))

type family TDFinalDropoutF (style :: TransformerStyle) (hasDropout :: HasDropout) :: Type where ... Source #

Specifies the final dropout layer of a transformer decoder.

Equations

TDFinalDropoutF 'T5 'WithDropout = Dropout
TDFinalDropoutF 'ByT5 'WithDropout = Dropout
TDFinalDropoutF 'BART _ = ()
TDFinalDropoutF 'MBART _ = ()
TDFinalDropoutF 'Pegasus _ = ()
TDFinalDropoutF _ 'WithoutDropout = ()

transformerDecoderSpec :: forall style numLayers gradient device dataType headDim headEmbedDim embedDim decoderInputEmbedDim encoderOutputEmbedDim ffnDim posEncDim hasDropout. STransformerStyle style -> SNat numLayers -> SGradient gradient -> SDevice device -> SDataType dataType -> SDim headDim -> SDim headEmbedDim -> SDim embedDim -> SDim decoderInputEmbedDim -> SDim encoderOutputEmbedDim -> SDim ffnDim -> SDim posEncDim -> SHasDropout hasDropout -> Double -> Double -> ModelSpec (TransformerDecoderF style numLayers gradient device dataType headDim headEmbedDim embedDim decoderInputEmbedDim encoderOutputEmbedDim ffnDim posEncDim hasDropout) Source #

Specifies the parameters of a transformer in a decoder configuration.

style: the style of the transformer stack, e.g. ST5, SByT5, etc.
numLayers: the number of layers in the transformer stack.
gradient: whether to compute the gradient of the stack's parameters.
device: the computational device on which the stack is allocated.
dataType: the data type of the stack's parameters.
headDim: the dimension of all transformer heads in the stack.
headEmbedDim: the dimension of the transformer head embeddings.
embedDim: the dimension of the transformer embeddings.
decoderInputEmbedDim: the dimension of the decoder input embeddings.
encoderOutputEmbedDim: the dimension of the encoder output embeddings.
ffnDim: the dimension of the feed-forward network.
posEncDim: the dimension of the positional encoding.
dropoutP: the dropout rate.
eps: the epsilon value for numerical stability of the layer normalization.

Equations of TEInitialDropoutF (the initial dropout layer of a transformer encoder):

TEInitialDropoutF 'T5 'WithDropout = Dropout
TEInitialDropoutF 'ByT5 'WithDropout = Dropout
TEInitialDropoutF 'BART 'WithDropout = Dropout
TEInitialDropoutF 'MBART 'WithDropout = Dropout
TEInitialDropoutF 'Pegasus 'WithDropout = Dropout
TEInitialDropoutF 'BERT 'WithDropout = Dropout
TEInitialDropoutF 'RoBERTa 'WithDropout = Dropout
TEInitialDropoutF _ 'WithoutDropout = ()