Safe Haskell | Safe-Inferred |
---|---|
Language | Haskell2010 |
Synopsis
- data GTransformer (posEnc :: Type) (relPosEnc :: Type) (initialLayerNorm :: Type) (initialDropout :: Type) (stack :: Type) (finalLayerNorm :: Type) (finalDropout :: Type) where
- GTransformer :: forall posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout. {..} -> GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout
- type family TransformerEncoderF (style :: TransformerStyle) (numLayers :: Nat) (gradient :: Gradient RequiresGradient) (device :: Device (DeviceType Nat)) (dataType :: DataType DType) (headDim :: Dim (Name Symbol) (Size Nat)) (headEmbedDim :: Dim (Name Symbol) (Size Nat)) (embedDim :: Dim (Name Symbol) (Size Nat)) (inputEmbedDim :: Dim (Name Symbol) (Size Nat)) (ffnDim :: Dim (Name Symbol) (Size Nat)) (posEncDim :: Dim (Name Symbol) (Size Nat)) (hasDropout :: HasDropout) :: Type where ...
- type family TEPosEncF (style :: TransformerStyle) (gradient :: Gradient RequiresGradient) (device :: Device (DeviceType Nat)) (dataType :: DataType DType) (inputEmbedDim :: Dim (Name Symbol) (Size Nat)) (posEncDim :: Dim (Name Symbol) (Size Nat)) :: Type where ...
- type family TERelPosEncF (style :: TransformerStyle) (gradient :: Gradient RequiresGradient) (device :: Device (DeviceType Nat)) (dataType :: DataType DType) (headDim :: Dim (Name Symbol) (Size Nat)) (posEncDim :: Dim (Name Symbol) (Size Nat)) :: Type where ...
- type family TEInitialLayerNormF (style :: TransformerStyle) (gradient :: Gradient RequiresGradient) (device :: Device (DeviceType Nat)) (dataType :: DataType DType) (inputEmbedDim :: Dim (Name Symbol) (Size Nat)) :: Type where ...
- type family TEInitialDropoutF (style :: TransformerStyle) (hasDropout :: HasDropout) :: Type where ...
- type family TEStackF (style :: TransformerStyle) (numLayers :: Nat) (gradient :: Gradient RequiresGradient) (device :: Device (DeviceType Nat)) (dataType :: DataType DType) (headDim :: Dim (Name Symbol) (Size Nat)) (headEmbedDim :: Dim (Name Symbol) (Size Nat)) (embedDim :: Dim (Name Symbol) (Size Nat)) (inputEmbedDim :: Dim (Name Symbol) (Size Nat)) (ffnDim :: Dim (Name Symbol) (Size Nat)) (hasDropout :: HasDropout) :: Type where ...
- type family TEFinalLayerNormF (style :: TransformerStyle) (gradient :: Gradient RequiresGradient) (device :: Device (DeviceType Nat)) (dataType :: DataType DType) (inputEmbedDim :: Dim (Name Symbol) (Size Nat)) :: Type where ...
- type family TEFinalDropoutF (style :: TransformerStyle) (hasDropout :: HasDropout) :: Type where ...
- transformerEncoderSpec :: forall style numLayers gradient device dataType headDim headEmbedDim embedDim inputEmbedDim ffnDim posEncDim hasDropout. STransformerStyle style -> SNat numLayers -> SGradient gradient -> SDevice device -> SDataType dataType -> SDim headDim -> SDim headEmbedDim -> SDim embedDim -> SDim inputEmbedDim -> SDim ffnDim -> SDim posEncDim -> SHasDropout hasDropout -> Double -> Double -> ModelSpec (TransformerEncoderF style numLayers gradient device dataType headDim headEmbedDim embedDim inputEmbedDim ffnDim posEncDim hasDropout)
- type family TransformerDecoderF (style :: TransformerStyle) (numLayers :: Nat) (gradient :: Gradient RequiresGradient) (device :: Device (DeviceType Nat)) (dataType :: DataType DType) (headDim :: Dim (Name Symbol) (Size Nat)) (headEmbedDim :: Dim (Name Symbol) (Size Nat)) (embedDim :: Dim (Name Symbol) (Size Nat)) (decoderInputEmbedDim :: Dim (Name Symbol) (Size Nat)) (encoderOutputEmbedDim :: Dim (Name Symbol) (Size Nat)) (ffnDim :: Dim (Name Symbol) (Size Nat)) (posEncDim :: Dim (Name Symbol) (Size Nat)) (hasDropout :: HasDropout) :: Type where ...
- type family TDPosEncF (style :: TransformerStyle) (gradient :: Gradient RequiresGradient) (device :: Device (DeviceType Nat)) (dataType :: DataType DType) (inputEmbedDim :: Dim (Name Symbol) (Size Nat)) (posEncDim :: Dim (Name Symbol) (Size Nat)) :: Type where ...
- type family TDRelPosEncF (style :: TransformerStyle) (gradient :: Gradient RequiresGradient) (device :: Device (DeviceType Nat)) (dataType :: DataType DType) (headDim :: Dim (Name Symbol) (Size Nat)) (posEncDim :: Dim (Name Symbol) (Size Nat)) :: Type where ...
- type family TDInitialLayerNormF (style :: TransformerStyle) (gradient :: Gradient RequiresGradient) (device :: Device (DeviceType Nat)) (dataType :: DataType DType) (inputEmbedDim :: Dim (Name Symbol) (Size Nat)) :: Type where ...
- type family TDInitialDropoutF (style :: TransformerStyle) (hasDropout :: HasDropout) :: Type where ...
- type family TDStackF (style :: TransformerStyle) (numLayers :: Nat) (gradient :: Gradient RequiresGradient) (device :: Device (DeviceType Nat)) (dataType :: DataType DType) (headDim :: Dim (Name Symbol) (Size Nat)) (headEmbedDim :: Dim (Name Symbol) (Size Nat)) (embedDim :: Dim (Name Symbol) (Size Nat)) (decoderInputEmbedDim :: Dim (Name Symbol) (Size Nat)) (encoderOutputEmbedDim :: Dim (Name Symbol) (Size Nat)) (ffnDim :: Dim (Name Symbol) (Size Nat)) (hasDropout :: HasDropout) :: Type where ...
- type family TDFinalLayerNormF (style :: TransformerStyle) (gradient :: Gradient RequiresGradient) (device :: Device (DeviceType Nat)) (dataType :: DataType DType) (inputEmbedDim :: Dim (Name Symbol) (Size Nat)) :: Type where ...
- type family TDFinalDropoutF (style :: TransformerStyle) (hasDropout :: HasDropout) :: Type where ...
- transformerDecoderSpec :: forall style numLayers gradient device dataType headDim headEmbedDim embedDim decoderInputEmbedDim encoderOutputEmbedDim ffnDim posEncDim hasDropout. STransformerStyle style -> SNat numLayers -> SGradient gradient -> SDevice device -> SDataType dataType -> SDim headDim -> SDim headEmbedDim -> SDim embedDim -> SDim decoderInputEmbedDim -> SDim encoderOutputEmbedDim -> SDim ffnDim -> SDim posEncDim -> SHasDropout hasDropout -> Double -> Double -> ModelSpec (TransformerDecoderF style numLayers gradient device dataType headDim headEmbedDim embedDim decoderInputEmbedDim encoderOutputEmbedDim ffnDim posEncDim hasDropout)
Documentation
data GTransformer (posEnc :: Type) (relPosEnc :: Type) (initialLayerNorm :: Type) (initialDropout :: Type) (stack :: Type) (finalLayerNorm :: Type) (finalDropout :: Type) where Source #
Generic transformer. It can be specialized to either an encoder or a decoder.
posEnc
: an absolute positional encoding layer as used by, e.g., BERT.

relPosEnc
: a relative positional encoding layer as used by, e.g., T5.

initialLayerNorm
: a layer normalization layer for the embeddings.

initialDropout
: a dropout layer for the embeddings.

stack
: a stack of transformer blocks.

finalLayerNorm
: the final layer normalization layer.

finalDropout
: the final dropout layer.
GTransformer | |
|
Instances
Generic (GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout) Source # | |
Defined in Torch.GraduallyTyped.NN.Transformer.GTransformer type Rep (GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout) :: Type -> Type Source # from :: GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> Rep (GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout) x Source # to :: Rep (GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout) x -> GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout Source # | |
(Show posEnc, Show relPosEnc, Show initialLayerNorm, Show initialDropout, Show stack, Show finalLayerNorm, Show finalDropout) => Show (GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout) Source # | |
Defined in Torch.GraduallyTyped.NN.Transformer.GTransformer showsPrec :: Int -> GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> ShowS Source # show :: GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> String Source # showList :: [GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout] -> ShowS Source # | |
(Eq posEnc, Eq relPosEnc, Eq initialLayerNorm, Eq initialDropout, Eq stack, Eq finalLayerNorm, Eq finalDropout) => Eq (GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout) Source # | |
Defined in Torch.GraduallyTyped.NN.Transformer.GTransformer (==) :: GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> Bool Source # (/=) :: GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> Bool Source # | |
(Ord posEnc, Ord relPosEnc, Ord initialLayerNorm, Ord initialDropout, Ord stack, Ord finalLayerNorm, Ord finalDropout) => Ord (GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout) Source # | |
Defined in Torch.GraduallyTyped.NN.Transformer.GTransformer compare :: GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> Ordering Source # (<) :: GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> Bool Source # (<=) :: GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> Bool Source # (>) :: GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> Bool Source # (>=) :: GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> Bool Source # max :: GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout Source # min :: GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout Source # | |
(HasStateDict posEnc, HasStateDict relPosEnc, HasStateDict initialLayerNorm, HasStateDict initialDropout, HasStateDict stack, HasStateDict finalLayerNorm, HasStateDict finalDropout) => HasStateDict (GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout) Source # | |
Defined in Torch.GraduallyTyped.NN.Transformer.GTransformer fromStateDict :: (MonadIO m, MonadThrow m, MonadState StateDict m) => ModelSpec (GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout) -> StateDictKey -> m (GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout) Source # toStateDict :: (MonadThrow m, MonadState StateDict m) => StateDictKey -> GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> m () Source # | |
(HasInitialize posEnc generatorDevice posEnc' generatorDevice0, HasInitialize relPosEnc generatorDevice0 relPosEnc' generatorDevice1, HasInitialize initialLayerNorm generatorDevice1 initialLayerNorm' generatorDevice2, HasInitialize initialDropout generatorDevice2 initialDropout' generatorDevice3, HasInitialize stack generatorDevice3 stack' generatorDevice4, HasInitialize finalLayerNorm generatorDevice4 finalLayerNorm' generatorDevice5, HasInitialize finalDropout generatorDevice5 finalDropout' generatorOutputDevice) => HasInitialize (GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout) generatorDevice (GTransformer posEnc' relPosEnc' initialLayerNorm' initialDropout' stack' finalLayerNorm' finalDropout') generatorOutputDevice Source # | |
Defined in Torch.GraduallyTyped.NN.Transformer.GTransformer initialize :: MonadThrow m => ModelSpec (GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout) -> Generator generatorDevice -> m (GTransformer posEnc' relPosEnc' initialLayerNorm' initialDropout' stack' finalLayerNorm' finalDropout', Generator generatorOutputDevice) Source # | |
(HasForward initialLayerNorm (Tensor inputGradient inputLayout inputDevice inputDataType inputShape) generatorDevice tensor0 generatorDevice0, HasForward initialDropout tensor0 generatorDevice0 tensor1 generatorDevice1, HasForward relPosEnc (Tensor relPosGradient relPosLayout relPosDevice relPosDataType relPosShape) generatorDevice1 (Tensor relPosEncGradient relPosEncLayout relPosEncDevice relPosEncDataType relPosEncShape) generatorDevice2, HasForward stack (tensor1, Tensor (relPosEncGradient <|> attentionMaskGradient) (relPosEncLayout <+> attentionMaskLayout) (relPosEncDevice <+> attentionMaskDevice) (relPosEncDataType <+> attentionMaskDataType) (BroadcastShapesF doubleTransposedRelPosEncShape unsqueezedAttentionMaskShape)) generatorDevice2 tensor3 generatorDevice3, transposedRelPosEncShape ~ TransposeF ('SelectDim ('ByIndex 2 :: By Symbol Natural)) ('SelectDim ('ByIndex 3 :: By Symbol Natural)) relPosEncShape, Catch transposedRelPosEncShape, doubleTransposedRelPosEncShape ~ TransposeF ('SelectDim ('ByIndex 1 :: By Symbol Natural)) ('SelectDim ('ByIndex 2 :: By Symbol Natural)) transposedRelPosEncShape, Catch doubleTransposedRelPosEncShape, unsqueezedAttentionMaskShape ~ UnsqueezeF ('SelectDim ('ByIndex 1 :: By Symbol Natural)) attentionMaskShape, Catch unsqueezedAttentionMaskShape, Catch (BroadcastShapesF doubleTransposedRelPosEncShape unsqueezedAttentionMaskShape), HasForward finalLayerNorm tensor3 generatorDevice3 tensor4 generatorDevice4, HasForward finalDropout tensor4 generatorDevice4 output generatorOutputDevice) => HasForward (GTransformer () relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout) (Tensor inputGradient inputLayout inputDevice inputDataType inputShape, Tensor relPosGradient relPosLayout relPosDevice relPosDataType relPosShape, Tensor attentionMaskGradient attentionMaskLayout attentionMaskDevice attentionMaskDataType attentionMaskShape) generatorDevice output generatorOutputDevice Source # |
     ┌───────┐  ┌────────┐  ┌───────────────┐
     │ input │  │ relPos │  │ attentionMask │
     └───┬───┘  └───┬────┘  └───────┬───────┘
         │          │               │
         │          ▼               │
         │      tRelPosEnc          │
         │          ▼               │
         │      transpose           │
         │          ▼               ▼
         │      transpose       unsqueeze
         ▼          │               │
(tInitialLayerNorm) │               │
         ▼          └─────►add◄─────┘
 (tInitialDropout)          │
         ▼                  │
      tStack◄───────────────┘
         ▼
 (tFinalLayerNorm)
         ▼
  (tFinalDropout)
         │
         ▼
     ┌────────┐
     │ output │
     └────────┘
|
Defined in Torch.GraduallyTyped.NN.Transformer.GTransformer forward :: MonadThrow m => GTransformer () relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> (Tensor inputGradient inputLayout inputDevice inputDataType inputShape, Tensor relPosGradient relPosLayout relPosDevice relPosDataType relPosShape, Tensor attentionMaskGradient attentionMaskLayout attentionMaskDevice attentionMaskDataType attentionMaskShape) -> Generator generatorDevice -> m (output, Generator generatorOutputDevice) Source # | |
(HasForward posEnc (Tensor posGradient posLayout posDevice posDataType posShape) generatorDevice (Tensor posEncGradient posEncLayout posEncDevice posEncDataType posEncShape) generatorDevice0, HasForward initialLayerNorm (Tensor (inputGradient <|> posEncGradient) (inputLayout <+> posEncLayout) (inputDevice <+> posEncDevice) (inputDataType <+> posEncDataType) (BroadcastShapesF inputShape posEncShape)) generatorDevice0 tensor1 generatorDevice1, Catch (BroadcastShapesF inputShape posEncShape), HasForward initialDropout tensor1 generatorDevice1 tensor2 generatorDevice2, HasForward stack (tensor2, Tensor attentionMaskGradient attentionMaskLayout attentionMaskDevice attentionMaskDataType (UnsqueezeF ('SelectDim ('ByIndex 1 :: By Symbol Natural)) attentionMaskShape)) generatorDevice2 tensor3 generatorDevice3, Catch (UnsqueezeF ('SelectDim ('ByIndex 1 :: By Symbol Natural)) attentionMaskShape), HasForward finalLayerNorm tensor3 generatorDevice3 tensor4 generatorDevice4, HasForward finalDropout tensor4 generatorDevice4 output generatorOutputDevice) => HasForward (GTransformer posEnc () initialLayerNorm initialDropout stack finalLayerNorm finalDropout) (Tensor inputGradient inputLayout inputDevice inputDataType inputShape, Tensor posGradient posLayout posDevice posDataType posShape, Tensor attentionMaskGradient attentionMaskLayout attentionMaskDevice attentionMaskDataType attentionMaskShape) generatorDevice output generatorOutputDevice Source # |
┌───────┐  ┌─────┐  ┌───────────────┐
│ input │  │ pos │  │ attentionMask │
└───┬───┘  └──┬──┘  └───────┬───────┘
    │         │             │
    │         ▼             │
    │      tPosEnc          │
    │         │             │
    └──►add◄──┘             │
         │                  │
         ▼                  │
(tInitialLayerNorm)         │
         ▼                  ▼
 (tInitialDropout)      unsqueeze
         ▼                  │
      tStack◄───────────────┘
         ▼
 (tFinalLayerNorm)
         ▼
  (tFinalDropout)
         │
         ▼
     ┌────────┐
     │ output │
     └────────┘
|
Defined in Torch.GraduallyTyped.NN.Transformer.GTransformer forward :: MonadThrow m => GTransformer posEnc () initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> (Tensor inputGradient inputLayout inputDevice inputDataType inputShape, Tensor posGradient posLayout posDevice posDataType posShape, Tensor attentionMaskGradient attentionMaskLayout attentionMaskDevice attentionMaskDataType attentionMaskShape) -> Generator generatorDevice -> m (output, Generator generatorOutputDevice) Source # | |
(HasForward initialLayerNorm (Tensor decoderInputGradient decoderInputLayout decoderInputDevice decoderInputDataType decoderInputShape) generatorDevice tensor0 generatorDevice0, HasForward initialDropout tensor0 generatorDevice0 tensor1 generatorDevice1, HasForward relPosEnc (Tensor decoderRelPosGradient decoderRelPosLayout decoderRelPosDevice decoderRelPosDataType decoderRelPosShape) generatorDevice1 (Tensor decoderRelPosEncGradient decoderRelPosEncLayout decoderRelPosEncDevice decoderRelPosEncDataType decoderRelPosEncShape) generatorDevice2, HasForward stack (tensor1, Tensor encoderOutputGradient encoderOutputLayout encoderOutputDevice encoderOutputDataType encoderOutputShape, Tensor (decoderRelPosEncGradient <|> decoderAttentionMaskGradient) (decoderRelPosEncLayout <+> decoderAttentionMaskLayout) (decoderRelPosEncDevice <+> decoderAttentionMaskDevice) (decoderRelPosEncDataType <+> decoderAttentionMaskDataType) (BroadcastShapesF doubleTransposedDecoderRelPosEncShape unsqueezedDecoderAttentionMaskShape), Tensor crossAttentionMaskGradient crossAttentionMaskLayout crossAttentionMaskDevice crossAttentionMaskDataType unsqueezedCrossAttentionMaskShape) generatorDevice2 tensor3 generatorDevice3, transposedDecoderRelPosEncShape ~ TransposeF ('SelectDim ('ByIndex 2 :: By Symbol Natural)) ('SelectDim ('ByIndex 3 :: By Symbol Natural)) decoderRelPosEncShape, Catch transposedDecoderRelPosEncShape, doubleTransposedDecoderRelPosEncShape ~ TransposeF ('SelectDim ('ByIndex 1 :: By Symbol Natural)) ('SelectDim ('ByIndex 2 :: By Symbol Natural)) transposedDecoderRelPosEncShape, Catch doubleTransposedDecoderRelPosEncShape, unsqueezedDecoderAttentionMaskShape ~ UnsqueezeF ('SelectDim ('ByIndex 1 :: By Symbol Natural)) decoderAttentionMaskShape, Catch unsqueezedDecoderAttentionMaskShape, unsqueezedCrossAttentionMaskShape ~ UnsqueezeF ('SelectDim ('ByIndex 1 :: By Symbol Natural)) crossAttentionMaskShape, Catch unsqueezedCrossAttentionMaskShape, Catch (BroadcastShapesF 
doubleTransposedDecoderRelPosEncShape unsqueezedDecoderAttentionMaskShape), HasForward finalLayerNorm tensor3 generatorDevice3 tensor4 generatorDevice4, HasForward finalDropout tensor4 generatorDevice4 output generatorOutputDevice) => HasForward (GTransformer () relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout) (Tensor decoderInputGradient decoderInputLayout decoderInputDevice decoderInputDataType decoderInputShape, Tensor encoderOutputGradient encoderOutputLayout encoderOutputDevice encoderOutputDataType encoderOutputShape, Tensor decoderRelPosGradient decoderRelPosLayout decoderRelPosDevice decoderRelPosDataType decoderRelPosShape, Tensor decoderAttentionMaskGradient decoderAttentionMaskLayout decoderAttentionMaskDevice decoderAttentionMaskDataType decoderAttentionMaskShape, Tensor crossAttentionMaskGradient crossAttentionMaskLayout crossAttentionMaskDevice crossAttentionMaskDataType crossAttentionMaskShape) generatorDevice output generatorOutputDevice Source # |
  ┌──────────────┐  ┌───────────────┐  ┌───────────────┐  ┌──────────────────────┐  ┌────────────────────┐
  │ decoderInput │  │ encoderOutput │  │ decoderRelPos │  │ decoderAttentionMask │  │ crossAttentionMask │
  └──────┬───────┘  └───────┬───────┘  └───────┬───────┘  └──────────┬───────────┘  └─────────┬──────────┘
         │                  │                  │                     │                        │
         │                  │                  ▼                     │                        │
         │                  │              tRelPosEnc                │                        │
         │                  │                  ▼                     │                        │
         │                  │              transpose                 │                        │
         │                  │                  ▼                     ▼                        ▼
         │                  │              transpose             unsqueeze                unsqueeze
         ▼                  │                  │                     │                        │
(tInitialLayerNorm)         │                  │                     │                        │
         ▼                  │                  └────────►add◄────────┘                        │
 (tInitialDropout)          │                             │                                   │
         ▼                  │                             │                                   │
      tStack◄───────────────┘◄────────────────────────────┘◄──────────────────────────────────┘
         ▼
 (tFinalLayerNorm)
         ▼
  (tFinalDropout)
         │
         ▼
     ┌────────┐
     │ output │
     └────────┘
|
Defined in Torch.GraduallyTyped.NN.Transformer.GTransformer forward :: MonadThrow m => GTransformer () relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> (Tensor decoderInputGradient decoderInputLayout decoderInputDevice decoderInputDataType decoderInputShape, Tensor encoderOutputGradient encoderOutputLayout encoderOutputDevice encoderOutputDataType encoderOutputShape, Tensor decoderRelPosGradient decoderRelPosLayout decoderRelPosDevice decoderRelPosDataType decoderRelPosShape, Tensor decoderAttentionMaskGradient decoderAttentionMaskLayout decoderAttentionMaskDevice decoderAttentionMaskDataType decoderAttentionMaskShape, Tensor crossAttentionMaskGradient crossAttentionMaskLayout crossAttentionMaskDevice crossAttentionMaskDataType crossAttentionMaskShape) -> Generator generatorDevice -> m (output, Generator generatorOutputDevice) Source # | |
(HasForward posEnc (Tensor decoderPosGradient decoderPosLayout decoderPosDevice decoderPosDataType decoderPosShape) generatorDevice (Tensor decoderPosEncGradient decoderPosEncLayout decoderPosEncDevice decoderPosEncDataType decoderPosEncShape) generatorDevice0, HasForward initialLayerNorm (Tensor (decoderInputGradient <|> decoderPosEncGradient) (decoderInputLayout <+> decoderPosEncLayout) (decoderInputDevice <+> decoderPosEncDevice) (decoderInputDataType <+> decoderPosEncDataType) (BroadcastShapesF decoderInputShape decoderPosEncShape)) generatorDevice0 tensor1 generatorDevice1, HasForward initialDropout tensor1 generatorDevice1 tensor2 generatorDevice2, HasForward stack (tensor2, Tensor encoderOutputGradient encoderOutputLayout encoderOutputDevice encoderOutputDataType encoderOutputShape, Tensor decoderAttentionMaskGradient decoderAttentionMaskLayout decoderAttentionMaskDevice decoderAttentionMaskDataType (UnsqueezeF ('SelectDim ('ByIndex 1 :: By Symbol Natural)) decoderAttentionMaskShape), Tensor crossAttentionMaskGradient crossAttentionMaskLayout crossAttentionMaskDevice crossAttentionMaskDataType (UnsqueezeF ('SelectDim ('ByIndex 1 :: By Symbol Natural)) crossAttentionMaskShape)) generatorDevice2 tensor3 generatorDevice3, Catch (UnsqueezeF ('SelectDim ('ByIndex 1 :: By Symbol Natural)) decoderAttentionMaskShape), Catch (UnsqueezeF ('SelectDim ('ByIndex 1 :: By Symbol Natural)) crossAttentionMaskShape), Catch (BroadcastShapesF decoderInputShape decoderPosEncShape), HasForward finalLayerNorm tensor3 generatorDevice3 tensor4 generatorDevice4, HasForward finalDropout tensor4 generatorDevice4 output generatorOutputDevice) => HasForward (GTransformer posEnc () initialLayerNorm initialDropout stack finalLayerNorm finalDropout) (Tensor decoderInputGradient decoderInputLayout decoderInputDevice decoderInputDataType decoderInputShape, Tensor encoderOutputGradient encoderOutputLayout encoderOutputDevice encoderOutputDataType encoderOutputShape, Tensor decoderPosGradient 
decoderPosLayout decoderPosDevice decoderPosDataType decoderPosShape, Tensor decoderAttentionMaskGradient decoderAttentionMaskLayout decoderAttentionMaskDevice decoderAttentionMaskDataType decoderAttentionMaskShape, Tensor crossAttentionMaskGradient crossAttentionMaskLayout crossAttentionMaskDevice crossAttentionMaskDataType crossAttentionMaskShape) generatorDevice output generatorOutputDevice Source # |
┌──────────────┐  ┌────────────┐  ┌───────────────┐  ┌──────────────────────┐  ┌────────────────────┐
│ decoderInput │  │ decoderPos │  │ encoderOutput │  │ decoderAttentionMask │  │ crossAttentionMask │
└──────┬───────┘  └──────┬─────┘  └───────┬───────┘  └──────────┬───────────┘  └──────────┬─────────┘
       │                 │                │                     │                         │
       │                 ▼                │                     │                         │
       │              tPosEnc             │                     │                         │
       │                 │                │                     │                         │
       └──────►add◄──────┘                │                     │                         │
                │                         │                     │                         │
                ▼                         │                     │                         │
       (tInitialLayerNorm)                │                     │                         │
                ▼                         │                     ▼                         ▼
        (tInitialDropout)                 │                 unsqueeze                 unsqueeze
                ▼                         │                     │                         │
             tStack◄──────────────────────┘◄────────────────────┘◄────────────────────────┘
                ▼
        (tFinalLayerNorm)
                ▼
         (tFinalDropout)
                │
                ▼
            ┌────────┐
            │ output │
            └────────┘
|
Defined in Torch.GraduallyTyped.NN.Transformer.GTransformer forward :: MonadThrow m => GTransformer posEnc () initialLayerNorm initialDropout stack finalLayerNorm finalDropout -> (Tensor decoderInputGradient decoderInputLayout decoderInputDevice decoderInputDataType decoderInputShape, Tensor encoderOutputGradient encoderOutputLayout encoderOutputDevice encoderOutputDataType encoderOutputShape, Tensor decoderPosGradient decoderPosLayout decoderPosDevice decoderPosDataType decoderPosShape, Tensor decoderAttentionMaskGradient decoderAttentionMaskLayout decoderAttentionMaskDevice decoderAttentionMaskDataType decoderAttentionMaskShape, Tensor crossAttentionMaskGradient crossAttentionMaskLayout crossAttentionMaskDevice crossAttentionMaskDataType crossAttentionMaskShape) -> Generator generatorDevice -> m (output, Generator generatorOutputDevice) Source # | |
type Rep (GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout) Source # | |
Defined in Torch.GraduallyTyped.NN.Transformer.GTransformer type Rep (GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout) = D1 ('MetaData "GTransformer" "Torch.GraduallyTyped.NN.Transformer.GTransformer" "hasktorch-gradually-typed-0.2.0.0-1KV1aIPzzbp6JpSr37tC1K" 'False) (C1 ('MetaCons "GTransformer" 'PrefixI 'True) ((S1 ('MetaSel ('Just "tPosEnc") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 posEnc) :*: (S1 ('MetaSel ('Just "tRelPosEnc") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 relPosEnc) :*: S1 ('MetaSel ('Just "tInitialLayerNorm") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 initialLayerNorm))) :*: ((S1 ('MetaSel ('Just "tInitialDropout") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 initialDropout) :*: S1 ('MetaSel ('Just "tStack") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 stack)) :*: (S1 ('MetaSel ('Just "tFinalLayerNorm") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 finalLayerNorm) :*: S1 ('MetaSel ('Just "tFinalDropout") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 finalDropout))))) | |
type ModelSpec (GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout) Source # | |
Defined in Torch.GraduallyTyped.NN.Transformer.GTransformer type ModelSpec (GTransformer posEnc relPosEnc initialLayerNorm initialDropout stack finalLayerNorm finalDropout) = GTransformer (ModelSpec posEnc) (ModelSpec relPosEnc) (ModelSpec initialLayerNorm) (ModelSpec initialDropout) (ModelSpec stack) (ModelSpec finalLayerNorm) (ModelSpec finalDropout) |
type family TransformerEncoderF (style :: TransformerStyle) (numLayers :: Nat) (gradient :: Gradient RequiresGradient) (device :: Device (DeviceType Nat)) (dataType :: DataType DType) (headDim :: Dim (Name Symbol) (Size Nat)) (headEmbedDim :: Dim (Name Symbol) (Size Nat)) (embedDim :: Dim (Name Symbol) (Size Nat)) (inputEmbedDim :: Dim (Name Symbol) (Size Nat)) (ffnDim :: Dim (Name Symbol) (Size Nat)) (posEncDim :: Dim (Name Symbol) (Size Nat)) (hasDropout :: HasDropout) :: Type where ... Source #
TransformerEncoderF style numLayers gradient device dataType headDim headEmbedDim embedDim inputEmbedDim ffnDim posEncDim hasDropout = GTransformer (TEPosEncF style gradient device dataType inputEmbedDim posEncDim) (TERelPosEncF style gradient device dataType headDim posEncDim) (TEInitialLayerNormF style gradient device dataType inputEmbedDim) (TEInitialDropoutF style hasDropout) (TEStackF style numLayers gradient device dataType headDim headEmbedDim embedDim inputEmbedDim ffnDim hasDropout) (TEFinalLayerNormF style gradient device dataType inputEmbedDim) (TEFinalDropoutF style hasDropout) |
type family TEPosEncF (style :: TransformerStyle) (gradient :: Gradient RequiresGradient) (device :: Device (DeviceType Nat)) (dataType :: DataType DType) (inputEmbedDim :: Dim (Name Symbol) (Size Nat)) (posEncDim :: Dim (Name Symbol) (Size Nat)) :: Type where ... Source #
Specifies the absolute positional encoding layer of a transformer encoder.
TEPosEncF 'T5 _ _ _ _ _ = () | |
TEPosEncF 'ByT5 gradient device dataType inputEmbedDim posEncDim = TEPosEncF 'T5 gradient device dataType inputEmbedDim posEncDim | |
TEPosEncF 'BART gradient device dataType inputEmbedDim posEncDim = NamedModel (Embedding gradient ('Layout 'Dense) device dataType posEncDim inputEmbedDim 'Nothing) | |
TEPosEncF 'MBART gradient device dataType inputEmbedDim posEncDim = TEPosEncF 'BART gradient device dataType inputEmbedDim posEncDim | |
TEPosEncF 'Pegasus gradient device dataType inputEmbedDim posEncDim = TEPosEncF 'BART gradient device dataType inputEmbedDim posEncDim | |
TEPosEncF 'BERT gradient device dataType inputEmbedDim posEncDim = NamedModel (Embedding gradient ('Layout 'Dense) device dataType posEncDim inputEmbedDim 'Nothing) | |
TEPosEncF 'RoBERTa gradient device dataType inputEmbedDim posEncDim = TEPosEncF 'BERT gradient device dataType inputEmbedDim posEncDim |
type family TERelPosEncF (style :: TransformerStyle) (gradient :: Gradient RequiresGradient) (device :: Device (DeviceType Nat)) (dataType :: DataType DType) (headDim :: Dim (Name Symbol) (Size Nat)) (posEncDim :: Dim (Name Symbol) (Size Nat)) :: Type where ... Source #
Specifies the relative positional encoding layer of a transformer encoder.
TERelPosEncF 'T5 gradient device dataType headDim posEncDim = NamedModel (Embedding gradient ('Layout 'Dense) device dataType posEncDim headDim 'Nothing) | |
TERelPosEncF 'ByT5 gradient device dataType headDim posEncDim = TERelPosEncF 'T5 gradient device dataType headDim posEncDim | |
TERelPosEncF 'BART _ _ _ _ _ = () | |
TERelPosEncF 'MBART gradient device dataType headDim posEncDim = TERelPosEncF 'BART gradient device dataType headDim posEncDim | |
TERelPosEncF 'Pegasus gradient device dataType headDim posEncDim = TERelPosEncF 'BART gradient device dataType headDim posEncDim | |
TERelPosEncF 'BERT _ _ _ _ _ = () | |
TERelPosEncF 'RoBERTa gradient device dataType headDim posEncDim = TERelPosEncF 'BERT gradient device dataType headDim posEncDim |
type family TEInitialLayerNormF (style :: TransformerStyle) (gradient :: Gradient RequiresGradient) (device :: Device (DeviceType Nat)) (dataType :: DataType DType) (inputEmbedDim :: Dim (Name Symbol) (Size Nat)) :: Type where ... Source #
Specifies the initial layer normalization layer of a transformer encoder.
TEInitialLayerNormF 'T5 _ _ _ _ = () | |
TEInitialLayerNormF 'ByT5 gradient device dataType inputEmbedDim = TEInitialLayerNormF 'T5 gradient device dataType inputEmbedDim | |
TEInitialLayerNormF 'BART gradient device dataType inputEmbedDim = NamedModel (LayerNorm 'WithBias gradient device dataType ('Shape '[inputEmbedDim])) | |
TEInitialLayerNormF 'MBART gradient device dataType inputEmbedDim = TEInitialLayerNormF 'BART gradient device dataType inputEmbedDim | |
TEInitialLayerNormF 'Pegasus _ _ _ _ = () | |
TEInitialLayerNormF 'BERT gradient device dataType inputEmbedDim = NamedModel (LayerNorm 'WithBias gradient device dataType ('Shape '[inputEmbedDim])) | |
TEInitialLayerNormF 'RoBERTa gradient device dataType inputEmbedDim = TEInitialLayerNormF 'BERT gradient device dataType inputEmbedDim |
type family TEInitialDropoutF (style :: TransformerStyle) (hasDropout :: HasDropout) :: Type where ... Source #
Specifies the initial dropout layer of a transformer encoder.
type family TEStackF (style :: TransformerStyle) (numLayers :: Nat) (gradient :: Gradient RequiresGradient) (device :: Device (DeviceType Nat)) (dataType :: DataType DType) (headDim :: Dim (Name Symbol) (Size Nat)) (headEmbedDim :: Dim (Name Symbol) (Size Nat)) (embedDim :: Dim (Name Symbol) (Size Nat)) (inputEmbedDim :: Dim (Name Symbol) (Size Nat)) (ffnDim :: Dim (Name Symbol) (Size Nat)) (hasDropout :: HasDropout) :: Type where ... Source #
Specifies the transformer block stack of a transformer encoder.
TEStackF style numLayers gradient device dataType headDim headEmbedDim embedDim inputEmbedDim ffnDim hasDropout = NamedModel (EncoderStackF style numLayers gradient device dataType headDim headEmbedDim embedDim inputEmbedDim ffnDim hasDropout) |
type family TEFinalLayerNormF (style :: TransformerStyle) (gradient :: Gradient RequiresGradient) (device :: Device (DeviceType Nat)) (dataType :: DataType DType) (inputEmbedDim :: Dim (Name Symbol) (Size Nat)) :: Type where ... Source #
Specifies the final layer normalization layer of a transformer encoder.
TEFinalLayerNormF 'T5 gradient device dataType inputEmbedDim = NamedModel (LayerNorm 'WithoutBias gradient device dataType ('Shape '[inputEmbedDim])) | |
TEFinalLayerNormF 'ByT5 gradient device dataType inputEmbedDim = TEFinalLayerNormF 'T5 gradient device dataType inputEmbedDim | |
TEFinalLayerNormF 'BART _ _ _ _ = () | |
TEFinalLayerNormF 'MBART gradient device dataType inputEmbedDim = TEFinalLayerNormF 'BART gradient device dataType inputEmbedDim | |
TEFinalLayerNormF 'Pegasus gradient device dataType inputEmbedDim = NamedModel (LayerNorm 'WithBias gradient device dataType ('Shape '[inputEmbedDim])) | |
TEFinalLayerNormF 'BERT _ _ _ _ = () | |
TEFinalLayerNormF 'RoBERTa gradient device dataType inputEmbedDim = TEFinalLayerNormF 'BERT gradient device dataType inputEmbedDim |
type family TEFinalDropoutF (style :: TransformerStyle) (hasDropout :: HasDropout) :: Type where ... Source #
Specifies the final dropout layer of a transformer encoder.
TEFinalDropoutF 'T5 'WithDropout = Dropout | |
TEFinalDropoutF 'ByT5 'WithDropout = Dropout | |
TEFinalDropoutF 'BART _ = () | |
TEFinalDropoutF 'MBART _ = () | |
TEFinalDropoutF 'Pegasus _ = () | |
TEFinalDropoutF 'BERT _ = () | |
TEFinalDropoutF 'RoBERTa _ = () | |
TEFinalDropoutF _ 'WithoutDropout = () |
transformerEncoderSpec :: forall style numLayers gradient device dataType headDim headEmbedDim embedDim inputEmbedDim ffnDim posEncDim hasDropout. STransformerStyle style -> SNat numLayers -> SGradient gradient -> SDevice device -> SDataType dataType -> SDim headDim -> SDim headEmbedDim -> SDim embedDim -> SDim inputEmbedDim -> SDim ffnDim -> SDim posEncDim -> SHasDropout hasDropout -> Double -> Double -> ModelSpec (TransformerEncoderF style numLayers gradient device dataType headDim headEmbedDim embedDim inputEmbedDim ffnDim posEncDim hasDropout) Source #
Specifies the parameters of a transformer in an encoder configuration.
- `style`: the style of the transformer stack, e.g. `ST5`, `SByT5`, etc.
- `gradient`: whether to compute the gradient of the stack's parameters.
- `device`: the computational device on which the stack is allocated.
- `dataType`: the data type of the stack's parameters.
- `headDim`: the dimension of all transformer heads in the stack.
- `headEmbedDim`: the dimension of the transformer head embeddings.
- `embedDim`: the dimension of the transformer embeddings.
- `inputEmbedDim`: the dimension of the transformer query embeddings.
- `ffnDim`: the dimension of the feed-forward network.
- `posEncDim`: the dimension of the positional encoding.
- `dropoutP`: the dropout rate.
- `eps`: the epsilon value for numerical stability of the layer normalization.
type family TransformerDecoderF (style :: TransformerStyle) (numLayers :: Nat) (gradient :: Gradient RequiresGradient) (device :: Device (DeviceType Nat)) (dataType :: DataType DType) (headDim :: Dim (Name Symbol) (Size Nat)) (headEmbedDim :: Dim (Name Symbol) (Size Nat)) (embedDim :: Dim (Name Symbol) (Size Nat)) (decoderInputEmbedDim :: Dim (Name Symbol) (Size Nat)) (encoderOutputEmbedDim :: Dim (Name Symbol) (Size Nat)) (ffnDim :: Dim (Name Symbol) (Size Nat)) (posEncDim :: Dim (Name Symbol) (Size Nat)) (hasDropout :: HasDropout) :: Type where ... Source #
TransformerDecoderF style numLayers gradient device dataType headDim headEmbedDim embedDim decoderInputEmbedDim encoderOutputEmbedDim ffnDim posEncDim hasDropout = GTransformer (TDPosEncF style gradient device dataType decoderInputEmbedDim posEncDim) (TDRelPosEncF style gradient device dataType headDim posEncDim) (TDInitialLayerNormF style gradient device dataType decoderInputEmbedDim) (TDInitialDropoutF style hasDropout) (TDStackF style numLayers gradient device dataType headDim headEmbedDim embedDim decoderInputEmbedDim encoderOutputEmbedDim ffnDim hasDropout) (TDFinalLayerNormF style gradient device dataType decoderInputEmbedDim) (TDFinalDropoutF style hasDropout) |
type family TDPosEncF (style :: TransformerStyle) (gradient :: Gradient RequiresGradient) (device :: Device (DeviceType Nat)) (dataType :: DataType DType) (inputEmbedDim :: Dim (Name Symbol) (Size Nat)) (posEncDim :: Dim (Name Symbol) (Size Nat)) :: Type where ... Source #
Specifies the absolute positional encoding layer of a transformer decoder.
TDPosEncF 'T5 _ _ _ _ _ = () | |
TDPosEncF 'ByT5 gradient device dataType inputEmbedDim posEncDim = TDPosEncF 'T5 gradient device dataType inputEmbedDim posEncDim | |
TDPosEncF 'BART gradient device dataType inputEmbedDim posEncDim = NamedModel (Embedding gradient ('Layout 'Dense) device dataType posEncDim inputEmbedDim 'Nothing) | |
TDPosEncF 'MBART gradient device dataType inputEmbedDim posEncDim = TDPosEncF 'BART gradient device dataType inputEmbedDim posEncDim | |
TDPosEncF 'Pegasus gradient device dataType inputEmbedDim posEncDim = TDPosEncF 'BART gradient device dataType inputEmbedDim posEncDim |
type family TDRelPosEncF (style :: TransformerStyle) (gradient :: Gradient RequiresGradient) (device :: Device (DeviceType Nat)) (dataType :: DataType DType) (headDim :: Dim (Name Symbol) (Size Nat)) (posEncDim :: Dim (Name Symbol) (Size Nat)) :: Type where ... Source #
Specifies the relative positional encoding layer of a transformer decoder.
TDRelPosEncF 'T5 gradient device dataType headDim posEncDim = NamedModel (Embedding gradient ('Layout 'Dense) device dataType posEncDim headDim 'Nothing) | |
TDRelPosEncF 'ByT5 gradient device dataType headDim posEncDim = TDRelPosEncF 'T5 gradient device dataType headDim posEncDim | |
TDRelPosEncF 'BART _ _ _ _ _ = () | |
TDRelPosEncF 'MBART gradient device dataType headDim posEncDim = TDRelPosEncF 'BART gradient device dataType headDim posEncDim | |
TDRelPosEncF 'Pegasus gradient device dataType headDim posEncDim = TDRelPosEncF 'BART gradient device dataType headDim posEncDim |
type family TDInitialLayerNormF (style :: TransformerStyle) (gradient :: Gradient RequiresGradient) (device :: Device (DeviceType Nat)) (dataType :: DataType DType) (inputEmbedDim :: Dim (Name Symbol) (Size Nat)) :: Type where ... Source #
Specifies the initial layer normalization layer of a transformer decoder.
TDInitialLayerNormF 'T5 _ _ _ _ = () | |
TDInitialLayerNormF 'ByT5 gradient device dataType inputEmbedDim = TDInitialLayerNormF 'T5 gradient device dataType inputEmbedDim | |
TDInitialLayerNormF 'BART gradient device dataType inputEmbedDim = NamedModel (LayerNorm 'WithBias gradient device dataType ('Shape '[inputEmbedDim])) | |
TDInitialLayerNormF 'MBART gradient device dataType inputEmbedDim = TDInitialLayerNormF 'BART gradient device dataType inputEmbedDim | |
TDInitialLayerNormF 'Pegasus _ _ _ _ = () |
type family TDInitialDropoutF (style :: TransformerStyle) (hasDropout :: HasDropout) :: Type where ... Source #
Specifies the initial dropout layer of a transformer decoder.
type family TDStackF (style :: TransformerStyle) (numLayers :: Nat) (gradient :: Gradient RequiresGradient) (device :: Device (DeviceType Nat)) (dataType :: DataType DType) (headDim :: Dim (Name Symbol) (Size Nat)) (headEmbedDim :: Dim (Name Symbol) (Size Nat)) (embedDim :: Dim (Name Symbol) (Size Nat)) (decoderInputEmbedDim :: Dim (Name Symbol) (Size Nat)) (encoderOutputEmbedDim :: Dim (Name Symbol) (Size Nat)) (ffnDim :: Dim (Name Symbol) (Size Nat)) (hasDropout :: HasDropout) :: Type where ... Source #
Specifies the transformer block stack of a transformer decoder.
TDStackF style numLayers gradient device dataType headDim headEmbedDim embedDim decoderInputEmbedDim encoderOutputEmbedDim ffnDim hasDropout = NamedModel (DecoderStackF style numLayers gradient device dataType headDim headEmbedDim embedDim decoderInputEmbedDim encoderOutputEmbedDim ffnDim hasDropout) |
type family TDFinalLayerNormF (style :: TransformerStyle) (gradient :: Gradient RequiresGradient) (device :: Device (DeviceType Nat)) (dataType :: DataType DType) (inputEmbedDim :: Dim (Name Symbol) (Size Nat)) :: Type where ... Source #
Specifies the final layer normalization layer of a transformer decoder.
TDFinalLayerNormF 'T5 gradient device dataType inputEmbedDim = NamedModel (LayerNorm 'WithoutBias gradient device dataType ('Shape '[inputEmbedDim])) | |
TDFinalLayerNormF 'ByT5 gradient device dataType inputEmbedDim = TDFinalLayerNormF 'T5 gradient device dataType inputEmbedDim | |
TDFinalLayerNormF 'BART _ _ _ _ = () | |
TDFinalLayerNormF 'MBART gradient device dataType inputEmbedDim = TDFinalLayerNormF 'BART gradient device dataType inputEmbedDim | |
TDFinalLayerNormF 'Pegasus gradient device dataType inputEmbedDim = NamedModel (LayerNorm 'WithBias gradient device dataType ('Shape '[inputEmbedDim])) |
type family TDFinalDropoutF (style :: TransformerStyle) (hasDropout :: HasDropout) :: Type where ... Source #
Specifies the final dropout layer of a transformer decoder.
TDFinalDropoutF 'T5 'WithDropout = Dropout | |
TDFinalDropoutF 'ByT5 'WithDropout = Dropout | |
TDFinalDropoutF 'BART _ = () | |
TDFinalDropoutF 'MBART _ = () | |
TDFinalDropoutF 'Pegasus _ = () | |
TDFinalDropoutF _ 'WithoutDropout = () |
transformerDecoderSpec :: forall style numLayers gradient device dataType headDim headEmbedDim embedDim decoderInputEmbedDim encoderOutputEmbedDim ffnDim posEncDim hasDropout. STransformerStyle style -> SNat numLayers -> SGradient gradient -> SDevice device -> SDataType dataType -> SDim headDim -> SDim headEmbedDim -> SDim embedDim -> SDim decoderInputEmbedDim -> SDim encoderOutputEmbedDim -> SDim ffnDim -> SDim posEncDim -> SHasDropout hasDropout -> Double -> Double -> ModelSpec (TransformerDecoderF style numLayers gradient device dataType headDim headEmbedDim embedDim decoderInputEmbedDim encoderOutputEmbedDim ffnDim posEncDim hasDropout) Source #
Specifies the parameters of a transformer in a decoder configuration.
- `style`: the style of the transformer stack, e.g. `ST5`, `SByT5`, etc.
- `gradient`: whether to compute the gradient of the stack's parameters.
- `device`: the computational device on which the stack is allocated.
- `dataType`: the data type of the stack's parameters.
- `headDim`: the dimension of all transformer heads in the stack.
- `headEmbedDim`: the dimension of the transformer head embeddings.
- `embedDim`: the dimension of the transformer embeddings.
- `decoderInputEmbedDim`: the dimension of the decoder input embeddings.
- `encoderOutputEmbedDim`: the dimension of the encoder output embeddings.
- `ffnDim`: the dimension of the feed-forward network.
- `posEncDim`: the dimension of the positional encoding.
- `dropoutP`: the dropout rate.
- `eps`: the epsilon value for numerical stability of the layer normalization.