Safe Haskell | Safe-Inferred
---|---
Language | Haskell2010
Synopsis
- data GSelfAttention (initialLayerNorm :: Type) (mha :: Type) (dropout :: Type) (finalLayerNorm :: Type) where
- GSelfAttention :: forall initialLayerNorm mha dropout finalLayerNorm. {..} -> GSelfAttention initialLayerNorm mha dropout finalLayerNorm
- type family GSelfAttentionF (style :: TransformerStyle) (gradient :: Gradient RequiresGradient) (device :: Device (DeviceType Nat)) (dataType :: DataType DType) (headDim :: Dim (Name Symbol) (Size Nat)) (headEmbedDim :: Dim (Name Symbol) (Size Nat)) (embedDim :: Dim (Name Symbol) (Size Nat)) (queryEmbedDim :: Dim (Name Symbol) (Size Nat)) (hasDropout :: HasDropout) :: Type where ...
- type family SAInitialLayerNormF (style :: TransformerStyle) (gradient :: Gradient RequiresGradient) (device :: Device (DeviceType Nat)) (dataType :: DataType DType) (queryEmbedDim :: Dim (Name Symbol) (Size Nat)) :: Type where ...
- type family SAMultiheadAttentionF (style :: TransformerStyle) (gradient :: Gradient RequiresGradient) (device :: Device (DeviceType Nat)) (dataType :: DataType DType) (headDim :: Dim (Name Symbol) (Size Nat)) (headEmbedDim :: Dim (Name Symbol) (Size Nat)) (embedDim :: Dim (Name Symbol) (Size Nat)) (queryEmbedDim :: Dim (Name Symbol) (Size Nat)) (hasDropout :: HasDropout) :: Type where ...
- type family SADropoutF (style :: TransformerStyle) (hasDropout :: HasDropout) :: Type where ...
- type family SAFinalLayerNormF (style :: TransformerStyle) (gradient :: Gradient RequiresGradient) (device :: Device (DeviceType Nat)) (dataType :: DataType DType) (queryEmbedDim :: Dim (Name Symbol) (Size Nat)) :: Type where ...
- selfAttentionSpec :: forall style gradient device dataType headDim headEmbedDim embedDim queryEmbedDim hasDropout. STransformerStyle style -> SGradient gradient -> SDevice device -> SDataType dataType -> SDim headDim -> SDim headEmbedDim -> SDim embedDim -> SDim queryEmbedDim -> SHasDropout hasDropout -> Double -> Double -> ModelSpec (GSelfAttentionF style gradient device dataType headDim headEmbedDim embedDim queryEmbedDim hasDropout)
Documentation
data GSelfAttention (initialLayerNorm :: Type) (mha :: Type) (dropout :: Type) (finalLayerNorm :: Type) where Source #
Generic self-attention layer data type.
initialLayerNorm
: the initial layer normalization
mha
: the multi-headed attention layer
dropout
: the dropout layer
finalLayerNorm
: the final layer normalization
GSelfAttention

Fields:

- saInitialLayerNorm :: initialLayerNorm
- saMultiHeadAttention :: mha
- saDropout :: dropout
- saFinalLayerNorm :: finalLayerNorm
Instances
Generic (GSelfAttention initialLayerNorm mha dropout finalLayerNorm) Source #
Defined in Torch.GraduallyTyped.NN.Transformer.GSelfAttention
from :: GSelfAttention initialLayerNorm mha dropout finalLayerNorm -> Rep (GSelfAttention initialLayerNorm mha dropout finalLayerNorm) x Source #
to :: Rep (GSelfAttention initialLayerNorm mha dropout finalLayerNorm) x -> GSelfAttention initialLayerNorm mha dropout finalLayerNorm Source #
(Show initialLayerNorm, Show mha, Show dropout, Show finalLayerNorm) => Show (GSelfAttention initialLayerNorm mha dropout finalLayerNorm) Source #
(Eq initialLayerNorm, Eq mha, Eq dropout, Eq finalLayerNorm) => Eq (GSelfAttention initialLayerNorm mha dropout finalLayerNorm) Source #
Defined in Torch.GraduallyTyped.NN.Transformer.GSelfAttention
(==) :: GSelfAttention initialLayerNorm mha dropout finalLayerNorm -> GSelfAttention initialLayerNorm mha dropout finalLayerNorm -> Bool Source #
(/=) :: GSelfAttention initialLayerNorm mha dropout finalLayerNorm -> GSelfAttention initialLayerNorm mha dropout finalLayerNorm -> Bool Source #
(Ord initialLayerNorm, Ord mha, Ord dropout, Ord finalLayerNorm) => Ord (GSelfAttention initialLayerNorm mha dropout finalLayerNorm) Source #
Defined in Torch.GraduallyTyped.NN.Transformer.GSelfAttention
compare :: GSelfAttention initialLayerNorm mha dropout finalLayerNorm -> GSelfAttention initialLayerNorm mha dropout finalLayerNorm -> Ordering Source #
(<) :: GSelfAttention initialLayerNorm mha dropout finalLayerNorm -> GSelfAttention initialLayerNorm mha dropout finalLayerNorm -> Bool Source #
(<=) :: GSelfAttention initialLayerNorm mha dropout finalLayerNorm -> GSelfAttention initialLayerNorm mha dropout finalLayerNorm -> Bool Source #
(>) :: GSelfAttention initialLayerNorm mha dropout finalLayerNorm -> GSelfAttention initialLayerNorm mha dropout finalLayerNorm -> Bool Source #
(>=) :: GSelfAttention initialLayerNorm mha dropout finalLayerNorm -> GSelfAttention initialLayerNorm mha dropout finalLayerNorm -> Bool Source #
max :: GSelfAttention initialLayerNorm mha dropout finalLayerNorm -> GSelfAttention initialLayerNorm mha dropout finalLayerNorm -> GSelfAttention initialLayerNorm mha dropout finalLayerNorm Source #
min :: GSelfAttention initialLayerNorm mha dropout finalLayerNorm -> GSelfAttention initialLayerNorm mha dropout finalLayerNorm -> GSelfAttention initialLayerNorm mha dropout finalLayerNorm Source #
(HasStateDict initialLayerNorm, HasStateDict multiHeadAttention, HasStateDict dropout, HasStateDict finalLayerNorm) => HasStateDict (GSelfAttention initialLayerNorm multiHeadAttention dropout finalLayerNorm) Source #
Defined in Torch.GraduallyTyped.NN.Transformer.GSelfAttention
fromStateDict :: (MonadIO m, MonadThrow m, MonadState StateDict m) => ModelSpec (GSelfAttention initialLayerNorm multiHeadAttention dropout finalLayerNorm) -> StateDictKey -> m (GSelfAttention initialLayerNorm multiHeadAttention dropout finalLayerNorm) Source #
toStateDict :: (MonadThrow m, MonadState StateDict m) => StateDictKey -> GSelfAttention initialLayerNorm multiHeadAttention dropout finalLayerNorm -> m () Source #
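For instance, materializing the layer from a flat state dictionary could look like the sketch below; `spec`, `stateDict`, and the key prefix are hypothetical placeholders, and `runStateT` supplies the `MonadState StateDict` context required by the instance:

```haskell
import Control.Monad.State (runStateT)

-- Hedged sketch: 'spec' is the layer's ModelSpec and 'stateDict' a
-- previously loaded StateDict; the key prefix is purely illustrative.
loadSelfAttention spec stateDict =
  runStateT (fromStateDict spec "encoder.block.0.layer.0.") stateDict
  -- returns the reconstructed layer together with the final StateDict state
```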
(HasInitialize initialLayerNorm generatorDevice initialLayerNorm' generatorDevice0, HasInitialize multiHeadAttention generatorDevice0 multiHeadAttention' generatorDevice1, HasInitialize dropout generatorDevice1 dropout' generatorDevice2, HasInitialize finalLayerNorm generatorDevice2 finalLayerNorm' generatorOutputDevice) => HasInitialize (GSelfAttention initialLayerNorm multiHeadAttention dropout finalLayerNorm) generatorDevice (GSelfAttention initialLayerNorm' multiHeadAttention' dropout' finalLayerNorm') generatorOutputDevice Source #
Defined in Torch.GraduallyTyped.NN.Transformer.GSelfAttention
initialize :: MonadThrow m => ModelSpec (GSelfAttention initialLayerNorm multiHeadAttention dropout finalLayerNorm) -> Generator generatorDevice -> m (GSelfAttention initialLayerNorm' multiHeadAttention' dropout' finalLayerNorm', Generator generatorOutputDevice) Source #
(HasForward initialLayerNorm (Tensor queryGradient queryLayout queryDevice queryDataType queryShape) generatorDevice tensor0 generatorDevice0, HasForward multiHeadAttention (tensor0, tensor0, tensor0, Tensor attentionBiasGradient attentionBiasLayout attentionBiasDevice attentionBiasDataType attentionBiasShape) generatorDevice0 tensor1 generatorDevice1, HasForward dropout tensor1 generatorDevice1 (Tensor gradient2 layout2 device2 dataType2 shape2) generatorDevice2, HasForward finalLayerNorm (Tensor (queryGradient <|> gradient2) (queryLayout <+> layout2) (queryDevice <+> device2) (queryDataType <+> dataType2) (BroadcastShapesF queryShape shape2)) generatorDevice2 output generatorOutputDevice, Catch (BroadcastShapesF queryShape shape2)) => HasForward (GSelfAttention initialLayerNorm multiHeadAttention dropout finalLayerNorm) (Tensor queryGradient queryLayout queryDevice queryDataType queryShape, Tensor attentionBiasGradient attentionBiasLayout attentionBiasDevice attentionBiasDataType attentionBiasShape) generatorDevice output generatorOutputDevice Source #

┌───────────────┐     ┌───────┐
│ attentionBias │     │ query │
└───────┬───────┘     └───┬───┘
        │                 │
        │           ┌─────┴─────┐
        │           │           │
        │           ▼           │
        │  (saInitialLayerNorm) │
        │           │           │
        │      ┌────┼────┐      │
        │      │    │    │      │
        │      ▼    ▼    ▼      │
        └─►saMultiHeadAttention │
                    │           │
                    ▼           │
                saDropout       │
                    │           │
                    └───►add◄───┘
                          │
                          ▼
                  (saFinalLayerNorm)
                          │
                          ▼
                      ┌───────┐
                      │ query │
                      └───────┘

Defined in Torch.GraduallyTyped.NN.Transformer.GSelfAttention
forward :: MonadThrow m => GSelfAttention initialLayerNorm multiHeadAttention dropout finalLayerNorm -> (Tensor queryGradient queryLayout queryDevice queryDataType queryShape, Tensor attentionBiasGradient attentionBiasLayout attentionBiasDevice attentionBiasDataType attentionBiasShape) -> Generator generatorDevice -> m (output, Generator generatorOutputDevice) Source #
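At a call site, the query and the attention bias are passed as a pair and the random generator is threaded through, as in this minimal sketch (the helper name `step` is ours; all arguments are assumed in scope with types matching the instance):

```haskell
-- Minimal sketch: 'sa' is a GSelfAttention value, 'query' and
-- 'attentionBias' are tensors, and 'g' is a Generator.
step sa query attentionBias g = do
  (query', g') <- forward sa (query, attentionBias) g
  pure (query', g')
```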
type Rep (GSelfAttention initialLayerNorm mha dropout finalLayerNorm) Source #
Defined in Torch.GraduallyTyped.NN.Transformer.GSelfAttention
type Rep (GSelfAttention initialLayerNorm mha dropout finalLayerNorm) = D1 ('MetaData "GSelfAttention" "Torch.GraduallyTyped.NN.Transformer.GSelfAttention" "hasktorch-gradually-typed-0.2.0.0-1KV1aIPzzbp6JpSr37tC1K" 'False) (C1 ('MetaCons "GSelfAttention" 'PrefixI 'True) ((S1 ('MetaSel ('Just "saInitialLayerNorm") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 initialLayerNorm) :*: S1 ('MetaSel ('Just "saMultiHeadAttention") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 mha)) :*: (S1 ('MetaSel ('Just "saDropout") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 dropout) :*: S1 ('MetaSel ('Just "saFinalLayerNorm") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 finalLayerNorm))))
type ModelSpec (GSelfAttention initialLayerNorm mha dropout finalLayerNorm) Source #
Defined in Torch.GraduallyTyped.NN.Transformer.GSelfAttention
type ModelSpec (GSelfAttention initialLayerNorm mha dropout finalLayerNorm) = GSelfAttention (ModelSpec initialLayerNorm) (ModelSpec mha) (ModelSpec dropout) (ModelSpec finalLayerNorm)
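In other words, a specification for the whole layer is itself a GSelfAttention whose fields carry the specifications of its four components. A hedged sketch of assembling one by hand, with the component specs assumed to be in scope:

```haskell
-- Hedged sketch: lnSpec, mhaSpec, dropoutSpec, and finalLnSpec are assumed
-- ModelSpec values for the four components of the layer.
mySelfAttentionSpec =
  GSelfAttention
    { saInitialLayerNorm = lnSpec,
      saMultiHeadAttention = mhaSpec,
      saDropout = dropoutSpec,
      saFinalLayerNorm = finalLnSpec
    }
```

In practice, selfAttentionSpec (documented below) builds this record from singletons instead.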
type family GSelfAttentionF (style :: TransformerStyle) (gradient :: Gradient RequiresGradient) (device :: Device (DeviceType Nat)) (dataType :: DataType DType) (headDim :: Dim (Name Symbol) (Size Nat)) (headEmbedDim :: Dim (Name Symbol) (Size Nat)) (embedDim :: Dim (Name Symbol) (Size Nat)) (queryEmbedDim :: Dim (Name Symbol) (Size Nat)) (hasDropout :: HasDropout) :: Type where ... Source #
Specifies the self-attention layer of a given transformer style in terms of the component type families below.
GSelfAttentionF style gradient device dataType headDim headEmbedDim embedDim queryEmbedDim hasDropout = GSelfAttention (SAInitialLayerNormF style gradient device dataType queryEmbedDim) (SAMultiheadAttentionF style gradient device dataType headDim headEmbedDim embedDim queryEmbedDim hasDropout) (SADropoutF style hasDropout) (SAFinalLayerNormF style gradient device dataType queryEmbedDim)
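For example, specializing the family to the 'T5 style with dropout and unfolding the component families below gives the following shape (a hedged sketch; the type synonym name is ours):

```haskell
-- Hedged sketch of how GSelfAttentionF reduces for 'T5 with dropout,
-- per the equations on this page: an initial bias-free layer norm, a
-- multi-headed attention layer keyed on queryEmbedDim, a dropout layer,
-- and no final layer norm.
type T5SelfAttentionF gradient device dataType headDim headEmbedDim embedDim queryEmbedDim =
  GSelfAttention
    (NamedModel (LayerNorm 'WithoutBias gradient device dataType ('Shape '[queryEmbedDim])))
    (NamedModel (GMultiHeadAttentionF 'T5 gradient device dataType headDim headEmbedDim embedDim queryEmbedDim queryEmbedDim queryEmbedDim 'WithDropout))
    Dropout
    ()
```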
type family SAInitialLayerNormF (style :: TransformerStyle) (gradient :: Gradient RequiresGradient) (device :: Device (DeviceType Nat)) (dataType :: DataType DType) (queryEmbedDim :: Dim (Name Symbol) (Size Nat)) :: Type where ... Source #
Specifies the initial layer normalization of the self-attention layer.
SAInitialLayerNormF 'T5 gradient device dataType queryEmbedDim = NamedModel (LayerNorm 'WithoutBias gradient device dataType ('Shape '[queryEmbedDim]))
SAInitialLayerNormF 'ByT5 gradient device dataType queryEmbedDim = SAInitialLayerNormF 'T5 gradient device dataType queryEmbedDim
SAInitialLayerNormF 'BART _ _ _ _ = ()
SAInitialLayerNormF 'MBART gradient device dataType queryEmbedDim = SAInitialLayerNormF 'BART gradient device dataType queryEmbedDim
SAInitialLayerNormF 'Pegasus gradient device dataType queryEmbedDim = NamedModel (LayerNorm 'WithBias gradient device dataType ('Shape '[queryEmbedDim]))
SAInitialLayerNormF 'BERT _ _ _ _ = ()
SAInitialLayerNormF 'RoBERTa gradient device dataType queryEmbedDim = SAInitialLayerNormF 'BERT gradient device dataType queryEmbedDim
type family SAMultiheadAttentionF (style :: TransformerStyle) (gradient :: Gradient RequiresGradient) (device :: Device (DeviceType Nat)) (dataType :: DataType DType) (headDim :: Dim (Name Symbol) (Size Nat)) (headEmbedDim :: Dim (Name Symbol) (Size Nat)) (embedDim :: Dim (Name Symbol) (Size Nat)) (queryEmbedDim :: Dim (Name Symbol) (Size Nat)) (hasDropout :: HasDropout) :: Type where ... Source #
Specifies the multi-headed attention layer of the self-attention layer.
SAMultiheadAttentionF style gradient device dataType headDim headEmbedDim embedDim queryEmbedDim hasDropout = NamedModel (GMultiHeadAttentionF style gradient device dataType headDim headEmbedDim embedDim queryEmbedDim queryEmbedDim queryEmbedDim hasDropout)
type family SADropoutF (style :: TransformerStyle) (hasDropout :: HasDropout) :: Type where ... Source #
Specifies the dropout layer of the self-attention layer.
SADropoutF _ 'WithDropout = Dropout
SADropoutF _ 'WithoutDropout = ()
type family SAFinalLayerNormF (style :: TransformerStyle) (gradient :: Gradient RequiresGradient) (device :: Device (DeviceType Nat)) (dataType :: DataType DType) (queryEmbedDim :: Dim (Name Symbol) (Size Nat)) :: Type where ... Source #
Specifies the final layer normalization of the self-attention layer. Together with SAInitialLayerNormF, this determines whether a style normalizes before the attention block (pre-norm: 'T5, 'ByT5, 'Pegasus) or after the residual connection (post-norm: 'BART, 'MBART, 'BERT, 'RoBERTa).
SAFinalLayerNormF 'T5 _ _ _ _ = ()
SAFinalLayerNormF 'ByT5 gradient device dataType queryEmbedDim = SAFinalLayerNormF 'T5 gradient device dataType queryEmbedDim
SAFinalLayerNormF 'BART gradient device dataType queryEmbedDim = NamedModel (LayerNorm 'WithBias gradient device dataType ('Shape '[queryEmbedDim]))
SAFinalLayerNormF 'MBART gradient device dataType queryEmbedDim = SAFinalLayerNormF 'BART gradient device dataType queryEmbedDim
SAFinalLayerNormF 'Pegasus gradient device dataType queryEmbedDim = ()
SAFinalLayerNormF 'BERT gradient device dataType queryEmbedDim = NamedModel (LayerNorm 'WithBias gradient device dataType ('Shape '[queryEmbedDim]))
SAFinalLayerNormF 'RoBERTa gradient device dataType queryEmbedDim = SAFinalLayerNormF 'BERT gradient device dataType queryEmbedDim
selfAttentionSpec :: forall style gradient device dataType headDim headEmbedDim embedDim queryEmbedDim hasDropout. STransformerStyle style -> SGradient gradient -> SDevice device -> SDataType dataType -> SDim headDim -> SDim headEmbedDim -> SDim embedDim -> SDim queryEmbedDim -> SHasDropout hasDropout -> Double -> Double -> ModelSpec (GSelfAttentionF style gradient device dataType headDim headEmbedDim embedDim queryEmbedDim hasDropout) Source #
Specifies the parameters of a self-attention layer.
style
: the style of the transformer, e.g. ST5, SByT5, etc.
gradient
: whether to compute the gradient of the layer's parameters.
device
: the computational device on which the layer is allocated.
dataType
: the data type of the layer's parameters.
headDim
: the dimension of all transformer heads in the layer.
headEmbedDim
: the dimension of the transformer head embeddings.
embedDim
: the dimension of the transformer embeddings.
queryEmbedDim
: the dimension of the transformer query embeddings.
dropoutP
: the dropout rate.
eps
: the epsilon value for numerical stability of the layer normalization.
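Putting it together, a T5-style layer might be specified and initialized as below. This is a hedged sketch, not a verified invocation: the dimension sizes are illustrative, and sMkGenerator as well as the singleton constructors (SWithGradient, SCPU, SFloat, SWithDropout, SName, SSize, :&:) are assumed from the rest of the library.

```haskell
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE TypeApplications #-}

-- Hedged sketch: dims, device, and the generator helper are assumptions.
example :: IO ()
example = do
  let spec =
        selfAttentionSpec
          ST5                          -- transformer style
          (SGradient SWithGradient)    -- track gradients
          (SDevice SCPU)               -- allocate on the CPU
          (SDataType SFloat)           -- single-precision parameters
          (SName @"*" :&: SSize @8)    -- headDim
          (SName @"*" :&: SSize @64)   -- headEmbedDim
          (SName @"*" :&: SSize @512)  -- embedDim
          (SName @"*" :&: SSize @512)  -- queryEmbedDim
          SWithDropout                 -- include dropout layers
          0.1                          -- dropoutP
          1e-6                         -- eps
  g <- sMkGenerator (SDevice SCPU) 0   -- seeded generator (assumed helper)
  (sa, _g') <- initialize spec g       -- draw the initial parameters
  print sa                             -- inspect the layer (assuming Show for all components)
```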