hasktorch-gradually-typed-0.2.0.0: experimental project for hasktorch
Safe Haskell: Safe-Inferred
Language: Haskell2010

Torch.GraduallyTyped.NN.Transformer.T5.Common

Documentation

type T5DType = 'Float Source #

T5 dType.

t5DType :: SDType T5DType Source #

T5 dType singleton.

type T5DataType = 'DataType T5DType Source #

T5 data type.

t5DataType :: SDataType T5DataType Source #

T5 data type singleton.

t5DropoutP :: Double Source #

T5 dropout rate. 'dropout_rate = 0.1'

type T5RelPosEncBucketDim = 'Dim ('Name "*") ('Size 32) Source #

T5 relative positional encoding bucket dimension. 'relative_attention_num_buckets = 32'

t5RelPosEncBucketDim :: SDim T5RelPosEncBucketDim Source #

T5 relative positional encoding bucket dimension singleton.

t5Eps :: Double Source #

T5 layer-norm epsilon. 'layer_norm_epsilon = 1e-06'

t5MaxDistance :: Int Source #

T5 maximum distance for relative positional encoding. 'relative_attention_max_distance = 128'

t5PadTokenId :: Int Source #

T5 padding token id. 'pad_token_id = 0'

t5BOSTokenId :: Int Source #

T5 begin-of-sentence token id.

t5EOSTokenId :: Int Source #

T5 end-of-sentence token id. 'eos_token_id = 1'
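
The scalar configuration constants can be checked directly in GHCi; the expected results below are simply the values quoted in the docstrings on this page:

>>> t5DropoutP
0.1
>>> t5Eps
1.0e-6
>>> t5PadTokenId
0
>>> t5EOSTokenId
1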

t5AttentionMaskBias :: Double Source #

T5 attention mask bias.

type family T5ModelF style transformerHead numEncoderLayers numDecoderLayers gradient device headDim headEmbedDim embedDim inputEmbedDim ffnDim vocabDim hasDropout where ... Source #

Specifies a T5 or ByT5 model.

Equations

T5ModelF 'T5 transformerHead numEncoderLayers numDecoderLayers gradient device headDim headEmbedDim embedDim inputEmbedDim ffnDim vocabDim hasDropout = GSimplifiedEncoderDecoderTransformer (GEncoderDecoderTransformerF 'T5 transformerHead numEncoderLayers numDecoderLayers gradient device T5DataType headDim headEmbedDim embedDim inputEmbedDim ffnDim T5RelPosEncBucketDim vocabDim hasDropout) (MkRelPos T5RelPosEncBucketDim) (MkRelPos T5RelPosEncBucketDim) MkTransformerPaddingMask (MkTransformerAttentionMask T5DataType) (MkTransformerCrossAttentionMask T5DataType) (MkTransformerDecoderAttentionMask T5DataType) 
T5ModelF 'ByT5 transformerHead numEncoderLayers numDecoderLayers gradient device headDim headEmbedDim embedDim inputEmbedDim ffnDim vocabDim hasDropout = GSimplifiedEncoderDecoderTransformer (GEncoderDecoderTransformerF 'ByT5 transformerHead numEncoderLayers numDecoderLayers gradient device T5DataType headDim headEmbedDim embedDim inputEmbedDim ffnDim T5RelPosEncBucketDim vocabDim hasDropout) (MkRelPos T5RelPosEncBucketDim) (MkRelPos T5RelPosEncBucketDim) MkTransformerPaddingMask (MkTransformerAttentionMask T5DataType) (MkTransformerCrossAttentionMask T5DataType) (MkTransformerDecoderAttentionMask T5DataType) 
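
As an illustration, a T5-Small-sized instantiation of the type family could be written as follows. This is a sketch only: the alias name T5DemoModel is hypothetical, and the dimension sizes (8 attention heads of size 64, model dimension 512, feed-forward dimension 2048, a vocabulary of 32128, and 6 layers per stack) are taken from the published T5-Small configuration, not from this module.

  type T5DemoModel transformerHead gradient device hasDropout =
    T5ModelF
      'T5
      transformerHead
      6 -- numEncoderLayers
      6 -- numDecoderLayers
      gradient
      device
      ('Dim ('Name "*") ('Size 8)) -- headDim
      ('Dim ('Name "*") ('Size 64)) -- headEmbedDim
      ('Dim ('Name "*") ('Size 512)) -- embedDim = headDim * headEmbedDim
      ('Dim ('Name "*") ('Size 512)) -- inputEmbedDim, i.e. d_model
      ('Dim ('Name "*") ('Size 2048)) -- ffnDim
      ('Dim ('Name "*") ('Size 32128)) -- vocabDim
      hasDropout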

t5ModelSpec :: forall style transformerHead numEncoderLayers numDecoderLayers gradient device headDim headEmbedDim embedDim inputEmbedDim ffnDim vocabDim hasDropout. (SingI headDim, SingI headEmbedDim, SingI embedDim, SingI inputEmbedDim, SingI ffnDim, SingI vocabDim) => STransformerStyle style -> STransformerHead transformerHead -> SNat numEncoderLayers -> SNat numDecoderLayers -> SGradient gradient -> SDevice device -> SHasDropout hasDropout -> ModelSpec (T5ModelF style transformerHead numEncoderLayers numDecoderLayers gradient device headDim headEmbedDim embedDim inputEmbedDim ffnDim vocabDim hasDropout) Source #

Specifies the parameters of a T5 or ByT5 model; a value-level usage sketch follows the parameter list.

  • style: the style of the model, i.e. 'T5 or 'ByT5.
  • transformerHead: the head of the T5 or ByT5 model.
  • numEncoderLayers: the number of encoder layers in the T5 or ByT5 model.
  • numDecoderLayers: the number of decoder layers in the T5 or ByT5 model.
  • gradient: whether to compute the gradient of the T5 or ByT5 model.
  • device: the computational device on which the T5 or ByT5 model parameters are to be allocated.
  • hasDropout: whether dropout is enabled in the T5 or ByT5 model.
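
A minimal value-level sketch, reusing the hypothetical T5DemoModel alias from above. The singleton constructors ST5, SWithLMHead, SWithGradient, SCPU, and SWithDropout are assumed to be the ones exported by the surrounding hasktorch-gradually-typed modules; the six dimension singletons are discharged through the SingI constraints once the result type is fixed.

  t5DemoSpec ::
    SGradient gradient ->
    SDevice device ->
    SHasDropout hasDropout ->
    ModelSpec (T5DemoModel 'WithLMHead gradient device hasDropout)
  t5DemoSpec = t5ModelSpec ST5 SWithLMHead (SNat @6) (SNat @6)

  -- For example, a specification on the CPU with gradients and dropout enabled:
  -- t5DemoSpec (SGradient SWithGradient) (SDevice SCPU) SWithDropout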

mkT5Input :: forall batchDim seqDim device m output. (MonadThrow m, SGetDim batchDim, SGetDim seqDim, Catch ('Shape '['Dim ('Name "*") 'UncheckedSize, 'Dim ('Name "*") 'UncheckedSize] <+> 'Shape '[batchDim, seqDim]), output ~ Tensor ('Gradient 'WithoutGradient) ('Layout 'Dense) device ('DataType 'Int64) ('Shape '[batchDim, seqDim])) => SDim batchDim -> SDim seqDim -> SDevice device -> [[Int]] -> m output Source #
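
Creates an input tensor of data type 'Int64 and shape [batchDim, seqDim] on the given device from a batch of token ids, suitable as input to a T5 or ByT5 model.

A minimal usage sketch in IO (any MonadThrow will do). The SName/SSize/:&: dimension-singleton syntax and the SCPU device singleton are assumed from the surrounding Torch.GraduallyTyped modules; the token ids are made up for illustration, and the padding of the shorter row with t5PadTokenId is an assumption based on the analogous mkTransformerInput helper, not a documented guarantee.

  example = mkT5Input
    (SName @"*" :&: SSize @2) -- batch dimension of size 2
    (SName @"*" :&: SSize @5) -- sequence dimension of size 5
    (SDevice SCPU)
    [ [13959, 1566, 12, 2968, 10] -- a full-length row
    , [100, 19, 207, t5EOSTokenId] -- shorter row, assumed to be padded with t5PadTokenId
    ]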