hasktorch-gradually-typed-0.2.0.0: experimental project for hasktorch
Safe Haskell: Safe-Inferred
Language: Haskell2010

Torch.GraduallyTyped.NN.Transformer.T5.Common

Documentation

type T5DType = 'Float Source #

T5 dType.

t5DType :: SDType T5DType Source #

T5 dType singleton.

type T5DataType = 'DataType T5DType Source #

T5 data type.

t5DataType :: SDataType T5DataType Source #

T5 data type singleton.

t5DropoutP :: Double Source #

T5 dropout rate. 'dropout_rate = 0.1'

type T5RelPosEncBucketDim = 'Dim ('Name "*") ('Size 32) Source #

T5 relative positional encoding bucket dimension. 'relative_attention_num_buckets = 32'

t5RelPosEncBucketDim :: SDim T5RelPosEncBucketDim Source #

T5 relative positional encoding bucket dimension singleton.

t5Eps :: Double Source #

T5 layer-norm epsilon. 'layer_norm_epsilon = 1e-06'

t5MaxDistance :: Int Source #

T5 maximum distance for relative positional encoding. 'relative_attention_max_distance = 128'

t5PadTokenId :: Int Source #

T5 padding token id. 'pad_token_id = 0'

t5BOSTokenId :: Int Source #

T5 begin-of-sentence token id.

t5EOSTokenId :: Int Source #

T5 end-of-sentence token id. 'eos_token_id = 1'
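
The scalar configuration constants can be checked directly in GHCi; the expected results below are simply the values quoted in the docstrings on this page:

>>> t5DropoutP
0.1
>>> t5Eps
1.0e-6
>>> t5PadTokenId
0
>>> t5EOSTokenId
1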

t5AttentionMaskBias :: Double Source #

T5 attention mask bias.

type family T5ModelF style transformerHead numEncoderLayers numDecoderLayers gradient device headDim headEmbedDim embedDim inputEmbedDim ffnDim vocabDim hasDropout where ... Source #

Specifies a T5 or ByT5 model.

Equations

T5ModelF 'T5 transformerHead numEncoderLayers numDecoderLayers gradient device headDim headEmbedDim embedDim inputEmbedDim ffnDim vocabDim hasDropout = GSimplifiedEncoderDecoderTransformer (GEncoderDecoderTransformerF 'T5 transformerHead numEncoderLayers numDecoderLayers gradient device T5DataType headDim headEmbedDim embedDim inputEmbedDim ffnDim T5RelPosEncBucketDim vocabDim hasDropout) (MkRelPos T5RelPosEncBucketDim) (MkRelPos T5RelPosEncBucketDim) MkTransformerPaddingMask (MkTransformerAttentionMask T5DataType) (MkTransformerCrossAttentionMask T5DataType) (MkTransformerDecoderAttentionMask T5DataType) 
T5ModelF 'ByT5 transformerHead numEncoderLayers numDecoderLayers gradient device headDim headEmbedDim embedDim inputEmbedDim ffnDim vocabDim hasDropout = GSimplifiedEncoderDecoderTransformer (GEncoderDecoderTransformerF 'ByT5 transformerHead numEncoderLayers numDecoderLayers gradient device T5DataType headDim headEmbedDim embedDim inputEmbedDim ffnDim T5RelPosEncBucketDim vocabDim hasDropout) (MkRelPos T5RelPosEncBucketDim) (MkRelPos T5RelPosEncBucketDim) MkTransformerPaddingMask (MkTransformerAttentionMask T5DataType) (MkTransformerCrossAttentionMask T5DataType) (MkTransformerDecoderAttentionMask T5DataType) 
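
As an illustration, a T5-Small-sized instantiation of the type family could be written as follows. This is a sketch only: the alias name T5DemoModel is hypothetical, and the dimension sizes (8 attention heads of size 64, model dimension 512, feed-forward dimension 2048, a vocabulary of 32128, and 6 layers per stack) are taken from the published T5-Small configuration, not from this module.

  type T5DemoModel transformerHead gradient device hasDropout =
    T5ModelF
      'T5
      transformerHead
      6 -- numEncoderLayers
      6 -- numDecoderLayers
      gradient
      device
      ('Dim ('Name "*") ('Size 8)) -- headDim
      ('Dim ('Name "*") ('Size 64)) -- headEmbedDim
      ('Dim ('Name "*") ('Size 512)) -- embedDim = headDim * headEmbedDim
      ('Dim ('Name "*") ('Size 512)) -- inputEmbedDim, i.e. d_model
      ('Dim ('Name "*") ('Size 2048)) -- ffnDim
      ('Dim ('Name "*") ('Size 32128)) -- vocabDim
      hasDropout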

t5ModelSpec :: forall style transformerHead numEncoderLayers numDecoderLayers gradient device headDim headEmbedDim embedDim inputEmbedDim ffnDim vocabDim hasDropout. (SingI headDim, SingI headEmbedDim, SingI embedDim, SingI inputEmbedDim, SingI ffnDim, SingI vocabDim) => STransformerStyle style -> STransformerHead transformerHead -> SNat numEncoderLayers -> SNat numDecoderLayers -> SGradient gradient -> SDevice device -> SHasDropout hasDropout -> ModelSpec (T5ModelF style transformerHead numEncoderLayers numDecoderLayers gradient device headDim headEmbedDim embedDim inputEmbedDim ffnDim vocabDim hasDropout) Source #

Specifies the parameters of a T5 or ByT5 model; a value-level usage sketch follows the parameter list.

  • style: the style of the model, i.e. 'T5 or 'ByT5.
  • transformerHead: the head of the T5 or ByT5 model.
  • numEncoderLayers: the number of encoder layers in the T5 or ByT5 model.
  • numDecoderLayers: the number of decoder layers in the T5 or ByT5 model.
  • gradient: whether to compute the gradient of the T5 or ByT5 model.
  • device: the computational device on which the T5 or ByT5 model parameters are to be allocated.
  • hasDropout: whether dropout is enabled in the T5 or ByT5 model.
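
A minimal value-level sketch, reusing the hypothetical T5DemoModel alias from above. The singleton constructors ST5, SWithLMHead, SWithGradient, SCPU, and SWithDropout are assumed to be the ones exported by the surrounding hasktorch-gradually-typed modules; the six dimension singletons are discharged through the SingI constraints once the result type is fixed.

  t5DemoSpec ::
    SGradient gradient ->
    SDevice device ->
    SHasDropout hasDropout ->
    ModelSpec (T5DemoModel 'WithLMHead gradient device hasDropout)
  t5DemoSpec = t5ModelSpec ST5 SWithLMHead (SNat @6) (SNat @6)

  -- For example, a specification on the CPU with gradients and dropout enabled:
  -- t5DemoSpec (SGradient SWithGradient) (SDevice SCPU) SWithDropout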

mkT5Input :: forall batchDim seqDim device m output. (MonadThrow m, SGetDim batchDim, SGetDim seqDim, Catch ('Shape '['Dim ('Name "*") 'UncheckedSize, 'Dim ('Name "*") 'UncheckedSize] <+> 'Shape '[batchDim, seqDim]), output ~ Tensor ('Gradient 'WithoutGradient) ('Layout 'Dense) device ('DataType 'Int64) ('Shape '[batchDim, seqDim])) => SDim batchDim -> SDim seqDim -> SDevice device -> [[Int]] -> m output Source #
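
Creates an input tensor of data type 'Int64 and shape [batchDim, seqDim] on the given device from a batch of token ids, suitable as input to a T5 or ByT5 model.

A minimal usage sketch in IO (any MonadThrow will do). The SName/SSize/:&: dimension-singleton syntax and the SCPU device singleton are assumed from the surrounding Torch.GraduallyTyped modules; the token ids are made up for illustration, and the padding of the shorter row with t5PadTokenId is an assumption based on the analogous mkTransformerInput helper, not a documented guarantee.

  example = mkT5Input
    (SName @"*" :&: SSize @2) -- batch dimension of size 2
    (SName @"*" :&: SSize @5) -- sequence dimension of size 5
    (SDevice SCPU)
    [ [13959, 1566, 12, 2968, 10] -- a full-length row
    , [100, 19, 207, t5EOSTokenId] -- shorter row, assumed to be padded with t5PadTokenId
    ]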