Safe Haskell | Safe-Inferred |
---|---|
Language | Haskell2010 |
Synopsis
- type RoBERTaDType = 'Float
- robertaDType :: SDType RoBERTaDType
- type RoBERTaDataType = 'DataType RoBERTaDType
- robertaDataType :: SDataType RoBERTaDataType
- robertaDropoutP :: Double
- type RoBERTaPosEncDim = 'Dim ('Name "*") ('Size 514)
- robertaPosEncDim :: SDim RoBERTaPosEncDim
- robertaEps :: Double
- robertaMaxPositionEmbeddings :: Int
- robertaPadTokenId :: Int
- robertaBOSTokenId :: Int
- robertaEOSTokenId :: Int
- robertaAttentionMaskBias :: Double
- type family RoBERTaModelF (transformerHead :: TransformerHead) (numLayers :: Nat) (gradient :: Gradient RequiresGradient) (device :: Device (DeviceType Nat)) (headDim :: Dim (Name Symbol) (Size Nat)) (headEmbedDim :: Dim (Name Symbol) (Size Nat)) (embedDim :: Dim (Name Symbol) (Size Nat)) (inputEmbedDim :: Dim (Name Symbol) (Size Nat)) (ffnDim :: Dim (Name Symbol) (Size Nat)) (vocabDim :: Dim (Name Symbol) (Size Nat)) (typeVocabDim :: Dim (Name Symbol) (Size Nat)) (hasDropout :: HasDropout) :: Type where ...
- robertaModelSpec :: forall transformerHead numLayers gradient device headDim headEmbedDim embedDim inputEmbedDim ffnDim vocabDim typeVocabDim hasDropout. (SingI headDim, SingI headEmbedDim, SingI embedDim, SingI inputEmbedDim, SingI ffnDim, SingI vocabDim, SingI typeVocabDim) => STransformerHead transformerHead -> SNat numLayers -> SGradient gradient -> SDevice device -> SHasDropout hasDropout -> ModelSpec (RoBERTaModelF transformerHead numLayers gradient device headDim headEmbedDim embedDim inputEmbedDim ffnDim vocabDim typeVocabDim hasDropout)
- mkRoBERTaInput :: forall batchDim seqDim device m output. (MonadThrow m, SGetDim batchDim, SGetDim seqDim, Catch ('Shape '['Dim ('Name "*") 'UncheckedSize, 'Dim ('Name "*") 'UncheckedSize] <+> 'Shape '[batchDim, seqDim]), output ~ Tensor ('Gradient 'WithoutGradient) ('Layout 'Dense) device ('DataType 'Int64) ('Shape '[batchDim, seqDim])) => SDim batchDim -> SDim seqDim -> SDevice device -> [[Int]] -> m output
Documentation
type RoBERTaDType = 'Float Source #
RoBERTa dType.
robertaDType :: SDType RoBERTaDType Source #
RoBERTa dType singleton.
type RoBERTaDataType = 'DataType RoBERTaDType Source #
RoBERTa data type.
robertaDataType :: SDataType RoBERTaDataType Source #
RoBERTa data type singleton.
robertaDropoutP :: Double Source #
RoBERTa dropout rate. 'dropout_rate = 0.1'
type RoBERTaPosEncDim = 'Dim ('Name "*") ('Size 514) Source #
RoBERTa positional encoding dimension.
Note the two extra positions: RoBERTa offsets position ids past the padding token id, so 514 position embeddings cover a maximum usable sequence length of 512.
robertaPosEncDim :: SDim RoBERTaPosEncDim Source #
RoBERTa positional encoding dimension singleton.
robertaEps :: Double Source #
RoBERTa layer-norm epsilon. 'layer_norm_epsilon = 1e-5'
robertaMaxPositionEmbeddings :: Int Source #
RoBERTa maximum number of position embeddings. 'max_position_embeddings = 514'
robertaPadTokenId :: Int Source #
RoBERTa padding token id. 'pad_token_id = 1'
robertaBOSTokenId :: Int Source #
RoBERTa begin-of-sentence token id. 'bos_token_id = 0'
robertaEOSTokenId :: Int Source #
RoBERTa end-of-sentence token id. 'eos_token_id = 2'
robertaAttentionMaskBias :: Double Source #
RoBERTa attention mask bias.
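As a quick illustration of how the token-id constants fit together, here is a hedged sketch; the helper wrapAndPad is hypothetical and not exported by this module. It brackets a tokenized sequence with the BOS and EOS ids and right-pads it to a fixed length with the padding id:

```haskell
-- Hypothetical helper (assumes this module is imported): wrap a tokenized
-- sequence in RoBERTa's special tokens and right-pad it to a fixed length.
wrapAndPad :: Int -> [Int] -> [Int]
wrapAndPad maxLen tokens =
  let body = robertaBOSTokenId : tokens ++ [robertaEOSTokenId]
   in take maxLen (body ++ repeat robertaPadTokenId)
```

For example, wrapAndPad 6 [31414, 232] (with hypothetical token ids) evaluates to [0, 31414, 232, 2, 1, 1].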
type family RoBERTaModelF (transformerHead :: TransformerHead) (numLayers :: Nat) (gradient :: Gradient RequiresGradient) (device :: Device (DeviceType Nat)) (headDim :: Dim (Name Symbol) (Size Nat)) (headEmbedDim :: Dim (Name Symbol) (Size Nat)) (embedDim :: Dim (Name Symbol) (Size Nat)) (inputEmbedDim :: Dim (Name Symbol) (Size Nat)) (ffnDim :: Dim (Name Symbol) (Size Nat)) (vocabDim :: Dim (Name Symbol) (Size Nat)) (typeVocabDim :: Dim (Name Symbol) (Size Nat)) (hasDropout :: HasDropout) :: Type where ... Source #
Specifies the RoBERTa model.
RoBERTaModelF transformerHead numLayers gradient device headDim headEmbedDim embedDim inputEmbedDim ffnDim vocabDim typeVocabDim hasDropout = GSimplifiedEncoderOnlyTransformer (GEncoderOnlyTransformerF 'RoBERTa transformerHead numLayers gradient device RoBERTaDataType headDim headEmbedDim embedDim inputEmbedDim ffnDim RoBERTaPosEncDim vocabDim typeVocabDim hasDropout) MkAbsPos MkTransformerPaddingMask (MkTransformerAttentionMask RoBERTaDataType)
robertaModelSpec :: forall transformerHead numLayers gradient device headDim headEmbedDim embedDim inputEmbedDim ffnDim vocabDim typeVocabDim hasDropout. (SingI headDim, SingI headEmbedDim, SingI embedDim, SingI inputEmbedDim, SingI ffnDim, SingI vocabDim, SingI typeVocabDim) => STransformerHead transformerHead -> SNat numLayers -> SGradient gradient -> SDevice device -> SHasDropout hasDropout -> ModelSpec (RoBERTaModelF transformerHead numLayers gradient device headDim headEmbedDim embedDim inputEmbedDim ffnDim vocabDim typeVocabDim hasDropout) Source #
Specifies the parameters of a RoBERTa model.
- transformerHead: the head of the RoBERTa model.
- numLayers: the number of layers in the RoBERTa model.
- gradient: whether to compute gradients for the RoBERTa model's parameters.
- device: the computational device on which the RoBERTa model parameters are to be allocated.
- hasDropout: whether the RoBERTa model includes dropout layers.
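A hedged usage sketch follows. The singleton constructors shown (SWithLMHead, SNat, SGradient, SWithGradient, SDevice, SCPU, SWithDropout) are assumptions about the surrounding library, and in practice the dimension type parameters (headDim, embedDim, and so on) must be pinned down by a type signature or type applications so that the SingI constraints can be resolved:

```haskell
-- Sketch of a RoBERTa-base-sized model spec (12 layers, LM head, CPU).
-- Constructor names are assumptions; see the lead-in above.
robertaBaseSpec =
  robertaModelSpec
    SWithLMHead               -- transformerHead: include a language-modelling head
    (SNat @12)                -- numLayers: 12 encoder layers
    (SGradient SWithGradient) -- gradient: track parameter gradients
    (SDevice SCPU)            -- device: allocate parameters on the CPU
    SWithDropout              -- hasDropout: include dropout layers
```

The resulting ModelSpec can then be used to initialise the model's parameters or load them from a checkpoint.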
mkRoBERTaInput :: forall batchDim seqDim device m output. (MonadThrow m, SGetDim batchDim, SGetDim seqDim, Catch ('Shape '['Dim ('Name "*") 'UncheckedSize, 'Dim ('Name "*") 'UncheckedSize] <+> 'Shape '[batchDim, seqDim]), output ~ Tensor ('Gradient 'WithoutGradient) ('Layout 'Dense) device ('DataType 'Int64) ('Shape '[batchDim, seqDim])) => SDim batchDim -> SDim seqDim -> SDevice device -> [[Int]] -> m output Source #
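Judging from its signature, mkRoBERTaInput packs a batch of token-id sequences into a dense Int64 tensor with the given batch and sequence dimensions, presumably right-padding shorter sequences with robertaPadTokenId. A hedged sketch of a call, where the SName/SSize/:&: dimension-singleton syntax and the SCPU device constructor are assumptions about the surrounding library:

```haskell
-- Sketch: a batch of two sequences, padded to length 6.
-- Token ids are hypothetical; 0/2/1 are the documented BOS/EOS/padding ids.
example = do
  input <-
    mkRoBERTaInput
      (SName @"*" :&: SSize @2)  -- batch dimension of size 2
      (SName @"*" :&: SSize @6)  -- sequence dimension of size 6
      (SDevice SCPU)
      [ [0, 31414, 232, 2]
      , [0, 20920, 2]
      ]
  pure input
```

Run in any MonadThrow (for example IO), this should yield a tensor of shape [2, 6].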