hasktorch-gradually-typed-0.2.0.0: experimental project for hasktorch
Safe Haskell: Safe-Inferred
Language: Haskell2010

Torch.GraduallyTyped.NN.Transformer.RoBERTa.Base

Synopsis

Documentation

type RoBERTaBaseNumLayers = 12 Source #

RoBERTa-Base number of layers. 'num_hidden_layers = 12'

robertaBaseNumLayers :: SNat RoBERTaBaseNumLayers Source #

RoBERTa-Base number of layers singleton.

type RoBERTaBaseHeadDim = 'Dim ('Name "*") ('Size 12) Source #

RoBERTa-Base number of attention heads. 'num_attention_heads = 12'

type RoBERTaBaseHeadEmbedDim = 'Dim ('Name "*") ('Size 64) Source #

RoBERTa-Base head embedding dimension. 'd_kv = 64'

type RoBERTaBaseEmbedDim = 'Dim ('Name "*") ('Size 768) Source #

RoBERTa-Base embedding dimension. 'hidden_size = n_heads * d_kv = 768'

type RoBERTaBaseInputEmbedDim = 'Dim ('Name "*") ('Size 768) Source #

RoBERTa-Base model dimension. 'hidden_size = 768'

type RoBERTaBaseFFNDim = 'Dim ('Name "*") ('Size 3072) Source #

RoBERTa-Base feed-forward network dimension. 'intermediate_size = 3072'

type RoBERTaBaseVocabDim = 'Dim ('Name "*") ('Size 50265) Source #

RoBERTa-Base vocabulary dimension. 'vocab_size = 50265'

robertaBaseVocabDim :: SDim RoBERTaBaseVocabDim Source #

RoBERTa-Base vocabulary dimension singleton.

type RoBERTaBaseTypeVocabDim = 'Dim ('Name "*") ('Size 1) Source #

RoBERTa-Base type vocabulary dimension. 'type_vocab_size = 1'

robertaBaseSpec :: STransformerHead transformerHead -> SGradient gradient -> SDevice device -> SHasDropout hasDropout -> ModelSpec (RoBERTaBase transformerHead gradient device hasDropout) Source #

RoBERTa-Base model specification.