NL-FM-Toolkit
Getting Started:
Introduction
Installation
Training a Tokenizer from Scratch
Creating Model Configuration File
Training a Masked Language Model from Scratch
Training a Causal Language Model from Scratch
Training a Sequence Labeler
Training a Sequence Classifier
Scripts
Modules
NL-FM-Toolkit
Index
Index
A | B | C | D | E | F | G | K | L | M | O | P | R | T | U | V
A
add_vocab_from_file() (in module train_tokenizer)
,
[1]
B
block_size (run_clm.DataTrainingArguments attribute)
,
[1]
C
cache_dir (run_clm.ModelArguments attribute)
,
[1]
(run_mlm.ModelArguments attribute)
,
[1]
(run_seq.ModelArguments attribute)
,
[1]
(run_tc.ModelArguments attribute)
,
[1]
config_name (run_clm.ModelArguments attribute)
,
[1]
(run_mlm.ModelArguments attribute)
,
[1]
(run_seq.ModelArguments attribute)
,
[1]
(run_tc.ModelArguments attribute)
,
[1]
config_overrides (run_clm.ModelArguments attribute)
,
[1]
(run_mlm.ModelArguments attribute)
,
[1]
create_config
module
D
dataset_config_name (run_clm.DataTrainingArguments attribute)
,
[1]
(run_mlm.DataTrainingArguments attribute)
,
[1]
(run_seq.DataTrainingArguments attribute)
,
[1]
(run_tc.DataTrainingArguments attribute)
,
[1]
dataset_name (run_clm.DataTrainingArguments attribute)
,
[1]
(run_mlm.DataTrainingArguments attribute)
,
[1]
(run_seq.DataTrainingArguments attribute)
,
[1]
(run_tc.DataTrainingArguments attribute)
,
[1]
DataTrainingArguments (class in run_clm)
,
[1]
(class in run_mlm)
,
[1]
(class in run_seq)
,
[1]
(class in run_tc)
,
[1]
E
early_stop (run_seq.TaskArguments attribute)
,
[1]
(run_tc.DataTrainingArguments attribute)
,
[1]
F
freeze_token_embed (run_mlm.ModelArguments attribute)
,
[1]
G
get_command_line_args() (in module tokenize_corpus)
(in module train_tokenizer)
K
keep_linebreaks (run_clm.DataTrainingArguments attribute)
,
[1]
(run_mlm.DataTrainingArguments attribute)
,
[1]
L
label_column_name (run_tc.DataTrainingArguments attribute)
,
[1]
line_by_line (run_clm.DataTrainingArguments attribute)
,
[1]
(run_mlm.DataTrainingArguments attribute)
,
[1]
log_dir (run_seq.ModelArguments attribute)
,
[1]
(run_tc.ModelArguments attribute)
,
[1]
M
main() (in module create_config)
(in module run_clm)
,
[1]
(in module run_mlm)
,
[1]
(in module run_seq)
,
[1]
(in module run_tc)
,
[1]
(in module tokenize_corpus)
,
[1]
(in module train_tokenizer)
,
[1]
max_eval_samples (run_clm.DataTrainingArguments attribute)
,
[1]
(run_mlm.DataTrainingArguments attribute)
,
[1]
(run_seq.DataTrainingArguments attribute)
,
[1]
max_predict_samples (run_seq.DataTrainingArguments attribute)
,
[1]
max_seq_length (run_clm.DataTrainingArguments attribute)
,
[1]
(run_mlm.DataTrainingArguments attribute)
,
[1]
(run_seq.DataTrainingArguments attribute)
,
[1]
(run_tc.DataTrainingArguments attribute)
,
[1]
max_train_samples (run_clm.DataTrainingArguments attribute)
,
[1]
(run_mlm.DataTrainingArguments attribute)
,
[1]
(run_seq.DataTrainingArguments attribute)
,
[1]
mlm_probability (run_mlm.DataTrainingArguments attribute)
,
[1]
model_name_or_path (run_clm.ModelArguments attribute)
,
[1]
(run_mlm.ModelArguments attribute)
,
[1]
(run_seq.ModelArguments attribute)
,
[1]
(run_tc.ModelArguments attribute)
,
[1]
model_revision (run_clm.ModelArguments attribute)
,
[1]
(run_mlm.ModelArguments attribute)
,
[1]
(run_seq.ModelArguments attribute)
,
[1]
model_type (run_clm.ModelArguments attribute)
,
[1]
(run_mlm.ModelArguments attribute)
,
[1]
ModelArguments (class in run_clm)
,
[1]
(class in run_mlm)
,
[1]
(class in run_seq)
,
[1]
(class in run_tc)
,
[1]
module
create_config
run_clm
,
[1]
run_mlm
,
[1]
run_seq
,
[1]
run_tc
,
[1]
tokenize_corpus
,
[1]
train_tokenizer
,
[1]
O
overwrite_cache (run_clm.DataTrainingArguments attribute)
,
[1]
(run_mlm.DataTrainingArguments attribute)
,
[1]
(run_seq.DataTrainingArguments attribute)
,
[1]
(run_tc.DataTrainingArguments attribute)
,
[1]
P
pad_to_max_length (run_clm.DataTrainingArguments attribute)
,
[1]
(run_mlm.DataTrainingArguments attribute)
,
[1]
(run_seq.DataTrainingArguments attribute)
,
[1]
(run_tc.DataTrainingArguments attribute)
,
[1]
preprocessing_num_workers (run_clm.DataTrainingArguments attribute)
,
[1]
(run_mlm.DataTrainingArguments attribute)
,
[1]
(run_tc.DataTrainingArguments attribute)
,
[1]
pretrained_token_embed (run_mlm.ModelArguments attribute)
,
[1]
R
read_txt_embeddings() (in module run_mlm)
,
[1]
run_clm
module
,
[1]
run_mlm
module
,
[1]
run_seq
module
,
[1]
run_tc
module
,
[1]
T
task (run_seq.TaskArguments attribute)
,
[1]
task_name (run_seq.DataTrainingArguments attribute)
,
[1]
(run_tc.DataTrainingArguments attribute)
,
[1]
TaskArguments (class in run_seq)
,
[1]
test_file (run_clm.DataTrainingArguments attribute)
,
[1]
(run_mlm.DataTrainingArguments attribute)
,
[1]
(run_seq.DataTrainingArguments attribute)
,
[1]
(run_tc.DataTrainingArguments attribute)
,
[1]
text_column_name (run_tc.DataTrainingArguments attribute)
,
[1]
tokenize_corpus
module
,
[1]
tokenizer_name (run_clm.ModelArguments attribute)
,
[1]
(run_mlm.ModelArguments attribute)
,
[1]
(run_seq.ModelArguments attribute)
,
[1]
(run_tc.ModelArguments attribute)
,
[1]
train_file (run_clm.DataTrainingArguments attribute)
,
[1]
(run_mlm.DataTrainingArguments attribute)
,
[1]
(run_seq.DataTrainingArguments attribute)
,
[1]
(run_tc.DataTrainingArguments attribute)
,
[1]
train_tokenizer
module
,
[1]
U
use_auth_token (run_clm.ModelArguments attribute)
,
[1]
(run_mlm.ModelArguments attribute)
,
[1]
(run_seq.ModelArguments attribute)
,
[1]
(run_tc.ModelArguments attribute)
,
[1]
use_fast (run_tc.ModelArguments attribute)
,
[1]
use_fast_tokenizer (run_clm.ModelArguments attribute)
,
[1]
(run_mlm.ModelArguments attribute)
,
[1]
(run_seq.ModelArguments attribute)
,
[1]
V
validation_file (run_clm.DataTrainingArguments attribute)
,
[1]
(run_mlm.DataTrainingArguments attribute)
,
[1]
(run_seq.DataTrainingArguments attribute)
,
[1]
(run_tc.DataTrainingArguments attribute)
,
[1]
validation_split_percentage (run_clm.DataTrainingArguments attribute)
,
[1]
(run_mlm.DataTrainingArguments attribute)
,
[1]