seqtag_params.yaml

corpus:
  columns: # column definition for ColumnCorpus
    0: 'text'
    1: 'ner'
    2: 'space-after'
  data_folder: "./data/"
  train_file: "train.txt"
  test_file: "test.txt"
  dev_file: "dev.txt"
embeddings:
  flair-backward: "./models/best-flair-backward.pt"
  fasttext: "./models/fasttext_jurinet_jurica_01012020_01072020_anonym.gensim"
seqtagger:
  hidden_size: 32 # number of hidden states in the RNN
  rnn_layers: 1 # number of RNN layers
  use_crf: False # if True, use a CRF decoder; else project directly to tag space
  dropout: 0.0 # dropout probability
  word_dropout: 0.05 # word dropout probability
  reproject_embeddings: True # if True, adds a trainable linear map on top of the embedding layer; if False, no map
  train_initial_hidden_state: False # if True, trains the initial hidden state of the RNN
trainer:
  epoch: 8 # starting epoch (normally 0, but can be higher if you continue training a model)
  use_tensorboard: True # if True, writes out TensorBoard information
  #optimizer: "SGD" # optimizer to use (typically SGD or Adam)
type:
  is_fresh_start: False
  run_name: "run3"
train:
  base_path: "./outputs/models/vanilla/" # main path to which all training output is logged and models are saved
  learning_rate: 0.01 # initial learning rate
  mini_batch_size: 5 # size of mini-batches during training
  max_epochs: 16 # maximum number of epochs to train; training terminates once this number is surpassed
  anneal_factor: 0.5 # factor by which the learning rate is annealed
  embeddings_storage_mode: "cpu" # one of 'none' (embeddings deleted and freshly recomputed), 'cpu' or 'gpu' (embeddings kept in memory)
  patience: 2 # number of epochs with no improvement the trainer waits before annealing the learning rate
  min_learning_rate: 0.1 # if the learning rate falls below this threshold, training terminates
  train_with_dev: False # if True, training is performed on train + dev data
  monitor_train: False # if True, training data is evaluated at the end of each epoch
  monitor_test: True # if True, test data is evaluated at the end of each epoch
  checkpoint: False # if True, a full checkpoint is saved at the end of each epoch
  save_final_model: True # if True, the final model is saved
  shuffle: True # if True, data is shuffled during training
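
For reference, a minimal sketch of how a training script might consume this file with the Flair API (the parameter names match Flair releases around 0.6-0.8). The file path, the use of yaml.safe_load, and the assumption that the trainer block feeds the ModelTrainer constructor are illustrative choices; the project's actual training code may wire these parameters differently, and the type block (is_fresh_start, run_name) is presumably project-specific bookkeeping handled elsewhere.

# Sketch only: keys follow the YAML above; the wiring below is an assumption.
import yaml
from flair.datasets import ColumnCorpus
from flair.embeddings import FlairEmbeddings, WordEmbeddings, StackedEmbeddings
from flair.models import SequenceTagger
from flair.trainers import ModelTrainer

with open("seqtag_params.yaml") as f:  # hypothetical path
    params = yaml.safe_load(f)

# CoNLL-style column corpus: token, NER tag, space-after flag.
corpus = ColumnCorpus(
    data_folder=params["corpus"]["data_folder"],
    column_format=params["corpus"]["columns"],
    train_file=params["corpus"]["train_file"],
    dev_file=params["corpus"]["dev_file"],
    test_file=params["corpus"]["test_file"],
)
tag_dictionary = corpus.make_tag_dictionary(tag_type="ner")

# Stack the custom backward Flair language model with the domain fastText vectors.
embeddings = StackedEmbeddings([
    FlairEmbeddings(params["embeddings"]["flair-backward"]),
    WordEmbeddings(params["embeddings"]["fasttext"]),  # gensim KeyedVectors file
])

# BiLSTM(-CRF) tagger configured from the seqtagger block.
tagger = SequenceTagger(
    embeddings=embeddings,
    tag_dictionary=tag_dictionary,
    tag_type="ner",
    **params["seqtagger"],
)

# In older Flair releases, epoch and use_tensorboard are ModelTrainer constructor
# arguments, while the remaining hyper-parameters map onto ModelTrainer.train().
trainer = ModelTrainer(tagger, corpus, **params["trainer"])
trainer.train(**params["train"])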