Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

dvc.yaml 11 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
  1. stages:
  2. prepare: # Runs scripts.prepare; declares the vocab and config files below as outputs
  3. cmd: python -m scripts.prepare
  4. deps:
  5. - scripts/prepare.py
  6. - data/all.csv
  7. params:
  8. - basic
  9. outs:
  10. - outputs/vocab.plk # NOTE(review): ".plk" may be a typo for ".pkl"; validate and train use the same spelling, so rename everywhere or nowhere
  11. - outputs/config.json
  12. validate: # Runs scripts.validate for each listed item; consumes the outputs of prepare
  13. foreach: # List of simple values; each one is available as ${item}
  14. - mlp
  15. - lstm
  16. - cnn
  17. - selected
  18. do:
  19. cmd: python -m scripts.validate ${item}
  20. deps:
  21. - data/all.csv
  22. - outputs/vocab.plk
  23. - outputs/config.json
  24. - scripts/validate.py
  25. params:
  26. - ${item} # params section named after the current item
  27. - validate
  28. metrics:
  29. - outputs/${item}_validate_results.json:
  30. cache: false
  31. plots:
  32. - outputs/${item}_validate_plots.csv:
  33. cache: false
  34. validate_bert: # Runs scripts.validate_bert with model family "bert" for each item below
  35. foreach: # Dictionary of named items; each provides pretrained_model and method
  36. bert-base-uncased_basic:
  37. pretrained_model: bert-base-uncased
  38. method: basic
  39. bert-base-uncased_lstm:
  40. pretrained_model: bert-base-uncased
  41. method: lstm
  42. bert-large-uncased_basic:
  43. pretrained_model: bert-large-uncased
  44. method: basic
  45. bert-large-uncased_lstm: # NOTE(review): train_bert and inference_bert also list bert-large-uncased_cnn; confirm its absence here is intentional
  46. pretrained_model: bert-large-uncased
  47. method: lstm
  48. do:
  49. cmd: python -m scripts.validate_bert bert ${item.pretrained_model} ${item.method}
  50. deps:
  51. - data/all.csv
  52. - model/bert/${item.method}.py
  53. - scripts/validate_bert.py
  54. params:
  55. - bert.max_len
  56. - bert.do_lower_case
  57. - bert.${item.method} # params sub-section selected by the item's method
  58. - validate
  59. metrics:
  60. - outputs/bert-${item.pretrained_model}-${item.method}_validate_results.json:
  61. cache: false
  62. plots:
  63. - outputs/bert-${item.pretrained_model}-${item.method}_validate_plots.csv:
  64. cache: false
  65. validate_xlnet: # Runs scripts.validate_bert with model family "xlnet" for each listed item
  66. foreach: # List of simple values; each one is available as ${item}
  67. - basic
  68. - sequence_classification # NOTE(review): train_xlnet iterates over basic/cnn items with an explicit pretrained_model; confirm this different item set is intended
  69. do:
  70. cmd: python -m scripts.validate_bert xlnet ${item}
  71. deps:
  72. - data/all.csv
  73. - model/xlnet/${item}.py
  74. - scripts/validate_bert.py
  75. params:
  76. - xlnet.max_len
  77. - xlnet.${item}
  78. - xlnet.pretrained_model # the model name is tracked as a param here rather than passed on the command line
  79. - validate
  80. metrics:
  81. - outputs/xlnet-${item}_validate_results.json:
  82. cache: false
  83. plots:
  84. - outputs/xlnet-${item}_validate_plots.csv:
  85. cache: false
  86. validate_roberta: # Runs scripts.validate_bert with model family "roberta" for the single item below
  87. foreach: # Dictionary of named items; each provides pretrained_model and method
  88. siebert/sentiment-roberta-large-english_sentiment:
  89. pretrained_model: siebert/sentiment-roberta-large-english # NOTE(review): contains "/", so the interpolated metrics/plots paths below fall under outputs/roberta-siebert/
  90. method: sentiment
  91. do:
  92. cmd: python -m scripts.validate_bert roberta ${item.pretrained_model} ${item.method}
  93. deps:
  94. - data/train.csv # NOTE(review): the other validate stages depend on data/all.csv; confirm train.csv is intended
  95. - model/roberta/${item.method}.py
  96. - scripts/validate_bert.py
  97. params:
  98. - roberta.max_len
  99. - roberta.do_lower_case
  100. - roberta.${item.method}
  101. - validate
  102. metrics:
  103. - outputs/roberta-${item.pretrained_model}-${item.method}_validate_results.json:
  104. cache: false
  105. plots:
  106. - outputs/roberta-${item.pretrained_model}-${item.method}_validate_plots.csv:
  107. cache: false
  108. train: # Runs scripts.train for each listed item; consumes the outputs of prepare
  109. foreach: # List of simple values; each one is available as ${item}
  110. - mlp
  111. - lstm
  112. - cnn
  113. - selected
  114. do:
  115. cmd: python -m scripts.train ${item}
  116. deps:
  117. - data/all.csv
  118. - outputs/vocab.plk
  119. - outputs/config.json
  120. - scripts/train.py
  121. params:
  122. - ${item} # params section named after the current item
  123. - train
  124. metrics:
  125. - outputs/${item}_results.json:
  126. cache: false
  127. plots:
  128. - outputs/${item}_plots.csv:
  129. cache: false
  130. outs:
  131. - outputs/${item}_checkpoint.pth # listed as a dependency of the matching inference item
  132. train_bert: # Runs scripts.train_bert with model family "bert" for each item below
  133. foreach: # Dictionary of named items; each provides pretrained_model and method
  134. bert-base-uncased_basic:
  135. pretrained_model: bert-base-uncased
  136. method: basic
  137. bert-base-uncased_lstm:
  138. pretrained_model: bert-base-uncased
  139. method: lstm
  140. bert-large-uncased_basic:
  141. pretrained_model: bert-large-uncased
  142. method: basic
  143. bert-large-uncased_lstm:
  144. pretrained_model: bert-large-uncased
  145. method: lstm
  146. bert-large-uncased_cnn:
  147. pretrained_model: bert-large-uncased
  148. method: cnn
  149. do:
  150. cmd: python -m scripts.train_bert bert ${item.pretrained_model} ${item.method}
  151. deps:
  152. - data/all.csv
  153. - model/bert/${item.method}.py
  154. - scripts/train_bert.py
  155. params:
  156. - bert.max_len
  157. - bert.do_lower_case
  158. - bert.${item.method} # params sub-section selected by the item's method
  159. - train
  160. metrics:
  161. - outputs/bert-${item.pretrained_model}-${item.method}_results.json:
  162. cache: false
  163. plots:
  164. - outputs/bert-${item.pretrained_model}-${item.method}_plots.csv:
  165. cache: false
  166. outs:
  167. - outputs/bert-${item.pretrained_model}-${item.method}_checkpoint.pth # listed as a dependency of inference_bert; the large/basic one is also a dependency of ensemble
  168. train_xlnet: # Runs scripts.train_bert with model family "xlnet" for each item below
  169. foreach: # Dictionary of named items; each provides pretrained_model and method
  170. xlnet-base-cased_basic:
  171. pretrained_model: xlnet-base-cased
  172. method: basic
  173. xlnet-large-cased_basic:
  174. pretrained_model: xlnet-large-cased
  175. method: basic
  176. xlnet-large-cased_cnn:
  177. pretrained_model: xlnet-large-cased
  178. method: cnn
  179. do:
  180. cmd: python -m scripts.train_bert xlnet ${item.pretrained_model} ${item.method}
  181. deps:
  182. - data/all.csv
  183. - model/xlnet/${item.method}.py
  184. - scripts/train_bert.py
  185. params:
  186. - xlnet.max_len
  187. - xlnet.do_lower_case
  188. - xlnet.${item.method} # params sub-section selected by the item's method
  189. - train
  190. metrics:
  191. - outputs/xlnet-${item.pretrained_model}-${item.method}_results.json:
  192. cache: false
  193. plots:
  194. - outputs/xlnet-${item.pretrained_model}-${item.method}_plots.csv:
  195. cache: false
  196. outs:
  197. - outputs/xlnet-${item.pretrained_model}-${item.method}_checkpoint.pth # listed as a dependency of inference_xlnet; the large/basic one is also a dependency of ensemble
  198. train_roberta: # Runs scripts.train_bert with model family "roberta" for each item below
  199. foreach: # Dictionary of named items; each provides pretrained_model and method
  200. roberta-base_basic:
  201. pretrained_model: roberta-base
  202. method: basic
  203. roberta-large_basic:
  204. pretrained_model: roberta-large
  205. method: basic
  206. roberta-large_cnn:
  207. pretrained_model: roberta-large
  208. method: cnn
  209. do:
  210. cmd: python -m scripts.train_bert roberta ${item.pretrained_model} ${item.method}
  211. deps:
  212. - data/all.csv
  213. - model/roberta/${item.method}.py
  214. - scripts/train_bert.py
  215. params:
  216. - roberta.max_len
  217. - roberta.do_lower_case
  218. - roberta.${item.method} # params sub-section selected by the item's method
  219. - train
  220. metrics:
  221. - outputs/roberta-${item.pretrained_model}-${item.method}_results.json:
  222. cache: false
  223. plots:
  224. - outputs/roberta-${item.pretrained_model}-${item.method}_plots.csv:
  225. cache: false
  226. outs:
  227. - outputs/roberta-${item.pretrained_model}-${item.method}_checkpoint.pth # listed as a dependency of inference_roberta; the large/basic one is also a dependency of ensemble
  228. train_albert: # Runs scripts.train_bert with model family "albert" for the single item below
  229. foreach: # Dictionary of named items; each provides pretrained_model and method
  230. albert-xlarge-v2_cnn:
  231. pretrained_model: albert-xlarge-v2
  232. method: cnn
  233. do:
  234. cmd: python -m scripts.train_bert albert ${item.pretrained_model} ${item.method}
  235. deps:
  236. - data/all.csv
  237. - model/albert/${item.method}.py
  238. - scripts/train_bert.py
  239. params:
  240. - albert.max_len
  241. - albert.do_lower_case
  242. - albert.${item.method} # params sub-section selected by the item's method
  243. - train
  244. metrics:
  245. - outputs/albert-${item.pretrained_model}-${item.method}_results.json:
  246. cache: false
  247. plots:
  248. - outputs/albert-${item.pretrained_model}-${item.method}_plots.csv:
  249. cache: false
  250. outs:
  251. - outputs/albert-${item.pretrained_model}-${item.method}_checkpoint.pth # listed as a dependency of inference_albert
  252. inference: # Runs scripts.inference for each listed item and declares a submission CSV as output
  253. foreach: # List of simple values; each one is available as ${item}
  254. - mlp
  255. - lstm
  256. - cnn
  257. - selected
  258. do:
  259. cmd: python -m scripts.inference ${item}
  260. deps:
  261. - data/test.csv
  262. - scripts/inference.py
  263. - outputs/${item}_checkpoint.pth # declared as an out of the train stage for the same item
  264. - outputs/config.json # declared as an out of the prepare stage
  265. outs:
  266. - outputs/${item}_submission.csv
  267. inference_bert: # Runs scripts.inference_bert with model family "bert" for each item below
  268. foreach: # Dictionary of named items; each provides pretrained_model and method
  269. bert-base-uncased_basic:
  270. pretrained_model: bert-base-uncased
  271. method: basic
  272. bert-base-uncased_lstm:
  273. pretrained_model: bert-base-uncased
  274. method: lstm
  275. bert-large-uncased_basic:
  276. pretrained_model: bert-large-uncased
  277. method: basic
  278. bert-large-uncased_cnn:
  279. pretrained_model: bert-large-uncased
  280. method: cnn
  281. bert-large-uncased_lstm:
  282. pretrained_model: bert-large-uncased
  283. method: lstm
  284. do:
  285. cmd: python -m scripts.inference_bert bert ${item.pretrained_model} ${item.method}
  286. deps: # NOTE(review): no params section here, while inference_roberta/inference_albert track <family>.eval_max_len; confirm whether bert.eval_max_len should be tracked
  287. - data/test.csv
  288. - scripts/inference_bert.py
  289. - outputs/bert-${item.pretrained_model}-${item.method}_checkpoint.pth # declared as an out of train_bert for the same item
  290. outs:
  291. - outputs/bert-${item.pretrained_model}-${item.method}_submission.csv
  292. inference_xlnet: # Runs scripts.inference_bert with model family "xlnet" for each item below
  293. foreach: # Dictionary of named items; each provides pretrained_model and method
  294. xlnet-base-cased_basic:
  295. pretrained_model: xlnet-base-cased
  296. method: basic
  297. xlnet-large-cased_basic:
  298. pretrained_model: xlnet-large-cased
  299. method: basic
  300. xlnet-large-cased_cnn:
  301. pretrained_model: xlnet-large-cased
  302. method: cnn
  303. do:
  304. cmd: python -m scripts.inference_bert xlnet ${item.pretrained_model} ${item.method}
  305. deps: # NOTE(review): no params section here, while inference_roberta/inference_albert track <family>.eval_max_len; confirm whether xlnet.eval_max_len should be tracked
  306. - data/test.csv
  307. - scripts/inference_bert.py
  308. - outputs/xlnet-${item.pretrained_model}-${item.method}_checkpoint.pth # declared as an out of train_xlnet for the same item
  309. outs:
  310. - outputs/xlnet-${item.pretrained_model}-${item.method}_submission.csv
  311. inference_roberta: # Runs scripts.inference_bert with model family "roberta" for each item below
  312. foreach: # Dictionary of named items; each provides pretrained_model and method
  313. roberta-base_basic:
  314. pretrained_model: roberta-base
  315. method: basic
  316. roberta-large_basic:
  317. pretrained_model: roberta-large
  318. method: basic
  319. roberta-large_cnn:
  320. pretrained_model: roberta-large
  321. method: cnn
  322. do:
  323. cmd: python -m scripts.inference_bert roberta ${item.pretrained_model} ${item.method}
  324. params:
  325. - roberta.eval_max_len
  326. deps:
  327. - data/test.csv
  328. - scripts/inference_bert.py
  329. - outputs/roberta-${item.pretrained_model}-${item.method}_checkpoint.pth # declared as an out of train_roberta for the same item
  330. outs:
  331. - outputs/roberta-${item.pretrained_model}-${item.method}_submission.csv
  332. inference_albert: # Runs scripts.inference_bert with model family "albert" for the single item below
  333. foreach: # Dictionary of named items; each provides pretrained_model and method
  334. albert-xlarge-v2_cnn:
  335. pretrained_model: albert-xlarge-v2
  336. method: cnn
  337. do:
  338. cmd: python -m scripts.inference_bert albert ${item.pretrained_model} ${item.method}
  339. params:
  340. - albert.eval_max_len
  341. deps:
  342. - data/test.csv
  343. - scripts/inference_bert.py
  344. - outputs/albert-${item.pretrained_model}-${item.method}_checkpoint.pth # declared as an out of train_albert for the same item
  345. outs:
  346. - outputs/albert-${item.pretrained_model}-${item.method}_submission.csv
  347. ensemble: # Single (non-foreach) stage: runs scripts.ensemble and declares outputs/ensemble.csv as output
  348. cmd: python -m scripts.ensemble
  349. deps:
  350. - scripts/ensemble.py
  351. - data/test.csv
  352. - outputs/bert-bert-large-uncased-basic_checkpoint.pth # out of train_bert item bert-large-uncased_basic
  353. - outputs/roberta-roberta-large-basic_checkpoint.pth # out of train_roberta item roberta-large_basic
  354. - outputs/xlnet-xlnet-large-cased-basic_checkpoint.pth # out of train_xlnet item xlnet-large-cased_basic
  355. params:
  356. - evaluate
  357. outs:
  358. - outputs/ensemble.csv
Tip!

Press p or ← to see the previous file, or n or → to see the next file

Comments

Loading...