Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

dvc.yaml 15 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
  1. stages:
  2. parse_labels:
  3. cmd: bohr parse-labels
  4. deps:
  5. - labels
  6. outs:
  7. - labels.py:
  8. cache: false
  9. bugginess_apply_heuristics__heuristics_bugginess__datasets_bugginess-train:
  10. cmd: bohr apply-heuristics bugginess --heuristic-group heuristics.bugginess --dataset
  11. datasets.bugginess-train
  12. deps:
  13. - data/bugginess/train/bug_sample.csv
  14. - data/bugginess/train/bug_sample_files.csv
  15. - data/bugginess/train/bug_sample_issues.csv
  16. - datasets/bugginess-train.py
  17. - heuristics/bugginess.py
  18. - heuristics/keywords
  19. - labels.py
  20. params:
  21. - bohr.json:
  22. - bohr_framework_version
  23. outs:
  24. - generated/bugginess/heuristics.bugginess/heuristic_matrix_datasets.bugginess-train.pkl
  25. metrics:
  26. - metrics/bugginess/heuristics.bugginess/heuristic_metrics_datasets.bugginess-train.json:
  27. cache: false
  28. bugginess_apply_heuristics__heuristics_bugginess__datasets_1151-commits:
  29. cmd: bohr apply-heuristics bugginess --heuristic-group heuristics.bugginess --dataset
  30. datasets.1151-commits
  31. deps:
  32. - data/bugginess/test/1151-commits.csv
  33. - datasets/1151-commits.py
  34. - heuristics/bugginess.py
  35. - heuristics/keywords
  36. - labels.py
  37. params:
  38. - bohr.json:
  39. - bohr_framework_version
  40. outs:
  41. - generated/bugginess/heuristics.bugginess/heuristic_matrix_datasets.1151-commits.pkl
  42. metrics:
  43. - metrics/bugginess/heuristics.bugginess/heuristic_metrics_datasets.1151-commits.json:
  44. cache: false
  45. bugginess_apply_heuristics__heuristics_bugginess__datasets_berger:
  46. cmd: bohr apply-heuristics bugginess --heuristic-group heuristics.bugginess --dataset
  47. datasets.berger
  48. deps:
  49. - data/bugginess/test/berger.csv
  50. - datasets/berger.py
  51. - heuristics/bugginess.py
  52. - heuristics/keywords
  53. - labels.py
  54. params:
  55. - bohr.json:
  56. - bohr_framework_version
  57. outs:
  58. - generated/bugginess/heuristics.bugginess/heuristic_matrix_datasets.berger.pkl
  59. metrics:
  60. - metrics/bugginess/heuristics.bugginess/heuristic_metrics_datasets.berger.json:
  61. cache: false
  62. bugginess_apply_heuristics__heuristics_bugginess__datasets_herzig:
  63. cmd: bohr apply-heuristics bugginess --heuristic-group heuristics.bugginess --dataset
  64. datasets.herzig
  65. deps:
  66. - data/bugginess/test/herzig.csv
  67. - datasets/herzig.py
  68. - heuristics/bugginess.py
  69. - heuristics/keywords
  70. - labels.py
  71. params:
  72. - bohr.json:
  73. - bohr_framework_version
  74. outs:
  75. - generated/bugginess/heuristics.bugginess/heuristic_matrix_datasets.herzig.pkl
  76. metrics:
  77. - metrics/bugginess/heuristics.bugginess/heuristic_metrics_datasets.herzig.json:
  78. cache: false
  79. bugginess_combine_heuristics:
  80. cmd: bohr apply-heuristics bugginess
  81. deps:
  82. - generated/bugginess/heuristics.bugginess/heuristic_matrix_datasets.1151-commits.pkl
  83. - generated/bugginess/heuristics.bugginess/heuristic_matrix_datasets.berger.pkl
  84. - generated/bugginess/heuristics.bugginess/heuristic_matrix_datasets.bugginess-train.pkl
  85. - generated/bugginess/heuristics.bugginess/heuristic_matrix_datasets.herzig.pkl
  86. params:
  87. - bohr.json:
  88. - bohr_framework_version
  89. outs:
  90. - generated/bugginess/analysis_datasets.1151-commits.csv:
  91. cache: false
  92. - generated/bugginess/analysis_datasets.berger.csv:
  93. cache: false
  94. - generated/bugginess/analysis_datasets.bugginess-train.csv:
  95. cache: false
  96. - generated/bugginess/analysis_datasets.herzig.csv:
  97. cache: false
  98. - generated/bugginess/heuristic_matrix_datasets.1151-commits.pkl
  99. - generated/bugginess/heuristic_matrix_datasets.berger.pkl
  100. - generated/bugginess/heuristic_matrix_datasets.bugginess-train.pkl
  101. - generated/bugginess/heuristic_matrix_datasets.herzig.pkl
  102. metrics:
  103. - metrics/bugginess/analysis_datasets.1151-commits.json:
  104. cache: false
  105. - metrics/bugginess/analysis_datasets.berger.json:
  106. cache: false
  107. - metrics/bugginess/analysis_datasets.bugginess-train.json:
  108. cache: false
  109. - metrics/bugginess/analysis_datasets.herzig.json:
  110. cache: false
  111. - metrics/bugginess/heuristic_metrics_datasets.1151-commits.json:
  112. cache: false
  113. - metrics/bugginess/heuristic_metrics_datasets.berger.json:
  114. cache: false
  115. - metrics/bugginess/heuristic_metrics_datasets.bugginess-train.json:
  116. cache: false
  117. - metrics/bugginess/heuristic_metrics_datasets.herzig.json:
  118. cache: false
  119. bugginess_train_label_model:
  120. cmd: bohr train-label-model bugginess datasets.bugginess-train
  121. deps:
  122. - data/bugginess/test/1151-commits.csv
  123. - data/bugginess/test/berger.csv
  124. - data/bugginess/test/herzig.csv
  125. - datasets/1151-commits.py
  126. - datasets/berger.py
  127. - datasets/herzig.py
  128. - generated/bugginess/heuristic_matrix_datasets.1151-commits.pkl
  129. - generated/bugginess/heuristic_matrix_datasets.berger.pkl
  130. - generated/bugginess/heuristic_matrix_datasets.bugginess-train.pkl
  131. - generated/bugginess/heuristic_matrix_datasets.herzig.pkl
  132. params:
  133. - bohr.json:
  134. - bohr_framework_version
  135. outs:
  136. - generated/bugginess/label_model.pkl
  137. metrics:
  138. - metrics/bugginess/label_model_metrics.json:
  139. cache: false
  140. bugginess_label_dataset_datasets_bugginess-train:
  141. cmd: bohr label-dataset bugginess datasets.bugginess-train
  142. deps:
  143. - data/bugginess/train/bug_sample.csv
  144. - data/bugginess/train/bug_sample_files.csv
  145. - data/bugginess/train/bug_sample_issues.csv
  146. - datasets/bugginess-train.py
  147. - generated/bugginess/heuristic_matrix_datasets.bugginess-train.pkl
  148. - generated/bugginess/label_model.pkl
  149. params:
  150. - bohr.json:
  151. - bohr_framework_version
  152. outs:
  153. - labeled-datasets/datasets.bugginess-train.labeled.csv
  154. bugginess_label_dataset_datasets_1151-commits:
  155. cmd: bohr label-dataset bugginess datasets.1151-commits
  156. deps:
  157. - data/bugginess/test/1151-commits.csv
  158. - datasets/1151-commits.py
  159. - generated/bugginess/heuristic_matrix_datasets.1151-commits.pkl
  160. - generated/bugginess/label_model.pkl
  161. params:
  162. - bohr.json:
  163. - bohr_framework_version
  164. outs:
  165. - labeled-datasets/datasets.1151-commits.labeled.csv
  166. bugginess_label_dataset_datasets_berger:
  167. cmd: bohr label-dataset bugginess datasets.berger
  168. deps:
  169. - data/bugginess/test/berger.csv
  170. - datasets/berger.py
  171. - generated/bugginess/heuristic_matrix_datasets.berger.pkl
  172. - generated/bugginess/label_model.pkl
  173. params:
  174. - bohr.json:
  175. - bohr_framework_version
  176. outs:
  177. - labeled-datasets/datasets.berger.labeled.csv
  178. bugginess_label_dataset_datasets_herzig:
  179. cmd: bohr label-dataset bugginess datasets.herzig
  180. deps:
  181. - data/bugginess/test/herzig.csv
  182. - datasets/herzig.py
  183. - generated/bugginess/heuristic_matrix_datasets.herzig.pkl
  184. - generated/bugginess/label_model.pkl
  185. params:
  186. - bohr.json:
  187. - bohr_framework_version
  188. outs:
  189. - labeled-datasets/datasets.herzig.labeled.csv
  190. smells_apply_heuristics__heuristics_smells__datasets_smells-train:
  191. cmd: bohr apply-heuristics smells --heuristic-group heuristics.smells --dataset
  192. datasets.smells-train
  193. deps:
  194. - data/smells/train.csv
  195. - datasets/smells-train.py
  196. - heuristics/keywords
  197. - heuristics/smells.py
  198. - labels.py
  199. params:
  200. - bohr.json:
  201. - bohr_framework_version
  202. outs:
  203. - generated/smells/heuristics.smells/heuristic_matrix_datasets.smells-train.pkl
  204. metrics:
  205. - metrics/smells/heuristics.smells/heuristic_metrics_datasets.smells-train.json:
  206. cache: false
  207. smells_apply_heuristics__heuristics_smells__datasets_smells-test:
  208. cmd: bohr apply-heuristics smells --heuristic-group heuristics.smells --dataset
  209. datasets.smells-test
  210. deps:
  211. - data/smells/test.csv
  212. - datasets/smells-test.py
  213. - heuristics/keywords
  214. - heuristics/smells.py
  215. - labels.py
  216. params:
  217. - bohr.json:
  218. - bohr_framework_version
  219. outs:
  220. - generated/smells/heuristics.smells/heuristic_matrix_datasets.smells-test.pkl
  221. metrics:
  222. - metrics/smells/heuristics.smells/heuristic_metrics_datasets.smells-test.json:
  223. cache: false
  224. smells_combine_heuristics:
  225. cmd: bohr apply-heuristics smells
  226. deps:
  227. - generated/smells/heuristics.smells/heuristic_matrix_datasets.smells-test.pkl
  228. - generated/smells/heuristics.smells/heuristic_matrix_datasets.smells-train.pkl
  229. params:
  230. - bohr.json:
  231. - bohr_framework_version
  232. outs:
  233. - generated/smells/analysis_datasets.smells-test.csv:
  234. cache: false
  235. - generated/smells/analysis_datasets.smells-train.csv:
  236. cache: false
  237. - generated/smells/heuristic_matrix_datasets.smells-test.pkl
  238. - generated/smells/heuristic_matrix_datasets.smells-train.pkl
  239. metrics:
  240. - metrics/smells/analysis_datasets.smells-test.json:
  241. cache: false
  242. - metrics/smells/analysis_datasets.smells-train.json:
  243. cache: false
  244. - metrics/smells/heuristic_metrics_datasets.smells-test.json:
  245. cache: false
  246. - metrics/smells/heuristic_metrics_datasets.smells-train.json:
  247. cache: false
  248. smells_train_label_model:
  249. cmd: bohr train-label-model smells datasets.smells-train
  250. deps:
  251. - data/smells/test.csv
  252. - datasets/smells-test.py
  253. - generated/smells/heuristic_matrix_datasets.smells-test.pkl
  254. - generated/smells/heuristic_matrix_datasets.smells-train.pkl
  255. params:
  256. - bohr.json:
  257. - bohr_framework_version
  258. outs:
  259. - generated/smells/label_model.pkl
  260. metrics:
  261. - metrics/smells/label_model_metrics.json:
  262. cache: false
  263. smells_label_dataset_datasets_smells-train:
  264. cmd: bohr label-dataset smells datasets.smells-train
  265. deps:
  266. - data/smells/train.csv
  267. - datasets/smells-train.py
  268. - generated/smells/heuristic_matrix_datasets.smells-train.pkl
  269. - generated/smells/label_model.pkl
  270. params:
  271. - bohr.json:
  272. - bohr_framework_version
  273. outs:
  274. - labeled-datasets/datasets.smells-train.labeled.csv
  275. smells_label_dataset_datasets_smells-test:
  276. cmd: bohr label-dataset smells datasets.smells-test
  277. deps:
  278. - data/smells/test.csv
  279. - datasets/smells-test.py
  280. - generated/smells/heuristic_matrix_datasets.smells-test.pkl
  281. - generated/smells/label_model.pkl
  282. params:
  283. - bohr.json:
  284. - bohr_framework_version
  285. outs:
  286. - labeled-datasets/datasets.smells-test.labeled.csv
  287. bugginess_combine_labels_1151-commits:
  288. cmd: python classifiers/bugginess-transformer/combine_labels.py labeled-datasets/datasets.1151-commits.labeled.csv
  289. metrics/bugginess/transformer/1151-commits/assigned_labels.csv labeled-datasets/datasets.1151-commits.labeled.both.csv
  290. deps:
  291. - classifiers/bugginess-transformer/combine_labels.py
  292. - labeled-datasets/datasets.1151-commits.labeled.csv
  293. - metrics/bugginess/transformer/1151-commits/assigned_labels.csv
  294. outs:
  295. - labeled-datasets/datasets.1151-commits.labeled.both.csv:
  296. cache: false
  297. bugginess_transformer_test_herzig:
  298. cmd: bash classifiers/bugginess-transformer/test.sh data/bugginess/test/herzig.csv
  299. metrics/bugginess/transformer/herzig
  300. deps:
  301. - classifiers/bugginess-transformer/run.py
  302. - classifiers/bugginess-transformer/test.sh
  303. - data/bugginess/test/herzig.csv
  304. - models/config.json
  305. - models/merges.txt
  306. - models/pytorch_model.bin
  307. - models/special_tokens_map.json
  308. - models/tokenizer_config.json
  309. - models/training_args.bin
  310. - models/vocab.json
  311. - requirements.txt
  312. metrics:
  313. - metrics/bugginess/transformer/herzig/eval_results.txt:
  314. cache: false
  315. bugginess_transformer_test_1151-commits:
  316. cmd: bash classifiers/bugginess-transformer/test.sh data/bugginess/test/1151-commits.csv
  317. metrics/bugginess/transformer/1151-commits
  318. deps:
  319. - classifiers/bugginess-transformer/run.py
  320. - classifiers/bugginess-transformer/test.sh
  321. - data/bugginess/test/1151-commits.csv
  322. - models/config.json
  323. - models/merges.txt
  324. - models/pytorch_model.bin
  325. - models/special_tokens_map.json
  326. - models/tokenizer_config.json
  327. - models/training_args.bin
  328. - models/vocab.json
  329. - requirements.txt
  330. metrics:
  331. - metrics/bugginess/transformer/1151-commits/eval_results.txt:
  332. cache: false
  333. preprocess_smells:
  334. cmd: bash data-preprocessing/smells.sh
  335. deps:
  336. - data-preprocessing/smells.sh
  337. - downloaded-data/smells-madeyski.csv
  338. outs:
  339. - data/smells/test.csv
  340. - data/smells/train.csv
  341. preprocess_bugginess_test:
  342. cmd: cp downloaded-data/{1151-commits.csv,berger.csv,herzig.csv} data/bugginess/test
  343. deps:
  344. - downloaded-data/1151-commits.csv
  345. - downloaded-data/berger.csv
  346. - downloaded-data/herzig.csv
  347. outs:
  348. - data/bugginess/test/1151-commits.csv
  349. - data/bugginess/test/berger.csv
  350. - data/bugginess/test/herzig.csv
  351. bugginess_transformer_train:
  352. cmd: bash classifiers/bugginess-transformer/train.sh labeled-data/bugginess.csv
  353. deps:
  354. - classifiers/bugginess-transformer/run.py
  355. - classifiers/bugginess-transformer/train.sh
  356. - labeled-datasets/datasets.bugginess-train.labeled.csv
  357. - requirements.txt
  358. outs:
  359. - models/config.json:
  360. cache: false
  361. - models/merges.txt:
  362. cache: false
  363. - models/pytorch_model.bin
  364. - models/special_tokens_map.json:
  365. cache: false
  366. - models/tokenizer_config.json:
  367. cache: false
  368. - models/training_args.bin
  369. - models/vocab.json:
  370. cache: false
  371. bugginess_transformer_label_1151-commits:
  372. cmd: bash classifiers/bugginess-transformer/label.sh data/bugginess/test/1151-commits.csv
  373. metrics/bugginess/transformer/1151-commits
  374. deps:
  375. - classifiers/bugginess-transformer/label.sh
  376. - classifiers/bugginess-transformer/run.py
  377. - data/bugginess/test/1151-commits.csv
  378. - models/config.json
  379. - models/merges.txt
  380. - models/pytorch_model.bin
  381. - models/special_tokens_map.json
  382. - models/tokenizer_config.json
  383. - models/training_args.bin
  384. - models/vocab.json
  385. - requirements.txt
  386. metrics:
  387. - metrics/bugginess/transformer/1151-commits/assigned_labels.csv:
  388. cache: false
  389. preprocess_bugginess_train:
  390. cmd: 7z x downloaded-data/bugginess_train.7z -odata/bugginess
  391. deps:
  392. - downloaded-data/bugginess_train.7z
  393. outs:
  394. - data/bugginess/train/bug_sample.csv:
  395. cache: false
  396. - data/bugginess/train/bug_sample_files.csv:
  397. cache: false
  398. - data/bugginess/train/bug_sample_issues.csv:
  399. cache: false
  400. bugginess_transformer_test_berger:
  401. cmd: bash classifiers/bugginess-transformer/test.sh data/bugginess/test/berger.csv
  402. metrics/bugginess/transformer/berger
  403. deps:
  404. - classifiers/bugginess-transformer/run.py
  405. - classifiers/bugginess-transformer/test.sh
  406. - data/bugginess/test/berger.csv
  407. - models/config.json
  408. - models/merges.txt
  409. - models/pytorch_model.bin
  410. - models/special_tokens_map.json
  411. - models/tokenizer_config.json
  412. - models/training_args.bin
  413. - models/vocab.json
  414. - requirements.txt
  415. metrics:
  416. - metrics/bugginess/transformer/berger/eval_results.txt:
  417. cache: false
Tip!

Press p or ← to see the previous file, or n or → to see the next file

Comments

Loading...