Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

dvc.yaml 14 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
  1. stages:
  2. preprocess_berger:
  3. cmd: cp downloaded-data/berger.csv data && echo "data/berger.csv" >> .gitignore
  4. && git add .gitignore
  5. deps:
  6. - downloaded-data/berger.csv
  7. outs:
  8. - data/berger.csv:
  9. cache: false
  10. preprocess_herzig:
  11. cmd: cp downloaded-data/herzig.csv data && echo "data/herzig.csv" >> .gitignore
  12. && git add .gitignore
  13. deps:
  14. - downloaded-data/herzig.csv
  15. outs:
  16. - data/herzig.csv:
  17. cache: false
  18. preprocess_1151-commits:
  19. cmd: cp downloaded-data/1151-commits.csv data && echo "data/1151-commits.csv"
  20. >> .gitignore && git add .gitignore
  21. deps:
  22. - downloaded-data/1151-commits.csv
  23. outs:
  24. - data/1151-commits.csv:
  25. cache: false
  26. preprocess_200k-commits:
  27. cmd: cp downloaded-data/200k-commits.csv data && echo "data/200k-commits.csv"
  28. >> .gitignore && git add .gitignore
  29. deps:
  30. - downloaded-data/200k-commits.csv
  31. outs:
  32. - data/200k-commits.csv:
  33. cache: false
  34. preprocess_200k-commits-issues:
  35. cmd: cp downloaded-data/200k-commits-issues.csv data && echo "data/200k-commits-issues.csv"
  36. >> .gitignore && git add .gitignore
  37. deps:
  38. - downloaded-data/200k-commits-issues.csv
  39. outs:
  40. - data/200k-commits-issues.csv:
  41. cache: false
  42. preprocess_200k-commits-files:
  43. cmd: 7z x downloaded-data/200k-commits-files.csv.7z -odata && echo "data/200k-commits-files.csv"
  44. >> .gitignore && git add .gitignore
  45. deps:
  46. - downloaded-data/200k-commits-files.csv.7z
  47. outs:
  48. - data/200k-commits-files.csv:
  49. cache: false
  50. preprocess_200k-commits-link-issues:
  51. cmd: cp downloaded-data/200k-commits-link-issues.csv data && echo "data/200k-commits-link-issues.csv"
  52. >> .gitignore && git add .gitignore
  53. deps:
  54. - downloaded-data/200k-commits-link-issues.csv
  55. outs:
  56. - data/200k-commits-link-issues.csv:
  57. cache: false
  58. preprocess_200k-commits-manual-labels:
  59. cmd: cp downloaded-data/200k-commits-manual-labels.csv data && echo "data/200k-commits-manual-labels.csv"
  60. >> .gitignore && git add .gitignore
  61. deps:
  62. - downloaded-data/200k-commits-manual-labels.csv
  63. outs:
  64. - data/200k-commits-manual-labels.csv:
  65. cache: false
  66. preprocess_smells-train:
  67. cmd: data-preprocessing/smells.sh
  68. deps:
  69. - data-preprocessing/smells.sh
  70. - downloaded-data/smells-madeyski.csv
  71. outs:
  72. - data/smells/train.csv
  73. preprocess_smells-test:
  74. cmd: data-preprocessing/smells.sh
  75. deps:
  76. - data-preprocessing/smells.sh
  77. - downloaded-data/smells-madeyski.csv
  78. outs:
  79. - data/smells/test.csv
  80. parse_labels:
  81. cmd: bohr parse-labels
  82. deps:
  83. - labels
  84. outs:
  85. - labels.py:
  86. cache: false
  87. bugginess_apply_heuristics__heuristics_bugginess__200k-commits:
  88. cmd: bohr apply-heuristics bugginess --heuristic-group heuristics.bugginess --dataset
  89. 200k-commits
  90. deps:
  91. - data/200k-commits-files.csv
  92. - data/200k-commits-issues.csv
  93. - data/200k-commits-manual-labels.csv
  94. - data/200k-commits.csv
  95. - heuristics/bugginess.py
  96. - labels.py
  97. params:
  98. - bohr.json:
  99. - bohr_framework_version
  100. outs:
  101. - generated/bugginess/heuristics.bugginess/heuristic_matrix_200k-commits.pkl
  102. metrics:
  103. - metrics/bugginess/heuristics.bugginess/heuristic_metrics_200k-commits.json:
  104. cache: false
  105. bugginess_apply_heuristics__heuristics_bugginess__1151-commits:
  106. cmd: bohr apply-heuristics bugginess --heuristic-group heuristics.bugginess --dataset
  107. 1151-commits
  108. deps:
  109. - data/1151-commits.csv
  110. - heuristics/bugginess.py
  111. - labels.py
  112. params:
  113. - bohr.json:
  114. - bohr_framework_version
  115. outs:
  116. - generated/bugginess/heuristics.bugginess/heuristic_matrix_1151-commits.pkl
  117. metrics:
  118. - metrics/bugginess/heuristics.bugginess/heuristic_metrics_1151-commits.json:
  119. cache: false
  120. bugginess_apply_heuristics__heuristics_bugginess__berger:
  121. cmd: bohr apply-heuristics bugginess --heuristic-group heuristics.bugginess --dataset
  122. berger
  123. deps:
  124. - data/berger.csv
  125. - heuristics/bugginess.py
  126. - labels.py
  127. params:
  128. - bohr.json:
  129. - bohr_framework_version
  130. outs:
  131. - generated/bugginess/heuristics.bugginess/heuristic_matrix_berger.pkl
  132. metrics:
  133. - metrics/bugginess/heuristics.bugginess/heuristic_metrics_berger.json:
  134. cache: false
  135. bugginess_apply_heuristics__heuristics_bugginess__herzig:
  136. cmd: bohr apply-heuristics bugginess --heuristic-group heuristics.bugginess --dataset
  137. herzig
  138. deps:
  139. - data/herzig.csv
  140. - heuristics/bugginess.py
  141. - labels.py
  142. params:
  143. - bohr.json:
  144. - bohr_framework_version
  145. outs:
  146. - generated/bugginess/heuristics.bugginess/heuristic_matrix_herzig.pkl
  147. metrics:
  148. - metrics/bugginess/heuristics.bugginess/heuristic_metrics_herzig.json:
  149. cache: false
  150. bugginess_apply_heuristics__heuristics_manuallabels__200k-commits:
  151. cmd: bohr apply-heuristics bugginess --heuristic-group heuristics.manuallabels
  152. --dataset 200k-commits
  153. deps:
  154. - data/200k-commits-files.csv
  155. - data/200k-commits-issues.csv
  156. - data/200k-commits-manual-labels.csv
  157. - data/200k-commits.csv
  158. - heuristics/manuallabels.py
  159. - labels.py
  160. params:
  161. - bohr.json:
  162. - bohr_framework_version
  163. outs:
  164. - generated/bugginess/heuristics.manuallabels/heuristic_matrix_200k-commits.pkl
  165. metrics:
  166. - metrics/bugginess/heuristics.manuallabels/heuristic_metrics_200k-commits.json:
  167. cache: false
  168. bugginess_apply_heuristics__heuristics_manuallabels__1151-commits:
  169. cmd: bohr apply-heuristics bugginess --heuristic-group heuristics.manuallabels
  170. --dataset 1151-commits
  171. deps:
  172. - data/1151-commits.csv
  173. - heuristics/manuallabels.py
  174. - labels.py
  175. params:
  176. - bohr.json:
  177. - bohr_framework_version
  178. outs:
  179. - generated/bugginess/heuristics.manuallabels/heuristic_matrix_1151-commits.pkl
  180. metrics:
  181. - metrics/bugginess/heuristics.manuallabels/heuristic_metrics_1151-commits.json:
  182. cache: false
  183. bugginess_apply_heuristics__heuristics_manuallabels__berger:
  184. cmd: bohr apply-heuristics bugginess --heuristic-group heuristics.manuallabels
  185. --dataset berger
  186. deps:
  187. - data/berger.csv
  188. - heuristics/manuallabels.py
  189. - labels.py
  190. params:
  191. - bohr.json:
  192. - bohr_framework_version
  193. outs:
  194. - generated/bugginess/heuristics.manuallabels/heuristic_matrix_berger.pkl
  195. metrics:
  196. - metrics/bugginess/heuristics.manuallabels/heuristic_metrics_berger.json:
  197. cache: false
  198. bugginess_apply_heuristics__heuristics_manuallabels__herzig:
  199. cmd: bohr apply-heuristics bugginess --heuristic-group heuristics.manuallabels
  200. --dataset herzig
  201. deps:
  202. - data/herzig.csv
  203. - heuristics/manuallabels.py
  204. - labels.py
  205. params:
  206. - bohr.json:
  207. - bohr_framework_version
  208. outs:
  209. - generated/bugginess/heuristics.manuallabels/heuristic_matrix_herzig.pkl
  210. metrics:
  211. - metrics/bugginess/heuristics.manuallabels/heuristic_metrics_herzig.json:
  212. cache: false
  213. bugginess_combine_heuristics:
  214. cmd: bohr apply-heuristics bugginess
  215. deps:
  216. - generated/bugginess/heuristics.bugginess/heuristic_matrix_1151-commits.pkl
  217. - generated/bugginess/heuristics.bugginess/heuristic_matrix_200k-commits.pkl
  218. - generated/bugginess/heuristics.bugginess/heuristic_matrix_berger.pkl
  219. - generated/bugginess/heuristics.bugginess/heuristic_matrix_herzig.pkl
  220. - generated/bugginess/heuristics.manuallabels/heuristic_matrix_1151-commits.pkl
  221. - generated/bugginess/heuristics.manuallabels/heuristic_matrix_200k-commits.pkl
  222. - generated/bugginess/heuristics.manuallabels/heuristic_matrix_berger.pkl
  223. - generated/bugginess/heuristics.manuallabels/heuristic_matrix_herzig.pkl
  224. params:
  225. - bohr.json:
  226. - bohr_framework_version
  227. outs:
  228. - generated/bugginess/analysis_1151-commits.csv:
  229. cache: false
  230. - generated/bugginess/analysis_200k-commits.csv:
  231. cache: false
  232. - generated/bugginess/analysis_berger.csv:
  233. cache: false
  234. - generated/bugginess/analysis_herzig.csv:
  235. cache: false
  236. - generated/bugginess/heuristic_matrix_1151-commits.pkl
  237. - generated/bugginess/heuristic_matrix_200k-commits.pkl
  238. - generated/bugginess/heuristic_matrix_berger.pkl
  239. - generated/bugginess/heuristic_matrix_herzig.pkl
  240. metrics:
  241. - metrics/bugginess/analysis_1151-commits.json:
  242. cache: false
  243. - metrics/bugginess/analysis_200k-commits.json:
  244. cache: false
  245. - metrics/bugginess/analysis_berger.json:
  246. cache: false
  247. - metrics/bugginess/analysis_herzig.json:
  248. cache: false
  249. - metrics/bugginess/heuristic_metrics_1151-commits.json:
  250. cache: false
  251. - metrics/bugginess/heuristic_metrics_200k-commits.json:
  252. cache: false
  253. - metrics/bugginess/heuristic_metrics_berger.json:
  254. cache: false
  255. - metrics/bugginess/heuristic_metrics_herzig.json:
  256. cache: false
  257. bugginess_train_label_model:
  258. cmd: bohr train-label-model bugginess 200k-commits
  259. deps:
  260. - data/1151-commits.csv
  261. - data/berger.csv
  262. - data/herzig.csv
  263. - generated/bugginess/heuristic_matrix_1151-commits.pkl
  264. - generated/bugginess/heuristic_matrix_200k-commits.pkl
  265. - generated/bugginess/heuristic_matrix_berger.pkl
  266. - generated/bugginess/heuristic_matrix_herzig.pkl
  267. params:
  268. - bohr.json:
  269. - bohr_framework_version
  270. outs:
  271. - generated/bugginess/label_model.pkl
  272. - generated/bugginess/label_model_weights.csv:
  273. cache: false
  274. metrics:
  275. - metrics/bugginess/label_model_metrics.json:
  276. cache: false
  277. bugginess_label_dataset_200k-commits:
  278. cmd: bohr label-dataset bugginess 200k-commits
  279. deps:
  280. - data/200k-commits.csv
  281. - generated/bugginess/heuristic_matrix_200k-commits.pkl
  282. - generated/bugginess/label_model.pkl
  283. params:
  284. - bohr.json:
  285. - bohr_framework_version
  286. outs:
  287. - labeled-datasets/200k-commits.labeled.csv
  288. bugginess_label_dataset_1151-commits:
  289. cmd: bohr label-dataset bugginess 1151-commits
  290. deps:
  291. - data/1151-commits.csv
  292. - generated/bugginess/heuristic_matrix_1151-commits.pkl
  293. - generated/bugginess/label_model.pkl
  294. params:
  295. - bohr.json:
  296. - bohr_framework_version
  297. outs:
  298. - labeled-datasets/1151-commits.labeled.csv
  299. bugginess_label_dataset_berger:
  300. cmd: bohr label-dataset bugginess berger
  301. deps:
  302. - data/berger.csv
  303. - generated/bugginess/heuristic_matrix_berger.pkl
  304. - generated/bugginess/label_model.pkl
  305. params:
  306. - bohr.json:
  307. - bohr_framework_version
  308. outs:
  309. - labeled-datasets/berger.labeled.csv
  310. bugginess_label_dataset_herzig:
  311. cmd: bohr label-dataset bugginess herzig
  312. deps:
  313. - data/herzig.csv
  314. - generated/bugginess/heuristic_matrix_herzig.pkl
  315. - generated/bugginess/label_model.pkl
  316. params:
  317. - bohr.json:
  318. - bohr_framework_version
  319. outs:
  320. - labeled-datasets/herzig.labeled.csv
  321. smells_apply_heuristics__heuristics_smells__smells-train:
  322. cmd: bohr apply-heuristics smells --heuristic-group heuristics.smells --dataset
  323. smells-train
  324. deps:
  325. - data/smells/train.csv
  326. - heuristics/smells.py
  327. - labels.py
  328. params:
  329. - bohr.json:
  330. - bohr_framework_version
  331. outs:
  332. - generated/smells/heuristics.smells/heuristic_matrix_smells-train.pkl
  333. metrics:
  334. - metrics/smells/heuristics.smells/heuristic_metrics_smells-train.json:
  335. cache: false
  336. smells_apply_heuristics__heuristics_smells__smells-test:
  337. cmd: bohr apply-heuristics smells --heuristic-group heuristics.smells --dataset
  338. smells-test
  339. deps:
  340. - data/smells/test.csv
  341. - heuristics/smells.py
  342. - labels.py
  343. params:
  344. - bohr.json:
  345. - bohr_framework_version
  346. outs:
  347. - generated/smells/heuristics.smells/heuristic_matrix_smells-test.pkl
  348. metrics:
  349. - metrics/smells/heuristics.smells/heuristic_metrics_smells-test.json:
  350. cache: false
  351. smells_combine_heuristics:
  352. cmd: bohr apply-heuristics smells
  353. deps:
  354. - generated/smells/heuristics.smells/heuristic_matrix_smells-test.pkl
  355. - generated/smells/heuristics.smells/heuristic_matrix_smells-train.pkl
  356. params:
  357. - bohr.json:
  358. - bohr_framework_version
  359. outs:
  360. - generated/smells/analysis_smells-test.csv:
  361. cache: false
  362. - generated/smells/analysis_smells-train.csv:
  363. cache: false
  364. - generated/smells/heuristic_matrix_smells-test.pkl
  365. - generated/smells/heuristic_matrix_smells-train.pkl
  366. metrics:
  367. - metrics/smells/analysis_smells-test.json:
  368. cache: false
  369. - metrics/smells/analysis_smells-train.json:
  370. cache: false
  371. - metrics/smells/heuristic_metrics_smells-test.json:
  372. cache: false
  373. - metrics/smells/heuristic_metrics_smells-train.json:
  374. cache: false
  375. smells_train_label_model:
  376. cmd: bohr train-label-model smells smells-train
  377. deps:
  378. - data/smells/test.csv
  379. - generated/smells/heuristic_matrix_smells-test.pkl
  380. - generated/smells/heuristic_matrix_smells-train.pkl
  381. params:
  382. - bohr.json:
  383. - bohr_framework_version
  384. outs:
  385. - generated/smells/label_model.pkl
  386. - generated/smells/label_model_weights.csv:
  387. cache: false
  388. metrics:
  389. - metrics/smells/label_model_metrics.json:
  390. cache: false
  391. smells_label_dataset_smells-train:
  392. cmd: bohr label-dataset smells smells-train
  393. deps:
  394. - data/smells/train.csv
  395. - generated/smells/heuristic_matrix_smells-train.pkl
  396. - generated/smells/label_model.pkl
  397. params:
  398. - bohr.json:
  399. - bohr_framework_version
  400. outs:
  401. - labeled-datasets/smells-train.labeled.csv
  402. smells_label_dataset_smells-test:
  403. cmd: bohr label-dataset smells smells-test
  404. deps:
  405. - data/smells/test.csv
  406. - generated/smells/heuristic_matrix_smells-test.pkl
  407. - generated/smells/label_model.pkl
  408. params:
  409. - bohr.json:
  410. - bohr_framework_version
  411. outs:
  412. - labeled-datasets/smells-test.labeled.csv
Tip!

Press p or ← to see the previous file, or n or → to see the next file

Comments

Loading...