Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

dvc.lock 33 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
  1. schema: '2.0'
  2. stages:
  3. preprocess_1151-commits:
  4. cmd: cp downloaded-data/1151-commits.csv data && echo "data/1151-commits.csv"
  5. >> .gitignore && git add .gitignore
  6. deps:
  7. - path: downloaded-data/1151-commits.csv
  8. md5: dd000fe19ba4aac9efa3a3856e2acc5e
  9. size: 346306
  10. outs:
  11. - path: data/1151-commits.csv
  12. md5: dd000fe19ba4aac9efa3a3856e2acc5e
  13. size: 346306
  14. preprocess_herzig:
  15. cmd: cp downloaded-data/herzig.csv data && echo "data/herzig.csv" >> .gitignore
  16. && git add .gitignore
  17. deps:
  18. - path: downloaded-data/herzig.csv
  19. md5: 69a17c08643aed84b874384a2a57c7ed
  20. size: 1483281
  21. outs:
  22. - path: data/herzig.csv
  23. md5: 69a17c08643aed84b874384a2a57c7ed
  24. size: 1483281
  25. preprocess_smells-test:
  26. cmd: data-preprocessing/smells.sh
  27. deps:
  28. - path: data-preprocessing/smells.sh
  29. md5: 1792bc2011c1aba4d51cdca74beee11e
  30. size: 2148
  31. - path: downloaded-data/smells-madeyski.csv
  32. md5: 3d60d277b9fa1306c05ccfdefe22e9d1
  33. size: 7513770
  34. outs:
  35. - path: data/smells/test.csv
  36. md5: 0200db0eec17554a48a5b3a25719fd03
  37. size: 77607
  38. parse_labels:
  39. cmd: bohr parse-labels
  40. deps:
  41. - path: labels
  42. md5: 237b5064b298ca8ebd422a9c35035b98.dir
  43. size: 622
  44. nfiles: 3
  45. outs:
  46. - path: labels.py
  47. md5: 039b719577b0f54b176e58a432242060
  48. size: 1910
  49. smells_apply_heuristics__heuristics_smells__smells-test:
  50. cmd: bohr apply-heuristics smells --heuristic-group heuristics.smells --dataset
  51. smells-test
  52. deps:
  53. - path: data/smells/test.csv
  54. md5: 0200db0eec17554a48a5b3a25719fd03
  55. size: 77607
  56. - path: heuristics/smells.py
  57. md5: a2254c51be33d4ede33baab98ba18b09
  58. size: 712
  59. - path: labels.py
  60. md5: 039b719577b0f54b176e58a432242060
  61. size: 1910
  62. params:
  63. bohr.json:
  64. bohr_framework_version: 0.4.5
  65. outs:
  66. - path: generated/smells/heuristics.smells/heuristic_matrix_smells-test.pkl
  67. md5: 143760b1cdf4c422a3430c583345337c
  68. size: 4230
  69. - path: metrics/smells/heuristics.smells/heuristic_metrics_smells-test.json
  70. md5: 7fe3f867a78993a1e5e27178ba30b57b
  71. size: 73
  72. preprocess_smells-train:
  73. cmd: data-preprocessing/smells.sh
  74. deps:
  75. - path: data-preprocessing/smells.sh
  76. md5: 1792bc2011c1aba4d51cdca74beee11e
  77. size: 2148
  78. - path: downloaded-data/smells-madeyski.csv
  79. md5: 3d60d277b9fa1306c05ccfdefe22e9d1
  80. size: 7513770
  81. outs:
  82. - path: data/smells/train.csv
  83. md5: 7fc9a7617e6f201523fba311317ba48f
  84. size: 296970
  85. smells_apply_heuristics__heuristics_smells__smells-train:
  86. cmd: bohr apply-heuristics smells --heuristic-group heuristics.smells --dataset
  87. smells-train
  88. deps:
  89. - path: data/smells/train.csv
  90. md5: 7fc9a7617e6f201523fba311317ba48f
  91. size: 296970
  92. - path: heuristics/smells.py
  93. md5: a2254c51be33d4ede33baab98ba18b09
  94. size: 712
  95. - path: labels.py
  96. md5: 039b719577b0f54b176e58a432242060
  97. size: 1910
  98. params:
  99. bohr.json:
  100. bohr_framework_version: 0.4.5
  101. outs:
  102. - path: generated/smells/heuristics.smells/heuristic_matrix_smells-train.pkl
  103. md5: 5e0fb0ce062af88c8d2b5360948bb3c9
  104. size: 14312
  105. - path: metrics/smells/heuristics.smells/heuristic_metrics_smells-train.json
  106. md5: 3e6a9fc4c1202a8c31e3c1980abfbabe
  107. size: 32
  108. smells_combine_heuristics:
  109. cmd: bohr apply-heuristics smells
  110. deps:
  111. - path: generated/smells/heuristics.smells/heuristic_matrix_smells-test.pkl
  112. md5: 143760b1cdf4c422a3430c583345337c
  113. size: 4230
  114. - path: generated/smells/heuristics.smells/heuristic_matrix_smells-train.pkl
  115. md5: 5e0fb0ce062af88c8d2b5360948bb3c9
  116. size: 14312
  117. params:
  118. bohr.json:
  119. bohr_framework_version: 0.4.5
  120. outs:
  121. - path: generated/smells/analysis_smells-test.csv
  122. md5: 0e9a0cb4bca25888a4c21bb79dd903fc
  123. size: 337
  124. - path: generated/smells/analysis_smells-train.csv
  125. md5: fbb491949031407029f57e7089a123ca
  126. size: 252
  127. - path: generated/smells/heuristic_matrix_smells-test.pkl
  128. md5: 03cfd9383f0463c0be0c15d54690b8ab
  129. size: 4230
  130. - path: generated/smells/heuristic_matrix_smells-train.pkl
  131. md5: d353c8b3bbcec3442b88fa4e64056d68
  132. size: 14312
  133. - path: metrics/smells/analysis_smells-test.json
  134. md5: b210737d114c81d670a7b5d49a99a250
  135. size: 1119
  136. - path: metrics/smells/analysis_smells-train.json
  137. md5: 4d96593b82baf44b775be1d171c73358
  138. size: 698
  139. - path: metrics/smells/heuristic_metrics_smells-test.json
  140. md5: 7fe3f867a78993a1e5e27178ba30b57b
  141. size: 73
  142. - path: metrics/smells/heuristic_metrics_smells-train.json
  143. md5: 3e6a9fc4c1202a8c31e3c1980abfbabe
  144. size: 32
  145. bugginess_apply_heuristics__heuristics_bugginess__1151-commits:
  146. cmd: bohr apply-heuristics bugginess --heuristic-group heuristics.bugginess --dataset
  147. 1151-commits
  148. deps:
  149. - path: data/1151-commits.csv
  150. md5: dd000fe19ba4aac9efa3a3856e2acc5e
  151. size: 346306
  152. - path: heuristics/bugginess.py
  153. md5: 9fa2be4c3ad70b831bf06a1c229ff1b6
  154. size: 8895
  155. - path: labels.py
  156. md5: 039b719577b0f54b176e58a432242060
  157. size: 1910
  158. params:
  159. bohr.json:
  160. bohr_framework_version: 0.4.5
  161. outs:
  162. - path: generated/bugginess/heuristics.bugginess/heuristic_matrix_1151-commits.pkl
  163. md5: 732df7ec50a8165d7e9a1e79415064b5
  164. size: 2792584
  165. - path: metrics/bugginess/heuristics.bugginess/heuristic_metrics_1151-commits.json
  166. md5: 590f784b4669d243ce5bff2a8d09345b
  167. size: 73
  168. preprocess_berger:
  169. cmd: cp downloaded-data/berger.csv data && echo "data/berger.csv" >> .gitignore
  170. && git add .gitignore
  171. deps:
  172. - path: downloaded-data/berger.csv
  173. md5: 126de41c9204a9e807e72406b1f9d631
  174. size: 62247
  175. outs:
  176. - path: data/berger.csv
  177. md5: 126de41c9204a9e807e72406b1f9d631
  178. size: 62247
  179. bugginess_apply_heuristics__heuristics_bugginess__berger:
  180. cmd: bohr apply-heuristics bugginess --heuristic-group heuristics.bugginess --dataset
  181. berger
  182. deps:
  183. - path: data/berger.csv
  184. md5: 126de41c9204a9e807e72406b1f9d631
  185. size: 62247
  186. - path: heuristics/bugginess.py
  187. md5: 9fa2be4c3ad70b831bf06a1c229ff1b6
  188. size: 8895
  189. - path: labels.py
  190. md5: 039b719577b0f54b176e58a432242060
  191. size: 1910
  192. params:
  193. bohr.json:
  194. bohr_framework_version: 0.4.5
  195. outs:
  196. - path: generated/bugginess/heuristics.bugginess/heuristic_matrix_berger.pkl
  197. md5: e93deda462057f4a205960cf0d24d2ec
  198. size: 917768
  199. - path: metrics/bugginess/heuristics.bugginess/heuristic_metrics_berger.json
  200. md5: feb313c11f1c1afb0fc58ef5ad73ab6a
  201. size: 73
  202. preprocess_bugginess-train:
  203. cmd: 7z x downloaded-data/bugginess_train.7z -odata/bugginess_train && echo "data/bugginess_train"
  204. >> .gitignore && git add .gitignore
  205. deps:
  206. - path: downloaded-data/bugginess_train.7z
  207. md5: d4dc26c2b0f0704b1559f2c0ce6320d7
  208. size: 255969433
  209. outs:
  210. - path: data/bugginess_train
  211. md5: f7cbfc7a91dfeca3aff7b7d3b6d7ea72.dir
  212. size: 2489726547
  213. nfiles: 3
  214. bugginess_apply_heuristics__heuristics_bugginess__bugginess-train:
  215. cmd: bohr apply-heuristics bugginess --heuristic-group heuristics.bugginess --dataset
  216. bugginess-train
  217. deps:
  218. - path: data/bugginess_train
  219. md5: f7cbfc7a91dfeca3aff7b7d3b6d7ea72.dir
  220. size: 2489726547
  221. nfiles: 3
  222. - path: heuristics/bugginess.py
  223. md5: 9f9ea19cd5c53bbbd41f94cf7b8f3d14
  224. size: 2873
  225. - path: heuristics/keywords
  226. md5: b4e7587c1b8e4e1461685a305d48bd66.dir
  227. size: 1382
  228. nfiles: 5
  229. - path: labels.py
  230. md5: 4ad220b4c289b2d8597bd6431c6565a6
  231. size: 1707
  232. params:
  233. bohr.json:
  234. bohr_framework_version: 0.4.2
  235. outs:
  236. - path: generated/bugginess/heuristics.bugginess/heuristic_matrix_bugginess-train.pkl
  237. md5: 5d1c71dcd36417356cabe2e340ca959d
  238. size: 500879984
  239. - path: metrics/bugginess/heuristics.bugginess/heuristic_metrics_bugginess-train.json
  240. md5: 9c903723760f0000193679b361437e41
  241. size: 32
  242. bugginess_apply_heuristics__heuristics_bugginess__herzig:
  243. cmd: bohr apply-heuristics bugginess --heuristic-group heuristics.bugginess --dataset
  244. herzig
  245. deps:
  246. - path: data/herzig.csv
  247. md5: 69a17c08643aed84b874384a2a57c7ed
  248. size: 1483281
  249. - path: heuristics/bugginess.py
  250. md5: 9fa2be4c3ad70b831bf06a1c229ff1b6
  251. size: 8895
  252. - path: labels.py
  253. md5: 039b719577b0f54b176e58a432242060
  254. size: 1910
  255. params:
  256. bohr.json:
  257. bohr_framework_version: 0.4.5
  258. outs:
  259. - path: generated/bugginess/heuristics.bugginess/heuristic_matrix_herzig.pkl
  260. md5: 15bf08d69bf793c3978a2ffe1458c76d
  261. size: 12608792
  262. - path: metrics/bugginess/heuristics.bugginess/heuristic_metrics_herzig.json
  263. md5: 525291ad999e1bee97789045c1ae8333
  264. size: 72
  265. bugginess_combine_heuristics:
  266. cmd: bohr apply-heuristics bugginess
  267. deps:
  268. - path: generated/bugginess/heuristics.bugginess/heuristic_matrix_1151-commits.pkl
  269. md5: 732df7ec50a8165d7e9a1e79415064b5
  270. size: 2792584
  271. - path: generated/bugginess/heuristics.bugginess/heuristic_matrix_200k-commits.pkl
  272. md5: 319c4568d1d675d1c0e6532d603e0650
  273. size: 498669340
  274. - path: generated/bugginess/heuristics.bugginess/heuristic_matrix_berger.pkl
  275. md5: e93deda462057f4a205960cf0d24d2ec
  276. size: 917768
  277. - path: generated/bugginess/heuristics.bugginess/heuristic_matrix_herzig.pkl
  278. md5: 15bf08d69bf793c3978a2ffe1458c76d
  279. size: 12608792
  280. - path: generated/bugginess/heuristics.manuallabels/heuristic_matrix_1151-commits.pkl
  281. md5: 6586d8bf9010aff3be65327facde2edc
  282. size: 9975
  283. - path: generated/bugginess/heuristics.manuallabels/heuristic_matrix_200k-commits.pkl
  284. md5: 7febb1ec7e27a0b380e83a4c732946ab
  285. size: 1651964
  286. - path: generated/bugginess/heuristics.manuallabels/heuristic_matrix_berger.pkl
  287. md5: e93c0bbc779f1cf928251c4613ef5cc6
  288. size: 3767
  289. - path: generated/bugginess/heuristics.manuallabels/heuristic_matrix_herzig.pkl
  290. md5: 8e22aefcc1b79b3bc8c00ebd4503dca3
  291. size: 42479
  292. params:
  293. bohr.json:
  294. bohr_framework_version: 0.4.5
  295. outs:
  296. - path: generated/bugginess/analysis_1151-commits.csv
  297. md5: b9a8293c90963f2ce36669ded72d2e1a
  298. size: 24263
  299. - path: generated/bugginess/analysis_200k-commits.csv
  300. md5: ae396f6f90c8e3cf4dd4a1e5d59e7200
  301. size: 30413
  302. - path: generated/bugginess/analysis_berger.csv
  303. md5: 4994f12a769112913fba6a9a7dbb0eb4
  304. size: 21371
  305. - path: generated/bugginess/analysis_herzig.csv
  306. md5: ea0eb8e17dbdb9b9ebcf5f7cedbc979a
  307. size: 25808
  308. - path: generated/bugginess/heuristic_matrix_1151-commits.pkl
  309. md5: a0a1a7fbd7512b6cb31eb06d61c3bf9c
  310. size: 2801973
  311. - path: generated/bugginess/heuristic_matrix_200k-commits.pkl
  312. md5: 96bf235aea2cdfae9f33f00ddb8c8547
  313. size: 500320716
  314. - path: generated/bugginess/heuristic_matrix_berger.pkl
  315. md5: 1b5504acb50064f2971a00799380d780
  316. size: 920949
  317. - path: generated/bugginess/heuristic_matrix_herzig.pkl
  318. md5: c604a8886af18d4982a1f3626fa57619
  319. size: 12650685
  320. - path: metrics/bugginess/analysis_1151-commits.json
  321. md5: cfd93443f948403db776e31f346eecb4
  322. size: 108603
  323. - path: metrics/bugginess/analysis_200k-commits.json
  324. md5: 0bb85bc367472a8321c03a1324df9dbc
  325. size: 78828
  326. - path: metrics/bugginess/analysis_berger.json
  327. md5: 3d99febdbd632bb38a06cdb7c2bf4e62
  328. size: 105095
  329. - path: metrics/bugginess/analysis_herzig.json
  330. md5: 30b7f8dc4e151d6d5691ed1965b09aff
  331. size: 110390
  332. - path: metrics/bugginess/heuristic_metrics_1151-commits.json
  333. md5: 590f784b4669d243ce5bff2a8d09345b
  334. size: 73
  335. - path: metrics/bugginess/heuristic_metrics_200k-commits.json
  336. md5: f6fd2a0332c43002c7f5679d0db5019a
  337. size: 32
  338. - path: metrics/bugginess/heuristic_metrics_berger.json
  339. md5: feb313c11f1c1afb0fc58ef5ad73ab6a
  340. size: 73
  341. - path: metrics/bugginess/heuristic_metrics_herzig.json
  342. md5: 525291ad999e1bee97789045c1ae8333
  343. size: 72
  344. bugginess_train_label_model:
  345. cmd: bohr train-label-model bugginess 200k-commits
  346. deps:
  347. - path: data/1151-commits.csv
  348. md5: dd000fe19ba4aac9efa3a3856e2acc5e
  349. size: 346306
  350. - path: data/berger.csv
  351. md5: 126de41c9204a9e807e72406b1f9d631
  352. size: 62247
  353. - path: data/herzig.csv
  354. md5: 69a17c08643aed84b874384a2a57c7ed
  355. size: 1483281
  356. - path: generated/bugginess/heuristic_matrix_1151-commits.pkl
  357. md5: a0a1a7fbd7512b6cb31eb06d61c3bf9c
  358. size: 2801973
  359. - path: generated/bugginess/heuristic_matrix_200k-commits.pkl
  360. md5: 96bf235aea2cdfae9f33f00ddb8c8547
  361. size: 500320716
  362. - path: generated/bugginess/heuristic_matrix_berger.pkl
  363. md5: 1b5504acb50064f2971a00799380d780
  364. size: 920949
  365. - path: generated/bugginess/heuristic_matrix_herzig.pkl
  366. md5: c604a8886af18d4982a1f3626fa57619
  367. size: 12650685
  368. params:
  369. bohr.json:
  370. bohr_framework_version: 0.4.5
  371. outs:
  372. - path: generated/bugginess/label_model.pkl
  373. md5: 06ba133ca77f25c7fc03f22c317127de
  374. size: 1875458
  375. - path: generated/bugginess/label_model_weights.csv
  376. md5: 073ebcf1b24a5759c44d91e2e8cdafbb
  377. size: 20342
  378. - path: metrics/bugginess/label_model_metrics.json
  379. md5: edec9faec7b0e4f80cc48b3e17588cdc
  380. size: 429
  381. bugginess_label_dataset_herzig:
  382. cmd: bohr label-dataset bugginess herzig
  383. deps:
  384. - path: data/herzig.csv
  385. md5: 69a17c08643aed84b874384a2a57c7ed
  386. size: 1483281
  387. - path: generated/bugginess/heuristic_matrix_herzig.pkl
  388. md5: c604a8886af18d4982a1f3626fa57619
  389. size: 12650685
  390. - path: generated/bugginess/label_model.pkl
  391. md5: 06ba133ca77f25c7fc03f22c317127de
  392. size: 1875458
  393. params:
  394. bohr.json:
  395. bohr_framework_version: 0.4.5
  396. outs:
  397. - path: labeled-datasets/herzig.labeled.csv
  398. md5: 56b4851603a90dd67c4da0f0fbe24313
  399. size: 1539683
  400. smells_train_label_model:
  401. cmd: bohr train-label-model smells smells-train
  402. deps:
  403. - path: data/smells/test.csv
  404. md5: 0200db0eec17554a48a5b3a25719fd03
  405. size: 77607
  406. - path: generated/smells/heuristic_matrix_smells-test.pkl
  407. md5: 03cfd9383f0463c0be0c15d54690b8ab
  408. size: 4230
  409. - path: generated/smells/heuristic_matrix_smells-train.pkl
  410. md5: d353c8b3bbcec3442b88fa4e64056d68
  411. size: 14312
  412. params:
  413. bohr.json:
  414. bohr_framework_version: 0.4.5
  415. outs:
  416. - path: generated/smells/label_model.pkl
  417. md5: c7ecac10ac53e7d00554e22dd105e72c
  418. size: 4900
  419. - path: generated/smells/label_model_weights.csv
  420. md5: ee5dcfe72e30c40c8ba6a56fd5a6219a
  421. size: 179
  422. - path: metrics/smells/label_model_metrics.json
  423. md5: 8982bcbb2ce25ea92593c9e844107f7d
  424. size: 155
  425. smells_label_dataset_smells-train:
  426. cmd: bohr label-dataset smells smells-train
  427. deps:
  428. - path: data/smells/train.csv
  429. md5: 7fc9a7617e6f201523fba311317ba48f
  430. size: 296970
  431. - path: generated/smells/heuristic_matrix_smells-train.pkl
  432. md5: d353c8b3bbcec3442b88fa4e64056d68
  433. size: 14312
  434. - path: generated/smells/label_model.pkl
  435. md5: c7ecac10ac53e7d00554e22dd105e72c
  436. size: 4900
  437. params:
  438. bohr.json:
  439. bohr_framework_version: 0.4.5
  440. outs:
  441. - path: labeled-datasets/smells-train.labeled.csv
  442. md5: b3433438369f2ca2276c22cff309631e
  443. size: 296121
  444. smells_label_dataset_smells-test:
  445. cmd: bohr label-dataset smells smells-test
  446. deps:
  447. - path: data/smells/test.csv
  448. md5: 0200db0eec17554a48a5b3a25719fd03
  449. size: 77607
  450. - path: generated/smells/heuristic_matrix_smells-test.pkl
  451. md5: 03cfd9383f0463c0be0c15d54690b8ab
  452. size: 4230
  453. - path: generated/smells/label_model.pkl
  454. md5: c7ecac10ac53e7d00554e22dd105e72c
  455. size: 4900
  456. params:
  457. bohr.json:
  458. bohr_framework_version: 0.4.5
  459. outs:
  460. - path: labeled-datasets/smells-test.labeled.csv
  461. md5: fe4a97ad13be96db8f076fda178bf984
  462. size: 77279
  463. bugginess_label_dataset_bugginess-train:
  464. cmd: bohr label-dataset bugginess bugginess-train
  465. deps:
  466. - path: data/bugginess_train
  467. md5: f7cbfc7a91dfeca3aff7b7d3b6d7ea72.dir
  468. size: 2489726547
  469. nfiles: 3
  470. - path: generated/bugginess/heuristic_matrix_bugginess-train.pkl
  471. md5: d9141b7bf8b3eb25cf3e90490acbb812
  472. size: 500879984
  473. - path: generated/bugginess/label_model.pkl
  474. md5: ce78684652e122b347fe0c7fc32ba035
  475. size: 1863238
  476. params:
  477. bohr.json:
  478. bohr_framework_version: 0.4.2
  479. outs:
  480. - path: labeled-datasets/bugginess-train.labeled.csv
  481. md5: bfe4ac306c08f7188e094acc20e1ff03
  482. size: 61623779
  483. bugginess_label_dataset_1151-commits:
  484. cmd: bohr label-dataset bugginess 1151-commits
  485. deps:
  486. - path: data/1151-commits.csv
  487. md5: dd000fe19ba4aac9efa3a3856e2acc5e
  488. size: 346306
  489. - path: generated/bugginess/heuristic_matrix_1151-commits.pkl
  490. md5: a0a1a7fbd7512b6cb31eb06d61c3bf9c
  491. size: 2801973
  492. - path: generated/bugginess/label_model.pkl
  493. md5: 06ba133ca77f25c7fc03f22c317127de
  494. size: 1875458
  495. params:
  496. bohr.json:
  497. bohr_framework_version: 0.4.5
  498. outs:
  499. - path: labeled-datasets/1151-commits.labeled.csv
  500. md5: d3ca36b0f762db4dba3b3b010d1a5f27
  501. size: 359761
  502. bugginess_label_dataset_berger:
  503. cmd: bohr label-dataset bugginess berger
  504. deps:
  505. - path: data/berger.csv
  506. md5: 126de41c9204a9e807e72406b1f9d631
  507. size: 62247
  508. - path: generated/bugginess/heuristic_matrix_berger.pkl
  509. md5: 1b5504acb50064f2971a00799380d780
  510. size: 920949
  511. - path: generated/bugginess/label_model.pkl
  512. md5: 06ba133ca77f25c7fc03f22c317127de
  513. size: 1875458
  514. params:
  515. bohr.json:
  516. bohr_framework_version: 0.4.5
  517. outs:
  518. - path: labeled-datasets/berger.labeled.csv
  519. md5: d318e371f868b40c9686b43910ea4597
  520. size: 66780
  521. bugginess_transformer_train:
  522. cmd: bash classifiers/bugginess-transformer/train.sh labeled-data/bugginess.csv
  523. deps:
  524. - path: classifiers/bugginess-transformer/run.py
  525. md5: faf5ebb8f0348b28aa1205e2c56cd41c
  526. size: 12023
  527. - path: classifiers/bugginess-transformer/train.sh
  528. md5: 3a19e011c049042bbec7e8315e883c38
  529. size: 557
  530. - path: labeled-datasets/bugginess-train.labeled.csv
  531. md5: bfe4ac306c08f7188e094acc20e1ff03
  532. size: 61623779
  533. - path: requirements.txt
  534. md5: 29b4c5d66c523cec0712dbcdcced42bb
  535. size: 21
  536. outs:
  537. - path: models/config.json
  538. md5: 3effd3229ade2ed52eeb90d252790bf5
  539. size: 716
  540. - path: models/merges.txt
  541. md5: fb9c1e34b6999f3a062df6ed4a604957
  542. size: 458459
  543. - path: models/pytorch_model.bin
  544. md5: 40379a0207d19e2a24e116e941d7d675
  545. size: 333858922
  546. - path: models/special_tokens_map.json
  547. md5: 17bb9e090d1d3a775683aba3ba610591
  548. size: 239
  549. - path: models/tokenizer_config.json
  550. md5: e1a3e947aa301aadc524ee29f0dbcc39
  551. size: 1257
  552. - path: models/training_args.bin
  553. md5: 408c3f12467908cabb77ded5ce3490ed
  554. size: 2159
  555. - path: models/vocab.json
  556. md5: ca70df26ed267d27a9edde9c5341f17b
  557. size: 813062
  558. bugginess_transformer_test_herzig:
  559. cmd: bash classifiers/bugginess-transformer/test.sh data/bugginess/herzig.csv
  560. metrics/bugginess/transformer/herzig
  561. deps:
  562. - path: classifiers/bugginess-transformer/run.py
  563. md5: faf5ebb8f0348b28aa1205e2c56cd41c
  564. size: 12023
  565. - path: classifiers/bugginess-transformer/test.sh
  566. md5: 92c4020b4c026f9b85fd38ddee1bd528
  567. size: 313
  568. - path: data/herzig.csv
  569. md5: 279936268f488e1e613f81a537f29055
  570. size: 1458311
  571. - path: models/config.json
  572. md5: 3effd3229ade2ed52eeb90d252790bf5
  573. size: 716
  574. - path: models/merges.txt
  575. md5: fb9c1e34b6999f3a062df6ed4a604957
  576. size: 458459
  577. - path: models/pytorch_model.bin
  578. md5: 40379a0207d19e2a24e116e941d7d675
  579. size: 333858922
  580. - path: models/special_tokens_map.json
  581. md5: 17bb9e090d1d3a775683aba3ba610591
  582. size: 239
  583. - path: models/tokenizer_config.json
  584. md5: e1a3e947aa301aadc524ee29f0dbcc39
  585. size: 1257
  586. - path: models/training_args.bin
  587. md5: 408c3f12467908cabb77ded5ce3490ed
  588. size: 2159
  589. - path: models/vocab.json
  590. md5: ca70df26ed267d27a9edde9c5341f17b
  591. size: 813062
  592. - path: requirements.txt
  593. md5: 29b4c5d66c523cec0712dbcdcced42bb
  594. size: 21
  595. outs:
  596. - path: metrics/bugginess/transformer/herzig/eval_results.txt
  597. md5: bd726240700dac6e926d5532eb76c5c4
  598. size: 145
  599. bugginess_transformer_test_1151-commits:
  600. cmd: bash classifiers/bugginess-transformer/test.sh data/bugginess/1151-commits.csv
  601. metrics/bugginess/transformer/1151-commits
  602. deps:
  603. - path: classifiers/bugginess-transformer/run.py
  604. md5: faf5ebb8f0348b28aa1205e2c56cd41c
  605. size: 12023
  606. - path: classifiers/bugginess-transformer/test.sh
  607. md5: 92c4020b4c026f9b85fd38ddee1bd528
  608. size: 313
  609. - path: data/1151-commits.csv
  610. md5: 7b32f404edf5982eb4c5f51b956663c4
  611. size: 341651
  612. - path: models/config.json
  613. md5: 3effd3229ade2ed52eeb90d252790bf5
  614. size: 716
  615. - path: models/merges.txt
  616. md5: fb9c1e34b6999f3a062df6ed4a604957
  617. size: 458459
  618. - path: models/pytorch_model.bin
  619. md5: 40379a0207d19e2a24e116e941d7d675
  620. size: 333858922
  621. - path: models/special_tokens_map.json
  622. md5: 17bb9e090d1d3a775683aba3ba610591
  623. size: 239
  624. - path: models/tokenizer_config.json
  625. md5: e1a3e947aa301aadc524ee29f0dbcc39
  626. size: 1257
  627. - path: models/training_args.bin
  628. md5: 408c3f12467908cabb77ded5ce3490ed
  629. size: 2159
  630. - path: models/vocab.json
  631. md5: ca70df26ed267d27a9edde9c5341f17b
  632. size: 813062
  633. - path: requirements.txt
  634. md5: 29b4c5d66c523cec0712dbcdcced42bb
  635. size: 21
  636. outs:
  637. - path: metrics/bugginess/transformer/1151-commits/eval_results.txt
  638. md5: 52b1b36d2896195e78ca5b7d42de4839
  639. size: 146
  640. bugginess_transformer_label_1151-commits:
  641. cmd: bash classifiers/bugginess-transformer/label.sh data/bugginess/1151-commits.csv
  642. metrics/bugginess/transformer/1151-commits
  643. deps:
  644. - path: classifiers/bugginess-transformer/label.sh
  645. md5: ce2646a4233e68991b57bbf2c7404ace
  646. size: 320
  647. - path: classifiers/bugginess-transformer/run.py
  648. md5: faf5ebb8f0348b28aa1205e2c56cd41c
  649. size: 12023
  650. - path: data/1151-commits.csv
  651. md5: 7b32f404edf5982eb4c5f51b956663c4
  652. size: 341651
  653. - path: models/config.json
  654. md5: 3effd3229ade2ed52eeb90d252790bf5
  655. size: 716
  656. - path: models/merges.txt
  657. md5: fb9c1e34b6999f3a062df6ed4a604957
  658. size: 458459
  659. - path: models/pytorch_model.bin
  660. md5: 40379a0207d19e2a24e116e941d7d675
  661. size: 333858922
  662. - path: models/special_tokens_map.json
  663. md5: 17bb9e090d1d3a775683aba3ba610591
  664. size: 239
  665. - path: models/tokenizer_config.json
  666. md5: e1a3e947aa301aadc524ee29f0dbcc39
  667. size: 1257
  668. - path: models/training_args.bin
  669. md5: 408c3f12467908cabb77ded5ce3490ed
  670. size: 2159
  671. - path: models/vocab.json
  672. md5: ca70df26ed267d27a9edde9c5341f17b
  673. size: 813062
  674. - path: requirements.txt
  675. md5: 29b4c5d66c523cec0712dbcdcced42bb
  676. size: 21
  677. outs:
  678. - path: metrics/bugginess/transformer/1151-commits/assigned_labels.csv
  679. md5: 527025d5fa114a28fa55eed7f4c10801
  680. size: 6964
  681. bugginess_transformer_test_berger:
  682. cmd: bash classifiers/bugginess-transformer/test.sh data/bugginess/berger.csv
  683. metrics/bugginess/transformer/berger
  684. deps:
  685. - path: classifiers/bugginess-transformer/run.py
  686. md5: faf5ebb8f0348b28aa1205e2c56cd41c
  687. size: 12023
  688. - path: classifiers/bugginess-transformer/test.sh
  689. md5: 92c4020b4c026f9b85fd38ddee1bd528
  690. size: 313
  691. - path: data/berger.csv
  692. md5: 71b9738db6cb47e3af599da316e3b570
  693. size: 60847
  694. - path: models/config.json
  695. md5: 3effd3229ade2ed52eeb90d252790bf5
  696. size: 716
  697. - path: models/merges.txt
  698. md5: fb9c1e34b6999f3a062df6ed4a604957
  699. size: 458459
  700. - path: models/pytorch_model.bin
  701. md5: 40379a0207d19e2a24e116e941d7d675
  702. size: 333858922
  703. - path: models/special_tokens_map.json
  704. md5: 17bb9e090d1d3a775683aba3ba610591
  705. size: 239
  706. - path: models/tokenizer_config.json
  707. md5: e1a3e947aa301aadc524ee29f0dbcc39
  708. size: 1257
  709. - path: models/training_args.bin
  710. md5: 408c3f12467908cabb77ded5ce3490ed
  711. size: 2159
  712. - path: models/vocab.json
  713. md5: ca70df26ed267d27a9edde9c5341f17b
  714. size: 813062
  715. - path: requirements.txt
  716. md5: 29b4c5d66c523cec0712dbcdcced42bb
  717. size: 21
  718. outs:
  719. - path: metrics/bugginess/transformer/berger/eval_results.txt
  720. md5: 30da05826f977ee9b867560731091915
  721. size: 144
  722. bugginess_combine_labels_1151-commits:
  723. cmd: python classifiers/bugginess-transformer/combine_labels.py labeled-datasets/1151-commits.labeled.csv
  724. metrics/bugginess/transformer/1151-commits/assigned_labels.csv labeled-datasets/1151-commits.labeled.both.csv
  725. && echo "labeled-datasets/1151-commits.labeled.both.csv" >> .gitignore
  726. deps:
  727. - path: classifiers/bugginess-transformer/combine_labels.py
  728. md5: 85cef7e65682e381b5e746d5a0901ec2
  729. size: 720
  730. - path: labeled-datasets/1151-commits.labeled.csv
  731. md5: 70250f3b3489aed05065c35a0b859c00
  732. size: 359755
  733. - path: metrics/bugginess/transformer/1151-commits/assigned_labels.csv
  734. md5: 527025d5fa114a28fa55eed7f4c10801
  735. size: 6964
  736. outs:
  737. - path: labeled-datasets/1151-commits.labeled.both.csv
  738. md5: f4d459e7b167fb0197dc49483eb2d2af
  739. size: 366721
  740. preprocess_200k-commits:
  741. cmd: cp downloaded-data/200k-commits.csv data && echo "data/200k-commits.csv"
  742. >> .gitignore && git add .gitignore
  743. deps:
  744. - path: downloaded-data/200k-commits.csv
  745. md5: 6ce10284e630c44110ffc483a7bb33df
  746. size: 71402002
  747. outs:
  748. - path: data/200k-commits.csv
  749. md5: 6ce10284e630c44110ffc483a7bb33df
  750. size: 71402002
  751. preprocess_200k-commits-issues:
  752. cmd: cp downloaded-data/200k-commits-issues.csv data && echo "data/200k-commits-issues.csv"
  753. >> .gitignore && git add .gitignore
  754. deps:
  755. - path: downloaded-data/200k-commits-issues.csv
  756. md5: da4b0d654f7ce1469857b9171a9647aa
  757. size: 96908075
  758. outs:
  759. - path: data/200k-commits-issues.csv
  760. md5: da4b0d654f7ce1469857b9171a9647aa
  761. size: 96908075
  762. preprocess_200k-commits-files:
  763. cmd: 7z x downloaded-data/200k-commits-files.csv.7z -odata && echo "data/200k-commits-files.csv"
  764. >> .gitignore && git add .gitignore
  765. deps:
  766. - path: downloaded-data/200k-commits-files.csv.7z
  767. md5: 56697c21cfd7bba5d0f68dcd0fbd86f0
  768. size: 240190210
  769. outs:
  770. - path: data/200k-commits-files.csv
  771. md5: bc989c140c305bed62a5a8b161883d3b
  772. size: 2284439219
  773. bugginess_apply_heuristics__heuristics_bugginess__200k-commits:
  774. cmd: bohr apply-heuristics bugginess --heuristic-group heuristics.bugginess --dataset
  775. 200k-commits
  776. deps:
  777. - path: data/200k-commits-files.csv
  778. md5: bc989c140c305bed62a5a8b161883d3b
  779. size: 2284439219
  780. - path: data/200k-commits-issues.csv
  781. md5: da4b0d654f7ce1469857b9171a9647aa
  782. size: 96908075
  783. - path: data/200k-commits-manual-labels.csv
  784. md5: 447bf23d38df7f7e3007dc35f70cab91
  785. size: 1187
  786. - path: data/200k-commits.csv
  787. md5: 6ce10284e630c44110ffc483a7bb33df
  788. size: 71402002
  789. - path: heuristics/bugginess.py
  790. md5: 9fa2be4c3ad70b831bf06a1c229ff1b6
  791. size: 8895
  792. - path: labels.py
  793. md5: 039b719577b0f54b176e58a432242060
  794. size: 1910
  795. params:
  796. bohr.json:
  797. bohr_framework_version: 0.4.5
  798. outs:
  799. - path: generated/bugginess/heuristics.bugginess/heuristic_matrix_200k-commits.pkl
  800. md5: 319c4568d1d675d1c0e6532d603e0650
  801. size: 498669340
  802. - path: metrics/bugginess/heuristics.bugginess/heuristic_metrics_200k-commits.json
  803. md5: 8b26b556f9ff7e64d6c0df924ff4bf8e
  804. size: 32
  805. bugginess_label_dataset_200k-commits:
  806. cmd: bohr label-dataset bugginess 200k-commits
  807. deps:
  808. - path: data/200k-commits.csv
  809. md5: 6ce10284e630c44110ffc483a7bb33df
  810. size: 71402002
  811. - path: generated/bugginess/heuristic_matrix_200k-commits.pkl
  812. md5: 96bf235aea2cdfae9f33f00ddb8c8547
  813. size: 500320716
  814. - path: generated/bugginess/label_model.pkl
  815. md5: 06ba133ca77f25c7fc03f22c317127de
  816. size: 1875458
  817. params:
  818. bohr.json:
  819. bohr_framework_version: 0.4.5
  820. outs:
  821. - path: labeled-datasets/200k-commits.labeled.csv
  822. md5: 3913a2cd793ccb22ccfb9b6f796588b0
  823. size: 73208475
  824. preprocess_200k-commits-link-issues:
  825. cmd: cp downloaded-data/200k-commits-link-issues.csv data && echo "data/200k-commits-link-issues.csv"
  826. >> .gitignore && git add .gitignore
  827. deps:
  828. - path: downloaded-data/200k-commits-link-issues.csv
  829. md5: f75c8b5c7747abc8c2bd1b3b847dac18
  830. size: 3005661
  831. outs:
  832. - path: data/200k-commits-link-issues.csv
  833. md5: f75c8b5c7747abc8c2bd1b3b847dac18
  834. size: 3005661
  835. preprocess_200k-commits-manual-labels:
  836. cmd: cp downloaded-data/200k-commits-manual-labels.csv data && echo "data/200k-commits-manual-labels.csv"
  837. >> .gitignore && git add .gitignore
  838. deps:
  839. - path: downloaded-data/200k-commits-manual-labels.csv
  840. md5: 447bf23d38df7f7e3007dc35f70cab91
  841. size: 1187
  842. outs:
  843. - path: data/200k-commits-manual-labels.csv
  844. md5: 447bf23d38df7f7e3007dc35f70cab91
  845. size: 1187
  846. bugginess_apply_heuristics__heuristics_manuallabels__herzig:
  847. cmd: bohr apply-heuristics bugginess --heuristic-group heuristics.manuallabels
  848. --dataset herzig
  849. deps:
  850. - path: data/herzig.csv
  851. md5: 69a17c08643aed84b874384a2a57c7ed
  852. size: 1483281
  853. - path: heuristics/manuallabels.py
  854. md5: 5120f1e19e5bfdaf6f61cfa064782e05
  855. size: 278
  856. - path: labels.py
  857. md5: 039b719577b0f54b176e58a432242060
  858. size: 1910
  859. params:
  860. bohr.json:
  861. bohr_framework_version: 0.4.5
  862. outs:
  863. - path: generated/bugginess/heuristics.manuallabels/heuristic_matrix_herzig.pkl
  864. md5: 8e22aefcc1b79b3bc8c00ebd4503dca3
  865. size: 42479
  866. - path: metrics/bugginess/heuristics.manuallabels/heuristic_metrics_herzig.json
  867. md5: 6881c30e66d12aec85d162df31e5e04d
  868. size: 58
  869. bugginess_apply_heuristics__heuristics_manuallabels__1151-commits:
  870. cmd: bohr apply-heuristics bugginess --heuristic-group heuristics.manuallabels
  871. --dataset 1151-commits
  872. deps:
  873. - path: data/1151-commits.csv
  874. md5: dd000fe19ba4aac9efa3a3856e2acc5e
  875. size: 346306
  876. - path: heuristics/manuallabels.py
  877. md5: 5120f1e19e5bfdaf6f61cfa064782e05
  878. size: 278
  879. - path: labels.py
  880. md5: 039b719577b0f54b176e58a432242060
  881. size: 1910
  882. params:
  883. bohr.json:
  884. bohr_framework_version: 0.4.5
  885. outs:
  886. - path: generated/bugginess/heuristics.manuallabels/heuristic_matrix_1151-commits.pkl
  887. md5: 6586d8bf9010aff3be65327facde2edc
  888. size: 9975
  889. - path: metrics/bugginess/heuristics.manuallabels/heuristic_metrics_1151-commits.json
  890. md5: 452fdb0e2c252999419be5771a3774cc
  891. size: 58
  892. bugginess_apply_heuristics__heuristics_manuallabels__200k-commits:
  893. cmd: bohr apply-heuristics bugginess --heuristic-group heuristics.manuallabels
  894. --dataset 200k-commits
  895. deps:
  896. - path: data/200k-commits-files.csv
  897. md5: bc989c140c305bed62a5a8b161883d3b
  898. size: 2284439219
  899. - path: data/200k-commits-issues.csv
  900. md5: da4b0d654f7ce1469857b9171a9647aa
  901. size: 96908075
  902. - path: data/200k-commits-manual-labels.csv
  903. md5: 447bf23d38df7f7e3007dc35f70cab91
  904. size: 1187
  905. - path: data/200k-commits.csv
  906. md5: 6ce10284e630c44110ffc483a7bb33df
  907. size: 71402002
  908. - path: heuristics/manuallabels.py
  909. md5: 5120f1e19e5bfdaf6f61cfa064782e05
  910. size: 278
  911. - path: labels.py
  912. md5: 039b719577b0f54b176e58a432242060
  913. size: 1910
  914. params:
  915. bohr.json:
  916. bohr_framework_version: 0.4.5
  917. outs:
  918. - path: generated/bugginess/heuristics.manuallabels/heuristic_matrix_200k-commits.pkl
  919. md5: 7febb1ec7e27a0b380e83a4c732946ab
  920. size: 1651964
  921. - path: metrics/bugginess/heuristics.manuallabels/heuristic_metrics_200k-commits.json
  922. md5: b550e0fca5c368f3221fc11db0ba8a3e
  923. size: 36
  924. bugginess_apply_heuristics__heuristics_manuallabels__berger:
  925. cmd: bohr apply-heuristics bugginess --heuristic-group heuristics.manuallabels
  926. --dataset berger
  927. deps:
  928. - path: data/berger.csv
  929. md5: 126de41c9204a9e807e72406b1f9d631
  930. size: 62247
  931. - path: heuristics/manuallabels.py
  932. md5: 5120f1e19e5bfdaf6f61cfa064782e05
  933. size: 278
  934. - path: labels.py
  935. md5: 039b719577b0f54b176e58a432242060
  936. size: 1910
  937. params:
  938. bohr.json:
  939. bohr_framework_version: 0.4.5
  940. outs:
  941. - path: generated/bugginess/heuristics.manuallabels/heuristic_matrix_berger.pkl
  942. md5: e93c0bbc779f1cf928251c4613ef5cc6
  943. size: 3767
  944. - path: metrics/bugginess/heuristics.manuallabels/heuristic_metrics_berger.json
  945. md5: c2fefb5ddd23aee9e2705356b8d131c1
  946. size: 59
Tip!

Press p or to see the previous file or, n or to see the next file

Comments

Loading...