1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
schema: '2.0'
stages:
preprocess_1151-commits:
cmd: cp downloaded-data/1151-commits.csv data && echo "data/1151-commits.csv"
>> .gitignore && git add .gitignore
deps:
- path: downloaded-data/1151-commits.csv
md5: dd000fe19ba4aac9efa3a3856e2acc5e
size: 346306
outs:
- path: data/1151-commits.csv
md5: dd000fe19ba4aac9efa3a3856e2acc5e
size: 346306
preprocess_herzig:
cmd: cp downloaded-data/herzig.csv data && echo "data/herzig.csv" >> .gitignore
&& git add .gitignore
deps:
- path: downloaded-data/herzig.csv
md5: 69a17c08643aed84b874384a2a57c7ed
size: 1483281
outs:
- path: data/herzig.csv
md5: 69a17c08643aed84b874384a2a57c7ed
size: 1483281
preprocess_smells-test:
cmd: data-preprocessing/smells.sh
deps:
- path: data-preprocessing/smells.sh
md5: 1792bc2011c1aba4d51cdca74beee11e
size: 2148
- path: downloaded-data/smells-madeyski.csv
md5: 3d60d277b9fa1306c05ccfdefe22e9d1
size: 7513770
outs:
- path: data/smells/test.csv
md5: 0200db0eec17554a48a5b3a25719fd03
size: 77607
parse_labels:
cmd: bohr parse-labels
deps:
- path: labels
md5: 237b5064b298ca8ebd422a9c35035b98.dir
size: 622
nfiles: 3
outs:
- path: labels.py
md5: 039b719577b0f54b176e58a432242060
size: 1910
smells_apply_heuristics__heuristics_smells__smells-test:
cmd: bohr apply-heuristics smells --heuristic-group heuristics.smells --dataset
smells-test
deps:
- path: data/smells/test.csv
md5: 0200db0eec17554a48a5b3a25719fd03
size: 77607
- path: heuristics/smells.py
md5: a2254c51be33d4ede33baab98ba18b09
size: 712
- path: labels.py
md5: 039b719577b0f54b176e58a432242060
size: 1910
params:
bohr.json:
bohr_framework_version: 0.4.5
outs:
- path: generated/smells/heuristics.smells/heuristic_matrix_smells-test.pkl
md5: 143760b1cdf4c422a3430c583345337c
size: 4230
- path: metrics/smells/heuristics.smells/heuristic_metrics_smells-test.json
md5: 7fe3f867a78993a1e5e27178ba30b57b
size: 73
preprocess_smells-train:
cmd: data-preprocessing/smells.sh
deps:
- path: data-preprocessing/smells.sh
md5: 1792bc2011c1aba4d51cdca74beee11e
size: 2148
- path: downloaded-data/smells-madeyski.csv
md5: 3d60d277b9fa1306c05ccfdefe22e9d1
size: 7513770
outs:
- path: data/smells/train.csv
md5: 7fc9a7617e6f201523fba311317ba48f
size: 296970
smells_apply_heuristics__heuristics_smells__smells-train:
cmd: bohr apply-heuristics smells --heuristic-group heuristics.smells --dataset
smells-train
deps:
- path: data/smells/train.csv
md5: 7fc9a7617e6f201523fba311317ba48f
size: 296970
- path: heuristics/smells.py
md5: a2254c51be33d4ede33baab98ba18b09
size: 712
- path: labels.py
md5: 039b719577b0f54b176e58a432242060
size: 1910
params:
bohr.json:
bohr_framework_version: 0.4.5
outs:
- path: generated/smells/heuristics.smells/heuristic_matrix_smells-train.pkl
md5: 5e0fb0ce062af88c8d2b5360948bb3c9
size: 14312
- path: metrics/smells/heuristics.smells/heuristic_metrics_smells-train.json
md5: 3e6a9fc4c1202a8c31e3c1980abfbabe
size: 32
smells_combine_heuristics:
cmd: bohr apply-heuristics smells
deps:
- path: generated/smells/heuristics.smells/heuristic_matrix_smells-test.pkl
md5: 143760b1cdf4c422a3430c583345337c
size: 4230
- path: generated/smells/heuristics.smells/heuristic_matrix_smells-train.pkl
md5: 5e0fb0ce062af88c8d2b5360948bb3c9
size: 14312
params:
bohr.json:
bohr_framework_version: 0.4.5
outs:
- path: generated/smells/analysis_smells-test.csv
md5: 0e9a0cb4bca25888a4c21bb79dd903fc
size: 337
- path: generated/smells/analysis_smells-train.csv
md5: fbb491949031407029f57e7089a123ca
size: 252
- path: generated/smells/heuristic_matrix_smells-test.pkl
md5: 03cfd9383f0463c0be0c15d54690b8ab
size: 4230
- path: generated/smells/heuristic_matrix_smells-train.pkl
md5: d353c8b3bbcec3442b88fa4e64056d68
size: 14312
- path: metrics/smells/analysis_smells-test.json
md5: b210737d114c81d670a7b5d49a99a250
size: 1119
- path: metrics/smells/analysis_smells-train.json
md5: 4d96593b82baf44b775be1d171c73358
size: 698
- path: metrics/smells/heuristic_metrics_smells-test.json
md5: 7fe3f867a78993a1e5e27178ba30b57b
size: 73
- path: metrics/smells/heuristic_metrics_smells-train.json
md5: 3e6a9fc4c1202a8c31e3c1980abfbabe
size: 32
bugginess_apply_heuristics__heuristics_bugginess__1151-commits:
cmd: bohr apply-heuristics bugginess --heuristic-group heuristics.bugginess --dataset
1151-commits
deps:
- path: data/1151-commits.csv
md5: dd000fe19ba4aac9efa3a3856e2acc5e
size: 346306
- path: heuristics/bugginess.py
md5: 9fa2be4c3ad70b831bf06a1c229ff1b6
size: 8895
- path: labels.py
md5: 039b719577b0f54b176e58a432242060
size: 1910
params:
bohr.json:
bohr_framework_version: 0.4.5
outs:
- path: generated/bugginess/heuristics.bugginess/heuristic_matrix_1151-commits.pkl
md5: 732df7ec50a8165d7e9a1e79415064b5
size: 2792584
- path: metrics/bugginess/heuristics.bugginess/heuristic_metrics_1151-commits.json
md5: 590f784b4669d243ce5bff2a8d09345b
size: 73
preprocess_berger:
cmd: cp downloaded-data/berger.csv data && echo "data/berger.csv" >> .gitignore
&& git add .gitignore
deps:
- path: downloaded-data/berger.csv
md5: 126de41c9204a9e807e72406b1f9d631
size: 62247
outs:
- path: data/berger.csv
md5: 126de41c9204a9e807e72406b1f9d631
size: 62247
bugginess_apply_heuristics__heuristics_bugginess__berger:
cmd: bohr apply-heuristics bugginess --heuristic-group heuristics.bugginess --dataset
berger
deps:
- path: data/berger.csv
md5: 126de41c9204a9e807e72406b1f9d631
size: 62247
- path: heuristics/bugginess.py
md5: 9fa2be4c3ad70b831bf06a1c229ff1b6
size: 8895
- path: labels.py
md5: 039b719577b0f54b176e58a432242060
size: 1910
params:
bohr.json:
bohr_framework_version: 0.4.5
outs:
- path: generated/bugginess/heuristics.bugginess/heuristic_matrix_berger.pkl
md5: e93deda462057f4a205960cf0d24d2ec
size: 917768
- path: metrics/bugginess/heuristics.bugginess/heuristic_metrics_berger.json
md5: feb313c11f1c1afb0fc58ef5ad73ab6a
size: 73
preprocess_bugginess-train:
cmd: 7z x downloaded-data/bugginess_train.7z -odata/bugginess_train && echo "data/bugginess_train"
>> .gitignore && git add .gitignore
deps:
- path: downloaded-data/bugginess_train.7z
md5: d4dc26c2b0f0704b1559f2c0ce6320d7
size: 255969433
outs:
- path: data/bugginess_train
md5: f7cbfc7a91dfeca3aff7b7d3b6d7ea72.dir
size: 2489726547
nfiles: 3
bugginess_apply_heuristics__heuristics_bugginess__bugginess-train:
cmd: bohr apply-heuristics bugginess --heuristic-group heuristics.bugginess --dataset
bugginess-train
deps:
- path: data/bugginess_train
md5: f7cbfc7a91dfeca3aff7b7d3b6d7ea72.dir
size: 2489726547
nfiles: 3
- path: heuristics/bugginess.py
md5: 9f9ea19cd5c53bbbd41f94cf7b8f3d14
size: 2873
- path: heuristics/keywords
md5: b4e7587c1b8e4e1461685a305d48bd66.dir
size: 1382
nfiles: 5
- path: labels.py
md5: 4ad220b4c289b2d8597bd6431c6565a6
size: 1707
params:
bohr.json:
bohr_framework_version: 0.4.2
outs:
- path: generated/bugginess/heuristics.bugginess/heuristic_matrix_bugginess-train.pkl
md5: 5d1c71dcd36417356cabe2e340ca959d
size: 500879984
- path: metrics/bugginess/heuristics.bugginess/heuristic_metrics_bugginess-train.json
md5: 9c903723760f0000193679b361437e41
size: 32
bugginess_apply_heuristics__heuristics_bugginess__herzig:
cmd: bohr apply-heuristics bugginess --heuristic-group heuristics.bugginess --dataset
herzig
deps:
- path: data/herzig.csv
md5: 69a17c08643aed84b874384a2a57c7ed
size: 1483281
- path: heuristics/bugginess.py
md5: 9fa2be4c3ad70b831bf06a1c229ff1b6
size: 8895
- path: labels.py
md5: 039b719577b0f54b176e58a432242060
size: 1910
params:
bohr.json:
bohr_framework_version: 0.4.5
outs:
- path: generated/bugginess/heuristics.bugginess/heuristic_matrix_herzig.pkl
md5: 15bf08d69bf793c3978a2ffe1458c76d
size: 12608792
- path: metrics/bugginess/heuristics.bugginess/heuristic_metrics_herzig.json
md5: 525291ad999e1bee97789045c1ae8333
size: 72
bugginess_combine_heuristics:
cmd: bohr apply-heuristics bugginess
deps:
- path: generated/bugginess/heuristics.bugginess/heuristic_matrix_1151-commits.pkl
md5: 732df7ec50a8165d7e9a1e79415064b5
size: 2792584
- path: generated/bugginess/heuristics.bugginess/heuristic_matrix_200k-commits.pkl
md5: 319c4568d1d675d1c0e6532d603e0650
size: 498669340
- path: generated/bugginess/heuristics.bugginess/heuristic_matrix_berger.pkl
md5: e93deda462057f4a205960cf0d24d2ec
size: 917768
- path: generated/bugginess/heuristics.bugginess/heuristic_matrix_herzig.pkl
md5: 15bf08d69bf793c3978a2ffe1458c76d
size: 12608792
- path: generated/bugginess/heuristics.manuallabels/heuristic_matrix_1151-commits.pkl
md5: 6586d8bf9010aff3be65327facde2edc
size: 9975
- path: generated/bugginess/heuristics.manuallabels/heuristic_matrix_200k-commits.pkl
md5: 7febb1ec7e27a0b380e83a4c732946ab
size: 1651964
- path: generated/bugginess/heuristics.manuallabels/heuristic_matrix_berger.pkl
md5: e93c0bbc779f1cf928251c4613ef5cc6
size: 3767
- path: generated/bugginess/heuristics.manuallabels/heuristic_matrix_herzig.pkl
md5: 8e22aefcc1b79b3bc8c00ebd4503dca3
size: 42479
params:
bohr.json:
bohr_framework_version: 0.4.5
outs:
- path: generated/bugginess/analysis_1151-commits.csv
md5: b9a8293c90963f2ce36669ded72d2e1a
size: 24263
- path: generated/bugginess/analysis_200k-commits.csv
md5: ae396f6f90c8e3cf4dd4a1e5d59e7200
size: 30413
- path: generated/bugginess/analysis_berger.csv
md5: 4994f12a769112913fba6a9a7dbb0eb4
size: 21371
- path: generated/bugginess/analysis_herzig.csv
md5: ea0eb8e17dbdb9b9ebcf5f7cedbc979a
size: 25808
- path: generated/bugginess/heuristic_matrix_1151-commits.pkl
md5: a0a1a7fbd7512b6cb31eb06d61c3bf9c
size: 2801973
- path: generated/bugginess/heuristic_matrix_200k-commits.pkl
md5: 96bf235aea2cdfae9f33f00ddb8c8547
size: 500320716
- path: generated/bugginess/heuristic_matrix_berger.pkl
md5: 1b5504acb50064f2971a00799380d780
size: 920949
- path: generated/bugginess/heuristic_matrix_herzig.pkl
md5: c604a8886af18d4982a1f3626fa57619
size: 12650685
- path: metrics/bugginess/analysis_1151-commits.json
md5: cfd93443f948403db776e31f346eecb4
size: 108603
- path: metrics/bugginess/analysis_200k-commits.json
md5: 0bb85bc367472a8321c03a1324df9dbc
size: 78828
- path: metrics/bugginess/analysis_berger.json
md5: 3d99febdbd632bb38a06cdb7c2bf4e62
size: 105095
- path: metrics/bugginess/analysis_herzig.json
md5: 30b7f8dc4e151d6d5691ed1965b09aff
size: 110390
- path: metrics/bugginess/heuristic_metrics_1151-commits.json
md5: 590f784b4669d243ce5bff2a8d09345b
size: 73
- path: metrics/bugginess/heuristic_metrics_200k-commits.json
md5: f6fd2a0332c43002c7f5679d0db5019a
size: 32
- path: metrics/bugginess/heuristic_metrics_berger.json
md5: feb313c11f1c1afb0fc58ef5ad73ab6a
size: 73
- path: metrics/bugginess/heuristic_metrics_herzig.json
md5: 525291ad999e1bee97789045c1ae8333
size: 72
bugginess_train_label_model:
cmd: bohr train-label-model bugginess 200k-commits
deps:
- path: data/1151-commits.csv
md5: dd000fe19ba4aac9efa3a3856e2acc5e
size: 346306
- path: data/berger.csv
md5: 126de41c9204a9e807e72406b1f9d631
size: 62247
- path: data/herzig.csv
md5: 69a17c08643aed84b874384a2a57c7ed
size: 1483281
- path: generated/bugginess/heuristic_matrix_1151-commits.pkl
md5: a0a1a7fbd7512b6cb31eb06d61c3bf9c
size: 2801973
- path: generated/bugginess/heuristic_matrix_200k-commits.pkl
md5: 96bf235aea2cdfae9f33f00ddb8c8547
size: 500320716
- path: generated/bugginess/heuristic_matrix_berger.pkl
md5: 1b5504acb50064f2971a00799380d780
size: 920949
- path: generated/bugginess/heuristic_matrix_herzig.pkl
md5: c604a8886af18d4982a1f3626fa57619
size: 12650685
params:
bohr.json:
bohr_framework_version: 0.4.5
outs:
- path: generated/bugginess/label_model.pkl
md5: 06ba133ca77f25c7fc03f22c317127de
size: 1875458
- path: generated/bugginess/label_model_weights.csv
md5: 073ebcf1b24a5759c44d91e2e8cdafbb
size: 20342
- path: metrics/bugginess/label_model_metrics.json
md5: edec9faec7b0e4f80cc48b3e17588cdc
size: 429
bugginess_label_dataset_herzig:
cmd: bohr label-dataset bugginess herzig
deps:
- path: data/herzig.csv
md5: 69a17c08643aed84b874384a2a57c7ed
size: 1483281
- path: generated/bugginess/heuristic_matrix_herzig.pkl
md5: c604a8886af18d4982a1f3626fa57619
size: 12650685
- path: generated/bugginess/label_model.pkl
md5: 06ba133ca77f25c7fc03f22c317127de
size: 1875458
params:
bohr.json:
bohr_framework_version: 0.4.5
outs:
- path: labeled-datasets/herzig.labeled.csv
md5: 56b4851603a90dd67c4da0f0fbe24313
size: 1539683
smells_train_label_model:
cmd: bohr train-label-model smells smells-train
deps:
- path: data/smells/test.csv
md5: 0200db0eec17554a48a5b3a25719fd03
size: 77607
- path: generated/smells/heuristic_matrix_smells-test.pkl
md5: 03cfd9383f0463c0be0c15d54690b8ab
size: 4230
- path: generated/smells/heuristic_matrix_smells-train.pkl
md5: d353c8b3bbcec3442b88fa4e64056d68
size: 14312
params:
bohr.json:
bohr_framework_version: 0.4.5
outs:
- path: generated/smells/label_model.pkl
md5: c7ecac10ac53e7d00554e22dd105e72c
size: 4900
- path: generated/smells/label_model_weights.csv
md5: ee5dcfe72e30c40c8ba6a56fd5a6219a
size: 179
- path: metrics/smells/label_model_metrics.json
md5: 8982bcbb2ce25ea92593c9e844107f7d
size: 155
smells_label_dataset_smells-train:
cmd: bohr label-dataset smells smells-train
deps:
- path: data/smells/train.csv
md5: 7fc9a7617e6f201523fba311317ba48f
size: 296970
- path: generated/smells/heuristic_matrix_smells-train.pkl
md5: d353c8b3bbcec3442b88fa4e64056d68
size: 14312
- path: generated/smells/label_model.pkl
md5: c7ecac10ac53e7d00554e22dd105e72c
size: 4900
params:
bohr.json:
bohr_framework_version: 0.4.5
outs:
- path: labeled-datasets/smells-train.labeled.csv
md5: b3433438369f2ca2276c22cff309631e
size: 296121
smells_label_dataset_smells-test:
cmd: bohr label-dataset smells smells-test
deps:
- path: data/smells/test.csv
md5: 0200db0eec17554a48a5b3a25719fd03
size: 77607
- path: generated/smells/heuristic_matrix_smells-test.pkl
md5: 03cfd9383f0463c0be0c15d54690b8ab
size: 4230
- path: generated/smells/label_model.pkl
md5: c7ecac10ac53e7d00554e22dd105e72c
size: 4900
params:
bohr.json:
bohr_framework_version: 0.4.5
outs:
- path: labeled-datasets/smells-test.labeled.csv
md5: fe4a97ad13be96db8f076fda178bf984
size: 77279
bugginess_label_dataset_bugginess-train:
cmd: bohr label-dataset bugginess bugginess-train
deps:
- path: data/bugginess_train
md5: f7cbfc7a91dfeca3aff7b7d3b6d7ea72.dir
size: 2489726547
nfiles: 3
- path: generated/bugginess/heuristic_matrix_bugginess-train.pkl
md5: d9141b7bf8b3eb25cf3e90490acbb812
size: 500879984
- path: generated/bugginess/label_model.pkl
md5: ce78684652e122b347fe0c7fc32ba035
size: 1863238
params:
bohr.json:
bohr_framework_version: 0.4.2
outs:
- path: labeled-datasets/bugginess-train.labeled.csv
md5: bfe4ac306c08f7188e094acc20e1ff03
size: 61623779
bugginess_label_dataset_1151-commits:
cmd: bohr label-dataset bugginess 1151-commits
deps:
- path: data/1151-commits.csv
md5: dd000fe19ba4aac9efa3a3856e2acc5e
size: 346306
- path: generated/bugginess/heuristic_matrix_1151-commits.pkl
md5: a0a1a7fbd7512b6cb31eb06d61c3bf9c
size: 2801973
- path: generated/bugginess/label_model.pkl
md5: 06ba133ca77f25c7fc03f22c317127de
size: 1875458
params:
bohr.json:
bohr_framework_version: 0.4.5
outs:
- path: labeled-datasets/1151-commits.labeled.csv
md5: d3ca36b0f762db4dba3b3b010d1a5f27
size: 359761
bugginess_label_dataset_berger:
cmd: bohr label-dataset bugginess berger
deps:
- path: data/berger.csv
md5: 126de41c9204a9e807e72406b1f9d631
size: 62247
- path: generated/bugginess/heuristic_matrix_berger.pkl
md5: 1b5504acb50064f2971a00799380d780
size: 920949
- path: generated/bugginess/label_model.pkl
md5: 06ba133ca77f25c7fc03f22c317127de
size: 1875458
params:
bohr.json:
bohr_framework_version: 0.4.5
outs:
- path: labeled-datasets/berger.labeled.csv
md5: d318e371f868b40c9686b43910ea4597
size: 66780
bugginess_transformer_train:
cmd: bash classifiers/bugginess-transformer/train.sh labeled-data/bugginess.csv
deps:
- path: classifiers/bugginess-transformer/run.py
md5: faf5ebb8f0348b28aa1205e2c56cd41c
size: 12023
- path: classifiers/bugginess-transformer/train.sh
md5: 3a19e011c049042bbec7e8315e883c38
size: 557
- path: labeled-datasets/bugginess-train.labeled.csv
md5: bfe4ac306c08f7188e094acc20e1ff03
size: 61623779
- path: requirements.txt
md5: 29b4c5d66c523cec0712dbcdcced42bb
size: 21
outs:
- path: models/config.json
md5: 3effd3229ade2ed52eeb90d252790bf5
size: 716
- path: models/merges.txt
md5: fb9c1e34b6999f3a062df6ed4a604957
size: 458459
- path: models/pytorch_model.bin
md5: 40379a0207d19e2a24e116e941d7d675
size: 333858922
- path: models/special_tokens_map.json
md5: 17bb9e090d1d3a775683aba3ba610591
size: 239
- path: models/tokenizer_config.json
md5: e1a3e947aa301aadc524ee29f0dbcc39
size: 1257
- path: models/training_args.bin
md5: 408c3f12467908cabb77ded5ce3490ed
size: 2159
- path: models/vocab.json
md5: ca70df26ed267d27a9edde9c5341f17b
size: 813062
bugginess_transformer_test_herzig:
cmd: bash classifiers/bugginess-transformer/test.sh data/bugginess/herzig.csv
metrics/bugginess/transformer/herzig
deps:
- path: classifiers/bugginess-transformer/run.py
md5: faf5ebb8f0348b28aa1205e2c56cd41c
size: 12023
- path: classifiers/bugginess-transformer/test.sh
md5: 92c4020b4c026f9b85fd38ddee1bd528
size: 313
- path: data/herzig.csv
md5: 279936268f488e1e613f81a537f29055
size: 1458311
- path: models/config.json
md5: 3effd3229ade2ed52eeb90d252790bf5
size: 716
- path: models/merges.txt
md5: fb9c1e34b6999f3a062df6ed4a604957
size: 458459
- path: models/pytorch_model.bin
md5: 40379a0207d19e2a24e116e941d7d675
size: 333858922
- path: models/special_tokens_map.json
md5: 17bb9e090d1d3a775683aba3ba610591
size: 239
- path: models/tokenizer_config.json
md5: e1a3e947aa301aadc524ee29f0dbcc39
size: 1257
- path: models/training_args.bin
md5: 408c3f12467908cabb77ded5ce3490ed
size: 2159
- path: models/vocab.json
md5: ca70df26ed267d27a9edde9c5341f17b
size: 813062
- path: requirements.txt
md5: 29b4c5d66c523cec0712dbcdcced42bb
size: 21
outs:
- path: metrics/bugginess/transformer/herzig/eval_results.txt
md5: bd726240700dac6e926d5532eb76c5c4
size: 145
bugginess_transformer_test_1151-commits:
cmd: bash classifiers/bugginess-transformer/test.sh data/bugginess/1151-commits.csv
metrics/bugginess/transformer/1151-commits
deps:
- path: classifiers/bugginess-transformer/run.py
md5: faf5ebb8f0348b28aa1205e2c56cd41c
size: 12023
- path: classifiers/bugginess-transformer/test.sh
md5: 92c4020b4c026f9b85fd38ddee1bd528
size: 313
- path: data/1151-commits.csv
md5: 7b32f404edf5982eb4c5f51b956663c4
size: 341651
- path: models/config.json
md5: 3effd3229ade2ed52eeb90d252790bf5
size: 716
- path: models/merges.txt
md5: fb9c1e34b6999f3a062df6ed4a604957
size: 458459
- path: models/pytorch_model.bin
md5: 40379a0207d19e2a24e116e941d7d675
size: 333858922
- path: models/special_tokens_map.json
md5: 17bb9e090d1d3a775683aba3ba610591
size: 239
- path: models/tokenizer_config.json
md5: e1a3e947aa301aadc524ee29f0dbcc39
size: 1257
- path: models/training_args.bin
md5: 408c3f12467908cabb77ded5ce3490ed
size: 2159
- path: models/vocab.json
md5: ca70df26ed267d27a9edde9c5341f17b
size: 813062
- path: requirements.txt
md5: 29b4c5d66c523cec0712dbcdcced42bb
size: 21
outs:
- path: metrics/bugginess/transformer/1151-commits/eval_results.txt
md5: 52b1b36d2896195e78ca5b7d42de4839
size: 146
bugginess_transformer_label_1151-commits:
cmd: bash classifiers/bugginess-transformer/label.sh data/bugginess/1151-commits.csv
metrics/bugginess/transformer/1151-commits
deps:
- path: classifiers/bugginess-transformer/label.sh
md5: ce2646a4233e68991b57bbf2c7404ace
size: 320
- path: classifiers/bugginess-transformer/run.py
md5: faf5ebb8f0348b28aa1205e2c56cd41c
size: 12023
- path: data/1151-commits.csv
md5: 7b32f404edf5982eb4c5f51b956663c4
size: 341651
- path: models/config.json
md5: 3effd3229ade2ed52eeb90d252790bf5
size: 716
- path: models/merges.txt
md5: fb9c1e34b6999f3a062df6ed4a604957
size: 458459
- path: models/pytorch_model.bin
md5: 40379a0207d19e2a24e116e941d7d675
size: 333858922
- path: models/special_tokens_map.json
md5: 17bb9e090d1d3a775683aba3ba610591
size: 239
- path: models/tokenizer_config.json
md5: e1a3e947aa301aadc524ee29f0dbcc39
size: 1257
- path: models/training_args.bin
md5: 408c3f12467908cabb77ded5ce3490ed
size: 2159
- path: models/vocab.json
md5: ca70df26ed267d27a9edde9c5341f17b
size: 813062
- path: requirements.txt
md5: 29b4c5d66c523cec0712dbcdcced42bb
size: 21
outs:
- path: metrics/bugginess/transformer/1151-commits/assigned_labels.csv
md5: 527025d5fa114a28fa55eed7f4c10801
size: 6964
bugginess_transformer_test_berger:
cmd: bash classifiers/bugginess-transformer/test.sh data/bugginess/berger.csv
metrics/bugginess/transformer/berger
deps:
- path: classifiers/bugginess-transformer/run.py
md5: faf5ebb8f0348b28aa1205e2c56cd41c
size: 12023
- path: classifiers/bugginess-transformer/test.sh
md5: 92c4020b4c026f9b85fd38ddee1bd528
size: 313
- path: data/berger.csv
md5: 71b9738db6cb47e3af599da316e3b570
size: 60847
- path: models/config.json
md5: 3effd3229ade2ed52eeb90d252790bf5
size: 716
- path: models/merges.txt
md5: fb9c1e34b6999f3a062df6ed4a604957
size: 458459
- path: models/pytorch_model.bin
md5: 40379a0207d19e2a24e116e941d7d675
size: 333858922
- path: models/special_tokens_map.json
md5: 17bb9e090d1d3a775683aba3ba610591
size: 239
- path: models/tokenizer_config.json
md5: e1a3e947aa301aadc524ee29f0dbcc39
size: 1257
- path: models/training_args.bin
md5: 408c3f12467908cabb77ded5ce3490ed
size: 2159
- path: models/vocab.json
md5: ca70df26ed267d27a9edde9c5341f17b
size: 813062
- path: requirements.txt
md5: 29b4c5d66c523cec0712dbcdcced42bb
size: 21
outs:
- path: metrics/bugginess/transformer/berger/eval_results.txt
md5: 30da05826f977ee9b867560731091915
size: 144
bugginess_combine_labels_1151-commits:
cmd: python classifiers/bugginess-transformer/combine_labels.py labeled-datasets/1151-commits.labeled.csv
metrics/bugginess/transformer/1151-commits/assigned_labels.csv labeled-datasets/1151-commits.labeled.both.csv
&& echo "labeled-datasets/1151-commits.labeled.both.csv" >> .gitignore
deps:
- path: classifiers/bugginess-transformer/combine_labels.py
md5: 85cef7e65682e381b5e746d5a0901ec2
size: 720
- path: labeled-datasets/1151-commits.labeled.csv
md5: 70250f3b3489aed05065c35a0b859c00
size: 359755
- path: metrics/bugginess/transformer/1151-commits/assigned_labels.csv
md5: 527025d5fa114a28fa55eed7f4c10801
size: 6964
outs:
- path: labeled-datasets/1151-commits.labeled.both.csv
md5: f4d459e7b167fb0197dc49483eb2d2af
size: 366721
preprocess_200k-commits:
cmd: cp downloaded-data/200k-commits.csv data && echo "data/200k-commits.csv"
>> .gitignore && git add .gitignore
deps:
- path: downloaded-data/200k-commits.csv
md5: 6ce10284e630c44110ffc483a7bb33df
size: 71402002
outs:
- path: data/200k-commits.csv
md5: 6ce10284e630c44110ffc483a7bb33df
size: 71402002
preprocess_200k-commits-issues:
cmd: cp downloaded-data/200k-commits-issues.csv data && echo "data/200k-commits-issues.csv"
>> .gitignore && git add .gitignore
deps:
- path: downloaded-data/200k-commits-issues.csv
md5: da4b0d654f7ce1469857b9171a9647aa
size: 96908075
outs:
- path: data/200k-commits-issues.csv
md5: da4b0d654f7ce1469857b9171a9647aa
size: 96908075
preprocess_200k-commits-files:
cmd: 7z x downloaded-data/200k-commits-files.csv.7z -odata && echo "data/200k-commits-files.csv"
>> .gitignore && git add .gitignore
deps:
- path: downloaded-data/200k-commits-files.csv.7z
md5: 56697c21cfd7bba5d0f68dcd0fbd86f0
size: 240190210
outs:
- path: data/200k-commits-files.csv
md5: bc989c140c305bed62a5a8b161883d3b
size: 2284439219
bugginess_apply_heuristics__heuristics_bugginess__200k-commits:
cmd: bohr apply-heuristics bugginess --heuristic-group heuristics.bugginess --dataset
200k-commits
deps:
- path: data/200k-commits-files.csv
md5: bc989c140c305bed62a5a8b161883d3b
size: 2284439219
- path: data/200k-commits-issues.csv
md5: da4b0d654f7ce1469857b9171a9647aa
size: 96908075
- path: data/200k-commits-manual-labels.csv
md5: 447bf23d38df7f7e3007dc35f70cab91
size: 1187
- path: data/200k-commits.csv
md5: 6ce10284e630c44110ffc483a7bb33df
size: 71402002
- path: heuristics/bugginess.py
md5: 9fa2be4c3ad70b831bf06a1c229ff1b6
size: 8895
- path: labels.py
md5: 039b719577b0f54b176e58a432242060
size: 1910
params:
bohr.json:
bohr_framework_version: 0.4.5
outs:
- path: generated/bugginess/heuristics.bugginess/heuristic_matrix_200k-commits.pkl
md5: 319c4568d1d675d1c0e6532d603e0650
size: 498669340
- path: metrics/bugginess/heuristics.bugginess/heuristic_metrics_200k-commits.json
md5: 8b26b556f9ff7e64d6c0df924ff4bf8e
size: 32
bugginess_label_dataset_200k-commits:
cmd: bohr label-dataset bugginess 200k-commits
deps:
- path: data/200k-commits.csv
md5: 6ce10284e630c44110ffc483a7bb33df
size: 71402002
- path: generated/bugginess/heuristic_matrix_200k-commits.pkl
md5: 96bf235aea2cdfae9f33f00ddb8c8547
size: 500320716
- path: generated/bugginess/label_model.pkl
md5: 06ba133ca77f25c7fc03f22c317127de
size: 1875458
params:
bohr.json:
bohr_framework_version: 0.4.5
outs:
- path: labeled-datasets/200k-commits.labeled.csv
md5: 3913a2cd793ccb22ccfb9b6f796588b0
size: 73208475
preprocess_200k-commits-link-issues:
cmd: cp downloaded-data/200k-commits-link-issues.csv data && echo "data/200k-commits-link-issues.csv"
>> .gitignore && git add .gitignore
deps:
- path: downloaded-data/200k-commits-link-issues.csv
md5: f75c8b5c7747abc8c2bd1b3b847dac18
size: 3005661
outs:
- path: data/200k-commits-link-issues.csv
md5: f75c8b5c7747abc8c2bd1b3b847dac18
size: 3005661
preprocess_200k-commits-manual-labels:
cmd: cp downloaded-data/200k-commits-manual-labels.csv data && echo "data/200k-commits-manual-labels.csv"
>> .gitignore && git add .gitignore
deps:
- path: downloaded-data/200k-commits-manual-labels.csv
md5: 447bf23d38df7f7e3007dc35f70cab91
size: 1187
outs:
- path: data/200k-commits-manual-labels.csv
md5: 447bf23d38df7f7e3007dc35f70cab91
size: 1187
bugginess_apply_heuristics__heuristics_manuallabels__herzig:
cmd: bohr apply-heuristics bugginess --heuristic-group heuristics.manuallabels
--dataset herzig
deps:
- path: data/herzig.csv
md5: 69a17c08643aed84b874384a2a57c7ed
size: 1483281
- path: heuristics/manuallabels.py
md5: 5120f1e19e5bfdaf6f61cfa064782e05
size: 278
- path: labels.py
md5: 039b719577b0f54b176e58a432242060
size: 1910
params:
bohr.json:
bohr_framework_version: 0.4.5
outs:
- path: generated/bugginess/heuristics.manuallabels/heuristic_matrix_herzig.pkl
md5: 8e22aefcc1b79b3bc8c00ebd4503dca3
size: 42479
- path: metrics/bugginess/heuristics.manuallabels/heuristic_metrics_herzig.json
md5: 6881c30e66d12aec85d162df31e5e04d
size: 58
bugginess_apply_heuristics__heuristics_manuallabels__1151-commits:
cmd: bohr apply-heuristics bugginess --heuristic-group heuristics.manuallabels
--dataset 1151-commits
deps:
- path: data/1151-commits.csv
md5: dd000fe19ba4aac9efa3a3856e2acc5e
size: 346306
- path: heuristics/manuallabels.py
md5: 5120f1e19e5bfdaf6f61cfa064782e05
size: 278
- path: labels.py
md5: 039b719577b0f54b176e58a432242060
size: 1910
params:
bohr.json:
bohr_framework_version: 0.4.5
outs:
- path: generated/bugginess/heuristics.manuallabels/heuristic_matrix_1151-commits.pkl
md5: 6586d8bf9010aff3be65327facde2edc
size: 9975
- path: metrics/bugginess/heuristics.manuallabels/heuristic_metrics_1151-commits.json
md5: 452fdb0e2c252999419be5771a3774cc
size: 58
bugginess_apply_heuristics__heuristics_manuallabels__200k-commits:
cmd: bohr apply-heuristics bugginess --heuristic-group heuristics.manuallabels
--dataset 200k-commits
deps:
- path: data/200k-commits-files.csv
md5: bc989c140c305bed62a5a8b161883d3b
size: 2284439219
- path: data/200k-commits-issues.csv
md5: da4b0d654f7ce1469857b9171a9647aa
size: 96908075
- path: data/200k-commits-manual-labels.csv
md5: 447bf23d38df7f7e3007dc35f70cab91
size: 1187
- path: data/200k-commits.csv
md5: 6ce10284e630c44110ffc483a7bb33df
size: 71402002
- path: heuristics/manuallabels.py
md5: 5120f1e19e5bfdaf6f61cfa064782e05
size: 278
- path: labels.py
md5: 039b719577b0f54b176e58a432242060
size: 1910
params:
bohr.json:
bohr_framework_version: 0.4.5
outs:
- path: generated/bugginess/heuristics.manuallabels/heuristic_matrix_200k-commits.pkl
md5: 7febb1ec7e27a0b380e83a4c732946ab
size: 1651964
- path: metrics/bugginess/heuristics.manuallabels/heuristic_metrics_200k-commits.json
md5: b550e0fca5c368f3221fc11db0ba8a3e
size: 36
bugginess_apply_heuristics__heuristics_manuallabels__berger:
cmd: bohr apply-heuristics bugginess --heuristic-group heuristics.manuallabels
--dataset berger
deps:
- path: data/berger.csv
md5: 126de41c9204a9e807e72406b1f9d631
size: 62247
- path: heuristics/manuallabels.py
md5: 5120f1e19e5bfdaf6f61cfa064782e05
size: 278
- path: labels.py
md5: 039b719577b0f54b176e58a432242060
size: 1910
params:
bohr.json:
bohr_framework_version: 0.4.5
outs:
- path: generated/bugginess/heuristics.manuallabels/heuristic_matrix_berger.pkl
md5: e93c0bbc779f1cf928251c4613ef5cc6
size: 3767
- path: metrics/bugginess/heuristics.manuallabels/heuristic_metrics_berger.json
md5: c2fefb5ddd23aee9e2705356b8d131c1
size: 59
Tip!
Press p or to see the previous file or,
n or to see the next file