Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

bohr.lock 9.3 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
  1. {
  2. "config": {
  3. "bohr_framework_version": "0.4.10",
  4. "tasks": {
  5. "bugginess": {
  6. "description": "",
  7. "top_artifact": "bohr.collection.artifacts.commit.Commit",
  8. "label_categories": [
  9. "CommitLabel.NonBugFix",
  10. "CommitLabel.BugFix"
  11. ],
  12. "test_datasets": [
  13. "1151-commits",
  14. "berger",
  15. "developer-labeled-commits",
  16. "herzig"
  17. ],
  18. "train_datasets": [
  19. "200k-commits",
  20. "fine-grained-refactorings"
  21. ],
  22. "label_column_name": "bug"
  23. },
  24. "smells": {
  25. "description": "",
  26. "top_artifact": "bohr.collection.artifacts.method.Method",
  27. "label_categories": [
  28. "SnippetLabel.NonSmelly",
  29. "SnippetLabel.Smelly"
  30. ],
  31. "test_datasets": [
  32. "smells-test"
  33. ],
  34. "train_datasets": [
  35. "smells-train"
  36. ],
  37. "label_column_name": "smelly"
  38. }
  39. },
  40. "datasets": {
  41. "berger": {
  42. "author": "Berger, E. D., Hollenbeck, C., Maj, P., Vitek, O., & Vitek, J.",
  43. "description": "Dataset of labeled commits from Berger, E. D., Hollenbeck, C., Maj, P., Vitek, O., & Vitek, J. (2019). On the impact of programming languages on code quality: a reproduction study. ACM Transactions on Programming Languages and Systems (TOPLAS), 41(4), 1-24.",
  44. "path": "berger.csv",
  45. "path_preprocessed": "berger.csv",
  46. "preprocessor": "copy",
  47. "loader": "csv",
  48. "mapper": "bohr.collection.datamappers.commit.CommitMapper"
  49. },
  50. "herzig": {
  51. "author": "K Herzig, S Just, A Zeller",
  52. "description": "Dataset of manually classified bug reports from K Herzig, S Just, A Zeller, \"It's not a bug, it's a feature: how misclassification impacts bug prediction\", with linked commits recovered by giganticode",
  53. "path": "herzig.csv",
  54. "path_preprocessed": "herzig.csv",
  55. "preprocessor": "copy",
  56. "loader": "csv",
  57. "mapper": "bohr.collection.datamappers.commit.CommitMapper"
  58. },
  59. "1151-commits": {
  60. "author": "Levin, S., Yehudai, A.",
  61. "description": "Dataset of manually-labeled 1k commits (corrective, perfective, adaptive) from Levin, S., & Yehudai, A. (2017, November). Boosting automatic commit classification into maintenance activities by utilizing source code changes. In Proceedings of the 13th International Conference on Predictive Models and Data Analytics in Software Engineering (pp. 97-106).",
  62. "path": "1151-commits.csv",
  63. "path_preprocessed": "1151-commits.csv",
  64. "preprocessor": "copy",
  65. "loader": "csv",
  66. "mapper": "bohr.collection.datamappers.commit.CommitMapper"
  67. },
  68. "200k-commits": {
  69. "author": "JA. Prenner",
  70. "description": "Dataset of commits mined by JA. Prenner from Github.\n Cite: @inproceedings{babii2021mining,\n title={Mining Software Repositories with a Collaborative Heuristic Repository},\n author={Babii, Hlib and Prenner, Julian Aron and Stricker, Laurin and Karmakar, Anjan and Janes, Andrea and Robbes, Romain},\n booktitle={2021 IEEE/ACM 43rd International Conference on Software Engineering: New Ideas and Emerging Results (ICSE-NIER)},\n pages={106--110},\n year={2021},\n organization={IEEE}\n}. License: Creative Commons Attribution 4.0 International",
  71. "path": "200k-commits.csv",
  72. "path_preprocessed": "200k-commits.csv",
  73. "preprocessor": "copy",
  74. "loader": "csv",
  75. "mapper": "bohr.collection.datamappers.commit.CommitMapper"
  76. },
  77. "200k-commits-issues": {
  78. "author": "JA. Prenner",
  79. "description": "200k-commit dataset: linked issues. License: Creative Commons Attribution 4.0 International",
  80. "path": "200k-commits-issues.csv",
  81. "path_preprocessed": "200k-commits-issues.csv",
  82. "preprocessor": "copy",
  83. "loader": "csv",
  84. "mapper": "bohr.collection.datamappers.issue.IssueMapper",
  85. "dtype": {
  86. "labels": "str"
  87. },
  88. "keep_default_na": false
  89. },
  90. "200k-commits-files": {
  91. "author": "JA. Prenner",
  92. "description": "200k-commit dataset: linked files. License: Creative Commons Attribution 4.0 International",
  93. "path": "200k-commits-files.csv.7z",
  94. "path_preprocessed": "200k-commits-files.csv",
  95. "preprocessor": "7z",
  96. "loader": "csv",
  97. "mapper": "bohr.collection.datamappers.commitfile.CommitFileMapper"
  98. },
  99. "200k-commits-link-issues": {
  100. "author": "JA. Prenner",
  101. "description": "Dataset of links between 200k-commits and 200k-commits issues. License: Creative Commons Attribution 4.0 International",
  102. "path": "200k-commits-link-issues.csv",
  103. "path_preprocessed": "200k-commits-link-issues.csv",
  104. "preprocessor": "copy",
  105. "loader": "csv"
  106. },
  107. "200k-commits-manual-labels": {
  108. "author": "All contributors: H. Babii",
  109. "description": "Manual labels for the 200k-commits dataset. License: Creative Commons Attribution 4.0 International",
  110. "path": "200k-commits-manual-labels.csv",
  111. "path_preprocessed": "200k-commits-manual-labels.csv",
  112. "preprocessor": "copy",
  113. "loader": "csv",
  114. "mapper": "bohr.collection.datamappers.manuallabels.ManualLabelMapper"
  115. },
  116. "smells-train": {
  117. "author": "Madeyski, Lech, Lewowski, Tomasz",
  118. "description": "Dataset from Madeyski, Lech, and Tomasz Lewowski. \"MLCQ: Industry-relevant code smell data set.\" preprocessed by H. Babii to extract long method smells and split into train and test sets. License: Creative Commons Attribution 4.0 International",
  119. "path": "smells-madeyski.csv",
  120. "path_preprocessed": "smells/train.csv",
  121. "preprocessor": "data-preprocessing/smells.sh",
  122. "loader": "csv",
  123. "mapper": "bohr.collection.datamappers.method.MethodMapper",
  124. "sep": ";"
  125. },
  126. "developer-labeled-commits": {
  127. "author": "Andreas Mauczka, Florian Brosch, Christian Schanes, Thomas Grechenig",
  128. "description": "Dataset of Developer-Labeled Commit Messages",
  129. "path": "developer-labeled-commits.zip",
  130. "path_preprocessed": "developer-labeled.csv",
  131. "preprocessor": "data-preprocessing/developer_labeled.py",
  132. "loader": "csv",
  133. "mapper": "bohr.collection.datamappers.commit.CommitMapper"
  134. },
  135. "fine-grained-refactorings": {
  136. "author": "Krasniqi, Rrezarta; Cleland-Huang, Jane",
  137. "description": "Dataset of manually-labeled fine-grained refactoring types from \"Enhancing Code Refactoring Detection with Explanations from Commit Messages\"",
  138. "path": "fine-grained-refactorings.zip",
  139. "path_preprocessed": "fine-grained-refactorings.csv",
  140. "preprocessor": "data-preprocessing/fine-grained-refactorings.py",
  141. "loader": "csv",
  142. "mapper": "bohr.collection.datamappers.commit.CommitMapper"
  143. },
  144. "smells-test": {
  145. "author": "Madeyski, Lech, Lewowski, Tomasz",
  146. "description": "Dataset from Madeyski, Lech, and Tomasz Lewowski. \"MLCQ: Industry-relevant code smell data set.\" preprocessed by H. Babii to extract long method smells and split into train and test sets. License: Creative Commons Attribution 4.0 International",
  147. "path": "smells-madeyski.csv",
  148. "path_preprocessed": "smells/test.csv",
  149. "preprocessor": "data-preprocessing/smells.sh",
  150. "loader": "csv",
  151. "mapper": "bohr.collection.datamappers.method.MethodMapper",
  152. "sep": ";"
  153. }
  154. },
  155. "dataset-linkers": [
  156. {
  157. "from": "200k-commits",
  158. "to": "200k-commits-files"
  159. },
  160. {
  161. "from": "200k-commits",
  162. "to": "200k-commits-issues",
  163. "link": "200k-commits-link-issues"
  164. },
  165. {
  166. "from": "200k-commits",
  167. "to": "200k-commits-manual-labels"
  168. }
  169. ]
  170. },
  171. "heuristics": {
  172. "heuristics/bugginess.py": "a55520b99cbaad572a26886b52e74652",
  173. "heuristics/manuallabels.py": "f338b2a285d76da97b3f53e9b167368a",
  174. "heuristics/smells.py": "b1a5ed3a14eb9eae8924b8a43e3bc452"
  175. },
  176. "manual_stages": {}
  177. }
Tip!

Press p to see the previous file, or n to see the next file

Comments

Loading...