Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

dvc.lock 6.8 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
  1. schema: '2.0'
  2. stages:
  3. make_dataset:
  4. cmd: python3 src/data/make_dataset.py -c titanic -tr train.csv -te test.csv -o
  5. ./data/raw
  6. deps:
  7. - path: src/data/make_dataset.py
  8. md5: 6003407559d33e14c4122af507648dfd
  9. size: 2280
  10. params:
  11. params.yaml:
  12. dtypes:
  13. Age: float
  14. Embarked: category
  15. Fare: float
  16. Parch: int
  17. Pclass: category
  18. Sex: category
  19. SibSp: int
  20. Survived: category
  21. outs:
  22. - path: data/raw/test.csv
  23. md5: 029c9cd22461f6dbe8d9ab01def965c6
  24. size: 28629
  25. - path: data/raw/train.csv
  26. md5: 61fdd54abdbf6a85b778e937122e1194
  27. size: 61194
  28. - path: reports/figures/data_dictionary.tex
  29. md5: 10c5361db59b330722bd70b83ce0fcee
  30. size: 1521
  31. - path: reports/figures/table_one.tex
  32. md5: 4581508bdb37e12d9b9b5ff03244390d
  33. size: 844
  34. encode_labels:
  35. cmd: python3 src/data/encode_labels.py -tr data/raw/train.csv -te data/raw/test.csv
  36. -o data/interim
  37. deps:
  38. - path: data/raw/test.csv
  39. md5: 029c9cd22461f6dbe8d9ab01def965c6
  40. size: 28629
  41. - path: data/raw/train.csv
  42. md5: 61fdd54abdbf6a85b778e937122e1194
  43. size: 61194
  44. - path: src/data/encode_labels.py
  45. md5: 94202f6ff677094ae0bf8b98575baf50
  46. size: 3623
  47. params:
  48. params.yaml:
  49. dtypes:
  50. Age: float
  51. Embarked: category
  52. Fare: float
  53. Parch: int
  54. Pclass: category
  55. Sex: category
  56. SibSp: int
  57. Survived: category
  58. outs:
  59. - path: data/interim/label_encoding.yaml
  60. md5: 8e736d94ad872a9a10f2321653990609
  61. size: 115
  62. - path: data/interim/test_categorized.csv
  63. md5: f0fcdcd7bb08c23d382a665ac1436034
  64. size: 10788
  65. - path: data/interim/train_categorized.csv
  66. md5: 5d06666c95fed743140b44190fb67c77
  67. size: 23884
  68. impute_nan:
  69. cmd: python3 src/data/replace_nan.py -tr data/interim/train_categorized.csv -te
  70. data/interim/test_categorized.csv -o data/interim
  71. deps:
  72. - path: data/interim/test_categorized.csv
  73. md5: f0fcdcd7bb08c23d382a665ac1436034
  74. size: 10788
  75. - path: data/interim/train_categorized.csv
  76. md5: 5d06666c95fed743140b44190fb67c77
  77. size: 23884
  78. - path: src/data/replace_nan.py
  79. md5: 76582379c528d402d408982409bd9820
  80. size: 2800
  81. params:
  82. params.yaml:
  83. imputation:
  84. Age: 29.6991
  85. Fare: 32.2042
  86. method: mean
  87. outs:
  88. - path: data/interim/test_nan_imputed.csv
  89. md5: cbc38434c407b0761da80a422ba97cff
  90. size: 11136
  91. - path: data/interim/train_nan_imputed.csv
  92. md5: 9edd0421f46d2f0786ea6d82fdcf4e12
  93. size: 24592
  94. build_features:
  95. cmd: python3 src/features/build_features.py -tr data/interim/train_nan_imputed.csv
  96. -te data/interim/test_nan_imputed.csv -o data/interim/
  97. deps:
  98. - path: data/interim/test_nan_imputed.csv
  99. md5: cbc38434c407b0761da80a422ba97cff
  100. size: 11136
  101. - path: data/interim/train_nan_imputed.csv
  102. md5: 9edd0421f46d2f0786ea6d82fdcf4e12
  103. size: 24592
  104. - path: src/features/build_features.py
  105. md5: 2e6e7d169d77cb1e990fbb37c4f147bd
  106. size: 5030
  107. params:
  108. params.yaml:
  109. feature_eng:
  110. featurize: true
  111. random_seed: 12345
  112. outs:
  113. - path: data/interim/test_featurized.csv
  114. md5: 6879b369c8d9f93c8ddeff61baea9ada
  115. size: 59474
  116. - path: data/interim/train_featurized.csv
  117. md5: 980d370c7991c5b991bf8c47d13beb02
  118. size: 127169
  119. normalize_data:
  120. cmd: python3 src/features/normalize.py -tr data/interim/train_featurized.csv -te
  121. data/interim/test_featurized.csv -o data/processed/
  122. deps:
  123. - path: data/interim/test_featurized.csv
  124. md5: 6879b369c8d9f93c8ddeff61baea9ada
  125. size: 59474
  126. - path: data/interim/train_featurized.csv
  127. md5: 980d370c7991c5b991bf8c47d13beb02
  128. size: 127169
  129. - path: src/features/normalize.py
  130. md5: 1e67fe2375c91a702144569d41a9f9c1
  131. size: 1924
  132. params:
  133. params.yaml:
  134. normalize:
  135. outs:
  136. - path: data/processed/test_processed.csv
  137. md5: 0cb34fc53024fa12b32a098a32870612
  138. size: 59004
  139. - path: data/processed/train_processed.csv
  140. md5: 55fc818f9babfe04c7bd9a605e0f6240
  141. size: 126326
  142. split_train_dev:
  143. cmd: python3 src/data/split_train_dev.py -tr data/processed/train_processed.csv
  144. -o data/processed/
  145. deps:
  146. - path: data/processed/train_processed.csv
  147. md5: 55fc818f9babfe04c7bd9a605e0f6240
  148. size: 126326
  149. - path: src/data/split_train_dev.py
  150. md5: 18368dc893cc1b306e39f62f6d460861
  151. size: 2966
  152. params:
  153. params.yaml:
  154. random_seed: 12345
  155. train_test_split:
  156. n_split: 10
  157. shuffle: true
  158. target_class: Survived
  159. outs:
  160. - path: data/processed/split_train_dev.csv
  161. md5: d4d2c3159380a986fc2f04a8bcffda08
  162. size: 56115
  163. train_model:
  164. cmd: python3 src/models/train_model.py -tr data/processed/train_processed.csv
  165. -cv data/processed/split_train_dev.csv
  166. deps:
  167. - path: data/processed/split_train_dev.csv
  168. md5: d4d2c3159380a986fc2f04a8bcffda08
  169. size: 56115
  170. - path: data/processed/train_processed.csv
  171. md5: 55fc818f9babfe04c7bd9a605e0f6240
  172. size: 126326
  173. - path: src/models/train_model.py
  174. md5: 393a733bef3fbe5b26772193d9450585
  175. size: 4424
  176. params:
  177. params.yaml:
  178. classifier: random_forest
  179. model_params:
  180. logistic_regression:
  181. naive_bayes:
  182. neural_network:
  183. random_forest:
  184. criterion: gini
  185. max_depth: 15
  186. max_features: auto
  187. min_samples_leaf: 6
  188. min_samples_split: 9
  189. n_estimators: 460
  190. support_vector_machine:
  191. xgboost:
  192. random_seed: 12345
  193. train_test_split.target_class: Survived
  194. outs:
  195. - path: models/estimator.pkl
  196. md5: ef34445e0498248f60e9ac3e78093ebe
  197. size: 31660314
  198. - path: results/metrics.json
  199. md5: b836f34107676456da5aded50162522f
  200. size: 316
  201. predict_output:
  202. cmd: python3 src/models/predict.py -te data/processed/test_processed.csv -rd results/
  203. -md models/
  204. deps:
  205. - path: data/processed/test_processed.csv
  206. md5: 0cb34fc53024fa12b32a098a32870612
  207. size: 59004
  208. - path: models/estimator.pkl
  209. md5: ef34445e0498248f60e9ac3e78093ebe
  210. size: 31660314
  211. - path: src/models/metrics.py
  212. md5: 69f52aa7384511f15c07d1504b368643
  213. size: 2056
  214. - path: src/models/predict.py
  215. md5: 3ba9b367a92a19518c0d7fa28426f7a1
  216. size: 3268
  217. params:
  218. params.yaml:
  219. predict:
  220. js_estimator: true
  221. train_test_split.target_class: Survived
  222. outs:
  223. - path: results/test_predict_binary.csv
  224. md5: 76577b506c3bc22a50d1aa61f3b940d0
  225. size: 2839
  226. - path: results/test_predict_proba.csv
  227. md5: 00fb6f7226fc5db2885502f3f44d2bb9
  228. size: 10089
Tip!

Press p or to see the previous file or, n or to see the next file

Comments

Loading...