Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

dvc.lock 3.7 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
  1. make_dataset:
  2. cmd: python3 src/data/make_dataset.py -c titanic -tr train.csv -te test.csv -o ./data/raw
  3. deps:
  4. - path: src/data/make_dataset.py
  5. md5: 6003407559d33e14c4122af507648dfd
  6. size: 2280
  7. params:
  8. params.yaml:
  9. dtypes:
  10. Age: float
  11. Embarked: category
  12. Fare: float
  13. Parch: int
  14. Pclass: category
  15. Sex: category
  16. SibSp: int
  17. Survived: category
  18. outs:
  19. - path: data/raw/test.csv
  20. md5: 029c9cd22461f6dbe8d9ab01def965c6
  21. size: 28629
  22. - path: data/raw/train.csv
  23. md5: 61fdd54abdbf6a85b778e937122e1194
  24. size: 61194
  25. - path: reports/figures/data_dictionary.tex
  26. md5: 10c5361db59b330722bd70b83ce0fcee
  27. size: 1521
  28. - path: reports/figures/table_one.tex
  29. md5: 4581508bdb37e12d9b9b5ff03244390d
  30. size: 844
  31. encode_labels:
  32. cmd: python3 src/data/encode_labels.py -tr data/raw/train.csv -te data/raw/test.csv
  33. -o data/interim
  34. deps:
  35. - path: data/raw/test.csv
  36. md5: 029c9cd22461f6dbe8d9ab01def965c6
  37. size: 28629
  38. - path: data/raw/train.csv
  39. md5: 61fdd54abdbf6a85b778e937122e1194
  40. size: 61194
  41. - path: src/data/encode_labels.py
  42. md5: d86af08312c29e68579950e4898c0e6b
  43. size: 3625
  44. params:
  45. params.yaml:
  46. dtypes:
  47. Age: float
  48. Embarked: category
  49. Fare: float
  50. Parch: int
  51. Pclass: category
  52. Sex: category
  53. SibSp: int
  54. Survived: category
  55. outs:
  56. - path: data/interim/label_encoding.yaml
  57. md5: 8e736d94ad872a9a10f2321653990609
  58. size: 115
  59. - path: data/interim/test_categorized.csv
  60. md5: f0fcdcd7bb08c23d382a665ac1436034
  61. size: 10788
  62. - path: data/interim/train_categorized.csv
  63. md5: 5d06666c95fed743140b44190fb67c77
  64. size: 23884
  65. impute_nan:
  66. cmd: python3 src/data/replace_nan.py -tr data/interim/train_categorized.csv -te
  67. data/interim/test_categorized.csv -o data/interim
  68. deps:
  69. - path: data/interim/test_categorized.csv
  70. md5: f0fcdcd7bb08c23d382a665ac1436034
  71. size: 10788
  72. - path: data/interim/train_categorized.csv
  73. md5: 5d06666c95fed743140b44190fb67c77
  74. size: 23884
  75. - path: src/data/replace_nan.py
  76. md5: 6834705a17ff8aec05abebfdb220dc77
  77. size: 2600
  78. params:
  79. params.yaml:
  80. imputation:
  81. Age: 29.6991
  82. outs:
  83. - path: data/interim/test_nan_imputed.csv
  84. md5: aa7540d1d3818bc69f943afdf4b57d39
  85. size: 11132
  86. - path: data/interim/train_nan_imputed.csv
  87. md5: 9edd0421f46d2f0786ea6d82fdcf4e12
  88. size: 24592
  89. normalize_data:
  90. cmd: python3 src/data/normalize_data.py -tr data/interim/train_nan_imputed.csv -te
  91. data/interim/test_nan_imputed.csv -o data/processed/
  92. deps:
  93. - path: data/interim/test_nan_imputed.csv
  94. md5: aa7540d1d3818bc69f943afdf4b57d39
  95. size: 11132
  96. - path: data/interim/train_nan_imputed.csv
  97. md5: 9edd0421f46d2f0786ea6d82fdcf4e12
  98. size: 24592
  99. - path: src/data/normalize_data.py
  100. md5: b7f1a5e16165928f55495fa6d48ca26e
  101. size: 1925
  102. params:
  103. params.yaml:
  104. normalize: None
  105. outs:
  106. - path: data/processed/test_processed.csv
  107. md5: aa7540d1d3818bc69f943afdf4b57d39
  108. size: 11132
  109. - path: data/processed/train_processed.csv
  110. md5: 9edd0421f46d2f0786ea6d82fdcf4e12
  111. size: 24592
  112. split_train_dev:
  113. cmd: python3 src/data/split_train_dev.py -tr data/processed/train_processed.csv
  114. -o data/processed/
  115. deps:
  116. - path: data/processed/train_processed.csv
  117. md5: 9edd0421f46d2f0786ea6d82fdcf4e12
  118. size: 24592
  119. - path: src/data/split_train_dev.py
  120. md5: 5b39923281d4fc59839e236664d5d0ea
  121. size: 2978
  122. params:
  123. params.yaml:
  124. random_seed: 12345
  125. train_test_split:
  126. target_class: Survived
  127. n_split: 10
  128. shuffle: true
  129. outs:
  130. - path: data/processed/split_train_dev.csv
  131. md5: d4d2c3159380a986fc2f04a8bcffda08
  132. size: 56115
Tip!

Press p or to see the previous file or, n or to see the next file

Comments

Loading...