Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

dvc.lock 8.6 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
  1. schema: '2.0'
  2. stages:
  3. scan-authors:
  4. cmd: cargo run --release -- openlib scan-authors ../data/openlib/ol_dump_authors.txt.gz
  5. deps:
  6. - path: ../data/openlib/ol_dump_authors.txt.gz
  7. hash: md5
  8. md5: d174f253b528f74c0d6446f38feb6b63
  9. size: 598486891
  10. - path: ../src/cli/openlib.rs
  11. md5: dbe8caca3345f0f9ca7f730c2df79f29
  12. size: 1938
  13. - path: ../src/openlib/
  14. hash: md5
  15. md5: 27c1c622ab641d042af5b8fe2685d1ff.dir
  16. size: 16669
  17. nfiles: 6
  18. outs:
  19. - path: author-names.parquet
  20. hash: md5
  21. md5: c83794d9417c6ed9730b55f8e4070aec
  22. size: 206500073
  23. - path: authors.parquet
  24. hash: md5
  25. md5: fe8c17f92d93dd3e85184927d3dff22e
  26. size: 240905689
  27. scan-editions:
  28. cmd: cargo run --release -- openlib scan-editions ../data/openlib/ol_dump_editions.txt.gz
  29. deps:
  30. - path: ../data/openlib/ol_dump_editions.txt.gz
  31. hash: md5
  32. md5: 6b13afe5a50fb1aeeb430bcd01164d13
  33. size: 10078352001
  34. - path: ../src/cli/openlib.rs
  35. md5: dbe8caca3345f0f9ca7f730c2df79f29
  36. size: 1938
  37. - path: ../src/openlib/
  38. hash: md5
  39. md5: 27c1c622ab641d042af5b8fe2685d1ff.dir
  40. size: 16669
  41. nfiles: 6
  42. - path: author-ids-after-works.parquet
  43. hash: md5
  44. md5: e83ffaacbe90cdfb864d4035b7c7a0f1
  45. size: 89062717
  46. - path: authors.parquet
  47. hash: md5
  48. md5: fe8c17f92d93dd3e85184927d3dff22e
  49. size: 240905689
  50. - path: works.parquet
  51. hash: md5
  52. md5: e142c29e5d077d22c20f27497b1dcd08
  53. size: 923755753
  54. outs:
  55. - path: all-authors.parquet
  56. hash: md5
  57. md5: 1bb21b70ab9dc309b4e4643323abf292
  58. size: 89082395
  59. - path: all-works.parquet
  60. hash: md5
  61. md5: e0b8264cf73b57d1d9d8ed8dc6339f85
  62. size: 242195062
  63. - path: edition-authors.parquet
  64. hash: md5
  65. md5: d829268d4697178064cdd961c348f0a5
  66. size: 288214520
  67. - path: edition-isbns.parquet
  68. hash: md5
  69. md5: 53ad906e9e4b3d41115df7a36a08f002
  70. size: 304629821
  71. - path: edition-subjects.parquet
  72. hash: md5
  73. md5: 269c73b599b9b833c9c6ba53eb75449d
  74. size: 653028313
  75. - path: edition-works.parquet
  76. hash: md5
  77. md5: e4d17ebeaf8d5a41ad6904324556af29
  78. size: 293781641
  79. - path: editions.parquet
  80. hash: md5
  81. md5: 742e29d6617f26b7696d790b3549fad5
  82. size: 1216690399
  83. scan-works:
  84. cmd: cargo run --release -- openlib scan-works ../data/openlib/ol_dump_works.txt.gz
  85. deps:
  86. - path: ../data/openlib/ol_dump_works.txt.gz
  87. hash: md5
  88. md5: 0d96507a98acf4dcc5416b10d85b1225
  89. size: 3200947685
  90. - path: ../src/cli/openlib.rs
  91. md5: dbe8caca3345f0f9ca7f730c2df79f29
  92. size: 1938
  93. - path: ../src/openlib/
  94. hash: md5
  95. md5: 27c1c622ab641d042af5b8fe2685d1ff.dir
  96. size: 16669
  97. nfiles: 6
  98. - path: authors.parquet
  99. hash: md5
  100. md5: fe8c17f92d93dd3e85184927d3dff22e
  101. size: 240905689
  102. outs:
  103. - path: author-ids-after-works.parquet
  104. hash: md5
  105. md5: e83ffaacbe90cdfb864d4035b7c7a0f1
  106. size: 89062717
  107. - path: work-authors.parquet
  108. hash: md5
  109. md5: f005bf244e8c22e2686f7ddbc4447bf3
  110. size: 225212340
  111. - path: work-subjects.parquet
  112. hash: md5
  113. md5: a7d4bf8eadf91625766eb638daed322a
  114. size: 543492486
  115. - path: works.parquet
  116. hash: md5
  117. md5: e142c29e5d077d22c20f27497b1dcd08
  118. size: 923755753
  119. edition-isbn-ids:
  120. cmd: cargo run --release -- link-isbn-ids -R edition -o openlibrary/edition-isbn-ids.parquet
  121. openlibrary/edition-isbns.parquet
  122. deps:
  123. - path: book-links/all-isbns.parquet
  124. hash: md5
  125. md5: 8803c162ab97efac8b098df7e9252314
  126. size: 464207536
  127. - path: openlibrary/edition-isbns.parquet
  128. hash: md5
  129. md5: 53ad906e9e4b3d41115df7a36a08f002
  130. size: 304629821
  131. outs:
  132. - path: openlibrary/edition-isbn-ids.parquet
  133. hash: md5
  134. md5: 348916ff4a045bd10dcf4ef8b6eb93e7
  135. size: 226887130
  136. schema@edition-isbn-ids:
  137. cmd: python ../run.py --rust pq-info -o edition-isbn-ids.json edition-isbn-ids.parquet
  138. deps:
  139. - path: edition-isbn-ids.parquet
  140. md5: 6cf6500ebab273315fa90e2f2031dd3b
  141. size: 145485299
  142. outs:
  143. - path: edition-isbn-ids.json
  144. md5: 0fe837f400c6e7b7d4535b67237af989
  145. size: 251
  146. schema@works:
  147. cmd: python ../run.py --rust pq-info -o works.json works.parquet
  148. deps:
  149. - path: works.parquet
  150. md5: 0392b4f5308bf945efc14d206bb8b9fa
  151. size: 663422438
  152. outs:
  153. - path: works.json
  154. md5: 5b5e973797c2f0d8aff51c7f6d300fa6
  155. size: 327
  156. schema@all-works:
  157. cmd: python ../run.py --rust pq-info -o all-works.json all-works.parquet
  158. deps:
  159. - path: all-works.parquet
  160. md5: 832cc1253307df9c7bbe4d83e7e3fa93
  161. size: 176243794
  162. outs:
  163. - path: all-works.json
  164. md5: 1e5228092f43d81e5871c48117be3146
  165. size: 239
  166. schema@editions:
  167. cmd: python ../run.py --rust pq-info -o editions.json editions.parquet
  168. deps:
  169. - path: editions.parquet
  170. md5: b082da165d2b2d9c0fa5489376c8c57a
  171. size: 888704794
  172. outs:
  173. - path: editions.json
  174. md5: 464dc59fc293103fcbf3e9baa3373c1a
  175. size: 327
  176. schema@work-authors:
  177. cmd: python ../run.py --rust pq-info -o work-authors.json work-authors.parquet
  178. deps:
  179. - path: work-authors.parquet
  180. md5: e62c8dfb5507c4ec67b3e0f7b940523f
  181. size: 153899196
  182. outs:
  183. - path: work-authors.json
  184. md5: 140a8f1c6dc0ad558bf6ca19e742210f
  185. size: 331
  186. schema@author-names:
  187. cmd: python ../run.py --rust pq-info -o author-names.json author-names.parquet
  188. deps:
  189. - path: author-names.parquet
  190. md5: 76ca0c2e90393598cc71578ffe5493d1
  191. size: 151081619
  192. outs:
  193. - path: author-names.json
  194. md5: a8b5bf82ca7a88ff7ef58ec4698a0e3a
  195. size: 331
  196. schema@edition-authors:
  197. cmd: python ../run.py --rust pq-info -o edition-authors.json edition-authors.parquet
  198. deps:
  199. - path: edition-authors.parquet
  200. md5: 231cda3e2643ce16e46942a58aa3b57e
  201. size: 193584177
  202. outs:
  203. - path: edition-authors.json
  204. md5: 4ba99be63216163455443c29e8acafe4
  205. size: 336
  206. schema@edition-isbns:
  207. cmd: python ../run.py --rust pq-info -o edition-isbns.json edition-isbns.parquet
  208. deps:
  209. - path: edition-isbns.parquet
  210. md5: 4e25efdddefdaa656de713a99b0ca2d7
  211. size: 207849845
  212. outs:
  213. - path: edition-isbns.json
  214. md5: 7f750ebd2dcdfebb39eb49c0cf6948a2
  215. size: 247
  216. schema@edition-works:
  217. cmd: python ../run.py --rust pq-info -o edition-works.json edition-works.parquet
  218. deps:
  219. - path: edition-works.parquet
  220. md5: 9e7de4af3d400505f77235d8f903f826
  221. size: 203839441
  222. outs:
  223. - path: edition-works.json
  224. md5: d8b6f877272dbc1c7f690a5774ddada8
  225. size: 248
  226. schema@all-authors:
  227. cmd: python ../run.py --rust pq-info -o all-authors.json all-authors.parquet
  228. deps:
  229. - path: all-authors.parquet
  230. md5: 632add46e59ff1653d9532ecbc87b851
  231. size: 67096542
  232. outs:
  233. - path: all-authors.json
  234. md5: 6921aee9159edf48ca9a733245113f6e
  235. size: 237
  236. schema@authors:
  237. cmd: python ../run.py --rust pq-info -o authors.json authors.parquet
  238. deps:
  239. - path: authors.parquet
  240. md5: c77ceaa2f2cb3f90f514edd561e11398
  241. size: 178808160
  242. outs:
  243. - path: authors.json
  244. md5: a33ba4a8bb933055f43ec0d933cdebec
  245. size: 325
  246. schema@work-subjects:
  247. cmd: python ../run.py --rust pq-info -o work-subjects.json work-subjects.parquet
  248. deps:
  249. - path: work-subjects.parquet
  250. md5: 273c91a6d0b49f57ae8d10816fb4f5a0
  251. size: 304767903
  252. outs:
  253. - path: work-subjects.json
  254. md5: 164a92469b9c94c2d99ff5fa8ab27551
  255. size: 245
  256. schema@edition-subjects:
  257. cmd: python ../run.py --rust pq-info -o edition-subjects.json edition-subjects.parquet
  258. deps:
  259. - path: edition-subjects.parquet
  260. md5: d58f4b70cf2a7ebf8d919aa18113d2e2
  261. size: 522800217
  262. outs:
  263. - path: edition-subjects.json
  264. md5: 04631619a35917b9dd4a850541190b8d
  265. size: 245
  266. work-clusters:
  267. cmd: cargo run --release -- cluster extract-books -n work_id -o openlibrary/work-clusters.parquet
  268. OL-W
  269. deps:
  270. - path: book-links/cluster-graph-nodes.parquet
  271. hash: md5
  272. md5: 2f0e64cd13b40c850326a75d4e69731a
  273. size: 1226145163
  274. outs:
  275. - path: openlibrary/work-clusters.parquet
  276. hash: md5
  277. md5: 95cb00013c55edf3bfea1dcc703f32d2
  278. size: 238932284
  279. schema@work-clusters:
  280. cmd: python ../run.py --rust pq-info -o work-clusters.json work-clusters.parquet
  281. deps:
  282. - path: work-clusters.parquet
  283. md5: 82b15db1b6cb8a76cd9526a1770bc8dc
  284. size: 172722261
  285. outs:
  286. - path: work-clusters.json
  287. md5: 9b31d738006f09d80ffc1e2da748d976
  288. size: 251
Tip!

Press p or to see the previous file or, n or to see the next file

Comments

Loading...