Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

dvc.lock 19 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
  1. schema: '2.0'
  2. stages:
  3. scan-interactions:
  4. cmd: cargo run --release -- goodreads scan interactions ../data/goodreads/goodreads_interactions.json.gz
  5. deps:
  6. - path: ../data/goodreads/goodreads_interactions.json.gz
  7. hash: md5
  8. md5: f2d054a85f33d405a9bff6933005ba89
  9. size: 9388113365
  10. - path: ../src/cli/goodreads
  11. hash: md5
  12. md5: 02847887f789f5b5f5672f8768f95b05.dir
  13. size: 10485
  14. nfiles: 4
  15. - path: ../src/goodreads
  16. hash: md5
  17. md5: e8117b3c433c7fa4f7ffbaf7cb8d7f06.dir
  18. size: 19592
  19. nfiles: 9
  20. outs:
  21. - path: gr-interactions.parquet
  22. hash: md5
  23. md5: b30430653770266a99921696af2d4044
  24. size: 4587488091
  25. - path: gr-users.parquet
  26. hash: md5
  27. md5: ceb6314193b9a242df2a4db02e39f639
  28. size: 18637038
  29. scan-book-info:
  30. cmd: cargo run --release -- goodreads scan books ../data/goodreads/goodreads_books.json.gz
  31. deps:
  32. - path: ../data/goodreads/goodreads_books.json.gz
  33. md5: 01b40c70a00fb6aa321ee478f0fd0d6b
  34. size: 2043729443
  35. - path: ../src/cli/goodreads
  36. hash: md5
  37. md5: 02847887f789f5b5f5672f8768f95b05.dir
  38. size: 10485
  39. nfiles: 4
  40. - path: ../src/goodreads
  41. hash: md5
  42. md5: e8117b3c433c7fa4f7ffbaf7cb8d7f06.dir
  43. size: 19592
  44. nfiles: 9
  45. outs:
  46. - path: gr-book-authors.parquet
  47. hash: md5
  48. md5: 588ef6c68a1a1a3418dac0164da333f7
  49. size: 21162981
  50. - path: gr-book-ids.parquet
  51. hash: md5
  52. md5: 402aceb6914ba3453d230144b39fbfe8
  53. size: 46635671
  54. - path: gr-book-info.parquet
  55. hash: md5
  56. md5: e5dd7a6abf966ecb35abac621b234db2
  57. size: 12358354
  58. - path: gr-book-series.parquet
  59. hash: md5
  60. md5: 30a54ee855d2a14238c215d96b5e6625
  61. size: 5857161
  62. scan-work-info:
  63. cmd: cargo run --release -- goodreads scan works ../data/goodreads/goodreads_book_works.json.gz
  64. deps:
  65. - path: ../data/goodreads/goodreads_book_works.json.gz
  66. md5: e80738a88d02d2b0081cd249d9b4f081
  67. size: 81412944
  68. - path: ../src/cli/goodreads
  69. hash: md5
  70. md5: 02847887f789f5b5f5672f8768f95b05.dir
  71. size: 10485
  72. nfiles: 4
  73. - path: ../src/goodreads
  74. hash: md5
  75. md5: e8117b3c433c7fa4f7ffbaf7cb8d7f06.dir
  76. size: 19592
  77. nfiles: 9
  78. outs:
  79. - path: gr-work-info.parquet
  80. hash: md5
  81. md5: f67f4698e120af10eefae7ce4fb11498
  82. size: 19901545
  83. book-isbn-ids:
  84. cmd: cargo run --release -- link-isbn-ids -o goodreads/book-isbn-ids.parquet -R
  85. book_id -I isbn10 -I isbn13 -I asin goodreads/gr-book-ids.parquet
  86. deps:
  87. - path: book-links/all-isbns.parquet
  88. hash: md5
  89. md5: 8803c162ab97efac8b098df7e9252314
  90. size: 464207536
  91. - path: goodreads/gr-book-ids.parquet
  92. hash: md5
  93. md5: 402aceb6914ba3453d230144b39fbfe8
  94. size: 46635671
  95. - path: src/cli/goodreads
  96. hash: md5
  97. md5: 02847887f789f5b5f5672f8768f95b05.dir
  98. size: 10485
  99. nfiles: 4
  100. outs:
  101. - path: goodreads/book-isbn-ids.parquet
  102. hash: md5
  103. md5: 5c95445b6996decd1754750077214289
  104. size: 16211736
  105. cluster-ratings:
  106. cmd: cargo run --release -- goodreads cluster-interactions --ratings -o goodreads/gr-cluster-ratings.parquet
  107. deps:
  108. - path: goodreads/gr-book-link.parquet
  109. hash: md5
  110. md5: 8e85c40384efe8936830b3d92dc59b9a
  111. size: 19973611
  112. - path: goodreads/gr-interactions.parquet
  113. hash: md5
  114. md5: b30430653770266a99921696af2d4044
  115. size: 4587488091
  116. - path: src/cli/goodreads/cluster.rs
  117. hash: md5
  118. md5: 840dee408f740648113860ab5dbd3ec7
  119. size: 6175
  120. outs:
  121. - path: goodreads/gr-cluster-ratings.parquet
  122. hash: md5
  123. md5: 0afc2f46e59c3f22f203c3c0759653c2
  124. size: 1369545709
  125. book-links:
  126. cmd: cargo run --release -- cluster extract-books -o goodreads/gr-book-link.parquet
  127. -n book_id --join-file goodreads/gr-book-ids.parquet --join-field work_id GR-B
  128. deps:
  129. - path: book-links/cluster-graph-nodes.parquet
  130. hash: md5
  131. md5: 2f0e64cd13b40c850326a75d4e69731a
  132. size: 1226145163
  133. - path: goodreads/gr-book-ids.parquet
  134. hash: md5
  135. md5: 402aceb6914ba3453d230144b39fbfe8
  136. size: 46635671
  137. outs:
  138. - path: goodreads/gr-book-link.parquet
  139. hash: md5
  140. md5: 8e85c40384efe8936830b3d92dc59b9a
  141. size: 19973611
  142. cluster-interactions:
  143. cmd: python cluster-ratings.py
  144. deps:
  145. - path: cluster-ratings.py
  146. md5: d44be03784268f02b28c1b40bc108c27
  147. size: 1219
  148. - path: gr-book-link.parquet
  149. md5: f31fc29c8644c439e192af9868604a93
  150. size: 29380146
  151. - path: gr-interactions.parquet
  152. md5: 0ee400ec374aa0263198b25e9d9140e0
  153. size: 1619196241
  154. outs:
  155. - path: gr-cluster-ratings.parquet
  156. md5: af8b42db6e3b8f0ded896f7cab433530
  157. size: 327784394
  158. cluster-actions:
  159. cmd: cargo run --release -- goodreads cluster-interactions --add-actions -o goodreads/gr-cluster-actions.parquet
  160. deps:
  161. - path: goodreads/gr-book-link.parquet
  162. hash: md5
  163. md5: 8e85c40384efe8936830b3d92dc59b9a
  164. size: 19973611
  165. - path: goodreads/gr-interactions.parquet
  166. hash: md5
  167. md5: b30430653770266a99921696af2d4044
  168. size: 4587488091
  169. - path: src/cli/goodreads/cluster.rs
  170. hash: md5
  171. md5: 840dee408f740648113860ab5dbd3ec7
  172. size: 6175
  173. outs:
  174. - path: goodreads/gr-cluster-actions.parquet
  175. hash: md5
  176. md5: 6f3181c11589740851b49364f7140793
  177. size: 2709707272
  178. work-ratings:
  179. cmd: cargo run --release -- goodreads cluster-interactions --ratings --native-works
  180. -o goodreads/gr-work-ratings.parquet
  181. deps:
  182. - path: goodreads/gr-book-link.parquet
  183. hash: md5
  184. md5: 8e85c40384efe8936830b3d92dc59b9a
  185. size: 19973611
  186. - path: goodreads/gr-interactions.parquet
  187. hash: md5
  188. md5: b30430653770266a99921696af2d4044
  189. size: 4587488091
  190. - path: src/cli/goodreads/cluster.rs
  191. hash: md5
  192. md5: 840dee408f740648113860ab5dbd3ec7
  193. size: 6175
  194. outs:
  195. - path: goodreads/gr-work-ratings.parquet
  196. hash: md5
  197. md5: f4d72ec57c57ac3d013b7271c79d950b
  198. size: 1434246294
  199. work-actions:
  200. cmd: cargo run --release -- goodreads cluster-interactions --add-actions --native-works
  201. -o goodreads/gr-work-actions.parquet
  202. deps:
  203. - path: goodreads/gr-book-link.parquet
  204. hash: md5
  205. md5: 8e85c40384efe8936830b3d92dc59b9a
  206. size: 19973611
  207. - path: goodreads/gr-interactions.parquet
  208. hash: md5
  209. md5: b30430653770266a99921696af2d4044
  210. size: 4587488091
  211. - path: src/cli/goodreads/cluster.rs
  212. hash: md5
  213. md5: 840dee408f740648113860ab5dbd3ec7
  214. size: 6175
  215. outs:
  216. - path: goodreads/gr-work-actions.parquet
  217. hash: md5
  218. md5: d20518227c34e9b440ba5f39e85da954
  219. size: 2808086719
  220. work-gender:
  221. cmd: cargo run --release -- goodreads work-gender
  222. deps:
  223. - path: ../book-links/cluster-genders.parquet
  224. hash: md5
  225. md5: 5aef997fa371322d26041bbde5642c0d
  226. size: 180340846
  227. - path: ../src/cli/goodreads
  228. hash: md5
  229. md5: 02847887f789f5b5f5672f8768f95b05.dir
  230. size: 10485
  231. nfiles: 4
  232. - path: gr-book-link.parquet
  233. hash: md5
  234. md5: 8e85c40384efe8936830b3d92dc59b9a
  235. size: 19973611
  236. outs:
  237. - path: gr-work-gender.parquet
  238. hash: md5
  239. md5: ae8f42aedc5976b2e7ba17ad31c4d615
  240. size: 27992752
  241. schema@gr-work-gender:
  242. cmd: python ../run.py --rust pq-info -o gr-work-gender.json gr-work-gender.parquet
  243. deps:
  244. - path: gr-work-gender.parquet
  245. md5: 45e9d912b392b0d714f67c126531bbae
  246. size: 28891449
  247. outs:
  248. - path: gr-work-gender.json
  249. md5: 4c896ebcdce4495982076d1150ea9e5a
  250. size: 423
  251. schema@gr-work-info:
  252. cmd: python ../run.py --rust pq-info -o gr-work-info.json gr-work-info.parquet
  253. deps:
  254. - path: gr-work-info.parquet
  255. md5: 79b363824af58bff7fa61e645bbe23b1
  256. size: 21837297
  257. outs:
  258. - path: gr-work-info.json
  259. md5: 833d0d78ade406bdde4f310c619c7c13
  260. size: 517
  261. schema@gr-work-actions:
  262. cmd: python ../run.py --rust pq-info -o gr-work-actions.json gr-work-actions.parquet
  263. deps:
  264. - path: gr-work-actions.parquet
  265. md5: a15faa45f6b956f93a7795b453d946e8
  266. size: 1569601498
  267. outs:
  268. - path: gr-work-actions.json
  269. md5: fbd53ff95cd17f34166e93c8b039cb07
  270. size: 618
  271. schema@gr-work-ratings:
  272. cmd: python ../run.py --rust pq-info -o gr-work-ratings.json gr-work-ratings.parquet
  273. deps:
  274. - path: gr-work-ratings.parquet
  275. md5: 0d9f519acad4d8f1b94dafc54bace4a3
  276. size: 1640676645
  277. outs:
  278. - path: gr-work-ratings.json
  279. md5: 77c0c065962ddeee0d183dae1ac1e897
  280. size: 708
  281. schema@gr-book-info:
  282. cmd: python ../run.py --rust pq-info -o gr-book-info.json gr-book-info.parquet
  283. deps:
  284. - path: gr-book-info.parquet
  285. md5: a4344a4f10eb8631049a5d2f4cf91e7f
  286. size: 15146558
  287. outs:
  288. - path: gr-book-info.json
  289. md5: 8cc501ca40ea549e8d27d948b3ff0231
  290. size: 518
  291. schema@book-isbn-ids:
  292. cmd: python ../run.py --rust pq-info -o book-isbn-ids.json book-isbn-ids.parquet
  293. deps:
  294. - path: book-isbn-ids.parquet
  295. md5: 100ea102d2775993cc40522df7210687
  296. size: 15490623
  297. outs:
  298. - path: book-isbn-ids.json
  299. md5: 771436a0047e0443854ba36d95411b20
  300. size: 249
  301. schema@gr-interactions:
  302. cmd: python ../run.py --rust pq-info -o gr-interactions.json gr-interactions.parquet
  303. deps:
  304. - path: gr-interactions.parquet
  305. md5: 9788655b2499eb8150398f8c1558e823
  306. size: 4372045343
  307. outs:
  308. - path: gr-interactions.json
  309. md5: 9b3af7db2bcd5cb7616d6f0a17daefb4
  310. size: 990
  311. schema@gr-book-ids:
  312. cmd: python ../run.py --rust pq-info -o gr-book-ids.json gr-book-ids.parquet
  313. deps:
  314. - path: gr-book-ids.parquet
  315. md5: e259dab35fc7e37e8904bc2584245138
  316. size: 37497686
  317. outs:
  318. - path: gr-book-ids.json
  319. md5: 130b3c6d3d145ee97264041a3637ef7e
  320. size: 507
  321. schema@gr-cluster-ratings:
  322. cmd: python ../run.py --rust pq-info -o gr-cluster-ratings.json gr-cluster-ratings.parquet
  323. deps:
  324. - path: gr-cluster-ratings.parquet
  325. md5: 353cd5edc3df644c77fe01da8f26a436
  326. size: 1593822465
  327. outs:
  328. - path: gr-cluster-ratings.json
  329. md5: 6a5a2ef9fa4305c24fd17082f9d976e4
  330. size: 707
  331. schema@gr-users:
  332. cmd: python ../run.py --rust pq-info -o gr-users.json gr-users.parquet
  333. deps:
  334. - path: gr-users.parquet
  335. md5: ff0d06650a9944bf80cc8c4a99827c1d
  336. size: 18683237
  337. outs:
  338. - path: gr-users.json
  339. md5: 70a463f659828f45949b5288557788f7
  340. size: 244
  341. schema@gr-book-link:
  342. cmd: python ../run.py --rust pq-info -o gr-book-link.json gr-book-link.parquet
  343. deps:
  344. - path: gr-book-link.parquet
  345. md5: 17c43318b93c22563377f71b2ee45a1e
  346. size: 19787709
  347. outs:
  348. - path: gr-book-link.json
  349. md5: e6406cbc13eae52d6b605604c709a4af
  350. size: 338
  351. schema@gr-cluster-actions:
  352. cmd: python ../run.py --rust pq-info -o gr-cluster-actions.json gr-cluster-actions.parquet
  353. deps:
  354. - path: gr-cluster-actions.parquet
  355. md5: a93581e4cc78bf216b34358417954393
  356. size: 1525089046
  357. outs:
  358. - path: gr-cluster-actions.json
  359. md5: 6929df3364058a637d1aa4df32ba5567
  360. size: 617
  361. scan-book-genres:
  362. cmd: cargo run --release -- goodreads scan genres ../data/goodreads/goodreads_book_genres_initial.json.gz
  363. deps:
  364. - path: ../data/goodreads/goodreads_book_genres_initial.json.gz
  365. md5: 99ee3d1cadd68818c3dd0ef0d2f10602
  366. size: 24253992
  367. - path: ../src/cli/goodreads
  368. hash: md5
  369. md5: 02847887f789f5b5f5672f8768f95b05.dir
  370. size: 10485
  371. nfiles: 4
  372. - path: ../src/goodreads
  373. hash: md5
  374. md5: e8117b3c433c7fa4f7ffbaf7cb8d7f06.dir
  375. size: 19592
  376. nfiles: 9
  377. outs:
  378. - path: gr-book-genres.parquet
  379. hash: md5
  380. md5: cb1b59dbac1ccef2acd1ac225818b629
  381. size: 19187256
  382. - path: gr-genres.parquet
  383. hash: md5
  384. md5: c24a0e55e1bc79258188aac5da3e1ac3
  385. size: 809
  386. schema@gr-genres:
  387. cmd: python ../run.py --rust pq-info -o gr-genres.json gr-genres.parquet
  388. deps:
  389. - path: gr-genres.parquet
  390. md5: c24a0e55e1bc79258188aac5da3e1ac3
  391. size: 809
  392. outs:
  393. - path: gr-genres.json
  394. md5: 8df384efb85a16440ee09f14a3da1771
  395. size: 235
  396. schema@gr-book-series:
  397. cmd: python ../run.py --rust pq-info -o gr-book-series.json gr-book-series.parquet
  398. deps:
  399. - path: gr-book-series.parquet
  400. md5: 6688b051bf7a7b3ed720a49ba74a528c
  401. size: 5654585
  402. outs:
  403. - path: gr-book-series.json
  404. md5: 02b8f57e20dbdddb8fc883fc03ef0561
  405. size: 245
  406. schema@gr-book-genres:
  407. cmd: python ../run.py --rust pq-info -o gr-book-genres.json gr-book-genres.parquet
  408. deps:
  409. - path: gr-book-genres.parquet
  410. md5: 96e51ae7b7e09f9d752b110306bc8dd1
  411. size: 17278459
  412. outs:
  413. - path: gr-book-genres.json
  414. md5: 0a9f5acaf9bc9c79b6c2792f311f9889
  415. size: 338
  416. scan-simple-interactions:
  417. cmd: python ../run.py --rust goodreads scan interactions --csv --book-map ../data/goodreads/book_id_map.csv
  418. ../data/goodreads/goodreads_interactions.csv
  419. deps:
  420. - path: ../data/goodreads/book_id_map.csv
  421. md5: c4e5afd568df2f7a4a8a52f3eeb88413
  422. size: 37846957
  423. - path: ../data/goodreads/goodreads_interactions.csv
  424. md5: 696fbf71f0082c0b6a2379182b147c1e
  425. size: 4318621741
  426. - path: ../src/cli/goodreads.rs
  427. md5: 1fe05e7e29045b7ad1528df9af270c2d
  428. size: 3080
  429. - path: ../src/goodreads
  430. md5: 2a97b45388d5581a7db8e442cba294fb.dir
  431. size: 13762
  432. nfiles: 6
  433. outs:
  434. - path: gr-simple-interactions.parquet
  435. md5: e01dd1692896c9ae0a2b18e94b94e5aa
  436. size: 1456440955
  437. cluster-simple-ratings:
  438. cmd: python ../run.py gr-cluster-interactions.py --ratings --simple -o gr-cluster-simple-ratings.parquet
  439. deps:
  440. - path: gr-book-link.parquet
  441. md5: a8fbe0288a2682a983fe9550e500ad93
  442. size: 20310729
  443. - path: gr-cluster-interactions.py
  444. md5: f3bff4368de9ccfc6a9d92f9787eceb8
  445. size: 4159
  446. - path: gr-simple-interactions.parquet
  447. md5: e01dd1692896c9ae0a2b18e94b94e5aa
  448. size: 1456440955
  449. outs:
  450. - path: gr-cluster-simple-ratings.parquet
  451. md5: 0675244e9a9ba0d451bbe12dbcb3cbe4
  452. size: 689950939
  453. scan-author-info:
  454. cmd: cargo run --release -- goodreads scan authors ../data/goodreads/goodreads_book_authors.json.gz
  455. deps:
  456. - path: ../data/goodreads/goodreads_book_authors.json.gz
  457. md5: b193c3febd961fb69443b65ba05b83a7
  458. size: 17877585
  459. - path: ../src/cli/goodreads
  460. hash: md5
  461. md5: 02847887f789f5b5f5672f8768f95b05.dir
  462. size: 10485
  463. nfiles: 4
  464. - path: ../src/goodreads
  465. hash: md5
  466. md5: e8117b3c433c7fa4f7ffbaf7cb8d7f06.dir
  467. size: 19592
  468. nfiles: 9
  469. outs:
  470. - path: gr-author-info.parquet
  471. hash: md5
  472. md5: cf52195249be80735cd05e396fd749bf
  473. size: 10149282
  474. schema@gr-author-info:
  475. cmd: python ../run.py --rust pq-info -o gr-author-info.json gr-author-info.parquet
  476. deps:
  477. - path: gr-author-info.parquet
  478. md5: f91028921cc88b670cdcfddc8f66d23a
  479. size: 10031105
  480. outs:
  481. - path: gr-author-info.json
  482. md5: d60cc12c1bab7ad51515067d976ff3d1
  483. size: 245
  484. work-actions-5core:
  485. cmd: cargo run --release -- kcore -o gr-work-actions-5core.parquet gr-work-actions.parquet
  486. deps:
  487. - path: ../src/cli/kcore.rs
  488. hash: md5
  489. md5: 9a64f2beb19d2053d9c2386609beafe9
  490. size: 4874
  491. - path: gr-work-actions.parquet
  492. hash: md5
  493. md5: d20518227c34e9b440ba5f39e85da954
  494. size: 2808086719
  495. outs:
  496. - path: gr-work-actions-5core.parquet
  497. hash: md5
  498. md5: 9304c4ed695ecd786ccdbe67d5557eb2
  499. size: 2793734157
  500. cluster-ratings-5core:
  501. cmd: cargo run --release -- kcore -o gr-cluster-ratings-5core.parquet gr-cluster-ratings.parquet
  502. deps:
  503. - path: ../src/cli/kcore.rs
  504. hash: md5
  505. md5: 9a64f2beb19d2053d9c2386609beafe9
  506. size: 4874
  507. - path: gr-cluster-ratings.parquet
  508. hash: md5
  509. md5: 0afc2f46e59c3f22f203c3c0759653c2
  510. size: 1369545709
  511. outs:
  512. - path: gr-cluster-ratings-5core.parquet
  513. hash: md5
  514. md5: 3ae528be03c73d8974d4f0c51f3b698a
  515. size: 1347470376
  516. cluster-actions-5core:
  517. cmd: cargo run --release -- kcore -o gr-cluster-actions-5core.parquet gr-cluster-actions.parquet
  518. deps:
  519. - path: ../src/cli/kcore.rs
  520. hash: md5
  521. md5: 9a64f2beb19d2053d9c2386609beafe9
  522. size: 4874
  523. - path: gr-cluster-actions.parquet
  524. hash: md5
  525. md5: 6f3181c11589740851b49364f7140793
  526. size: 2709707272
  527. outs:
  528. - path: gr-cluster-actions-5core.parquet
  529. hash: md5
  530. md5: b407bad4df6930b6630427d138543020
  531. size: 2694955704
  532. work-ratings-5core:
  533. cmd: cargo run --release -- kcore -o gr-work-ratings-5core.parquet gr-work-ratings.parquet
  534. deps:
  535. - path: ../src/cli/kcore.rs
  536. hash: md5
  537. md5: 9a64f2beb19d2053d9c2386609beafe9
  538. size: 4874
  539. - path: gr-work-ratings.parquet
  540. hash: md5
  541. md5: f4d72ec57c57ac3d013b7271c79d950b
  542. size: 1434246294
  543. outs:
  544. - path: gr-work-ratings-5core.parquet
  545. hash: md5
  546. md5: e3433e12926d01b7c49f4f32fb603b83
  547. size: 1412366085
  548. work-actions-2015-100-10core:
  549. cmd: cargo run --release -- kcore --user-k 10 --item-k 100 --year 2015 -o gr-work-actions-2015-100-10core.parquet
  550. gr-work-actions.parquet
  551. deps:
  552. - path: ../src/cli/kcore.rs
  553. hash: md5
  554. md5: 9a64f2beb19d2053d9c2386609beafe9
  555. size: 4874
  556. - path: gr-work-actions.parquet
  557. hash: md5
  558. md5: d20518227c34e9b440ba5f39e85da954
  559. size: 2808086719
  560. outs:
  561. - path: gr-work-actions-2015-100-10core.parquet
  562. hash: md5
  563. md5: aa0c49abdfb8887fb7361b0cbff1ce34
  564. size: 177294387
  565. work-ratings-2015-100-10core:
  566. cmd: cargo run --release -- kcore --user-k 10 --item-k 100 --year 2015 -o gr-work-ratings-2015-100-10core.parquet
  567. gr-work-ratings.parquet
  568. deps:
  569. - path: ../src/cli/kcore.rs
  570. hash: md5
  571. md5: 9a64f2beb19d2053d9c2386609beafe9
  572. size: 4874
  573. - path: gr-work-ratings.parquet
  574. hash: md5
  575. md5: f4d72ec57c57ac3d013b7271c79d950b
  576. size: 1434246294
  577. outs:
  578. - path: gr-work-ratings-2015-100-10core.parquet
  579. hash: md5
  580. md5: b4d5bb0ae60646e8a1dd04cf57c47ab1
  581. size: 5153391
  582. scan-reviews:
  583. cmd: cargo run --release -- goodreads scan reviews ../data/goodreads/goodreads_reviews_dedup.json.gz
  584. deps:
  585. - path: ../data/goodreads/goodreads_reviews_dedup.json.gz
  586. hash: md5
  587. md5: bdd95c4f92691df3d311012254988a1e
  588. size: 5343299228
  589. - path: ../src/cli/goodreads
  590. hash: md5
  591. md5: 02847887f789f5b5f5672f8768f95b05.dir
  592. size: 10485
  593. nfiles: 4
  594. - path: ../src/goodreads
  595. hash: md5
  596. md5: e8117b3c433c7fa4f7ffbaf7cb8d7f06.dir
  597. size: 19592
  598. nfiles: 9
  599. - path: gr-book-link.parquet
  600. hash: md5
  601. md5: 8e85c40384efe8936830b3d92dc59b9a
  602. size: 19973611
  603. - path: gr-users.parquet
  604. hash: md5
  605. md5: ceb6314193b9a242df2a4db02e39f639
  606. size: 18637038
  607. outs:
  608. - path: gr-reviews.parquet
  609. hash: md5
  610. md5: c4fc1913cb2b332f1709f737086346a1
  611. size: 4513330717
Tip!

Press p or to see the previous file or, n or to see the next file

Comments

Loading...