Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

dvc.yaml 5.4 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
  1. stages:
  2. book-isbn-ids:
  3. cmd: cargo run --release -- link-isbn-ids -o goodreads/book-isbn-ids.parquet -R book_id -I isbn10 -I isbn13 -I asin goodreads/gr-book-ids.parquet
  4. deps:
  5. - src/cli/goodreads
  6. - goodreads/gr-book-ids.parquet
  7. - book-links/all-isbns.parquet
  8. outs:
  9. - goodreads/book-isbn-ids.parquet
  10. wdir: ..
  11. book-links:
  12. cmd: cargo run --release -- cluster extract-books -o goodreads/gr-book-link.parquet -n book_id --join-file goodreads/gr-book-ids.parquet --join-field work_id GR-B
  13. deps:
  14. - goodreads/gr-book-ids.parquet
  15. - book-links/cluster-graph-nodes.parquet
  16. outs:
  17. - goodreads/gr-book-link.parquet
  18. wdir: ..
  19. cluster-actions:
  20. cmd: cargo run --release -- goodreads cluster-interactions --add-actions -o goodreads/gr-cluster-actions.parquet
  21. deps:
  22. - src/cli/goodreads/cluster.rs
  23. - goodreads/gr-book-link.parquet
  24. - goodreads/gr-interactions.parquet
  25. outs:
  26. - goodreads/gr-cluster-actions.parquet
  27. wdir: ..
  28. cluster-actions-5core:
  29. cmd: cargo run --release -- kcore -o gr-cluster-actions-5core.parquet gr-cluster-actions.parquet
  30. deps:
  31. - ../src/cli/kcore.rs
  32. - gr-cluster-actions.parquet
  33. outs:
  34. - gr-cluster-actions-5core.parquet
  35. cluster-ratings:
  36. cmd: cargo run --release -- goodreads cluster-interactions --ratings -o goodreads/gr-cluster-ratings.parquet
  37. deps:
  38. - src/cli/goodreads/cluster.rs
  39. - goodreads/gr-book-link.parquet
  40. - goodreads/gr-interactions.parquet
  41. outs:
  42. - goodreads/gr-cluster-ratings.parquet
  43. wdir: ..
  44. cluster-ratings-5core:
  45. cmd: cargo run --release -- kcore -o gr-cluster-ratings-5core.parquet gr-cluster-ratings.parquet
  46. deps:
  47. - ../src/cli/kcore.rs
  48. - gr-cluster-ratings.parquet
  49. outs:
  50. - gr-cluster-ratings-5core.parquet
  51. scan-author-info:
  52. cmd: cargo run --release -- goodreads scan authors ../data/goodreads/goodreads_book_authors.json.gz
  53. deps:
  54. - ../src/cli/goodreads
  55. - ../src/goodreads
  56. - ../data/goodreads/goodreads_book_authors.json.gz
  57. outs:
  58. - gr-author-info.parquet
  59. scan-book-genres:
  60. cmd: cargo run --release -- goodreads scan genres ../data/goodreads/goodreads_book_genres_initial.json.gz
  61. deps:
  62. - ../src/cli/goodreads
  63. - ../src/goodreads
  64. - ../data/goodreads/goodreads_book_genres_initial.json.gz
  65. outs:
  66. - gr-book-genres.parquet
  67. - gr-genres.parquet
  68. scan-book-info:
  69. cmd: cargo run --release -- goodreads scan books ../data/goodreads/goodreads_books.json.gz
  70. deps:
  71. - ../src/cli/goodreads
  72. - ../src/goodreads
  73. - ../data/goodreads/goodreads_books.json.gz
  74. outs:
  75. - gr-book-ids.parquet
  76. - gr-book-info.parquet
  77. - gr-book-authors.parquet
  78. - gr-book-series.parquet
  79. scan-interactions:
  80. cmd: cargo run --release -- goodreads scan interactions ../data/goodreads/goodreads_interactions.json.gz
  81. deps:
  82. - ../src/cli/goodreads
  83. - ../src/goodreads
  84. - ../data/goodreads/goodreads_interactions.json.gz
  85. outs:
  86. - gr-interactions.parquet
  87. - gr-users.parquet
  88. scan-reviews:
  89. cmd: cargo run --release -- goodreads scan reviews ../data/goodreads/goodreads_reviews_dedup.json.gz
  90. deps:
  91. - ../src/cli/goodreads
  92. - ../src/goodreads
  93. - ../data/goodreads/goodreads_reviews_dedup.json.gz
  94. - gr-book-link.parquet
  95. - gr-users.parquet
  96. outs:
  97. - gr-reviews.parquet
  98. scan-work-info:
  99. cmd: cargo run --release -- goodreads scan works ../data/goodreads/goodreads_book_works.json.gz
  100. deps:
  101. - ../src/cli/goodreads
  102. - ../src/goodreads
  103. - ../data/goodreads/goodreads_book_works.json.gz
  104. outs:
  105. - gr-work-info.parquet
  106. work-actions:
  107. cmd: cargo run --release -- goodreads cluster-interactions --add-actions --native-works -o goodreads/gr-work-actions.parquet
  108. deps:
  109. - src/cli/goodreads/cluster.rs
  110. - goodreads/gr-book-link.parquet
  111. - goodreads/gr-interactions.parquet
  112. outs:
  113. - goodreads/gr-work-actions.parquet
  114. wdir: ..
  115. work-actions-2015-100-10core:
  116. cmd: cargo run --release -- kcore --user-k 10 --item-k 100 --year 2015 -o gr-work-actions-2015-100-10core.parquet gr-work-actions.parquet
  117. deps:
  118. - gr-work-actions.parquet
  119. - ../src/cli/kcore.rs
  120. outs:
  121. - gr-work-actions-2015-100-10core.parquet
  122. work-actions-5core:
  123. cmd: cargo run --release -- kcore -o gr-work-actions-5core.parquet gr-work-actions.parquet
  124. deps:
  125. - ../src/cli/kcore.rs
  126. - gr-work-actions.parquet
  127. outs:
  128. - gr-work-actions-5core.parquet
  129. work-gender:
  130. cmd: cargo run --release -- goodreads work-gender
  131. deps:
  132. - ../src/cli/goodreads
  133. - gr-book-link.parquet
  134. - ../book-links/cluster-genders.parquet
  135. outs:
  136. - gr-work-gender.parquet
  137. work-ratings:
  138. cmd: cargo run --release -- goodreads cluster-interactions --ratings --native-works -o goodreads/gr-work-ratings.parquet
  139. deps:
  140. - src/cli/goodreads/cluster.rs
  141. - goodreads/gr-book-link.parquet
  142. - goodreads/gr-interactions.parquet
  143. outs:
  144. - goodreads/gr-work-ratings.parquet
  145. wdir: ..
  146. work-ratings-2015-100-10core:
  147. cmd: cargo run --release -- kcore --user-k 10 --item-k 100 --year 2015 -o gr-work-ratings-2015-100-10core.parquet gr-work-ratings.parquet
  148. deps:
  149. - gr-work-ratings.parquet
  150. - ../src/cli/kcore.rs
  151. outs:
  152. - gr-work-ratings-2015-100-10core.parquet
  153. work-ratings-5core:
  154. cmd: cargo run --release -- kcore -o gr-work-ratings-5core.parquet gr-work-ratings.parquet
  155. deps:
  156. - ../src/cli/kcore.rs
  157. - gr-work-ratings.parquet
  158. outs:
  159. - gr-work-ratings-5core.parquet
Tip!

Press p or ← to see the previous file, or n or → to see the next file

Comments

Loading...