Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

dvc.yaml 5.0 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
  # This file uses the two-stage status design, so new command stages
  # need to be accompanied by an entry in the status stage at the end.
  #
  # Every import stage below sets `wdir: ..`, so its command, deps, and outs
  # are written relative to the parent of this file's directory. Each one
  # writes a transcript under import/, which the `status` stage then consumes.
  # The schemas/*.status dependencies are not produced in this file
  # (presumably they come from a separate schema pipeline; verify).
  stages:
    # --- Amazon ratings ---
    az-ratings:
      cmd: >-
        python run.py --rust pcat -t az.raw_ratings -s az-ratings
        -T import/az-ratings.transcript
        -D az-schema -f CSV data/ratings_Books.csv
      wdir: ..
      deps:
        - data/ratings_Books.csv
        - schemas/az-schema.status
      outs:
        - import/az-ratings.transcript

    # --- BookCrossing ratings ---
    bx-ratings:
      cmd: >-
        python run.py bx-import -T import/bx-ratings.transcript
        data/BX-Book-Ratings.csv
      wdir: ..
      deps:
        - data/BX-Book-Ratings.csv
        - schemas/bx-schema.status
      outs:
        - import/bx-ratings.transcript

    # --- GoodReads JSON imports ---
    # Each stage reads a gzipped JSON dump plus an import/*.toml file.
    gr-authors:
      cmd: >-
        python run.py --rust import-json -T import/gr-authors.transcript
        --stage gr-authors -D gr-schema --truncate
        import/gr-authors.toml data/goodreads_book_authors.json.gz
      wdir: ..
      deps:
        - data/goodreads_book_authors.json.gz
        - import/gr-authors.toml
        - schemas/gr-schema.status
      outs:
        - import/gr-authors.transcript

    gr-book-genres:
      cmd: >-
        python run.py --rust import-json -T import/gr-book-genres.transcript
        --stage gr-book-genres -D gr-schema --truncate
        import/gr-book-genres.toml data/goodreads_book_genres_initial.json.gz
      wdir: ..
      deps:
        - data/goodreads_book_genres_initial.json.gz
        - import/gr-book-genres.toml
        - schemas/gr-schema.status
      outs:
        - import/gr-book-genres.transcript

    gr-books:
      cmd: >-
        python run.py --rust import-json -T import/gr-books.transcript
        --stage gr-books -D gr-schema --truncate
        import/gr-books.toml data/goodreads_books.json.gz
      wdir: ..
      deps:
        - data/goodreads_books.json.gz
        - import/gr-books.toml
        - schemas/gr-schema.status
      outs:
        - import/gr-books.transcript

    gr-interactions:
      cmd: >-
        python run.py --rust import-json -T import/gr-interactions.transcript
        --stage gr-interactions -D gr-schema --truncate
        import/gr-interactions.toml data/goodreads_interactions.json.gz
      wdir: ..
      deps:
        - data/goodreads_interactions.json.gz
        - import/gr-interactions.toml
        - schemas/gr-schema.status
      outs:
        - import/gr-interactions.transcript

    gr-works:
      cmd: >-
        python run.py --rust import-json -T import/gr-works.transcript
        --stage gr-works -D gr-schema --truncate
        import/gr-works.toml data/goodreads_book_works.json.gz
      wdir: ..
      deps:
        - data/goodreads_book_works.json.gz
        - import/gr-works.toml
        - schemas/gr-schema.status
      outs:
        - import/gr-works.transcript

    # --- Library of Congress MARC imports ---
    # These depend on whole source directories rather than single files.
    loc-mds-books:
      cmd: >-
        python run.py --rust parse-marc --db-schema locmds
        -t book_marc_field --truncate
        --stage loc-mds-books -D loc-mds-schema
        --transcript import/loc-mds-books.transcript
        --src-dir data/loc-books --src-prefix BooksAll.2016
      wdir: ..
      deps:
        - data/loc-books
        - schemas/loc-mds-schema.status
      outs:
        - import/loc-mds-books.transcript

    loc-mds-names:
      cmd: >-
        python run.py --rust parse-marc --db-schema locmds
        -t name_marc_field --truncate
        --stage loc-mds-names -D loc-mds-schema
        --transcript import/loc-mds-names.transcript
        --src-dir data/loc-names --src-prefix Names.2016
      wdir: ..
      deps:
        - data/loc-names
        - schemas/loc-mds-schema.status
      outs:
        - import/loc-mds-names.transcript

    # --- OpenLibrary dump imports ---
    ol-authors:
      cmd: >-
        python run.py --rust import-json -T import/ol-authors.transcript
        --stage ol-authors -D ol-schema --truncate
        import/ol-authors.toml data/ol_dump_authors.txt.gz
      wdir: ..
      deps:
        - data/ol_dump_authors.txt.gz
        - import/ol-authors.toml
        - schemas/ol-schema.status
      outs:
        - import/ol-authors.transcript

    ol-editions:
      cmd: >-
        python run.py --rust import-json -T import/ol-editions.transcript
        --stage ol-editions -D ol-schema --truncate
        import/ol-editions.toml data/ol_dump_editions.txt.gz
      wdir: ..
      deps:
        - data/ol_dump_editions.txt.gz
        - import/ol-editions.toml
        - schemas/ol-schema.status
      outs:
        - import/ol-editions.transcript

    ol-works:
      cmd: >-
        python run.py --rust import-json -T import/ol-works.transcript
        --stage ol-works -D ol-schema --truncate
        import/ol-works.toml data/ol_dump_works.txt.gz
      wdir: ..
      deps:
        - data/ol_dump_works.txt.gz
        - import/ol-works.toml
        - schemas/ol-schema.status
      outs:
        - import/ol-works.transcript

    # --- VIAF MARC import ---
    viaf:
      cmd: >-
        python run.py --rust parse-marc --db-schema viaf
        -t marc_field --truncate
        --stage viaf -D viaf-schema
        --transcript import/viaf.transcript --line-mode
        data/viaf-clusters-marc21.xml.gz
      wdir: ..
      deps:
        - data/viaf-clusters-marc21.xml.gz
        - schemas/viaf-schema.status
      outs:
        - import/viaf.transcript

    # Second half of the two-stage design: one generated stage per item.
    # For each name listed under `foreach`, the `do` template is instantiated
    # with ${item} replaced by that name; it reads ${item}.transcript and
    # writes ${item}.status. No `wdir` is set here, so these paths (and the
    # `../run.py` reference) are relative to this file's own directory.
    # Keep this list in sync with the import stages defined above.
    status:
      foreach:
        - az-ratings
        - bx-ratings
        - gr-authors
        - gr-book-genres
        - gr-books
        - gr-interactions
        - gr-works
        - loc-mds-books
        - loc-mds-names
        - ol-authors
        - ol-editions
        - ol-works
        - viaf
      do:
        cmd: python ../run.py stage-status -o ${item}.status ${item}
        # Marks the stage as always changed, so it is re-run on every
        # repro instead of being skipped when its deps look unchanged.
        always_changed: true
        outs:
          - ${item}.status
        deps:
          - ${item}.transcript
Tip!

Press p to see the previous file, or n to see the next file.

Comments

Loading...