update dvc

Michael Ekstrand 6 months ago
parent
commit
0ce8407694
@@ -1,13 +0,0 @@
-deps:
-- path: pgstat://loc-mds-book-info
-  md5: ae5b3dfa240bfa1de37f585efbc9c55b
-- path: pgstat://gr-book-info
-  md5: b6d909057e78a11672d579e61c18df0a
-- path: pgstat://author-info
-  md5: 1276cdd4578b084c3b77103d7320b310
-- path: pgstat://author-stats
-  md5: 9261720ac32456ce94d3c5da68a5992b
-- path: pgstat://cluster-stats
-  md5: 5817bdef3006f73a09bcf33d4bc6a0b4
-- path: pgstat://loc-mds-index-names
-  md5: 4d8d4e061447b2d53f39262c41e60f74
@@ -1,29 +1,31 @@
-BX:
-  cmd: unzip BX-CSV-Dump.zip
-  deps:
-  - path: BX-CSV-Dump.zip
-    md5: 37d647ee9e18ba134ea6d78ee4fe5292
-  outs:
-  - path: BX-Book-Ratings.csv
-    md5: b34fe0534c9b846b8a45f316c60eb92b
-  - path: BX-Books.csv
-    md5: d50d59b0c40f10d37d379d9fd3fb98ac
-  - path: BX-Users.csv
-    md5: 6ec3fe6463da9e149d474ed8226612a2
-loc-books:
-  cmd: curl https://www.loc.gov/cds/downloads/MDSConnect/BooksAll.2016.part[01-43].xml.gz
-    -o "loc-books/BooksAll.2016.part#1.xml.gz" --create-dirs
-  outs:
-  - path: loc-books
-    md5: 1b1e7ab1d98cc81e373dfc53345d4bb7.dir
-loc-names:
-  cmd: curl https://www.loc.gov/cds/downloads/MDSConnect/Names.2016.part[01-40].xml.gz
-    -o "loc-names/Names.2016.part#1.xml.gz" --create-dirs
-  outs:
-  - path: loc-names
-    md5: fc488a8775561070cced774803fe0d72.dir
-viaf-clusters-marc21:
-  cmd: aria2c -o viaf-clusters-marc21.xml.gz http://viaf.org/viaf/data/viaf-20191007-clusters-marc21.xml.gz
-  outs:
-  - path: viaf-clusters-marc21.xml.gz
-    md5: 2f1af5262584d38f7331d333dec81cc3
+schema: '2.0'
+stages:
+  BX:
+    cmd: unzip BX-CSV-Dump.zip
+    deps:
+    - path: BX-CSV-Dump.zip
+      md5: 37d647ee9e18ba134ea6d78ee4fe5292
+    outs:
+    - path: BX-Book-Ratings.csv
+      md5: b34fe0534c9b846b8a45f316c60eb92b
+    - path: BX-Books.csv
+      md5: d50d59b0c40f10d37d379d9fd3fb98ac
+    - path: BX-Users.csv
+      md5: 6ec3fe6463da9e149d474ed8226612a2
+  loc-books:
+    cmd: curl https://www.loc.gov/cds/downloads/MDSConnect/BooksAll.2016.part[01-43].xml.gz
+      -o "loc-books/BooksAll.2016.part#1.xml.gz" --create-dirs
+    outs:
+    - path: loc-books
+      md5: 1b1e7ab1d98cc81e373dfc53345d4bb7.dir
+  loc-names:
+    cmd: curl https://www.loc.gov/cds/downloads/MDSConnect/Names.2016.part[01-40].xml.gz
+      -o "loc-names/Names.2016.part#1.xml.gz" --create-dirs
+    outs:
+    - path: loc-names
+      md5: fc488a8775561070cced774803fe0d72.dir
+  viaf-clusters-marc21:
+    cmd: aria2c -o viaf-clusters-marc21.xml.gz http://viaf.org/viaf/data/viaf-20191007-clusters-marc21.xml.gz
+    outs:
+    - path: viaf-clusters-marc21.xml.gz
+      md5: 2f1af5262584d38f7331d333dec81cc3
@@ -0,0 +1,47 @@
+schema: '2.0'
+stages:
+  schema:
+    cmd: 'true'
+    deps:
+    - path: schemas/az-schema.status
+      md5: 01a7ee13889d967ed8ac69bfdf592d80
+      size: 199
+    - path: schemas/bx-schema.status
+      md5: 48cd11df72cf2bb28d5a2ff58f02d972
+      size: 199
+    - path: schemas/common-schema.status
+      md5: f12f086695a243c125731397d4a31bf7
+      size: 202
+    - path: schemas/gr-schema.status
+      md5: bda41944c7f3ff2a207edb2c8a83c9fc
+      size: 404
+    - path: schemas/loc-mds-schema.status
+      md5: e63399b7692987ecd6b579066e5bd35e
+      size: 266
+    - path: schemas/ol-schema.status
+      md5: ea3a792c4941083707b2835f737ada05
+      size: 265
+    - path: schemas/viaf-schema.status
+      md5: 4ee5de53afb5dfc1e1740a8667887cc0
+      size: 204
+  all:
+    cmd: 'true'
+    deps:
+    - path: index/gr-book-info.status
+      md5: b6d909057e78a11672d579e61c18df0a
+      size: 553
+    - path: index/loc-mds-book-info.status
+      md5: ae5b3dfa240bfa1de37f585efbc9c55b
+      size: 180
+    - path: index/loc-mds-index-names.status
+      md5: 4d8d4e061447b2d53f39262c41e60f74
+      size: 186
+    - path: integrate/author-info.status
+      md5: 1276cdd4578b084c3b77103d7320b310
+      size: 395
+    - path: integrate/author-stats.status
+      md5: 9261720ac32456ce94d3c5da68a5992b
+      size: 213
+    - path: integrate/cluster-stats.status
+      md5: 5817bdef3006f73a09bcf33d4bc6a0b4
+      size: 340
@@ -0,0 +1,21 @@
+stages:
+  # Aggregate target: the command is the no-op `true`, so this stage does no
+  # work of its own. It exists to give a single name to "every schema status
+  # file listed below is present and up to date".
+  # NOTE(review): presumably each .status file is produced by a stage in the
+  # schemas/ pipeline - confirm there.
+  schema:
+    cmd: 'true'
+    deps:
+    - schemas/common-schema.status
+    - schemas/loc-mds-schema.status
+    - schemas/ol-schema.status
+    - schemas/az-schema.status
+    - schemas/bx-schema.status
+    - schemas/gr-schema.status
+    - schemas/viaf-schema.status
+
+  # Aggregate target: like a phony `all` rule, the command is the no-op `true`
+  # and the stage only collects the index/ and integrate/ status files below
+  # as dependencies.
+  # NOTE(review): presumably these are the terminal outputs of the index and
+  # integrate pipelines - confirm against those dvc.yaml files.
+  all:
+    cmd: 'true'
+    deps:
+    - index/loc-mds-book-info.status
+    - index/gr-book-info.status
+    - integrate/author-info.status
+    - integrate/author-stats.status
+    - integrate/cluster-stats.status
+    - index/loc-mds-index-names.status
@@ -1,15 +0,0 @@
-md5: 3659a42512b1273d52751f019a7ed365
-cmd: python run.py --rust pcat -t az.raw_ratings -s az-ratings -T import/az-ratings.transcript
-  -D az-schema -f CSV data/ratings_Books.csv
-wdir: ..
-deps:
-- md5: 77b4a5b887e14e16b13e7788bdf70156
-  path: data/ratings_Books.csv
-- md5: 01a7ee13889d967ed8ac69bfdf592d80
-  path: pgstat://az-schema
-outs:
-- path: pgstat://az-ratings
-  cache: false
-  md5: 08a44188987803be46be4286a2e5a5f3
-- md5: 4266a96f87e134dac6b60df0514a36d2
-  path: import/az-ratings.transcript
@@ -1,14 +0,0 @@
-md5: 039228477cc645453b79fb5fd70ec1f1
-cmd: python run.py bx-import -T import/bx-ratings.transcript data/BX-Book-Ratings.csv
-wdir: ..
-deps:
-- md5: b34fe0534c9b846b8a45f316c60eb92b
-  path: data/BX-Book-Ratings.csv
-- md5: 48cd11df72cf2bb28d5a2ff58f02d972
-  path: pgstat://bx-schema
-outs:
-- path: pgstat://bx-ratings
-  cache: false
-  md5: 9f1009bfdb68267b39ac94ff3d754756
-- md5: 511096523052e1563c4bdcc66b6ed143
-  path: import/bx-ratings.transcript
@@ -0,0 +1,329 @@
+schema: '2.0'
+stages:
+  az-ratings:
+    cmd: python run.py --rust pcat -t az.raw_ratings -s az-ratings -T import/az-ratings.transcript
+      -D az-schema -f CSV data/ratings_Books.csv
+    deps:
+    - path: data/ratings_Books.csv
+      md5: 77b4a5b887e14e16b13e7788bdf70156
+      size: 916259348
+    - path: schemas/az-schema.status
+      md5: 01a7ee13889d967ed8ac69bfdf592d80
+      size: 199
+    outs:
+    - path: import/az-ratings.transcript
+      md5: 4266a96f87e134dac6b60df0514a36d2
+  bx-ratings:
+    cmd: python run.py bx-import -T import/bx-ratings.transcript data/BX-Book-Ratings.csv
+    deps:
+    - path: data/BX-Book-Ratings.csv
+      md5: b34fe0534c9b846b8a45f316c60eb92b
+      size: 30682276
+    - path: schemas/bx-schema.status
+      md5: 48cd11df72cf2bb28d5a2ff58f02d972
+      size: 199
+    outs:
+    - path: import/bx-ratings.transcript
+      md5: 511096523052e1563c4bdcc66b6ed143
+  gr-authors:
+    cmd: python run.py --rust import-json -T import/gr-authors.transcript --stage
+      gr-authors -D gr-schema --truncate import/gr-authors.toml data/goodreads_book_authors.json.gz
+    deps:
+    - path: data/goodreads_book_authors.json.gz
+      md5: b193c3febd961fb69443b65ba05b83a7
+      size: 17877585
+    - path: import/gr-authors.toml
+      md5: 05080719337f65735dfeffacec9764c4
+      size: 64
+    - path: schemas/gr-schema.status
+      md5: bda41944c7f3ff2a207edb2c8a83c9fc
+      size: 404
+    outs:
+    - path: import/gr-authors.transcript
+      md5: 40acfcc81a4be3363d61febed1716f00
+  gr-book-genres:
+    cmd: python run.py --rust import-json -T import/gr-book-genres.transcript --stage
+      gr-book-genres -D gr-schema --truncate import/gr-book-genres.toml data/goodreads_book_genres_initial.json.gz
+    deps:
+    - path: data/goodreads_book_genres_initial.json.gz
+      md5: 99ee3d1cadd68818c3dd0ef0d2f10602
+      size: 24253992
+    - path: import/gr-book-genres.toml
+      md5: 5098a690f9f9e1320da3a12f0654192a
+      size: 74
+    - path: schemas/gr-schema.status
+      md5: bda41944c7f3ff2a207edb2c8a83c9fc
+      size: 404
+    outs:
+    - path: import/gr-book-genres.transcript
+      md5: 5cd443ca86c79bef13040a97aa135eb3
+  gr-books:
+    cmd: python run.py --rust import-json -T import/gr-books.transcript --stage gr-books
+      -D gr-schema --truncate import/gr-books.toml data/goodreads_books.json.gz
+    deps:
+    - path: data/goodreads_books.json.gz
+      md5: 01b40c70a00fb6aa321ee478f0fd0d6b
+      size: 2043729443
+    - path: import/gr-books.toml
+      md5: e30abcef8e2d1c32243dcae1a526dfcc
+      size: 60
+    - path: schemas/gr-schema.status
+      md5: bda41944c7f3ff2a207edb2c8a83c9fc
+      size: 404
+    outs:
+    - path: import/gr-books.transcript
+      md5: f493bf8e51b22ccdf258b07141c0a79d
+  gr-interactions:
+    cmd: python run.py --rust import-json -T import/gr-interactions.transcript --stage
+      gr-interactions -D gr-schema --truncate import/gr-interactions.toml data/goodreads_interactions.json.gz
+    deps:
+    - path: data/goodreads_interactions.json.gz
+      md5: f2d054a85f33d405a9bff6933005ba89
+      size: 9388113365
+    - path: import/gr-interactions.toml
+      md5: c6133a33f6dd80d2e378c0b976112802
+      size: 74
+    - path: schemas/gr-schema.status
+      md5: bda41944c7f3ff2a207edb2c8a83c9fc
+      size: 404
+    outs:
+    - path: import/gr-interactions.transcript
+      md5: 83ef4771b7c84a5730ba94fe8580a261
+  gr-works:
+    cmd: python run.py --rust import-json -T import/gr-works.transcript --stage gr-works
+      -D gr-schema --truncate import/gr-works.toml data/goodreads_book_works.json.gz
+    deps:
+    - path: data/goodreads_book_works.json.gz
+      md5: e80738a88d02d2b0081cd249d9b4f081
+      size: 81412944
+    - path: import/gr-works.toml
+      md5: 670f9192bd7f532e5787b829722ebe0a
+      size: 60
+    - path: schemas/gr-schema.status
+      md5: bda41944c7f3ff2a207edb2c8a83c9fc
+      size: 404
+    outs:
+    - path: import/gr-works.transcript
+      md5: 2ff4ab63f7c9043b7526f85f9bb36028
+  loc-mds-books:
+    cmd: python run.py --rust parse-marc --db-schema locmds -t book_marc_field --truncate
+      --stage loc-mds-books -D loc-mds-schema --transcript import/loc-mds-books.transcript
+      --src-dir data/loc-books --src-prefix BooksAll.2016
+    deps:
+    - path: data/loc-books
+      md5: 1b1e7ab1d98cc81e373dfc53345d4bb7.dir
+      size: 3129774145
+      nfiles: 43
+    - path: schemas/loc-mds-schema.status
+      md5: e63399b7692987ecd6b579066e5bd35e
+      size: 266
+    outs:
+    - path: import/loc-mds-books.transcript
+      md5: 19b0a011c9053361278a3d812518b229
+  loc-mds-names:
+    cmd: python run.py --rust parse-marc --db-schema locmds -t name_marc_field --truncate
+      --stage loc-mds-names -D loc-mds-schema --transcript import/loc-mds-names.transcript
+      --src-dir data/loc-names --src-prefix Names.2016
+    deps:
+    - path: data/loc-names
+      md5: fc488a8775561070cced774803fe0d72.dir
+      size: 1410755359
+      nfiles: 40
+    - path: schemas/loc-mds-schema.status
+      md5: e63399b7692987ecd6b579066e5bd35e
+      size: 266
+    outs:
+    - path: import/loc-mds-names.transcript
+      md5: 14ce7449e200f45a93058a6fdac918ec
+  ol-authors:
+    cmd: python run.py --rust import-json -T import/ol-authors.transcript --stage
+      ol-authors -D ol-schema --truncate import/ol-authors.toml data/ol_dump_authors.txt.gz
+    deps:
+    - path: data/ol_dump_authors.txt.gz
+      md5: 364e02a44e9e9a572e88692fc78fef27
+      size: 306408477
+    - path: import/ol-authors.toml
+      md5: d39f4bee21e807362a55474f7c6093d0
+      size: 111
+    - path: schemas/ol-schema.status
+      md5: ea3a792c4941083707b2835f737ada05
+      size: 265
+    outs:
+    - path: import/ol-authors.transcript
+      md5: 50e5bedc17cba4c5ceb8eef3d86fc307
+  ol-editions:
+    cmd: python run.py --rust import-json -T import/ol-editions.transcript --stage
+      ol-editions -D ol-schema --truncate import/ol-editions.toml data/ol_dump_editions.txt.gz
+    deps:
+    - path: data/ol_dump_editions.txt.gz
+      md5: e105295bf5f8025ecd7e43838ed0739c
+      size: 6081278291
+    - path: import/ol-editions.toml
+      md5: e528cfea761765865a7097be7abbc510
+      size: 114
+    - path: schemas/ol-schema.status
+      md5: ea3a792c4941083707b2835f737ada05
+      size: 265
+    outs:
+    - path: import/ol-editions.transcript
+      md5: c2c6951a052ebebe5f51ceb79f324162
+  ol-works:
+    cmd: python run.py --rust import-json -T import/ol-works.transcript --stage ol-works
+      -D ol-schema --truncate import/ol-works.toml data/ol_dump_works.txt.gz
+    deps:
+    - path: data/ol_dump_works.txt.gz
+      md5: 84e236955e5f683adde6a677d80475a0
+      size: 1666325144
+    - path: import/ol-works.toml
+      md5: 5ad04190c9392dd7cae3fdead9291f09
+      size: 105
+    - path: schemas/ol-schema.status
+      md5: ea3a792c4941083707b2835f737ada05
+      size: 265
+    outs:
+    - path: import/ol-works.transcript
+      md5: 6345885738815d2a51d331f7aba9e792
+  viaf:
+    cmd: python run.py --rust parse-marc --db-schema viaf -t marc_field --truncate
+      --stage viaf -D viaf-schema --transcript import/viaf.transcript --line-mode
+      data/viaf-clusters-marc21.xml.gz
+    deps:
+    - path: data/viaf-clusters-marc21.xml.gz
+      md5: 2f1af5262584d38f7331d333dec81cc3
+      size: 10662471024
+    - path: schemas/viaf-schema.status
+      md5: 4ee5de53afb5dfc1e1740a8667887cc0
+      size: 204
+    outs:
+    - path: import/viaf.transcript
+      md5: 326fe03b3007b0dbe02ff43c3da7e6cb
+  status@az-ratings:
+    cmd: python ../run.py stage-status -o az-ratings.status az-ratings
+    deps:
+    - path: az-ratings.transcript
+      md5: 4266a96f87e134dac6b60df0514a36d2
+      size: 94
+    outs:
+    - path: az-ratings.status
+      md5: 08a44188987803be46be4286a2e5a5f3
+      size: 180
+  status@loc-mds-books:
+    cmd: python ../run.py stage-status -o loc-mds-books.status loc-mds-books
+    deps:
+    - path: loc-mds-books.transcript
+      md5: 19b0a011c9053361278a3d812518b229
+      size: 4259
+    outs:
+    - path: loc-mds-books.status
+      md5: f6e0026b4d4fe4bac7056c7fe0491259
+      size: 4030
+  status@ol-authors:
+    cmd: python ../run.py stage-status -o ol-authors.status ol-authors
+    deps:
+    - path: ol-authors.transcript
+      md5: 50e5bedc17cba4c5ceb8eef3d86fc307
+      size: 130
+    outs:
+    - path: ol-authors.status
+      md5: 456954970c9a56193680bb9399ac9164
+      size: 185
+  status@gr-works:
+    cmd: python ../run.py stage-status -o gr-works.status gr-works
+    deps:
+    - path: gr-works.transcript
+      md5: 2ff4ab63f7c9043b7526f85f9bb36028
+      size: 136
+    outs:
+    - path: gr-works.status
+      md5: a223984989927e62d8aea9230810d6bb
+      size: 189
+  status@loc-mds-names:
+    cmd: python ../run.py stage-status -o loc-mds-names.status loc-mds-names
+    deps:
+    - path: loc-mds-names.transcript
+      md5: 14ce7449e200f45a93058a6fdac918ec
+      size: 3845
+    outs:
+    - path: loc-mds-names.status
+      md5: 5d6d486b5b3acde6b3a9ce64d0fe794b
+      size: 3637
+  status@ol-works:
+    cmd: python ../run.py stage-status -o ol-works.status ol-works
+    deps:
+    - path: ol-works.transcript
+      md5: 6345885738815d2a51d331f7aba9e792
+      size: 128
+    outs:
+    - path: ol-works.status
+      md5: c84b6ccf5f89e6d8faf00cc21b4d5566
+      size: 181
+  status@gr-books:
+    cmd: python ../run.py stage-status -o gr-books.status gr-books
+    deps:
+    - path: gr-books.transcript
+      md5: f493bf8e51b22ccdf258b07141c0a79d
+      size: 131
+    outs:
+    - path: gr-books.status
+      md5: a0dcde1044f2c61895def1a9523be067
+      size: 184
+  status@gr-interactions:
+    cmd: python ../run.py stage-status -o gr-interactions.status gr-interactions
+    deps:
+    - path: gr-interactions.transcript
+      md5: 83ef4771b7c84a5730ba94fe8580a261
+      size: 138
+    outs:
+    - path: gr-interactions.status
+      md5: acf7d8bdbf506c81f6c3ea4eded3d702
+      size: 198
+  status@gr-book-genres:
+    cmd: python ../run.py stage-status -o gr-book-genres.status gr-book-genres
+    deps:
+    - path: gr-book-genres.transcript
+      md5: 5cd443ca86c79bef13040a97aa135eb3
+      size: 145
+    outs:
+    - path: gr-book-genres.status
+      md5: 0c77c736582157805e8248a97b5f7037
+      size: 204
+  status@viaf:
+    cmd: python ../run.py stage-status -o viaf.status viaf
+    deps:
+    - path: viaf.transcript
+      md5: 326fe03b3007b0dbe02ff43c3da7e6cb
+      size: 136
+    outs:
+    - path: viaf.status
+      md5: 55fb31f4aaa86a2fd921c59c797c09a8
+      size: 186
+  status@bx-ratings:
+    cmd: python ../run.py stage-status -o bx-ratings.status bx-ratings
+    deps:
+    - path: bx-ratings.transcript
+      md5: 511096523052e1563c4bdcc66b6ed143
+      size: 146
+    outs:
+    - path: bx-ratings.status
+      md5: 9f1009bfdb68267b39ac94ff3d754756
+      size: 174
+  status@ol-editions:
+    cmd: python ../run.py stage-status -o ol-editions.status ol-editions
+    deps:
+    - path: ol-editions.transcript
+      md5: c2c6951a052ebebe5f51ceb79f324162
+      size: 131
+    outs:
+    - path: ol-editions.status
+      md5: 4c0b955dcf06c319b8dd9c1057c3c056
+      size: 187
+  status@gr-authors:
+    cmd: python ../run.py stage-status -o gr-authors.status gr-authors
+    deps:
+    - path: gr-authors.transcript
+      md5: 40acfcc81a4be3363d61febed1716f00
+      size: 138
+    outs:
+    - path: gr-authors.status
+      md5: b7701864036091016af9123c02b5d7ed
+      size: 193
@@ -0,0 +1,165 @@
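+# This file uses the two-stage status design: each command stage below writes
+# a transcript, and the foreach `status` stage at the end of the file turns
+# that transcript into a .status file. When adding a new command stage, also
+# add its name to the `status` stage's foreach list.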
+stages:
+  az-ratings:
+    cmd: python run.py --rust pcat -t az.raw_ratings -s az-ratings -T import/az-ratings.transcript
+      -D az-schema -f CSV data/ratings_Books.csv
+    wdir: ..
+    deps:
+    - data/ratings_Books.csv
+    - schemas/az-schema.status
+    outs:
+    - import/az-ratings.transcript
+
+  bx-ratings:
+    cmd: python run.py bx-import -T import/bx-ratings.transcript data/BX-Book-Ratings.csv
+    wdir: ..
+    deps:
+    - data/BX-Book-Ratings.csv
+    - schemas/bx-schema.status
+    outs:
+    - import/bx-ratings.transcript
+
+  gr-authors:
+    cmd: python run.py --rust import-json -T import/gr-authors.transcript --stage
+      gr-authors -D gr-schema --truncate import/gr-authors.toml data/goodreads_book_authors.json.gz
+    wdir: ..
+    deps:
+    - data/goodreads_book_authors.json.gz
+    - import/gr-authors.toml
+    - schemas/gr-schema.status
+    outs:
+    - import/gr-authors.transcript
+
+  gr-book-genres:
+    cmd: python run.py --rust import-json -T import/gr-book-genres.transcript --stage
+      gr-book-genres -D gr-schema --truncate import/gr-book-genres.toml data/goodreads_book_genres_initial.json.gz
+    wdir: ..
+    deps:
+    - data/goodreads_book_genres_initial.json.gz
+    - import/gr-book-genres.toml
+    - schemas/gr-schema.status
+    outs:
+    - import/gr-book-genres.transcript
+
+  gr-books:
+    cmd: python run.py --rust import-json -T import/gr-books.transcript --stage gr-books
+      -D gr-schema --truncate import/gr-books.toml data/goodreads_books.json.gz
+    wdir: ..
+    deps:
+    - data/goodreads_books.json.gz
+    - import/gr-books.toml
+    - schemas/gr-schema.status
+    outs:
+    - import/gr-books.transcript
+
+  gr-interactions:
+    cmd: python run.py --rust import-json -T import/gr-interactions.transcript --stage
+      gr-interactions -D gr-schema --truncate import/gr-interactions.toml data/goodreads_interactions.json.gz
+    wdir: ..
+    deps:
+    - data/goodreads_interactions.json.gz
+    - import/gr-interactions.toml
+    - schemas/gr-schema.status
+    outs:
+    - import/gr-interactions.transcript
+
+  gr-works:
+    cmd: python run.py --rust import-json -T import/gr-works.transcript --stage gr-works
+      -D gr-schema --truncate import/gr-works.toml data/goodreads_book_works.json.gz
+    wdir: ..
+    deps:
+    - data/goodreads_book_works.json.gz
+    - import/gr-works.toml
+    - schemas/gr-schema.status
+    outs:
+    - import/gr-works.transcript
+
+  loc-mds-books:
+    cmd: python run.py --rust parse-marc --db-schema locmds -t book_marc_field --truncate
+      --stage loc-mds-books -D loc-mds-schema --transcript import/loc-mds-books.transcript
+      --src-dir data/loc-books --src-prefix BooksAll.2016
+    wdir: ..
+    deps:
+    - data/loc-books
+    - schemas/loc-mds-schema.status
+    outs:
+    - import/loc-mds-books.transcript
+
+  loc-mds-names:
+    cmd: python run.py --rust parse-marc --db-schema locmds -t name_marc_field --truncate
+      --stage loc-mds-names -D loc-mds-schema --transcript import/loc-mds-names.transcript
+      --src-dir data/loc-names --src-prefix Names.2016
+    wdir: ..
+    deps:
+    - data/loc-names
+    - schemas/loc-mds-schema.status
+    outs:
+    - import/loc-mds-names.transcript
+
+  ol-authors:
+    cmd: python run.py --rust import-json -T import/ol-authors.transcript --stage
+      ol-authors -D ol-schema --truncate import/ol-authors.toml data/ol_dump_authors.txt.gz
+    wdir: ..
+    deps:
+    - data/ol_dump_authors.txt.gz
+    - import/ol-authors.toml
+    - schemas/ol-schema.status
+    outs:
+    - import/ol-authors.transcript
+
+  ol-editions:
+    cmd: python run.py --rust import-json -T import/ol-editions.transcript --stage
+      ol-editions -D ol-schema --truncate import/ol-editions.toml data/ol_dump_editions.txt.gz
+    wdir: ..
+    deps:
+    - data/ol_dump_editions.txt.gz
+    - import/ol-editions.toml
+    - schemas/ol-schema.status
+    outs:
+    - import/ol-editions.transcript
+
+  ol-works:
+    cmd: python run.py --rust import-json -T import/ol-works.transcript --stage ol-works
+      -D ol-schema --truncate import/ol-works.toml data/ol_dump_works.txt.gz
+    wdir: ..
+    deps:
+    - data/ol_dump_works.txt.gz
+    - import/ol-works.toml
+    - schemas/ol-schema.status
+    outs:
+    - import/ol-works.transcript
+
+  viaf:
+    cmd: python run.py --rust parse-marc --db-schema viaf -t marc_field --truncate
+      --stage viaf -D viaf-schema --transcript import/viaf.transcript --line-mode
+      data/viaf-clusters-marc21.xml.gz
+    wdir: ..
+    deps:
+    - data/viaf-clusters-marc21.xml.gz
+    - schemas/viaf-schema.status
+    outs:
+    - import/viaf.transcript
+
+  # Status half of each import: for every name in the foreach list this
+  # expands to one stage that runs `run.py stage-status` for that name,
+  # reading <name>.transcript and writing <name>.status.
+  # The list must contain the name of every command stage defined above.
+  status:
+    foreach:
+      - az-ratings
+      - bx-ratings
+      - gr-authors
+      - gr-book-genres
+      - gr-books
+      - gr-interactions
+      - gr-works
+      - loc-mds-books
+      - loc-mds-names
+      - ol-authors
+      - ol-editions
+      - ol-works
+      - viaf
+    do:
+      # No `wdir` is set here (the command stages above use `wdir: ..`), so
+      # run.py is reached through `../` and the transcript/status paths
+      # carry no `import/` prefix.
+      cmd: python ../run.py stage-status -o ${item}.status ${item}
+      # Marks the stage as always changed, so it is re-run even when the
+      # transcript dependency has not changed.
+      # NOTE(review): presumably because the status reflects state that DVC
+      # does not track - confirm.
+      always_changed: true
+      outs:
+      - ${item}.status
+      deps:
+      - ${item}.transcript
@@ -1,17 +0,0 @@
-cmd: python run.py --rust import-json -T import/gr-authors.transcript --stage gr-authors
-  -D gr-schema --truncate import/gr-authors.toml data/goodreads_book_authors.json.gz
-wdir: ..
-deps:
-- path: import/gr-authors.toml
-  md5: 05080719337f65735dfeffacec9764c4
-- path: data/goodreads_book_authors.json.gz
-  md5: b193c3febd961fb69443b65ba05b83a7
-- path: pgstat://gr-schema
-  md5: bda41944c7f3ff2a207edb2c8a83c9fc
-outs:
-- path: pgstat://gr-authors
-  cache: false
-  md5: b7701864036091016af9123c02b5d7ed
-- path: import/gr-authors.transcript
-  md5: 40acfcc81a4be3363d61febed1716f00
-md5: d40df1ed5cbf385bd9f721a3a601e76c
@@ -1,17 +0,0 @@
-cmd: python run.py --rust import-json -T import/gr-book-genres.transcript --stage
-  gr-book-genres -D gr-schema --truncate import/gr-book-genres.toml data/goodreads_book_genres_initial.json.gz
-wdir: ..
-deps:
-- path: import/gr-book-genres.toml
-  md5: 5098a690f9f9e1320da3a12f0654192a
-- path: data/goodreads_book_genres_initial.json.gz
-  md5: 99ee3d1cadd68818c3dd0ef0d2f10602
-- path: pgstat://gr-schema
-  md5: bda41944c7f3ff2a207edb2c8a83c9fc
-outs:
-- path: pgstat://gr-book-genres
-  cache: false
-  md5: 0c77c736582157805e8248a97b5f7037
-- path: import/gr-book-genres.transcript
-  md5: 5cd443ca86c79bef13040a97aa135eb3
-md5: 21e97b98d657e26e73527f4db20c5d31
@@ -1,17 +0,0 @@
-cmd: python run.py --rust import-json -T import/gr-books.transcript --stage gr-books
-  -D gr-schema --truncate import/gr-books.toml data/goodreads_books.json.gz
-wdir: ..
-deps:
-- path: import/gr-books.toml
-  md5: e30abcef8e2d1c32243dcae1a526dfcc
-- path: data/goodreads_books.json.gz
-  md5: 01b40c70a00fb6aa321ee478f0fd0d6b
-- path: pgstat://gr-schema
-  md5: bda41944c7f3ff2a207edb2c8a83c9fc
-outs:
-- path: pgstat://gr-books
-  cache: false
-  md5: a0dcde1044f2c61895def1a9523be067
-- path: import/gr-books.transcript
-  md5: f493bf8e51b22ccdf258b07141c0a79d
-md5: 3a6a5f8f12f4720e1a2e629cbc62001e
@@ -1,17 +0,0 @@
-cmd: python run.py --rust import-json -T import/gr-interactions.transcript --stage
-  gr-interactions -D gr-schema --truncate import/gr-interactions.toml data/goodreads_interactions.json.gz
-wdir: ..
-deps:
-- path: import/gr-interactions.toml
-  md5: c6133a33f6dd80d2e378c0b976112802
-- path: data/goodreads_interactions.json.gz
-  md5: f2d054a85f33d405a9bff6933005ba89
-- path: pgstat://gr-schema
-  md5: bda41944c7f3ff2a207edb2c8a83c9fc
-outs:
-- path: pgstat://gr-interactions
-  cache: false
-  md5: acf7d8bdbf506c81f6c3ea4eded3d702
-- path: import/gr-interactions.transcript
-  md5: 83ef4771b7c84a5730ba94fe8580a261
-md5: 992aa377a425cdd4d8d4aba5f77119bc
@@ -1,17 +0,0 @@
-cmd: python run.py --rust import-json -T import/gr-works.transcript --stage gr-works
-  -D gr-schema --truncate import/gr-works.toml data/goodreads_book_works.json.gz
-wdir: ..
-deps:
-- path: import/gr-works.toml
-  md5: 670f9192bd7f532e5787b829722ebe0a
-- path: data/goodreads_book_works.json.gz
-  md5: e80738a88d02d2b0081cd249d9b4f081
-- path: pgstat://gr-schema
-  md5: bda41944c7f3ff2a207edb2c8a83c9fc
-outs:
-- path: pgstat://gr-works
-  cache: false
-  md5: a223984989927e62d8aea9230810d6bb
-- path: import/gr-works.transcript
-  md5: 2ff4ab63f7c9043b7526f85f9bb36028
-md5: b500dc8755f32e5e2f3302dc5a4069aa
@@ -1,16 +0,0 @@
-md5: c05dd43881b82bbeffb23a7b6d3e1e96
-cmd: python run.py --rust parse-marc --db-schema locmds -t book_marc_field --truncate
-  --stage loc-mds-books -D loc-mds-schema --transcript import/loc-mds-books.transcript
-  --src-dir data/loc-books --src-prefix BooksAll.2016
-wdir: ..
-deps:
-- md5: 1b1e7ab1d98cc81e373dfc53345d4bb7.dir
-  path: data/loc-books
-- md5: e63399b7692987ecd6b579066e5bd35e
-  path: pgstat://loc-mds-schema
-outs:
-- path: pgstat://loc-mds-books
-  cache: false
-  md5: f6e0026b4d4fe4bac7056c7fe0491259
-- md5: 19b0a011c9053361278a3d812518b229
-  path: import/loc-mds-books.transcript
@@ -1,16 +0,0 @@
-md5: a83ec7b726726e26f5084b80bf894c9f
-cmd: python run.py --rust parse-marc --db-schema locmds -t name_marc_field --truncate
-  --stage loc-mds-names -D loc-mds-schema --transcript import/loc-mds-names.transcript
-  --src-dir data/loc-names --src-prefix Names.2016
-wdir: ..
-deps:
-- md5: fc488a8775561070cced774803fe0d72.dir
-  path: data/loc-names
-- md5: e63399b7692987ecd6b579066e5bd35e
-  path: pgstat://loc-mds-schema
-outs:
-- path: pgstat://loc-mds-names
-  cache: false
-  md5: 5d6d486b5b3acde6b3a9ce64d0fe794b
-- md5: 14ce7449e200f45a93058a6fdac918ec
-  path: import/loc-mds-names.transcript
@@ -1,17 +0,0 @@
-md5: 41b12fb82a71b892c79c766e60213349
-cmd: python run.py --rust import-json -T import/ol-authors.transcript --stage ol-authors
-  -D ol-schema --truncate import/ol-authors.toml data/ol_dump_authors.txt.gz
-wdir: ..
-deps:
-- path: import/ol-authors.toml
-  md5: d39f4bee21e807362a55474f7c6093d0
-- md5: 364e02a44e9e9a572e88692fc78fef27
-  path: data/ol_dump_authors.txt.gz
-- md5: ea3a792c4941083707b2835f737ada05
-  path: pgstat://ol-schema
-outs:
-- path: pgstat://ol-authors
-  cache: false
-  md5: 456954970c9a56193680bb9399ac9164
-- md5: 50e5bedc17cba4c5ceb8eef3d86fc307
-  path: import/ol-authors.transcript
@@ -1,17 +0,0 @@
-md5: 24dbe71ecb56b10c6ba6c621fc734aad
-cmd: python run.py --rust import-json -T import/ol-editions.transcript --stage ol-editions
-  -D ol-schema --truncate import/ol-editions.toml data/ol_dump_editions.txt.gz
-wdir: ..
-deps:
-- path: import/ol-editions.toml
-  md5: e528cfea761765865a7097be7abbc510
-- md5: e105295bf5f8025ecd7e43838ed0739c
-  path: data/ol_dump_editions.txt.gz
-- md5: ea3a792c4941083707b2835f737ada05
-  path: pgstat://ol-schema
-outs:
-- path: pgstat://ol-editions
-  cache: false
-  md5: 4c0b955dcf06c319b8dd9c1057c3c056
-- md5: c2c6951a052ebebe5f51ceb79f324162
-  path: import/ol-editions.transcript
@@ -1,17 +0,0 @@
-md5: 401afdb0f51cafd634e1980cce30cb96
-cmd: python run.py --rust import-json -T import/ol-works.transcript --stage ol-works
-  -D ol-schema --truncate import/ol-works.toml data/ol_dump_works.txt.gz
-wdir: ..
-deps:
-- path: import/ol-works.toml
-  md5: 5ad04190c9392dd7cae3fdead9291f09
-- md5: 84e236955e5f683adde6a677d80475a0
-  path: data/ol_dump_works.txt.gz
-- md5: ea3a792c4941083707b2835f737ada05
-  path: pgstat://ol-schema
-outs:
-- path: pgstat://ol-works
-  cache: false
-  md5: c84b6ccf5f89e6d8faf00cc21b4d5566
-- md5: 6345885738815d2a51d331f7aba9e792
-  path: import/ol-works.transcript
@@ -1,15 +0,0 @@
-md5: 3e58afead501a712551bf345111e0d02
-cmd: python run.py --rust parse-marc --db-schema viaf -t marc_field --truncate --stage
-  viaf -D viaf-schema --transcript import/viaf.transcript --line-mode data/viaf-clusters-marc21.xml.gz
-wdir: ..
-deps:
-- md5: 2f1af5262584d38f7331d333dec81cc3
-  path: data/viaf-clusters-marc21.xml.gz
-- md5: 4ee5de53afb5dfc1e1740a8667887cc0
-  path: pgstat://viaf-schema
-outs:
-- path: pgstat://viaf
-  cache: false
-  md5: 55fb31f4aaa86a2fd921c59c797c09a8
-- md5: 326fe03b3007b0dbe02ff43c3da7e6cb
-  path: import/viaf.transcript
@@ -1,15 +0,0 @@
-md5: a773a007075da53c462331462a2ff0b3
-cmd: python ../run.py sql-script az-index.sql
-deps:
-- path: az-index.sql
-  md5: 894b772669d1cff5511b8f651fe4a1bb
-- path: pgstat://az-ratings
-  md5: 08a44188987803be46be4286a2e5a5f3
-- path: pgstat://cluster
-  md5: abac8ffbe1d4b0e33b39320bdfd7974d
-outs:
-- path: pgstat://az-index
-  cache: false
-  md5: b845cbb28bd6735919d6ef20c069b2d1
-- path: az-index.transcript
-  md5: 11bcbcd2d11a8a121b22a9a799312434
@@ -1,15 +0,0 @@
-md5: 45d9ca358eb4531acc994d0e22d17ae7
-cmd: python ../run.py sql-script bx-index.sql
-deps:
-- path: bx-index.sql
-  md5: 4490ac14a5764c8dd928388f9ecde2af
-- path: pgstat://bx-ratings
-  md5: 9f1009bfdb68267b39ac94ff3d754756
-- path: pgstat://cluster
-  md5: abac8ffbe1d4b0e33b39320bdfd7974d
-outs:
-- path: pgstat://bx-index
-  cache: false
-  md5: 7e6211a90148898456ab9dada9353cc2
-- path: bx-index.transcript
-  md5: 10895cefc67a2d442b8d834d8399d05b
@@ -0,0 +1,369 @@
+schema: '2.0'
+stages:
+  az-index:
+    cmd: python ../run.py sql-script az-index.sql
+    deps:
+    - path: ../import/az-ratings.status
+      md5: 08a44188987803be46be4286a2e5a5f3
+      size: 180
+    - path: ../integrate/cluster.status
+      md5: abac8ffbe1d4b0e33b39320bdfd7974d
+      size: 51
+    - path: az-index.sql
+      md5: 894b772669d1cff5511b8f651fe4a1bb
+      size: 1320
+    outs:
+    - path: az-index.transcript
+      md5: 11bcbcd2d11a8a121b22a9a799312434
+  bx-index:
+    cmd: python ../run.py sql-script bx-index.sql
+    deps:
+    - path: ../import/bx-ratings.status
+      md5: 9f1009bfdb68267b39ac94ff3d754756
+      size: 174
+    - path: ../integrate/cluster.status
+      md5: abac8ffbe1d4b0e33b39320bdfd7974d
+      size: 51
+    - path: bx-index.sql
+      md5: 4490ac14a5764c8dd928388f9ecde2af
+      size: 1142
+    outs:
+    - path: bx-index.transcript
+      md5: 10895cefc67a2d442b8d834d8399d05b
+  gr-book-authors:
+    cmd: python ../run.py sql-script gr-book-authors.sql
+    deps:
+    - path: ../import/gr-authors.status
+      md5: b7701864036091016af9123c02b5d7ed
+      size: 193
+    - path: ../import/gr-books.status
+      md5: a0dcde1044f2c61895def1a9523be067
+      size: 184
+    - path: ../import/gr-works.status
+      md5: a223984989927e62d8aea9230810d6bb
+      size: 189
+    - path: ../integrate/cluster.status
+      md5: abac8ffbe1d4b0e33b39320bdfd7974d
+      size: 51
+    - path: gr-book-authors.sql
+      md5: f007063c0371e5881b2e2663668b2c0c
+      size: 1305
+    - path: gr-index-books.status
+      md5: 3fe0d48fd17efd5e670f6b2649fc2286
+      size: 357
+    outs:
+    - path: gr-book-authors.transcript
+      md5: 52947a5942c872b654f995adec8d508e
+  gr-book-info:
+    cmd: python ../run.py sql-script gr-book-info.sql
+    deps:
+    - path: ../import/gr-books.status
+      md5: a0dcde1044f2c61895def1a9523be067
+      size: 184
+    - path: ../import/gr-works.status
+      md5: a223984989927e62d8aea9230810d6bb
+      size: 189
+    - path: ../integrate/cluster.status
+      md5: abac8ffbe1d4b0e33b39320bdfd7974d
+      size: 51
+    - path: gr-book-authors.status
+      md5: bbad1d52089bf2bd674606946a066cb8
+      size: 370
+    - path: gr-book-info.sql
+      md5: 622dad6cbfde4b167bdeb05584b33a64
+      size: 4107
+    - path: gr-index-books.status
+      md5: 3fe0d48fd17efd5e670f6b2649fc2286
+      size: 357
+    outs:
+    - path: gr-book-info.transcript
+      md5: 659436f044b9398eb82378dcc2e97d7a
+  gr-index-books:
+    cmd: python ../run.py sql-script gr-index-books.sql
+    deps:
+    - path: ../import/gr-authors.status
+      md5: b7701864036091016af9123c02b5d7ed
+      size: 193
+    - path: ../import/gr-book-genres.status
+      md5: 0c77c736582157805e8248a97b5f7037
+      size: 204
+    - path: ../import/gr-books.status
+      md5: a0dcde1044f2c61895def1a9523be067
+      size: 184
+    - path: ../import/gr-works.status
+      md5: a223984989927e62d8aea9230810d6bb
+      size: 189
+    - path: gr-index-books.sql
+      md5: bf2560c7da69f6fc8d124a98f4f5a0ed
+      size: 4403
+    outs:
+    - path: gr-index-books.transcript
+      md5: 39cfd15b59141412e4fb699be6b12762
+  gr-index-ratings:
+    cmd: python ../run.py sql-script gr-index-ratings.sql
+    deps:
+    - path: ../import/gr-interactions.status
+      md5: acf7d8bdbf506c81f6c3ea4eded3d702
+      size: 198
+    - path: ../integrate/cluster.status
+      md5: abac8ffbe1d4b0e33b39320bdfd7974d
+      size: 51
+    - path: gr-book-info.status
+      md5: b6d909057e78a11672d579e61c18df0a
+      size: 553
+    - path: gr-index-ratings.sql
+      md5: 3ab0a81a10a080e76108eb37aabd3394
+      size: 3578
+    outs:
+    - path: gr-index-ratings.transcript
+      md5: 98fcebd1647c3cc36e742a085833ec59
+  isbn-norm:
+    cmd: python ../run.py sql-script isbn-norm.sql
+    deps:
+    - path: gr-index-books.status
+      md5: 3fe0d48fd17efd5e670f6b2649fc2286
+      size: 357
+    - path: isbn-norm.sql
+      md5: fc9e633e6980c0f678b099bae28688d1
+      size: 491
+    - path: loc-mds-index-books.status
+      md5: abdf7eecd1861c7318b15a2b32435204
+      size: 387
+    - path: ol-index.status
+      md5: 431be74ffa0928a66c3ec3084f8d3640
+      size: 356
+    outs:
+    - path: isbn-norm.transcript
+      md5: 7c99a01ec9d598650ba83fd2102a8882
+  loc-mds-book-info:
+    cmd: python ../run.py sql-script loc-mds-book-info.sql
+    deps:
+    - path: loc-mds-book-info.sql
+      md5: c13381fbb505c7e778032685452c1f23
+      size: 1360
+    - path: loc-mds-index-books.status
+      md5: abdf7eecd1861c7318b15a2b32435204
+      size: 387
+    outs:
+    - path: loc-mds-book-info.transcript
+      md5: 83e7b6b2931146f417ea4b84b76614d2
+  loc-mds-extract-isbns:
+    cmd: python run.py --rust parse-isbns --src-table locmds.book_raw_isbn --out-table
+      locmds.book_extracted_isbn --stage loc-mds-extract-isbns -D loc-mds-books -T
+      loc-mds-extract-isbns.transcript
+    deps:
+    - path: import/loc-mds-books.status
+      md5: f6e0026b4d4fe4bac7056c7fe0491259
+      size: 4030
+    outs:
+    - path: loc-mds-extract-isbns.transcript
+      md5: e08255fdca28f536f2aa8f2d065a26cf
+  loc-mds-index-books:
+    cmd: python ../run.py sql-script loc-mds-index-books.sql
+    deps:
+    - path: ../import/loc-mds-books.status
+      md5: f6e0026b4d4fe4bac7056c7fe0491259
+      size: 4030
+    - path: loc-mds-extract-isbns.status
+      md5: c4ceff988a5b8a7c15ca00c0dbc4ec59
+      size: 132
+    - path: loc-mds-index-books.sql
+      md5: cdd4dabcd4f7b9b9f4eca3b83f1eb676
+      size: 4104
+    outs:
+    - path: loc-mds-index-books.transcript
+      md5: 0af5f4abe6b224d06ac8a2950d22ee61
+  loc-mds-index-names:
+    cmd: python ../run.py sql-script loc-mds-index-names.sql
+    deps:
+    - path: ../import/loc-mds-names.status
+      md5: 5d6d486b5b3acde6b3a9ce64d0fe794b
+      size: 3637
+    - path: loc-mds-index-names.sql
+      md5: 11f203b5b97526c532647d2de7c2c9cc
+      size: 888
+    outs:
+    - path: loc-mds-index-names.transcript
+      md5: 0d07d44aa3486e6aab6aa3ee67b0a5b3
+  ol-book-info:
+    cmd: python ../run.py sql-script ol-book-info.sql
+    deps:
+    - path: ol-book-info.sql
+      md5: 3ef46b2a3e878a00e3a5ad7a31aa5a1f
+      size: 582
+    - path: ol-index.status
+      md5: 431be74ffa0928a66c3ec3084f8d3640
+      size: 356
+    outs:
+    - path: ol-book-info.transcript
+      md5: 5fb342deef98c301027b1807d96d04d5
+  ol-index:
+    cmd: python ../run.py sql-script ol-index.sql
+    deps:
+    - path: ../import/ol-authors.status
+      md5: 456954970c9a56193680bb9399ac9164
+      size: 185
+    - path: ../import/ol-editions.status
+      md5: 4c0b955dcf06c319b8dd9c1057c3c056
+      size: 187
+    - path: ../import/ol-works.status
+      md5: c84b6ccf5f89e6d8faf00cc21b4d5566
+      size: 181
+    - path: ol-index.sql
+      md5: 627e1e62981c643104bdd6bec34fab7d
+      size: 7388
+    outs:
+    - path: ol-index.transcript
+      md5: 5528fa7ed03102ecefdf92ace25a13d0
+  viaf-index:
+    cmd: python ../run.py sql-script viaf-index.sql
+    deps:
+    - path: ../import/viaf.status
+      md5: 55fb31f4aaa86a2fd921c59c797c09a8
+      size: 186
+    - path: viaf-index.sql
+      md5: b39f849908bb3c4b82e10c1110e44786
+      size: 2728
+    outs:
+    - path: viaf-index.transcript
+      md5: a2a7c0283e2dddd99d87139e596512b6
+  status@gr-index-books:
+    cmd: python ../run.py stage-status -o gr-index-books.status gr-index-books
+    deps:
+    - path: gr-index-books.transcript
+      md5: 39cfd15b59141412e4fb699be6b12762
+      size: 2521
+    outs:
+    - path: gr-index-books.status
+      md5: 3fe0d48fd17efd5e670f6b2649fc2286
+      size: 357
+  status@ol-index:
+    cmd: python ../run.py stage-status -o ol-index.status ol-index
+    deps:
+    - path: ol-index.transcript
+      md5: 5528fa7ed03102ecefdf92ace25a13d0
+      size: 4934
+    outs:
+    - path: ol-index.status
+      md5: 431be74ffa0928a66c3ec3084f8d3640
+      size: 356
+  status@ol-book-info:
+    cmd: python ../run.py stage-status -o ol-book-info.status ol-book-info
+    deps:
+    - path: ol-book-info.transcript
+      md5: 5fb342deef98c301027b1807d96d04d5
+      size: 510
+    outs:
+    - path: ol-book-info.status
+      md5: 8ce3b1230c306ab53d019453bd336927
+      size: 159
+  status@viaf-index:
+    cmd: python ../run.py stage-status -o viaf-index.status viaf-index
+    deps:
+    - path: viaf-index.transcript
+      md5: a2a7c0283e2dddd99d87139e596512b6
+      size: 1292
+    outs:
+    - path: viaf-index.status
+      md5: 7598c00689dd89a355a1538d9855ff60
+      size: 291
+  status@gr-book-authors:
+    cmd: python ../run.py stage-status -o gr-book-authors.status gr-book-authors
+    deps:
+    - path: gr-book-authors.transcript
+      md5: 52947a5942c872b654f995adec8d508e
+      size: 484
+    outs:
+    - path: gr-book-authors.status
+      md5: bbad1d52089bf2bd674606946a066cb8
+      size: 370
+  status@loc-mds-index-names:
+    cmd: python ../run.py stage-status -o loc-mds-index-names.status loc-mds-index-names
+    deps:
+    - path: loc-mds-index-names.transcript
+      md5: 0d07d44aa3486e6aab6aa3ee67b0a5b3
+      size: 833
+    outs:
+    - path: loc-mds-index-names.status
+      md5: 4d8d4e061447b2d53f39262c41e60f74
+      size: 186
+  status@gr-index-ratings:
+    cmd: python ../run.py stage-status -o gr-index-ratings.status gr-index-ratings
+    deps:
+    - path: gr-index-ratings.transcript
+      md5: 98fcebd1647c3cc36e742a085833ec59
+      size: 1348
+    outs:
+    - path: gr-index-ratings.status
+      md5: 2e5bab21dcd1a3ad70c7b9aa33e1f929
+      size: 393
+  status@loc-mds-index-books:
+    cmd: python ../run.py stage-status -o loc-mds-index-books.status loc-mds-index-books
+    deps:
+    - path: loc-mds-index-books.transcript
+      md5: 0af5f4abe6b224d06ac8a2950d22ee61
+      size: 2313
+    outs:
+    - path: loc-mds-index-books.status
+      md5: abdf7eecd1861c7318b15a2b32435204
+      size: 387
+  status@isbn-norm:
+    cmd: python ../run.py stage-status -o isbn-norm.status isbn-norm
+    deps:
+    - path: isbn-norm.transcript
+      md5: 7c99a01ec9d598650ba83fd2102a8882
+      size: 257
+    outs:
+    - path: isbn-norm.status
+      md5: 5f281f436639bdcb85383e0b334183de
+      size: 262
+  status@gr-book-info:
+    cmd: python ../run.py stage-status -o gr-book-info.status gr-book-info
+    deps:
+    - path: gr-book-info.transcript
+      md5: 659436f044b9398eb82378dcc2e97d7a
+      size: 1792
+    outs:
+    - path: gr-book-info.status
+      md5: b6d909057e78a11672d579e61c18df0a
+      size: 553
+  status@bx-index:
+    cmd: python ../run.py stage-status -o bx-index.status bx-index
+    deps:
+    - path: bx-index.transcript
+      md5: 10895cefc67a2d442b8d834d8399d05b
+      size: 611
+    outs:
+    - path: bx-index.status
+      md5: 7e6211a90148898456ab9dada9353cc2
+      size: 274
+  status@loc-mds-book-info:
+    cmd: python ../run.py stage-status -o loc-mds-book-info.status loc-mds-book-info
+    deps:
+    - path: loc-mds-book-info.transcript
+      md5: 83e7b6b2931146f417ea4b84b76614d2
+      size: 1015
+    outs:
+    - path: loc-mds-book-info.status
+      md5: ae5b3dfa240bfa1de37f585efbc9c55b
+      size: 180
+  status@az-index:
+    cmd: python ../run.py stage-status -o az-index.status az-index
+    deps:
+    - path: az-index.transcript
+      md5: 11bcbcd2d11a8a121b22a9a799312434
+      size: 753
+    outs:
+    - path: az-index.status
+      md5: b845cbb28bd6735919d6ef20c069b2d1
+      size: 280
+  status@loc-mds-extract-isbns:
+    cmd: python ../run.py stage-status -o loc-mds-extract-isbns.status loc-mds-extract-isbns
+    deps:
+    - path: loc-mds-extract-isbns.transcript
+      md5: d41d8cd98f00b204e9800998ecf8427e
+      size: 0
+    outs:
+    - path: loc-mds-extract-isbns.status
+      md5: c4ceff988a5b8a7c15ca00c0dbc4ec59
+      size: 132
@@ -0,0 +1,158 @@
+stages:
+  az-index:
+    cmd: python ../run.py sql-script az-index.sql
+    deps:
+    - az-index.sql
+    - ../import/az-ratings.status
+    - ../integrate/cluster.status
+    outs:
+    - az-index.transcript
+
+  bx-index:
+    cmd: python ../run.py sql-script bx-index.sql
+    deps:
+    - bx-index.sql
+    - ../import/bx-ratings.status
+    - ../integrate/cluster.status
+    outs:
+    - bx-index.transcript
+
+  gr-book-authors:
+    cmd: python ../run.py sql-script gr-book-authors.sql
+    deps:
+    - gr-book-authors.sql
+    - ../integrate/cluster.status
+    - ../import/gr-authors.status
+    - ../import/gr-books.status
+    - gr-index-books.status
+    - ../import/gr-works.status
+    outs:
+    - gr-book-authors.transcript
+
+  gr-book-info:
+    cmd: python ../run.py sql-script gr-book-info.sql
+    deps:
+    - gr-book-info.sql
+    - ../integrate/cluster.status
+    - gr-book-authors.status
+    - ../import/gr-books.status
+    - gr-index-books.status
+    - ../import/gr-works.status
+    outs:
+    - gr-book-info.transcript
+
+  gr-index-books:
+    cmd: python ../run.py sql-script gr-index-books.sql
+    deps:
+    - gr-index-books.sql
+    - ../import/gr-authors.status
+    - ../import/gr-book-genres.status
+    - ../import/gr-books.status
+    - ../import/gr-works.status
+    outs:
+    - gr-index-books.transcript
+
+  gr-index-ratings:
+    cmd: python ../run.py sql-script gr-index-ratings.sql
+    deps:
+    - gr-index-ratings.sql
+    - ../integrate/cluster.status
+    - gr-book-info.status
+    - ../import/gr-interactions.status
+    outs:
+    - gr-index-ratings.transcript
+
+  isbn-norm:
+    cmd: python ../run.py sql-script isbn-norm.sql
+    deps:
+    - isbn-norm.sql
+    - gr-index-books.status
+    - loc-mds-index-books.status
+    - ol-index.status
+    outs:
+    - isbn-norm.transcript
+