use sphinx to build docs

Michael Ekstrand 4 months ago
parent
commit
586ebc0e9f
@@ -11,20 +11,21 @@ jobs:
       - name: Checkout 🛎️
       - name: Checkout 🛎️
         uses: actions/checkout@v2
         uses: actions/checkout@v2
 
 
-      - name: Install Ruby 💎
-        uses: ruby/setup-ruby@v1
+      - name: Install Python 🐍
+        uses: actions/setup-python@v2
         with:
         with:
-          ruby-version: 2.7
-          bundler-cache: true
-          working-directory: docs
+          python-version: 3.8
 
 
-      - name: Build Jekyll site 🕸
+      - name: Install Python dependencies 📦
         run: |
         run: |
-          cd docs
-          bundle exec jekyll build
+          pip install -r doc-requirements.txt
+
+      - name: Build site 🕸
+        run: |
+          sphinx-build docs target/docs
 
 
       - name: Deploy 🚀
       - name: Deploy 🚀
         uses: JamesIves/github-pages-deploy-action@4.1.0
         uses: JamesIves/github-pages-deploy-action@4.1.0
         with:
         with:
           branch: gh-pages
           branch: gh-pages
-          folder: docs/_site
+          folder: target/docs
@@ -11,6 +11,7 @@ oprofile_data
 /.vscode
 /.vscode
 .pytest_cache/
 .pytest_cache/
 __pycache__/
 __pycache__/
+*venv/
 
 
 /target
 /target
 **/*.rs.bk
 **/*.rs.bk
@@ -18,5 +19,4 @@ __pycache__/
 db.cfg
 db.cfg
 *.gml
 *.gml
 *.graphml
 *.graphml
-*.txt
 /loc-mds-extract-isbns.transcript
 /loc-mds-extract-isbns.transcript
@@ -0,0 +1,4 @@
+sphinx
+furo
+myst-parser
+sphinxcontrib-bibtex
@@ -0,0 +1,103 @@
+#
+# This file is autogenerated by pip-compile
+# To update, run:
+#
+#    pip-compile --output-file='.\doc-requirements.txt' '.\doc-requirements.in'
+#
+alabaster==0.7.12
+    # via sphinx
+attrs==20.3.0
+    # via markdown-it-py
+babel==2.9.0
+    # via sphinx
+beautifulsoup4==4.9.3
+    # via furo
+certifi==2020.12.5
+    # via requests
+chardet==4.0.0
+    # via requests
+colorama==0.4.4
+    # via sphinx
+docutils==0.16
+    # via
+    #   myst-parser
+    #   pybtex-docutils
+    #   sphinx
+    #   sphinxcontrib-bibtex
+furo==2021.2.28b28
+    # via -r .\doc-requirements.in
+idna==2.10
+    # via requests
+imagesize==1.2.0
+    # via sphinx
+jinja2==2.11.3
+    # via
+    #   myst-parser
+    #   sphinx
+latexcodec==2.0.1
+    # via pybtex
+markdown-it-py==0.6.2
+    # via
+    #   mdit-py-plugins
+    #   myst-parser
+markupsafe==1.1.1
+    # via jinja2
+mdit-py-plugins==0.2.5
+    # via
+    #   markdown-it-py
+    #   myst-parser
+myst-parser==0.13.5
+    # via -r .\doc-requirements.in
+packaging==20.9
+    # via sphinx
+pybtex-docutils==1.0.0
+    # via sphinxcontrib-bibtex
+pybtex==0.24.0
+    # via
+    #   pybtex-docutils
+    #   sphinxcontrib-bibtex
+pygments==2.8.1
+    # via sphinx
+pyparsing==2.4.7
+    # via packaging
+pytz==2021.1
+    # via babel
+pyyaml==5.4.1
+    # via
+    #   myst-parser
+    #   pybtex
+requests==2.25.1
+    # via sphinx
+six==1.15.0
+    # via
+    #   latexcodec
+    #   pybtex
+snowballstemmer==2.1.0
+    # via sphinx
+soupsieve==2.2
+    # via beautifulsoup4
+sphinx==3.5.2
+    # via
+    #   -r .\doc-requirements.in
+    #   furo
+    #   myst-parser
+    #   sphinxcontrib-bibtex
+sphinxcontrib-applehelp==1.0.2
+    # via sphinx
+sphinxcontrib-bibtex==2.2.0
+    # via -r .\doc-requirements.in
+sphinxcontrib-devhelp==1.0.2
+    # via sphinx
+sphinxcontrib-htmlhelp==1.0.3
+    # via sphinx
+sphinxcontrib-jsmath==1.0.1
+    # via sphinx
+sphinxcontrib-qthelp==1.0.3
+    # via sphinx
+sphinxcontrib-serializinghtml==1.1.4
+    # via sphinx
+urllib3==1.26.3
+    # via requests
+
+# The following packages are considered to be unsafe in a requirements file:
+# setuptools
@@ -1 +1,2 @@
 /_site
 /_site
+/build/
@@ -1,7 +0,0 @@
-source "https://rubygems.org"
-
-gem "github-pages", group: :jekyll_plugins
-gem "jekyll-include-cache", group: :jekyll_plugins
-gem 'jekyll-scholar', group: :jekyll_plugins
-
-# gem 'wdm', '>= 0.1.0' if Gem.win_platform?
@@ -1,293 +0,0 @@
-GEM
-  remote: https://rubygems.org/
-  specs:
-    activesupport (6.0.3.5)
-      concurrent-ruby (~> 1.0, >= 1.0.2)
-      i18n (>= 0.7, < 2)
-      minitest (~> 5.1)
-      tzinfo (~> 1.1)
-      zeitwerk (~> 2.2, >= 2.2.2)
-    addressable (2.7.0)
-      public_suffix (>= 2.0.2, < 5.0)
-    bibtex-ruby (4.4.7)
-      latex-decode (~> 0.0)
-    citeproc (1.0.10)
-      namae (~> 1.0)
-    citeproc-ruby (1.1.13)
-      citeproc (~> 1.0, >= 1.0.9)
-      csl (~> 1.5)
-    coffee-script (2.4.1)
-      coffee-script-source
-      execjs
-    coffee-script-source (1.11.1)
-    colorator (1.1.0)
-    commonmarker (0.17.13)
-      ruby-enum (~> 0.5)
-    concurrent-ruby (1.1.8)
-    csl (1.5.2)
-      namae (~> 1.0)
-    csl-styles (1.0.1.10)
-      csl (~> 1.0)
-    dnsruby (1.61.5)
-      simpleidn (~> 0.1)
-    em-websocket (0.5.2)
-      eventmachine (>= 0.12.9)
-      http_parser.rb (~> 0.6.0)
-    ethon (0.12.0)
-      ffi (>= 1.3.0)
-    eventmachine (1.2.7)
-    eventmachine (1.2.7-x64-mingw32)
-    execjs (2.7.0)
-    faraday (1.3.0)
-      faraday-net_http (~> 1.0)
-      multipart-post (>= 1.2, < 3)
-      ruby2_keywords
-    faraday-net_http (1.0.1)
-    ffi (1.15.0)
-    ffi (1.15.0-x64-mingw32)
-    forwardable-extended (2.6.0)
-    gemoji (3.0.1)
-    github-pages (212)
-      github-pages-health-check (= 1.17.0)
-      jekyll (= 3.9.0)
-      jekyll-avatar (= 0.7.0)
-      jekyll-coffeescript (= 1.1.1)
-      jekyll-commonmark-ghpages (= 0.1.6)
-      jekyll-default-layout (= 0.1.4)
-      jekyll-feed (= 0.15.1)
-      jekyll-gist (= 1.5.0)
-      jekyll-github-metadata (= 2.13.0)
-      jekyll-mentions (= 1.6.0)
-      jekyll-optional-front-matter (= 0.3.2)
-      jekyll-paginate (= 1.1.0)
-      jekyll-readme-index (= 0.3.0)
-      jekyll-redirect-from (= 0.16.0)
-      jekyll-relative-links (= 0.6.1)
-      jekyll-remote-theme (= 0.4.2)
-      jekyll-sass-converter (= 1.5.2)
-      jekyll-seo-tag (= 2.7.1)
-      jekyll-sitemap (= 1.4.0)
-      jekyll-swiss (= 1.0.0)
-      jekyll-theme-architect (= 0.1.1)
-      jekyll-theme-cayman (= 0.1.1)
-      jekyll-theme-dinky (= 0.1.1)
-      jekyll-theme-hacker (= 0.1.2)
-      jekyll-theme-leap-day (= 0.1.1)
-      jekyll-theme-merlot (= 0.1.1)
-      jekyll-theme-midnight (= 0.1.1)
-      jekyll-theme-minimal (= 0.1.1)
-      jekyll-theme-modernist (= 0.1.1)
-      jekyll-theme-primer (= 0.5.4)
-      jekyll-theme-slate (= 0.1.1)
-      jekyll-theme-tactile (= 0.1.1)
-      jekyll-theme-time-machine (= 0.1.1)
-      jekyll-titles-from-headings (= 0.5.3)
-      jemoji (= 0.12.0)
-      kramdown (= 2.3.0)
-      kramdown-parser-gfm (= 1.1.0)
-      liquid (= 4.0.3)
-      mercenary (~> 0.3)
-      minima (= 2.5.1)
-      nokogiri (>= 1.10.4, < 2.0)
-      rouge (= 3.26.0)
-      terminal-table (~> 1.4)
-    github-pages-health-check (1.17.0)
-      addressable (~> 2.3)
-      dnsruby (~> 1.60)
-      octokit (~> 4.0)
-      public_suffix (>= 2.0.2, < 5.0)
-      typhoeus (~> 1.3)
-    html-pipeline (2.14.0)
-      activesupport (>= 2)
-      nokogiri (>= 1.4)
-    http_parser.rb (0.6.0)
-    i18n (0.9.5)
-      concurrent-ruby (~> 1.0)
-    jekyll (3.9.0)
-      addressable (~> 2.4)
-      colorator (~> 1.0)
-      em-websocket (~> 0.5)
-      i18n (~> 0.7)
-      jekyll-sass-converter (~> 1.0)
-      jekyll-watch (~> 2.0)
-      kramdown (>= 1.17, < 3)
-      liquid (~> 4.0)
-      mercenary (~> 0.3.3)
-      pathutil (~> 0.9)
-      rouge (>= 1.7, < 4)
-      safe_yaml (~> 1.0)
-    jekyll-avatar (0.7.0)
-      jekyll (>= 3.0, < 5.0)
-    jekyll-coffeescript (1.1.1)
-      coffee-script (~> 2.2)
-      coffee-script-source (~> 1.11.1)
-    jekyll-commonmark (1.3.1)
-      commonmarker (~> 0.14)
-      jekyll (>= 3.7, < 5.0)
-    jekyll-commonmark-ghpages (0.1.6)
-      commonmarker (~> 0.17.6)
-      jekyll-commonmark (~> 1.2)
-      rouge (>= 2.0, < 4.0)
-    jekyll-default-layout (0.1.4)
-      jekyll (~> 3.0)
-    jekyll-feed (0.15.1)
-      jekyll (>= 3.7, < 5.0)
-    jekyll-gist (1.5.0)
-      octokit (~> 4.2)
-    jekyll-github-metadata (2.13.0)
-      jekyll (>= 3.4, < 5.0)
-      octokit (~> 4.0, != 4.4.0)
-    jekyll-include-cache (0.2.1)
-      jekyll (>= 3.7, < 5.0)
-    jekyll-mentions (1.6.0)
-      html-pipeline (~> 2.3)
-      jekyll (>= 3.7, < 5.0)
-    jekyll-optional-front-matter (0.3.2)
-      jekyll (>= 3.0, < 5.0)
-    jekyll-paginate (1.1.0)
-    jekyll-readme-index (0.3.0)
-      jekyll (>= 3.0, < 5.0)
-    jekyll-redirect-from (0.16.0)
-      jekyll (>= 3.3, < 5.0)
-    jekyll-relative-links (0.6.1)
-      jekyll (>= 3.3, < 5.0)
-    jekyll-remote-theme (0.4.2)
-      addressable (~> 2.0)
-      jekyll (>= 3.5, < 5.0)
-      jekyll-sass-converter (>= 1.0, <= 3.0.0, != 2.0.0)
-      rubyzip (>= 1.3.0, < 3.0)
-    jekyll-sass-converter (1.5.2)
-      sass (~> 3.4)
-    jekyll-scholar (5.16.0)
-      bibtex-ruby (~> 4.0, >= 4.0.13)
-      citeproc-ruby (~> 1.0)
-      csl-styles (~> 1.0)
-      jekyll (~> 3.0)
-    jekyll-seo-tag (2.7.1)
-      jekyll (>= 3.8, < 5.0)
-    jekyll-sitemap (1.4.0)
-      jekyll (>= 3.7, < 5.0)
-    jekyll-swiss (1.0.0)
-    jekyll-theme-architect (0.1.1)
-      jekyll (~> 3.5)
-      jekyll-seo-tag (~> 2.0)
-    jekyll-theme-cayman (0.1.1)
-      jekyll (~> 3.5)
-      jekyll-seo-tag (~> 2.0)
-    jekyll-theme-dinky (0.1.1)
-      jekyll (~> 3.5)
-      jekyll-seo-tag (~> 2.0)
-    jekyll-theme-hacker (0.1.2)
-      jekyll (> 3.5, < 5.0)
-      jekyll-seo-tag (~> 2.0)
-    jekyll-theme-leap-day (0.1.1)
-      jekyll (~> 3.5)
-      jekyll-seo-tag (~> 2.0)
-    jekyll-theme-merlot (0.1.1)
-      jekyll (~> 3.5)
-      jekyll-seo-tag (~> 2.0)
-    jekyll-theme-midnight (0.1.1)
-      jekyll (~> 3.5)
-      jekyll-seo-tag (~> 2.0)
-    jekyll-theme-minimal (0.1.1)
-      jekyll (~> 3.5)
-      jekyll-seo-tag (~> 2.0)
-    jekyll-theme-modernist (0.1.1)
-      jekyll (~> 3.5)
-      jekyll-seo-tag (~> 2.0)
-    jekyll-theme-primer (0.5.4)
-      jekyll (> 3.5, < 5.0)
-      jekyll-github-metadata (~> 2.9)
-      jekyll-seo-tag (~> 2.0)
-    jekyll-theme-slate (0.1.1)
-      jekyll (~> 3.5)
-      jekyll-seo-tag (~> 2.0)
-    jekyll-theme-tactile (0.1.1)
-      jekyll (~> 3.5)
-      jekyll-seo-tag (~> 2.0)
-    jekyll-theme-time-machine (0.1.1)
-      jekyll (~> 3.5)
-      jekyll-seo-tag (~> 2.0)
-    jekyll-titles-from-headings (0.5.3)
-      jekyll (>= 3.3, < 5.0)
-    jekyll-watch (2.2.1)
-      listen (~> 3.0)
-    jemoji (0.12.0)
-      gemoji (~> 3.0)
-      html-pipeline (~> 2.2)
-      jekyll (>= 3.0, < 5.0)
-    kramdown (2.3.0)
-      rexml
-    kramdown-parser-gfm (1.1.0)
-      kramdown (~> 2.0)
-    latex-decode (0.3.2)
-    liquid (4.0.3)
-    listen (3.4.1)
-      rb-fsevent (~> 0.10, >= 0.10.3)
-      rb-inotify (~> 0.9, >= 0.9.10)
-    mercenary (0.3.6)
-    mini_portile2 (2.5.0)
-    minima (2.5.1)
-      jekyll (>= 3.5, < 5.0)
-      jekyll-feed (~> 0.9)
-      jekyll-seo-tag (~> 2.1)
-    minitest (5.14.4)
-    multipart-post (2.1.1)
-    namae (1.0.2)
-    nokogiri (1.11.2)
-      mini_portile2 (~> 2.5.0)
-      racc (~> 1.4)
-    nokogiri (1.11.2-x64-mingw32)
-      racc (~> 1.4)
-    octokit (4.20.0)
-      faraday (>= 0.9)
-      sawyer (~> 0.8.0, >= 0.5.3)
-    pathutil (0.16.2)
-      forwardable-extended (~> 2.6)
-    public_suffix (4.0.6)
-    racc (1.5.2)
-    rb-fsevent (0.10.4)
-    rb-inotify (0.10.1)
-      ffi (~> 1.0)
-    rexml (3.2.4)
-    rouge (3.26.0)
-    ruby-enum (0.9.0)
-      i18n
-    ruby2_keywords (0.0.4)
-    rubyzip (2.3.0)
-    safe_yaml (1.0.5)
-    sass (3.7.4)
-      sass-listen (~> 4.0.0)
-    sass-listen (4.0.0)
-      rb-fsevent (~> 0.9, >= 0.9.4)
-      rb-inotify (~> 0.9, >= 0.9.7)
-    sawyer (0.8.2)
-      addressable (>= 2.3.5)
-      faraday (> 0.8, < 2.0)
-    simpleidn (0.2.1)
-      unf (~> 0.1.4)
-    terminal-table (1.8.0)
-      unicode-display_width (~> 1.1, >= 1.1.1)
-    thread_safe (0.3.6)
-    typhoeus (1.4.0)
-      ethon (>= 0.9.0)
-    tzinfo (1.2.9)
-      thread_safe (~> 0.1)
-    unf (0.1.4)
-      unf_ext
-    unf_ext (0.0.7.7)
-    unf_ext (0.0.7.7-x64-mingw32)
-    unicode-display_width (1.7.0)
-    zeitwerk (2.4.2)
-
-PLATFORMS
-  ruby
-  x64-mingw32
-
-DEPENDENCIES
-  github-pages
-  jekyll-include-cache
-  jekyll-scholar
-
-BUNDLED WITH
-   1.17.2
@@ -1,19 +0,0 @@
-title: Book Data Tools
-name: Michael Ekstrand
-plugins:
-  - jekyll/scholar
-
-remote_theme: pmarsceill/just-the-docs
-scholar:
-  style: chicago-note-bibliography
-
-aux_links:
-  "GitHub": https://github.com/BoiseState/bookdata-tools
-  "PIReT": https://piret.info
-
-footer_content: >
-  Copyright &copy; 2020 Boise State University.  Distributed under the MIT License.
-  This material is based upon work supported by the National Science Foundation under
-  Grant No. IIS 17-51278. Any opinions, findings, and conclusions or recommendations
-  expressed in this material are those of the author(s) and do not necessarily reflect
-  the views of the National Science Foundation.
@@ -1,2 +0,0 @@
-$body-font-family: 'Lato', sans-serif;
-$mono-font-family: 'Source Code Pro', monospace;
@@ -1,4 +1,8 @@
+{% extends "furo/base.html" %}
+
+{% block extrahead %}
 <link rel=stylesheet type="text/css" href="https://unpkg.com/@openfonts/lato_latin/index.css">
 <link rel=stylesheet type="text/css" href="https://unpkg.com/@openfonts/lato_latin/index.css">
 <link rel=stylesheet type="text/css" href="https://unpkg.com/@openfonts/source-code-pro_latin/index.css">
 <link rel=stylesheet type="text/css" href="https://unpkg.com/@openfonts/source-code-pro_latin/index.css">
 <script data-goatcounter="https://piret-bookdata.goatcounter.com/count"
 <script data-goatcounter="https://piret-bookdata.goatcounter.com/count"
         async src="//gc.zgo.at/count.js"></script>
         async src="//gc.zgo.at/count.js"></script>
+{% endblock %}
@@ -0,0 +1,55 @@
+# -*- coding: utf-8 -*-
+
+import pybtex.plugin
+from pybtex.style.sorting import BaseSortingStyle
+from pybtex.style.formatting import plain
+
+project = 'Book Data Tools'
+copyright = '2020–2021 Boise State University'
+author = 'Michael D. Ekstrand'
+
+extensions = [
+    'myst_parser',
+    'sphinxcontrib.bibtex'
+]
+
+myst_enable_extensions = [
+    'deflist',
+    'colon_fence'
+]
+
+bibtex_bibfiles = [
+    'papers.bib'
+]
+
+html_theme = 'furo'
+html_theme_options = {
+    'repository_url': 'https://github.com/BoiseState/bookdata-tools',
+    'light_css_variables': {
+        'font-stack': 'Lato, sans-serif',
+        'font-stack--monospace': 'Source Code Pro, monospace'
+    }
+}
+html_baseurl = 'https://bookdata.piret.info'
+templates_path = ['_templates']
+
+
+class ChronoSort(BaseSortingStyle):
+    def sorting_key(self, entry):
+        year = entry.fields.get('year', '')
+        month = entry.fields.get('month', '')
+        title = entry.fields.get('title', '')
+        return year, month, title
+
+    def sort(self, entries):
+        sorted = super().sort(entries)
+        sorted.reverse()
+        return sorted
+
+
+class ChronoStyle(plain.Style):
+    default_sorting_style = 'chrono'
+
+
+pybtex.plugin.register_plugin('pybtex.style.sorting', 'chrono', ChronoSort)
+pybtex.plugin.register_plugin('pybtex.style.formatting', 'chrono', ChronoStyle)
@@ -5,7 +5,6 @@ nav_order: 6
 ---
 ---
 
 
 # Amazon Ratings
 # Amazon Ratings
-{: .no_toc}
 
 
 The [Amazon reviews data set](http://jmcauley.ucsd.edu/data/amazon/) consists of user-provided
 The [Amazon reviews data set](http://jmcauley.ucsd.edu/data/amazon/) consists of user-provided
 reviews and ratings for a variety of products.
 reviews and ratings for a variety of products.
@@ -16,9 +15,6 @@ Currently we import the ratings-only data from the Books segment of the 2014 dat
 
 
 Imported data lives in the `az` schema.  The source files are not automatically downloaded.
 Imported data lives in the `az` schema.  The source files are not automatically downloaded.
 
 
-1. TOC
-{:toc}
-
 ## Data Model Diagram
 ## Data Model Diagram
 
 
 ![Amazon data model](az.svg)
 ![Amazon data model](az.svg)
@@ -5,7 +5,6 @@ nav_order: 5
 ---
 ---
 
 
 # BookCrossing
 # BookCrossing
-{: .no_toc}
 
 
 The [BookCrossing data set](http://www2.informatik.uni-freiburg.de/~cziegler/BX/) consists of user-provided
 The [BookCrossing data set](http://www2.informatik.uni-freiburg.de/~cziegler/BX/) consists of user-provided
 ratings — both implicit and explicit — of books.
 ratings — both implicit and explicit — of books.
@@ -17,9 +16,6 @@ ratings — both implicit and explicit — of books.
 Imported data lives in the `bx` schema.  The source data files are automatically downloaded and unpacked by
 Imported data lives in the `bx` schema.  The source data files are automatically downloaded and unpacked by
 the provided scripts and DVC stages.
 the provided scripts and DVC stages.
 
 
-1. TOC
-{:toc}
-
 ## Data Model Diagram
 ## Data Model Diagram
 
 
 ![BookCrossing data model](bx.svg)
 ![BookCrossing data model](bx.svg)
@@ -5,7 +5,6 @@ nav_order: 8
 ---
 ---
 
 
 # Book Clusters
 # Book Clusters
-{: .no_toc}
 
 
 For recommendation and analysis, we often want to look at *works* instead of individual books or
 For recommendation and analysis, we often want to look at *works* instead of individual books or
 editions of those books.  The same material by the same author(s) may be reprinted in many different
 editions of those books.  The same material by the same author(s) may be reprinted in many different
@@ -5,14 +5,14 @@ nav_order: 9
 ---
 ---
 
 
 # Book Author Gender
 # Book Author Gender
-{: .no_toc}
 
 
 We compute the author gender for book clusters using the integrated data set.
 We compute the author gender for book clusters using the integrated data set.
 
 
-**See the paper for important limitations.**
+:::{warning}
+See the [paper][] for important limitations and ethical considerations.
+:::
 
 
-1. TOC
-{:toc}
+[paper]: https://md.ekstrandom.net/pubs/bag-extended
 
 
 ## Import Steps
 ## Import Steps
 
 
@@ -5,7 +5,6 @@ nav_order: 7
 ---
 ---
 
 
 # GoodReads (UCSD Book Graph)
 # GoodReads (UCSD Book Graph)
-{: .no_toc}
 
 
 We import GoodReads data from the [UCSD Book Graph](https://sites.google.com/eng.ucsd.edu/ucsdbookgraph/home)
 We import GoodReads data from the [UCSD Book Graph](https://sites.google.com/eng.ucsd.edu/ucsdbookgraph/home)
 for additional book and user interaction information.  The source files are not automatically downloaded; you
 for additional book and user interaction information.  The source files are not automatically downloaded; you
@@ -24,9 +23,6 @@ We do not yet support reviews.
 
 
 Imported data lives in the `gr` schema.
 Imported data lives in the `gr` schema.
 
 
-1. TOC
-{:toc}
-
 ## Data Model Diagram
 ## Data Model Diagram
 
 
 ![GoodReads model diagram](goodreads.svg)
 ![GoodReads model diagram](goodreads.svg)
@@ -5,13 +5,9 @@ nav_order: 1
 ---
 ---
 
 
 # Common Identifiers
 # Common Identifiers
-{: .no_toc}
 
 
 There are two key identifiers that are used across data sets.
 There are two key identifiers that are used across data sets.
 
 
-1. TOC
-{:toc}
-
 ## ISBNs
 ## ISBNs
 
 
 We use ISBNs for a lot of data linking.  In order to speed up ISBN-based operations, we map textual ISBNs to numeric 'ISBN IDs'.
 We use ISBNs for a lot of data linking.  In order to speed up ISBN-based operations, we map textual ISBNs to numeric 'ISBN IDs'.
@@ -12,3 +12,17 @@ integration.
 It doesn't describe every intermediate detail or table.
 It doesn't describe every intermediate detail or table.
 
 
 The data is organized into PostgreSQL schemas to make it easier to navigate; one effect of this is that if you just look at the default `public` schema, you will see very few of the tables.  Further, some tables are materialized views, so they may not show up in the table list.  The `\dm` command in `psql` shows materialized views.
 The data is organized into PostgreSQL schemas to make it easier to navigate; one effect of this is that if you just look at the default `public` schema, you will see very few of the tables.  Further, some tables are materialized views, so they may not show up in the table list.  The `\dm` command in `psql` shows materialized views.
+
+```{toctree}
+:maxdepth: 1
+
+ids
+loc
+openlib
+viaf
+bx
+amazon
+goodreads
+cluster
+gender
+```
@@ -1,11 +1,8 @@
 ---
 ---
 title: Library of Congress
 title: Library of Congress
-parent: Data Model
-nav_order: 2
 ---
 ---
 
 
 # Library of Congress
 # Library of Congress
-{: .no_toc}
 
 
 One of our sources of book data is the Library of Congress [MDSConnect Books](https://www.loc.gov/cds/products/MDSConnect-books_all.html) bibliography records.
 One of our sources of book data is the Library of Congress [MDSConnect Books](https://www.loc.gov/cds/products/MDSConnect-books_all.html) bibliography records.
 
 
@@ -13,9 +10,6 @@ We download and import the XML versions of these files.
 
 
 Imported data lives under the `locmds` schema.
 Imported data lives under the `locmds` schema.
 
 
-1. TOC
-{:toc}
-
 ## Data Model Diagram
 ## Data Model Diagram
 
 
 ![LOC data model](loc.svg)
 ![LOC data model](loc.svg)
@@ -43,7 +37,6 @@ The import is controlled by the following DVC steps:
 :   Run `loc-mds-book-info.sql` to extract additional book data into tables.
 :   Run `loc-mds-book-info.sql` to extract additional book data into tables.
 
 
 ## Raw Book Data
 ## Raw Book Data
-{: #raw}
 
 
 The `locmds.book_marc_fields` table contains the raw data imported from the MARC files, as MARC fields.  The LOC book data follows the [MARC 21 Bibliographic Data format](https://www.loc.gov/marc/bibliographic/); the various tags, field codes, and indicators are defined there.  This table is not terribly useful on its own, but it is the source from which the other tables are derived.
 The `locmds.book_marc_fields` table contains the raw data imported from the MARC files, as MARC fields.  The LOC book data follows the [MARC 21 Bibliographic Data format](https://www.loc.gov/marc/bibliographic/); the various tags, field codes, and indicators are defined there.  This table is not terribly useful on its own, but it is the source from which the other tables are derived.
 
 
@@ -5,7 +5,6 @@ nav_order: 3
 ---
 ---
 
 
 # OpenLibrary
 # OpenLibrary
-{: .no_toc}
 
 
 We also source book data from [OpenLibrary](https://openlibrary.org), as downloaded from
 We also source book data from [OpenLibrary](https://openlibrary.org), as downloaded from
 their [developer dumps](https://openlibrary.org/developers/dumps).
 their [developer dumps](https://openlibrary.org/developers/dumps).
@@ -15,9 +14,6 @@ updated by modifying the `data/ol_dump_*.txt.gz.dvc` files.
 
 
 Imported data lives in the `ol` schema.
 Imported data lives in the `ol` schema.
 
 
-1. TOC
-{:toc}
-
 ## Import Steps
 ## Import Steps
 
 
 The import is controlled by the following DVC steps:
 The import is controlled by the following DVC steps:
@@ -5,7 +5,6 @@ nav_order: 4
 ---
 ---
 
 
 # Virtual International Authority File
 # Virtual International Authority File
-{: .no_toc}
 
 
 We source author data from the [Virtual International Authority File](http://viaf.org), as downloaded from
 We source author data from the [Virtual International Authority File](http://viaf.org), as downloaded from
 their [data dumps](http://viaf.org/viaf/data).  This file is slow and error-prone to download, and is
 their [data dumps](http://viaf.org/viaf/data).  This file is slow and error-prone to download, and is
@@ -13,9 +12,6 @@ not* auto-downloaded.
 
 
 Imported data lives in the `viaf` schema.
 Imported data lives in the `viaf` schema.
 
 
-1. TOC
-{:toc}
-
 ## Import Steps
 ## Import Steps
 
 
 The import is controlled by the following DVC steps:
 The import is controlled by the following DVC steps:
@@ -5,7 +5,6 @@ has_children: true
 ---
 ---
 
 
 # Design and Implementation
 # Design and Implementation
-{: .no_toc}
 
 
 These data and integration tools are designed to support several goals:
 These data and integration tools are designed to support several goals:
 
 
@@ -15,8 +14,11 @@ These data and integration tools are designed to support several goals:
 - Support updates (e.g. new OpenLibrary dumps) by replacing the file and re-running
 - Support updates (e.g. new OpenLibrary dumps) by replacing the file and re-running
 - Efficient import and integration
 - Efficient import and integration
 
 
-1. TOC
-{:toc}
+```{toctree}
+status
+layout
+dataset
+```
 
 
 ## Implementation Principles
 ## Implementation Principles
 
 
@@ -27,11 +29,6 @@ These goals are realized through a few technology and design decisions:
 - Implement as much data integration as possible in declarative SQL.
 - Implement as much data integration as possible in declarative SQL.
 - Make SQL scripts re-runnable, so they will either refresh or delete and recreate their outputs. Deletes cascading to downstream steps are fine, because the stage runner will re-run those stages anyway.
 - Make SQL scripts re-runnable, so they will either refresh or delete and recreate their outputs. Deletes cascading to downstream steps are fine, because the stage runner will re-run those stages anyway.
 
 
-## Further Information
-
-- [Status Tracking](status.md)
-- [Repository Layout](layout.md)
-- [How to Add a Data Set](dataset.md)
 
 
 ## DVC Dependency Graph
 ## DVC Dependency Graph
 
 
@@ -5,7 +5,6 @@ nav_order: 2
 ---
 ---
 
 
 # Layout
 # Layout
-{: .no_toc}
 
 
 The import code consists of Python, Rust, and SQL code, wired together with DVC.
 The import code consists of Python, Rust, and SQL code, wired together with DVC.
 
 
@@ -1,11 +1,8 @@
 ---
 ---
 title: Status Tracking
 title: Status Tracking
-parent: Implementation
-nav_order: 1
 ---
 ---
 
 
 # Status Tracking
 # Status Tracking
-{: .no_toc}
 
 
 The book tools are built around [Data Version Control](https://dvc.org), a tool for managing
 The book tools are built around [Data Version Control](https://dvc.org), a tool for managing
 data processing pipelines.  We use the software in a somewhat unusual way due to our use of
 data processing pipelines.  We use the software in a somewhat unusual way due to our use of
@@ -33,6 +33,15 @@ page that describes the data and its appropriate citation.
 
 
 See the [Setup page](using/setup.md) to get started and for system requirements.
 See the [Setup page](using/setup.md) to get started and for system requirements.
 
 
+```{toctree}
+:maxdepth: 1
+
+using/index
+data/index
+implementation/index
+papers
+```
+
 ## Video
 ## Video
 
 
 I recorded a video walking through the integration as an example for my [Data Science class](https://cs533.ekstrandom.net).
 I recorded a video walking through the integration as an example for my [Data Science class](https://cs533.ekstrandom.net).
@@ -10,7 +10,8 @@
   archivePrefix = "arXiv",
   archivePrefix = "arXiv",
   eprint        = "2009.01311",
   eprint        = "2009.01311",
   primaryClass  = "cs.IR",
   primaryClass  = "cs.IR",
-  arxivid       = "2009.01311"
+  arxivid       = "2009.01311",
+  journal = "arXiv preprints"
 }
 }
 
 
 @ARTICLE{Ekstrand2021-iu,
 @ARTICLE{Ekstrand2021-iu,
@@ -1,8 +0,0 @@
----
-title: Research Using BookData
-nav_order: 5
----
-
-# Papers Using BookData
-
-{% bibliography --file users %}
@@ -0,0 +1,9 @@
+Papers Using BookData
+=====================
+
+These are papers we know to be using this book data integration.
+
+.. bibliography:: papers.bib
+    :all:
+    :list: enumerated
+    :style: chrono
@@ -7,3 +7,9 @@ nav_order: 2
 # Using the Tools
 # Using the Tools
 
 
 This section of the documentation describes how to set up and use the book data integration tools.
 This section of the documentation describes how to set up and use the book data integration tools.
+
+```{toctree}
+running
+setup
+sources
+```
@@ -5,13 +5,9 @@ nav_order: 2
 ---
 ---
 
 
 # Setting Up the Environment
 # Setting Up the Environment
-{: .no_toc}
 
 
 These tools require PostgreSQL and an Anaconda installation.
 These tools require PostgreSQL and an Anaconda installation.
 
 
-1. TOC
-{:toc}
-
 ## System Requirements
 ## System Requirements
 
 
 You will need:
 You will need: