Browse Source

use sphinx to build docs

Michael Ekstrand 2 months ago
parent
commit
586ebc0e9f

+ 10
- 9
.github/workflows/build-doc-site.yml

@@ -11,20 +11,21 @@ jobs:
       - name: Checkout 🛎️
         uses: actions/checkout@v2
 
-      - name: Install Ruby 💎
-        uses: ruby/setup-ruby@v1
+      - name: Install Python 🐍
+        uses: actions/setup-python@v2
         with:
-          ruby-version: 2.7
-          bundler-cache: true
-          working-directory: docs
+          # Quoted so YAML reads the version as a string rather than a float
+          # (an unquoted 3.10 would be parsed as 3.1).
+          python-version: '3.8'
 
-      - name: Build Jekyll site 🕸
+      - name: Install Python dependencies 📦
         run: |
-          cd docs
-          bundle exec jekyll build
+          pip install -r doc-requirements.txt
+
+      - name: Build site 🕸
+        run: |
+          sphinx-build docs target/docs
 
       - name: Deploy 🚀
         uses: JamesIves/github-pages-deploy-action@4.1.0
         with:
           branch: gh-pages
-          folder: docs/_site
+          folder: target/docs

+ 1
- 1
.gitignore

@@ -11,6 +11,7 @@ oprofile_data
 /.vscode
 .pytest_cache/
 __pycache__/
+*venv/
 
 /target
 **/*.rs.bk
@@ -18,5 +19,4 @@ __pycache__/
 db.cfg
 *.gml
 *.graphml
-*.txt
 /loc-mds-extract-isbns.transcript

+ 4
- 0
doc-requirements.in

@@ -0,0 +1,4 @@
+sphinx
+furo
+myst-parser
+sphinxcontrib-bibtex

+ 103
- 0
doc-requirements.txt

@@ -0,0 +1,103 @@
+#
+# This file is autogenerated by pip-compile
+# To update, run:
+#
+#    pip-compile --output-file='.\doc-requirements.txt' '.\doc-requirements.in'
+#
+alabaster==0.7.12
+    # via sphinx
+attrs==20.3.0
+    # via markdown-it-py
+babel==2.9.0
+    # via sphinx
+beautifulsoup4==4.9.3
+    # via furo
+certifi==2020.12.5
+    # via requests
+chardet==4.0.0
+    # via requests
+colorama==0.4.4
+    # via sphinx
+docutils==0.16
+    # via
+    #   myst-parser
+    #   pybtex-docutils
+    #   sphinx
+    #   sphinxcontrib-bibtex
+furo==2021.2.28b28
+    # via -r .\doc-requirements.in
+idna==2.10
+    # via requests
+imagesize==1.2.0
+    # via sphinx
+jinja2==2.11.3
+    # via
+    #   myst-parser
+    #   sphinx
+latexcodec==2.0.1
+    # via pybtex
+markdown-it-py==0.6.2
+    # via
+    #   mdit-py-plugins
+    #   myst-parser
+markupsafe==1.1.1
+    # via jinja2
+mdit-py-plugins==0.2.5
+    # via
+    #   markdown-it-py
+    #   myst-parser
+myst-parser==0.13.5
+    # via -r .\doc-requirements.in
+packaging==20.9
+    # via sphinx
+pybtex-docutils==1.0.0
+    # via sphinxcontrib-bibtex
+pybtex==0.24.0
+    # via
+    #   pybtex-docutils
+    #   sphinxcontrib-bibtex
+pygments==2.8.1
+    # via sphinx
+pyparsing==2.4.7
+    # via packaging
+pytz==2021.1
+    # via babel
+pyyaml==5.4.1
+    # via
+    #   myst-parser
+    #   pybtex
+requests==2.25.1
+    # via sphinx
+six==1.15.0
+    # via
+    #   latexcodec
+    #   pybtex
+snowballstemmer==2.1.0
+    # via sphinx
+soupsieve==2.2
+    # via beautifulsoup4
+sphinx==3.5.2
+    # via
+    #   -r .\doc-requirements.in
+    #   furo
+    #   myst-parser
+    #   sphinxcontrib-bibtex
+sphinxcontrib-applehelp==1.0.2
+    # via sphinx
+sphinxcontrib-bibtex==2.2.0
+    # via -r .\doc-requirements.in
+sphinxcontrib-devhelp==1.0.2
+    # via sphinx
+sphinxcontrib-htmlhelp==1.0.3
+    # via sphinx
+sphinxcontrib-jsmath==1.0.1
+    # via sphinx
+sphinxcontrib-qthelp==1.0.3
+    # via sphinx
+sphinxcontrib-serializinghtml==1.1.4
+    # via sphinx
+urllib3==1.26.3
+    # via requests
+
+# The following packages are considered to be unsafe in a requirements file:
+# setuptools

+ 1
- 0
docs/.gitignore

@@ -1 +1,2 @@
 /_site
+/build/

+ 0
- 7
docs/Gemfile

@@ -1,7 +0,0 @@
-source "https://rubygems.org"
-
-gem "github-pages", group: :jekyll_plugins
-gem "jekyll-include-cache", group: :jekyll_plugins
-gem 'jekyll-scholar', group: :jekyll_plugins
-
-# gem 'wdm', '>= 0.1.0' if Gem.win_platform?

+ 0
- 293
docs/Gemfile.lock

@@ -1,293 +0,0 @@
-GEM
-  remote: https://rubygems.org/
-  specs:
-    activesupport (6.0.3.5)
-      concurrent-ruby (~> 1.0, >= 1.0.2)
-      i18n (>= 0.7, < 2)
-      minitest (~> 5.1)
-      tzinfo (~> 1.1)
-      zeitwerk (~> 2.2, >= 2.2.2)
-    addressable (2.7.0)
-      public_suffix (>= 2.0.2, < 5.0)
-    bibtex-ruby (4.4.7)
-      latex-decode (~> 0.0)
-    citeproc (1.0.10)
-      namae (~> 1.0)
-    citeproc-ruby (1.1.13)
-      citeproc (~> 1.0, >= 1.0.9)
-      csl (~> 1.5)
-    coffee-script (2.4.1)
-      coffee-script-source
-      execjs
-    coffee-script-source (1.11.1)
-    colorator (1.1.0)
-    commonmarker (0.17.13)
-      ruby-enum (~> 0.5)
-    concurrent-ruby (1.1.8)
-    csl (1.5.2)
-      namae (~> 1.0)
-    csl-styles (1.0.1.10)
-      csl (~> 1.0)
-    dnsruby (1.61.5)
-      simpleidn (~> 0.1)
-    em-websocket (0.5.2)
-      eventmachine (>= 0.12.9)
-      http_parser.rb (~> 0.6.0)
-    ethon (0.12.0)
-      ffi (>= 1.3.0)
-    eventmachine (1.2.7)
-    eventmachine (1.2.7-x64-mingw32)
-    execjs (2.7.0)
-    faraday (1.3.0)
-      faraday-net_http (~> 1.0)
-      multipart-post (>= 1.2, < 3)
-      ruby2_keywords
-    faraday-net_http (1.0.1)
-    ffi (1.15.0)
-    ffi (1.15.0-x64-mingw32)
-    forwardable-extended (2.6.0)
-    gemoji (3.0.1)
-    github-pages (212)
-      github-pages-health-check (= 1.17.0)
-      jekyll (= 3.9.0)
-      jekyll-avatar (= 0.7.0)
-      jekyll-coffeescript (= 1.1.1)
-      jekyll-commonmark-ghpages (= 0.1.6)
-      jekyll-default-layout (= 0.1.4)
-      jekyll-feed (= 0.15.1)
-      jekyll-gist (= 1.5.0)
-      jekyll-github-metadata (= 2.13.0)
-      jekyll-mentions (= 1.6.0)
-      jekyll-optional-front-matter (= 0.3.2)
-      jekyll-paginate (= 1.1.0)
-      jekyll-readme-index (= 0.3.0)
-      jekyll-redirect-from (= 0.16.0)
-      jekyll-relative-links (= 0.6.1)
-      jekyll-remote-theme (= 0.4.2)
-      jekyll-sass-converter (= 1.5.2)
-      jekyll-seo-tag (= 2.7.1)
-      jekyll-sitemap (= 1.4.0)
-      jekyll-swiss (= 1.0.0)
-      jekyll-theme-architect (= 0.1.1)
-      jekyll-theme-cayman (= 0.1.1)
-      jekyll-theme-dinky (= 0.1.1)
-      jekyll-theme-hacker (= 0.1.2)
-      jekyll-theme-leap-day (= 0.1.1)
-      jekyll-theme-merlot (= 0.1.1)
-      jekyll-theme-midnight (= 0.1.1)
-      jekyll-theme-minimal (= 0.1.1)
-      jekyll-theme-modernist (= 0.1.1)
-      jekyll-theme-primer (= 0.5.4)
-      jekyll-theme-slate (= 0.1.1)
-      jekyll-theme-tactile (= 0.1.1)
-      jekyll-theme-time-machine (= 0.1.1)
-      jekyll-titles-from-headings (= 0.5.3)
-      jemoji (= 0.12.0)
-      kramdown (= 2.3.0)
-      kramdown-parser-gfm (= 1.1.0)
-      liquid (= 4.0.3)
-      mercenary (~> 0.3)
-      minima (= 2.5.1)
-      nokogiri (>= 1.10.4, < 2.0)
-      rouge (= 3.26.0)
-      terminal-table (~> 1.4)
-    github-pages-health-check (1.17.0)
-      addressable (~> 2.3)
-      dnsruby (~> 1.60)
-      octokit (~> 4.0)
-      public_suffix (>= 2.0.2, < 5.0)
-      typhoeus (~> 1.3)
-    html-pipeline (2.14.0)
-      activesupport (>= 2)
-      nokogiri (>= 1.4)
-    http_parser.rb (0.6.0)
-    i18n (0.9.5)
-      concurrent-ruby (~> 1.0)
-    jekyll (3.9.0)
-      addressable (~> 2.4)
-      colorator (~> 1.0)
-      em-websocket (~> 0.5)
-      i18n (~> 0.7)
-      jekyll-sass-converter (~> 1.0)
-      jekyll-watch (~> 2.0)
-      kramdown (>= 1.17, < 3)
-      liquid (~> 4.0)
-      mercenary (~> 0.3.3)
-      pathutil (~> 0.9)
-      rouge (>= 1.7, < 4)
-      safe_yaml (~> 1.0)
-    jekyll-avatar (0.7.0)
-      jekyll (>= 3.0, < 5.0)
-    jekyll-coffeescript (1.1.1)
-      coffee-script (~> 2.2)
-      coffee-script-source (~> 1.11.1)
-    jekyll-commonmark (1.3.1)
-      commonmarker (~> 0.14)
-      jekyll (>= 3.7, < 5.0)
-    jekyll-commonmark-ghpages (0.1.6)
-      commonmarker (~> 0.17.6)
-      jekyll-commonmark (~> 1.2)
-      rouge (>= 2.0, < 4.0)
-    jekyll-default-layout (0.1.4)
-      jekyll (~> 3.0)
-    jekyll-feed (0.15.1)
-      jekyll (>= 3.7, < 5.0)
-    jekyll-gist (1.5.0)
-      octokit (~> 4.2)
-    jekyll-github-metadata (2.13.0)
-      jekyll (>= 3.4, < 5.0)
-      octokit (~> 4.0, != 4.4.0)
-    jekyll-include-cache (0.2.1)
-      jekyll (>= 3.7, < 5.0)
-    jekyll-mentions (1.6.0)
-      html-pipeline (~> 2.3)
-      jekyll (>= 3.7, < 5.0)
-    jekyll-optional-front-matter (0.3.2)
-      jekyll (>= 3.0, < 5.0)
-    jekyll-paginate (1.1.0)
-    jekyll-readme-index (0.3.0)
-      jekyll (>= 3.0, < 5.0)
-    jekyll-redirect-from (0.16.0)
-      jekyll (>= 3.3, < 5.0)
-    jekyll-relative-links (0.6.1)
-      jekyll (>= 3.3, < 5.0)
-    jekyll-remote-theme (0.4.2)
-      addressable (~> 2.0)
-      jekyll (>= 3.5, < 5.0)
-      jekyll-sass-converter (>= 1.0, <= 3.0.0, != 2.0.0)
-      rubyzip (>= 1.3.0, < 3.0)
-    jekyll-sass-converter (1.5.2)
-      sass (~> 3.4)
-    jekyll-scholar (5.16.0)
-      bibtex-ruby (~> 4.0, >= 4.0.13)
-      citeproc-ruby (~> 1.0)
-      csl-styles (~> 1.0)
-      jekyll (~> 3.0)
-    jekyll-seo-tag (2.7.1)
-      jekyll (>= 3.8, < 5.0)
-    jekyll-sitemap (1.4.0)
-      jekyll (>= 3.7, < 5.0)
-    jekyll-swiss (1.0.0)
-    jekyll-theme-architect (0.1.1)
-      jekyll (~> 3.5)
-      jekyll-seo-tag (~> 2.0)
-    jekyll-theme-cayman (0.1.1)
-      jekyll (~> 3.5)
-      jekyll-seo-tag (~> 2.0)
-    jekyll-theme-dinky (0.1.1)
-      jekyll (~> 3.5)
-      jekyll-seo-tag (~> 2.0)
-    jekyll-theme-hacker (0.1.2)
-      jekyll (> 3.5, < 5.0)
-      jekyll-seo-tag (~> 2.0)
-    jekyll-theme-leap-day (0.1.1)
-      jekyll (~> 3.5)
-      jekyll-seo-tag (~> 2.0)
-    jekyll-theme-merlot (0.1.1)
-      jekyll (~> 3.5)
-      jekyll-seo-tag (~> 2.0)
-    jekyll-theme-midnight (0.1.1)
-      jekyll (~> 3.5)
-      jekyll-seo-tag (~> 2.0)
-    jekyll-theme-minimal (0.1.1)
-      jekyll (~> 3.5)
-      jekyll-seo-tag (~> 2.0)
-    jekyll-theme-modernist (0.1.1)
-      jekyll (~> 3.5)
-      jekyll-seo-tag (~> 2.0)
-    jekyll-theme-primer (0.5.4)
-      jekyll (> 3.5, < 5.0)
-      jekyll-github-metadata (~> 2.9)
-      jekyll-seo-tag (~> 2.0)
-    jekyll-theme-slate (0.1.1)
-      jekyll (~> 3.5)
-      jekyll-seo-tag (~> 2.0)
-    jekyll-theme-tactile (0.1.1)
-      jekyll (~> 3.5)
-      jekyll-seo-tag (~> 2.0)
-    jekyll-theme-time-machine (0.1.1)
-      jekyll (~> 3.5)
-      jekyll-seo-tag (~> 2.0)
-    jekyll-titles-from-headings (0.5.3)
-      jekyll (>= 3.3, < 5.0)
-    jekyll-watch (2.2.1)
-      listen (~> 3.0)
-    jemoji (0.12.0)
-      gemoji (~> 3.0)
-      html-pipeline (~> 2.2)
-      jekyll (>= 3.0, < 5.0)
-    kramdown (2.3.0)
-      rexml
-    kramdown-parser-gfm (1.1.0)
-      kramdown (~> 2.0)
-    latex-decode (0.3.2)
-    liquid (4.0.3)
-    listen (3.4.1)
-      rb-fsevent (~> 0.10, >= 0.10.3)
-      rb-inotify (~> 0.9, >= 0.9.10)
-    mercenary (0.3.6)
-    mini_portile2 (2.5.0)
-    minima (2.5.1)
-      jekyll (>= 3.5, < 5.0)
-      jekyll-feed (~> 0.9)
-      jekyll-seo-tag (~> 2.1)
-    minitest (5.14.4)
-    multipart-post (2.1.1)
-    namae (1.0.2)
-    nokogiri (1.11.2)
-      mini_portile2 (~> 2.5.0)
-      racc (~> 1.4)
-    nokogiri (1.11.2-x64-mingw32)
-      racc (~> 1.4)
-    octokit (4.20.0)
-      faraday (>= 0.9)
-      sawyer (~> 0.8.0, >= 0.5.3)
-    pathutil (0.16.2)
-      forwardable-extended (~> 2.6)
-    public_suffix (4.0.6)
-    racc (1.5.2)
-    rb-fsevent (0.10.4)
-    rb-inotify (0.10.1)
-      ffi (~> 1.0)
-    rexml (3.2.4)
-    rouge (3.26.0)
-    ruby-enum (0.9.0)
-      i18n
-    ruby2_keywords (0.0.4)
-    rubyzip (2.3.0)
-    safe_yaml (1.0.5)
-    sass (3.7.4)
-      sass-listen (~> 4.0.0)
-    sass-listen (4.0.0)
-      rb-fsevent (~> 0.9, >= 0.9.4)
-      rb-inotify (~> 0.9, >= 0.9.7)
-    sawyer (0.8.2)
-      addressable (>= 2.3.5)
-      faraday (> 0.8, < 2.0)
-    simpleidn (0.2.1)
-      unf (~> 0.1.4)
-    terminal-table (1.8.0)
-      unicode-display_width (~> 1.1, >= 1.1.1)
-    thread_safe (0.3.6)
-    typhoeus (1.4.0)
-      ethon (>= 0.9.0)
-    tzinfo (1.2.9)
-      thread_safe (~> 0.1)
-    unf (0.1.4)
-      unf_ext
-    unf_ext (0.0.7.7)
-    unf_ext (0.0.7.7-x64-mingw32)
-    unicode-display_width (1.7.0)
-    zeitwerk (2.4.2)
-
-PLATFORMS
-  ruby
-  x64-mingw32
-
-DEPENDENCIES
-  github-pages
-  jekyll-include-cache
-  jekyll-scholar
-
-BUNDLED WITH
-   1.17.2

+ 0
- 19
docs/_config.yml

@@ -1,19 +0,0 @@
-title: Book Data Tools
-name: Michael Ekstrand
-plugins:
-  - jekyll/scholar
-
-remote_theme: pmarsceill/just-the-docs
-scholar:
-  style: chicago-note-bibliography
-
-aux_links:
-  "GitHub": https://github.com/BoiseState/bookdata-tools
-  "PIReT": https://piret.info
-
-footer_content: >
-  Copyright &copy; 2020 Boise State University.  Distributed under the MIT License.
-  This material is based upon work supported by the National Science Foundation under
-  Grant No. IIS 17-51278. Any opinions, findings, and conclusions or recommendations
-  expressed in this material are those of the author(s) and do not necessarily reflect
-  the views of the National Science Foundation.

+ 0
- 2
docs/_sass/custom/custom.scss

@@ -1,2 +0,0 @@
-$body-font-family: 'Lato', sans-serif;
-$mono-font-family: 'Source Code Pro', monospace;

+ 4
- 0
docs/_includes/head_custom.html

@@ -1,4 +1,8 @@
+{% extends "furo/base.html" %}
+
+{% block extrahead %}
 <link rel=stylesheet type="text/css" href="https://unpkg.com/@openfonts/lato_latin/index.css">
 <link rel=stylesheet type="text/css" href="https://unpkg.com/@openfonts/source-code-pro_latin/index.css">
 <script data-goatcounter="https://piret-bookdata.goatcounter.com/count"
         async src="//gc.zgo.at/count.js"></script>
+{% endblock %}

+ 55
- 0
docs/conf.py

@@ -0,0 +1,55 @@
+# -*- coding: utf-8 -*-
+
+import pybtex.plugin
+from pybtex.style.sorting import BaseSortingStyle
+from pybtex.style.formatting import plain
+
+project = 'Book Data Tools'
+copyright = '2020–2021 Boise State University'
+author = 'Michael D. Ekstrand'
+
+extensions = [
+    'myst_parser',
+    'sphinxcontrib.bibtex'
+]
+
+myst_enable_extensions = [
+    'deflist',
+    'colon_fence'
+]
+
+bibtex_bibfiles = [
+    'papers.bib'
+]
+
+html_theme = 'furo'
+html_theme_options = {
+    'repository_url': 'https://github.com/BoiseState/bookdata-tools',
+    'light_css_variables': {
+        'font-stack': 'Lato, sans-serif',
+        'font-stack--monospace': 'Source Code Pro, monospace'
+    }
+}
+html_baseurl = 'https://bookdata.piret.info'
+templates_path = ['_templates']
+
+
+class ChronoSort(BaseSortingStyle):
+    """Pybtex sorting style that lists bibliography entries newest-first.
+
+    Entries are keyed on ``(year, month, title)``; ``sort`` then reverses the
+    list returned by the base class so the most recent entry comes first.
+    """
+
+    # Three-letter English month prefixes mapped to calendar numbers, so that
+    # 'jan', 'Jan' and 'January' all compare as 1 instead of alphabetically.
+    _MONTH_NUMBERS = {
+        name: number
+        for number, name in enumerate(
+            ('jan', 'feb', 'mar', 'apr', 'may', 'jun',
+             'jul', 'aug', 'sep', 'oct', 'nov', 'dec'),
+            start=1,
+        )
+    }
+
+    def _month_number(self, month):
+        """Return the calendar number for a BibTeX month value, or 0 if unknown."""
+        text = str(month).strip().lower()
+        if text.isdigit():
+            return int(text)
+        # A missing or unrecognized month sorts before every real month of its year.
+        return self._MONTH_NUMBERS.get(text[:3], 0)
+
+    def sorting_key(self, entry):
+        """Build the sort key for one entry; missing fields fall back to ''/0."""
+        year = entry.fields.get('year', '')
+        month = self._month_number(entry.fields.get('month', ''))
+        title = entry.fields.get('title', '')
+        return year, month, title
+
+    def sort(self, entries):
+        """Return the entries in reverse of the base class's key order."""
+        # Named `ordered` to avoid shadowing the builtin `sorted`.
+        ordered = super().sort(entries)
+        ordered.reverse()
+        return ordered
+
+
+class ChronoStyle(plain.Style):
+    default_sorting_style = 'chrono'
+
+
+pybtex.plugin.register_plugin('pybtex.style.sorting', 'chrono', ChronoSort)
+pybtex.plugin.register_plugin('pybtex.style.formatting', 'chrono', ChronoStyle)

+ 0
- 4
docs/data/amazon.md

@@ -5,7 +5,6 @@ nav_order: 6
 ---
 
 # Amazon Ratings
-{: .no_toc}
 
 The [Amazon reviews data set](http://jmcauley.ucsd.edu/data/amazon/) consists of user-provided
 reviews and ratings for a variety of products.
@@ -16,9 +15,6 @@ Currently we import the ratings-only data from the Books segment of the 2014 dat
 
 Imported data lives in the `az` schema.  The source files are not automatically downloaded.
 
-1. TOC
-{:toc}
-
 ## Data Model Diagram
 
 ![Amazon data model](az.svg)

+ 0
- 4
docs/data/bx.md

@@ -5,7 +5,6 @@ nav_order: 5
 ---
 
 # BookCrossing
-{: .no_toc}
 
 The [BookCrossing data set](http://www2.informatik.uni-freiburg.de/~cziegler/BX/) consists of user-provided
 ratings — both implicit and explicit — of books.
@@ -17,9 +16,6 @@ ratings — both implicit and explicit — of books.
 Imported data lives in the `bx` schema.  The source data files are automatically downloaded and unpacked by
 the provided scripts and DVC stages.
 
-1. TOC
-{:toc}
-
 ## Data Model Diagram
 
 ![BookCrossing data model](bx.svg)

+ 0
- 1
docs/data/cluster.md

@@ -5,7 +5,6 @@ nav_order: 8
 ---
 
 # Book Clusters
-{: .no_toc}
 
 For recommendation and analysis, we often want to look at *works* instead of individual books or
 editions of those books.  The same material by the same author(s) may be reprinted in many different

+ 4
- 4
docs/data/gender.md

@@ -5,14 +5,14 @@ nav_order: 9
 ---
 
 # Book Author Gender
-{: .no_toc}
 
 We compute the author gender for book clusters using the integrated data set.
 
-**See the paper for important limitations.**
+:::{warning}
+See the [paper][] for important limitations and ethical considerations.
+:::
 
-1. TOC
-{:toc}
+[paper]: https://md.ekstrandom.net/pubs/bag-extended
 
 ## Import Steps
 

+ 0
- 4
docs/data/goodreads.md

@@ -5,7 +5,6 @@ nav_order: 7
 ---
 
 # GoodReads (UCSD Book Graph)
-{: .no_toc}
 
 We import GoodReads data from the [UCSD Book Graph](https://sites.google.com/eng.ucsd.edu/ucsdbookgraph/home)
 for additional book and user interaction information.  The source files are not automatically downloaded; you
@@ -24,9 +23,6 @@ We do not yet support reviews.
 
 Imported data lives in the `gr` schema.
 
-1. TOC
-{:toc}
-
 ## Data Model Diagram
 
 ![GoodReads model diagram](goodreads.svg)

+ 0
- 4
docs/data/ids.md

@@ -5,13 +5,9 @@ nav_order: 1
 ---
 
 # Common Identifiers
-{: .no_toc}
 
 There are two key identifiers that are used across data sets.
 
-1. TOC
-{:toc}
-
 ## ISBNs
 
 We use ISBNs for a lot of data linking.  In order to speed up ISBN-based operations, we map textual ISBNs to numeric 'ISBN IDs'.

+ 14
- 0
docs/data/index.md

@@ -12,3 +12,17 @@ integration.
 It doesn't describe every intermediate detail or table.
 
 The data is organized into PostgreSQL schemas to make it easier to navigate; one effect of this is that if you just look at the default `public` schema, you will see very few of the tables.  Further, some tables are materialized views, so they may not show up in the table list.  The `\dm` command in `psql` shows materialized views.
+
+```{toctree}
+:maxdepth: 1
+
+ids
+loc
+openlib
+viaf
+bx
+amazon
+goodreads
+cluster
+gender
+```

+ 0
- 7
docs/data/loc.md

@@ -1,11 +1,8 @@
 ---
 title: Library of Congress
-parent: Data Model
-nav_order: 2
 ---
 
 # Library of Congress
-{: .no_toc}
 
 One of our sources of book data is the Library of Congress [MDSConnect Books](https://www.loc.gov/cds/products/MDSConnect-books_all.html) bibliography records.
 
@@ -13,9 +10,6 @@ We download and import the XML versions of these files.
 
 Imported data lives under the `locmds` schema.
 
-1. TOC
-{:toc}
-
 ## Data Model Diagram
 
 ![LOC data model](loc.svg)
@@ -43,7 +37,6 @@ The import is controlled by the following DVC steps:
 :   Run `loc-mds-book-info.sql` to extract additional book data into tables.
 
 ## Raw Book Data
-{: #raw}
 
 The `locmds.book_marc_fields` table contains the raw data imported from the MARC files, as MARC fields.  The LOC book data follows the [MARC 21 Bibliographic Data format](https://www.loc.gov/marc/bibliographic/); the various tags, field codes, and indicators are defined there.  This table is not terribly useful on its own, but it is the source from which the other tables are derived.
 

+ 0
- 4
docs/data/openlib.md

@@ -5,7 +5,6 @@ nav_order: 3
 ---
 
 # OpenLibrary
-{: .no_toc}
 
 We also source book data from [OpenLibrary](https://openlibrary.org), as downloaded from
 their [developer dumps](https://openlibrary.org/developers/dumps).
@@ -15,9 +14,6 @@ updated by modifying the `data/ol_dump_*.txt.gz.dvc` files.
 
 Imported data lives in the `ol` schema.
 
-1. TOC
-{:toc}
-
 ## Import Steps
 
 The import is controlled by the following DVC steps:

+ 0
- 4
docs/data/viaf.md

@@ -5,7 +5,6 @@ nav_order: 4
 ---
 
 # Virtual International Authority File
-{: .no_toc}
 
 We source author data from the [Virtual International Authority File](http://viaf.org), as downloaded from
 their [data dumps](http://viaf.org/viaf/data).  This file is slow and error-prone to download, and is
@@ -13,9 +12,6 @@ not* auto-downloaded.
 
 Imported data lives in the `viaf` schema.
 
-1. TOC
-{:toc}
-
 ## Import Steps
 
 The import is controlled by the following DVC steps:

+ 5
- 8
docs/implementation/index.md

@@ -5,7 +5,6 @@ has_children: true
 ---
 
 # Design and Implementation
-{: .no_toc}
 
 These data and integration tools are designed to support several goals:
 
@@ -15,8 +14,11 @@ These data and integration tools are designed to support several goals:
 - Support updates (e.g. new OpenLibrary dumps) by replacing the file and re-running
 - Efficient import and integration
 
-1. TOC
-{:toc}
+```{toctree}
+status
+layout
+dataset
+```
 
 ## Implementation Principles
 
@@ -27,11 +29,6 @@ These goals are realized through a few technology and design decisions:
 - Implement as much data integration as possible in declarative SQL.
 - Make SQL scripts re-runnable, so they will either refresh or delete and recreate their outputs. Deletes cascading to downstream steps are fine, because the stage runner will re-run those stages anyway.
 
-## Further Information
-
-- [Status Tracking](status.md)
-- [Repository Layout](layout.md)
-- [How to Add a Data Set](dataset.md)
 
 ## DVC Dependency Graph
 

+ 0
- 1
docs/implementation/layout.md

@@ -5,7 +5,6 @@ nav_order: 2
 ---
 
 # Layout
-{: .no_toc}
 
 The import code consists of Python, Rust, and SQL code, wired together with DVC.
 

+ 0
- 3
docs/implementation/status.md

@@ -1,11 +1,8 @@
 ---
 title: Status Tracking
-parent: Implementation
-nav_order: 1
 ---
 
 # Status Tracking
-{: .no_toc}
 
 The book tools are built around [Data Version Control](https://dvc.org), a tool for managing
 data processing pipelines.  We use the software in a somewhat unusual way due to our use of

+ 9
- 0
docs/index.md

@@ -33,6 +33,15 @@ page that describes the data and its appropriate citation.
 
 See the [Setup page](using/setup.md) to get started and for system requirements.
 
+```{toctree}
+:maxdepth: 1
+
+using/index
+data/index
+implementation/index
+papers
+```
+
 ## Video
 
 I recorded a video walking through the integration as an example for my [Data Science class](https://cs533.ekstrandom.net).

+ 2
- 1
docs/_bibliography/users.bib

@@ -10,7 +10,8 @@
   archivePrefix = "arXiv",
   eprint        = "2009.01311",
   primaryClass  = "cs.IR",
-  arxivid       = "2009.01311"
+  arxivid       = "2009.01311",
+  journal = "arXiv preprints"
 }
 
 @ARTICLE{Ekstrand2021-iu,

+ 0
- 8
docs/papers.md

@@ -1,8 +0,0 @@
----
-title: Research Using BookData
-nav_order: 5
----
-
-# Papers Using BookData
-
-{% bibliography --file users %}

+ 9
- 0
docs/papers.rst

@@ -0,0 +1,9 @@
+Papers Using BookData
+=====================
+
+These are papers we know to be using this book data integration.
+
+.. bibliography:: papers.bib
+    :all:
+    :list: enumerated
+    :style: chrono

+ 6
- 0
docs/using/index.md

@@ -7,3 +7,9 @@ nav_order: 2
 # Using the Tools
 
 This section of the documentation describes how to set up and use the book data integration tools.
+
+```{toctree}
+running
+setup
+sources
+```

+ 0
- 4
docs/using/setup.md

@@ -5,13 +5,9 @@ nav_order: 2
 ---
 
 # Setting Up the Environment
-{: .no_toc}
 
 These tools require PostgreSQL and an Anaconda installation.
 
-1. TOC
-{:toc}
-
 ## System Requirements
 
 You will need: