Browse Source

Use run.py

Michael Ekstrand 1 year ago
parent
commit
10a126641d

+ 5
- 0
README.md

@@ -84,6 +84,11 @@ Individual steps can be run with their corresponding `.dvc` files.
 
 The import code consists of Python, Rust, and SQL code, wired together with DVC.
 
+### Python Scripts
+
+Python scripts live under `scripts`, as a Python package.  Do not launch them directly; run them via
+`run.py` (e.g. `python run.py sql-script common-schema.sql`), which sets up the environment for them.
+
 ### DVC Usage and Stage Files
 
 In order to allow DVC to be aware of current database state, we use a little bit of an unconventional

+ 1
- 2
bookdata/__init__.py

@@ -38,14 +38,13 @@ def script_log(name, debug=False):
     Initialize logging and get a logger for a script.
 
     Args:
-        name(str): The ``__file__`` of the script being run.
+        name(str): The ``__name__`` of the script being run.
         debug(bool): whether to enable debug logging to the console
     """
 
     if not _initialized:
         setup(debug)
 
-    name = pathlib.Path(name).stem
     logger = logging.getLogger(name)
 
     return logger

+ 1
- 1
common-schema.dvc

@@ -1,5 +1,5 @@
 md5: 62117241aaa5397c9163d71ed8f8bc50
-cmd: python -m scripts.sql-script common-schema.sql
+cmd: python run.py sql-script common-schema.sql
 deps:
 - md5: 4fad15b367f7c419cb4e66fb932cb187
   path: common-schema.sql

+ 1
- 1
common-schema.status.dvc

@@ -1,5 +1,5 @@
 md5: 176664268f649ed20ee2344a4a375df3
-cmd: python -m scripts.stage-status common-schema
+cmd: python run.py stage-status common-schema
 deps:
 - md5: f97e8e5a91d87f93ff674b4b247081f9
   path: common-schema.transcript

+ 1
- 1
data/loc-listings.dvc

@@ -1,5 +1,5 @@
 md5: a5e6fb8537ea047d6f2503418e8d9888
-cmd: python -m scripts.loc.list-files https://www.loc.gov/cds/downloads/MDSConnect/
+cmd: python run.py loc.list-files https://www.loc.gov/cds/downloads/MDSConnect/
   data/loc-listings
 wdir: ..
 outs:

+ 2
- 2
init.status.dvc

@@ -1,5 +1,5 @@
-md5: 317530f6a2d1e9620bcfd6c1b7e1e4ce
-cmd: python -m scripts.stage-status init
+md5: b7a0ea044911416c420a75db637f1091
+cmd: python run.py stage-status init
 outs:
 - md5: 4a8d2089d9ff6ad11db88f2fb1de021e
   path: init.status

+ 1
- 1
loc-id-schema.dvc

@@ -1,5 +1,5 @@
 md5: abf81a8761235c47103068a29463e236
-cmd: python -m scripts.sql-script loc-id-schema.sql
+cmd: python run.py sql-script loc-id-schema.sql
 deps:
 - md5: 194e550021d1c3846b5971292d826cd6
   path: loc-id-schema.sql

+ 1
- 1
loc-id-schema.status.dvc

@@ -1,5 +1,5 @@
 md5: 57be45ac6206938d20d3ce395c725660
-cmd: python -m scripts.stage-status loc-id-schema
+cmd: python run.py stage-status loc-id-schema
 deps:
 - md5: 4af92733734baee88da52ac1565286ab
   path: loc-id-schema.transcript

+ 1
- 1
loc-mds-schema.dvc

@@ -1,5 +1,5 @@
 md5: 9a04a91df7c38fa581ea5301dca422a4
-cmd: python -m scripts.sql-script loc-mds-schema.sql
+cmd: python run.py sql-script loc-mds-schema.sql
 deps:
 - md5: a273769b26c093a942f53e00fd3a83c9
   path: loc-mds-schema.sql

+ 1
- 1
loc-mds-schema.status.dvc

@@ -1,5 +1,5 @@
 md5: e5a1151bbc60b8e3ad563caf0924dc57
-cmd: python -m scripts.stage-status loc-mds-schema
+cmd: python run.py stage-status loc-mds-schema
 deps:
 - md5: c85fac0bd1548fc0a5925d0211b93718
   path: loc-mds-schema.transcript

+ 22
- 0
run.py

@@ -0,0 +1,22 @@
+"""
+Run a Python script.  The first argument names a module in the 'scripts' package (e.g. 'sql-script'); the remaining arguments are left in sys.argv for that script.
+"""
+
+import sys
+import runpy
+from pathlib import Path
+import logging
+
+_log = logging.getLogger('run.py')
+
+src_dir = Path(__file__).parent
+sys.path.insert(0, str(src_dir))  # sys.path entries must be strings; the import system ignores other types
+
+from bookdata import setup
+setup()
+
+script = sys.argv[1]
+_log.info('preparing to run %s', script)
+del sys.argv[1]  # drop the script name so the target script only sees its own arguments
+
+runpy.run_module(f'scripts.{script}', alter_sys=True)

+ 1
- 1
scripts/loc/list-files.py

@@ -11,7 +11,7 @@ import html5lib
 from bookdata import script_log
 from docopt import docopt
 
-_log = script_log(__file__)
+_log = script_log(__name__)
 
 args = docopt(__doc__)
 

+ 1
- 1
scripts/sql-script.py

@@ -32,7 +32,7 @@ from bookdata import script_log
 from bookdata import db
 
 opts = docopt(__doc__)
-_log = script_log(__file__, opts.get('--verbose'))
+_log = script_log(__name__, opts.get('--verbose'))
 
 
 script_file = Path(opts.get('SCRIPT'))

+ 1
- 1
scripts/stage-status.py

@@ -15,7 +15,7 @@ import sys
 from docopt import docopt
 from bookdata import db, script_log
 
-_log = script_log(__file__)
+_log = script_log(__name__)
 opts = docopt(__doc__)
 
 timestamps = opts.get('--timestamps')