Browse Source

Get path-based statuses working

Michael Ekstrand 8 months ago
parent
commit
926f53b8f6
5 changed files with 42 additions and 14 deletions
  1. 16
    11
      bookdata/dvcpatch.py
  2. 9
    0
      bookdata/tracking.py
  3. 3
    0
      dvc.sh
  4. 7
    0
      init.dvc
  5. 7
    3
      scripts/dvcw.py

+ 16
- 11
bookdata/dvcpatch.py

@@ -5,11 +5,14 @@ Support code for our custom DVC remote.
 import logging
 
 from urllib.parse import urlparse
+import hashlib
 
 from dvc.remote.base import RemoteBASE
 from dvc.output.base import OutputBase
 from dvc.dependency.base import DependencyBase
 
+from . import tracking
+
 _log = logging.getLogger('dvc.bgpatch')
 
 
@@ -21,27 +24,29 @@ class PGRemote(RemoteBASE):
     PARAM_CHECKSUM = 'md5'
 
     def __init__(self, *args, **kwargs):
-        _log.error('creating pgremote')
         super().__init__(*args, **kwargs)
 
     def get_file_checksum(self, path_info):
-        _log.error('checksum from {}', path_info)
-        raise NotImplementedError()
+        _log.debug('checksum from %s', path_info)  # logging interpolates %-style args; '{}' is never filled
+        status = tracking.stage_status(path_info.bucket)  # the bucket is passed as the stage name
+        h = hashlib.md5()
+        h.update(status.encode('utf-8'))  # MD5 of the status text is what gets reported as the checksum
+        return h.hexdigest()
 
     def copy(self, from_info, to_info):
-        _log.error('copy from %s', from_info)
-        _log.error('copy to %s', to_info)
+        _log.debug('copy from %s', from_info)
+        _log.debug('copy to %s', to_info)
         raise NotImplementedError()
 
     def exists(self, path_info):
-        _log.error('exists? {}', path_info)
-        _log.info('pi type {}', type(path_info))
-        _log.info('pi scheme {}', path_info.scheme)
-        _log.info('pi path {}', path_info.bucket)
-        raise NotImplementedError()
+        _log.debug('exists? %s', path_info)  # logging interpolates %-style args; '{}' is never filled
+        return tracking.stage_exists(path_info.bucket)  # the bucket is passed as the stage name
+
+    def remove(self, path_info):
+        _log.info('asked to remove %s, ignoring', path_info)  # no-op: the request is only logged (%-style arg)
 
     def _download(self, from_info, to_info, name, no_progress_bar):
-        _log.error('exists? {}', from_info)
+        _log.info('download requested for %s', from_info)  # logging interpolates %-style args, not '{}'
         raise NotImplementedError()
 
 

+ 9
- 0
bookdata/tracking.py

@@ -128,6 +128,15 @@ def end_stage(cur, stage, key=None):
     ''', {'stage': stage, 'key': key})
 
 
+def stage_exists(stage):
+    "Return True when the stage_status table holds at least one row for the named stage."
+    with db.connect() as dbc, dbc.cursor() as cur:
+        cur.execute('SELECT COUNT(*) FROM stage_status WHERE stage_name = %s', [stage])
+        count, = cur.fetchone()
+        _log.debug('have %d records for stage %s', count, stage)
+        return count > 0  # hand callers a real bool rather than the raw row count
+
+
 def stage_status(stage, file=None, *, timestamps=False):
     if file is None:
         sf = StringIO()

+ 3
- 0
dvc.sh

@@ -0,0 +1,3 @@
+#!/bin/sh
+
+exec python -m scripts.dvcw "$@"

+ 7
- 0
init.dvc

@@ -0,0 +1,7 @@
+md5: 422ddcff2659c53b5443d5502b77a1cc
+outs:
+- path: pgstat://init
+  cache: false
+  metric: false
+  persist: false
+  md5: 1ddbd0ddfedf41ea48b773f4fbdb768e

+ 7
- 3
scripts/dvcw.py

@@ -1,6 +1,10 @@
-from bookdata import dvcpatch
 from dvc.main import main
 
 
-dvcpatch.patch()
-main()
+if __name__ == '__main__':
+    try:
+        from bookdata import dvcpatch
+        dvcpatch.patch()
+    except ImportError:
+        pass
+    main()