Browse Source

y u no work?

Michael Ekstrand 10 months ago
parent
commit
34d8a5b39c
2 changed files with 45 additions and 8 deletions
  1. 9
    8
      src/commands/import_json.rs
  2. 36
    0
      src/io.rs

+ 9
- 8
src/commands/import_json.rs

@@ -13,11 +13,12 @@ use anyhow::{Result, anyhow};
 use serde::{Deserialize};
 use toml;
 
-use crate::io::{HashRead, HashWrite};
+use crate::io::{HashRead, HashWrite, DelimPrinter};
 use crate::cleaning::*;
 use crate::tsv::split_first;
 use crate::db::{DbOpts, CopyRequest};
 use crate::tracking::StageOpts;
+use crate::logging::set_progress;
 use super::Command;
 
 /// Process OpenLib data into format suitable for PostgreSQL import.
@@ -91,27 +92,26 @@ impl ImportSpec {
     let mut n = 0;
     for line in src.lines() {
       let mut line = line?;
+      let mut delim = DelimPrinter::new("\t", "\n");
       for i in 0..self.format.len() {
         let (fld, rest) = split_first(&line).ok_or_else(|| anyhow!("invalid line"))?;
         match self.format[i] {
           ColOp::Skip => (),
           ColOp::String => {
-            if i > 0 {
-              dst.write_all(b"\t")?;
-            }
+            debug!("writing string field {}", fld);
+            delim.preface(dst)?;
             write_pgencoded(dst, fld.as_bytes())?;
           },
           ColOp::JSON => {
-            if i > 0 {
-              dst.write_all(b"\t")?;
-            }
+            delim.preface(dst)?;
+            debug!("writing JSON field {}", fld);
             clean_json(&fld, &mut jsbuf);
             write_pgencoded(dst, jsbuf.as_bytes())?;
           }
         }
         line = rest.to_string();
       }
-      dst.write_all(b"\n")?;
+      delim.end(dst)?;
       n += 1;
     }
     Ok(n)
@@ -135,6 +135,7 @@ impl Command for ImportJson {
     let fs = File::open(infn)?;
     let pb = ProgressBar::new(fs.metadata()?.len());
     pb.set_style(ProgressStyle::default_bar().template("{elapsed_precise} {bar} {percent}% {bytes}/{total_bytes} (eta: {eta})"));
+    let _pbl = set_progress(&pb);
 
     // We want to hash the file while we read it
     let mut in_hash = Sha1::new();

+ 36
- 0
src/io.rs

@@ -1,6 +1,8 @@
 use std::io;
 use sha1::Sha1;
 
+use log::*;
+
 /// Write wrapper that computes Sha1 checksums of the data written.
 pub struct HashWrite<'a, W: io::Write> {
   writer: W,
@@ -51,3 +53,37 @@ impl <'a, R: io::Read> io::Read for HashRead<'a, R> {
     Ok(n)
   }
 }
+
+/// Helper that writes the separators of delimited records.
+///
+/// Call [`DelimPrinter::preface`] immediately before writing each field and
+/// [`DelimPrinter::end`] once a record is complete.  The delimiter is emitted
+/// before every field except the first one prefaced in a record, so a record
+/// never starts with a delimiter.
+pub struct DelimPrinter<'a> {
+  /// Bytes written between two consecutive fields.
+  delim: &'a [u8],
+  /// Bytes written to terminate a record.
+  end: &'a [u8],
+  /// True until the first field of the current record has been prefaced.
+  first: bool
+}
+
+impl <'a> DelimPrinter<'a> {
+  /// Create a printer that separates fields with `delim` and terminates
+  /// records with `end`.  The printer starts out at the beginning of a record.
+  pub fn new(delim: &'a str, end: &'a str) -> DelimPrinter<'a> {
+    DelimPrinter {
+      delim: delim.as_bytes(),
+      end: end.as_bytes(),
+      first: true
+    }
+  }
+
+  /// Prepare `w` for the next field of the current record.
+  ///
+  /// Nothing is written for the first field of a record; for every later
+  /// field the delimiter is written to `w`.
+  ///
+  /// Returns `Ok(true)` if the delimiter was written and `Ok(false)` if this
+  /// was the first field of the record.
+  ///
+  /// # Errors
+  ///
+  /// Returns any error reported by `w` while writing the delimiter.
+  pub fn preface<W: io::Write>(&mut self, w: &mut W) -> io::Result<bool> {
+    if self.first {
+      // The first field needs no delimiter, but the flag must be cleared so
+      // that every following field in this record is preceded by one.
+      self.first = false;
+      Ok(false)
+    } else {
+      debug!("writing preface");
+      w.write_all(self.delim)?;
+      Ok(true)
+    }
+  }
+
+  /// Terminate the current record by writing the end marker to `w`.
+  ///
+  /// # Errors
+  ///
+  /// Returns any error reported by `w`; in that case the printer is not
+  /// reset to the start of a record.
+  pub fn end<W: io::Write>(&mut self, w: &mut W) -> io::Result<()> {
+    debug!("writing end");
+    w.write_all(self.end)?;
+    // The next field prefaced starts a new record and gets no delimiter.
+    self.first = true;
+    Ok(())
+  }
+}