Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

ratings.py 1.3 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
  1. from io import StringIO
  2. import csv
  3. import numpy as np
  4. from tqdm import tqdm
  5. import psycopg2
  6. from invoke import task
  @task
  def import_bx_ratings(c):
      """Import BookCrossing ratings into the database.

      Steps, in order:

      1. Run ``bx-schema.sql`` through ``psql`` to initialize the BX schema.
      2. Read ``data/BX-Book-Ratings.csv`` as raw bytes and sanitize it in
         memory: drop every byte >= 128, drop carriage returns, and turn
         each ``;`` into ``,`` so the text parses as comma-separated CSV.
      3. Insert each parsed row into ``bx_raw_ratings`` inside a single
         transaction.

      Args:
          c: Invoke context; used to run the ``psql`` shell command.
      """
      print("initializing BX schema")
      c.run('psql -f bx-schema.sql')

      print("cleaning BX rating data")
      with open('data/BX-Book-Ratings.csv', 'rb') as src:
          raw = src.read()
      octets = np.frombuffer(raw, dtype='u1')
      # One mask for both filters: keep 7-bit bytes that are not CR.
      # Boolean indexing returns a fresh, writable copy (frombuffer itself
      # hands back a read-only view of the bytes object).
      keep = (octets < 128) & (octets != ord('\r'))
      octets = octets[keep]
      # Switch the field delimiter from semicolon to comma, in place.
      octets[octets == ord(';')] = ord(',')

      print('importing BX to database')
      text = octets.tobytes().decode('utf8')
      reader = csv.DictReader(StringIO(text))
      insert_sql = 'INSERT INTO bx_raw_ratings (user_id, isbn, rating) VALUES (%s, %s, %s)'
      # Empty DSN: connection parameters are left to the driver's defaults.
      conn = psycopg2.connect("")
      try:
          # `with conn` wraps the inserts in one transaction; it does not
          # close the connection, hence the explicit close() below.
          with conn:
              with conn.cursor() as cursor:
                  for record in tqdm(reader):
                      params = (record['User-ID'], record['ISBN'], record['Book-Rating'])
                      cursor.execute(insert_sql, params)
      finally:
          conn.close()
  @task
  def import_az_ratings(c):
      """Import Amazon ratings into the database.

      Runs ``az-schema.sql`` through ``psql`` to reset the Amazon schema, then
      loads ``data/ratings_Books.csv`` into ``az_raw_ratings`` using psql's
      client-side copy meta-command in CSV mode.

      Args:
          c: Invoke context; used to run the ``psql`` shell commands.
      """
      import shlex  # local import: only this task needs shell quoting

      print('Resetting Amazon schema')
      c.run('psql -f az-schema.sql')
      print('Importing Amazon ratings')
      # The meta-command itself contains single quotes (around the file name).
      # A POSIX shell does not honor backslash escapes inside a single-quoted
      # word, so backslash-escaping those quotes by hand yields an unterminated
      # quote; shlex.quote produces one correctly quoted shell argument.
      copy_command = "\\copy az_raw_ratings FROM 'data/ratings_Books.csv' WITH CSV"
      c.run('psql -c ' + shlex.quote(copy_command))
Tip!

Press p or ← to see the previous file, or n or → to see the next file

Comments

Loading...