Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

dowload_data.py 1.3 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import os
from datetime import date
from urllib.parse import quote_plus

import pandas as pd
import sqlalchemy
from sqlalchemy.orm import sessionmaker
  5. # TODO config with paths and passwords
  6. engine = sqlalchemy.create_engine("mysql+pymysql://root:$a8`k?B2y4nUxX2G@40.119.1.127:32006/nl2ml")
  7. Session = sessionmaker(bind=engine)
  8. session = Session()
  9. sql = '''select
  10. code_block_id,
  11. code_block ,
  12. data_format ,
  13. graph_vertex_id ,
  14. errors,
  15. marks,
  16. kaggle_id,
  17. competition_id
  18. from chunks ch
  19. left join codeblocks c on ch.code_block_id = c.id
  20. left join notebooks n on c.notebook_id = n.id'''
  21. data = pd.read_sql_query(sql, engine)
  22. print(data.shape)
  23. data.to_csv(f'../data/markup_data_{date.today()}.csv', index=False)
  24. sql = '''select id, graph_vertex, graph_vertex_subclass from graph_vertices'''
  25. data = pd.read_sql_query(sql, engine)
  26. print(data.shape)
  27. data.to_csv(f'../data/actual_graph_{date.today()}.csv', index=False)
  28. sql = '''select id, ref_link, comp_name, comp_type, description, metric, datatype, subject, problemtype, insert_ts
  29. from competitions
  30. where metric is not NULL
  31. and metric != 'unkown metric'
  32. and ref_link is not NUll
  33. and comp_type != 'inClass' '''
  34. data = pd.read_sql_query(sql, engine)
  35. print(data.shape)
  36. data.to_csv(f'../data/competitions_{date.today()}.csv', index=False)
Tip!

Press p to see the previous file, or n to see the next file.

Comments

Loading...