Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

duckdb.py 2.4 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
  1. import duckdb
  2. import pandas as pd
  3. import os
  4. from typing import List, Tuple
  5. path = "/path/for/duckdb/data/"
  6. filepaths = [path + f for f in os.listdir(path) if f.endswith(".csv")]
  7. def get_duckdb_data() -> List[Tuple[str, List[str]]]:
  8. """
  9. Get all duckdb data that needs to be send to vector store
  10. Parameters:
  11. -----------
  12. None
  13. Returns:
  14. -----------
  15. List of questions and question_ids
  16. """
  17. print("Working on getting all duckdb data")
  18. df = pd.concat(map(pd.read_csv, filepaths))
  19. data = duckdb.sql(
  20. """SELECT interview_question,
  21. LIST(CAST (id AS VARCHAR)) AS question_ids
  22. FROM df group by company_name,job_title,job_category,skill_tested,interview_question"""
  23. ).fetchall()
  24. return data
  25. def get_sample_questions(
  26. company_name: str, job_title: str, skill_tested: str, no_of_sample_questions: int
  27. ) -> Tuple[List[str], List[int]]:
  28. """
  29. Sample questions that needs to shared to question generator based on user inputs
  30. Parameters:
  31. -----------
  32. company_name: Name of the company
  33. job_title: Job title
  34. skill_tested: Skill that needs to be tested
  35. no_of_sample_questions: No. of sample questions that needs to returned
  36. Returns:
  37. -----------
  38. Tuple of relevant questions and question_ids
  39. """
  40. query_with_skill = f"SELECT id,interview_question FROM df WHERE trim(company_name)='{company_name}' AND trim(job_title)='{job_title}' AND trim(skill_tested)='{skill_tested}'"
  41. query_with_no_skill = f"SELECT id,interview_question FROM df WHERE trim(company_name)='{company_name}' AND trim(job_title)='{job_title}'"
  42. df = pd.concat(map(pd.read_csv, filepaths))
  43. res = duckdb.sql(query_with_skill).fetchall()
  44. if len(res) == 0:
  45. res = duckdb.sql(query_with_no_skill).fetchall()
  46. rel_ids = []
  47. rel_ques = set()
  48. for row in res:
  49. rel_ids.append(row[0])
  50. rel_ques.add(row[1])
  51. return list(rel_ques)[:no_of_sample_questions], rel_ids
  52. def get_questions_answer(filter_ids: tuple) -> List[Tuple[str, str, int]]:
  53. """
  54. Get question and answers based on relevant ids
  55. Parameters:
  56. -----------
  57. filter_ids: Tuple of all the relevant ids that needs to be searched
  58. Returns:
  59. -----------
  60. List of question,answers and rating as tuple
  61. """
  62. df = pd.concat(map(pd.read_csv, filepaths))
  63. query = f"SELECT question,answer,rating FROM df WHERE id IN {filter_ids}"
  64. data = duckdb.sql(query).fetchall()
  65. return data
Tip!

Press p or ← to see the previous file, or n or → to see the next file

Comments

Loading...