Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

chroma.py 2.6 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
  1. import chromadb
  2. from chromadb.api.types import Documents, Embeddings
  3. from chromadb.api.models.Collection import Collection
  4. from database.duckdb import get_duckdb_data, get_questions_answer
  5. from typing import Tuple, List
  6. CHROM_PERSISTENT_PATH = "/path/for/chroma/persistence"
  7. def create_chroma_collection(documents: List[Tuple[str, str]], name: str) -> Collection:
  8. """
  9. Create chroma collection if not exists
  10. Parameters:
  11. -----------
  12. documents: Documents that needs to be added to collection
  13. name: Name of the collection
  14. Returns:
  15. -----------
  16. ChromaDB collection client
  17. """
  18. print("Working on creating collection ", name)
  19. chroma_client = chromadb.PersistentClient(path=CHROM_PERSISTENT_PATH)
  20. db = chroma_client.create_collection(name=name)
  21. for i, row in enumerate(documents):
  22. if row[0] == None:
  23. continue
  24. mapping_ids = ",".join(row[1])
  25. db.add(documents=row[0], ids=str(i), metadatas={"mapping_ids": mapping_ids})
  26. return db
  27. def get_chroma_collection(name: str) -> Collection:
  28. """
  29. Get chroma collection client
  30. Parameters:
  31. -----------
  32. name: Name of the collection
  33. Returns:
  34. -----------
  35. ChromaDB collection client
  36. """
  37. chroma_client = chromadb.PersistentClient(path=CHROM_PERSISTENT_PATH)
  38. try:
  39. db = chroma_client.get_collection(name)
  40. except:
  41. print("Chroma collection not found")
  42. data = get_duckdb_data()
  43. db = create_chroma_collection(data, name)
  44. return db
  45. def get_relevant_qa(
  46. db: Collection, question: str, sample_question_ids: List[str], top_k: int = 10
  47. ) -> List[Tuple[str, str, str]]:
  48. """
  49. Get relevant Questions and Answers
  50. Parameters:
  51. -----------
  52. db: Chroma collection client
  53. question: Question to which relevant questions needs to be searched for
  54. sample_question_ids: List of ids of sample questions
  55. name: Name of the collection
  56. top_k: No of similar questions that needs to be returned by chroma
  57. Returns:
  58. -----------
  59. List of relevant questions, answers and ratings
  60. """
  61. results = db.query(query_texts=[question], n_results=top_k)
  62. relevant_qs_ids = results["metadatas"][0]
  63. # print("Relevant qs_ids chroma ", relevant_qs_ids)
  64. res = []
  65. for qs_ids in relevant_qs_ids:
  66. mapping_ids = qs_ids["mapping_ids"].split(",")
  67. tmp = [i for i in mapping_ids if int(i) in sample_question_ids]
  68. res += tmp
  69. res = tuple(res)
  70. if len(res) == 0:
  71. raise Exception("No relevant questions found in the filtered dataset")
  72. # print("Length of relevaent ids ", len(res))
  73. data = get_questions_answer(res)
  74. return data
Tip!

Press p to see the previous file, or n to see the next file.

Comments

Loading...