Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

vocab_test.py 1.2 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
  1. from pathlib import Path
  2. from bald.vocab import Vocab
  3. def test_vocab():
  4. vocab = Vocab()
  5. token_to_id = vocab.token_to_id
  6. id_to_token = vocab.id_to_token
  7. assert len(token_to_id) == len(id_to_token)
  8. assert len(token_to_id) == 3
  9. i = vocab.add_token("godzilla")
  10. assert token_to_id["godzilla"] == i
  11. assert vocab.lookup_token(i) == "godzilla"
  12. assert id_to_token[i] == "godzilla"
  13. assert vocab.lookup_id("godzilla") == i
  14. ii = vocab.add_token("godzilla")
  15. assert ii == i
  16. for token in token_to_id:
  17. assert token == id_to_token[token_to_id[token]]
  18. for j in id_to_token:
  19. assert j == token_to_id[id_to_token[j]]
  20. def test_json():
  21. vocab = Vocab()
  22. vocab.add_token("godzilla")
  23. vocab.add_token("spiderman")
  24. path = Path(__file__).resolve().parent
  25. path = path / Path("minimal.json")
  26. vocab.to_json(path)
  27. assert 1 == 1
  28. bacov = Vocab.from_json(path)
  29. for token in vocab.token_to_id:
  30. i = vocab.token_to_id[token]
  31. j = bacov.token_to_id[token]
  32. assert i == j
  33. for token in bacov.token_to_id:
  34. i = vocab.token_to_id[token]
  35. j = bacov.token_to_id[token]
  36. assert i == j
Tip!

Press p or ← to see the previous file, or n or → to see the next file

Comments

Loading...