Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

parse-page-sections-into-records.js 2.3 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
  1. const fs = require('fs')
  2. const path = require('path')
  3. const cheerio = require('cheerio')
  4. const parsePageSectionsIntoRecords = require('../../../script/search/parse-page-sections-into-records')
  // HTML fixture documents used by the tests below, read synchronously as
  // UTF-8 text once when this module is loaded.
  const fixtures = (() => {
    // Resolves a path relative to this test file's directory and reads it.
    const readHtml = (relativePath) => {
      const absolutePath = path.join(__dirname, relativePath)
      return fs.readFileSync(absolutePath, 'utf8')
    }

    return {
      pageWithSections: readHtml('fixtures/page-with-sections.html'),
      pageWithoutSections: readHtml('fixtures/page-without-sections.html')
    }
  })()
  // Verifies the records parsePageSectionsIntoRecords produces for two HTML
  // fixtures: one page that contains sections and one that does not.
  describe('search parsePageSectionsIntoRecords module', () => {
    // Both cases parse their fixture under the same example href.
    const exampleHref = '/example/href'

    // Loads the fixture markup with cheerio and hands the resulting `$` to the parser.
    const recordsFor = (markup) => {
      const $ = cheerio.load(markup)
      return parsePageSectionsIntoRecords(exampleHref, $)
    }

    test('works for pages with sections', () => {
      const parsed = recordsFor(fixtures.pageWithSections)

      expect(Array.isArray(parsed)).toBe(true)
      expect(parsed.length).toBe(2)

      // Two records are expected; each carries its own slug and heading and
      // an objectID/url ending in `#<slug>`.
      expect(parsed).toEqual([
        {
          objectID: '/example/href#first',
          url: 'https://docs.github.com/example/href#first',
          slug: 'first',
          breadcrumbs: 'GitHub Actions / actions learning path',
          heading: 'First heading',
          title: 'I am the page title',
          content: "Here's a paragraph. And another.",
          topics: ['topic1', 'topic2', 'GitHub Actions', 'Actions']
        },
        {
          objectID: '/example/href#second',
          url: 'https://docs.github.com/example/href#second',
          slug: 'second',
          breadcrumbs: 'GitHub Actions / actions learning path',
          heading: 'Second heading',
          title: 'I am the page title',
          content: "Here's a paragraph in the second section. And another.",
          topics: ['topic1', 'topic2', 'GitHub Actions', 'Actions']
        }
      ])
    })

    test('works for pages without sections', () => {
      const parsed = recordsFor(fixtures.pageWithoutSections)

      expect(Array.isArray(parsed)).toBe(true)
      expect(parsed.length).toBe(1)

      // A single record is expected, with no slug or heading fields and no
      // fragment on its objectID/url.
      expect(parsed).toEqual([
        {
          objectID: '/example/href',
          url: 'https://docs.github.com/example/href',
          breadcrumbs: 'Education / map topic',
          title: 'A page without sections',
          content: 'First paragraph. Second paragraph.',
          topics: ['key1', 'key2', 'key3', 'Education']
        }
      ])
    })
  })
Tip!

Press p or the left arrow key to see the previous file, or n or the right arrow key to see the next file

Comments

Loading...