Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

update-internal-links.js 7.5 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
  1. #!/usr/bin/env node
  2. const fs = require('fs')
  3. const walk = require('walk-sync')
  4. const path = require('path')
  5. const astFromMarkdown = require('mdast-util-from-markdown')
  6. const visit = require('unist-util-visit')
  7. const { loadPages, loadPageMap } = require('../lib/pages')
  8. const loadSiteData = require('../lib/site-data')
  9. const loadRedirects = require('../lib/redirects/precompile')
  10. const { getPathWithoutLanguage, getPathWithoutVersion } = require('../lib/path-utils')
  11. const allVersions = Object.keys(require('../lib/all-versions'))
  12. const frontmatter = require('../lib/read-frontmatter')
  13. const renderContent = require('../lib/render-content')
  14. const patterns = require('../lib/patterns')
  // Lists the Markdown files found under `pathToWalk`, which is resolved relative to the
  // parent of this script's directory. Files whose names end in README.md and files with
  // an /early-access/ path segment are left out.
  const walkFiles = (pathToWalk) => {
    const rootDir = path.posix.join(__dirname, '..', pathToWalk)
    const isContentMarkdown = (filePath) => filePath.endsWith('.md') && !filePath.endsWith('README.md')
    const isEarlyAccess = (filePath) => filePath.includes('/early-access/') // ignore EA for now

    return walk(rootDir, { includeBasePath: true, directories: false })
      .filter((filePath) => isContentMarkdown(filePath) && !isEarlyAccess(filePath))
  }
  // Every Markdown file the script scans: the `content` tree first, then the `data` tree.
  const allFiles = [...walkFiles('content'), ...walkFiles('data')]

  // Maps the node type of a link title's wrapper to the Markdown delimiter that produces it.
  // The script will throw an error if it finds any markup not represented here.
  // Hacky but it captures the current rare edge cases.
  const linkInlineMarkup = {
    strong: '**',
    emphasis: '*'
  }

  // The versioned enterprise path written with spaces inside the Liquid tag, and the
  // space-free spelling it is normalized to before the Markdown is parsed.
  const currentVersionWithoutSpaces = '/enterprise/{{currentVersion}}'
  const currentVersionWithSpacesRegex = /\/enterprise\/{{ currentVersion }}/g
  29. // [start-readme]
  30. //
  31. // Run this script to find internal links in all content and data Markdown files, check if either the title or link
  32. // (or both) are outdated, and automatically update them if so.
  33. //
  34. // Exceptions:
  35. // * Links with fragments (e.g., [Bar](/foo#bar)) will get their root links updated if necessary, but the fragment
  36. // and title will be unchanged (e.g., [Bar](/noo#bar)).
  37. // * Links with hardcoded versions (e.g., [Foo](/enterprise-server/baz)) will get their root links updated if
  38. // necessary, but the hardcoded versions will be preserved (e.g., [Foo](/enterprise-server/qux)).
  39. // * Links with Liquid in the titles will have their root links updated if necessary, but the titles will be preserved.
  40. //
  41. // [end-readme]
  main().catch((error) => {
    // Surface unexpected failures and exit non-zero instead of leaving a floating rejection.
    console.error(error)
    process.exit(1)
  })

  /**
   * Walks every file in `allFiles`, finds the internal Markdown links in its body, resolves
   * each one against the loaded pages and redirects, and writes the file back with any
   * outdated link paths (and, for quoted links, titles) replaced.
   *
   * Exits the process with code 1 when a link title uses inline markup that is missing from
   * `linkInlineMarkup`, or when a link cannot be resolved to a page for any version.
   *
   * @returns {Promise<void>}
   */
  async function main () {
    console.log('Working...')

    const pageList = await loadPages()
    const pageMap = await loadPageMap(pageList)
    const redirects = await loadRedirects(pageList)
    const site = await loadSiteData()

    const context = {
      pages: pageMap,
      redirects,
      site: site.en.site,
      currentLanguage: 'en'
    }

    for (const file of allFiles) {
      const { data, content } = frontmatter(fs.readFileSync(file, 'utf8'))

      // Do a blanket find-replace for /enterprise/{{ currentVersion }}/ to /enterprise/{{currentVersion}}/
      // so that the AST parser recognizes the link as a link node. The spaces prevent it from doing so.
      let newContent = content.replace(currentVersionWithSpacesRegex, currentVersionWithoutSpaces)

      const ast = astFromMarkdown(newContent)

      // We can't do async functions within visit, so gather the nodes upfront
      const nodesPerFile = []

      visit(ast, node => {
        if (node.type !== 'link') return
        if (!node.url.startsWith('/')) return
        if (node.url.startsWith('/assets')) return
        if (node.url.startsWith('/public')) return
        if (node.url.includes('/11.10.340/')) return
        if (node.url.includes('/2.1/')) return
        if (node.url === '/') return

        nodesPerFile.push(node)
      })

      // For every Markdown link...
      for (const node of nodesPerFile) {
        const oldLink = node.url

        // Find and preserve any inline markup in link titles, like [*Foo*](/foo)
        let inlineMarkup = ''

        if (node.children[0].children) {
          inlineMarkup = linkInlineMarkup[node.children[0].type]

          if (!inlineMarkup) {
            console.error(`Cannot find an inline markup entry for ${node.children[0].type}!`)
            process.exit(1)
          }
        }

        const oldTitle = node.children[0].value || node.children[0].children[0].value
        const oldMarkdownLink = `[${inlineMarkup}${oldTitle}${inlineMarkup}](${oldLink})`

        // As a blanket rule, only update titles in links that begin with quotes. (Many links
        // have punctuation before the closing quotes, so we'll only check for opening quotes.)
        // Update: "[Foo](/foo)
        // Do not update: [Bar](/bar)
        const hasQuotesAroundLink = newContent.includes(`"${oldMarkdownLink}`)

        // We need to preserve fragments and hardcoded versions if any are found. Both depend
        // only on the original link, so they are computed once rather than once per version.
        const fragmentMatch = oldLink.match(/(#.*$)/)
        const versionMatch = oldLink.match(/(enterprise-server(?:@.[^/]*?)?)\//)

        let foundPage

        // Run through all supported versions...
        for (const version of allVersions) {
          context.currentVersion = version

          // Render the link for each version using the renderContent pipeline, which includes the rewrite-local-links plugin.
          const $ = await renderContent(oldMarkdownLink, context, { cheerioObject: true })

          // Remove the fragment (and any trailing slash) for the lookup.
          const linkToCheck = $('a').attr('href')
            .replace(/#.*$/, '')
            .replace(patterns.trailingSlash, '$1')

          // Try to find the rendered link in the set of pages!
          foundPage = findPage(linkToCheck, pageMap, redirects)

          // Once a page is found for a particular version, exit immediately; we don't need to check the other versions
          // because all we care about is the page title and path.
          if (foundPage) {
            break
          }
        }

        if (!foundPage) {
          console.error(`Can't find link in pageMap! ${oldLink} in ${file.replace(process.cwd(), '')}`)
          process.exit(1)
        }

        // If the original link includes a fragment OR the original title includes Liquid, do not change;
        // otherwise, use the found page title. (We don't want to update the title if a fragment is found because
        // the title likely points to the fragment section header, not the page title.)
        const newTitle = fragmentMatch || oldTitle.includes('{%') || !hasQuotesAroundLink ? oldTitle : foundPage.title

        // If the original link includes a fragment, append it to the found page path.
        // Also remove the language code because Markdown links don't include language codes.
        let newLink = getPathWithoutLanguage(fragmentMatch ? foundPage.path + fragmentMatch[1] : foundPage.path)

        // If the original link includes a hardcoded version, preserve it; otherwise, remove versioning
        // because Markdown links don't include versioning.
        newLink = versionMatch ? `/${versionMatch[1]}${getPathWithoutVersion(newLink)}` : getPathWithoutVersion(newLink)

        let newMarkdownLink = `[${inlineMarkup}${newTitle}${inlineMarkup}](${newLink})`

        // Handle a few misplaced quotation marks.
        if (oldMarkdownLink.includes('["')) {
          newMarkdownLink = `"${newMarkdownLink}`
        }

        // Stream the results to console as we find them.
        if (oldMarkdownLink !== newMarkdownLink) {
          console.log('old link', oldMarkdownLink)
          console.log('new link', newMarkdownLink)
          console.log('-------')
        }

        // Use a replacer function so that `$` sequences in the new title or path (e.g. `$&`, `$$`)
        // are inserted literally instead of being interpreted as replacement patterns.
        newContent = newContent.replace(oldMarkdownLink, () => newMarkdownLink)
      }

      fs.writeFileSync(file, frontmatter.stringify(newContent, data, { lineWidth: 10000 }))
    }

    console.log('Done!')
  }
  /**
   * Looks a path up in the page map, falling back to the redirect registered for that path.
   *
   * @param {string} tryPath - Path to look up.
   * @param {Object} pageMap - Page entries keyed by path; the matching entry's `title` is returned.
   * @param {Object} redirects - Redirect targets keyed by the path being redirected.
   * @returns {{title: *, path: string}|undefined} The title and resolved path of the matching
   *   page, or `undefined` when neither the path nor its redirect target is in `pageMap`.
   */
  function findPage (tryPath, pageMap, redirects) {
    if (pageMap[tryPath]) {
      return {
        title: pageMap[tryPath].title,
        path: tryPath
      }
    }

    const redirectedPath = redirects[tryPath]

    // Without this guard a missing redirect would be coerced into the property key
    // "undefined" and could match an unrelated entry, yielding a result with no path.
    if (redirectedPath === undefined || redirectedPath === null) {
      return undefined
    }

    if (pageMap[redirectedPath]) {
      return {
        title: pageMap[redirectedPath].title,
        path: redirectedPath
      }
    }

    return undefined
  }
Tip!

Press p to see the previous file, or n to see the next file

Comments

Loading...