Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

check-english-links.js 5.0 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
  1. #!/usr/bin/env node
  2. const path = require('path')
  3. const fs = require('fs')
  4. const linkinator = require('linkinator')
  5. const program = require('commander')
  6. const { pull, uniq } = require('lodash')
  7. const checker = new linkinator.LinkChecker()
  8. const rimraf = require('rimraf').sync
  9. const mkdirp = require('mkdirp').sync
  10. const root = 'https://docs.github.com'
  11. const englishRoot = `${root}/en`
  12. const { deprecated } = require('../lib/enterprise-server-releases')
  13. const got = require('got')
  14. // Links with these codes may or may not really be broken.
  15. const retryStatusCodes = [429, 503, 'Invalid']
  16. // [start-readme]
  17. //
  18. // This script runs once per day via a scheduled GitHub Action to check all links in
  19. // English content, not including deprecated Enterprise Server content. It opens an issue
  20. // if it finds broken links. To exclude a link path, add it to `lib/excluded-links.js`.
  21. // Note that linkinator sometimes returns 429 and 503 errors for links that are not actually
  22. // broken, so this script double-checks those using `got`.
  23. //
  24. // [end-readme]
  25. program
  26. .description('Check all links in the English docs.')
  27. .option('-d, --dry-run', 'Turn off recursion to get a fast minimal report (useful for previewing output).')
  28. .option('-r, --do-not-retry', `Do not retry broken links with status codes ${retryStatusCodes.join(', ')}.`)
  29. .option('-p, --path <PATH>', `Provide an optional path to check. Best used with --dry-run. Default: ${englishRoot}`)
  30. .parse(process.argv)
  31. // Skip excluded links defined in separate file.
  32. const excludedLinks = require('../lib/excluded-links')
  33. // Skip non-English content.
  34. const languagesToSkip = Object.keys(require('../lib/languages'))
  35. .filter(code => code !== 'en')
  36. .map(code => `${root}/${code}`)
  37. // Skip deprecated Enterprise content.
  38. // Capture the old format https://docs.github.com/enterprise/2.1/
  39. // and the new format https://docs.github.com/enterprise-server@2.19/.
  40. const enterpriseReleasesToSkip = new RegExp(`${root}.+?[/@](${deprecated.join('|')})(/|$)`)
  41. const config = {
  42. path: program.path || englishRoot,
  43. concurrency: 300,
  44. // If this is a dry run, turn off recursion.
  45. recurse: !program.dryRun,
  46. silent: true,
  47. // The values in this array are treated as regexes.
  48. linksToSkip: [
  49. enterpriseReleasesToSkip,
  50. ...languagesToSkip,
  51. ...excludedLinks
  52. ]
  53. }
  54. main()
  55. async function main () {
  56. // Clear and recreate a directory for logs.
  57. const logFile = path.join(__dirname, '../.linkinator/full.log')
  58. rimraf(path.dirname(logFile))
  59. mkdirp(path.dirname(logFile))
  60. // Update CLI output and append to logfile after each checked link.
  61. checker.on('link', result => {
  62. // We don't need to dump all of the HTTP and HTML details
  63. delete result.failureDetails
  64. fs.appendFileSync(logFile, JSON.stringify(result) + '\n')
  65. })
  66. // Start the scan; events will be logged as they occur.
  67. const result = (await checker.check(config)).links
  68. // Scan is complete! Filter the results for broken links.
  69. const brokenLinks = result
  70. .filter(link => link.state === 'BROKEN')
  71. // Coerce undefined status codes into `Invalid` strings so we can display them.
  72. // Without this, undefined codes get JSON.stringified as `0`, which is not useful output.
  73. .map(link => { link.status = link.status || 'Invalid'; return link })
  74. if (!program.doNotRetry) {
  75. // Links to retry individually.
  76. const linksToRetry = brokenLinks
  77. .filter(link => retryStatusCodes.includes(link.status))
  78. await Promise.all(linksToRetry
  79. .map(async (link) => {
  80. try {
  81. // got throws an HTTPError if response code is not 2xx or 3xx.
  82. // If got succeeds, we can remove the link from the list.
  83. await got(link.url)
  84. pull(brokenLinks, link)
  85. // If got fails, do nothing. The link is already in the broken list.
  86. } catch (err) {
  87. // noop
  88. }
  89. }))
  90. }
  91. // Exit successfully if no broken links!
  92. if (!brokenLinks.length) {
  93. console.log('All links are good!')
  94. process.exit(0)
  95. }
  96. // Format and display the results.
  97. console.log(`${brokenLinks.length} broken links found on docs.github.com\n`)
  98. displayBrokenLinks(brokenLinks)
  99. // Exit unsuccessfully if broken links are found.
  100. process.exit(1)
  101. }
  102. function displayBrokenLinks (brokenLinks) {
  103. // Sort results by status code.
  104. const allStatusCodes = uniq(brokenLinks
  105. // Coerce undefined status codes into `Invalid` strings so we can display them.
  106. // Without this, undefined codes get JSON.stringified as `0`, which is not useful output.
  107. .map(link => link.status || 'Invalid')
  108. )
  109. allStatusCodes.forEach(statusCode => {
  110. const brokenLinksForStatus = brokenLinks.filter(x => x.status === statusCode)
  111. console.log(`## Status ${statusCode}: Found ${brokenLinksForStatus.length} broken links`)
  112. console.log('```')
  113. brokenLinksForStatus.forEach(brokenLinkObj => {
  114. // We don't need to dump all of the HTTP and HTML details
  115. delete brokenLinkObj.failureDetails
  116. console.log(JSON.stringify(brokenLinkObj, null, 2))
  117. })
  118. console.log('```')
  119. })
  120. }
Tip!

Press p or to see the previous file or, n or to see the next file

Comments

Loading...