-
Notifications
You must be signed in to change notification settings - Fork 16
273 lines (236 loc) Β· 11.6 KB
/
link-checker.yml
File metadata and controls
273 lines (236 loc) Β· 11.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
name: Link Checker

# Checks the documentation site for broken links. Runs on a schedule and can
# also be started manually.
#
# For local link checking, run:
#   pnpm links:check:local
# That command builds the site, serves it on port 8888, and checks links.
# The dev server (pnpm start) does NOT generate sitemap.xml, which this
# checker requires.
on:
  schedule:
    # GitHub evaluates cron schedules in UTC: every day at 02:00.
    - cron: '0 2 * * *'
  workflow_dispatch:
    inputs:
      check_type:
        description: 'Type of link check to perform'
        type: choice
        options:
          - 'quick'
          - 'deep'
        default: 'deep'
        required: true
      base_url:
        description: 'Base URL to check'
        type: string
        default: 'https://developers.glean.com'
        required: false
# Least-privilege GITHUB_TOKEN scopes. The job reads the repository (checkout)
# and files an issue on scheduled failures; no step touches pull requests, so
# no pull-requests scope is requested.
permissions:
  contents: read
  issues: write
# Single job: installs lychee, runs scripts/check-links.sh against the site,
# then publishes the results (artifact, job summary, and an issue on
# scheduled failures).
jobs:
link-check:
name: Check Links
runs-on: ubuntu-latest
steps:
- name: Checkout repository
  uses: actions/checkout@v4
  with:
    # Sparse checkout in non-cone mode so a single file path can be listed:
    # only the link-check script is fetched into the working tree.
    sparse-checkout-cone-mode: false
    sparse-checkout: |
      scripts/check-links.sh
- name: Install dependencies
  # NOTE(review): libxml2-utils is presumably needed by check-links.sh
  # (e.g. for reading sitemap.xml) - confirm against the script.
  run: |
    sudo apt-get update
    sudo apt-get install -y libxml2-utils
- name: Cache lychee
  id: cache-lychee
  uses: actions/cache@v4
  with:
    # NOTE(review): the key never changes, while the install step downloads
    # the "latest" release - a cache hit keeps reusing whichever binary was
    # cached first. Bump the -v1 suffix to pick up a newer lychee.
    key: lychee-${{ runner.os }}-v1
    path: ~/.cargo/bin/lychee
- name: Install lychee
  # Skipped when the cached binary was restored.
  if: steps.cache-lychee.outputs.cache-hit != 'true'
  run: |
    # Without pipefail the pipeline status is tar's alone; without --fail an
    # HTTP error page is piped into tar and surfaces as a confusing
    # "not in gzip format" error. With both, a failed download stops the step
    # with curl's own error message.
    set -o pipefail
    curl --fail --silent --show-error --location \
      https://github.com/lycheeverse/lychee/releases/latest/download/lychee-x86_64-unknown-linux-gnu.tar.gz \
      | tar -xz
    # Install into the directory that the cache step saves and that a later
    # step adds to PATH.
    mkdir -p ~/.cargo/bin
    mv lychee ~/.cargo/bin/
- name: Add lychee to PATH
  # Lines appended to the GITHUB_PATH file are prepended to PATH for all
  # subsequent steps of this job.
  run: |
    echo "$HOME/.cargo/bin" >> "$GITHUB_PATH"
- name: Verify lychee installation
  # Fails the job early if the binary is missing or not executable.
  run: |
    lychee --version
- name: Set check parameters
  id: params
  env:
    # User-supplied inputs are handed to the script through the environment.
    # Expanding ${{ ... }} directly inside `run:` pastes the raw value into the
    # script text, so an input containing quotes or $(...) would be executed.
    EVENT_NAME: ${{ github.event_name }}
    INPUT_CHECK_TYPE: ${{ github.event.inputs.check_type }}
    INPUT_BASE_URL: ${{ github.event.inputs.base_url }}
  run: |
    # Scheduled runs have no inputs and always use the defaults; manual runs
    # use the submitted inputs, falling back to the same defaults if an input
    # arrives empty.
    check_type="deep"
    base_url="https://developers.glean.com"
    if [ "$EVENT_NAME" = "workflow_dispatch" ]; then
      check_type="${INPUT_CHECK_TYPE:-$check_type}"
      base_url="${INPUT_BASE_URL:-$base_url}"
    fi

    # Outputs: check_type ("quick" or "deep") and base_url.
    {
      echo "check_type=$check_type"
      echo "base_url=$base_url"
    } >> "$GITHUB_OUTPUT"
- name: Run link check
  id: link-check
  env:
    # Read from the environment instead of expanding ${{ ... }} into the
    # script text, so a base URL containing quotes or $(...) stays plain data.
    BASE_URL: ${{ steps.params.outputs.base_url }}
    CHECK_TYPE: ${{ steps.params.outputs.check_type }}
  # Outputs: deep_check, exit_code, summary_line, status (success|failure).
  # This step itself does not fail on broken links; the final step of the job
  # does, after results have been uploaded and reported.
  run: |
    echo "π Running $CHECK_TYPE link check on $BASE_URL"

    # check-links.sh receives the base URL and a true/false deep-check flag.
    if [ "$CHECK_TYPE" = "deep" ]; then
      deep_check=true
    else
      deep_check=false
    fi
    echo "deep_check=$deep_check" >> "$GITHUB_OUTPUT"

    # `|| exit_code=$?` keeps bash -e from killing the step on a non-zero exit
    # from the script. Without it the GITHUB_OUTPUT writes below would be
    # skipped and downstream steps would have no status to act on.
    exit_code=0
    ./scripts/check-links.sh "$BASE_URL" "$deep_check" > link-check-results.txt 2>&1 || exit_code=$?
    echo "exit_code=$exit_code" >> "$GITHUB_OUTPUT"

    # Take the first line containing "Total" as the lychee summary line; this
    # is more reliable than `tail -n 1`, which breaks on trailing blank lines.
    summary_line=$(grep -m1 "Total" link-check-results.txt || true)
    echo "summary_line=$summary_line" >> "$GITHUB_OUTPUT"

    if [ "$exit_code" -eq 0 ]; then
      echo "✅ Link check passed!"
      echo "status=success" >> "$GITHUB_OUTPUT"
    else
      echo "status=failure" >> "$GITHUB_OUTPUT"
      echo ""
      echo "β Link check failed β broken links detected:"
      echo "ββββββββββββββββββββββββββββββββββββββββββββββ"
      # Show the error lines; if none are recognised, fall back to the tail of
      # the non-bracketed output. The match is captured first because the
      # status of `grep | head` is head's (always 0 without pipefail), so a
      # `|| fallback` chained onto that pipeline would never run.
      error_lines=$(grep -E "\[ERROR\]|✗" link-check-results.txt | head -40 || true)
      if [ -n "$error_lines" ]; then
        printf '%s\n' "$error_lines"
      else
        grep -v "^\[" link-check-results.txt | tail -40 || true
      fi
      echo "ββββββββββββββββββββββββββββββββββββββββββββββ"
      echo "Full results are in the uploaded artifact."
    fi
- name: Parse results
  id: parse-results
  env:
    # The summary line comes from tool output, so it is passed as data via the
    # environment instead of being expanded into the script text.
    SUMMARY_LINE: ${{ steps.link-check.outputs.summary_line }}
  # Outputs: total_links, ok_links, errors, excluded, redirects (all integers,
  # 0 when the value is missing from the summary).
  run: |
    # Print the number that precedes "<label>" in the summary line, with
    # thousands separators removed; print 0 if the label is absent.
    # Only the first match is used so the value is always a single line.
    extract_count() {
      local count
      count=$(printf '%s\n' "$SUMMARY_LINE" \
        | grep -oE "[0-9][0-9,]* $1" \
        | head -n 1 \
        | grep -oE "^[0-9,]+" \
        | tr -d ',' || true)
      echo "${count:-0}"
    }

    total_links=0
    ok_links=0
    errors=0
    excluded=0
    redirects=0

    if [ -n "$SUMMARY_LINE" ]; then
      # The summary line has the form
      #   <n> Total (in 20s) <n> OK <n> Errors <n> Excluded <n> Redirects
      # with an emoji in front of each count.
      total_links=$(extract_count "Total")
      ok_links=$(extract_count "OK")
      errors=$(extract_count "Errors")
      excluded=$(extract_count "Excluded")
      redirects=$(extract_count "Redirects")

      echo "Results Summary:"
      echo "Total links checked: ${total_links}"
      echo "OK links: ${ok_links}"
      echo "Errors found: ${errors}"
      echo "Links excluded: ${excluded}"
      echo "Redirects followed: ${redirects}"
      echo ""
      echo "Lychee Summary: $SUMMARY_LINE"
    else
      # No summary line was found; the zero defaults above are reported.
      echo "Warning: Could not parse lychee summary"
    fi

    {
      echo "total_links=$total_links"
      echo "ok_links=$ok_links"
      echo "errors=$errors"
      echo "excluded=$excluded"
      echo "redirects=$redirects"
    } >> "$GITHUB_OUTPUT"
- name: Upload results
  # always(): keep the raw output available even when an earlier step failed.
  if: always()
  uses: actions/upload-artifact@v4
  with:
    # The artifact name carries the check type (quick/deep).
    name: link-check-results-${{ steps.params.outputs.check_type }}
    path: link-check-results.txt
    retention-days: 30
- name: Create job summary
  # always(): a summary is written even when an earlier step failed.
  if: always()
  env:
    # Step outputs are read from the environment instead of being expanded
    # into the script text, so their content cannot change the script.
    CHECK_TYPE: ${{ steps.params.outputs.check_type }}
    BASE_URL: ${{ steps.params.outputs.base_url }}
    STATUS: ${{ steps.link-check.outputs.status }}
    SUMMARY_LINE: ${{ steps.link-check.outputs.summary_line }}
  run: |
    # Everything echoed inside the braces is appended to the job summary.
    {
      echo "# Link Check Results"
      echo ""
      echo "**Check Type:** $CHECK_TYPE"
      echo "**Base URL:** $BASE_URL"
      echo "**Status:** $STATUS"
      echo ""

      # Display the lychee summary line with all metrics, when one was found.
      if [ -n "$SUMMARY_LINE" ]; then
        echo "## Summary"
        echo '```'
        printf '%s\n' "$SUMMARY_LINE"
        echo '```'
        echo ""
      fi

      if [ "$STATUS" = "failure" ]; then
        echo "## β Broken Links Found"
        echo ""
        echo "The following errors were detected:"
        echo ""
        echo '```'
        # `|| true`: having no matching lines must not fail the step.
        grep -E "\[ERROR\]|✗" link-check-results.txt | head -40 || true
        echo '```'
        echo ""
        echo "π Full results are available in the uploaded artifact."
      elif [ "$STATUS" = "success" ]; then
        echo "## ✅ All Links Working"
        echo ""
        echo "No broken links were found! π"
      else
        # The link-check step set no status, i.e. it did not run to the end.
        echo "## ⚠️ Link Check Did Not Complete"
        echo ""
        echo "The link check step failed before producing results. Check the step log for details."
      fi
    } >> "$GITHUB_STEP_SUMMARY"
- name: Create GitHub issue for broken links
  # Only scheduled runs file an issue; manual runs just report in the job
  # summary.
  if: always() && steps.link-check.outputs.status == 'failure' && github.event_name == 'schedule'
  uses: actions/github-script@v7
  env:
    # Values are read via process.env instead of being expanded into the
    # JavaScript source, so their content cannot change the script.
    TOTAL_LINKS: ${{ steps.parse-results.outputs.total_links }}
    OK_LINKS: ${{ steps.parse-results.outputs.ok_links }}
    ERROR_COUNT: ${{ steps.parse-results.outputs.errors }}
    EXCLUDED: ${{ steps.parse-results.outputs.excluded }}
    REDIRECTS: ${{ steps.parse-results.outputs.redirects }}
    CHECK_TYPE: ${{ steps.params.outputs.check_type }}
    BASE_URL: ${{ steps.params.outputs.base_url }}
  with:
    script: |
      const fs = require('fs');

      const {
        TOTAL_LINKS,
        OK_LINKS,
        ERROR_COUNT,
        EXCLUDED,
        REDIRECTS,
        CHECK_TYPE,
        BASE_URL,
      } = process.env;
      const { owner, repo } = context.repo;
      const MAX_LISTED = 30;

      // Collect the error lines from the raw results file.
      const results = fs.readFileSync('link-check-results.txt', 'utf8');
      const errors = results
        .split('\n')
        .filter(line => line.includes('[ERROR]') || line.includes('\u2717'));

      // List at most MAX_LISTED errors and say how many were left out.
      const listed = errors.slice(0, MAX_LISTED);
      if (errors.length > MAX_LISTED) {
        listed.push('', `... and ${errors.length - MAX_LISTED} more errors`);
      }

      const runUrl = `https://github.com/${owner}/${repo}/actions/runs/${context.runId}`;

      // Built as an explicit list of lines so that the Markdown blank lines
      // and the absence of leading indentation do not depend on how this
      // script is indented inside the YAML block scalar.
      const issueBody = [
        '# π Broken Links Detected',
        '',
        `The nightly link check found **${ERROR_COUNT}** broken links on the site.`,
        '',
        '## Summary',
        '',
        `- **Total Links Checked:** ${TOTAL_LINKS}`,
        `- **OK Links:** ${OK_LINKS}`,
        `- **Errors Found:** ${ERROR_COUNT}`,
        `- **Links Excluded:** ${EXCLUDED}`,
        `- **Redirects Followed:** ${REDIRECTS}`,
        `- **Check Type:** ${CHECK_TYPE}`,
        `- **Base URL:** ${BASE_URL}`,
        '',
        '## β Broken Links',
        '',
        '```',
        ...listed,
        '```',
        '',
        '## Next Steps',
        '',
        `1. Review the full results in the [workflow run](${runUrl})`,
        '2. Fix or exclude the broken links',
        '3. Run `pnpm links:check:deep` locally to verify fixes',
        '',
        '---',
        '*This issue was automatically created by the nightly link checker.*',
        '',
      ].join('\n');

      await github.rest.issues.create({
        owner,
        repo,
        title: `Broken Links Detected (${ERROR_COUNT} errors)`,
        body: issueBody,
        labels: ['bug', 'documentation', 'automated']
      });
- name: Fail workflow if links are broken
  # Runs last so that the artifact, job summary and issue are produced before
  # the job is marked as failed.
  if: always() && steps.link-check.outputs.status == 'failure'
  env:
    BROKEN_COUNT: ${{ steps.parse-results.outputs.errors }}
  run: |
    echo "β Link check failed with $BROKEN_COUNT broken links"
    exit 1