Skip to content

Commit 92551ba

Browse files
authored
Merge pull request #63 from axiom-nz/feature/url_path_regex_to_ignore
Add UrlPathRegexToIgnore argument for ignoring paths by regex
2 parents df72c22 + 9b00bc1 commit 92551ba

7 files changed

Lines changed: 86 additions & 1 deletion

File tree

BlazorWasmPreRendering.Build.Test/StaticlizeCrawlerTest.cs

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ public async Task SaveToStaticFileAsync_IndexHtmlInSubFolder_Style_Test()
2020
var crawler = new StaticlizeCrawler(
2121
baseUrl,
2222
urlPathToExplicitFetch: null,
23+
urlPathRegexToIgnore: null,
2324
webRootPath: outDir,
2425
locales: new string[] { },
2526
OutputStyle.IndexHtmlInSubFolders,
@@ -40,6 +41,40 @@ public async Task SaveToStaticFileAsync_IndexHtmlInSubFolder_Style_Test()
4041
$"Getting {baseUrl}/fetchdata/weather-forecast...");
4142
}
4243

44+
[Test]
45+
public async Task SaveToStaticFileAsync_UrlPathRegexToIgnore_Test()
46+
{
47+
// Given
48+
const string baseUrl = "http://127.0.0.1:5058";
49+
await using var testSiteServer = await TestSites.StartTestSite1(baseUrl);
50+
using var outDir = new WorkDirectory();
51+
var logger = new TestLogger();
52+
53+
// When
54+
var crawler = new StaticlizeCrawler(
55+
baseUrl,
56+
urlPathToExplicitFetch: null,
57+
urlPathRegexToIgnore: "counter", // Skip "/counter"
58+
webRootPath: outDir,
59+
locales: new string[] { },
60+
OutputStyle.IndexHtmlInSubFolders,
61+
enableBrotliCompression: false,
62+
enableGZipCompression: false,
63+
logger: logger);
64+
var result = await crawler.SaveToStaticFileAsync();
65+
66+
// Then
67+
result.Is(StaticlizeCrawlingResult.Nothing);
68+
File.Exists(Path.Combine(outDir, "index.html")).IsTrue();
69+
File.Exists(Path.Combine(outDir, "counter", "index.html")).IsFalse();
70+
File.Exists(Path.Combine(outDir, "fetchdata", "weather-forecast", "index.html")).IsTrue();
71+
72+
logger.LogLines.OrderBy(line => line)
73+
.Is($"Getting {baseUrl}/...",
74+
$"Getting {baseUrl}/fetchdata/weather-forecast...",
75+
$"Skipping /counter (matched by UrlPathRegexToIgnore)");
76+
}
77+
4378
[Test]
4479
public async Task SaveToStaticFileAsync_AppendHtmlExtension_Style_Test()
4580
{
@@ -53,6 +88,7 @@ public async Task SaveToStaticFileAsync_AppendHtmlExtension_Style_Test()
5388
var crawler = new StaticlizeCrawler(
5489
baseUrl,
5590
urlPathToExplicitFetch: null,
91+
urlPathRegexToIgnore: null,
5692
webRootPath: outDir,
5793
locales: new[] { "en" },
5894
OutputStyle.AppendHtmlExtension,
@@ -87,6 +123,7 @@ public async Task SaveToStaticFileAsync_ServiceNotRegisteredError_Test(int port,
87123
var crawler = new StaticlizeCrawler(
88124
baseUrl,
89125
urlPathToExplicitFetch: null,
126+
urlPathRegexToIgnore: null,
90127
webRootPath: outDir,
91128
locales: new string[] { },
92129
OutputStyle.AppendHtmlExtension,
@@ -113,6 +150,7 @@ public async Task SaveToStaticFileAsync_FollowsAlternateLinks_Test()
113150
var crawler = new StaticlizeCrawler(
114151
baseUrl,
115152
urlPathToExplicitFetch: null,
153+
urlPathRegexToIgnore: null,
116154
webRootPath: outDir,
117155
locales: new string[] { },
118156
OutputStyle.IndexHtmlInSubFolders,
@@ -148,6 +186,7 @@ public async Task SaveToStaticFileAsync_JsInvokeOnServerError_Test()
148186
var crawler = new StaticlizeCrawler(
149187
baseUrl,
150188
urlPathToExplicitFetch: null,
189+
urlPathRegexToIgnore: null,
151190
webRootPath: outDir,
152191
locales: new string[] { },
153192
OutputStyle.AppendHtmlExtension,

BlazorWasmPreRendering.Build/CommandLineOptions.cs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,8 @@ public bool DeleteLoadingContents
4848

4949
public string? UrlPathToExplicitFetch { get; set; }
5050

51+
public string? UrlPathRegexToIgnore { get; set; }
52+
5153
public bool KeepRunning { get; init; }
5254

5355
public static readonly string DefaultServerPort = "5050-5999";

BlazorWasmPreRendering.Build/Program.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ private static async Task<StaticlizeCrawlingResult> PreRenderToStaticFilesAsync(
4141
var crawler = new StaticlizeCrawler(
4242
baseUrl,
4343
commandLineOptions.UrlPathToExplicitFetch,
44+
commandLineOptions.UrlPathRegexToIgnore,
4445
prerenderingOptions.WebRootPath,
4546
prerenderingOptions.Locales,
4647
commandLineOptions.OutputStyle,

BlazorWasmPreRendering.Build/StaticlizeCrawler.cs

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,13 +32,16 @@ internal class StaticlizeCrawler
3232

3333
private IEnumerable<string> UrlPathToExplicitFetch { get; }
3434

35+
private Regex? UrlPathRegexToIgnore { get; }
36+
3537
private readonly List<string> _StaticalizedFiles = new List<string>();
3638

3739
public IEnumerable<string> StaticalizedFiles => this._StaticalizedFiles;
3840

3941
public StaticlizeCrawler(
4042
string baseUrl,
4143
string? urlPathToExplicitFetch,
44+
string? urlPathRegexToIgnore,
4245
string webRootPath,
4346
IEnumerable<string> locales,
4447
OutputStyle outputStyle,
@@ -59,6 +62,21 @@ public StaticlizeCrawler(
5962
.Where(s => !string.IsNullOrEmpty(s))
6063
.ToArray();
6164

65+
if (!string.IsNullOrEmpty(urlPathRegexToIgnore))
66+
{
67+
try
68+
{
69+
this.UrlPathRegexToIgnore = new Regex(urlPathRegexToIgnore, RegexOptions.Compiled | RegexOptions.IgnoreCase);
70+
}
71+
catch (ArgumentException ex)
72+
{
73+
throw new ArgumentException(
74+
"Invalid regex pattern for --urlpathregextoignore / BlazorWasmPrerenderingUrlPathRegexToIgnore.",
75+
nameof(urlPathRegexToIgnore),
76+
ex);
77+
}
78+
}
79+
6280
if (locales.Any())
6381
{
6482
this.HttpClient.DefaultRequestHeaders.AcceptLanguage.Clear();
@@ -90,6 +108,14 @@ private async Task SaveToStaticFileAsync((string Href, string Protocol, string P
90108
{
91109
var href = args.Href.Split('#').FirstOrDefault() ?? "";
92110
if (this.SavedPathSet.Contains(href)) return;
111+
112+
if (this.UrlPathRegexToIgnore != null && this.UrlPathRegexToIgnore.IsMatch(args.PathName))
113+
{
114+
this.SavedPathSet.Add(href);
115+
this.Logger.LogInformation($"Skipping {args.PathName} (matched by UrlPathRegexToIgnore)");
116+
return;
117+
}
118+
93119
this.SavedPathSet.Add(href);
94120

95121
// DEBUG: Console.WriteLine($"Protocol:[{args.Protocol}], PathName:[{args.PathName}], Href:[{args.Href}]");

BlazorWasmPreRendering.Build/build/BlazorWasmPreRendering.Build.targets

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
<BlazorWasmPrerenderingKeepServer Condition=" '$(BlazorWasmPrerenderingKeepServer)' == '' ">false</BlazorWasmPrerenderingKeepServer>
3232
<!-- Default to "false" only when the property itself is unset (the condition must test this property, not BlazorWasmPrerenderingKeepServer). -->
<BlazorWasmPrerenderingDeleteLoadingContents Condition=" '$(BlazorWasmPrerenderingDeleteLoadingContents)' == '' ">false</BlazorWasmPrerenderingDeleteLoadingContents>
3333
<BlazorWasmPrerenderingServerPort Condition=" '$(BlazorWasmPrerenderingServerPort)' == '' ">5050-5999</BlazorWasmPrerenderingServerPort>
34+
<BlazorWasmPrerenderingUrlPathRegexToIgnore Condition=" '$(BlazorWasmPrerenderingUrlPathRegexToIgnore)' == '' "></BlazorWasmPrerenderingUrlPathRegexToIgnore>
3435
<BlazorWasmPrerenderingDotNetHost Condition=" '$(BlazorWasmPrerenderingDotNetHost)' == '' ">$(DOTNET_HOST_PATH)</BlazorWasmPrerenderingDotNetHost>
3536
<BlazorWasmPrerenderingDotNetHost Condition=" '$(BlazorWasmPrerenderingDotNetHost)' == '' ">$(_DotNetHostDirectory)$(_DotNetHostFileName)</BlazorWasmPrerenderingDotNetHost>
3637
</PropertyGroup>
@@ -63,6 +64,6 @@
6364
<BlazorWasmPrerenderingDeleteLoadingContentsSwitch Condition=" '$(BlazorWasmPrerenderingDeleteLoadingContents)' == 'true' "> -d</BlazorWasmPrerenderingDeleteLoadingContentsSwitch>
6465
</PropertyGroup>
6566

66-
<Exec Command="&quot;$(BlazorWasmPrerenderingDotNetHost)&quot; &quot;$(BlazorWasmPrerenderingServerDll)&quot; --assemblyname &quot;$(BlazorWasmPrerenderingAssembly)&quot; -t &quot;$(BlazorWasmPrerenderingRootComponentType)&quot; --selectorofrootcomponent &quot;$(BlazorWasmPrerenderingRootComponentSelector)&quot; --selectorofheadoutletcomponent &quot;$(BlazorWasmPrerenderingHeadOutletComponentSelector)&quot; -p &quot;$(BlazorWasmPrerenderingPublishDir)&quot; -i &quot;$(BlazorWasmPrerenderingIntermediateDir)&quot; --assemblydir &quot;$(BlazorWasmPrerenderingTargetDir)&quot; -m &quot;$(BlazorWasmPrerenderingMiddlewareArg)&quot; -f &quot;$(BlazorWasmPrerenderingTFM)&quot; --serviceworkerassetsmanifest &quot;$(ServiceWorkerAssetsManifest)&quot; --environment &quot;$(BlazorWasmPrerenderingEnvironment)&quot; --emulateauthme &quot;$(BlazorWasmPrerenderingEmulateAuthMe)&quot; --locale &quot;$(BlazorWasmPrerenderingLocale)&quot; -o &quot;$(BlazorWasmPrerenderingOutputStyle)&quot; $(BlazorWasmPrerenderingKeepServerSwitch)$(BlazorWasmPrerenderingDeleteLoadingContentsSwitch) -u &quot;$(BlazorWasmPrerenderingUrlPathToExplicitFetch)&quot; -r &quot;$(BlazorWasmPrerenderingMode)&quot; --serverport &quot;$(BlazorWasmPrerenderingServerPort)&quot; --bwapoptionsdllext &quot;$(BlazorWasmPrerenderingBWAPDllExt)&quot;" />
67+
<Exec Command="&quot;$(BlazorWasmPrerenderingDotNetHost)&quot; &quot;$(BlazorWasmPrerenderingServerDll)&quot; --assemblyname &quot;$(BlazorWasmPrerenderingAssembly)&quot; -t &quot;$(BlazorWasmPrerenderingRootComponentType)&quot; --selectorofrootcomponent &quot;$(BlazorWasmPrerenderingRootComponentSelector)&quot; --selectorofheadoutletcomponent &quot;$(BlazorWasmPrerenderingHeadOutletComponentSelector)&quot; -p &quot;$(BlazorWasmPrerenderingPublishDir)&quot; -i &quot;$(BlazorWasmPrerenderingIntermediateDir)&quot; --assemblydir &quot;$(BlazorWasmPrerenderingTargetDir)&quot; -m &quot;$(BlazorWasmPrerenderingMiddlewareArg)&quot; -f &quot;$(BlazorWasmPrerenderingTFM)&quot; --serviceworkerassetsmanifest &quot;$(ServiceWorkerAssetsManifest)&quot; --environment &quot;$(BlazorWasmPrerenderingEnvironment)&quot; --emulateauthme &quot;$(BlazorWasmPrerenderingEmulateAuthMe)&quot; --locale &quot;$(BlazorWasmPrerenderingLocale)&quot; -o &quot;$(BlazorWasmPrerenderingOutputStyle)&quot; $(BlazorWasmPrerenderingKeepServerSwitch)$(BlazorWasmPrerenderingDeleteLoadingContentsSwitch) -u &quot;$(BlazorWasmPrerenderingUrlPathToExplicitFetch)&quot; --urlpathregextoignore &quot;$(BlazorWasmPrerenderingUrlPathRegexToIgnore)&quot; -r &quot;$(BlazorWasmPrerenderingMode)&quot; --serverport &quot;$(BlazorWasmPrerenderingServerPort)&quot; --bwapoptionsdllext &quot;$(BlazorWasmPrerenderingBWAPDllExt)&quot;" />
6768
</Target>
6869
</Project>

MSBUILD-PROPERTIES.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ BlazorWasmPrerenderingHeadOutletComponentSelector| `head::after` | Set the DOM e
1111
BlazorWasmPrerenderingOutputStyle | `IndexHtmlInSubFolders` | When it is set to `AppendHtmlExtension`, the page at the URL path `foo/bar` will be saved as `foo/bar.html` instead of `foo/bar/index.html`.
1212
BlazorWasmPrerenderingDeleteLoadingContents | `false` | When it is set to `true`, the "Loading..." contents will be deleted from prerendered output HTML files, and prerendered contents will be visible immediately, even before the Blazor WebAssembly runtime has warmed up.
1313
BlazorWasmPrerenderingUrlPathToExplicitFetch| | Explicitly set the semicolon-separated URL paths of pages that are not linked from anywhere, such as easter-egg pages, so that they are prerendered too.
14+
BlazorWasmPrerenderingUrlPathRegexToIgnore | | Set a regular expression; URL paths that match it (case-insensitively, anywhere in the path) are skipped during the crawling process.
1415
BlazorWasmPrerenderingEnvironment | `Prerendering` | Set the name of the host environment, which can be retrieved via `IWebHostEnvironment.EnvironmentName`.
1516
BlazorWasmPrerenderingEmulateAuthMe | `true` | When it is set to `true`, the prerendering server emulates Azure App Services Auth. That means the URL endpoint **"/.auth/me"** will return the JSON content `{"clientPrincipal":null}`
1617
BlazorWasmPrerenderingLocale | `en` | Set a comma-separated locale list such as "en", "ja-JP,en-US", etc., which is used when crawling. **⚠️Attention:** when you specify this MSBuild property via the "dotnet" command line, you have to replace `,` (comma) with `%2c`.

README.md

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,21 @@ To support that case, please **set the URL path list that you want to fetch expl
203203
<BlazorWasmPrerenderingUrlPathToExplicitFetch>/unlinked/page1;/unlinked/page2</BlazorWasmPrerenderingUrlPathToExplicitFetch>
204204
...
205205
```
206+
207+
Similarly, if you want to skip some URL paths during the crawling process, you can **set a regular expression to the `BlazorWasmPrerenderingUrlPathRegexToIgnore` MSBuild property**.
208+
209+
```xml
210+
<Project Sdk="Microsoft.NET.Sdk.BlazorWebAssembly">
211+
...
212+
<PropertyGroup>
213+
<!--
214+
👇 If you set this, each URL path matched with the regex
215+
will be skipped during the crawling process.
216+
-->
217+
<BlazorWasmPrerenderingUrlPathRegexToIgnore>/(admin|private)</BlazorWasmPrerenderingUrlPathRegexToIgnore>
218+
...
219+
```
220+
206221
(* See also: [_MSBuild properties reference for the "BlazorWasmPreRendering.Build"_](https://github.com/jsakamoto/BlazorWasmPreRendering.Build/blob/master/MSBUILD-PROPERTIES.md))
207222
208223
### Render mode

0 commit comments

Comments
 (0)