|
4 | 4 | "fmt" |
5 | 5 | "log" |
6 | 6 | "net/url" |
| 7 | + "path/filepath" |
7 | 8 | "strings" |
8 | 9 | "time" |
9 | 10 |
|
@@ -62,17 +63,36 @@ var ( |
62 | 63 | var downloadedPostsCount int |
63 | 64 | dateFilterfunc := makeDateFilterFunc(beforeDate, afterDate) |
64 | 65 | urls, err := extractor.GetAllPostsURLs(ctx, downloadUrl, dateFilterfunc) |
| 66 | + urlsCount := len(urls) |
65 | 67 | if err != nil { |
66 | 68 | log.Fatalln(err) |
67 | 69 | } |
| 70 | + if urlsCount == 0 { |
| 71 | + if verbose { |
| 72 | + fmt.Println("No posts found, exiting...") |
| 73 | + } |
| 74 | + return |
| 75 | + } |
68 | 76 | if verbose { |
69 | | - fmt.Printf("Found %d posts\n", len(urls)) |
| 77 | + fmt.Printf("Found %d posts\n", urlsCount) |
70 | 78 | } |
71 | 79 | if dryRun { |
72 | | - fmt.Printf("Found %d posts\n", len(urls)) |
| 80 | + fmt.Printf("Found %d posts\n", urlsCount) |
73 | 81 | fmt.Println("Dry run, exiting...") |
74 | 82 | return |
75 | 83 | } |
| 84 | + urls, err = filterExistingPosts(urls, outputFolder, format) |
| 85 | + if err != nil { |
| 86 | + if verbose { |
| 87 | + fmt.Println("Error filtering existing posts:", err) |
| 88 | + } |
| 89 | + } |
| 90 | + if len(urls) == 0 { |
| 91 | + if verbose { |
| 92 | + fmt.Println("No new posts found, exiting...") |
| 93 | + } |
| 94 | + return |
| 95 | + } |
76 | 96 | bar := progressbar.NewOptions(len(urls), |
77 | 97 | progressbar.OptionSetWidth(25), |
78 | 98 | progressbar.OptionSetDescription("downloading"), |
@@ -154,3 +174,28 @@ func parseURL(toTest string) (*url.URL, error) { |
154 | 174 | func makePath(post lib.Post, outputFolder string, format string) string { |
155 | 175 | return fmt.Sprintf("%s/%s_%s.%s", outputFolder, convertDateTime(post.PostDate), post.Slug, format) |
156 | 176 | } |
| 177 | + |
// extractSlug extracts the slug from a Substack post URL, i.e. the text after
// the last "/" of its path.
// e.g. https://example.substack.com/p/this-is-the-post-title -> this-is-the-post-title
//
// A query string or fragment ("?..." / "#...") and any trailing slashes are
// dropped first, so ".../p/title/?utm_source=x" still yields "title".
// An empty input yields an empty slug.
func extractSlug(url string) string {
	// The query/fragment is not part of the path and may itself contain "/".
	if i := strings.IndexAny(url, "?#"); i >= 0 {
		url = url[:i]
	}
	// Without this, a trailing "/" would make the last segment empty.
	url = strings.TrimRight(url, "/")
	// LastIndex returns -1 when there is no "/", which makes this the whole string.
	return url[strings.LastIndex(url, "/")+1:]
}
| 184 | + |
| 185 | +// filterExistingPosts filters out posts that already exist in the output folder. |
| 186 | +// It looks for files whose name ends with the post slug. |
| 187 | +func filterExistingPosts(urls []string, outputFolder string, format string) ([]string, error) { |
| 188 | + var filtered []string |
| 189 | + for _, url := range urls { |
| 190 | + slug := extractSlug(url) |
| 191 | + path := fmt.Sprintf("%s/%s_%s.%s", outputFolder, "*", slug, format) |
| 192 | + matches, err := filepath.Glob(path) |
| 193 | + if err != nil { |
| 194 | + return urls, err |
| 195 | + } |
| 196 | + if len(matches) == 0 { |
| 197 | + filtered = append(filtered, url) |
| 198 | + } |
| 199 | + } |
| 200 | + return filtered, nil |
| 201 | +} |
0 commit comments