Skip to content

Commit 5a0baaf

Browse files
committed
Implement download resume
1 parent 2e50d4d commit 5a0baaf

File tree

3 files changed

+53
-4
lines changed

3 files changed

+53
-4
lines changed

README.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,10 @@ Use "sbstck-dl [command] --help" for more information about a command.
4444

4545
You can provide the URL of a single post or the main URL of the Substack you want to download.
4646

47+
By providing the main URL of a Substack, the downloader will download all the posts of the archive.
48+
49+
If a full-archive download is interrupted, the next run resumes it and downloads only the remaining posts.
50+
4751
```bash
4852
Usage:
4953
sbstck-dl download [flags]
@@ -104,7 +108,6 @@ sbstck-dl download --url https://example.substack.com --cookie_name substack.sid
104108
105109
## TODO
106110
107-
- [ ] Implementing resuming downloads
108111
- [ ] Improve retry logic
109112
- [ ] Implement loading from config file
110113
- [ ] Add support for downloading media
@@ -113,3 +116,4 @@ sbstck-dl download --url https://example.substack.com --cookie_name substack.sid
113116
- [x] Add documentation
114117
- [x] Add support for private newsletters
115118
- [x] Implement filtering by date
119+
- [x] Implement resuming downloads

cmd/download.go

Lines changed: 47 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"fmt"
55
"log"
66
"net/url"
7+
"path/filepath"
78
"strings"
89
"time"
910

@@ -62,17 +63,36 @@ var (
6263
var downloadedPostsCount int
6364
dateFilterfunc := makeDateFilterFunc(beforeDate, afterDate)
6465
urls, err := extractor.GetAllPostsURLs(ctx, downloadUrl, dateFilterfunc)
66+
urlsCount := len(urls)
6567
if err != nil {
6668
log.Fatalln(err)
6769
}
70+
if urlsCount == 0 {
71+
if verbose {
72+
fmt.Println("No posts found, exiting...")
73+
}
74+
return
75+
}
6876
if verbose {
69-
fmt.Printf("Found %d posts\n", len(urls))
77+
fmt.Printf("Found %d posts\n", urlsCount)
7078
}
7179
if dryRun {
72-
fmt.Printf("Found %d posts\n", len(urls))
80+
fmt.Printf("Found %d posts\n", urlsCount)
7381
fmt.Println("Dry run, exiting...")
7482
return
7583
}
84+
urls, err = filterExistingPosts(urls, outputFolder, format)
85+
if err != nil {
86+
if verbose {
87+
fmt.Println("Error filtering existing posts:", err)
88+
}
89+
}
90+
if len(urls) == 0 {
91+
if verbose {
92+
fmt.Println("No new posts found, exiting...")
93+
}
94+
return
95+
}
7696
bar := progressbar.NewOptions(len(urls),
7797
progressbar.OptionSetWidth(25),
7898
progressbar.OptionSetDescription("downloading"),
@@ -154,3 +174,28 @@ func parseURL(toTest string) (*url.URL, error) {
154174
// makePath builds the destination path for a post in the form
// "<outputFolder>/<date>_<slug>.<format>", where <date> is whatever
// convertDateTime returns for post.PostDate and <slug> is post.Slug.
func makePath(post lib.Post, outputFolder string, format string) string {
	datePrefix := convertDateTime(post.PostDate)
	slug := post.Slug
	return fmt.Sprintf("%s/%s_%s.%s", outputFolder, datePrefix, slug, format)
}
177+
178+
// extractSlug extracts the slug from a Substack post URL, i.e. the last
// non-empty path segment:
//
//	https://example.substack.com/p/this-is-the-post-title  -> this-is-the-post-title
//	https://example.substack.com/p/this-is-the-post-title/ -> this-is-the-post-title
//	https://example.substack.com/p/this-is-the-post-title?utm_source=x -> this-is-the-post-title
//
// Any query string or fragment is discarded and trailing slashes are ignored,
// so equivalent spellings of the same post URL yield the same slug. An empty
// input yields an empty slug.
func extractSlug(url string) string {
	// Cut at the first '?' or '#': neither the query nor the fragment is part
	// of the path, and both may themselves contain '/'.
	if i := strings.IndexAny(url, "?#"); i >= 0 {
		url = url[:i]
	}
	// Without this a trailing slash would make the last segment empty.
	url = strings.TrimRight(url, "/")
	// LastIndex returns -1 when there is no '/', which makes the slice start
	// at 0 and return the whole string.
	return url[strings.LastIndex(url, "/")+1:]
}
184+
185+
// filterExistingPosts filters out posts that already exist in the output folder.
186+
// It looks for files whose name ends with the post slug.
187+
func filterExistingPosts(urls []string, outputFolder string, format string) ([]string, error) {
188+
var filtered []string
189+
for _, url := range urls {
190+
slug := extractSlug(url)
191+
path := fmt.Sprintf("%s/%s_%s.%s", outputFolder, "*", slug, format)
192+
matches, err := filepath.Glob(path)
193+
if err != nil {
194+
return urls, err
195+
}
196+
if len(matches) == 0 {
197+
filtered = append(filtered, url)
198+
}
199+
}
200+
return filtered, nil
201+
}

cmd/version.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ var versionCmd = &cobra.Command{
1212
Short: "Print the version number of sbstck-dl",
1313
Long: `Display the current version of the app.`,
1414
Run: func(cmd *cobra.Command, args []string) {
15-
fmt.Println("sbstck-dl v0.3.1")
15+
fmt.Println("sbstck-dl v0.3.2")
1616
},
1717
}
1818

0 commit comments

Comments
 (0)