alexferrari88
diff --git a/‎.gitignore‎
Lines changed: 1 addition & 1 deletion b/‎.gitignore‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.serena/.gitignore‎
Lines changed: 1 addition & 0 deletions b/‎.serena/.gitignore‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎CLAUDE.md‎
Lines changed: 31 additions & 1 deletion b/‎CLAUDE.md‎
Lines changed: 31 additions & 1 deletion
diff --git a/‎README.md‎
Lines changed: 65 additions & 0 deletions b/‎README.md‎
Lines changed: 65 additions & 0 deletions
diff --git a/‎cmd/download.go‎
Lines changed: 43 additions & 0 deletions b/‎cmd/download.go‎
Lines changed: 43 additions & 0 deletions
@@ -29,4 +29,4 @@ test-download/
 .vscode/
 
 # serena
-cache/
+.serena/cache/
@@ -0,0 +1 @@
+/cache
@@ -3,7 +3,7 @@
 This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
 
 ## Project Overview
-This is a Go CLI tool for downloading posts from Substack blogs. It supports downloading individual posts or entire archives, with features for private newsletters (via cookies), rate limiting, format conversion (HTML/Markdown/Text), and downloading of images and file attachments locally.
+This is a Go CLI tool for downloading posts from Substack blogs. It supports downloading individual posts or entire archives, with features for private newsletters (via cookies), rate limiting, format conversion (HTML/Markdown/Text), local downloading of images and file attachments, and creation of archive index pages that link all downloaded posts and show their metadata.
 
 ## Architecture
 The project follows a standard Go CLI structure:
@@ -49,8 +49,11 @@ go mod download
 
 ### Extractor (`lib/extractor.go`)
 - Parses Substack post JSON from HTML
+- Extracts post metadata including subtitle (.subtitle CSS selector) and cover image (og:image meta tag)
 - Converts HTML to Markdown/Text using external libraries
 - Handles file writing with different formats
+- Provides archive page generation functionality (HTML/Markdown/Text formats)
+- Manages archive entries with automatic sorting by publication date (newest first)
 
 ### Image Downloader (`lib/images.go`)
 - Downloads images locally from Substack posts
@@ -67,6 +70,15 @@ go mod download
 - Handles filename sanitization and collision avoidance
 - Integrates with existing image download workflow
 
+### Archive Page Generator (`lib/extractor.go`)
+- Creates index pages linking all downloaded posts with metadata
+- Supports HTML, Markdown, and Text formats matching the selected output format
+- Includes post titles (linked to downloaded files with relative paths)
+- Shows publication dates and download timestamps
+- Displays post descriptions/subtitles and cover images when available
+- Automatically sorts posts by publication date (newest first)
+- Generates `index.{format}` in the output directory root
+
 ### Commands Structure
 Uses Cobra framework:
 - `download`: Main functionality for downloading posts
@@ -120,6 +132,24 @@ go run . download --url https://example.substack.com --download-files --files-di
 go run . download --url https://example.substack.com/p/post-title --download-images --download-files --output ./downloads
 ```
 
+### Creating archive index pages
+```bash
+# Download posts and create an archive index page
+go run . download --url https://example.substack.com --create-archive --output ./downloads
+
+# Download entire archive with archive index in markdown format
+go run . download --url https://example.substack.com --create-archive --format md --output ./downloads
+
+# Download single post with archive page (useful for building up an archive over time)
+go run . download --url https://example.substack.com/p/post-title --create-archive --output ./downloads
+
+# Download with all features: images, files, and archive page
+go run . download --url https://example.substack.com --download-images --download-files --create-archive --output ./downloads
+
+# Download archive with specific format and custom directories
+go run . download --url https://example.substack.com --create-archive --format html --images-dir assets --files-dir attachments --output ./downloads
+```
+
 ### Building for release
 ```bash
 go build -ldflags="-s -w" -o sbstck-dl .
 
@@ -60,6 +60,7 @@ Usage:
 
 Flags:
       --add-source-url         Add the original post URL at the end of the downloaded file
+      --create-archive         Create an archive index page linking all downloaded posts
       --download-files         Download file attachments locally and update content to reference local files
       --download-images        Download images locally and update content to reference local files
   -d, --dry-run                Enable dry run
@@ -181,6 +182,68 @@ output/
         └── presentation.pptx
 ```
 
+#### Creating Archive Index Pages
+
+Use the `--create-archive` flag to generate an index page that links to every downloaded post and shows its metadata. The index gives you a single overview of your downloaded content, making it easy to browse and access your Substack archive.
+
+**Features:**
+- Creates an `index.{format}` file matching your selected output format (HTML/Markdown/Text)
+- Links to all downloaded posts using relative file paths
+- Displays post titles, publication dates, and download timestamps
+- Shows post descriptions/subtitles and cover images when available
+- Automatically sorts posts by publication date (newest first)
+- Works with both single post and bulk downloads
+
+**Examples:**
+
+```bash
+# Download entire archive and create index page
+sbstck-dl download --url https://example.substack.com --create-archive
+
+# Create archive index in Markdown format
+sbstck-dl download --url https://example.substack.com --create-archive --format md
+
+# Build archive over time with single posts
+sbstck-dl download --url https://example.substack.com/p/post-title --create-archive
+
+# Complete download with all features
+sbstck-dl download --url https://example.substack.com --download-images --download-files --create-archive
+
+# Custom directory structure with archive
+sbstck-dl download --url https://example.substack.com --create-archive --images-dir assets --files-dir attachments
+```
+
+**Archive Content Per Post:**
+- **Title**: Clickable link to the downloaded post file
+- **Publication Date**: When the post was originally published on Substack
+- **Download Date**: When you downloaded the post locally
+- **Description**: Post subtitle or description (when available)
+- **Cover Image**: Featured image from the post (when available)
+
+**Archive Format Examples:**
+
+- *HTML Format:* Styled webpage with images, organized post cards, and hover effects
+- *Markdown Format:* Clean markdown with headers, links, and image references
+- *Text Format:* Plain text listing with all metadata for maximum compatibility
+
+**Directory Structure with Archive:**
+```
+output/
+├── index.html                     # Archive index page
+├── 20231201_120000_post-title.html
+├── 20231115_090000_another-post.html
+├── images/
+│   ├── post-title/
+│   │   └── image1_1456x819.jpeg
+│   └── another-post/
+│       └── image2_848x636.png
+└── files/
+    ├── post-title/
+    │   └── document.pdf
+    └── another-post/
+        └── spreadsheet.xlsx
+```
+
 ### Listing posts
 
 ```bash
@@ -223,6 +286,8 @@ sbstck-dl download --url https://example.substack.com --cookie_name substack.sid
 - [x] Improve retry logic
 - [ ] Implement loading from config file
 - [x] Add support for downloading images
+- [x] Add support for downloading file attachments
+- [x] Add archive index page functionality
 - [x] Add tests
 - [x] Add CI
 - [x] Add documentation
 
@@ -26,12 +26,19 @@ var (
 	downloadFiles  bool
 	fileExtensions string
 	filesDir       string
+	createArchive  bool
 	downloadCmd    = &cobra.Command{
 		Use:   "download",
 		Short: "Download individual posts or the entire public archive",
 		Long:  `You can provide the url of a single post or the main url of the Substack you want to download.`,
 		Run: func(cmd *cobra.Command, args []string) {
 			startTime := time.Now()
+			
+			// Create archive instance if flag is set
+			var archive *lib.Archive
+			if createArchive {
+				archive = lib.NewArchive()
+			}
 
 			// if url contains "/p/", we are downloading a single post
 			if strings.Contains(downloadUrl, "/p/") {
@@ -80,6 +87,11 @@ var (
 					}
 				}
 
+				// Add to archive if enabled
+				if archive != nil {
+					archive.AddEntry(post, path, startTime)
+				}
+
 				if verbose {
 					fmt.Println("Done in ", time.Since(startTime))
 				}
@@ -166,12 +178,42 @@ var (
 							log.Printf("Error writing file %s: %v\n", path, err)
 						}
 					}
+
+					// Add to archive if enabled and post was successfully written
+					if archive != nil {
+						archive.AddEntry(post, path, time.Now())
+					}
 				}
 				if verbose {
 					fmt.Println("Downloaded", downloadedPostsCount, "posts, out of", len(urls))
 					fmt.Println("Done in ", time.Since(startTime))
 				}
 			}
+
+			// Generate archive page if enabled
+			if archive != nil && len(archive.Entries) > 0 {
+				if verbose {
+					fmt.Printf("Generating archive page in %s format...\n", format)
+				}
+				
+				var archiveErr error
+				switch format {
+				case "html":
+					archiveErr = archive.GenerateHTML(outputFolder)
+				case "md":
+					archiveErr = archive.GenerateMarkdown(outputFolder)
+				case "txt":
+					archiveErr = archive.GenerateText(outputFolder)
+				default:
+					archiveErr = fmt.Errorf("unknown format for archive: %s", format)
+				}
+				
+				if archiveErr != nil {
+					log.Printf("Error generating archive page: %v\n", archiveErr)
+				} else if verbose {
+					fmt.Printf("Archive page generated: %s/index.%s\n", outputFolder, format)
+				}
+			}
 		},
 	}
 )
@@ -188,6 +230,7 @@ func init() {
 	downloadCmd.Flags().BoolVar(&downloadFiles, "download-files", false, "Download file attachments locally and update content to reference local files")
 	downloadCmd.Flags().StringVar(&fileExtensions, "file-extensions", "", "Comma-separated list of file extensions to download (e.g., 'pdf,docx,txt'). If empty, downloads all file types")
 	downloadCmd.Flags().StringVar(&filesDir, "files-dir", "files", "Directory name for downloaded file attachments")
+	downloadCmd.Flags().BoolVar(&createArchive, "create-archive", false, "Create an archive index page linking all downloaded posts")
 	downloadCmd.MarkFlagRequired("url")
 }