diff --git a/.gitignore b/.gitignore index 9cbda3cfd..e0511be11 100644 --- a/.gitignore +++ b/.gitignore @@ -37,3 +37,30 @@ internal/e2e/immich-test internal/e2e/fixtures/wikimedia scratchpad/ internal/e2e/testdata + +# ── GSD baseline (auto-generated) ── +.gsd +.gsd-id +.mcp.json +.bg-shell/ +Thumbs.db +*.swp +*.swo +*~ +.idea/ +.vscode/ +*.code-workspace +.env.* +!.env.example +node_modules/ +.next/ +build/ +__pycache__/ +*.pyc +.venv/ +venv/ +target/ +vendor/ +coverage/ +.cache/ +tmp/ diff --git a/adapters/flickr/cmd.go b/adapters/flickr/cmd.go new file mode 100644 index 000000000..d3c7fb76c --- /dev/null +++ b/adapters/flickr/cmd.go @@ -0,0 +1,104 @@ +package flickr + +import ( + "context" + "errors" + "fmt" + "os" + "path/filepath" + "strings" + + "github.com/simulot/immich-go/adapters" + "github.com/simulot/immich-go/app" + "github.com/simulot/immich-go/internal/fshelper" + "github.com/spf13/cobra" + "github.com/spf13/pflag" +) + +// NewFromFlickrCommand creates the "from-flickr" cobra subcommand and wires it into +// the upload pipeline via the provided runner. It follows the same construction +// pattern as NewFromGooglePhotosCommand: +// - cmd.SetContext is called before returning so context propagates correctly. +// - processor is read inside RunE (after PersistentPreRunE has populated it). +// - fsyss is populated and validated inside RunE; CloseFSs is deferred there. +func NewFromFlickrCommand(ctx context.Context, parent *cobra.Command, app *app.Application, runner adapters.Runner) *cobra.Command { + cmd := &cobra.Command{ + Use: "from-flickr [flags] ...", + Short: "Upload photos from a Flickr export directory or ZIP files", + Args: cobra.MinimumNArgs(1), + } + cmd.SetContext(ctx) + + f := &FlickrCmd{ + app: app, + } + f.RegisterFlags(cmd.Flags(), cmd) + + cmd.RunE = func(cmd *cobra.Command, args []string) error { //nolint:contextcheck + var err error + + // processor is populated by PersistentPreRunE which runs before RunE. 
// expandDirArgs replaces every directory in args with the ZIP archives found
// directly inside it, so a user can point at a download folder instead of
// listing each archive.
//
// Arguments that are not existing directories (including paths for which
// os.Stat fails, such as glob patterns) are passed through unchanged and in
// their original position.
//
// For a directory argument:
//   - only entries at the top level are considered; sub-directories are
//     never descended into nor returned, even if their name ends in ".zip";
//   - the ".zip" extension is matched case-insensitively, so "A.ZIP" is found;
//   - the directory is listed with os.ReadDir rather than filepath.Glob, so
//     glob metacharacters in the directory name (for example "flickr[2020]")
//     are treated literally;
//   - results are appended in the filename order returned by os.ReadDir.
//
// It returns an error if a directory cannot be listed or contains no ZIP
// file. On error the returned slice is nil.
func expandDirArgs(args []string) ([]string, error) {
	out := make([]string, 0, len(args))
	for _, arg := range args {
		info, err := os.Stat(arg)
		if err != nil || !info.IsDir() {
			// Not a directory (or stat failed) — leave it for the path parser.
			out = append(out, arg)
			continue
		}

		entries, err := os.ReadDir(arg)
		if err != nil {
			return nil, fmt.Errorf("listing directory %q: %w", arg, err)
		}

		found := 0
		for _, entry := range entries {
			if entry.IsDir() || !strings.EqualFold(filepath.Ext(entry.Name()), ".zip") {
				continue
			}
			out = append(out, filepath.Join(arg, entry.Name()))
			found++
		}
		if found == 0 {
			return nil, fmt.Errorf("no ZIP files found in directory: %s", arg)
		}
	}
	return out, nil
}
+func (f *FlickrCmd) RegisterFlags(flags *pflag.FlagSet, cmd *cobra.Command) { + flags.BoolVar(&f.CreateAlbums, "sync-albums", true, "Automatically create albums in Immich that match the albums in your Flickr export") +} diff --git a/adapters/flickr/flickr.go b/adapters/flickr/flickr.go new file mode 100644 index 000000000..e9d5cda60 --- /dev/null +++ b/adapters/flickr/flickr.go @@ -0,0 +1,306 @@ +package flickr + +import ( + "context" + "errors" + "io/fs" + "path" + "regexp" + "strings" + "time" + + "github.com/simulot/immich-go/adapters" + "github.com/simulot/immich-go/app" + "github.com/simulot/immich-go/internal/assets" + "github.com/simulot/immich-go/internal/fileevent" + "github.com/simulot/immich-go/internal/fileprocessor" + "github.com/simulot/immich-go/internal/filetypes" + "github.com/simulot/immich-go/internal/fshelper" +) + +// Compile-time assertion that FlickrCmd satisfies adapters.Reader. +var _ adapters.Reader = (*FlickrCmd)(nil) + +// albumsJSONFile is the filename used to identify the metadata archive and +// parse album membership. Declared as a constant to satisfy goconst. +const albumsJSONFile = "albums.json" + +// Package-level compiled regexes — compiled once, never per-call. +var ( + // rePhotoIDPrimary matches the current Flickr export format: slug_ID_o.ext + // e.g. "my-photo_12345678_o.jpg" + rePhotoIDPrimary = regexp.MustCompile(`_(\d+)_o\.\w+$`) + + // rePhotoIDFallback matches the older Flickr export format: ID_hash_o.ext + // e.g. "12345678_ab1cd2ef3g_o.jpg" + rePhotoIDFallback = regexp.MustCompile(`^(\d+)_[0-9a-f]+_o\.\w+$`) +) + +// assetFile keeps information collected during pass one about a single image file. +type assetFile struct { + fsys fs.FS // the FS partition (archive) in which the file lives + base string // the original filename within that FS + length int // file size in bytes + date time.Time // file modification time +} + +// FlickrCmd holds the runtime state for a Flickr export import operation. 
+// catalog is keyed by Flickr photo ID (not by directory like the GP adapter) because +// Flickr exports are flat — all images live in a single root with no subdirectories. +type FlickrCmd struct { + // CLI flags + CreateAlbums bool // --sync-albums: create Immich albums matching Flickr albums + + // internal state + app *app.Application + processor *fileprocessor.FileProcessor + fsyss []fs.FS + catalog map[string]*assetFile // photo ID → image file entry + photoMeta map[string]*FlickrMetadata // photo ID → parsed per-photo JSON + albumIndex map[string][]string // photo ID → album titles (from albums.json) +} + +// classifyArchives partitions the provided FSes into exactly one metadata FS +// (containing albums.json) and zero or more image FSes. +// It uses fs.Stat rather than WalkDir — O(1) per FS, no directory traversal needed. +func classifyArchives(fsyss []fs.FS) (metaFS fs.FS, imageFS []fs.FS, err error) { + var metaCandidates []fs.FS + + for _, fsys := range fsyss { + _, statErr := fs.Stat(fsys, albumsJSONFile) + if statErr == nil { + // albums.json present → this is the metadata archive + metaCandidates = append(metaCandidates, fsys) + } else if errors.Is(statErr, fs.ErrNotExist) { + // albums.json absent → this is an image archive + imageFS = append(imageFS, fsys) + } else { + // Unexpected stat error (permissions, I/O, etc.) + return nil, nil, statErr + } + } + + switch len(metaCandidates) { + case 0: + return nil, nil, errors.New("no metadata archive found (missing albums.json)") + case 1: + return metaCandidates[0], imageFS, nil + default: + return nil, nil, errors.New("multiple metadata archives found") + } +} + +// extractPhotoID extracts the Flickr numeric photo ID from an image filename. +// It strips any directory prefix first, then tries the primary pattern (slug_ID_o.ext) +// followed by the fallback pattern (ID_hash_o.ext). +// Returns the ID string and true on success, or "", false if neither pattern matches. 
+func extractPhotoID(filename string) (photoID string, ok bool) { + base := path.Base(filename) + + if m := rePhotoIDPrimary.FindStringSubmatch(base); len(m) == 2 { + return m[1], true + } + if m := rePhotoIDFallback.FindStringSubmatch(base); len(m) == 2 { + return m[1], true + } + return "", false +} + +// Browse satisfies adapters.Reader. It runs a two-pass goroutine: +// - passOneImageFS walks each image archive and builds f.catalog +// - passOneMetaFS walks the metadata archive, reads photo_*.json files and albums.json +// - passTwo iterates the catalog and emits one assets.Group per photo +func (f *FlickrCmd) Browse(ctx context.Context) chan *assets.Group { + ctx, cancel := context.WithCancelCause(ctx) + gOut := make(chan *assets.Group) + go func() { + defer close(gOut) + + f.catalog = make(map[string]*assetFile) + f.photoMeta = make(map[string]*FlickrMetadata) + f.albumIndex = nil + + metaFS, imageFSes, err := classifyArchives(f.fsyss) + if err != nil { + cancel(err) + return + } + + // passOne: walk all image archives, then the metadata archive + for _, imgFS := range imageFSes { + if err := f.passOneImageFS(ctx, imgFS); err != nil { + cancel(err) + return + } + } + if err := f.passOneMetaFS(ctx, metaFS); err != nil { + cancel(err) + return + } + + // passTwo: emit one group per catalog entry + if err := f.passTwo(ctx, gOut); err != nil { + cancel(err) + return + } + cancel(nil) + }() + return gOut +} + +// passOneImageFS walks a single image archive and populates f.catalog. +// Every file receives exactly one fileevent log entry. 
+func (f *FlickrCmd) passOneImageFS(ctx context.Context, imgFS fs.FS) error { + return fs.WalkDir(imgFS, ".", func(name string, d fs.DirEntry, err error) error { + if err != nil { + return err + } + if d.IsDir() { + return nil + } + + select { + case <-ctx.Done(): + return ctx.Err() + default: + } + + finfo, err := fs.Stat(imgFS, name) + if err != nil { + f.processor.RecordNonAsset(ctx, fshelper.FSName(imgFS, name), 0, fileevent.ErrorFileAccess, "error", err.Error()) + return nil + } + + ext := strings.ToLower(path.Ext(name)) + mediaType := filetypes.DefaultSupportedMedia.TypeFromExt(ext) + + if mediaType != filetypes.TypeImage && mediaType != filetypes.TypeVideo { + f.processor.RecordNonAsset(ctx, fshelper.FSName(imgFS, name), finfo.Size(), fileevent.DiscoveredUnsupported, "reason", "unsupported file type") + return nil + } + + photoID, ok := extractPhotoID(name) + if !ok { + f.processor.RecordNonAsset(ctx, fshelper.FSName(imgFS, name), finfo.Size(), fileevent.DiscoveredUnsupported, "reason", "no photo ID in filename") + return nil + } + + if _, exists := f.catalog[photoID]; exists { + f.processor.RecordNonAsset(ctx, fshelper.FSName(imgFS, name), finfo.Size(), fileevent.DiscoveredUnsupported, "reason", "duplicate photo ID") + return nil + } + + f.catalog[photoID] = &assetFile{ + fsys: imgFS, + base: path.Base(name), + length: int(finfo.Size()), + date: finfo.ModTime(), + } + + code := fileevent.DiscoveredImage + if mediaType == filetypes.TypeVideo { + code = fileevent.DiscoveredVideo + } + f.processor.RecordAssetDiscovered(ctx, fshelper.FSName(imgFS, name), finfo.Size(), code) + return nil + }) +} + +// passOneMetaFS walks the metadata archive and populates f.photoMeta and f.albumIndex. 
+func (f *FlickrCmd) passOneMetaFS(ctx context.Context, metaFS fs.FS) error { + return fs.WalkDir(metaFS, ".", func(name string, d fs.DirEntry, err error) error { + if err != nil { + return err + } + if d.IsDir() { + return nil + } + + select { + case <-ctx.Done(): + return ctx.Err() + default: + } + + base := path.Base(name) + + switch { + case base == albumsJSONFile: + parsed, err := fshelper.ReadJSON[FlickrAlbums](metaFS, name) + if err != nil { + return err + } + f.albumIndex = albumIndex(parsed) + f.processor.RecordNonAsset(ctx, fshelper.FSName(metaFS, name), 0, fileevent.DiscoveredSidecar, "type", albumsJSONFile) + + case strings.HasPrefix(base, "photo_") && strings.HasSuffix(base, ".json"): + parsed, err := fshelper.ReadJSON[FlickrMetadata](metaFS, name) + if err != nil { + f.processor.RecordNonAsset(ctx, fshelper.FSName(metaFS, name), 0, fileevent.ErrorFileAccess, "error", err.Error()) + return nil + } + // Derive photo ID from filename: photo_.json → + photoID := strings.TrimSuffix(strings.TrimPrefix(base, "photo_"), ".json") + f.photoMeta[photoID] = parsed + f.processor.RecordNonAsset(ctx, fshelper.FSName(metaFS, name), 0, fileevent.DiscoveredSidecar, "type", "photo metadata", "id", photoID) + + default: + f.processor.RecordNonAsset(ctx, fshelper.FSName(metaFS, name), 0, fileevent.DiscoveredUnsupported, "reason", "unrecognised metadata file") + } + + return nil + }) +} + +// passTwo iterates the catalog and emits one assets.Group per photo. +// Photos with no matching JSON are still emitted (with ProcessedMissingMetadata logged). +// Albums from albumIndex are attached when present. 
+func (f *FlickrCmd) passTwo(ctx context.Context, gOut chan *assets.Group) error { + // Ensure albumIndex is never nil (handles missing albums.json gracefully) + if f.albumIndex == nil { + f.albumIndex = make(map[string][]string) + } + + for photoID, entry := range f.catalog { + select { + case <-ctx.Done(): + return ctx.Err() + default: + } + + a := &assets.Asset{ + File: fshelper.FSName(entry.fsys, entry.base), + FileSize: entry.length, + OriginalFileName: entry.base, + FileDate: entry.date, + } + + if md, ok := f.photoMeta[photoID]; ok { + converted := md.AsMetadata(fshelper.FSName(entry.fsys, entry.base)) + a.FromApplication = a.UseMetadata(converted) + // Override OriginalFileName with the human-readable Flickr photo title when available, + // preserving the original file extension so upload type detection still works. + if md.Name != "" { + a.OriginalFileName = md.Name + path.Ext(entry.base) + } + } else { + f.processor.RecordNonAsset(ctx, fshelper.FSName(entry.fsys, entry.base), int64(entry.length), fileevent.ProcessedMissingMetadata) + } + + // Attach album membership sourced from albums.json (never from per-photo JSON). 
+ if titles, ok := f.albumIndex[photoID]; ok { + albumSlice := make([]assets.Album, 0, len(titles)) + for _, title := range titles { + albumSlice = append(albumSlice, assets.Album{Title: title}) + } + a.MergeAlbums(albumSlice) + } + + select { + case gOut <- assets.NewGroup(assets.GroupByNone, a): + case <-ctx.Done(): + return ctx.Err() + } + } + return nil +} diff --git a/adapters/flickr/flickr_test.go b/adapters/flickr/flickr_test.go new file mode 100644 index 000000000..9e1e01819 --- /dev/null +++ b/adapters/flickr/flickr_test.go @@ -0,0 +1,426 @@ +package flickr + +import ( + "context" + "io/fs" + "strings" + "testing" + "testing/fstest" + "time" + + "github.com/simulot/immich-go/internal/assets" + "github.com/simulot/immich-go/internal/assettracker" + "github.com/simulot/immich-go/internal/fileevent" + "github.com/simulot/immich-go/internal/fileprocessor" +) + +// TestExtractPhotoID verifies both filename patterns found in real Flickr exports. +func TestExtractPhotoID(t *testing.T) { + tests := []struct { + name string + filename string + wantID string + wantOK bool + }{ + // Primary pattern: slug_ID_o.ext (current Flickr format) + { + name: "primary pattern simple", + filename: "my-photo_12345678_o.jpg", + wantID: "12345678", + wantOK: true, + }, + { + name: "primary pattern multi-word slug", + filename: "sunset-at-the-beach_987654321_o.jpeg", + wantID: "987654321", + wantOK: true, + }, + { + name: "primary pattern with directory prefix stripped", + filename: "some/dir/photo_42_o.png", + wantID: "42", + wantOK: true, + }, + // Plan table: slug_ID_o.ext variations + { + name: "plan table - photo_spring_67890_o.jpg", + filename: "photo_spring_67890_o.jpg", + wantID: "67890", + wantOK: true, + }, + { + name: "plan table - photo_title_12345_o.png", + filename: "photo_title_12345_o.png", + wantID: "12345", + wantOK: true, + }, + // Fallback pattern: ID_hash_o.ext (older Flickr format) + { + name: "fallback pattern", + filename: "12345678_ab1cd2ef3a_o.jpg", + 
wantID: "12345678", + wantOK: true, + }, + { + name: "fallback pattern long hash", + filename: "987654321_0123456789ab_o.jpg", + wantID: "987654321", + wantOK: true, + }, + // Plan table: fallback ID_hash_o.ext + { + name: "plan table - 98765_a1b2c3d4_o.jpg", + filename: "98765_a1b2c3d4_o.jpg", + wantID: "98765", + wantOK: true, + }, + // Non-matching inputs + { + name: "no match - plain name", + filename: "photo.jpg", + wantID: "", + wantOK: false, + }, + { + name: "no match - missing _o suffix", + filename: "photo_12345_orig.jpg", + wantID: "", + wantOK: false, + }, + { + name: "no match - empty string", + filename: "", + wantID: "", + wantOK: false, + }, + // Plan table: non-matching inputs + { + name: "plan table - some_file.jpg", + filename: "some_file.jpg", + wantID: "", + wantOK: false, + }, + { + name: "plan table - readme.txt", + filename: "readme.txt", + wantID: "", + wantOK: false, + }, + // Plan table: 12345_o.jpg — missing hash segment for fallback, no slug for primary + { + name: "plan table - 12345_o.jpg no hash segment", + filename: "12345_o.jpg", + wantID: "", + wantOK: false, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + gotID, gotOK := extractPhotoID(tc.filename) + if gotOK != tc.wantOK { + t.Errorf("extractPhotoID(%q): ok = %v, want %v", tc.filename, gotOK, tc.wantOK) + } + if gotID != tc.wantID { + t.Errorf("extractPhotoID(%q): id = %q, want %q", tc.filename, gotID, tc.wantID) + } + }) + } +} + +// makeFS is a helper that builds an in-memory FS with the given file names present. +func makeFS(files ...string) fs.FS { + m := fstest.MapFS{} + for _, f := range files { + m[f] = &fstest.MapFile{} + } + return m +} + +// TestClassifyArchives verifies the metadata / image FS partitioning logic. 
+func TestClassifyArchives(t *testing.T) { + metaFS := makeFS("albums.json", "photo_data.json") + imageFS1 := makeFS("photo_12345_o.jpg", "photo_67890_o.jpg") + imageFS2 := makeFS("photo_11111_o.jpg") + + t.Run("one meta one image", func(t *testing.T) { + meta, imgs, err := classifyArchives([]fs.FS{metaFS, imageFS1}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if meta == nil { + t.Error("expected non-nil metaFS") + } + // Verify the returned metaFS is the correct one — albums.json must be accessible. + if _, statErr := fs.Stat(meta, "albums.json"); statErr != nil { + t.Errorf("returned metaFS does not contain albums.json: %v", statErr) + } + if len(imgs) != 1 { + t.Errorf("got %d image FSes, want 1", len(imgs)) + } + }) + + t.Run("one meta two image", func(t *testing.T) { + meta, imgs, err := classifyArchives([]fs.FS{metaFS, imageFS1, imageFS2}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if meta == nil { + t.Error("expected non-nil metaFS") + } + // Verify the returned metaFS is the correct one. 
+ if _, statErr := fs.Stat(meta, "albums.json"); statErr != nil { + t.Errorf("returned metaFS does not contain albums.json: %v", statErr) + } + if len(imgs) != 2 { + t.Errorf("got %d image FSes, want 2", len(imgs)) + } + }) + + t.Run("no metadata archive", func(t *testing.T) { + _, _, err := classifyArchives([]fs.FS{imageFS1, imageFS2}) + if err == nil { + t.Error("expected error for missing metadata archive, got nil") + } + if err != nil && !strings.Contains(err.Error(), "no metadata archive") { + t.Errorf("error message %q should contain %q", err.Error(), "no metadata archive") + } + }) + + t.Run("multiple metadata archives", func(t *testing.T) { + meta2 := makeFS("albums.json") + _, _, err := classifyArchives([]fs.FS{metaFS, meta2, imageFS1}) + if err == nil { + t.Error("expected error for multiple metadata archives, got nil") + } + if err != nil && !strings.Contains(err.Error(), "multiple metadata archives") { + t.Errorf("error message %q should contain %q", err.Error(), "multiple metadata archives") + } + }) + + t.Run("only metadata archive no images", func(t *testing.T) { + meta, imgs, err := classifyArchives([]fs.FS{metaFS}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if meta == nil { + t.Error("expected non-nil metaFS") + } + if len(imgs) != 0 { + t.Errorf("got %d image FSes, want 0", len(imgs)) + } + }) + + t.Run("single image-only FS no metadata", func(t *testing.T) { + _, _, err := classifyArchives([]fs.FS{imageFS1}) + if err == nil { + t.Error("expected error for single image-only FS, got nil") + } + if err != nil && !strings.Contains(err.Error(), "no metadata archive") { + t.Errorf("error message %q should contain %q", err.Error(), "no metadata archive") + } + }) + + t.Run("empty input", func(t *testing.T) { + _, _, err := classifyArchives([]fs.FS{}) + if err == nil { + t.Error("expected error for empty FS list, got nil") + } + }) +} + +// newTestFlickrCmd constructs a FlickrCmd with a real FileProcessor backed by +// in-memory 
tracker/recorder — no app.Application needed for unit tests. +func newTestFlickrCmd(metaFS fs.FS, imageFSes ...fs.FS) *FlickrCmd { + recorder := fileevent.NewRecorder(nil) + tracker := assettracker.New() + proc := fileprocessor.New(tracker, recorder) + + fsyss := make([]fs.FS, 0, 1+len(imageFSes)) + fsyss = append(fsyss, metaFS) + fsyss = append(fsyss, imageFSes...) + + return &FlickrCmd{ + processor: proc, + fsyss: fsyss, + } +} + +// drainBrowse runs Browse() and returns all assets collected across all groups. +func drainBrowse(f *FlickrCmd) ([]*assets.Asset, int) { + ctx := context.Background() + ch := f.Browse(ctx) + var all []*assets.Asset + groupCount := 0 + for g := range ch { + if g != nil && len(g.Assets) > 0 { + groupCount++ + all = append(all, g.Assets...) + } + } + return all, groupCount +} + +// TestBrowse validates Browse() end-to-end using fstest.MapFS inline fixtures. +func TestBrowse(t *testing.T) { + const photoJSON = `{ + "id": "12345678", + "name": "Sunset at the Beach", + "description": "A beautiful sunset", + "date_taken": "2009-08-02 14:05:00", + "date_upload": "1249204972", + "tags": [{"tag": "travel"}, {"tag": "sunset"}], + "albums": [] + }` + const albumJSON = `{ + "albums": [{ + "id": "72157XXXXXXX", + "title": "Vacation 2009", + "description": "", + "photos": ["12345678"] + }] + }` + + t.Run("happy path - title, date, tags, albums", func(t *testing.T) { + metaFS := fstest.MapFS{ + "photo_12345678.json": {Data: []byte(photoJSON)}, + "albums.json": {Data: []byte(albumJSON)}, + } + imageFS := fstest.MapFS{ + "my-photo_12345678_o.jpg": {Data: []byte{0xFF, 0xD8}}, + } + + assetList, groupCount := drainBrowse(newTestFlickrCmd(metaFS, imageFS)) + + if groupCount != 1 { + t.Fatalf("expected 1 group, got %d", groupCount) + } + if len(assetList) != 1 { + t.Fatalf("expected 1 asset, got %d", len(assetList)) + } + + a := assetList[0] + wantDate := time.Date(2009, 8, 2, 14, 5, 0, 0, time.Local) + if !a.CaptureDate.Equal(wantDate) { + 
t.Errorf("CaptureDate = %v, want %v", a.CaptureDate, wantDate) + } + if a.OriginalFileName != "Sunset at the Beach.jpg" { + t.Errorf("OriginalFileName = %q, want %q", a.OriginalFileName, "Sunset at the Beach.jpg") + } + if len(a.Tags) != 2 { + t.Errorf("len(Tags) = %d, want 2", len(a.Tags)) + } else { + tagValues := make(map[string]bool, len(a.Tags)) + for _, tag := range a.Tags { + tagValues[tag.Value] = true + } + if !tagValues["travel"] { + t.Errorf("tag 'travel' not found in %v", a.Tags) + } + if !tagValues["sunset"] { + t.Errorf("tag 'sunset' not found in %v", a.Tags) + } + } + if len(a.Albums) != 1 { + t.Errorf("len(Albums) = %d, want 1", len(a.Albums)) + } else if a.Albums[0].Title != "Vacation 2009" { + t.Errorf("Albums[0].Title = %q, want %q", a.Albums[0].Title, "Vacation 2009") + } + }) + + t.Run("image with no JSON - still emitted, no crash", func(t *testing.T) { + metaFS := fstest.MapFS{ + "albums.json": {Data: []byte(`{"albums":[]}`)}, + } + imageFS := fstest.MapFS{ + "my-photo_99999999_o.jpg": {Data: []byte{0xFF, 0xD8}}, + } + + assetList, _ := drainBrowse(newTestFlickrCmd(metaFS, imageFS)) + + if len(assetList) != 1 { + t.Fatalf("expected 1 asset, got %d", len(assetList)) + } + a := assetList[0] + if len(a.Tags) != 0 { + t.Errorf("expected 0 tags, got %d: %v", len(a.Tags), a.Tags) + } + if len(a.Albums) != 0 { + t.Errorf("expected 0 albums, got %d: %v", len(a.Albums), a.Albums) + } + }) + + t.Run("JSON with no image - no group emitted", func(t *testing.T) { + metaFS := fstest.MapFS{ + "photo_12345678.json": {Data: []byte(photoJSON)}, + "albums.json": {Data: []byte(albumJSON)}, + } + // Empty imageFS — no image files, so no catalog entries, no groups. 
+ imageFS := fstest.MapFS{} + + assetList, _ := drainBrowse(newTestFlickrCmd(metaFS, imageFS)) + + if len(assetList) != 0 { + t.Errorf("expected 0 assets, got %d", len(assetList)) + } + }) + + t.Run("date_taken fallback to date_upload", func(t *testing.T) { + const noDateJSON = `{ + "id": "12345678", + "name": "No Date Photo", + "description": "", + "date_taken": "", + "date_upload": "1249204972", + "tags": [], + "albums": [] + }` + metaFS := fstest.MapFS{ + "photo_12345678.json": {Data: []byte(noDateJSON)}, + "albums.json": {Data: []byte(`{"albums":[]}`)}, + } + imageFS := fstest.MapFS{ + "my-photo_12345678_o.jpg": {Data: []byte{0xFF, 0xD8}}, + } + + assetList, _ := drainBrowse(newTestFlickrCmd(metaFS, imageFS)) + + if len(assetList) != 1 { + t.Fatalf("expected 1 asset, got %d", len(assetList)) + } + wantDate := time.Unix(1249204972, 0).In(time.Local) + if !assetList[0].CaptureDate.Equal(wantDate) { + t.Errorf("CaptureDate = %v, want %v", assetList[0].CaptureDate, wantDate) + } + }) + + t.Run("context cancelled - Browse exits cleanly", func(t *testing.T) { + metaFS := fstest.MapFS{ + "photo_12345678.json": {Data: []byte(photoJSON)}, + "albums.json": {Data: []byte(albumJSON)}, + } + imageFS := fstest.MapFS{ + "my-photo_12345678_o.jpg": {Data: []byte{0xFF, 0xD8}}, + } + + f := newTestFlickrCmd(metaFS, imageFS) + ctx, cancel := context.WithCancel(context.Background()) + cancel() // cancel immediately before Browse even starts + + ch := f.Browse(ctx) + + done := make(chan struct{}) + go func() { + for range ch { + } + close(done) + }() + + select { + case <-done: + // Browse() channel closed cleanly — pass + case <-time.After(3 * time.Second): + t.Fatal("Browse() did not close the channel within 3s after context cancellation") + } + }) +} diff --git a/adapters/flickr/json.go b/adapters/flickr/json.go new file mode 100644 index 000000000..3949e6cfd --- /dev/null +++ b/adapters/flickr/json.go @@ -0,0 +1,91 @@ +package flickr + +import ( + "strconv" + "time" + + 
"github.com/simulot/immich-go/internal/assets" + "github.com/simulot/immich-go/internal/fshelper" +) + +// FlickrTag is a single tag entry in a per-photo JSON file. +type FlickrTag struct { + Tag string `json:"tag"` +} + +// FlickrMetadata represents the contents of a per-photo JSON file (photo_.json). +// Note: the "albums" field in per-photo JSON is always empty in real Flickr exports; +// album membership is sourced exclusively from albums.json via albumIndex(). +type FlickrMetadata struct { + ID string `json:"id"` + Name string `json:"name"` + Description string `json:"description"` + DateTaken string `json:"date_taken"` + DateUpload string `json:"date_upload"` + Tags []FlickrTag `json:"tags"` +} + +// FlickrAlbum is a single album entry within albums.json. +type FlickrAlbum struct { + ID string `json:"id"` + Title string `json:"title"` + Description string `json:"description"` + Photos []string `json:"photos"` +} + +// FlickrAlbums represents the top-level albums.json file. +type FlickrAlbums struct { + Albums []FlickrAlbum `json:"albums"` +} + +// AsMetadata converts a FlickrMetadata to an *assets.Metadata value ready for +// consumption by the asset pipeline. +// +// Date resolution order: +// 1. Parse date_taken as "2006-01-02 15:04:05" in local time. +// 2. If date_taken is empty or unparseable, interpret date_upload as a Unix +// timestamp (seconds since epoch). +// 3. If both fail, DateTaken remains zero. +func (m FlickrMetadata) AsMetadata(file fshelper.FSAndName) *assets.Metadata { + md := assets.Metadata{ + File: file, + FileName: m.Name, + Description: m.Description, + } + + // Attempt to parse date_taken first. + if m.DateTaken != "" { + if t, err := time.ParseInLocation("2006-01-02 15:04:05", m.DateTaken, time.Local); err == nil { + md.DateTaken = t + } + } + + // Fall back to date_upload if date_taken was empty or unparseable. 
+ if md.DateTaken.IsZero() && m.DateUpload != "" { + if ts, err := strconv.ParseInt(m.DateUpload, 10, 64); err == nil && ts != 0 { + md.DateTaken = time.Unix(ts, 0).In(time.Local) + } + } + + // Attach tags. + for _, t := range m.Tags { + md.AddTag(t.Tag) + } + + return &md +} + +// albumIndex inverts a FlickrAlbums value into a map from photo ID to the list +// of album titles that contain that photo. The returned map is never nil. +func albumIndex(albums *FlickrAlbums) map[string][]string { + result := make(map[string][]string) + if albums == nil { + return result + } + for _, album := range albums.Albums { + for _, photoID := range album.Photos { + result[photoID] = append(result[photoID], album.Title) + } + } + return result +} diff --git a/app/upload/upload.go b/app/upload/upload.go index ede80ed51..715d894eb 100644 --- a/app/upload/upload.go +++ b/app/upload/upload.go @@ -6,6 +6,7 @@ import ( "time" "github.com/simulot/immich-go/adapters" + "github.com/simulot/immich-go/adapters/flickr" "github.com/simulot/immich-go/adapters/folder" "github.com/simulot/immich-go/adapters/fromimmich" gp "github.com/simulot/immich-go/adapters/googlePhotos" @@ -112,6 +113,7 @@ func NewUploadCommand(ctx context.Context, app *app.Application) *cobra.Command cmd.AddCommand(folder.NewFromPicasaCommand(ctx, cmd, app, uc)) cmd.AddCommand(gp.NewFromGooglePhotosCommand(ctx, cmd, app, uc)) cmd.AddCommand(fromimmich.NewFromImmichCommand(ctx, cmd, app, uc)) + cmd.AddCommand(flickr.NewFromFlickrCommand(ctx, cmd, app, uc)) cmd.PersistentPreRunE = func(cmd *cobra.Command, args []string) error { // Initialize the FileProcessor (tracker + logger + event bus) diff --git a/docs/misc/flickr-import.md b/docs/misc/flickr-import.md new file mode 100644 index 000000000..8123c0b0b --- /dev/null +++ b/docs/misc/flickr-import.md @@ -0,0 +1,137 @@ +# Importing from Flickr + +This page describes how `immich-go` handles a Flickr export. 
+Flickr's export format is simpler than Google Photos takeout but has its own quirks — +particularly around how image filenames embed photo IDs and how album membership is stored. + +## What is a Flickr export? + +Flickr's "Request your account data" feature (available at flickr.com/account) produces +a set of ZIP files you download manually from your account page. There are two distinct +types: + +- **Metadata archive** — named with an opaque set-ID such as + `72157724905358313_3ac1836c2225_part1.zip`. Inside you will find `albums.json` + (a list of every album and its member photo IDs) and one `photo_<id>.json` per photo. + The adapter identifies this archive automatically by the presence of `albums.json` + at the ZIP root. + +- **Image archives** — named `data-download-1.zip`, `data-download-2.zip`, etc. These + contain only the image and video files; there is no JSON metadata inside them. + For large accounts Flickr splits the images across multiple archives. + +The simplest way to import is to download all ZIP files into the same folder and point +`immich-go` at that folder. + +## Image filename formats + +Flickr image filenames follow one of two patterns depending on when the photo was uploaded: + +| Format | Pattern | Example | +| ------- | ------- | ------- | +| Current | `<slug>_<id>_o.<ext>` | `sunset-at-the-beach_54321_o.jpg` | +| Older | `<id>_<hash>_o.<ext>` | `54321_ab1cd2ef3a_o.jpg` | + +Both patterns are recognised automatically. The photo ID embedded in the filename is +the key used to link each image file to its JSON metadata. + +## Album membership + +Album membership is stored **exclusively** in `albums.json` at the root of the metadata +archive. The per-photo `photo_<id>.json` files always contain an empty `albums` field — +do not rely on them for album data. The adapter inverts `albums.json` into an internal +map of photo ID → album titles and attaches that data during asset assembly. 
+ +## Usage + +The easiest approach is to download all Flickr ZIPs into one folder and pass the folder: + +```sh +immich-go upload from-flickr \ + --server http://your-immich-server:2283 \ + --api-key YOUR_KEY \ + ~/Downloads/flickr-downloads/ +``` + +You can also pass individual ZIP files or a glob pattern — all three forms are equivalent: + +```sh +# Explicit files +immich-go upload from-flickr --server ... --api-key ... \ + 72157724905358313_3ac1836c2225_part1.zip \ + data-download-1.zip \ + data-download-2.zip + +# Glob +immich-go upload from-flickr --server ... --api-key ... \ + ~/Downloads/flickr-downloads/*.zip +``` + +The adapter identifies the metadata archive automatically (by the presence of `albums.json`) +regardless of how the ZIPs are passed — you do not need to specify which is which. + +`--sync-albums` (default: `true`) creates albums in Immich matching the album structure +in your Flickr export. + +## Metadata preserved + +| Flickr field | Immich field | Notes | +| ------------ | ------------ | ----- | +| `name` | Title / OriginalFileName | Falls back to the image filename when empty | +| `description` | Description | | +| `date_taken` | CaptureDate | Parsed as `"2006-01-02 15:04:05"` in local time | +| `date_upload` | CaptureDate (fallback) | Unix epoch; used only when `date_taken` is absent or unparseable | +| `tags[].tag` | Tags | All tags attached to the photo | +| `albums.json` entries | Albums | Reconstructed from album membership in `albums.json` | + +## Metadata not preserved + +The following data is present in a Flickr export but is **not** imported: + +- **GPS / geo coordinates** — the `geo` field is present in per-photo JSON files but was + empty in all tested real-world exports. GPS data embedded in the image EXIF is preserved + as-is by Immich during upload. +- **People / user tags** — Flickr's export always produces an empty `people` field even + for photos with tagged users. This appears to be a bug in Flickr's export system. 
+- **Comments** +- **Groups** +- **Favorites / faves** + +## How the adapter works + +These notes are intended for contributors who want to understand or modify the adapter. + +**Archive classification** — At startup the adapter calls `fs.Stat` on each provided +archive to check whether `albums.json` is present. The archive that contains it is +classified as the metadata archive; all others are treated as image archives. The adapter +returns an error if zero archives or two or more archives contain `albums.json`. + +**Photo ID extraction** — Two package-level compiled regexes extract the numeric photo ID +from each image filename. The primary pattern (`_(\d+)_o\.\w+$`) covers the current +`slug_ID_o.ext` format. The fallback pattern (`^(\d+)_[0-9a-f]+_o\.\w+$`) handles the +older `ID_hash_o.ext` format. The base filename is stripped of directory prefixes before +matching. + +**Two-pass processing** — Pass one walks every image archive and builds a catalog keyed by +photo ID, then walks the metadata archive to parse all `photo_<id>.json` files and +`albums.json`. Pass two iterates the catalog and emits one `assets.Group` per photo, +attaching metadata and album assignments. + +**Missing metadata** — A photo whose image file has no matching JSON entry is logged with +a `ProcessedMissingMetadata` event and still emitted (without title, description, or tags). +Malformed JSON files are also logged and skipped; they do not abort the import. + +## Known limitations + +- Exactly one metadata archive (one ZIP containing `albums.json`) is required. The adapter + returns an error if none or more than one is found among the provided files. +- When passing a directory, only `*.zip` files at the top level of that directory are + discovered — subdirectories are not scanned. +- Geo/GPS data is not imported (empty in all tested real-world exports).
+- Video support depends on Flickr's export format; the adapter passes video files through + the standard media-type filter and imports them if the type is recognised. + +## What if something goes wrong? + +Please open an issue with details. +You can share files or logs via Discord DM `@simulot`.