Skip to content

Commit 34b24f7

Browse files
committed
FIX localclient move cleanup directories across 'buckets'
1 parent 23b7f99 commit 34b24f7

File tree

2 files changed

+93
-4
lines changed

2 files changed

+93
-4
lines changed

cottoncandy/localclient.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import os
44
import shutil
55
from io import BytesIO as StringIO
6+
from typing import Optional
67

78
from .backend import CCBackEnd, CloudStream
89
from .utils import SEPARATOR, remove_root, remove_trivial_magic, sanitize_metadata
@@ -17,7 +18,7 @@ class LocalClient(CCBackEnd):
1718
Handle metadata in CouldStream objects by storing a json file (.meta.json).
1819
"""
1920

20-
def __init__(self, path):
21+
def __init__(self, path: str):
2122
if not os.path.isdir(path):
2223
os.makedirs(path)
2324
self.path = path
@@ -256,7 +257,7 @@ def move(self, source, destination, source_bucket, destination_bucket,
256257
shutil.move(source, destination)
257258
shutil.move(source_metadata, destination_metadata)
258259
# Clean up empty parent directories at source to mimic S3 behavior
259-
self._cleanup_empty_dirs(os.path.dirname(source))
260+
self._cleanup_empty_dirs(os.path.dirname(source), root_path=source_bucket)
260261
return True
261262

262263
def delete(self, cloud_name, recursive=False, delete=False):
@@ -353,7 +354,7 @@ def get_object_size(self, object_name):
353354
size = os.path.getsize(file_name)
354355
return size
355356

356-
def _cleanup_empty_dirs(self, dir_path: str) -> None:
357+
def _cleanup_empty_dirs(self, dir_path: str, root_path: Optional[str] = None) -> None:
357358
"""Remove empty parent directories up to self.path.
358359
359360
This mimics S3 behavior where directories don't exist independently -
@@ -364,10 +365,14 @@ def _cleanup_empty_dirs(self, dir_path: str) -> None:
364365
----------
365366
dir_path : str
366367
The directory path to start cleaning from
368+
root_path : Optional[str]
369+
The root path to stop cleaning at. If None, defaults to self.path.
367370
"""
368371
# Normalize paths for comparison
369372
dir_path = os.path.normpath(dir_path)
370-
root_path = os.path.normpath(self.path)
373+
if root_path is None:
374+
root_path = self.path
375+
root_path = os.path.normpath(root_path)
371376

372377
# Ensure the directory is under the root path before attempting cleanup
373378
try:

cottoncandy/tests/test_localclient.py

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
from pathlib import Path
33
import time
44

5+
import cottoncandy as cc
6+
57
from ..localclient import LocalClient
68

79

@@ -134,3 +136,85 @@ def test_move_cleans_up_empty_directories(cci, object_name):
134136
and cci.download_object(object_name + '/source') == content
135137
cci.rm(object_name + '/source')
136138
time.sleep(cci.wait_time)
139+
140+
141+
def test_move_cleans_up_source_bucket_directories_across_buckets(tmp_path):
142+
"""Moving across buckets should remove empty source directories."""
143+
source_bucket = tmp_path / 'source-bucket'
144+
destination_bucket = tmp_path / 'destination-bucket'
145+
source_bucket.mkdir()
146+
destination_bucket.mkdir()
147+
148+
cci_src = cc.get_interface(
149+
bucket_name=str(source_bucket),
150+
backend='local',
151+
verbose=False,
152+
)
153+
cci_dest = cc.get_interface(
154+
bucket_name=str(destination_bucket),
155+
backend='local',
156+
verbose=False,
157+
)
158+
source_key = 'nested/a/b/file.txt'
159+
destination_key = 'moved/file.txt'
160+
content = b'cross bucket move content'
161+
162+
cci_src.upload_object(source_key, BytesIO(content))
163+
164+
source_root = source_bucket / 'nested'
165+
source_a = source_root / 'a'
166+
source_b = source_a / 'b'
167+
source_file = source_b / 'file.txt'
168+
169+
assert source_file.exists()
170+
171+
# mv from src --> dest, calling from src client
172+
cci_src.mv(
173+
source_name=source_key,
174+
dest_name=destination_key,
175+
source_bucket=str(source_bucket),
176+
dest_bucket=str(destination_bucket),
177+
overwrite=True,
178+
)
179+
180+
moved_file = destination_bucket / 'moved' / 'file.txt'
181+
moved_metadata = destination_bucket / 'moved' / 'file.txt.meta.json'
182+
183+
assert moved_file.exists()
184+
assert cci_dest.download_object(destination_key) == content
185+
assert moved_metadata.exists()
186+
assert not cci_src.exists_object(source_key, bucket_name=str(source_bucket))
187+
188+
# The source bucket should be cleaned up back to the bucket root.
189+
assert not source_file.exists()
190+
assert not source_b.exists()
191+
assert not source_a.exists()
192+
assert not source_root.exists()
193+
assert source_bucket.exists()
194+
195+
# Cleanup
196+
cci_dest.rm(destination_key)
197+
assert not moved_file.exists()
198+
assert not moved_metadata.exists()
199+
assert not (destination_bucket / 'moved').exists()
200+
201+
# Now try the same thing, but calling `.mv()` from the destination client instead.
202+
cci_src.upload_object(source_key, BytesIO(content))
203+
cci_dest.mv(
204+
source_name=source_key,
205+
dest_name=destination_key,
206+
source_bucket=str(source_bucket),
207+
dest_bucket=str(destination_bucket),
208+
overwrite=True,
209+
)
210+
211+
assert moved_file.exists()
212+
assert cci_dest.download_object(destination_key) == content
213+
assert moved_metadata.exists()
214+
assert not cci_src.exists_object(source_key, bucket_name=str(source_bucket))
215+
216+
assert not source_file.exists()
217+
assert not source_b.exists()
218+
assert not source_a.exists()
219+
assert not source_root.exists()
220+
assert source_bucket.exists()

0 commit comments

Comments
 (0)