From 12bede1e5b0a9d40b34c23a3d703e1bc2205b0c0 Mon Sep 17 00:00:00 2001 From: mgaddy Date: Tue, 24 Feb 2026 14:48:04 -0500 Subject: [PATCH 1/2] Add start-path option for SMB crawling --- README.md | 6 ++++ man_spider/lib/spider.py | 5 +++ man_spider/lib/spiderling.py | 7 +++- man_spider/manspider.py | 15 +++++++++ tests/test_smb_integration.py | 63 +++++++++++++++++++++++++++++++++++ 5 files changed, 95 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index bc1b5fe..fc6bc59 100644 --- a/README.md +++ b/README.md @@ -149,6 +149,12 @@ $ manspider share.evilcorp.local -e pfx p12 pkcs12 pem key crt cer csr jks keyst $ manspider share.evilcorp.local -e docx xlsx pdf --modified-after 2026-01-01 -d evilcorp -u bob -p Passw0rd ~~~ +### Example #11: Start crawling from a specific path +~~~ +$ manspider share.evilcorp.local --sharenames windows$ --start-path \\users\\john -c passw -e docx pdf -d evilcorp -u bob -p Passw0rd +~~~ +This will only crawl files under `\\windows$\\users\\john` on `share.evilcorp.local`, instead of starting at the root of the `windows$` share. + ### Usage Tip #1: You can run multiple instances of manspider at one time. This is useful when one instance is already running, and you want to search what it's downloaded (similar to `grep -R`). To do this, specify the keyword `loot` as the target, which will search the downloaded files in `$HOME/.manspider/loot`. diff --git a/man_spider/lib/spider.py b/man_spider/lib/spider.py index afcc2bd..d4c7789 100644 --- a/man_spider/lib/spider.py +++ b/man_spider/lib/spider.py @@ -35,6 +35,11 @@ def __init__(self, options): self.dir_whitelist = options.dirnames self.dir_blacklist = options.exclude_dirnames + # Optional starting path within each share (for remote SMB targets only). + # When set, spiderlings will begin crawling from this path instead of + # the share root (e.g. "\\windows\\users\\john"). 
+ self.start_path = getattr(options, "start_path", None) + self.no_download = options.no_download # applies "or" logic instead of "and" diff --git a/man_spider/lib/spiderling.py b/man_spider/lib/spiderling.py index 658d8bc..b8499a8 100644 --- a/man_spider/lib/spiderling.py +++ b/man_spider/lib/spiderling.py @@ -166,8 +166,13 @@ def files(self): log.debug(f"Skipping {file}: does not match filename/extension filters") else: + # remote files + # If a specific start path is configured, begin crawling from that + # subdirectory within each share instead of the share root. + start_path = getattr(self.parent, "start_path", None) or "" + for share in self.shares: - for remote_file in self.list_files(share): + for remote_file in self.list_files(share, start_path): if not self.parent.no_download or self.parent.parser.content_filters: self.get_file(remote_file) yield remote_file diff --git a/man_spider/manspider.py b/man_spider/manspider.py index c8823ed..a21d092 100755 --- a/man_spider/manspider.py +++ b/man_spider/manspider.py @@ -202,6 +202,13 @@ def main(): metavar="DATE", help="only show files modified before this date (format: YYYY-MM-DD)", ) + parser.add_argument( + "--start-path", + type=str, + default="", + metavar="PATH", + help="start crawling from this path within each share (e.g. 
\"\\\\windows$\\\\users\\\\john\")", + ) syntax_error = False try: @@ -257,6 +264,14 @@ def main(): options.dirnames = [s.lower() for s in options.dirnames] options.exclude_dirnames = [s.lower() for s in options.exclude_dirnames] + # normalize start_path: use backslashes and strip trailing slashes + if getattr(options, "start_path", ""): + # Replace forward slashes with backslashes and remove trailing slashes + normalized = options.start_path.replace("/", "\\").rstrip("\\") + options.start_path = normalized + else: + options.start_path = None + # deduplicate targets targets = set() [[targets.add(t) for t in g] for g in options.targets] diff --git a/tests/test_smb_integration.py b/tests/test_smb_integration.py index 5bb7058..e7e5047 100644 --- a/tests/test_smb_integration.py +++ b/tests/test_smb_integration.py @@ -174,6 +174,7 @@ def create_test_options(targets, loot_dir, **kwargs) -> Namespace: "kerberos": False, "aes_key": None, "dc_ip": None, + "start_path": None, } defaults.update(kwargs) return Namespace(**defaults) @@ -333,3 +334,65 @@ def test_manspider_finds_password_in_all_binary_files(self, smb_server_full, tmp found = self._find_matching_files(loot_dir, self.EXPECTED_BINARY_PATTERNS, ".bin") missing = set(self.EXPECTED_BINARY_PATTERNS) - found assert not missing, f"Missing binary patterns: {missing}. Found: {list(loot_dir.rglob('*.bin'))}" + + def test_manspider_can_start_from_specific_path(self, smb_server_full, tmp_path): + """ + MANSPIDER should be able to start crawling from a specific path + within a share, instead of always starting at the share root. + + This models a UNC-like path such as: + \\\\share.evilcorp.local\\windows$\\users\\john\\ + + For the test SMB server, we simulate this by creating a nested + directory structure under the single test share and verifying + that only files under the specified start path are crawled. 
+ """ + from man_spider.lib.spider import MANSPIDER + + server, share_path = smb_server_full + loot_dir = tmp_path / "loot" + loot_dir.mkdir() + + # Create nested directory structure: windows/users/john + nested_dir = share_path / "windows" / "users" / "john" + nested_dir.mkdir(parents=True, exist_ok=True) + + # Move one known text file into the nested directory + # and leave another at the share root as a control. + root_text_file = share_path / "test-ascii.txt" + nested_text_file = share_path / "test-utf8.txt" + + if nested_text_file.exists(): + shutil.move(str(nested_text_file), nested_dir / nested_text_file.name) + + target = Target("127.0.0.1", server.port) + + options = create_test_options( + targets=[target], + loot_dir=loot_dir, + content=["Password123"], + extensions=[".txt"], + # Start crawling from a specific path within the share. + # In a real CLI invocation this would look like combining: + # --sharenames windows$ + # --start-path \\users\\john + # Here the start path is set directly on the options object. 
+ sharenames=["testshare"], + start_path="\\windows\\users\\john", + ) + + spider = MANSPIDER(options) + spider.start() + + # We expect to find files under the nested path (test-utf8.txt) + found_nested = self._find_matching_files(loot_dir, ["testutf8.txt"], ".txt") + assert "testutf8.txt" in found_nested, ( + "Expected to find test-utf8.txt when starting from \\windows\\users\\john" + ) + + # We do NOT expect to find files that live only at the share root (test-ascii.txt) + found_root = self._find_matching_files(loot_dir, ["testascii"], ".txt") + assert not found_root, ( + "Did not expect to crawl files at the share root when a specific start path is set" + ) + From 2c08dba4a78409c8684e7ae312e31a7faeb471c2 Mon Sep 17 00:00:00 2001 From: mgaddy Date: Tue, 24 Feb 2026 14:55:13 -0500 Subject: [PATCH 2/2] ruff Tests Fixed --- man_spider/manspider.py | 2 +- tests/test_smb_integration.py | 5 +---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/man_spider/manspider.py b/man_spider/manspider.py index a21d092..c02b71f 100755 --- a/man_spider/manspider.py +++ b/man_spider/manspider.py @@ -207,7 +207,7 @@ def main(): type=str, default="", metavar="PATH", - help="start crawling from this path within each share (e.g. \"\\\\windows$\\\\users\\\\john\")", + help='start crawling from this path within each share (e.g. 
"\\\\windows$\\\\users\\\\john")', ) syntax_error = False diff --git a/tests/test_smb_integration.py b/tests/test_smb_integration.py index e7e5047..ee1a774 100644 --- a/tests/test_smb_integration.py +++ b/tests/test_smb_integration.py @@ -392,7 +392,4 @@ def test_manspider_can_start_from_specific_path(self, smb_server_full, tmp_path) # We do NOT expect to find files that live only at the share root (test-ascii.txt) found_root = self._find_matching_files(loot_dir, ["testascii"], ".txt") - assert not found_root, ( - "Did not expect to crawl files at the share root when a specific start path is set" - ) - + assert not found_root, "Did not expect to crawl files at the share root when a specific start path is set"