Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,12 @@ $ manspider share.evilcorp.local -e pfx p12 pkcs12 pem key crt cer csr jks keyst
$ manspider share.evilcorp.local -e docx xlsx pdf --modified-after 2026-01-01 -d evilcorp -u bob -p Passw0rd
~~~

### Example #11: Start crawling from a specific path
~~~
$ manspider share.evilcorp.local --sharenames windows$ --start-path \\users\\john -c passw -e docx pdf -d evilcorp -u bob -p Passw0rd
~~~
This will only crawl files under `\users\john` within the `windows$` share on `share.evilcorp.local` (i.e. `\\share.evilcorp.local\windows$\users\john`), instead of starting at the root of the share.

### Usage Tip #1:
You can run multiple instances of manspider at one time. This is useful when one instance is already running, and you want to search what it's downloaded (similar to `grep -R`). To do this, specify the keyword `loot` as the target, which will search the downloaded files in `$HOME/.manspider/loot`.

Expand Down
5 changes: 5 additions & 0 deletions man_spider/lib/spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,11 @@ def __init__(self, options):
self.dir_whitelist = options.dirnames
self.dir_blacklist = options.exclude_dirnames

# Optional starting path within each share (for remote SMB targets only).
# When set, spiderlings will begin crawling from this path instead of
# the share root (e.g. "\\windows\\users\\john").
self.start_path = getattr(options, "start_path", None)

self.no_download = options.no_download

# applies "or" logic instead of "and"
Expand Down
7 changes: 6 additions & 1 deletion man_spider/lib/spiderling.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,8 +166,13 @@ def files(self):
log.debug(f"Skipping {file}: does not match filename/extension filters")

else:
# remote files
# If a specific start path is configured, begin crawling from that
# subdirectory within each share instead of the share root.
start_path = getattr(self.parent, "start_path", None) or ""

for share in self.shares:
for remote_file in self.list_files(share):
for remote_file in self.list_files(share, start_path):
if not self.parent.no_download or self.parent.parser.content_filters:
self.get_file(remote_file)
yield remote_file
Expand Down
15 changes: 15 additions & 0 deletions man_spider/manspider.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,13 @@ def main():
metavar="DATE",
help="only show files modified before this date (format: YYYY-MM-DD)",
)
parser.add_argument(
"--start-path",
type=str,
default="",
metavar="PATH",
help='start crawling from this path within each share (e.g. "\\\\windows$\\\\users\\\\john")',
)

syntax_error = False
try:
Expand Down Expand Up @@ -257,6 +264,14 @@ def main():
options.dirnames = [s.lower() for s in options.dirnames]
options.exclude_dirnames = [s.lower() for s in options.exclude_dirnames]

# normalize start_path: use backslashes and strip trailing slashes
if getattr(options, "start_path", ""):
# Replace forward slashes with backslashes and remove trailing slashes
normalized = options.start_path.replace("/", "\\").rstrip("\\")
options.start_path = normalized
else:
options.start_path = None

# deduplicate targets
targets = set()
[[targets.add(t) for t in g] for g in options.targets]
Expand Down
60 changes: 60 additions & 0 deletions tests/test_smb_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,7 @@ def create_test_options(targets, loot_dir, **kwargs) -> Namespace:
"kerberos": False,
"aes_key": None,
"dc_ip": None,
"start_path": None,
}
defaults.update(kwargs)
return Namespace(**defaults)
Expand Down Expand Up @@ -333,3 +334,62 @@ def test_manspider_finds_password_in_all_binary_files(self, smb_server_full, tmp
found = self._find_matching_files(loot_dir, self.EXPECTED_BINARY_PATTERNS, ".bin")
missing = set(self.EXPECTED_BINARY_PATTERNS) - found
assert not missing, f"Missing binary patterns: {missing}. Found: {list(loot_dir.rglob('*.bin'))}"

def test_manspider_can_start_from_specific_path(self, smb_server_full, tmp_path):
    """
    MANSPIDER should be able to start crawling from a specific path
    within a share, instead of always starting at the share root.

    This models a UNC-like path such as:
        \\\\share.evilcorp.local\\windows$\\users\\john\\

    For the test SMB server, we simulate this by creating a nested
    directory structure under the single test share and verifying
    that only files under the specified start path are crawled.

    The test relocates ``test-utf8.txt`` into ``windows/users/john`` and
    keeps ``test-ascii.txt`` at the share root as a control. Both files
    are checked up front so that a missing fixture file is reported as
    such, rather than surfacing as a confusing crawl failure (or, for the
    control file, as a vacuously passing assertion).
    """
    from man_spider.lib.spider import MANSPIDER

    server, share_path = smb_server_full
    loot_dir = tmp_path / "loot"
    loot_dir.mkdir()

    # Create nested directory structure: windows/users/john
    nested_dir = share_path / "windows" / "users" / "john"
    nested_dir.mkdir(parents=True, exist_ok=True)

    # Move one known text file into the nested directory
    # and leave another at the share root as a control.
    root_text_file = share_path / "test-ascii.txt"
    nested_source = share_path / "test-utf8.txt"
    nested_text_file = nested_dir / nested_source.name

    # The source may already have been relocated (e.g. by an earlier run
    # against the same share directory), so only move it when it is still
    # at the share root.
    if nested_source.exists():
        shutil.move(str(nested_source), str(nested_text_file))

    # Preconditions: without these files the assertions below prove nothing.
    assert nested_text_file.exists(), f"Test fixture is missing the nested file: {nested_text_file}"
    assert root_text_file.exists(), f"Test fixture is missing the control file: {root_text_file}"

    target = Target("127.0.0.1", server.port)

    options = create_test_options(
        targets=[target],
        loot_dir=loot_dir,
        content=["Password123"],
        extensions=[".txt"],
        # Start crawling from a specific path within the share.
        # In a real CLI invocation this would look like combining:
        #   --sharenames windows$
        #   --start-path \\users\\john
        # Here the same semantics are expressed directly on the options object.
        sharenames=["testshare"],
        start_path="\\windows\\users\\john",
    )

    spider = MANSPIDER(options)
    spider.start()

    # We expect to find files under the nested path (test-utf8.txt)
    found_nested = self._find_matching_files(loot_dir, ["testutf8.txt"], ".txt")
    assert "testutf8.txt" in found_nested, (
        "Expected to find test-utf8.txt when starting from \\windows\\users\\john"
    )

    # We do NOT expect to find files that live only at the share root (test-ascii.txt)
    found_root = self._find_matching_files(loot_dir, ["testascii"], ".txt")
    assert not found_root, "Did not expect to crawl files at the share root when a specific start path is set"