Skip to content
51 changes: 51 additions & 0 deletions man_spider/lib/logger.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
import os
import sys
import json
import logging
from copy import copy
from sys import stdout
Expand All @@ -7,6 +10,54 @@
from logging.handlers import QueueHandler, QueueListener


### JSON LINES OUTPUT ###

# JSONL destination, configured once via set_json_output() before forking so that
# child/worker processes inherit it (we force the "fork" start method).
# Both values start out disabled; json_log() returns immediately while neither
# a file path nor the stdout flag is set.
_json_output_path = None # file path, when writing JSONL to a file
_json_to_stdout = False # when True, JSONL goes to stdout and logs go to stderr


def set_json_output(path):
    """
    Select where json_log() sends its JSON Lines records.

    "path" may be:
        None / ""   -> JSONL output is switched off
        "-"         -> JSONL goes to stdout; the console log handler is pointed
                       at stderr so the two streams don't mix
        <filename>  -> JSONL is appended to that file (stored as str(path))
    """
    global _json_output_path, _json_to_stdout

    # work out the destination first, then publish it in one place
    destination = None
    use_stdout = False
    if path:
        if path == "-":
            use_stdout = True
        else:
            destination = str(path)

    _json_output_path = destination
    _json_to_stdout = use_stdout

    if use_stdout:
        # stdout is reserved for JSONL, so human-readable log records move to stderr
        console.setStream(sys.stderr)


def json_log(record):
    """
    Emit a single record as one JSON line. No-op when --json wasn't specified.

    The record is serialized up front (values json can't encode are passed
    through str()) and handed to the OS with os.write() on a raw descriptor:
    fd 1 for stdout, or the destination file opened with O_APPEND. Going
    around Python's buffered file layer means a long line is not chopped into
    several buffer-sized writes, which is what lets lines from different
    worker processes interleave. A short write is retried until the whole line
    is out, so a line is never silently truncated.

    Output is best-effort: a record that cannot be serialized or written is
    dropped and reported at debug level instead of aborting the spider.
    """
    if not (_json_output_path or _json_to_stdout):
        return
    try:
        # json.dumps() escapes non-ASCII by default, so the encode cannot fail
        payload = (json.dumps(record, default=str) + "\n").encode("utf-8")

        if _json_to_stdout:
            fd, owned = 1, False
        else:
            # O_APPEND: every write lands at the current end of the file, even
            # when several processes are appending to it at the same time
            fd = os.open(_json_output_path, os.O_WRONLY | os.O_APPEND | os.O_CREAT, 0o666)
            owned = True

        try:
            remaining = memoryview(payload)
            # normally a single os.write(); loop only to finish a short write
            while remaining:
                remaining = remaining[os.write(fd, remaining):]
        finally:
            if owned:
                os.close(fd)
    except Exception as e:
        # deliberate best-effort: never let result reporting break the scan,
        # but leave a trace for anyone debugging missing output
        logging.getLogger(__name__).debug("json_log: dropped record: %s", e)


### PRETTY COLORS ###


Expand Down
37 changes: 28 additions & 9 deletions man_spider/lib/parser/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,10 @@ class FileParser:
".dylib",
}

# limits for the matched strings captured for JSON output
max_match_samples = 20 # max distinct matched strings kept per pattern per file
match_sample_maxlen = 256 # max length of each captured matched string

def __init__(self, filters, quiet=False):
self.init_content_filters(filters)
self.quiet = quiet
Expand Down Expand Up @@ -210,15 +214,30 @@ def extract_text(self, file, pretty_filename):
except Exception:
pass

# count the matches
for _filter, match in self.match(text_content):
try:
matches[_filter] += 1
except KeyError:
matches[_filter] = 1

for _filter, match_count in matches.items():
log.info(ColoredFormatter.green(f'{pretty_filename}: matched "{_filter.pattern}" {match_count:,} times'))
# count the matches and capture a sample of the actual matched strings,
# including where in the file each one was found (1-based line/column)
for _filter, span in self.match(text_content):
entry = matches.get(_filter)
if entry is None:
entry = {"count": 0, "samples": []}
matches[_filter] = entry
entry["count"] += 1
if len(entry["samples"]) < self.max_match_samples:
start, end = span
value = text_content[start:end][: self.match_sample_maxlen]
line = text_content.count("\n", 0, start) + 1
column = start - text_content.rfind("\n", 0, start)
end_column = column + (end - start)
sample = {"match": value, "line": line, "column": column, "end_column": end_column}
if not any(s["match"] == value and s["line"] == line and s["column"] == column for s in entry["samples"]):
entry["samples"].append(sample)

for _filter, match_data in matches.items():
log.info(
ColoredFormatter.green(
f'{pretty_filename}: matched "{_filter.pattern}" {match_data["count"]:,} times'
)
)
# run grep for pretty output
if not self.quiet:
self.grep(binary_content, _filter.pattern)
Expand Down
16 changes: 10 additions & 6 deletions man_spider/lib/smb.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,15 +199,19 @@ def login(self, refresh=False, first_try=True):
if s in str(e):
log.warning(f"{self.server}: {s}: {self.username}")

log.debug(f"{self.server}: Trying guest session")
self.username = "Guest"
# Prefer a true null/anonymous session first: on Samba, the SRVSVC
# share-enumeration RPC (listShares) is permitted for anonymous logons
# but denied for the "Guest" account, so null must be tried before Guest
# to match `smbclient -L -N` behaviour.
log.debug(f"{self.server}: Trying null session")
self.username = ""
self.password = ""
self.domain = ""
self.nthash = ""
guest_success = self.login(refresh=True, first_try=False)
if not guest_success:
log.debug(f"{self.server}: Switching to null session")
self.username = ""
null_success = self.login(refresh=True, first_try=False)
if not null_success:
log.debug(f"{self.server}: Switching to guest session")
self.username = "Guest"
self.login(refresh=True, first_try=False)

return False
Expand Down
46 changes: 45 additions & 1 deletion man_spider/lib/spiderling.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from man_spider.lib.file import *
from man_spider.lib.util import *
from man_spider.lib.errors import *
from man_spider.lib.logger import json_log
from man_spider.lib.processpool import *


Expand Down Expand Up @@ -59,6 +60,8 @@ def __init__(self, target, parent):
try:
self.parent = parent
self.target = target
# ensures enumerated shares are logged/emitted only once per target
self._shares_logged = False

# unless we're only searching local files, connect to target
if type(self.target) == pathlib.PosixPath:
Expand Down Expand Up @@ -129,6 +132,17 @@ def go(self):
# otherwise, just save it
elif not self.local:
log.info(f"{self.target}: {file.share}\\{file.name} ({bytes_to_human(file.size)})")
json_log(
{
"type": "file",
"target": self.target.host,
"port": self.target.port,
"share": file.share,
"path": file.name,
"size": file.size,
"downloaded": not self.parent.no_download,
}
)
if not self.parent.no_download:
self.save_file(file)

Expand Down Expand Up @@ -181,6 +195,19 @@ def parse_file(self, file):
try:
if type(file) == RemoteFile:
matches = self.parent.parser.parse_file(str(file.tmp_filename), pretty_filename=str(file))
for _filter, match_data in matches.items():
json_log(
{
"type": "content_match",
"target": self.target.host,
"port": self.target.port,
"share": file.share,
"path": file.name,
"pattern": _filter.pattern,
"count": match_data["count"],
"matches": match_data["samples"],
}
)
if matches and not self.parent.no_download:
self.save_file(file)
else:
Expand Down Expand Up @@ -210,8 +237,25 @@ def shares(self):
# Keep track of shares we've already yielded to avoid duplicates
yielded_shares = set()

enumerated_shares = self.smb_client.shares

# always report the shares we found, even while spidering with filters
if not self._shares_logged:
self._shares_logged = True
if enumerated_shares:
log.info(f"{self.target}: {len(enumerated_shares)} shares: {', '.join(enumerated_shares)}")
for share in enumerated_shares:
json_log(
{
"type": "share",
"target": self.target.host,
"port": self.target.port,
"share": share,
}
)

# First, yield enumerated shares that match filters
for share in self.smb_client.shares:
for share in enumerated_shares:
if self.share_match(share):
yielded_shares.add(share.lower())
yield share
Expand Down
80 changes: 67 additions & 13 deletions man_spider/manspider.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,37 @@ def go(options):
)
sleep(2)

# exit if no filters were specified
# if no filters were specified, just enumerate and print shares for each
# remote target (like `smbclient -L`) instead of spidering
if not (options.filenames or options.extensions or options.exclude_extensions or options.content):
log.error("Please specify at least one of --filenames, --content, --extensions, or --exclude-extensions")
remote_targets = [t for t in options.targets if not isinstance(t, pathlib.PosixPath)]
if not remote_targets:
log.error("Please specify at least one of --filenames, --content, --extensions, or --exclude-extensions")
return
log.info("No filters specified; listing shares only")
for target in remote_targets:
smb_client = SMBClient(
target.host,
options.username,
options.password,
options.domain,
options.hash,
options.kerberos,
options.aes_key,
options.dc_ip,
port=target.port,
)
if smb_client.login() is None:
log.warning(f"{target.host}: Could not connect")
continue
shares = smb_client.shares
if shares:
log.info(f"{target.host}: {len(shares)} shares:")
for share in shares:
log.info(f" {share}")
json_log({"type": "share", "target": target.host, "port": target.port, "share": share})
else:
log.warning(f"{target.host}: No shares found (or enumeration denied)")
return

# exit if --maxdepth is invalid
Expand Down Expand Up @@ -80,6 +108,16 @@ def load_content_wordlist(filepath, options):

def main():

# The logging setup (lib/logger.py) shares a multiprocessing Queue between the
# parent's QueueListener (console output) and the child worker's QueueHandler.
# Python 3.14 changed the default start method on Linux to "forkserver", which
# re-imports modules in the child and creates a *new*, unlistened queue -> no
# console output. Force "fork" so parent and child share the same queue.
try:
multiprocessing.set_start_method("fork", force=True)
except (ValueError, RuntimeError):
pass

interrupted = False

examples = """
Expand Down Expand Up @@ -226,6 +264,16 @@ def main():
metavar="DATE",
help="only show files modified before this date (format: YYYY-MM-DD)",
)
parser.add_argument(
"--json",
dest="json",
nargs="?",
const="-",
default=None,
metavar="FILE",
help="write results (shares, matched/looted files) as JSON Lines; to FILE, "
"or to stdout if no FILE is given (in which case logs are sent to stderr)",
)

syntax_error = False
try:
Expand All @@ -241,6 +289,9 @@ def main():
if options.verbose:
log.setLevel("DEBUG")

# configure JSON Lines output (must happen before forking the worker)
set_json_output(options.json)

if options.kerberos and "KRB5CCNAME" not in os.environ:
log.error("KRB5CCNAME is not set in the environment")
sys.exit(1)
Expand All @@ -264,17 +315,20 @@ def main():
else:
options.modified_before = None

# make sure extension formats are valid
for i, extension in enumerate(options.extensions):
if extension and not extension.startswith("."):
extension = f".{extension}"
options.extensions[i] = extension.lower()

# make sure extension blacklist is valid
for i, extension in enumerate(options.exclude_extensions):
if not extension.startswith("."):
extension = f".{extension}"
options.exclude_extensions[i] = extension.lower()
# normalize extensions: split each entry on whitespace so that a quoted
# `--extensions "ini cfg"` behaves the same as `--extensions ini cfg`,
# then ensure a leading dot and lowercase
def normalize_extensions(extensions):
    """
    Return a flat list of lowercase, dot-prefixed extensions.

    Every entry is split on whitespace first, so one quoted argument holding
    several extensions ("ini cfg") yields the same result as separate
    arguments. Empty or whitespace-only entries contribute nothing.
    """
    return [
        (token if token.startswith(".") else f".{token}").lower()
        for entry in extensions
        for token in entry.split()
    ]

options.extensions = normalize_extensions(options.extensions)
options.exclude_extensions = normalize_extensions(options.exclude_extensions)

# lowercase share names
options.sharenames = [s.lower() for s in options.sharenames]
Expand Down
Loading