Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@
requires = ["setuptools>=45", "wheel"]
build-backend = "setuptools.build_meta"

[tool.pytest.ini_options]
python_files = ["test*.py"]
testpaths = ["tests"]
addopts = ["-p", "no:cacheprovider"]

[tool.ruff]
# Exclude a variety of commonly ignored directories.
exclude = [
Expand Down Expand Up @@ -75,4 +80,4 @@ skip-magic-trailing-comma = false
line-ending = "auto"

[tool.ruff.lint.isort]
known-first-party = ["smda"]
known-first-party = ["smda"]
15 changes: 15 additions & 0 deletions smda/Disassembler.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,23 @@ def _populateBinaryInfo(self, loader, file_path=""):
binary_info.base_addr = loader.getBaseAddress()
binary_info.bitness = loader.getBitness()
binary_info.architecture = loader.getArchitecture()
binary_info.abi = loader.getAbi()
binary_info.code_areas = loader.getCodeAreas()
return binary_info

def _ensureHashes(self, binary_info):
    """Fills in whichever of sha256/sha1/md5 are still unset on binary_info.

    Digests that are already truthy are kept as they are. The hashed payload
    is binary_info.raw_data when that is truthy, otherwise binary_info.binary;
    if the selected payload is None, nothing is computed.
    """
    hashers = (
        ("sha256", hashlib.sha256),
        ("sha1", hashlib.sha1),
        ("md5", hashlib.md5),
    )
    pending = [(field, make) for field, make in hashers if not getattr(binary_info, field)]
    if not pending:
        # all three digests are already present
        return
    payload = binary_info.raw_data or binary_info.binary
    if payload is None:
        return
    for field, make in pending:
        setattr(binary_info, field, make(payload).hexdigest())

def disassembleFile(self, file_path, pdb_path=""):
start = datetime.datetime.now(datetime.timezone.utc)
try:
Expand Down Expand Up @@ -162,6 +176,7 @@ def disassembleBuffer(
def _disassemble(self, binary_info, timeout=0):
self._start_time = datetime.datetime.now(datetime.timezone.utc)
self._timeout = timeout
self._ensureHashes(binary_info)
if self.disassembler:
self.disassembly = self.disassembler.analyzeBuffer(binary_info, self._callbackAnalysisTimeout)
return SmdaReport(self.disassembly, config=self.config)
Expand Down
5 changes: 1 addition & 4 deletions smda/common/BinaryInfo.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import hashlib
import logging

import lief
Expand Down Expand Up @@ -37,10 +36,8 @@ def __init__(self, binary):
self.binary = binary
self.raw_data = binary
self.binary_size = len(binary)
self.sha256 = hashlib.sha256(binary).hexdigest()
self.sha1 = hashlib.sha1(binary).hexdigest()
self.md5 = hashlib.md5(binary).hexdigest()
self._lief_binary = None
self.abi = ""

def getBinaryData(self):
"""Safely retrieves binary data from either raw_data or a file path."""
Expand Down
5 changes: 5 additions & 0 deletions smda/common/SmdaReport.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

class SmdaReport:
architecture = None
abi = None
base_addr = None
binary_size = None
binweight = None
Expand Down Expand Up @@ -58,6 +59,7 @@ class SmdaReport:
def __init__(self, disassembly=None, config=None, buffer=None):
if disassembly is not None:
self.architecture = disassembly.binary_info.architecture
self.abi = disassembly.binary_info.abi
self.base_addr = disassembly.binary_info.base_addr
self.binary_size = disassembly.binary_info.binary_size
self.binweight = 0
Expand Down Expand Up @@ -223,6 +225,7 @@ def fromFile(cls, file_path):
def fromDict(cls, report_dict) -> Optional["SmdaReport"]:
smda_report = cls(None)
smda_report.architecture = report_dict["architecture"]
smda_report.abi = report_dict.get("abi", "")
smda_report.base_addr = report_dict["base_addr"]
smda_report.binary_size = report_dict["binary_size"]
smda_report.bitness = report_dict["bitness"]
Expand Down Expand Up @@ -261,6 +264,7 @@ def fromDict(cls, report_dict) -> Optional["SmdaReport"]:
smda_report.timestamp = datetime.datetime.strptime(report_dict["timestamp"], "%Y-%m-%dT%H-%M-%S")
binary_info = BinaryInfo(b"")
binary_info.architecture = smda_report.architecture
binary_info.abi = smda_report.abi
binary_info.base_addr = smda_report.base_addr
binary_info.binary_size = smda_report.binary_size
binary_info.oep = smda_report.oep
Expand All @@ -287,6 +291,7 @@ def toDict(self) -> dict:
transformed_code_sections.append(("", 0, 0))
return {
"architecture": self.architecture,
"abi": self.abi,
"base_addr": self.base_addr,
"binary_size": self.binary_size,
"bitness": self.bitness,
Expand Down
8 changes: 8 additions & 0 deletions smda/utility/DelphiKbFileLoader.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,11 @@ def getBitness(binary):
@staticmethod
def getCodeAreas(binary):
return []

@staticmethod
def getArchitecture(binary):
    """Returns the architecture label for this loader; always "intel", the input is not inspected."""
    return "intel"

@staticmethod
def getAbi(binary):
    """Returns the ABI label for this loader; always the empty string, the input is not inspected."""
    return ""
12 changes: 11 additions & 1 deletion smda/utility/ElfFileLoader.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,10 +198,20 @@ def mapBinary(binary):
LOGGER.debug("ELF: final mapped size: 0x%x", len(mapped_binary))
return bytes(mapped_binary)

@staticmethod
def getAbi(binary):
    """Returns the name of the ELF header's identity_os_abi value, or "" if it cannot be determined.

    The empty string is returned when lief.parse() yields a falsy result or
    when lief.bad_file is raised; the latter case is logged as a warning.
    """
    try:
        parsed = lief.parse(binary)
        # attribute access stays inside the try so parse-related errors are handled uniformly
        return parsed.header.identity_os_abi.name if parsed else ""
    except lief.bad_file as exc:
        LOGGER.warning("Failed to determine ELF ABI: %s", exc)
    return ""

@staticmethod
def getArchitecture(binary):
    """Returns the architecture label for ELF input; always "intel", the input is not inspected."""
    # ABI detection is not done here; the loader's getAbi() reads it from the ELF header.
    return "intel"

@staticmethod
Expand Down
5 changes: 5 additions & 0 deletions smda/utility/FileLoader.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ class FileLoader:
_raw_data = b""
_base_addr = 0
_bitness = 0
_abi = ""
_architecture = ""
_code_areas = []
file_loaders = [PeFileLoader, ElfFileLoader, MachoFileLoader, DelphiKbFileLoader]
Expand Down Expand Up @@ -40,6 +41,7 @@ def _loadFile(self, buffer=None):
self._bitness = loader.getBitness(self._raw_data)
self._code_areas = loader.getCodeAreas(self._raw_data)
self._architecture = loader.getArchitecture(self._raw_data)
self._abi = loader.getAbi(self._raw_data)
break
else:
self._data = self._raw_data
Expand All @@ -53,6 +55,9 @@ def getRawData(self):
def getBaseAddress(self):
return self._base_addr

def getAbi(self):
    """Returns the ABI string held in self._abi ("" by default, set in _loadFile from the matching loader's getAbi)."""
    abi = self._abi
    return abi

def getArchitecture(self):
    """Returns the architecture string held in self._architecture ("" by default, set in _loadFile from the matching loader)."""
    architecture = self._architecture
    return architecture

Expand Down
4 changes: 4 additions & 0 deletions smda/utility/MachoFileLoader.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,10 @@ def mapBinary(binary):
LOGGER.debug("MachO: final mapped size: 0x%x", len(mapped_binary))
return bytes(mapped_binary)

@staticmethod
def getAbi(binary):
    """Returns the ABI label for Mach-O input; always the empty string, the input is not inspected."""
    return ""

@staticmethod
def getArchitecture(binary):
# TODO add machine types whenever we add more architectures
Expand Down
4 changes: 4 additions & 0 deletions smda/utility/PeFileLoader.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,10 @@ def getOEP(binary):
oep_rva = struct.unpack("I", binary[pe_offset + 0x28 : pe_offset + 0x2C])[0]
return oep_rva

@staticmethod
def getAbi(binary):
    """Returns the ABI label for PE input; always the empty string, the input is not inspected."""
    return ""

@staticmethod
def getArchitecture(binary):
architecture = "intel"
Expand Down
2 changes: 2 additions & 0 deletions tests/testFileFormatParsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,13 +76,15 @@ def testElfParsingWithBashlite(self):
binary_info.file_path = ""
binary_info.base_addr = loader.getBaseAddress()
binary_info.bitness = loader.getBitness()
binary_info.abi = loader.getAbi()
binary_info.code_areas = loader.getCodeAreas()
binary_info.oep = binary_info.getOep()
controlled_disassembly = disasm._disassemble(binary_info)
assert controlled_disassembly.num_functions == 177
bashlite_unmapped_disassembly = disasm.disassembleUnmappedBuffer(bashlite_binary)
assert bashlite_unmapped_disassembly.num_functions == 177
assert len([f.function_name for f in bashlite_unmapped_disassembly.getFunctions() if f.function_name]) == 174
assert binary_info.abi == "SYSTEMV"
# test section extraction
sections = {name: (start, end) for name, start, end in binary_info.getSections()}
assert len(sections) > 0
Expand Down
2 changes: 1 addition & 1 deletion tests/testIntegration.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ def testCutwailMarshalling(self):
assert report_as_dict["status"] == "ok"
assert report_as_dict["base_addr"] == 0x4000000
assert report_as_dict["statistics"]["num_instructions"] == 1611
assert report_as_dict["sha256"] == "a348a0ddfab135d152b684d561a3215ab6c472570facd3d75aa2c7ee845a8e2b"
assert report_as_dict["sha256"] == "46686681e2be012ce26219eec1e765f8f2db9fc7a33ca802482050cef189334f"
# compare our manual file loading with unmapped buffer
assert self.cutwail_disassembly.num_instructions == self.cutwail_unmapped_disassembly.num_instructions
SmdaReport.fromDict(report_as_dict)
Expand Down