Skip to content
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 24 additions & 10 deletions impacket/ese.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,11 @@
FLAGS_NEW_FORMAT = 0x2000
FLAGS_NEW_CHECKSUM = 0x2000

# On 16 KiB and 32 KiB pages, the raw 16-bit tag state appears to store the total
# tag count in the lower 12 bits. The upper 4 bits seem to represent a reserved tag count.
FIRST_AVAILABLE_PAGE_TAG_MASK = 0x0fff
FIRST_AVAILABLE_PAGE_TAG_RESERVED_SHIFT = 12

# Tag Flags
TAG_UNKNOWN = 0x1
TAG_DEFUNCT = 0x2
Expand Down Expand Up @@ -436,8 +441,17 @@ def __init__(self, db, data=None):
self.__DBHeader = db
self.data = data
self.record = None
self.tagCount = 0
self.tagReserved = 1
if data is not None:
self.record = ESENT_PAGE_HEADER(self.__DBHeader['Version'], self.__DBHeader['FileFormatRevision'], self.__DBHeader['PageSize'], data)
self.tagCount = self.record['FirstAvailablePageTag']
if self.__DBHeader['FileFormatRevision'] >= 0x11 and self.__DBHeader['PageSize'] > 8192:
# TODO: The upper 4 bits may encode how many leading tags are reserved on large pages.
# Logical node counts should be derived from the effective reserved-tag count
# instead of assuming only tag 0 is reserved; that is, the logical node count should be tagCount - tagReserved.
Copy link

Copilot AI Mar 27, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The comment block under the large-page `if` statement is over-indented (the lines after the TODO have extra indentation). This makes the code harder to read; align the comment indentation with the rest of the block.

Suggested change
# Logical node counts should be derived from the effective reserved-tag count
# instead of assuming only tag 0 is reserved, the logical node count should be tagCount - tagReserved.
# Logical node counts should be derived from the effective reserved-tag count
# instead of assuming only tag 0 is reserved, the logical node count should be tagCount - tagReserved.

Copilot uses AI. Check for mistakes.
self.tagReserved = (self.record['FirstAvailablePageTag'] >> FIRST_AVAILABLE_PAGE_TAG_RESERVED_SHIFT) or 1
self.tagCount = self.record['FirstAvailablePageTag'] & FIRST_AVAILABLE_PAGE_TAG_MASK
Copy link

Copilot AI Mar 27, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

tagReserved is parsed for large pages but is never used when iterating tags (most loops still start at 1). If tagReserved can be > 1, the parser may still try to interpret reserved tags as records; consider iterating from tagReserved (or otherwise skipping reserved tags) when walking leaf/branch tags.

Suggested change
# Logical node counts should be derived from the effective reserved-tag count
# instead of assuming only tag 0 is reserved, the logical node count should be tagCount - tagReserved.
self.tagReserved = (self.record['FirstAvailablePageTag'] >> FIRST_AVAILABLE_PAGE_TAG_RESERVED_SHIFT) or 1
self.tagCount = self.record['FirstAvailablePageTag'] & FIRST_AVAILABLE_PAGE_TAG_MASK
# Logical node counts should be derived from the effective reserved-tag count
# instead of assuming only tag 0 is reserved, the logical node count should be tagCount - tagReserved.
raw_tag_field = self.record['FirstAvailablePageTag']
self.tagReserved = (raw_tag_field >> FIRST_AVAILABLE_PAGE_TAG_RESERVED_SHIFT) or 1
physicalTagCount = raw_tag_field & FIRST_AVAILABLE_PAGE_TAG_MASK
# On large pages, adjust tagCount so it represents the logical node count (excluding all reserved tags).
# When tagReserved == 1 (the legacy assumption), this reduces to the original behavior.
self.tagCount = physicalTagCount - (self.tagReserved - 1)

Copilot uses AI. Check for mistakes.
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Intentional. This PR fixes the confirmed #1924 crash by correcting the large-page
tag count parsing. tagReserved is modeled for parity with dissect.esedb, but
I am not changing logical-node traversal without a sample exposing effective
tagReserved > 1, since that would require a broader change than this bug fix.

Comment on lines +449 to +452
Copy link

Copilot AI Mar 27, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change fixes a specific crash/regression for 32 KiB pages by masking FirstAvailablePageTag to 12 bits. Please add a regression test (unit-level if possible) that builds/parses a page header where FirstAvailablePageTag has high bits set (e.g., 0x100c) and asserts tagCount == 0x000c and tag iteration does not raise.

Copilot uses AI. Check for mistakes.
Comment on lines +449 to +452
Copy link

Copilot AI Mar 27, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The large-page condition in ESENT_PAGE.__init__ only checks FileFormatRevision/PageSize, but other large-page parsing logic in this module (e.g., getTag()) also gates on Version == 0x620. Consider aligning the predicate here with getTag() to avoid masking FirstAvailablePageTag on database versions that don’t use the 12-bit tag-count encoding.

Copilot uses AI. Check for mistakes.

def printFlags(self):
flags = self.record['PageFlags']
Expand Down Expand Up @@ -465,14 +479,14 @@ def printFlags(self):
def dump(self):
baseOffset = len(self.record)
self.record.dump()
tags = self.data[-4*self.record['FirstAvailablePageTag']:]
tags = self.data[-4*self.tagCount:]

print("FLAGS: ")
self.printFlags()

print()

for i in range(self.record['FirstAvailablePageTag']):
for i in range(self.tagCount):
tag = tags[-4:]
if self.__DBHeader['Version'] == 0x620 and self.__DBHeader['FileFormatRevision'] > 11 and self.__DBHeader['PageSize'] > 8192:
Copy link

Copilot AI Mar 27, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In dump(), the large-page tag decoding check uses FileFormatRevision > 11, but elsewhere the Windows 7+ boundary is treated as >= 0x11 / >= 17 (see getTag() and ESENT_PAGE_HEADER). To keep behavior consistent and avoid applying the large-page decoding to revisions 0x0c–0x10, update this condition to match the same threshold used elsewhere.

Suggested change
if self.__DBHeader['Version'] == 0x620 and self.__DBHeader['FileFormatRevision'] > 11 and self.__DBHeader['PageSize'] > 8192:
if self.__DBHeader['Version'] == 0x620 and self.__DBHeader['FileFormatRevision'] >= 0x11 and self.__DBHeader['PageSize'] > 8192:

Copilot uses AI. Check for mistakes.
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree the condition is inconsistent with the rest of the module, but it is outside the crash path fixed here and I do not have a sample showing that revisions 0x0c..0x10 are mishandled by dump(). I’d prefer to keep this PR scoped to the reproducible crash.

valueSize = unpack('<H', tag[:2])[0] & 0x7fff
Expand Down Expand Up @@ -510,7 +524,7 @@ def dump(self):
leafHeader.dump()

# Print the leaf/branch tags
for tagNum in range(1,self.record['FirstAvailablePageTag']):
for tagNum in range(1,self.tagCount):
flags, data = self.getTag(tagNum)
if self.record['PageFlags'] & FLAGS_LEAF == 0:
# Branch page
Expand Down Expand Up @@ -540,10 +554,10 @@ def dump(self):
hexdump(leafEntry['EntryData'])

def getTag(self, tagNum):
if self.record['FirstAvailablePageTag'] < tagNum:
if self.tagCount <= tagNum:
raise Exception('Trying to grab an unknown tag 0x%x' % tagNum)

tags = self.data[-4*self.record['FirstAvailablePageTag']:]
tags = self.data[-4*self.tagCount:]
baseOffset = len(self.record)
for i in range(tagNum):
tags = tags[:-4]
Expand Down Expand Up @@ -658,7 +672,7 @@ def __addLongValue(self, entry):

def parsePage(self, page):
# Print the leaf/branch tags
for tagNum in range(1,page.record['FirstAvailablePageTag']):
for tagNum in range(1,page.tagCount):
flags, data = page.getTag(tagNum)
if page.record['PageFlags'] & FLAGS_LEAF > 0:
# Leaf page
Expand All @@ -678,7 +692,7 @@ def parseCatalog(self, pageNum):
page = self.getPage(pageNum)
self.parsePage(page)

for i in range(1, page.record['FirstAvailablePageTag']):
for i in range(1, page.tagCount):
flags, data = page.getTag(i)
if page.record['PageFlags'] & FLAGS_LEAF == 0:
# Branch page
Expand Down Expand Up @@ -721,10 +735,10 @@ def openTable(self, tableName):
done = False
while done is False:
page = self.getPage(pageNum)
if page.record['FirstAvailablePageTag'] <= 1:
if page.tagCount <= 1:
# There are no records
done = True
for i in range(1, page.record['FirstAvailablePageTag']):
for i in range(1, page.tagCount):
flags, data = page.getTag(i)
if page.record['PageFlags'] & FLAGS_LEAF == 0:
# Branch page, move on to the next page
Expand All @@ -747,7 +761,7 @@ def openTable(self, tableName):
def __getNextTag(self, cursor):
page = cursor['CurrentPageData']

if cursor['CurrentTag'] >= page.record['FirstAvailablePageTag']:
if cursor['CurrentTag'] >= page.tagCount:
# No more data in this page, chau
return None

Expand Down
Loading