Compare commits

...

11 Commits

Author SHA1 Message Date
Willi Ballenthin
34d37c9129 ghidra: fix lints 2025-11-04 09:24:22 +00:00
Willi Ballenthin
92b6916030 ghidra: fix lints 2025-11-04 09:22:07 +00:00
Willi Ballenthin
14996956ea binja: fix lints 2025-11-03 12:42:26 +00:00
Willi Ballenthin
2ce7c6a388 ghidra: fix lints 2025-11-03 12:40:29 +00:00
Willi Ballenthin
5b48ae009a ghidra: fix lints 2025-11-03 12:36:50 +00:00
Willi Ballenthin
abdd18d897 binja: fix docstring
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
2025-11-03 13:29:01 +01:00
Willi Ballenthin
9f94375391 ghidra: raise exception on failed VA -> file offset conversion
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
2025-11-03 13:28:42 +01:00
Willi Ballenthin
8f9678af4f changelog 2025-11-03 12:27:18 +00:00
Willi Ballenthin
38dc92d2fa bn: use FileOffsetAddress for embedded PE
closes binary ninja: embedded pe: offsets are virtual addresses rather than file offsets
Fixes #2748
2025-11-03 12:24:04 +00:00
Willi Ballenthin
92e8e49532 ghidra: use FileOffsetAddress for embedded PE
closes ghidra: embedded pe: offsets are virtual addresses rather than file offsets
Fixes #2747
2025-11-03 12:19:55 +00:00
Willi Ballenthin
6a727fa8c0 ida: use FileOffsetAddress for embedded PE
closes ida: embedded pe: offsets are virtual addresses rather than file offsets
Fixes #2746
2025-11-03 12:07:32 +00:00
6 changed files with 77 additions and 13 deletions

View File

@@ -34,6 +34,7 @@
### Bug Fixes
- binja: fix a crash during feature extraction when the MLIL is unavailable @xusheng6 #2714
- embedded pe: use FileOffset rather than AbsoluteVirtualAddress for IDA, Ghidra, and Binary Ninja @williballenthin #2745
### capa Explorer Web

View File

@@ -32,7 +32,7 @@ from capa.features.common import (
Characteristic,
)
from capa.features.address import NO_ADDRESS, Address, FileOffsetAddress, AbsoluteVirtualAddress
from capa.features.extractors.binja.helpers import read_c_string, unmangle_c_name
from capa.features.extractors.binja.helpers import read_c_string, unmangle_c_name, va_to_file_offset
def check_segment_for_pe(bv: BinaryView, seg: Segment) -> Iterator[tuple[Feature, Address]]:
@@ -46,7 +46,8 @@ def check_segment_for_pe(bv: BinaryView, seg: Segment) -> Iterator[tuple[Feature
buf = bv.read(seg.start, seg.length)
for offset, _ in capa.features.extractors.helpers.carve_pe(buf, start):
yield Characteristic("embedded pe"), FileOffsetAddress(seg.start + offset)
file_off = va_to_file_offset(bv, seg.start + offset)
yield Characteristic("embedded pe"), FileOffsetAddress(file_off)
def extract_file_embedded_pe(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
@@ -122,7 +123,8 @@ def extract_file_section_names(bv: BinaryView) -> Iterator[tuple[Feature, Addres
def extract_file_strings(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
"""extract ASCII and UTF-16 LE strings"""
for s in bv.strings:
yield String(s.value), FileOffsetAddress(s.start)
file_off = va_to_file_offset(bv, s.start)
yield String(s.value), FileOffsetAddress(file_off)
def extract_file_function_names(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:

View File

@@ -84,3 +84,29 @@ def get_llil_instr_at_addr(bv: BinaryView, addr: int) -> Optional[LowLevelILInst
if arch.get_instruction_low_level_il(buffer, addr, llil) == 0:
return None
return llil[0]
def va_to_file_offset(bv: BinaryView, va: int) -> int:
    """Translate a virtual address in `bv` into an offset within the backing file.

    The first segment whose `[start, start + length)` range contains `va`
    decides the result; when no segment matches, sections are tried the
    same way. The translation is:

        file_offset = container.data_offset + (va - container.start)

    When nothing contains `va`, the address itself is returned unchanged,
    so callers always receive an int.

    NOTE(review): this relies on both Segment and Section objects exposing
    `data_offset`; confirm that against the Binary Ninja version in use.
    NOTE(review): containment is tested against `length`; whether every byte
    of that range is actually backed by file data is not checked here.
    """
    # segments first: they describe how ranges of the file are mapped
    for segment in bv.segments:
        delta = va - segment.start
        if 0 <= delta < segment.length:
            return int(segment.data_offset + delta)

    # then sections, keyed by name (the name itself is not needed)
    for _name, section in bv.sections.items():
        delta = va - section.start
        if 0 <= delta < section.length:
            return int(section.data_offset + delta)

    # nothing maps this address: hand it back as-is
    return int(va)

View File

@@ -85,10 +85,11 @@ def extract_file_embedded_pe() -> Iterator[tuple[Feature, Address]]:
continue
for off, _ in find_embedded_pe(capa.features.extractors.ghidra.helpers.get_block_bytes(block), mz_xor):
# add offset back to block start
ea: int = block.getStart().add(off).getOffset()
# add offset back to block start (Address)
addr = block.getStart().add(off)
off_file = capa.features.extractors.ghidra.helpers.addr_to_file_offset(addr)
yield Characteristic("embedded pe"), FileOffsetAddress(ea)
yield Characteristic("embedded pe"), FileOffsetAddress(int(off_file))
def extract_file_export_names() -> Iterator[tuple[Feature, Address]]:
@@ -140,12 +141,14 @@ def extract_file_strings() -> Iterator[tuple[Feature, Address]]:
p_bytes = capa.features.extractors.ghidra.helpers.get_block_bytes(block)
for s in capa.features.extractors.strings.extract_ascii_strings(p_bytes):
offset = block.getStart().getOffset() + s.offset
yield String(s.s), FileOffsetAddress(offset)
addr = block.getStart().add(s.offset)
offset = capa.features.extractors.ghidra.helpers.addr_to_file_offset(addr)
yield String(s.s), FileOffsetAddress(int(offset))
for s in capa.features.extractors.strings.extract_unicode_strings(p_bytes):
offset = block.getStart().getOffset() + s.offset
yield String(s.s), FileOffsetAddress(offset)
addr = block.getStart().add(s.offset)
offset = capa.features.extractors.ghidra.helpers.addr_to_file_offset(addr)
yield String(s.s), FileOffsetAddress(int(offset))
def extract_file_function_names() -> Iterator[tuple[Feature, Address]]:

View File

@@ -306,3 +306,31 @@ def find_data_references_from_insn(insn, max_depth: int = 10):
break
yield to_addr
def addr_to_file_offset(addr: ghidra.program.model.address.Address) -> int:
    """Map a Ghidra Address to the file offset of the byte that backs it.

    The lookup is delegated to the program's Memory object:
    `getAddressSourceInfo(addr)` describes where the byte at `addr` came
    from, and `getFileOffset()` on that result gives the offset into the
    originally imported file. Asking Ghidra directly (instead of comparing
    raw numeric offsets against each memory block) keeps the lookup aware of
    address spaces, so a block in another space whose numeric range happens
    to include the same offset cannot be matched by mistake.

    Args:
        addr: the Ghidra Address to translate.

    Returns:
        The file offset as an int. When Ghidra reports no file backing for
        the address (no source info, or a negative file offset), the result
        falls back to `addr - image base`, as before.
    """
    prog = currentProgram()  # type: ignore[name-defined] # noqa: F821

    source_info = prog.getMemory().getAddressSourceInfo(addr)
    if source_info is not None:
        file_offset = source_info.getFileOffset()
        # a negative value means the byte has no source in the imported file
        if file_offset >= 0:
            return int(file_offset)

    # no file-backed source for this address: fall back to image-base subtraction
    return int(addr.getOffset() - prog.getImageBase().getOffset())

View File

@@ -20,6 +20,7 @@ import idc
import idaapi
import idautils
import ida_entry
import ida_loader
import capa.ida.helpers
import capa.features.extractors.common
@@ -87,7 +88,8 @@ def extract_file_embedded_pe() -> Iterator[tuple[Feature, Address]]:
"""
for seg in capa.features.extractors.ida.helpers.get_segments(skip_header_segments=True):
for ea, _ in check_segment_for_pe(seg):
yield Characteristic("embedded pe"), FileOffsetAddress(ea)
off = ida_loader.get_fileregion_offset(ea)
yield Characteristic("embedded pe"), FileOffsetAddress(off)
def extract_file_export_names() -> Iterator[tuple[Feature, Address]]:
@@ -161,10 +163,12 @@ def extract_file_strings() -> Iterator[tuple[Feature, Address]]:
# differing to common string extractor factor in segment offset here
for s in capa.features.extractors.strings.extract_ascii_strings(seg_buff):
yield String(s.s), FileOffsetAddress(seg.start_ea + s.offset)
off = ida_loader.get_fileregion_offset(seg.start_ea + s.offset)
yield String(s.s), FileOffsetAddress(off)
for s in capa.features.extractors.strings.extract_unicode_strings(seg_buff):
yield String(s.s), FileOffsetAddress(seg.start_ea + s.offset)
off = ida_loader.get_fileregion_offset(seg.start_ea + s.offset)
yield String(s.s), FileOffsetAddress(off)
def extract_file_function_names() -> Iterator[tuple[Feature, Address]]: