mirror of
https://github.com/mandiant/capa.git
synced 2025-12-12 15:49:46 -08:00
Compare commits
11 Commits
3687bb95e9
...
fix-2745
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
34d37c9129 | ||
|
|
92b6916030 | ||
|
|
14996956ea | ||
|
|
2ce7c6a388 | ||
|
|
5b48ae009a | ||
|
|
abdd18d897 | ||
|
|
9f94375391 | ||
|
|
8f9678af4f | ||
|
|
38dc92d2fa | ||
|
|
92e8e49532 | ||
|
|
6a727fa8c0 |
@@ -34,6 +34,7 @@
|
||||
### Bug Fixes
|
||||
|
||||
- binja: fix a crash during feature extraction when the MLIL is unavailable @xusheng6 #2714
|
||||
- embedded pe: use FileOffset rather than AbsoluteVirtualAddress for IDA, Ghidra, and Binary Ninja @williballenthin #2745
|
||||
|
||||
### capa Explorer Web
|
||||
|
||||
|
||||
@@ -32,7 +32,7 @@ from capa.features.common import (
|
||||
Characteristic,
|
||||
)
|
||||
from capa.features.address import NO_ADDRESS, Address, FileOffsetAddress, AbsoluteVirtualAddress
|
||||
from capa.features.extractors.binja.helpers import read_c_string, unmangle_c_name
|
||||
from capa.features.extractors.binja.helpers import read_c_string, unmangle_c_name, va_to_file_offset
|
||||
|
||||
|
||||
def check_segment_for_pe(bv: BinaryView, seg: Segment) -> Iterator[tuple[Feature, Address]]:
|
||||
@@ -46,7 +46,8 @@ def check_segment_for_pe(bv: BinaryView, seg: Segment) -> Iterator[tuple[Feature
|
||||
buf = bv.read(seg.start, seg.length)
|
||||
|
||||
for offset, _ in capa.features.extractors.helpers.carve_pe(buf, start):
|
||||
yield Characteristic("embedded pe"), FileOffsetAddress(seg.start + offset)
|
||||
file_off = va_to_file_offset(bv, seg.start + offset)
|
||||
yield Characteristic("embedded pe"), FileOffsetAddress(file_off)
|
||||
|
||||
|
||||
def extract_file_embedded_pe(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
|
||||
@@ -122,7 +123,8 @@ def extract_file_section_names(bv: BinaryView) -> Iterator[tuple[Feature, Addres
|
||||
def extract_file_strings(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
|
||||
"""extract ASCII and UTF-16 LE strings"""
|
||||
for s in bv.strings:
|
||||
yield String(s.value), FileOffsetAddress(s.start)
|
||||
file_off = va_to_file_offset(bv, s.start)
|
||||
yield String(s.value), FileOffsetAddress(file_off)
|
||||
|
||||
|
||||
def extract_file_function_names(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
|
||||
|
||||
@@ -84,3 +84,29 @@ def get_llil_instr_at_addr(bv: BinaryView, addr: int) -> Optional[LowLevelILInst
|
||||
if arch.get_instruction_low_level_il(buffer, addr, llil) == 0:
|
||||
return None
|
||||
return llil[0]
|
||||
|
||||
|
||||
def va_to_file_offset(bv: BinaryView, va: int) -> int:
    """Map a BinaryView virtual address to an offset in the underlying file.

    The first segment whose range ``[start, start + length)`` contains ``va``
    is used, and the file offset is computed as:

        file_offset = segment.data_offset + (va - segment.start)

    If no segment contains ``va``, sections are consulted the same way, but
    only those that actually expose a ``data_offset`` attribute.
    If nothing matches, the virtual address itself is returned unchanged.

    Args:
        bv: the BinaryView whose segments/sections describe the mapping.
        va: the virtual address to translate.

    Returns:
        the computed file offset, or ``va`` itself when no mapping is found.
    """
    # prefer segments: they carry the file offset (`data_offset`) of their data.
    # NOTE(review): a segment's `length` may exceed its file-backed size
    # (presumably `data_length`); addresses past the backed part still get an
    # offset computed here - confirm whether callers need to tell them apart.
    for seg in bv.segments:
        if seg.start <= va < seg.start + seg.length:
            return int(seg.data_offset + (va - seg.start))

    # otherwise check sections. only the values are needed, not the names.
    for sec in bv.sections.values():
        if not (sec.start <= va < sec.start + sec.length):
            continue

        # the Binary Ninja Python API documents `data_offset` on Segment, not on
        # Section; read it defensively so a missing attribute falls through to
        # the fallback below instead of raising AttributeError.
        data_offset = getattr(sec, "data_offset", None)
        if data_offset is None:
            continue

        return int(data_offset + (va - sec.start))

    # fallback: no file-backed mapping found, return the address as given
    return int(va)
|
||||
|
||||
@@ -85,10 +85,11 @@ def extract_file_embedded_pe() -> Iterator[tuple[Feature, Address]]:
|
||||
continue
|
||||
|
||||
for off, _ in find_embedded_pe(capa.features.extractors.ghidra.helpers.get_block_bytes(block), mz_xor):
|
||||
# add offset back to block start
|
||||
ea: int = block.getStart().add(off).getOffset()
|
||||
# add offset back to block start (Address)
|
||||
addr = block.getStart().add(off)
|
||||
off_file = capa.features.extractors.ghidra.helpers.addr_to_file_offset(addr)
|
||||
|
||||
yield Characteristic("embedded pe"), FileOffsetAddress(ea)
|
||||
yield Characteristic("embedded pe"), FileOffsetAddress(int(off_file))
|
||||
|
||||
|
||||
def extract_file_export_names() -> Iterator[tuple[Feature, Address]]:
|
||||
@@ -140,12 +141,14 @@ def extract_file_strings() -> Iterator[tuple[Feature, Address]]:
|
||||
p_bytes = capa.features.extractors.ghidra.helpers.get_block_bytes(block)
|
||||
|
||||
for s in capa.features.extractors.strings.extract_ascii_strings(p_bytes):
|
||||
offset = block.getStart().getOffset() + s.offset
|
||||
yield String(s.s), FileOffsetAddress(offset)
|
||||
addr = block.getStart().add(s.offset)
|
||||
offset = capa.features.extractors.ghidra.helpers.addr_to_file_offset(addr)
|
||||
yield String(s.s), FileOffsetAddress(int(offset))
|
||||
|
||||
for s in capa.features.extractors.strings.extract_unicode_strings(p_bytes):
|
||||
offset = block.getStart().getOffset() + s.offset
|
||||
yield String(s.s), FileOffsetAddress(offset)
|
||||
addr = block.getStart().add(s.offset)
|
||||
offset = capa.features.extractors.ghidra.helpers.addr_to_file_offset(addr)
|
||||
yield String(s.s), FileOffsetAddress(int(offset))
|
||||
|
||||
|
||||
def extract_file_function_names() -> Iterator[tuple[Feature, Address]]:
|
||||
|
||||
@@ -306,3 +306,31 @@ def find_data_references_from_insn(insn, max_depth: int = 10):
|
||||
break
|
||||
|
||||
yield to_addr
|
||||
|
||||
|
||||
def addr_to_file_offset(addr: ghidra.program.model.address.Address) -> int:
    """Map a Ghidra Address to an offset in the originally imported file.

    Algorithm:
      - ask the program's Memory for the byte-source information of `addr`
        (`Memory.getAddressSourceInfo`), which records where in the imported
        file the byte at that address came from
      - if a file offset is available, return it
      - otherwise (address not in memory, or not backed by file bytes),
        fall back to subtracting the program image base from the address offset

    Args:
        addr: the Ghidra Address to translate.

    Returns:
        the file offset when the address is file-backed,
        else `addr` offset minus the program image base.
    """
    prog = currentProgram()  # type: ignore[name-defined] # noqa: F821

    # MemoryBlock does not provide a file offset accessor itself;
    # the per-address source info is the documented way to get one.
    # getAddressSourceInfo returns null (None) when `addr` is not in memory.
    info = prog.getMemory().getAddressSourceInfo(addr)
    if info is not None:
        file_offset = info.getFileOffset()
        # getFileOffset reports -1 when the address has no file-backed source
        if file_offset >= 0:
            return int(file_offset)

    # no file-backed source found, fall back to image-base subtraction
    base = prog.getImageBase().getOffset()
    return int(addr.getOffset() - base)
|
||||
|
||||
@@ -20,6 +20,7 @@ import idc
|
||||
import idaapi
|
||||
import idautils
|
||||
import ida_entry
|
||||
import ida_loader
|
||||
|
||||
import capa.ida.helpers
|
||||
import capa.features.extractors.common
|
||||
@@ -87,7 +88,8 @@ def extract_file_embedded_pe() -> Iterator[tuple[Feature, Address]]:
|
||||
"""
|
||||
for seg in capa.features.extractors.ida.helpers.get_segments(skip_header_segments=True):
|
||||
for ea, _ in check_segment_for_pe(seg):
|
||||
yield Characteristic("embedded pe"), FileOffsetAddress(ea)
|
||||
off = ida_loader.get_fileregion_offset(ea)
|
||||
yield Characteristic("embedded pe"), FileOffsetAddress(off)
|
||||
|
||||
|
||||
def extract_file_export_names() -> Iterator[tuple[Feature, Address]]:
|
||||
@@ -161,10 +163,12 @@ def extract_file_strings() -> Iterator[tuple[Feature, Address]]:
|
||||
|
||||
# differing to common string extractor factor in segment offset here
|
||||
for s in capa.features.extractors.strings.extract_ascii_strings(seg_buff):
|
||||
yield String(s.s), FileOffsetAddress(seg.start_ea + s.offset)
|
||||
off = ida_loader.get_fileregion_offset(seg.start_ea + s.offset)
|
||||
yield String(s.s), FileOffsetAddress(off)
|
||||
|
||||
for s in capa.features.extractors.strings.extract_unicode_strings(seg_buff):
|
||||
yield String(s.s), FileOffsetAddress(seg.start_ea + s.offset)
|
||||
off = ida_loader.get_fileregion_offset(seg.start_ea + s.offset)
|
||||
yield String(s.s), FileOffsetAddress(off)
|
||||
|
||||
|
||||
def extract_file_function_names() -> Iterator[tuple[Feature, Address]]:
|
||||
|
||||
Reference in New Issue
Block a user