mirror of
https://github.com/mandiant/capa.git
synced 2025-12-12 15:49:46 -08:00
@@ -28,6 +28,7 @@
|
||||
- ELF: fix parsing of symtab #1704 @williballenthin
|
||||
- result document: don't use deprecated pydantic functions #1718 @williballenthin
|
||||
- pytest: don't mark IDA tests as pytest tests #1719 @williballenthin
|
||||
- ghidra: fix ints_to_bytes performance #1761 @mike-hunhoff
|
||||
|
||||
### capa explorer IDA Pro plugin
|
||||
- fix unhandled exception when resolving rule path #1693 @mike-hunhoff
|
||||
|
||||
@@ -22,7 +22,7 @@ from capa.features.address import NO_ADDRESS, Address, FileOffsetAddress, Absolu
|
||||
MAX_OFFSET_PE_AFTER_MZ = 0x200
|
||||
|
||||
|
||||
def check_segment_for_pe() -> Iterator[Tuple[int, int]]:
|
||||
def find_embedded_pe() -> Iterator[Tuple[int, int]]:
|
||||
"""check segment for embedded PE
|
||||
|
||||
adapted for Ghidra from:
|
||||
@@ -39,10 +39,11 @@ def check_segment_for_pe() -> Iterator[Tuple[int, int]]:
|
||||
]
|
||||
|
||||
todo = []
|
||||
start_addr = currentProgram().getMinAddress().add(1) # type: ignore [name-defined] # noqa: F821
|
||||
for mzx, pex, i in mz_xor:
|
||||
# find all segment offsets containing XOR'd "MZ" bytes
|
||||
off: ghidra.program.model.address.GenericAddress
|
||||
for off in capa.features.extractors.ghidra.helpers.find_byte_sequence(mzx):
|
||||
for off in capa.features.extractors.ghidra.helpers.find_byte_sequence(start_addr, mzx):
|
||||
todo.append((off, mzx, pex, i))
|
||||
|
||||
seg_max = currentProgram().getMaxAddress() # type: ignore [name-defined] # noqa: F821
|
||||
@@ -73,8 +74,7 @@ def check_segment_for_pe() -> Iterator[Tuple[int, int]]:
|
||||
|
||||
def extract_file_embedded_pe() -> Iterator[Tuple[Feature, Address]]:
    """extract embedded PE features

    yields:
      one (Characteristic("embedded pe"), FileOffsetAddress) pair for every
      hit reported by find_embedded_pe().
    """
    # find_embedded_pe() is the current name of the helper formerly called
    # check_segment_for_pe(); only the first element of each hit is used.
    for ea, _ in find_embedded_pe():
        yield Characteristic("embedded pe"), FileOffsetAddress(ea)
|
||||
|
||||
|
||||
|
||||
@@ -20,24 +20,25 @@ from capa.features.address import AbsoluteVirtualAddress
|
||||
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle
|
||||
|
||||
|
||||
def fix_byte(b: int) -> bytes:
    """Transform signed ints from Java into bytes for Python

    Kept for callers that still convert one value at a time; prefer
    ints_to_bytes() for whole buffers, which avoids building the result
    through repeated one-byte concatenation.

    args:
      b: signed int returned from Java processing
    """
    # masking with 0xFF maps a negative value onto its unsigned 8-bit equivalent
    return (b & 0xFF).to_bytes(1, "little")


def ints_to_bytes(bytez: List[int]) -> bytes:
    """convert Java signed ints to Python bytes

    args:
      bytez: list of Java signed ints

    returns:
      a bytes object of the same length, each element reduced to 0..255
    """
    # single pass + one allocation, instead of concatenating byte by byte
    return bytes([b & 0xFF for b in bytez])
|
||||
|
||||
|
||||
def find_byte_sequence(addr: ghidra.program.model.address.Address, seq: bytes) -> Iterator[int]:
    """yield all ea of a given byte sequence

    The search starts at the address supplied by the caller. A caller that
    wants to avoid false positives on regular PE files can pass the program's
    minimum address advanced by one, i.e. getMinAddress().add(1).

    args:
      addr: start address
      seq: bytes to search e.g. b"\\x01\\x03"
    """
    # render every byte as a literal "\xNN" escape for findBytes' search string
    seqstr = "".join([f"\\x{b:02x}" for b in seq])
    # NOTE(review): the trailing arguments look like (match limit, alignment) — confirm against the Ghidra API
    eas = findBytes(addr, seqstr, java.lang.Integer.MAX_VALUE, 1)  # type: ignore [name-defined] # noqa: F821

    yield from eas
|
||||
|
||||
|
||||
@@ -48,15 +49,10 @@ def get_bytes(addr: ghidra.program.model.address.Address, length: int) -> bytes:
|
||||
addr: Address to begin pull from
|
||||
length: length of bytes to pull
|
||||
"""
|
||||
|
||||
bytez = b""
|
||||
try:
|
||||
signed_ints = getBytes(addr, length) # type: ignore [name-defined] # noqa: F821
|
||||
for b in signed_ints:
|
||||
bytez = bytez + fix_byte(b)
|
||||
return bytez
|
||||
return ints_to_bytes(getBytes(addr, length)) # type: ignore [name-defined] # noqa: F821
|
||||
except RuntimeError:
|
||||
return bytez
|
||||
return b""
|
||||
|
||||
|
||||
def get_block_bytes(block: ghidra.program.model.mem.MemoryBlock) -> bytes:
|
||||
@@ -65,15 +61,7 @@ def get_block_bytes(block: ghidra.program.model.mem.MemoryBlock) -> bytes:
|
||||
args:
|
||||
block: MemoryBlock to pull from
|
||||
"""
|
||||
|
||||
bytez = b""
|
||||
try:
|
||||
signed_ints = getBytes(block.getStart(), block.getEnd().getOffset() - block.getStart().getOffset()) # type: ignore [name-defined] # noqa: F821
|
||||
for b in signed_ints:
|
||||
bytez = bytez + fix_byte(b)
|
||||
return bytez
|
||||
except RuntimeError:
|
||||
return bytez
|
||||
return get_bytes(block.getStart(), block.getSize())
|
||||
|
||||
|
||||
def get_function_symbols() -> Iterator[FunctionHandle]:
|
||||
|
||||
@@ -32,8 +32,9 @@ class GHIDRAIO:
|
||||
|
||||
def __init__(self):
    """Initialise the read cursor and load the program's file bytes once."""
    super().__init__()

    # current position used by read(); starts at the beginning of the data
    self.offset = 0
    # complete buffer returned by get_bytes(), kept as a Python bytes object
    self.bytes_ = self.get_bytes()
    # legacy attribute name, bound to the same object for code that still reads it
    self.bytez = self.bytes_
|
||||
|
||||
def seek(self, offset, whence=0):
|
||||
assert whence == 0
|
||||
@@ -42,31 +43,23 @@ class GHIDRAIO:
|
||||
def read(self, size):
    """Return `size` bytes starting at the current offset.

    Returns b"" when fewer than `size` bytes remain after self.offset.
    This method does not modify self.offset.

    args:
      size: number of bytes to return
    """
    logger.debug("reading 0x%x bytes at 0x%x (ea: 0x%x)", size, self.offset, currentProgram().getImageBase().add(self.offset).getOffset())  # type: ignore [name-defined] # noqa: F821

    if size > len(self.bytes_) - self.offset:
        logger.debug("cannot read 0x%x bytes at 0x%x (ea: BADADDR)", size, self.offset)
        return b""
    else:
        # self.bytes_ is already a bytes object, so a slice is enough
        return self.bytes_[self.offset : self.offset + size]
|
||||
|
||||
def close(self):
    """No-op: this method performs no work and returns None."""
    return
|
||||
|
||||
def get_bytes(self):
    """Return the original bytes of the first FileBytes entry as Python bytes."""
    file_bytes = currentProgram().getMemory().getAllFileBytes()[0]  # type: ignore [name-defined] # noqa: F821

    # getOriginalByte() allows for raw file parsing on the Ghidra side
    # other functions will fail as Ghidra will think that it's reading uninitialized memory
    bytes_ = [file_bytes.getOriginalByte(i) for i in range(file_bytes.getSize())]

    # convert the collected values in one pass rather than concatenating per byte
    return capa.features.extractors.ghidra.helpers.ints_to_bytes(bytes_)

def get_file_bytes(self):
    """Backward-compatible alias for get_bytes()."""
    return self.get_bytes()
|
||||
|
||||
|
||||
def is_supported_ghidra_version():
|
||||
|
||||
Reference in New Issue
Block a user