mirror of
https://github.com/mandiant/capa.git
synced 2025-12-12 23:59:48 -08:00
smda: use Addresses
This commit is contained in:
@@ -5,7 +5,7 @@ import dnfile
|
||||
import pefile
|
||||
|
||||
from capa.features.common import OS, OS_ANY, ARCH_ANY, ARCH_I386, ARCH_AMD64, FORMAT_DOTNET, Arch, Format, Feature
|
||||
from capa.features.address import NO_ADDRESS, Address, DNTokenAddress, DNTokenOffsetAddress, AbsoluteVirtualAddress
|
||||
from capa.features.address import NO_ADDRESS, Address, AbsoluteVirtualAddress
|
||||
from capa.features.extractors.base_extractor import FeatureExtractor
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -1,9 +1,12 @@
|
||||
import string
|
||||
import struct
|
||||
from typing import Tuple, Iterator
|
||||
|
||||
from capa.features.common import Characteristic
|
||||
from capa.features.common import Feature, Characteristic
|
||||
from capa.features.address import Address, AbsoluteVirtualAddress
|
||||
from capa.features.basicblock import BasicBlock
|
||||
from capa.features.extractors.helpers import MIN_STACKSTRING_LEN
|
||||
from capa.features.extractors.base_extractor import BBHandle, FunctionHandle
|
||||
|
||||
|
||||
def _bb_has_tight_loop(f, bb):
|
||||
@@ -13,10 +16,10 @@ def _bb_has_tight_loop(f, bb):
|
||||
return bb.offset in f.blockrefs[bb.offset] if bb.offset in f.blockrefs else False
|
||||
|
||||
|
||||
def extract_bb_tight_loop(f, bb):
|
||||
def extract_bb_tight_loop(f: FunctionHandle, bb: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""check basic block for tight loop indicators"""
|
||||
if _bb_has_tight_loop(f, bb):
|
||||
yield Characteristic("tight loop"), bb.offset
|
||||
if _bb_has_tight_loop(f.inner, bb.inner):
|
||||
yield Characteristic("tight loop"), bb.address
|
||||
|
||||
|
||||
def _bb_has_stackstring(f, bb):
|
||||
@@ -37,10 +40,10 @@ def get_operands(smda_ins):
|
||||
return [o.strip() for o in smda_ins.operands.split(",")]
|
||||
|
||||
|
||||
def extract_stackstring(f, bb):
|
||||
def extract_stackstring(f: FunctionHandle, bb: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""check basic block for stackstring indicators"""
|
||||
if _bb_has_stackstring(f, bb):
|
||||
yield Characteristic("stack string"), bb.offset
|
||||
if _bb_has_stackstring(f.inner, bb.inner):
|
||||
yield Characteristic("stack string"), bb.address
|
||||
|
||||
|
||||
def is_mov_imm_to_stack(smda_ins):
|
||||
@@ -107,21 +110,21 @@ def get_printable_len(instr):
|
||||
return 0
|
||||
|
||||
|
||||
def extract_features(f, bb):
|
||||
def extract_features(f: FunctionHandle, bb: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""
|
||||
extract features from the given basic block.
|
||||
|
||||
args:
|
||||
f (smda.common.SmdaFunction): the function from which to extract features
|
||||
bb (smda.common.SmdaBasicBlock): the basic block to process.
|
||||
f: the function from which to extract features
|
||||
bb: the basic block to process.
|
||||
|
||||
yields:
|
||||
Tuple[Feature, int]: the features and their location found in this basic block.
|
||||
Tuple[Feature, Address]: the features and their location found in this basic block.
|
||||
"""
|
||||
yield BasicBlock(), bb.offset
|
||||
yield BasicBlock(), bb.address
|
||||
for bb_handler in BASIC_BLOCK_HANDLERS:
|
||||
for feature, va in bb_handler(f, bb):
|
||||
yield feature, va
|
||||
for feature, addr in bb_handler(f, bb):
|
||||
yield feature, addr
|
||||
|
||||
|
||||
BASIC_BLOCK_HANDLERS = (
|
||||
|
||||
@@ -9,8 +9,8 @@ import capa.features.extractors.smda.global_
|
||||
import capa.features.extractors.smda.function
|
||||
import capa.features.extractors.smda.basicblock
|
||||
from capa.features.common import Feature
|
||||
from capa.features.address import Address
|
||||
from capa.features.extractors.base_extractor import FeatureExtractor
|
||||
from capa.features.address import Address, AbsoluteVirtualAddress
|
||||
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle, FeatureExtractor
|
||||
|
||||
|
||||
class SmdaFeatureExtractor(FeatureExtractor):
|
||||
@@ -27,7 +27,7 @@ class SmdaFeatureExtractor(FeatureExtractor):
|
||||
self.global_features.extend(capa.features.extractors.smda.global_.extract_arch(self.smda_report))
|
||||
|
||||
def get_base_address(self):
|
||||
return self.smda_report.base_addr
|
||||
return AbsoluteVirtualAddress(self.smda_report.base_addr)
|
||||
|
||||
def extract_global_features(self):
|
||||
yield from self.global_features
|
||||
@@ -37,21 +37,21 @@ class SmdaFeatureExtractor(FeatureExtractor):
|
||||
|
||||
def get_functions(self):
|
||||
for function in self.smda_report.getFunctions():
|
||||
yield function
|
||||
yield FunctionHandle(address=AbsoluteVirtualAddress(function.offset), inner=function)
|
||||
|
||||
def extract_function_features(self, f):
|
||||
yield from capa.features.extractors.smda.function.extract_features(f)
|
||||
|
||||
def get_basic_blocks(self, f):
|
||||
for bb in f.getBlocks():
|
||||
yield bb
|
||||
yield BBHandle(address=AbsoluteVirtualAddress(bb.offset), inner=bb)
|
||||
|
||||
def extract_basic_block_features(self, f, bb):
|
||||
yield from capa.features.extractors.smda.basicblock.extract_features(f, bb)
|
||||
|
||||
def get_instructions(self, f, bb):
|
||||
for smda_ins in bb.getInstructions():
|
||||
yield smda_ins
|
||||
yield InsnHandle(address=AbsoluteVirtualAddress(smda_ins.offset), inner=smda_ins)
|
||||
|
||||
def extract_insn_features(self, f, bb, insn):
|
||||
yield from capa.features.extractors.smda.insn.extract_features(f, bb, insn)
|
||||
|
||||
@@ -6,11 +6,12 @@ import capa.features.extractors.helpers
|
||||
import capa.features.extractors.strings
|
||||
from capa.features.file import Export, Import, Section
|
||||
from capa.features.common import String, Characteristic
|
||||
from capa.features.address import FileOffsetAddress, AbsoluteVirtualAddress
|
||||
|
||||
|
||||
def extract_file_embedded_pe(buf, **kwargs):
|
||||
for offset, _ in capa.features.extractors.helpers.carve_pe(buf, 1):
|
||||
yield Characteristic("embedded pe"), offset
|
||||
yield Characteristic("embedded pe"), FileOffsetAddress(offset)
|
||||
|
||||
|
||||
def extract_file_export_names(buf, **kwargs):
|
||||
@@ -18,7 +19,7 @@ def extract_file_export_names(buf, **kwargs):
|
||||
|
||||
if lief_binary is not None:
|
||||
for function in lief_binary.exported_functions:
|
||||
yield Export(function.name), function.address
|
||||
yield Export(function.name), AbsoluteVirtualAddress(function.address)
|
||||
|
||||
|
||||
def extract_file_import_names(smda_report, buf):
|
||||
@@ -33,10 +34,10 @@ def extract_file_import_names(smda_report, buf):
|
||||
va = func.iat_address + smda_report.base_addr
|
||||
if func.name:
|
||||
for name in capa.features.extractors.helpers.generate_symbols(library_name, func.name):
|
||||
yield Import(name), va
|
||||
yield Import(name), AbsoluteVirtualAddress(va)
|
||||
elif func.is_ordinal:
|
||||
for name in capa.features.extractors.helpers.generate_symbols(library_name, "#%s" % func.ordinal):
|
||||
yield Import(name), va
|
||||
yield Import(name), AbsoluteVirtualAddress(va)
|
||||
|
||||
|
||||
def extract_file_section_names(buf, **kwargs):
|
||||
@@ -46,7 +47,7 @@ def extract_file_section_names(buf, **kwargs):
|
||||
if lief_binary and lief_binary.sections:
|
||||
base_address = lief_binary.optional_header.imagebase
|
||||
for section in lief_binary.sections:
|
||||
yield Section(section.name), base_address + section.virtual_address
|
||||
yield Section(section.name), AbsoluteVirtualAddress(base_address + section.virtual_address)
|
||||
|
||||
|
||||
def extract_file_strings(buf, **kwargs):
|
||||
@@ -54,10 +55,10 @@ def extract_file_strings(buf, **kwargs):
|
||||
extract ASCII and UTF-16 LE strings from file
|
||||
"""
|
||||
for s in capa.features.extractors.strings.extract_ascii_strings(buf):
|
||||
yield String(s.s), s.offset
|
||||
yield String(s.s), FileOffsetAddress(s.offset)
|
||||
|
||||
for s in capa.features.extractors.strings.extract_unicode_strings(buf):
|
||||
yield String(s.s), s.offset
|
||||
yield String(s.s), FileOffsetAddress(s.offset)
|
||||
|
||||
|
||||
def extract_file_function_names(smda_report, **kwargs):
|
||||
@@ -87,8 +88,8 @@ def extract_features(smda_report, buf):
|
||||
"""
|
||||
|
||||
for file_handler in FILE_HANDLERS:
|
||||
for feature, va in file_handler(smda_report=smda_report, buf=buf):
|
||||
yield feature, va
|
||||
for feature, addr in file_handler(smda_report=smda_report, buf=buf):
|
||||
yield feature, addr
|
||||
|
||||
|
||||
FILE_HANDLERS = (
|
||||
|
||||
@@ -1,38 +1,42 @@
|
||||
from capa.features.common import Characteristic
|
||||
from typing import Tuple, Iterator
|
||||
|
||||
from capa.features.common import Feature, Characteristic
|
||||
from capa.features.address import Address, AbsoluteVirtualAddress
|
||||
from capa.features.extractors import loops
|
||||
from capa.features.extractors.base_extractor import FunctionHandle
|
||||
|
||||
|
||||
def extract_function_calls_to(f):
|
||||
for inref in f.inrefs:
|
||||
yield Characteristic("calls to"), inref
|
||||
def extract_function_calls_to(f: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
for inref in f.inner.inrefs:
|
||||
yield Characteristic("calls to"), AbsoluteVirtualAddress(inref)
|
||||
|
||||
|
||||
def extract_function_loop(f):
|
||||
def extract_function_loop(f: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""
|
||||
parse if a function has a loop
|
||||
"""
|
||||
edges = []
|
||||
for bb_from, bb_tos in f.blockrefs.items():
|
||||
for bb_from, bb_tos in f.inner.blockrefs.items():
|
||||
for bb_to in bb_tos:
|
||||
edges.append((bb_from, bb_to))
|
||||
|
||||
if edges and loops.has_loop(edges):
|
||||
yield Characteristic("loop"), f.offset
|
||||
yield Characteristic("loop"), f.address
|
||||
|
||||
|
||||
def extract_features(f):
|
||||
def extract_features(f: FunctionHandle):
|
||||
"""
|
||||
extract features from the given function.
|
||||
|
||||
args:
|
||||
f (smda.common.SmdaFunction): the function from which to extract features
|
||||
f: the function from which to extract features
|
||||
|
||||
yields:
|
||||
Tuple[Feature, int]: the features and their location found in this function.
|
||||
Tuple[Feature, Address]: the features and their location found in this function.
|
||||
"""
|
||||
for func_handler in FUNCTION_HANDLERS:
|
||||
for feature, va in func_handler(f):
|
||||
yield feature, va
|
||||
for feature, addr in func_handler(f):
|
||||
yield feature, addr
|
||||
|
||||
|
||||
FUNCTION_HANDLERS = (extract_function_calls_to, extract_function_loop)
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import logging
|
||||
|
||||
from capa.features.common import ARCH_I386, ARCH_AMD64, Arch
|
||||
from capa.features.address import NO_ADDRESS
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -8,9 +9,9 @@ logger = logging.getLogger(__name__)
|
||||
def extract_arch(smda_report):
|
||||
if smda_report.architecture == "intel":
|
||||
if smda_report.bitness == 32:
|
||||
yield Arch(ARCH_I386), 0x0
|
||||
yield Arch(ARCH_I386), NO_ADDRESS
|
||||
elif smda_report.bitness == 64:
|
||||
yield Arch(ARCH_AMD64), 0x0
|
||||
yield Arch(ARCH_AMD64), NO_ADDRESS
|
||||
else:
|
||||
# we likely end up here:
|
||||
# 1. handling a new architecture (e.g. aarch64)
|
||||
|
||||
@@ -1,12 +1,15 @@
|
||||
import re
|
||||
import string
|
||||
import struct
|
||||
from typing import Tuple, Iterator
|
||||
|
||||
from smda.common.SmdaReport import SmdaReport
|
||||
import smda
|
||||
|
||||
import capa.features.extractors.helpers
|
||||
from capa.features.insn import API, MAX_STRUCTURE_SIZE, Number, Offset, Mnemonic, OperandNumber, OperandOffset
|
||||
from capa.features.common import MAX_BYTES_FEATURE_SIZE, THUNK_CHAIN_DEPTH_DELTA, Bytes, String, Characteristic
|
||||
from capa.features.common import MAX_BYTES_FEATURE_SIZE, THUNK_CHAIN_DEPTH_DELTA, Bytes, String, Feature, Characteristic
|
||||
from capa.features.address import Address, AbsoluteVirtualAddress
|
||||
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle
|
||||
|
||||
# security cookie checks may perform non-zeroing XORs, these are expected within a certain
|
||||
# byte range within the first and returning basic blocks, this helps to reduce FP features
|
||||
@@ -15,17 +18,20 @@ PATTERN_HEXNUM = re.compile(r"[+\-] (?P<num>0x[a-fA-F0-9]+)")
|
||||
PATTERN_SINGLENUM = re.compile(r"[+\-] (?P<num>[0-9])")
|
||||
|
||||
|
||||
def extract_insn_api_features(f, bb, insn):
|
||||
def extract_insn_api_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""parse API features from the given instruction."""
|
||||
if insn.offset in f.apirefs:
|
||||
api_entry = f.apirefs[insn.offset]
|
||||
f: smda.Function = fh.inner
|
||||
insn: smda.Insn = ih.inner
|
||||
|
||||
if ih.address in f.apirefs:
|
||||
api_entry = f.apirefs[ih.address]
|
||||
# reformat
|
||||
dll_name, api_name = api_entry.split("!")
|
||||
dll_name = dll_name.split(".")[0]
|
||||
dll_name = dll_name.lower()
|
||||
for name in capa.features.extractors.helpers.generate_symbols(dll_name, api_name):
|
||||
yield API(name), insn.offset
|
||||
elif insn.offset in f.outrefs:
|
||||
yield API(name), ih.address
|
||||
elif ih.address in f.outrefs:
|
||||
current_function = f
|
||||
current_instruction = insn
|
||||
for index in range(THUNK_CHAIN_DEPTH_DELTA):
|
||||
@@ -44,7 +50,7 @@ def extract_insn_api_features(f, bb, insn):
|
||||
dll_name = dll_name.split(".")[0]
|
||||
dll_name = dll_name.lower()
|
||||
for name in capa.features.extractors.helpers.generate_symbols(dll_name, api_name):
|
||||
yield API(name), insn.offset
|
||||
yield API(name), ih.address
|
||||
elif referenced_function.num_instructions == 1 and referenced_function.num_outrefs == 1:
|
||||
current_function = referenced_function
|
||||
current_instruction = [i for i in referenced_function.getInstructions()][0]
|
||||
@@ -52,11 +58,14 @@ def extract_insn_api_features(f, bb, insn):
|
||||
return
|
||||
|
||||
|
||||
def extract_insn_number_features(f, bb, insn):
|
||||
def extract_insn_number_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""parse number features from the given instruction."""
|
||||
# example:
|
||||
#
|
||||
# push 3136B0h ; dwControlCode
|
||||
f: smda.Function = fh.inner
|
||||
insn: smda.Insn = ih.inner
|
||||
|
||||
operands = [o.strip() for o in insn.operands.split(",")]
|
||||
if insn.mnemonic == "add" and operands[0] in ["esp", "rsp"]:
|
||||
# skip things like:
|
||||
@@ -72,8 +81,8 @@ def extract_insn_number_features(f, bb, insn):
|
||||
except ValueError:
|
||||
continue
|
||||
else:
|
||||
yield Number(value), insn.offset
|
||||
yield OperandNumber(i, value), insn.offset
|
||||
yield Number(value), ih.address
|
||||
yield OperandNumber(i, value), ih.address
|
||||
|
||||
if insn.mnemonic == "add" and 0 < value < MAX_STRUCTURE_SIZE:
|
||||
# for pattern like:
|
||||
@@ -81,8 +90,8 @@ def extract_insn_number_features(f, bb, insn):
|
||||
# add eax, 0x10
|
||||
#
|
||||
# assume 0x10 is also an offset (imagine eax is a pointer).
|
||||
yield Offset(value), insn.offset
|
||||
yield OperandOffset(i, value), insn.offset
|
||||
yield Offset(value), ih.address
|
||||
yield OperandOffset(i, value), ih.address
|
||||
|
||||
|
||||
def read_bytes(smda_report, va, num_bytes=None):
|
||||
@@ -131,12 +140,15 @@ def derefs(smda_report, p):
|
||||
p = val
|
||||
|
||||
|
||||
def extract_insn_bytes_features(f, bb, insn):
|
||||
def extract_insn_bytes_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""
|
||||
parse byte sequence features from the given instruction.
|
||||
example:
|
||||
# push offset iid_004118d4_IShellLinkA ; riid
|
||||
"""
|
||||
f: smda.Function = fh.inner
|
||||
insn: smda.Insn = ih.inner
|
||||
|
||||
for data_ref in insn.getDataRefs():
|
||||
for v in derefs(f.smda_report, data_ref):
|
||||
bytes_read = read_bytes(f.smda_report, v)
|
||||
@@ -145,7 +157,7 @@ def extract_insn_bytes_features(f, bb, insn):
|
||||
if capa.features.extractors.helpers.all_zeros(bytes_read):
|
||||
continue
|
||||
|
||||
yield Bytes(bytes_read), insn.offset
|
||||
yield Bytes(bytes_read), ih.address
|
||||
|
||||
|
||||
def detect_ascii_len(smda_report, offset):
|
||||
@@ -189,24 +201,29 @@ def read_string(smda_report, offset):
|
||||
return read_bytes(smda_report, offset, ulen).decode("utf-16")
|
||||
|
||||
|
||||
def extract_insn_string_features(f, bb, insn):
|
||||
def extract_insn_string_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""parse string features from the given instruction."""
|
||||
# example:
|
||||
#
|
||||
# push offset aAcr ; "ACR > "
|
||||
f: smda.Function = fh.inner
|
||||
insn: smda.Insn = ih.inner
|
||||
|
||||
for data_ref in insn.getDataRefs():
|
||||
for v in derefs(f.smda_report, data_ref):
|
||||
string_read = read_string(f.smda_report, v)
|
||||
if string_read:
|
||||
yield String(string_read.rstrip("\x00")), insn.offset
|
||||
yield String(string_read.rstrip("\x00")), ih.address
|
||||
|
||||
|
||||
def extract_insn_offset_features(f, bb, insn):
|
||||
def extract_insn_offset_features(f, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""parse structure offset features from the given instruction."""
|
||||
# examples:
|
||||
#
|
||||
# mov eax, [esi + 4]
|
||||
# mov eax, [esi + ecx + 16384]
|
||||
insn: smda.Insn = ih.inner
|
||||
|
||||
operands = [o.strip() for o in insn.operands.split(",")]
|
||||
for i, operand in enumerate(operands):
|
||||
if "esp" in operand or "ebp" in operand or "rbp" in operand:
|
||||
@@ -234,13 +251,13 @@ def extract_insn_offset_features(f, bb, insn):
|
||||
# lea eax, [ebx + 1]
|
||||
#
|
||||
# assume 1 is also an offset (imagine ebx is a zero register).
|
||||
yield Number(number), insn.offset
|
||||
yield OperandNumber(i, number), insn.offset
|
||||
yield Number(number), ih.address
|
||||
yield OperandNumber(i, number), ih.address
|
||||
|
||||
continue
|
||||
|
||||
yield Offset(number), insn.offset
|
||||
yield OperandOffset(i, number), insn.offset
|
||||
yield Offset(number), ih.address
|
||||
yield OperandOffset(i, number), ih.address
|
||||
|
||||
|
||||
def is_security_cookie(f, bb, insn):
|
||||
@@ -254,21 +271,26 @@ def is_security_cookie(f, bb, insn):
|
||||
for index, block in enumerate(f.getBlocks()):
|
||||
# expect security cookie init in first basic block within first bytes (instructions)
|
||||
block_instructions = [i for i in block.getInstructions()]
|
||||
if index == 0 and insn.offset < (block_instructions[0].offset + SECURITY_COOKIE_BYTES_DELTA):
|
||||
if index == 0 and ih.address < (block_instructions[0].offset + SECURITY_COOKIE_BYTES_DELTA):
|
||||
return True
|
||||
# ... or within last bytes (instructions) before a return
|
||||
if block_instructions[-1].mnemonic.startswith("ret") and insn.offset > (
|
||||
if block_instructions[-1].mnemonic.startswith("ret") and ih.address > (
|
||||
block_instructions[-1].offset - SECURITY_COOKIE_BYTES_DELTA
|
||||
):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def extract_insn_nzxor_characteristic_features(f, bb, insn):
|
||||
def extract_insn_nzxor_characteristic_features(
|
||||
fh: FunctionHandle, bh: BBHandle, ih: InsnHandle
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""
|
||||
parse non-zeroing XOR instruction from the given instruction.
|
||||
ignore expected non-zeroing XORs, e.g. security cookies.
|
||||
"""
|
||||
f: smda.Function = fh.inner
|
||||
bb: smda.BasicBlock = bh.inner
|
||||
insn: smda.Insn = ih.inner
|
||||
|
||||
if insn.mnemonic not in ("xor", "xorpd", "xorps", "pxor"):
|
||||
return
|
||||
@@ -280,18 +302,20 @@ def extract_insn_nzxor_characteristic_features(f, bb, insn):
|
||||
if is_security_cookie(f, bb, insn):
|
||||
return
|
||||
|
||||
yield Characteristic("nzxor"), insn.offset
|
||||
yield Characteristic("nzxor"), ih.address
|
||||
|
||||
|
||||
def extract_insn_mnemonic_features(f, bb, insn):
|
||||
def extract_insn_mnemonic_features(f, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""parse mnemonic features from the given instruction."""
|
||||
yield Mnemonic(insn.mnemonic), insn.offset
|
||||
yield Mnemonic(ih.inner.mnemonic), ih.address
|
||||
|
||||
|
||||
def extract_insn_obfs_call_plus_5_characteristic_features(f, bb, insn):
|
||||
def extract_insn_obfs_call_plus_5_characteristic_features(f, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""
|
||||
parse call $+5 instruction from the given instruction.
|
||||
"""
|
||||
insn: smda.Insn = ih.inner
|
||||
|
||||
if insn.mnemonic != "call":
|
||||
return
|
||||
|
||||
@@ -299,13 +323,14 @@ def extract_insn_obfs_call_plus_5_characteristic_features(f, bb, insn):
|
||||
return
|
||||
|
||||
if int(insn.operands, 16) == insn.offset + 5:
|
||||
yield Characteristic("call $+5"), insn.offset
|
||||
yield Characteristic("call $+5"), ih.address
|
||||
|
||||
|
||||
def extract_insn_peb_access_characteristic_features(f, bb, insn):
|
||||
def extract_insn_peb_access_characteristic_features(f, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""
|
||||
parse peb access from the given function. fs:[0x30] on x86, gs:[0x60] on x64
|
||||
"""
|
||||
insn: smda.Insn = ih.inner
|
||||
|
||||
if insn.mnemonic not in ["push", "mov"]:
|
||||
return
|
||||
@@ -313,65 +338,75 @@ def extract_insn_peb_access_characteristic_features(f, bb, insn):
|
||||
operands = [o.strip() for o in insn.operands.split(",")]
|
||||
for operand in operands:
|
||||
if "fs:" in operand and "0x30" in operand:
|
||||
yield Characteristic("peb access"), insn.offset
|
||||
yield Characteristic("peb access"), ih.address
|
||||
elif "gs:" in operand and "0x60" in operand:
|
||||
yield Characteristic("peb access"), insn.offset
|
||||
yield Characteristic("peb access"), ih.address
|
||||
|
||||
|
||||
def extract_insn_segment_access_features(f, bb, insn):
|
||||
def extract_insn_segment_access_features(f, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""parse the instruction for access to fs or gs"""
|
||||
insn: smda.Insn = ih.inner
|
||||
|
||||
operands = [o.strip() for o in insn.operands.split(",")]
|
||||
for operand in operands:
|
||||
if "fs:" in operand:
|
||||
yield Characteristic("fs access"), insn.offset
|
||||
yield Characteristic("fs access"), ih.address
|
||||
elif "gs:" in operand:
|
||||
yield Characteristic("gs access"), insn.offset
|
||||
yield Characteristic("gs access"), ih.address
|
||||
|
||||
|
||||
def extract_insn_cross_section_cflow(f, bb, insn):
|
||||
def extract_insn_cross_section_cflow(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""
|
||||
inspect the instruction for a CALL or JMP that crosses section boundaries.
|
||||
"""
|
||||
f: smda.Function = fh.inner
|
||||
insn: smda.Insn = ih.inner
|
||||
|
||||
if insn.mnemonic in ["call", "jmp"]:
|
||||
if insn.offset in f.apirefs:
|
||||
if ih.address in f.apirefs:
|
||||
return
|
||||
|
||||
smda_report = insn.smda_function.smda_report
|
||||
if insn.offset in f.outrefs:
|
||||
for target in f.outrefs[insn.offset]:
|
||||
if smda_report.getSection(insn.offset) != smda_report.getSection(target):
|
||||
yield Characteristic("cross section flow"), insn.offset
|
||||
if ih.address in f.outrefs:
|
||||
for target in f.outrefs[ih.address]:
|
||||
if smda_report.getSection(ih.address) != smda_report.getSection(target):
|
||||
yield Characteristic("cross section flow"), ih.address
|
||||
elif insn.operands.startswith("0x"):
|
||||
target = int(insn.operands, 16)
|
||||
if smda_report.getSection(insn.offset) != smda_report.getSection(target):
|
||||
yield Characteristic("cross section flow"), insn.offset
|
||||
if smda_report.getSection(ih.address) != smda_report.getSection(target):
|
||||
yield Characteristic("cross section flow"), ih.address
|
||||
|
||||
|
||||
# this is a feature that's most relevant at the function scope,
|
||||
# however, its most efficient to extract at the instruction scope.
|
||||
def extract_function_calls_from(f, bb, insn):
|
||||
def extract_function_calls_from(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
f: smda.Function = fh.inner
|
||||
insn: smda.Insn = ih.inner
|
||||
|
||||
if insn.mnemonic != "call":
|
||||
return
|
||||
|
||||
if insn.offset in f.outrefs:
|
||||
for outref in f.outrefs[insn.offset]:
|
||||
yield Characteristic("calls from"), outref
|
||||
if ih.address in f.outrefs:
|
||||
for outref in f.outrefs[ih.address]:
|
||||
yield Characteristic("calls from"), AbsoluteVirtualAddress(outref)
|
||||
|
||||
if outref == f.offset:
|
||||
# if we found a jump target and it's the function address
|
||||
# mark as recursive
|
||||
yield Characteristic("recursive call"), outref
|
||||
if insn.offset in f.apirefs:
|
||||
yield Characteristic("calls from"), insn.offset
|
||||
yield Characteristic("recursive call"), AbsoluteVirtualAddress(outref)
|
||||
if ih.address in f.apirefs:
|
||||
yield Characteristic("calls from"), ih.address
|
||||
|
||||
|
||||
# this is a feature that's most relevant at the function or basic block scope,
|
||||
# however, its most efficient to extract at the instruction scope.
|
||||
def extract_function_indirect_call_characteristic_features(f, bb, insn):
|
||||
def extract_function_indirect_call_characteristic_features(f, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""
|
||||
extract indirect function call characteristic (e.g., call eax or call dword ptr [edx+4])
|
||||
does not include calls like => call ds:dword_ABD4974
|
||||
"""
|
||||
insn: smda.Insn = ih.inner
|
||||
|
||||
if insn.mnemonic != "call":
|
||||
return
|
||||
if insn.operands.startswith("0x"):
|
||||
@@ -383,7 +418,7 @@ def extract_function_indirect_call_characteristic_features(f, bb, insn):
|
||||
# call edx
|
||||
# call dword ptr [eax+50h]
|
||||
# call qword ptr [rsp+78h]
|
||||
yield Characteristic("indirect call"), insn.offset
|
||||
yield Characteristic("indirect call"), ih.address
|
||||
|
||||
|
||||
def extract_features(f, bb, insn):
|
||||
@@ -391,16 +426,16 @@ def extract_features(f, bb, insn):
|
||||
extract features from the given insn.
|
||||
|
||||
args:
|
||||
f (smda.common.SmdaFunction): the function to process.
|
||||
bb (smda.common.SmdaBasicBlock): the basic block to process.
|
||||
insn (smda.common.SmdaInstruction): the instruction to process.
|
||||
f: the function to process.
|
||||
bb: the basic block to process.
|
||||
insn: the instruction to process.
|
||||
|
||||
yields:
|
||||
Tuple[Feature, int]: the features and their location found in this insn.
|
||||
Tuple[Feature, Address]: the features and their location found in this insn.
|
||||
"""
|
||||
for insn_handler in INSTRUCTION_HANDLERS:
|
||||
for feature, va in insn_handler(f, bb, insn):
|
||||
yield feature, va
|
||||
for feature, addr in insn_handler(f, bb, insn):
|
||||
yield feature, addr
|
||||
|
||||
|
||||
INSTRUCTION_HANDLERS = (
|
||||
|
||||
Reference in New Issue
Block a user