mirror of
https://github.com/mandiant/capa.git
synced 2025-12-12 15:49:46 -08:00
Merge branch 'dotnet-main' of github.com:mandiant/capa into feature-981
This commit is contained in:
@@ -12,6 +12,7 @@
|
||||
### Breaking Changes
|
||||
|
||||
- instruction scope and operand feature are new and are not backwards compatible with older versions of capa
|
||||
- Python 3.7 is now the minimum supported Python version #866 @williballenthin
|
||||
- remove /x32 and /x64 flavors of number and operand features #932 @williballenthin
|
||||
- the tool now accepts multiple paths to rules, and JSON doc updated accordingly @williballenthin
|
||||
|
||||
|
||||
@@ -22,6 +22,7 @@ class Address(abc.ABC):
|
||||
|
||||
class AbsoluteVirtualAddress(int, Address):
|
||||
"""an absolute memory address"""
|
||||
|
||||
def __new__(cls, v):
    """create the address from a non-negative integer.

    args:
      v: the integer value of the address; must not be negative.
    """
    # zero is accepted: only negative values cannot be addresses.
    # (the prior `v > 0` check rejected address 0.)
    assert v >= 0
    return int.__new__(cls, v)
|
||||
@@ -35,6 +36,7 @@ class RelativeVirtualAddress(int, Address):
|
||||
|
||||
class FileOffsetAddress(int, Address):
|
||||
"""an address relative to the start of a file"""
|
||||
|
||||
def __new__(cls, v):
    """create the file offset from a non-negative integer.

    args:
      v: the offset from the start of the file; must not be negative.
    """
    # offset 0 is the very start of the file, so it must be accepted;
    # only negative offsets are invalid.
    # (the prior `v > 0` check rejected offset 0.)
    assert v >= 0
    return int.__new__(cls, v)
|
||||
@@ -42,6 +44,7 @@ class FileOffsetAddress(int, Address):
|
||||
|
||||
class DNTokenAddress(Token, Address):
    """a .NET token, usable wherever an Address is expected"""
|
||||
|
||||
|
||||
|
||||
@@ -382,3 +382,8 @@ def get_function_blocks(f):
|
||||
def is_basic_block_return(bb):
|
||||
"""check if basic block is return block"""
|
||||
return bb.type == idaapi.fcb_ret
|
||||
|
||||
|
||||
def has_sib(oper) -> bool:
    """check if the given operand has a SIB, judged by its `specflag1` field"""
    # via: https://reverseengineering.stackexchange.com/a/14300
    sib_flag = oper.specflag1
    return sib_flag == 1
|
||||
|
||||
@@ -135,6 +135,15 @@ def extract_insn_number_features(f, bb, insn):
|
||||
yield Number(const), insn.ea
|
||||
yield OperandNumber(i, const), insn.ea
|
||||
|
||||
if insn.itype == idaapi.NN_add and 0 < const < MAX_STRUCTURE_SIZE and op.type == idaapi.o_imm:
|
||||
# for pattern like:
|
||||
#
|
||||
# add eax, 0x10
|
||||
#
|
||||
# assume 0x10 is also an offset (imagine eax is a pointer).
|
||||
yield Offset(const), insn.ea
|
||||
yield OperandOffset(i, const), insn.ea
|
||||
|
||||
|
||||
def extract_insn_bytes_features(f, bb, insn):
|
||||
"""parse referenced byte sequences
|
||||
@@ -209,6 +218,25 @@ def extract_insn_offset_features(f, bb, insn):
|
||||
yield Offset(op_off), insn.ea
|
||||
yield OperandOffset(i, op_off), insn.ea
|
||||
|
||||
if (
|
||||
insn.itype == idaapi.NN_lea
|
||||
and i == 1
|
||||
# o_displ is used for both:
|
||||
# [eax+1]
|
||||
# [eax+ebx+2]
|
||||
and op.type == idaapi.o_displ
|
||||
# but the SIB is only present for [eax+ebx+2]
|
||||
# which we don't want
|
||||
and not capa.features.extractors.ida.helpers.has_sib(op)
|
||||
):
|
||||
# for pattern like:
|
||||
#
|
||||
# lea eax, [ebx + 1]
|
||||
#
|
||||
# assume 1 is also a number (imagine ebx is a zero register).
|
||||
yield Number(op_off), insn.ea
|
||||
yield OperandNumber(i, op_off), insn.ea
|
||||
|
||||
|
||||
def contains_stack_cookie_keywords(s):
|
||||
"""check if string contains stack cookie keywords
|
||||
|
||||
@@ -5,7 +5,7 @@ import struct
|
||||
from smda.common.SmdaReport import SmdaReport
|
||||
|
||||
import capa.features.extractors.helpers
|
||||
from capa.features.insn import API, Number, Offset, Mnemonic
|
||||
from capa.features.insn import API, MAX_STRUCTURE_SIZE, Number, Offset, Mnemonic, OperandNumber, OperandOffset
|
||||
from capa.features.common import MAX_BYTES_FEATURE_SIZE, THUNK_CHAIN_DEPTH_DELTA, Bytes, String, Characteristic
|
||||
|
||||
# security cookie checks may perform non-zeroing XORs, these are expected within a certain
|
||||
@@ -64,15 +64,25 @@ def extract_insn_number_features(f, bb, insn):
|
||||
# .text:00401140 call sub_407E2B
|
||||
# .text:00401145 add esp, 0Ch
|
||||
return
|
||||
for operand in operands:
|
||||
for i, operand in enumerate(operands):
|
||||
try:
|
||||
# The result of bitwise operations is calculated as though carried out
|
||||
# in two’s complement with an infinite number of sign bits
|
||||
value = int(operand, 16) & ((1 << f.smda_report.bitness) - 1)
|
||||
|
||||
yield Number(value), insn.offset
|
||||
except:
|
||||
except ValueError:
|
||||
continue
|
||||
else:
|
||||
yield Number(value), insn.offset
|
||||
yield OperandNumber(i, value), insn.offset
|
||||
|
||||
if insn.mnemonic == "add" and 0 < value < MAX_STRUCTURE_SIZE:
|
||||
# for pattern like:
|
||||
#
|
||||
# add eax, 0x10
|
||||
#
|
||||
# assume 0x10 is also an offset (imagine eax is a pointer).
|
||||
yield Offset(value), insn.offset
|
||||
yield OperandOffset(i, value), insn.offset
|
||||
|
||||
|
||||
def read_bytes(smda_report, va, num_bytes=None):
|
||||
@@ -198,11 +208,10 @@ def extract_insn_offset_features(f, bb, insn):
|
||||
# mov eax, [esi + 4]
|
||||
# mov eax, [esi + ecx + 16384]
|
||||
operands = [o.strip() for o in insn.operands.split(",")]
|
||||
for operand in operands:
|
||||
if "ptr" not in operand:
|
||||
continue
|
||||
for i, operand in enumerate(operands):
|
||||
if "esp" in operand or "ebp" in operand or "rbp" in operand:
|
||||
continue
|
||||
|
||||
number = 0
|
||||
number_hex = re.search(PATTERN_HEXNUM, operand)
|
||||
number_int = re.search(PATTERN_SINGLENUM, operand)
|
||||
@@ -212,7 +221,26 @@ def extract_insn_offset_features(f, bb, insn):
|
||||
elif number_int:
|
||||
number = int(number_int.group("num"))
|
||||
number = -1 * number if number_int.group().startswith("-") else number
|
||||
|
||||
if "ptr" not in operand:
|
||||
if (
|
||||
insn.mnemonic == "lea"
|
||||
and i == 1
|
||||
and (operand.count("+") + operand.count("-")) == 1
|
||||
and operand.count("*") == 0
|
||||
):
|
||||
# for pattern like:
|
||||
#
|
||||
# lea eax, [ebx + 1]
|
||||
#
|
||||
# assume 1 is also a number (imagine ebx is a zero register).
|
||||
yield Number(number), insn.offset
|
||||
yield OperandNumber(i, number), insn.offset
|
||||
|
||||
continue
|
||||
|
||||
yield Offset(number), insn.offset
|
||||
yield OperandOffset(i, number), insn.offset
|
||||
|
||||
|
||||
def is_security_cookie(f, bb, insn):
|
||||
|
||||
@@ -19,7 +19,7 @@ import envi.archs.amd64.disasm
|
||||
|
||||
import capa.features.extractors.helpers
|
||||
import capa.features.extractors.viv.helpers
|
||||
from capa.features.insn import API, Number, Offset, Mnemonic, OperandNumber, OperandOffset
|
||||
from capa.features.insn import API, MAX_STRUCTURE_SIZE, Number, Offset, Mnemonic, OperandNumber, OperandOffset
|
||||
from capa.features.common import MAX_BYTES_FEATURE_SIZE, THUNK_CHAIN_DEPTH_DELTA, Bytes, String, Feature, Characteristic
|
||||
from capa.features.address import Address, AbsoluteVirtualAddress
|
||||
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle
|
||||
@@ -579,6 +579,15 @@ def extract_op_number_features(
|
||||
yield Number(v), ihandle.address
|
||||
yield OperandNumber(i, v), ihandle.address
|
||||
|
||||
if insn.mnem == "add" and 0 < v < MAX_STRUCTURE_SIZE and isinstance(oper, envi.archs.i386.disasm.i386ImmOper):
|
||||
# for pattern like:
|
||||
#
|
||||
# add eax, 0x10
|
||||
#
|
||||
# assume 0x10 is also an offset (imagine eax is a pointer).
|
||||
yield Offset(v), insn.va
|
||||
yield OperandOffset(i, v), insn.va
|
||||
|
||||
|
||||
def extract_op_offset_features(f, bb, ihandle: InsnHandle, i, oper: envi.Operand) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""parse structure offset features from the given operand."""
|
||||
@@ -608,6 +617,15 @@ def extract_op_offset_features(f, bb, ihandle: InsnHandle, i, oper: envi.Operand
|
||||
yield Offset(v), ihandle.address
|
||||
yield OperandOffset(i, v), ihandle.address
|
||||
|
||||
if insn.mnem == "lea" and i == 1 and not f.vw.probeMemory(v, 1, envi.memory.MM_READ):
|
||||
# for pattern like:
|
||||
#
|
||||
# lea eax, [ebx + 1]
|
||||
#
|
||||
# assume 1 is also a number (imagine ebx is a zero register).
|
||||
yield Number(v), insn.va
|
||||
yield OperandNumber(i, v), insn.va
|
||||
|
||||
# like: [esi + ecx + 16384]
|
||||
# reg ^ ^
|
||||
# index ^
|
||||
|
||||
@@ -29,6 +29,10 @@ class Number(Feature):
|
||||
return capa.render.utils.hex(self.value)
|
||||
|
||||
|
||||
# max recognized structure size (and therefore, offset size)
|
||||
MAX_STRUCTURE_SIZE = 0x10000
|
||||
|
||||
|
||||
class Offset(Feature):
|
||||
def __init__(self, value: int, description=None):
|
||||
super(Offset, self).__init__(value, description=description)
|
||||
|
||||
Submodule tests/data updated: 12c64af268...11ae8d0d38
@@ -37,7 +37,8 @@ from capa.features.common import (
|
||||
)
|
||||
|
||||
CD = os.path.dirname(__file__)
|
||||
DNFILE_TESTFILES = "dnfile-testfiles"
|
||||
DOTNET_DIR = os.path.join(CD, "data", "dotnet")
|
||||
DNFILE_TESTFILES = os.path.join(DOTNET_DIR, "dnfile-testfiles")
|
||||
|
||||
|
||||
@contextlib.contextmanager
|
||||
@@ -181,6 +182,14 @@ def extract_basic_block_features(extractor, f, bb):
|
||||
return features
|
||||
|
||||
|
||||
# f may not be hashable (e.g. ida func_t) so cannot @lru_cache this
def extract_instruction_features(extractor, f, bb, insn):
    """collect the features of a single instruction.

    returns a defaultdict mapping each feature yielded by
    `extractor.extract_insn_features(f, bb, insn)` to the set of
    addresses it was yielded with.
    """
    locations_by_feature = collections.defaultdict(set)
    pairs = extractor.extract_insn_features(f, bb, insn)
    for pair in pairs:
        feature, va = pair
        locations_by_feature[feature].add(va)
    return locations_by_feature
|
||||
|
||||
|
||||
# note: to reduce the testing time, it's recommended to reuse existing test samples, if possible
|
||||
def get_data_path_by_name(name):
|
||||
if name == "mimikatz":
|
||||
@@ -234,7 +243,7 @@ def get_data_path_by_name(name):
|
||||
elif name.startswith("b9f5b"):
|
||||
return os.path.join(CD, "data", "b9f5bd514485fb06da39beff051b9fdc.exe_")
|
||||
elif name.startswith("mixed-mode-64"):
|
||||
return os.path.join(CD, "data", DNFILE_TESTFILES, "mixed-mode", "ModuleCode", "bin", "ModuleCode_amd64.exe")
|
||||
return os.path.join(DNFILE_TESTFILES, "mixed-mode", "ModuleCode", "bin", "ModuleCode_amd64.exe")
|
||||
else:
|
||||
raise ValueError("unexpected sample fixture: %s" % name)
|
||||
|
||||
@@ -317,6 +326,13 @@ def get_basic_block(extractor, f, va):
|
||||
raise ValueError("basic block not found")
|
||||
|
||||
|
||||
def get_instruction(extractor, f, bb, va):
    """find the instruction in the given basic block whose integer value equals `va`.

    raises:
      ValueError: if none of the block's instructions match.
    """
    not_found = object()
    candidates = (insn for insn in extractor.get_instructions(f, bb) if int(insn) == va)
    match = next(candidates, not_found)
    if match is not_found:
        raise ValueError("instruction not found")
    return match
|
||||
|
||||
|
||||
def resolve_scope(scope):
|
||||
if scope == "file":
|
||||
|
||||
@@ -328,8 +344,32 @@ def resolve_scope(scope):
|
||||
|
||||
inner_file.__name__ = scope
|
||||
return inner_file
|
||||
elif "insn=" in scope:
|
||||
# like `function=0x401000,bb=0x40100A,insn=0x40100A`
|
||||
assert "function=" in scope
|
||||
assert "bb=" in scope
|
||||
assert "insn=" in scope
|
||||
fspec, _, spec = scope.partition(",")
|
||||
bbspec, _, ispec = spec.partition(",")
|
||||
fva = int(fspec.partition("=")[2], 0x10)
|
||||
bbva = int(bbspec.partition("=")[2], 0x10)
|
||||
iva = int(ispec.partition("=")[2], 0x10)
|
||||
|
||||
def inner_insn(extractor):
|
||||
f = get_function(extractor, fva)
|
||||
bb = get_basic_block(extractor, f, bbva)
|
||||
insn = get_instruction(extractor, f, bb, iva)
|
||||
features = extract_instruction_features(extractor, f, bb, insn)
|
||||
for k, vs in extract_global_features(extractor).items():
|
||||
features[k].update(vs)
|
||||
return features
|
||||
|
||||
inner_insn.__name__ = scope
|
||||
return inner_insn
|
||||
elif "bb=" in scope:
|
||||
# like `function=0x401000,bb=0x40100A`
|
||||
assert "function=" in scope
|
||||
assert "bb=" in scope
|
||||
fspec, _, bbspec = scope.partition(",")
|
||||
fva = int(fspec.partition("=")[2], 0x10)
|
||||
bbva = int(bbspec.partition("=")[2], 0x10)
|
||||
@@ -459,6 +499,30 @@ FEATURE_PRESENCE_TESTS = sorted(
|
||||
# insn/offset: negative
|
||||
("mimikatz", "function=0x4011FB", capa.features.insn.Offset(-0x1), True),
|
||||
("mimikatz", "function=0x4011FB", capa.features.insn.Offset(-0x2), True),
|
||||
#
|
||||
# insn/offset from mnemonic: add
|
||||
#
|
||||
# should not be considered, too big for an offset:
|
||||
# .text:00401D85 81 C1 00 00 00 80 add ecx, 80000000h
|
||||
("mimikatz", "function=0x401D64,bb=0x401D73,insn=0x401D85", capa.features.insn.Offset(0x80000000), False),
|
||||
# should not be considered, relative to stack:
|
||||
# .text:00401CF6 83 C4 10 add esp, 10h
|
||||
("mimikatz", "function=0x401CC7,bb=0x401CDE,insn=0x401CF6", capa.features.insn.Offset(0x10), False),
|
||||
# yes, this is also an offset (imagine eax is a pointer):
|
||||
# .text:0040223C 83 C0 04 add eax, 4
|
||||
("mimikatz", "function=0x402203,bb=0x402221,insn=0x40223C", capa.features.insn.Offset(0x4), True),
|
||||
#
|
||||
# insn/number from mnemonic: lea
|
||||
#
|
||||
# should not be considered, lea operand invalid encoding
|
||||
# .text:00471EE6 8D 1C 81 lea ebx, [ecx+eax*4]
|
||||
("mimikatz", "function=0x471EAB,bb=0x471ED8,insn=0x471EE6", capa.features.insn.Number(0x4), False),
|
||||
# should not be considered, lea operand invalid encoding
|
||||
# .text:004717B1 8D 4C 31 D0 lea ecx, [ecx+esi-30h]
|
||||
("mimikatz", "function=0x47153B,bb=0x4717AB,insn=0x4717B1", capa.features.insn.Number(-0x30), False),
|
||||
# yes, this is also a number (imagine ebx is zero):
|
||||
# .text:004018C0 8D 4B 02 lea ecx, [ebx+2]
|
||||
("mimikatz", "function=0x401873,bb=0x4018B2,insn=0x4018C0", capa.features.insn.Number(0x2), True),
|
||||
# insn/api
|
||||
("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptAcquireContextW"), True),
|
||||
("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptAcquireContext"), True),
|
||||
|
||||
@@ -22,14 +22,6 @@ def test_smda_features(sample, scope, feature, expected):
|
||||
if scope.__name__ == "file" and isinstance(feature, capa.features.file.FunctionName) and expected is True:
|
||||
pytest.xfail("SMDA has no function ID")
|
||||
|
||||
if "bb=" in scope.__name__ and isinstance(feature, capa.features.insn.OperandNumber) and expected is True:
|
||||
# SMDA not currently maintained, see: https://github.com/mandiant/capa/issues/937
|
||||
pytest.xfail("SMDA doesn't support operand numbers")
|
||||
|
||||
if "bb=" in scope.__name__ and isinstance(feature, capa.features.insn.OperandOffset) and expected is True:
|
||||
# SMDA not currently maintained, see: https://github.com/mandiant/capa/issues/937
|
||||
pytest.xfail("SMDA doesn't support operand offsets")
|
||||
|
||||
fixtures.do_test_feature_presence(fixtures.get_smda_extractor, sample, scope, feature, expected)
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user