mirror of
https://github.com/mandiant/capa.git
synced 2025-12-12 15:49:46 -08:00
Merge pull request #409 from fireeye/fix/extract-bytes
improve bytes feature extraction
This commit is contained in:
@@ -166,6 +166,10 @@ def basic_block_size(bb):
|
||||
|
||||
def read_bytes_at(ea, count):
|
||||
""" """
|
||||
# check if byte has a value, see get_wide_byte doc
|
||||
if not idc.is_loaded(ea):
|
||||
return b""
|
||||
|
||||
segm_end = idc.get_segm_end(ea)
|
||||
if ea + count > segm_end:
|
||||
return idc.get_bytes(ea, segm_end - ea)
|
||||
|
||||
@@ -148,6 +148,9 @@ def extract_insn_bytes_features(f, bb, insn):
|
||||
example:
|
||||
push offset iid_004118d4_IShellLinkA ; riid
|
||||
"""
|
||||
if idaapi.is_call_insn(insn):
|
||||
return
|
||||
|
||||
ref = capa.features.extractors.ida.helpers.find_data_reference_from_insn(insn)
|
||||
if ref != insn.ea:
|
||||
extracted_bytes = capa.features.extractors.ida.helpers.read_bytes_at(ref, MAX_BYTES_FEATURE_SIZE)
|
||||
|
||||
@@ -258,10 +258,10 @@ def extract_insn_bytes_features(f, bb, insn):
|
||||
example:
|
||||
# push offset iid_004118d4_IShellLinkA ; riid
|
||||
"""
|
||||
for oper in insn.opers:
|
||||
if insn.mnem == "call":
|
||||
continue
|
||||
if insn.mnem == "call":
|
||||
return
|
||||
|
||||
for oper in insn.opers:
|
||||
if isinstance(oper, envi.archs.i386.disasm.i386ImmOper):
|
||||
v = oper.getOperValue(oper)
|
||||
elif isinstance(oper, envi.archs.i386.disasm.i386RegMemOper):
|
||||
|
||||
@@ -10,6 +10,7 @@
|
||||
import os
|
||||
import sys
|
||||
import os.path
|
||||
import binascii
|
||||
import contextlib
|
||||
import collections
|
||||
|
||||
@@ -444,6 +445,8 @@ FEATURE_PRESENCE_TESTS = [
|
||||
("mimikatz", "function=0x40105D", capa.features.Bytes("SCardTransmit".encode("utf-16le")), True),
|
||||
("mimikatz", "function=0x40105D", capa.features.Bytes("ACR > ".encode("utf-16le")), True),
|
||||
("mimikatz", "function=0x40105D", capa.features.Bytes("nope".encode("ascii")), False),
|
||||
# IDA features included byte sequences read from invalid memory, fixed in #409
|
||||
("mimikatz", "function=0x44570F", capa.features.Bytes(binascii.unhexlify("FF" * 256)), False),
|
||||
# insn/bytes, pointer to bytes
|
||||
("mimikatz", "function=0x44EDEF", capa.features.Bytes("INPUTEVENT".encode("utf-16le")), True),
|
||||
# insn/characteristic(nzxor)
|
||||
|
||||
Reference in New Issue
Block a user