Merge pull request #226 from fireeye/enhancement-223

IDA: resolve nested data references to strings/bytes
This commit is contained in:
Willi Ballenthin
2020-08-12 09:05:11 -06:00
committed by GitHub
3 changed files with 44 additions and 6 deletions

View File

@@ -331,3 +331,29 @@ def is_basic_block_tight_loop(bb):
if ref == bb.start_ea:
return True
return False
def find_data_reference_from_insn(insn, max_depth=10):
    """Follow a chain of single data references starting at an instruction.

    Starting at ``insn.ea``, repeatedly query ``idautils.DataRefsFrom`` and
    step to the referenced address for as long as the current address has
    exactly one outgoing data reference.

    Args:
        insn: instruction object; only its ``ea`` attribute is read.
        max_depth: maximum number of references to follow.

    Returns:
        The last address reached. This is ``insn.ea`` itself when the
        instruction has no data reference (or more than one), so callers can
        compare the result against ``insn.ea`` to tell whether anything was
        resolved.
    """
    ea = insn.ea
    # Every address already on the chain. Checking against the whole set
    # (rather than only the current address) stops on cycles of any length,
    # e.g. A -> B -> A, instead of spinning until max_depth is exhausted and
    # returning an address that depends on max_depth.
    visited = {ea}

    for _ in range(max_depth):
        data_refs = list(idautils.DataRefsFrom(ea))
        if len(data_refs) != 1:
            # no refs, or more than one ref: nested pointers are assumed to
            # have exactly one data reference, so the chain ends here
            break

        target = data_refs[0]
        if target in visited:
            # circular reference: stay on the last new address
            break

        visited.add(target)
        ea = target

    return ea

View File

@@ -119,11 +119,8 @@ def extract_insn_bytes_features(f, bb, insn):
example:
push offset iid_004118d4_IShellLinkA ; riid
"""
if idaapi.is_call_insn(insn):
# ignore call instructions
return
for ref in idautils.DataRefsFrom(insn.ea):
ref = capa.features.extractors.ida.helpers.find_data_reference_from_insn(insn)
if ref != insn.ea:
extracted_bytes = capa.features.extractors.ida.helpers.read_bytes_at(ref, MAX_BYTES_FEATURE_SIZE)
if extracted_bytes and not capa.features.extractors.helpers.all_zeros(extracted_bytes):
yield Bytes(extracted_bytes), insn.ea
@@ -140,7 +137,8 @@ def extract_insn_string_features(f, bb, insn):
example:
push offset aAcr ; "ACR > "
"""
for ref in idautils.DataRefsFrom(insn.ea):
ref = capa.features.extractors.ida.helpers.find_data_reference_from_insn(insn)
if ref != insn.ea:
found = capa.features.extractors.ida.helpers.find_string_at(ref)
if found:
yield String(found), insn.ea

View File

@@ -100,6 +100,13 @@ def test_string_features():
assert capa.features.String("bcrypt.dll") not in features
@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
def test_string_pointer_features():
    """The function at 0x0044EDEF must yield the String feature "INPUTEVENT"."""
    function = get_extractor().get_function(0x0044EDEF)
    extracted = extract_function_features(function)

    expected = capa.features.String("INPUTEVENT")
    assert expected in extracted
@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
def test_byte_features():
f = get_extractor().get_function(0x40105D)
@@ -109,6 +116,13 @@ def test_byte_features():
assert wanted.evaluate(features) == True
@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
def test_bytes_pointer_features():
    """The function at 0x0044EDEF must yield "INPUTEVENT" (UTF-16LE) as a Bytes feature."""
    function = get_extractor().get_function(0x0044EDEF)
    extracted = extract_function_features(function)

    # same text as the string-pointer test, but matched as raw UTF-16LE bytes
    wanted = capa.features.Bytes("INPUTEVENT".encode("utf-16le"))
    assert wanted.evaluate(extracted) == True
@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
def test_number_features():
f = get_extractor().get_function(0x40105D)