extract api features for thunk chains

closes #341
This commit is contained in:
Moritz Raabe
2020-10-20 14:48:04 +02:00
parent a442536246
commit 9a738ba413
5 changed files with 68 additions and 18 deletions

View File

@@ -16,6 +16,9 @@ import capa.engine
logger = logging.getLogger(__name__)
MAX_BYTES_FEATURE_SIZE = 0x100
# thunks may be chained so we specify a delta to control the depth to which these chains are explored
THUNK_CHAIN_DEPTH_DELTA = 5
# identifiers for supported architecture names that tweak a feature
# for example, offset/x32
ARCH_X32 = "x32"

View File

@@ -12,17 +12,21 @@ import idautils
import capa.features.extractors.helpers
import capa.features.extractors.ida.helpers
from capa.features import ARCH_X32, ARCH_X64, MAX_BYTES_FEATURE_SIZE, Bytes, String, Characteristic
from capa.features import (
ARCH_X32,
ARCH_X64,
MAX_BYTES_FEATURE_SIZE,
THUNK_CHAIN_DEPTH_DELTA,
Bytes,
String,
Characteristic,
)
from capa.features.insn import API, Number, Offset, Mnemonic
# security cookie checks may perform non-zeroing XORs, these are expected within a certain
# byte range within the first and returning basic blocks, this helps to reduce FP features
SECURITY_COOKIE_BYTES_DELTA = 0x40
# thunks may be chained so we specify a delta here to control the depth to which these chains
# are explored
THUNK_CHAIN_DEPTH_DELTA = 0x5
def get_arch(ctx):
"""
@@ -73,7 +77,7 @@ def check_for_api_call(ctx, insn):
break
f = idaapi.get_func(ref)
if not (f.flags & idaapi.FUNC_THUNK):
if not f or not (f.flags & idaapi.FUNC_THUNK):
break
if info:

View File

@@ -0,0 +1,20 @@
# Copyright (C) 2020 FireEye, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
from vivisect.const import REF_CODE
def get_coderef_from(vw, va):
    """
    look up the code cross-references that originate at `va` and return the
    destination address (`tova`) of the first one.

    vivisect xref tuple: (fromva, tova, reftype, rflags)

    args:
      vw: object providing `getXrefsFrom(va, reftype)` (presumably a vivisect workspace)
      va: address from which the code reference originates

    returns:
      the `tova` field of the first code xref, or None when there is no code xref
    """
    code_xrefs = vw.getXrefsFrom(va, REF_CODE)

    # nothing references code from this address
    if len(code_xrefs) == 0:
        return None

    # index 1 of the xref tuple is the destination address
    first_xref = code_xrefs[0]
    return first_xref[1]

View File

@@ -7,11 +7,19 @@
# See the License for the specific language governing permissions and limitations under the License.
import envi.memory
import vivisect.const
import envi.archs.i386.disasm
import capa.features.extractors.helpers
from capa.features import ARCH_X32, ARCH_X64, MAX_BYTES_FEATURE_SIZE, Bytes, String, Characteristic
import capa.features.extractors.viv.helpers
from capa.features import (
ARCH_X32,
ARCH_X64,
MAX_BYTES_FEATURE_SIZE,
THUNK_CHAIN_DEPTH_DELTA,
Bytes,
String,
Characteristic,
)
from capa.features.insn import API, Number, Offset, Mnemonic
from capa.features.extractors.viv.indirect_calls import NotFoundError, resolve_indirect_call
@@ -86,21 +94,30 @@ def extract_insn_api_features(f, bb, insn):
#
# this is also how calls to internal functions may be decoded on x64.
# see Lab21-01.exe_:0x140001178
elif isinstance(insn.opers[0], envi.archs.i386.disasm.i386PcRelOper):
target = insn.opers[0].getOperValue(insn)
#
# follow chained thunks, e.g. in 82bf6347acf15e5d883715dc289d8a2b at 0x14005E0FF in
# 0x140059342 (viv) / 0x14005E0C0 (IDA)
# 14005E0FF call j_ElfClearEventLogFileW (14005AAF8)
# 14005AAF8 jmp ElfClearEventLogFileW (14005E196)
# 14005E196 jmp cs:__imp_ElfClearEventLogFileW
try:
thunk = f.vw.getFunctionMeta(target, "Thunk")
except vivisect.exc.InvalidFunction:
elif isinstance(insn.opers[0], envi.archs.i386.disasm.i386PcRelOper):
imports = get_imports(f.vw)
target = capa.features.extractors.viv.helpers.get_coderef_from(f.vw, insn.va)
if not target:
return
else:
if thunk:
dll, _, symbol = thunk.rpartition(".")
if symbol.startswith("ord"):
symbol = "#" + symbol[len("ord") :]
for _ in range(THUNK_CHAIN_DEPTH_DELTA):
if target in imports:
dll, symbol = imports[target]
for name in capa.features.extractors.helpers.generate_symbols(dll, symbol):
print("nested thunk starting at %X in %X (%d)" % (insn.va, f.va, _))
yield API(name), insn.va
target = capa.features.extractors.viv.helpers.get_coderef_from(f.vw, target)
if not target:
return
# call via import on x64
# see Lab21-01.exe_:0x14000118C
elif isinstance(insn.opers[0], envi.archs.amd64.disasm.Amd64RipRelOper):

View File

@@ -144,6 +144,8 @@ def get_data_path_by_name(name):
return os.path.join(CD, "data", "c91887d861d9bd4a5872249b641bc9f9.exe_")
elif name.startswith("64d9f"):
return os.path.join(CD, "data", "64d9f7d96b99467f36e22fada623c3bb.dll_")
elif name.startswith("82bf6"):
return os.path.join(CD, "data", "82BF6347ACF15E5D883715DC289D8A2B.exe_")
else:
raise ValueError("unexpected sample fixture")
@@ -180,6 +182,8 @@ def get_sample_md5_by_name(name):
return "c91887d861d9bd4a5872249b641bc9f9"
elif name.startswith("64d9f"):
return "64d9f7d96b99467f36e22fada623c3bb"
elif name.startswith("82bf6"):
return "82bf6347acf15e5d883715dc289d8a2b"
else:
raise ValueError("unexpected sample fixture")
@@ -369,6 +373,8 @@ FEATURE_PRESENCE_TESTS = [
True,
),
("kernel32-64", "function=0x1800202B0", capa.features.insn.API("RtlCaptureContext"), True),
# insn/api: x64 nested thunk
("82bf6", "function=0x140059342", capa.features.insn.API("ElfClearEventLogFile"), True),
# insn/api: resolve indirect calls
("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.CreatePipe"), True),
("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.SetHandleInformation"), True),