From bd63ded1dd52efcbf51c2646b72d075bce7e0ad6 Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Thu, 27 May 2021 12:59:00 -0600 Subject: [PATCH] file scope API features (#568) * smda: minor unrelated fixes * file features: extract API features at file scope for library functions closes #567 * changelog * ida: add file-scope API feature Co-authored-by: mike-hunhoff * fix lints from pylance * features: use "function-name" for recognized linked functions * pep8 * pep8 * rules: remove incorrect feature scope * tests: xfail SMDA tests relying on function id * tests: fixtures: order tests by sample, ideally improving memory usage * pep8 * pep8 * smda: xfail two more tests Co-authored-by: mike-hunhoff --- CHANGELOG.md | 1 + capa/features/__init__.py | 4 +- capa/features/extractors/ida/file.py | 13 +- capa/features/extractors/smda/file.py | 14 +- capa/features/extractors/viv/file.py | 16 +- capa/features/file.py | 8 + capa/rules.py | 4 + tests/fixtures.py | 355 +++++++++++++------------- tests/test_rules.py | 25 ++ tests/test_smda_features.py | 32 ++- 10 files changed, 289 insertions(+), 183 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b93d6307..599c42be 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ It includes many new rules, including all new techniques introduced in MITRE ATT - main: use FLIRT signatures to identify and ignore library code #446 @williballenthin - explorer: IDA 7.6 support #497 @williballenthin - scripts: capa2yara.py convert capa rules to YARA rules #561 @ruppde +- rule: add file-scope feature (`function-name`) for recognized library functions #567 @williballenthin - main: auto detect shellcode based on file extension #516 @mr-tz - main: more detailed progress bar output when matching functions #562 @mr-tz diff --git a/capa/features/__init__.py b/capa/features/__init__.py index c88cd153..175573ee 100644 --- a/capa/features/__init__.py +++ b/capa/features/__init__.py @@ -7,11 +7,11 @@ # See the License for the specific language governing permissions and limitations under the License. import re -import sys import codecs import logging import capa.engine +import capa.features logger = logging.getLogger(__name__) MAX_BYTES_FEATURE_SIZE = 0x100 @@ -202,7 +202,7 @@ class _MatchedRegex(Regex): class StringFactory(object): - def __new__(self, value, description=None): + def __new__(cls, value, description=None): if value.startswith("/") and (value.endswith("/") or value.endswith("/i")): return Regex(value, description=description) return String(value, description=description) diff --git a/capa/features/extractors/ida/file.py b/capa/features/extractors/ida/file.py index 6f55ed45..872978ae 100644 --- a/capa/features/extractors/ida/file.py +++ b/capa/features/extractors/ida/file.py @@ -16,7 +16,7 @@ import capa.features.extractors.helpers import capa.features.extractors.strings import capa.features.extractors.ida.helpers from capa.features import String, Characteristic -from capa.features.file import Export, Import, Section +from capa.features.file import Export, Import, Section, FunctionName def check_segment_for_pe(seg): @@ -143,6 +143,16 @@ def extract_file_strings(): yield String(s.s), (seg.start_ea + s.offset) +def extract_file_function_names(): + """ + extract the names of statically-linked library functions. + """ + for ea in idautils.Functions(): + if idaapi.get_func(ea).flags & idaapi.FUNC_LIB: + name = idaapi.get_name(ea) + yield FunctionName(name), ea + + def extract_features(): """extract file features""" for file_handler in FILE_HANDLERS: @@ -156,6 +166,7 @@ FILE_HANDLERS = ( extract_file_strings, extract_file_section_names, extract_file_embedded_pe, + extract_file_function_names, ) diff --git a/capa/features/extractors/smda/file.py b/capa/features/extractors/smda/file.py index 6f690dbe..7f9ed473 100644 --- a/capa/features/extractors/smda/file.py +++ b/capa/features/extractors/smda/file.py @@ -79,8 +79,8 @@ def extract_file_import_names(smda_report, file_path): library_name = imported_library.name.lower() library_name = library_name[:-4] if library_name.endswith(".dll") else library_name for func in imported_library.entries: + va = func.iat_address + smda_report.base_addr if func.name: - va = func.iat_address + smda_report.base_addr for name in capa.features.extractors.helpers.generate_symbols(library_name, func.name): yield Import(name), va elif func.is_ordinal: @@ -112,6 +112,16 @@ def extract_file_strings(smda_report, file_path): yield String(s.s), s.offset +def extract_file_function_names(smda_report, file_path): + """ + extract the names of statically-linked library functions. + """ + if False: + # using a `yield` here to force this to be a generator, not function. + yield NotImplementedError("SMDA doesn't have library matching") + return + + def extract_features(smda_report, file_path): """ extract file features from given workspace @@ -125,7 +135,6 @@ def extract_features(smda_report, file_path): """ for file_handler in FILE_HANDLERS: - result = file_handler(smda_report, file_path) for feature, va in file_handler(smda_report, file_path): yield feature, va @@ -136,4 +145,5 @@ FILE_HANDLERS = ( extract_file_import_names, extract_file_section_names, extract_file_strings, + extract_file_function_names, ) diff --git a/capa/features/extractors/viv/file.py b/capa/features/extractors/viv/file.py index feb6381d..274125af 100644 --- a/capa/features/extractors/viv/file.py +++ b/capa/features/extractors/viv/file.py @@ -7,11 +7,14 @@ # See the License for the specific language governing permissions and limitations under the License. import PE.carve as pe_carve # vivisect PE +import viv_utils +import viv_utils.flirt +import capa.features.insn import capa.features.extractors.helpers import capa.features.extractors.strings from capa.features import String, Characteristic -from capa.features.file import Export, Import, Section +from capa.features.file import Export, Import, Section, FunctionName def extract_file_embedded_pe(vw, file_path): @@ -80,6 +83,16 @@ def extract_file_strings(vw, file_path): yield String(s.s), s.offset +def extract_file_function_names(vw, file_path): + """ + extract the names of statically-linked library functions. + """ + for va in sorted(vw.getFunctions()): + if viv_utils.flirt.is_library_function(vw, va): + name = viv_utils.get_function_name(vw, va) + yield FunctionName(name), va + + def extract_features(vw, file_path): """ extract file features from given workspace @@ -103,4 +116,5 @@ FILE_HANDLERS = ( extract_file_import_names, extract_file_section_names, extract_file_strings, + extract_file_function_names, ) diff --git a/capa/features/file.py b/capa/features/file.py index f4629a55..16413816 100644 --- a/capa/features/file.py +++ b/capa/features/file.py @@ -25,3 +25,11 @@ class Section(Feature): def __init__(self, value, description=None): # value is section name super(Section, self).__init__(value, description=description) + + +class FunctionName(Feature): + """recognized name for statically linked function""" + + def __init__(self, name, description=None): + # value is function name + super(FunctionName, self).__init__(name, description=description) diff --git a/capa/rules.py b/capa/rules.py index de3bf4b9..493b2f07 100644 --- a/capa/rules.py +++ b/capa/rules.py @@ -23,6 +23,7 @@ import io import yaml import ruamel.yaml +import capa.rules import capa.engine import capa.features import capa.features.file @@ -70,6 +71,7 @@ SUPPORTED_FEATURES = { capa.features.file.Export, capa.features.file.Import, capa.features.file.Section, + capa.features.file.FunctionName, capa.features.Characteristic("embedded pe"), capa.features.String, }, @@ -230,6 +232,8 @@ def parse_feature(key): return capa.features.file.Section elif key == "match": return capa.features.MatchedRule + elif key == "function-name": + return capa.features.file.FunctionName else: raise InvalidRule("unexpected statement: %s" % key) diff --git a/tests/fixtures.py b/tests/fixtures.py index 9c7bd6a4..b93997f2 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -64,7 +64,7 @@ def xfail(condition, reason=None): # need to limit cache size so GitHub Actions doesn't run out of memory, see #545 -@lru_cache(maxsize=6) +@lru_cache(maxsize=1) def get_viv_extractor(path): import capa.features.extractors.viv @@ -314,179 +314,186 @@ def parametrize(params, values, **kwargs): return pytest.mark.parametrize(params, values, ids=ids, **kwargs) -FEATURE_PRESENCE_TESTS = [ - # file/characteristic("embedded pe") - ("pma12-04", "file", capa.features.Characteristic("embedded pe"), True), - # file/string - ("mimikatz", "file", capa.features.String("SCardControl"), True), - ("mimikatz", "file", capa.features.String("SCardTransmit"), True), - ("mimikatz", "file", capa.features.String("ACR > "), True), - ("mimikatz", "file", capa.features.String("nope"), False), - # file/sections - ("mimikatz", "file", capa.features.file.Section(".text"), True), - ("mimikatz", "file", capa.features.file.Section(".nope"), False), - # IDA doesn't extract unmapped sections by default - # ("mimikatz", "file", capa.features.file.Section(".rsrc"), True), - # file/exports - ("kernel32", "file", capa.features.file.Export("BaseThreadInitThunk"), True), - ("kernel32", "file", capa.features.file.Export("lstrlenW"), True), - ("kernel32", "file", capa.features.file.Export("nope"), False), - # file/imports - ("mimikatz", "file", capa.features.file.Import("advapi32.CryptSetHashParam"), True), - ("mimikatz", "file", capa.features.file.Import("CryptSetHashParam"), True), - ("mimikatz", "file", capa.features.file.Import("kernel32.IsWow64Process"), True), - ("mimikatz", "file", capa.features.file.Import("msvcrt.exit"), True), - ("mimikatz", "file", capa.features.file.Import("cabinet.#11"), True), - ("mimikatz", "file", capa.features.file.Import("#11"), False), - ("mimikatz", "file", capa.features.file.Import("#nope"), False), - ("mimikatz", "file", capa.features.file.Import("nope"), False), - ("mimikatz", "file", capa.features.file.Import("advapi32.CryptAcquireContextW"), True), - ("mimikatz", "file", capa.features.file.Import("advapi32.CryptAcquireContext"), True), - ("mimikatz", "file", capa.features.file.Import("CryptAcquireContextW"), True), - ("mimikatz", "file", capa.features.file.Import("CryptAcquireContext"), True), - # function/characteristic(loop) - ("mimikatz", "function=0x401517", capa.features.Characteristic("loop"), True), - ("mimikatz", "function=0x401000", capa.features.Characteristic("loop"), False), - # bb/characteristic(tight loop) - ("mimikatz", "function=0x402EC4", capa.features.Characteristic("tight loop"), True), - ("mimikatz", "function=0x401000", capa.features.Characteristic("tight loop"), False), - # bb/characteristic(stack string) - ("mimikatz", "function=0x4556E5", capa.features.Characteristic("stack string"), True), - ("mimikatz", "function=0x401000", capa.features.Characteristic("stack string"), False), - # bb/characteristic(tight loop) - ("mimikatz", "function=0x402EC4,bb=0x402F8E", capa.features.Characteristic("tight loop"), True), - ("mimikatz", "function=0x401000,bb=0x401000", capa.features.Characteristic("tight loop"), False), - # insn/mnemonic - ("mimikatz", "function=0x40105D", capa.features.insn.Mnemonic("push"), True), - ("mimikatz", "function=0x40105D", capa.features.insn.Mnemonic("movzx"), True), - ("mimikatz", "function=0x40105D", capa.features.insn.Mnemonic("xor"), True), - ("mimikatz", "function=0x40105D", capa.features.insn.Mnemonic("in"), False), - ("mimikatz", "function=0x40105D", capa.features.insn.Mnemonic("out"), False), - # insn/number - ("mimikatz", "function=0x40105D", capa.features.insn.Number(0xFF), True), - ("mimikatz", "function=0x40105D", capa.features.insn.Number(0x3136B0), True), - # insn/number: stack adjustments - ("mimikatz", "function=0x40105D", capa.features.insn.Number(0xC), False), - ("mimikatz", "function=0x40105D", capa.features.insn.Number(0x10), False), - # insn/number: arch flavors - ("mimikatz", "function=0x40105D", capa.features.insn.Number(0xFF), True), - ("mimikatz", "function=0x40105D", capa.features.insn.Number(0xFF, arch=ARCH_X32), True), - ("mimikatz", "function=0x40105D", capa.features.insn.Number(0xFF, arch=ARCH_X64), False), - # insn/offset - ("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x0), True), - ("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x4), True), - ("mimikatz", "function=0x40105D", capa.features.insn.Offset(0xC), True), - # insn/offset, issue #276 - ("64d9f", "function=0x10001510,bb=0x100015B0", capa.features.insn.Offset(0x4000), True), - # insn/offset: stack references - ("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x8), False), - ("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x10), False), - # insn/offset: negative - ("mimikatz", "function=0x4011FB", capa.features.insn.Offset(-0x1), True), - ("mimikatz", "function=0x4011FB", capa.features.insn.Offset(-0x2), True), - # insn/offset: arch flavors - ("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x0), True), - ("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x0, arch=ARCH_X32), True), - ("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x0, arch=ARCH_X64), False), - # insn/api - ("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptAcquireContextW"), True), - ("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptAcquireContext"), True), - ("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptGenKey"), True), - ("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptImportKey"), True), - ("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptDestroyKey"), True), - ("mimikatz", "function=0x403BAC", capa.features.insn.API("CryptAcquireContextW"), True), - ("mimikatz", "function=0x403BAC", capa.features.insn.API("CryptAcquireContext"), True), - ("mimikatz", "function=0x403BAC", capa.features.insn.API("CryptGenKey"), True), - ("mimikatz", "function=0x403BAC", capa.features.insn.API("CryptImportKey"), True), - ("mimikatz", "function=0x403BAC", capa.features.insn.API("CryptDestroyKey"), True), - ("mimikatz", "function=0x403BAC", capa.features.insn.API("Nope"), False), - ("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.Nope"), False), - # insn/api: thunk - ("mimikatz", "function=0x4556E5", capa.features.insn.API("advapi32.LsaQueryInformationPolicy"), True), - ("mimikatz", "function=0x4556E5", capa.features.insn.API("LsaQueryInformationPolicy"), True), - # insn/api: x64 - ( - "kernel32-64", - "function=0x180001010", - capa.features.insn.API("RtlVirtualUnwind"), - True, - ), - ("kernel32-64", "function=0x180001010", capa.features.insn.API("RtlVirtualUnwind"), True), - # insn/api: x64 thunk - ( - "kernel32-64", - "function=0x1800202B0", - capa.features.insn.API("RtlCaptureContext"), - True, - ), - ("kernel32-64", "function=0x1800202B0", capa.features.insn.API("RtlCaptureContext"), True), - # insn/api: x64 nested thunk - ("al-khaser x64", "function=0x14004B4F0", capa.features.insn.API("__vcrt_GetModuleHandle"), True), - # insn/api: call via jmp - ("mimikatz", "function=0x40B3C6", capa.features.insn.API("LocalFree"), True), - ("c91887...", "function=0x40156F", capa.features.insn.API("CloseClipboard"), True), - # TODO ignore thunk functions that call via jmp? - # insn/api: resolve indirect calls - ("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.CreatePipe"), True), - ("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.SetHandleInformation"), True), - ("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.CloseHandle"), True), - ("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.WriteFile"), True), - # insn/string - ("mimikatz", "function=0x40105D", capa.features.String("SCardControl"), True), - ("mimikatz", "function=0x40105D", capa.features.String("SCardTransmit"), True), - ("mimikatz", "function=0x40105D", capa.features.String("ACR > "), True), - ("mimikatz", "function=0x40105D", capa.features.String("nope"), False), - ("773290...", "function=0x140001140", capa.features.String(r"%s:\\OfficePackagesForWDAG"), True), - # insn/regex, issue #262 - ("pma16-01", "function=0x4021B0", capa.features.Regex("HTTP/1.0"), True), - ("pma16-01", "function=0x4021B0", capa.features.Regex("www.practicalmalwareanalysis.com"), False), - # insn/string, pointer to string - ("mimikatz", "function=0x44EDEF", capa.features.String("INPUTEVENT"), True), - # insn/string, direct memory reference - ("mimikatz", "function=0x46D6CE", capa.features.String("(null)"), True), - # insn/bytes - ("mimikatz", "function=0x40105D", capa.features.Bytes("SCardControl".encode("utf-16le")), True), - ("mimikatz", "function=0x40105D", capa.features.Bytes("SCardTransmit".encode("utf-16le")), True), - ("mimikatz", "function=0x40105D", capa.features.Bytes("ACR > ".encode("utf-16le")), True), - ("mimikatz", "function=0x40105D", capa.features.Bytes("nope".encode("ascii")), False), - # IDA features included byte sequences read from invalid memory, fixed in #409 - ("mimikatz", "function=0x44570F", capa.features.Bytes(binascii.unhexlify("FF" * 256)), False), - # insn/bytes, pointer to bytes - ("mimikatz", "function=0x44EDEF", capa.features.Bytes("INPUTEVENT".encode("utf-16le")), True), - # insn/characteristic(nzxor) - ("mimikatz", "function=0x410DFC", capa.features.Characteristic("nzxor"), True), - ("mimikatz", "function=0x40105D", capa.features.Characteristic("nzxor"), False), - # insn/characteristic(nzxor): no security cookies - ("mimikatz", "function=0x46D534", capa.features.Characteristic("nzxor"), False), - # insn/characteristic(nzxor): xorps - # viv needs fixup to recognize function, see above - ("3b13b...", "function=0x10006860", capa.features.Characteristic("nzxor"), True), - # insn/characteristic(peb access) - ("kernel32-64", "function=0x1800017D0", capa.features.Characteristic("peb access"), True), - ("mimikatz", "function=0x4556E5", capa.features.Characteristic("peb access"), False), - # insn/characteristic(gs access) - ("kernel32-64", "function=0x180001068", capa.features.Characteristic("gs access"), True), - ("mimikatz", "function=0x4556E5", capa.features.Characteristic("gs access"), False), - # insn/characteristic(cross section flow) - ("a1982...", "function=0x4014D0", capa.features.Characteristic("cross section flow"), True), - # insn/characteristic(cross section flow): imports don't count - ("kernel32-64", "function=0x180001068", capa.features.Characteristic("cross section flow"), False), - ("mimikatz", "function=0x4556E5", capa.features.Characteristic("cross section flow"), False), - # insn/characteristic(recursive call) - ("mimikatz", "function=0x40640e", capa.features.Characteristic("recursive call"), True), - # before this we used ambiguous (0x4556E5, False), which has a data reference / indirect recursive call, see #386 - ("mimikatz", "function=0x4175FF", capa.features.Characteristic("recursive call"), False), - # insn/characteristic(indirect call) - ("mimikatz", "function=0x4175FF", capa.features.Characteristic("indirect call"), True), - ("mimikatz", "function=0x4556E5", capa.features.Characteristic("indirect call"), False), - # insn/characteristic(calls from) - ("mimikatz", "function=0x4556E5", capa.features.Characteristic("calls from"), True), - ("mimikatz", "function=0x4702FD", capa.features.Characteristic("calls from"), False), - # function/characteristic(calls to) - ("mimikatz", "function=0x40105D", capa.features.Characteristic("calls to"), True), - # before this we used ambiguous (0x4556E5, False), which has a data reference / indirect recursive call, see #386 - ("mimikatz", "function=0x456BB9", capa.features.Characteristic("calls to"), False), -] +FEATURE_PRESENCE_TESTS = sorted( + [ + # file/characteristic("embedded pe") + ("pma12-04", "file", capa.features.Characteristic("embedded pe"), True), + # file/string + ("mimikatz", "file", capa.features.String("SCardControl"), True), + ("mimikatz", "file", capa.features.String("SCardTransmit"), True), + ("mimikatz", "file", capa.features.String("ACR > "), True), + ("mimikatz", "file", capa.features.String("nope"), False), + # file/sections + ("mimikatz", "file", capa.features.file.Section(".text"), True), + ("mimikatz", "file", capa.features.file.Section(".nope"), False), + # IDA doesn't extract unmapped sections by default + # ("mimikatz", "file", capa.features.file.Section(".rsrc"), True), + # file/exports + ("kernel32", "file", capa.features.file.Export("BaseThreadInitThunk"), True), + ("kernel32", "file", capa.features.file.Export("lstrlenW"), True), + ("kernel32", "file", capa.features.file.Export("nope"), False), + # file/imports + ("mimikatz", "file", capa.features.file.Import("advapi32.CryptSetHashParam"), True), + ("mimikatz", "file", capa.features.file.Import("CryptSetHashParam"), True), + ("mimikatz", "file", capa.features.file.Import("kernel32.IsWow64Process"), True), + ("mimikatz", "file", capa.features.file.Import("msvcrt.exit"), True), + ("mimikatz", "file", capa.features.file.Import("cabinet.#11"), True), + ("mimikatz", "file", capa.features.file.Import("#11"), False), + ("mimikatz", "file", capa.features.file.Import("#nope"), False), + ("mimikatz", "file", capa.features.file.Import("nope"), False), + ("mimikatz", "file", capa.features.file.Import("advapi32.CryptAcquireContextW"), True), + ("mimikatz", "file", capa.features.file.Import("advapi32.CryptAcquireContext"), True), + ("mimikatz", "file", capa.features.file.Import("CryptAcquireContextW"), True), + ("mimikatz", "file", capa.features.file.Import("CryptAcquireContext"), True), + # function/characteristic(loop) + ("mimikatz", "function=0x401517", capa.features.Characteristic("loop"), True), + ("mimikatz", "function=0x401000", capa.features.Characteristic("loop"), False), + # bb/characteristic(tight loop) + ("mimikatz", "function=0x402EC4", capa.features.Characteristic("tight loop"), True), + ("mimikatz", "function=0x401000", capa.features.Characteristic("tight loop"), False), + # bb/characteristic(stack string) + ("mimikatz", "function=0x4556E5", capa.features.Characteristic("stack string"), True), + ("mimikatz", "function=0x401000", capa.features.Characteristic("stack string"), False), + # bb/characteristic(tight loop) + ("mimikatz", "function=0x402EC4,bb=0x402F8E", capa.features.Characteristic("tight loop"), True), + ("mimikatz", "function=0x401000,bb=0x401000", capa.features.Characteristic("tight loop"), False), + # insn/mnemonic + ("mimikatz", "function=0x40105D", capa.features.insn.Mnemonic("push"), True), + ("mimikatz", "function=0x40105D", capa.features.insn.Mnemonic("movzx"), True), + ("mimikatz", "function=0x40105D", capa.features.insn.Mnemonic("xor"), True), + ("mimikatz", "function=0x40105D", capa.features.insn.Mnemonic("in"), False), + ("mimikatz", "function=0x40105D", capa.features.insn.Mnemonic("out"), False), + # insn/number + ("mimikatz", "function=0x40105D", capa.features.insn.Number(0xFF), True), + ("mimikatz", "function=0x40105D", capa.features.insn.Number(0x3136B0), True), + # insn/number: stack adjustments + ("mimikatz", "function=0x40105D", capa.features.insn.Number(0xC), False), + ("mimikatz", "function=0x40105D", capa.features.insn.Number(0x10), False), + # insn/number: arch flavors + ("mimikatz", "function=0x40105D", capa.features.insn.Number(0xFF), True), + ("mimikatz", "function=0x40105D", capa.features.insn.Number(0xFF, arch=ARCH_X32), True), + ("mimikatz", "function=0x40105D", capa.features.insn.Number(0xFF, arch=ARCH_X64), False), + # insn/offset + ("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x0), True), + ("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x4), True), + ("mimikatz", "function=0x40105D", capa.features.insn.Offset(0xC), True), + # insn/offset, issue #276 + ("64d9f", "function=0x10001510,bb=0x100015B0", capa.features.insn.Offset(0x4000), True), + # insn/offset: stack references + ("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x8), False), + ("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x10), False), + # insn/offset: negative + ("mimikatz", "function=0x4011FB", capa.features.insn.Offset(-0x1), True), + ("mimikatz", "function=0x4011FB", capa.features.insn.Offset(-0x2), True), + # insn/offset: arch flavors + ("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x0), True), + ("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x0, arch=ARCH_X32), True), + ("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x0, arch=ARCH_X64), False), + # insn/api + ("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptAcquireContextW"), True), + ("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptAcquireContext"), True), + ("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptGenKey"), True), + ("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptImportKey"), True), + ("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptDestroyKey"), True), + ("mimikatz", "function=0x403BAC", capa.features.insn.API("CryptAcquireContextW"), True), + ("mimikatz", "function=0x403BAC", capa.features.insn.API("CryptAcquireContext"), True), + ("mimikatz", "function=0x403BAC", capa.features.insn.API("CryptGenKey"), True), + ("mimikatz", "function=0x403BAC", capa.features.insn.API("CryptImportKey"), True), + ("mimikatz", "function=0x403BAC", capa.features.insn.API("CryptDestroyKey"), True), + ("mimikatz", "function=0x403BAC", capa.features.insn.API("Nope"), False), + ("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.Nope"), False), + # insn/api: thunk + ("mimikatz", "function=0x4556E5", capa.features.insn.API("advapi32.LsaQueryInformationPolicy"), True), + ("mimikatz", "function=0x4556E5", capa.features.insn.API("LsaQueryInformationPolicy"), True), + # insn/api: x64 + ( + "kernel32-64", + "function=0x180001010", + capa.features.insn.API("RtlVirtualUnwind"), + True, + ), + ("kernel32-64", "function=0x180001010", capa.features.insn.API("RtlVirtualUnwind"), True), + # insn/api: x64 thunk + ( + "kernel32-64", + "function=0x1800202B0", + capa.features.insn.API("RtlCaptureContext"), + True, + ), + ("kernel32-64", "function=0x1800202B0", capa.features.insn.API("RtlCaptureContext"), True), + # insn/api: x64 nested thunk + ("al-khaser x64", "function=0x14004B4F0", capa.features.insn.API("__vcrt_GetModuleHandle"), True), + # insn/api: call via jmp + ("mimikatz", "function=0x40B3C6", capa.features.insn.API("LocalFree"), True), + ("c91887...", "function=0x40156F", capa.features.insn.API("CloseClipboard"), True), + # TODO ignore thunk functions that call via jmp? + # insn/api: resolve indirect calls + ("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.CreatePipe"), True), + ("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.SetHandleInformation"), True), + ("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.CloseHandle"), True), + ("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.WriteFile"), True), + # insn/string + ("mimikatz", "function=0x40105D", capa.features.String("SCardControl"), True), + ("mimikatz", "function=0x40105D", capa.features.String("SCardTransmit"), True), + ("mimikatz", "function=0x40105D", capa.features.String("ACR > "), True), + ("mimikatz", "function=0x40105D", capa.features.String("nope"), False), + ("773290...", "function=0x140001140", capa.features.String(r"%s:\\OfficePackagesForWDAG"), True), + # insn/regex, issue #262 + ("pma16-01", "function=0x4021B0", capa.features.Regex("HTTP/1.0"), True), + ("pma16-01", "function=0x4021B0", capa.features.Regex("www.practicalmalwareanalysis.com"), False), + # insn/string, pointer to string + ("mimikatz", "function=0x44EDEF", capa.features.String("INPUTEVENT"), True), + # insn/string, direct memory reference + ("mimikatz", "function=0x46D6CE", capa.features.String("(null)"), True), + # insn/bytes + ("mimikatz", "function=0x40105D", capa.features.Bytes("SCardControl".encode("utf-16le")), True), + ("mimikatz", "function=0x40105D", capa.features.Bytes("SCardTransmit".encode("utf-16le")), True), + ("mimikatz", "function=0x40105D", capa.features.Bytes("ACR > ".encode("utf-16le")), True), + ("mimikatz", "function=0x40105D", capa.features.Bytes("nope".encode("ascii")), False), + # IDA features included byte sequences read from invalid memory, fixed in #409 + ("mimikatz", "function=0x44570F", capa.features.Bytes(binascii.unhexlify("FF" * 256)), False), + # insn/bytes, pointer to bytes + ("mimikatz", "function=0x44EDEF", capa.features.Bytes("INPUTEVENT".encode("utf-16le")), True), + # insn/characteristic(nzxor) + ("mimikatz", "function=0x410DFC", capa.features.Characteristic("nzxor"), True), + ("mimikatz", "function=0x40105D", capa.features.Characteristic("nzxor"), False), + # insn/characteristic(nzxor): no security cookies + ("mimikatz", "function=0x46D534", capa.features.Characteristic("nzxor"), False), + # insn/characteristic(nzxor): xorps + # viv needs fixup to recognize function, see above + ("3b13b...", "function=0x10006860", capa.features.Characteristic("nzxor"), True), + # insn/characteristic(peb access) + ("kernel32-64", "function=0x1800017D0", capa.features.Characteristic("peb access"), True), + ("mimikatz", "function=0x4556E5", capa.features.Characteristic("peb access"), False), + # insn/characteristic(gs access) + ("kernel32-64", "function=0x180001068", capa.features.Characteristic("gs access"), True), + ("mimikatz", "function=0x4556E5", capa.features.Characteristic("gs access"), False), + # insn/characteristic(cross section flow) + ("a1982...", "function=0x4014D0", capa.features.Characteristic("cross section flow"), True), + # insn/characteristic(cross section flow): imports don't count + ("kernel32-64", "function=0x180001068", capa.features.Characteristic("cross section flow"), False), + ("mimikatz", "function=0x4556E5", capa.features.Characteristic("cross section flow"), False), + # insn/characteristic(recursive call) + ("mimikatz", "function=0x40640e", capa.features.Characteristic("recursive call"), True), + # before this we used ambiguous (0x4556E5, False), which has a data reference / indirect recursive call, see #386 + ("mimikatz", "function=0x4175FF", capa.features.Characteristic("recursive call"), False), + # insn/characteristic(indirect call) + ("mimikatz", "function=0x4175FF", capa.features.Characteristic("indirect call"), True), + ("mimikatz", "function=0x4556E5", capa.features.Characteristic("indirect call"), False), + # insn/characteristic(calls from) + ("mimikatz", "function=0x4556E5", capa.features.Characteristic("calls from"), True), + ("mimikatz", "function=0x4702FD", capa.features.Characteristic("calls from"), False), + # function/characteristic(calls to) + ("mimikatz", "function=0x40105D", capa.features.Characteristic("calls to"), True), + # before this we used ambiguous (0x4556E5, False), which has a data reference / indirect recursive call, see #386 + ("mimikatz", "function=0x456BB9", capa.features.Characteristic("calls to"), False), + # file/function-name + ("pma16-01", "file", capa.features.file.FunctionName("__aulldiv"), True), + ], + # order tests by (file, item) + # so that our LRU cache is most effective. + key=lambda t: (t[0], t[1]), +) FEATURE_PRESENCE_TESTS_IDA = [ # file/imports diff --git a/tests/test_rules.py b/tests/test_rules.py index 7035892e..adab915b 100644 --- a/tests/test_rules.py +++ b/tests/test_rules.py @@ -11,7 +11,10 @@ import textwrap import pytest import capa.rules +import capa.engine +import capa.features from capa.features import ARCH_X32, ARCH_X64, String +from capa.features.file import FunctionName from capa.features.insn import Number, Offset @@ -891,3 +894,25 @@ def test_rules_namespace_dependencies(): assert "rule 1" in r4 assert "rule 2" in r4 assert "rule 3" not in r4 + + +def test_function_name_features(): + rule = textwrap.dedent( + """ + rule: + meta: + name: test rule + scope: file + features: + - and: + - function-name: strcpy + - function-name: strcmp = copy from here to there + - function-name: strdup + description: duplicate a string + """ + ) + r = capa.rules.Rule.from_yaml(rule) + children = list(r.statement.get_children()) + assert (FunctionName("strcpy") in children) == True + assert (FunctionName("strcmp", description="copy from here to there") in children) == True + assert (FunctionName("strdup", description="duplicate a string") in children) == True diff --git a/tests/test_smda_features.py b/tests/test_smda_features.py index dae6185d..de1deebf 100644 --- a/tests/test_smda_features.py +++ b/tests/test_smda_features.py @@ -7,20 +7,46 @@ # See the License for the specific language governing permissions and limitations under the License. import sys +import pytest from fixtures import * +from fixtures import parametrize + +import capa.features.file -@parametrize( +def smda_parametrize(params, valuess, **kwargs): + """ + fixup pytest parametrization to mark a subset of tests as xfail. + + xfail SMDA tests that rely on function id. + """ + ret = [] + for values in valuess: + (sample, scope, feature, expected) = values + if scope == "file" and isinstance(feature, capa.features.file.FunctionName) and expected is True: + # pytest.param behaves like a list, but carries along associated marks, like xfail. + # + # https://stackoverflow.com/a/30575822/87207 + ret.append(pytest.param(*values, marks=pytest.mark.xfail(reason="SMDA has no function ID", strict=True))) + elif sample == "a1982..." and sys.platform == "win32": + ret.append(pytest.param(*values, marks=pytest.mark.xfail(reason="SMDA bug tracked #585", strict=True))) + elif sample == "al-khaser x64" and sys.platform == "win32": + ret.append(pytest.param(*values, marks=pytest.mark.xfail(reason="SMDA bug tracked #585", strict=True))) + else: + ret.append(values) + return parametrize(params, ret, **kwargs) + + +@smda_parametrize( "sample,scope,feature,expected", FEATURE_PRESENCE_TESTS, indirect=["sample", "scope"], ) -@pytest.mark.xfail(sys.platform == "win32", reason="SMDA bug: https://github.com/danielplohmann/smda/issues/20") def test_smda_features(sample, scope, feature, expected): do_test_feature_presence(get_smda_extractor, sample, scope, feature, expected) -@parametrize( +@smda_parametrize( "sample,scope,feature,expected", FEATURE_COUNT_TESTS, indirect=["sample", "scope"],