mirror of
https://github.com/mandiant/capa.git
synced 2025-12-12 07:40:38 -08:00
Compare commits
6 Commits
3687bb95e9
...
idalib-tes
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
dc08843e2d | ||
|
|
074f7c742c | ||
|
|
cf463676b2 | ||
|
|
b5e5840a63 | ||
|
|
f252b6bbd0 | ||
|
|
eda53ab3c1 |
@@ -138,6 +138,7 @@ repos:
|
||||
- "--ignore=tests/test_ghidra_features.py"
|
||||
- "--ignore=tests/test_ida_features.py"
|
||||
- "--ignore=tests/test_viv_features.py"
|
||||
- "--ignore=tests/test_idalib_features.py"
|
||||
- "--ignore=tests/test_main.py"
|
||||
- "--ignore=tests/test_scripts.py"
|
||||
always_run: true
|
||||
|
||||
@@ -47,6 +47,7 @@ Additionally a Binary Ninja bug has been fixed. Released binaries now include AR
|
||||
### New Features
|
||||
|
||||
- ci: add support for arm64 binary releases
|
||||
- tests: run tests against IDA via idalib @williballenthin #2742
|
||||
|
||||
### Breaking Changes
|
||||
|
||||
|
||||
@@ -18,6 +18,8 @@ import idaapi
|
||||
import idautils
|
||||
|
||||
import capa.features.extractors.ida.helpers
|
||||
from capa.features.file import FunctionName
|
||||
from capa.features.insn import API
|
||||
from capa.features.common import Feature, Characteristic
|
||||
from capa.features.address import Address, AbsoluteVirtualAddress
|
||||
from capa.features.extractors import loops
|
||||
@@ -50,10 +52,22 @@ def extract_recursive_call(fh: FunctionHandle):
|
||||
yield Characteristic("recursive call"), fh.address
|
||||
|
||||
|
||||
def extract_function_alternative_names(fh: FunctionHandle):
    """
    Yield a FunctionName feature for each alternative name of the function.

    The names come from
    `capa.features.extractors.ida.helpers.get_function_alternative_names`,
    queried with the start address of the wrapped IDA function (`fh.inner.start_ea`).
    Each feature is reported at the function's own address (`fh.address`).
    """
    alternative_names = capa.features.extractors.ida.helpers.get_function_alternative_names(fh.inner.start_ea)
    for alternative_name in alternative_names:
        yield FunctionName(alternative_name), fh.address
|
||||
|
||||
|
||||
def extract_features(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
    """
    Run every handler in FUNCTION_HANDLERS against the function and yield
    each (feature, address) pair they produce, in handler order.
    """
    for handler in FUNCTION_HANDLERS:
        extracted = handler(fh)
        for item in extracted:
            # unpack and re-pack so callers always receive a two-element tuple
            feature, addr = item
            yield feature, addr
|
||||
|
||||
|
||||
FUNCTION_HANDLERS = (extract_function_calls_to, extract_function_loop, extract_recursive_call)
|
||||
FUNCTION_HANDLERS = (
|
||||
extract_function_calls_to,
|
||||
extract_function_loop,
|
||||
extract_recursive_call,
|
||||
extract_function_alternative_names,
|
||||
)
|
||||
|
||||
@@ -20,6 +20,7 @@ import idaapi
|
||||
import ida_nalt
|
||||
import idautils
|
||||
import ida_bytes
|
||||
import ida_funcs
|
||||
import ida_segment
|
||||
|
||||
from capa.features.address import AbsoluteVirtualAddress
|
||||
@@ -436,3 +437,23 @@ def is_basic_block_return(bb: idaapi.BasicBlock) -> bool:
|
||||
def has_sib(oper: idaapi.op_t) -> bool:
    """
    Return True when the operand's `specflag1` field equals 1.

    The linked answer uses this field to recognize an operand that is
    encoded with a SIB byte.
    """
    # via: https://reverseengineering.stackexchange.com/a/14300
    sib_flag = oper.specflag1
    return sib_flag == 1
|
||||
|
||||
|
||||
def _parse_alternative_names(comment):
    """Yield the quoted names from "Alternative name is '...'" lines of a comment."""
    prefix = "Alternative name is '"
    suffix = "'"

    if not comment:
        return

    for line in comment.split("\n"):
        # a bare prefix ends with a quote too, so require room for both
        # the prefix and the closing quote before accepting the line.
        if len(line) < len(prefix) + len(suffix):
            continue

        if not (line.startswith(prefix) and line.endswith(suffix)):
            continue

        name = line[len(prefix) : -len(suffix)]  # text between the quotes
        if name:
            yield name


def get_function_alternative_names(fva: int):
    """
    Get all alternative names for an address.

    Looks for lines of the form "Alternative name is '<name>'" in two places:
    the non-repeatable comment at `fva`, and the non-repeatable function
    comment of the function containing `fva`.

    Yields:
      str: each non-empty name found, comment at the address first,
        then the function comment. Nothing is yielded for the function
        comment when `fva` does not belong to a function.
    """
    # Check indented comment
    yield from _parse_alternative_names(ida_bytes.get_cmt(fva, False))  # False = non-repeatable

    # Check function comment
    func = idaapi.get_func(fva)
    if func is None:
        # no function at this address, so there is no function comment to read
        return

    yield from _parse_alternative_names(ida_funcs.get_func_cmt(func, False))
|
||||
|
||||
@@ -22,9 +22,11 @@ import idautils
|
||||
|
||||
import capa.features.extractors.helpers
|
||||
import capa.features.extractors.ida.helpers
|
||||
from capa.features.file import FunctionName
|
||||
from capa.features.insn import API, MAX_STRUCTURE_SIZE, Number, Offset, Mnemonic, OperandNumber, OperandOffset
|
||||
from capa.features.common import MAX_BYTES_FEATURE_SIZE, THUNK_CHAIN_DEPTH_DELTA, Bytes, String, Feature, Characteristic
|
||||
from capa.features.address import Address, AbsoluteVirtualAddress
|
||||
from capa.features.extractors.ida.function import extract_function_alternative_names
|
||||
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle
|
||||
|
||||
# security cookie checks may perform non-zeroing XORs, these are expected within a certain
|
||||
@@ -129,8 +131,8 @@ def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle)
|
||||
# not a function (start)
|
||||
return
|
||||
|
||||
if target_func.flags & idaapi.FUNC_LIB:
|
||||
name = idaapi.get_name(target_func.start_ea)
|
||||
name = idaapi.get_name(target_func.start_ea)
|
||||
if target_func.flags & idaapi.FUNC_LIB or not name.startswith("sub_"):
|
||||
yield API(name), ih.address
|
||||
if name.startswith("_"):
|
||||
# some linkers may prefix linked routines with a `_` to avoid name collisions.
|
||||
@@ -139,6 +141,10 @@ def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle)
|
||||
# see: https://stackoverflow.com/a/2628384/87207
|
||||
yield API(name[1:]), ih.address
|
||||
|
||||
for altname in capa.features.extractors.ida.helpers.get_function_alternative_names(target_func.start_ea):
|
||||
yield FunctionName(altname), ih.address
|
||||
yield API(altname), ih.address
|
||||
|
||||
|
||||
def extract_insn_number_features(
|
||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
import logging
|
||||
import contextlib
|
||||
import collections
|
||||
from pathlib import Path
|
||||
@@ -21,6 +21,7 @@ from functools import lru_cache
|
||||
import pytest
|
||||
|
||||
import capa.main
|
||||
import capa.helpers
|
||||
import capa.features.file
|
||||
import capa.features.insn
|
||||
import capa.features.common
|
||||
@@ -53,6 +54,7 @@ from capa.features.extractors.base_extractor import (
|
||||
)
|
||||
from capa.features.extractors.dnfile.extractor import DnfileFeatureExtractor
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
CD = Path(__file__).resolve().parent
|
||||
DOTNET_DIR = CD / "data" / "dotnet"
|
||||
DNFILE_TESTFILES = DOTNET_DIR / "dnfile-testfiles"
|
||||
@@ -200,6 +202,65 @@ def get_binja_extractor(path: Path):
|
||||
return extractor
|
||||
|
||||
|
||||
# we can't easily cache this because the extractor relies on global state (the opened database)
|
||||
# which also has to be closed elsewhere. so, the idalib tests will just take a little bit to run.
|
||||
def get_idalib_extractor(path: Path):
    """
    Open `path` with idalib, wait for auto-analysis, and return an
    IdaFeatureExtractor for the loaded database.

    The database is still open when this returns; the caller must close it
    (for example with `idapro.close_database`).

    Raises:
      RuntimeError: if idalib cannot be found or loaded, or if
        `idapro.open_database` reports a failure.
    """
    import capa.features.extractors.ida.idalib as idalib

    if not idalib.has_idalib():
        raise RuntimeError("cannot find IDA idalib module.")

    if not idalib.load_idalib():
        raise RuntimeError("failed to load IDA idalib module.")

    # these imports come after the load_idalib() check above; keep that order
    import idapro
    import ida_auto

    import capa.features.extractors.ida.extractor

    logger.debug("idalib: opening database...")

    idapro.enable_console_messages(False)
    # - 0 - Success (database not packed)
    # - 1 - Success (database was packed)
    # - 2 - User cancelled or 32-64 bit conversion failed
    # - 4 - Database initialization failed
    # - -1 - Generic errors (database already open, auto-analysis failed, etc.)
    # - -2 - User cancelled operation
    ret = idapro.open_database(str(path), run_auto_analysis=True)
    if ret not in (0, 1):
        # include the status code so the failure can be matched to the table above
        raise RuntimeError(f"failed to analyze input file: idapro.open_database returned {ret}")

    logger.debug("idalib: waiting for analysis...")
    ida_auto.auto_wait()
    logger.debug("idalib: opened database.")

    extractor = capa.features.extractors.ida.extractor.IdaFeatureExtractor()
    fixup_idalib(path, extractor)
    return extractor
|
||||
|
||||
|
||||
def fixup_idalib(path: Path, extractor):
    """
    IDA fixups to overcome differences between backends.

    For selected samples (matched by a fragment of the file name), clear the
    library-function flag on one function so the thunk tests can run against it.
    `extractor` is accepted but not used here.
    """
    import idaapi
    import ida_funcs

    def clear_library_flag(fva):
        func = idaapi.get_func(fva)
        func.flags &= ~ida_funcs.FUNC_LIB
        ida_funcs.update_func(func)

    # (file name fragment, function address): each entry removes a (correct)
    # library function id so the function is analyzed like regular code.
    library_flag_fixups = (
        # so we can test x64 thunk
        ("kernel32-64", 0x1800202B0),
        # so we can test x64 nested thunk
        ("al-khaser_x64", 0x14004B4F0),
    )

    for name_fragment, fva in library_flag_fixups:
        if name_fragment in path.name:
            clear_library_flag(fva)
|
||||
|
||||
|
||||
@lru_cache(maxsize=1)
|
||||
def get_cape_extractor(path):
|
||||
from capa.helpers import load_json_from_path
|
||||
@@ -894,20 +955,8 @@ FEATURE_PRESENCE_TESTS = sorted(
|
||||
("mimikatz", "function=0x4556E5", capa.features.insn.API("advapi32.LsaQueryInformationPolicy"), False),
|
||||
("mimikatz", "function=0x4556E5", capa.features.insn.API("LsaQueryInformationPolicy"), True),
|
||||
# insn/api: x64
|
||||
(
|
||||
"kernel32-64",
|
||||
"function=0x180001010",
|
||||
capa.features.insn.API("RtlVirtualUnwind"),
|
||||
True,
|
||||
),
|
||||
("kernel32-64", "function=0x180001010", capa.features.insn.API("RtlVirtualUnwind"), True),
|
||||
# insn/api: x64 thunk
|
||||
(
|
||||
"kernel32-64",
|
||||
"function=0x1800202B0",
|
||||
capa.features.insn.API("RtlCaptureContext"),
|
||||
True,
|
||||
),
|
||||
("kernel32-64", "function=0x1800202B0", capa.features.insn.API("RtlCaptureContext"), True),
|
||||
# insn/api: x64 nested thunk
|
||||
("al-khaser x64", "function=0x14004B4F0", capa.features.insn.API("__vcrt_GetModuleHandle"), True),
|
||||
@@ -995,20 +1044,20 @@ FEATURE_PRESENCE_TESTS = sorted(
|
||||
("pma16-01", "file", OS(OS_WINDOWS), True),
|
||||
("pma16-01", "file", OS(OS_LINUX), False),
|
||||
("mimikatz", "file", OS(OS_WINDOWS), True),
|
||||
("pma16-01", "function=0x404356", OS(OS_WINDOWS), True),
|
||||
("pma16-01", "function=0x404356,bb=0x4043B9", OS(OS_WINDOWS), True),
|
||||
("pma16-01", "function=0x401100", OS(OS_WINDOWS), True),
|
||||
("pma16-01", "function=0x401100,bb=0x401130", OS(OS_WINDOWS), True),
|
||||
("mimikatz", "function=0x40105D", OS(OS_WINDOWS), True),
|
||||
("pma16-01", "file", Arch(ARCH_I386), True),
|
||||
("pma16-01", "file", Arch(ARCH_AMD64), False),
|
||||
("mimikatz", "file", Arch(ARCH_I386), True),
|
||||
("pma16-01", "function=0x404356", Arch(ARCH_I386), True),
|
||||
("pma16-01", "function=0x404356,bb=0x4043B9", Arch(ARCH_I386), True),
|
||||
("pma16-01", "function=0x401100", Arch(ARCH_I386), True),
|
||||
("pma16-01", "function=0x401100,bb=0x401130", Arch(ARCH_I386), True),
|
||||
("mimikatz", "function=0x40105D", Arch(ARCH_I386), True),
|
||||
("pma16-01", "file", Format(FORMAT_PE), True),
|
||||
("pma16-01", "file", Format(FORMAT_ELF), False),
|
||||
("mimikatz", "file", Format(FORMAT_PE), True),
|
||||
# format is also a global feature
|
||||
("pma16-01", "function=0x404356", Format(FORMAT_PE), True),
|
||||
("pma16-01", "function=0x401100", Format(FORMAT_PE), True),
|
||||
("mimikatz", "function=0x456BB9", Format(FORMAT_PE), True),
|
||||
# elf support
|
||||
("7351f.elf", "file", OS(OS_LINUX), True),
|
||||
|
||||
58
tests/test_idalib_features.py
Normal file
58
tests/test_idalib_features.py
Normal file
@@ -0,0 +1,58 @@
|
||||
# Copyright 2020 Google LLC
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import logging
|
||||
|
||||
import pytest
|
||||
import fixtures
|
||||
|
||||
import capa.features.extractors.ida.idalib
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
idalib_present = capa.features.extractors.ida.idalib.has_idalib()
|
||||
|
||||
|
||||
def _close_idalib_database():
    """Close the currently open idalib database without saving it."""
    logger.debug("closing database...")
    # imported here, not at module level; presumably so this module can still
    # be imported (and its tests skipped) when idalib is not installed.
    import idapro

    idapro.close_database(save=False)
    logger.debug("closed database.")


@pytest.mark.skipif(idalib_present is False, reason="Skip idalib tests if the idalib Python API is not installed")
@fixtures.parametrize(
    "sample,scope,feature,expected",
    fixtures.FEATURE_PRESENCE_TESTS + fixtures.FEATURE_SYMTAB_FUNC_TESTS,
    indirect=["sample", "scope"],
)
def test_idalib_features(sample, scope, feature, expected):
    """Check feature presence using the idalib-backed extractor."""
    try:
        fixtures.do_test_feature_presence(fixtures.get_idalib_extractor, sample, scope, feature, expected)
    finally:
        # the extractor leaves a database open; always close it so the
        # next parametrized case can open its own.
        _close_idalib_database()


@pytest.mark.skipif(idalib_present is False, reason="Skip idalib tests if the idalib Python API is not installed")
@fixtures.parametrize(
    "sample,scope,feature,expected",
    fixtures.FEATURE_COUNT_TESTS,
    indirect=["sample", "scope"],
)
def test_idalib_feature_counts(sample, scope, feature, expected):
    """Check feature counts using the idalib-backed extractor."""
    try:
        fixtures.do_test_feature_count(fixtures.get_idalib_extractor, sample, scope, feature, expected)
    finally:
        # the extractor leaves a database open; always close it so the
        # next parametrized case can open its own.
        _close_idalib_database()
|
||||
Reference in New Issue
Block a user