Compare commits

...

6 Commits

Author SHA1 Message Date
mr-tz
dc08843e2d address idalib-based test fails 2025-12-11 14:18:13 +00:00
Moritz
074f7c742c Merge branch 'master' into idalib-tests 2025-11-24 19:52:40 +01:00
Willi Ballenthin
cf463676b2 fixtures: remove dups 2025-11-03 12:47:12 +01:00
Willi Ballenthin
b5e5840a63 lints 2025-10-29 20:29:08 +01:00
Willi Ballenthin
f252b6bbd0 changelog 2025-10-29 20:23:12 +01:00
Willi Ballenthin
eda53ab3c1 tests: add feature tests for idalib 2025-10-29 20:20:57 +01:00
7 changed files with 171 additions and 21 deletions

View File

@@ -138,6 +138,7 @@ repos:
- "--ignore=tests/test_ghidra_features.py"
- "--ignore=tests/test_ida_features.py"
- "--ignore=tests/test_viv_features.py"
- "--ignore=tests/test_idalib_features.py"
- "--ignore=tests/test_main.py"
- "--ignore=tests/test_scripts.py"
always_run: true

View File

@@ -47,6 +47,7 @@ Additionally a Binary Ninja bug has been fixed. Released binaries now include AR
### New Features
- ci: add support for arm64 binary releases
- tests: run tests against IDA via idalib @williballenthin #2742
### Breaking Changes

View File

@@ -18,6 +18,8 @@ import idaapi
import idautils
import capa.features.extractors.ida.helpers
from capa.features.file import FunctionName
from capa.features.insn import API
from capa.features.common import Feature, Characteristic
from capa.features.address import Address, AbsoluteVirtualAddress
from capa.features.extractors import loops
@@ -50,10 +52,22 @@ def extract_recursive_call(fh: FunctionHandle):
yield Characteristic("recursive call"), fh.address
def extract_function_alternative_names(fh: FunctionHandle):
"""Get all alternative names for an address."""
for aname in capa.features.extractors.ida.helpers.get_function_alternative_names(fh.inner.start_ea):
yield FunctionName(aname), fh.address
def extract_features(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
for func_handler in FUNCTION_HANDLERS:
for feature, addr in func_handler(fh):
yield feature, addr
FUNCTION_HANDLERS = (extract_function_calls_to, extract_function_loop, extract_recursive_call)
FUNCTION_HANDLERS = (
extract_function_calls_to,
extract_function_loop,
extract_recursive_call,
extract_function_alternative_names,
)

View File

@@ -20,6 +20,7 @@ import idaapi
import ida_nalt
import idautils
import ida_bytes
import ida_funcs
import ida_segment
from capa.features.address import AbsoluteVirtualAddress
@@ -436,3 +437,23 @@ def is_basic_block_return(bb: idaapi.BasicBlock) -> bool:
def has_sib(oper: idaapi.op_t) -> bool:
# via: https://reverseengineering.stackexchange.com/a/14300
return oper.specflag1 == 1
def get_function_alternative_names(fva: int):
"""Get all alternative names for an address."""
# Check indented comment
cmt = ida_bytes.get_cmt(fva, False) # False = non-repeatable
if cmt:
for line in cmt.split("\n"):
if line.startswith("Alternative name is '") and line.endswith("'"):
name = line[len("Alternative name is '") : -1] # Extract name between quotes
yield name
# Check function comment
func_cmt = ida_funcs.get_func_cmt(idaapi.get_func(fva), False)
if func_cmt:
for line in func_cmt.split("\n"):
if line.startswith("Alternative name is '") and line.endswith("'"):
name = line[len("Alternative name is '") : -1]
yield name

View File

@@ -22,9 +22,11 @@ import idautils
import capa.features.extractors.helpers
import capa.features.extractors.ida.helpers
from capa.features.file import FunctionName
from capa.features.insn import API, MAX_STRUCTURE_SIZE, Number, Offset, Mnemonic, OperandNumber, OperandOffset
from capa.features.common import MAX_BYTES_FEATURE_SIZE, THUNK_CHAIN_DEPTH_DELTA, Bytes, String, Feature, Characteristic
from capa.features.address import Address, AbsoluteVirtualAddress
from capa.features.extractors.ida.function import extract_function_alternative_names
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle
# security cookie checks may perform non-zeroing XORs, these are expected within a certain
@@ -129,8 +131,8 @@ def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle)
# not a function (start)
return
if target_func.flags & idaapi.FUNC_LIB:
name = idaapi.get_name(target_func.start_ea)
name = idaapi.get_name(target_func.start_ea)
if target_func.flags & idaapi.FUNC_LIB or not name.startswith("sub_"):
yield API(name), ih.address
if name.startswith("_"):
# some linkers may prefix linked routines with a `_` to avoid name collisions.
@@ -139,6 +141,10 @@ def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle)
# see: https://stackoverflow.com/a/2628384/87207
yield API(name[1:]), ih.address
for altname in capa.features.extractors.ida.helpers.get_function_alternative_names(target_func.start_ea):
yield FunctionName(altname), ih.address
yield API(altname), ih.address
def extract_insn_number_features(
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle

View File

@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import contextlib
import collections
from pathlib import Path
@@ -21,6 +21,7 @@ from functools import lru_cache
import pytest
import capa.main
import capa.helpers
import capa.features.file
import capa.features.insn
import capa.features.common
@@ -53,6 +54,7 @@ from capa.features.extractors.base_extractor import (
)
from capa.features.extractors.dnfile.extractor import DnfileFeatureExtractor
logger = logging.getLogger(__name__)
CD = Path(__file__).resolve().parent
DOTNET_DIR = CD / "data" / "dotnet"
DNFILE_TESTFILES = DOTNET_DIR / "dnfile-testfiles"
@@ -200,6 +202,65 @@ def get_binja_extractor(path: Path):
return extractor
# we can't easily cache this because the extractor relies on global state (the opened database)
# which also has to be closed elsewhere. so, the idalib tests will just take a little bit to run.
def get_idalib_extractor(path: Path):
import capa.features.extractors.ida.idalib as idalib
if not idalib.has_idalib():
raise RuntimeError("cannot find IDA idalib module.")
if not idalib.load_idalib():
raise RuntimeError("failed to load IDA idalib module.")
import idapro
import ida_auto
import capa.features.extractors.ida.extractor
logger.debug("idalib: opening database...")
idapro.enable_console_messages(False)
# - 0 - Success (database not packed)
# - 1 - Success (database was packed)
# - 2 - User cancelled or 32-64 bit conversion failed
# - 4 - Database initialization failed
# - -1 - Generic errors (database already open, auto-analysis failed, etc.)
# - -2 - User cancelled operation
ret = idapro.open_database(str(path), run_auto_analysis=True)
if ret not in (0, 1):
raise RuntimeError("failed to analyze input file")
logger.debug("idalib: waiting for analysis...")
ida_auto.auto_wait()
logger.debug("idalib: opened database.")
extractor = capa.features.extractors.ida.extractor.IdaFeatureExtractor()
fixup_idalib(path, extractor)
return extractor
def fixup_idalib(path: Path, extractor):
"""
IDA fixups to overcome differences between backends
"""
import idaapi
import ida_funcs
def remove_library_id_flag(fva):
f = idaapi.get_func(fva)
f.flags &= ~ida_funcs.FUNC_LIB
ida_funcs.update_func(f)
if "kernel32-64" in path.name:
# remove (correct) library function id, so we can test x64 thunk
remove_library_id_flag(0x1800202B0)
if "al-khaser_x64" in path.name:
# remove (correct) library function id, so we can test x64 nested thunk
remove_library_id_flag(0x14004B4F0)
@lru_cache(maxsize=1)
def get_cape_extractor(path):
from capa.helpers import load_json_from_path
@@ -894,20 +955,8 @@ FEATURE_PRESENCE_TESTS = sorted(
("mimikatz", "function=0x4556E5", capa.features.insn.API("advapi32.LsaQueryInformationPolicy"), False),
("mimikatz", "function=0x4556E5", capa.features.insn.API("LsaQueryInformationPolicy"), True),
# insn/api: x64
(
"kernel32-64",
"function=0x180001010",
capa.features.insn.API("RtlVirtualUnwind"),
True,
),
("kernel32-64", "function=0x180001010", capa.features.insn.API("RtlVirtualUnwind"), True),
# insn/api: x64 thunk
(
"kernel32-64",
"function=0x1800202B0",
capa.features.insn.API("RtlCaptureContext"),
True,
),
("kernel32-64", "function=0x1800202B0", capa.features.insn.API("RtlCaptureContext"), True),
# insn/api: x64 nested thunk
("al-khaser x64", "function=0x14004B4F0", capa.features.insn.API("__vcrt_GetModuleHandle"), True),
@@ -995,20 +1044,20 @@ FEATURE_PRESENCE_TESTS = sorted(
("pma16-01", "file", OS(OS_WINDOWS), True),
("pma16-01", "file", OS(OS_LINUX), False),
("mimikatz", "file", OS(OS_WINDOWS), True),
("pma16-01", "function=0x404356", OS(OS_WINDOWS), True),
("pma16-01", "function=0x404356,bb=0x4043B9", OS(OS_WINDOWS), True),
("pma16-01", "function=0x401100", OS(OS_WINDOWS), True),
("pma16-01", "function=0x401100,bb=0x401130", OS(OS_WINDOWS), True),
("mimikatz", "function=0x40105D", OS(OS_WINDOWS), True),
("pma16-01", "file", Arch(ARCH_I386), True),
("pma16-01", "file", Arch(ARCH_AMD64), False),
("mimikatz", "file", Arch(ARCH_I386), True),
("pma16-01", "function=0x404356", Arch(ARCH_I386), True),
("pma16-01", "function=0x404356,bb=0x4043B9", Arch(ARCH_I386), True),
("pma16-01", "function=0x401100", Arch(ARCH_I386), True),
("pma16-01", "function=0x401100,bb=0x401130", Arch(ARCH_I386), True),
("mimikatz", "function=0x40105D", Arch(ARCH_I386), True),
("pma16-01", "file", Format(FORMAT_PE), True),
("pma16-01", "file", Format(FORMAT_ELF), False),
("mimikatz", "file", Format(FORMAT_PE), True),
# format is also a global feature
("pma16-01", "function=0x404356", Format(FORMAT_PE), True),
("pma16-01", "function=0x401100", Format(FORMAT_PE), True),
("mimikatz", "function=0x456BB9", Format(FORMAT_PE), True),
# elf support
("7351f.elf", "file", OS(OS_LINUX), True),

View File

@@ -0,0 +1,58 @@
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import pytest
import fixtures
import capa.features.extractors.ida.idalib
logger = logging.getLogger(__name__)
idalib_present = capa.features.extractors.ida.idalib.has_idalib()
@pytest.mark.skipif(idalib_present is False, reason="Skip idalib tests if the idalib Python API is not installed")
@fixtures.parametrize(
"sample,scope,feature,expected",
fixtures.FEATURE_PRESENCE_TESTS + fixtures.FEATURE_SYMTAB_FUNC_TESTS,
indirect=["sample", "scope"],
)
def test_idalib_features(sample, scope, feature, expected):
try:
fixtures.do_test_feature_presence(fixtures.get_idalib_extractor, sample, scope, feature, expected)
finally:
logger.debug("closing database...")
import idapro
idapro.close_database(save=False)
logger.debug("opened database.")
@pytest.mark.skipif(idalib_present is False, reason="Skip idalib tests if the idalib Python API is not installed")
@fixtures.parametrize(
"sample,scope,feature,expected",
fixtures.FEATURE_COUNT_TESTS,
indirect=["sample", "scope"],
)
def test_idalib_feature_counts(sample, scope, feature, expected):
try:
fixtures.do_test_feature_count(fixtures.get_idalib_extractor, sample, scope, feature, expected)
finally:
logger.debug("closing database...")
import idapro
idapro.close_database(save=False)
logger.debug("closed database.")