From eda53ab3c198a6bd0c78591e18e2548da33b3555 Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Wed, 29 Oct 2025 20:20:57 +0100 Subject: [PATCH 01/23] tests: add feature tests for idalib --- tests/fixtures.py | 40 +++++++++++++++++++++++- tests/test_idalib_features.py | 58 +++++++++++++++++++++++++++++++++++ 2 files changed, 97 insertions(+), 1 deletion(-) create mode 100644 tests/test_idalib_features.py diff --git a/tests/fixtures.py b/tests/fixtures.py index b9199061..7383cd3c 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -12,7 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. - +import io +import logging import contextlib import collections from pathlib import Path @@ -21,6 +22,7 @@ from functools import lru_cache import pytest import capa.main +import capa.helpers import capa.features.file import capa.features.insn import capa.features.common @@ -53,6 +55,7 @@ from capa.features.extractors.base_extractor import ( ) from capa.features.extractors.dnfile.extractor import DnfileFeatureExtractor +logger = logging.getLogger(__name__) CD = Path(__file__).resolve().parent DOTNET_DIR = CD / "data" / "dotnet" DNFILE_TESTFILES = DOTNET_DIR / "dnfile-testfiles" @@ -199,6 +202,41 @@ def get_binja_extractor(path: Path): return extractor +# we can't easily cache this because the extractor relies on global state (the opened database) +# which also has to be closed elsewhere. so, the idalib tests will just take a little bit to run. +def get_idalib_extractor(path: Path): + import capa.features.extractors.ida.idalib as idalib + + if not idalib.has_idalib(): + raise RuntimeError("cannot find IDA idalib module.") + + if not idalib.load_idalib(): + raise RuntimeError("failed to load IDA idalib module.") + + import idapro + import ida_auto + + import capa.features.extractors.ida.extractor + + logger.debug("idalib: opening database...") + + idapro.enable_console_messages(False) + # - 0 - Success (database not packed) + # - 1 - Success (database was packed) + # - 2 - User cancelled or 32-64 bit conversion failed + # - 4 - Database initialization failed + # - -1 - Generic errors (database already open, auto-analysis failed, etc.) + # - -2 - User cancelled operation + ret = idapro.open_database(str(path), run_auto_analysis=True) + if ret not in (0, 1): + raise RuntimeError("failed to analyze input file") + + logger.debug("idalib: waiting for analysis...") + ida_auto.auto_wait() + logger.debug("idalib: opened database.") + + return capa.features.extractors.ida.extractor.IdaFeatureExtractor() + @lru_cache(maxsize=1) def get_cape_extractor(path): diff --git a/tests/test_idalib_features.py b/tests/test_idalib_features.py new file mode 100644 index 00000000..4eaaee9b --- /dev/null +++ b/tests/test_idalib_features.py @@ -0,0 +1,58 @@ + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + +import pytest + +import capa.features.extractors.ida.idalib + +import fixtures + +logger = logging.getLogger(__name__) + +idalib_present = capa.features.extractors.ida.idalib.has_idalib() + + +@pytest.mark.skipif(idalib_present is False, reason="Skip idalib tests if the idalib Python API is not installed") +@fixtures.parametrize( + "sample,scope,feature,expected", + fixtures.FEATURE_PRESENCE_TESTS + fixtures.FEATURE_SYMTAB_FUNC_TESTS, + indirect=["sample", "scope"], +) +def test_idalib_features(sample, scope, feature, expected): + try: + fixtures.do_test_feature_presence(fixtures.get_idalib_extractor, sample, scope, feature, expected) + finally: + logger.debug("closing database...") + import idapro + idapro.close_database(save=False) + logger.debug("opened database.") + + +@pytest.mark.skipif(idalib_present is False, reason="Skip idalib tests if the idalib Python API is not installed") +@fixtures.parametrize( + "sample,scope,feature,expected", + fixtures.FEATURE_COUNT_TESTS, + indirect=["sample", "scope"], +) +def test_idalib_feature_counts(sample, scope, feature, expected): + try: + fixtures.do_test_feature_count(fixtures.get_idalib_extractor, sample, scope, feature, expected) + finally: + logger.debug("closing database...") + import idapro + idapro.close_database(save=False) + logger.debug("closed database.") From f252b6bbd07829ce81e18f7f6a061e1176e63516 Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Wed, 29 Oct 2025 20:23:12 +0100 Subject: [PATCH 02/23] changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e01ea6c2..4efbed87 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ ### New Features - ci: add support for arm64 binary releases +- tests: run tests against IDA via idalib @williballenthin #2742 ### Breaking Changes From b5e5840a63f6124b98eecb27899e3e336dc49d36 Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Wed, 29 Oct 2025 20:29:08 +0100 Subject: [PATCH 03/23] lints --- .pre-commit-config.yaml | 1 + tests/fixtures.py | 2 +- tests/test_idalib_features.py | 6 +++--- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 262b600e..d26911c8 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -138,6 +138,7 @@ repos: - "--ignore=tests/test_ghidra_features.py" - "--ignore=tests/test_ida_features.py" - "--ignore=tests/test_viv_features.py" + - "--ignore=tests/test_idalib_features.py" - "--ignore=tests/test_main.py" - "--ignore=tests/test_scripts.py" always_run: true diff --git a/tests/fixtures.py b/tests/fixtures.py index 7383cd3c..70fa4897 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import io import logging import contextlib import collections @@ -202,6 +201,7 @@ def get_binja_extractor(path: Path): return extractor + # we can't easily cache this because the extractor relies on global state (the opened database) # which also has to be closed elsewhere. so, the idalib tests will just take a little bit to run. def get_idalib_extractor(path: Path): diff --git a/tests/test_idalib_features.py b/tests/test_idalib_features.py index 4eaaee9b..ebefb8eb 100644 --- a/tests/test_idalib_features.py +++ b/tests/test_idalib_features.py @@ -1,4 +1,3 @@ - # Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -16,11 +15,10 @@ import logging import pytest +import fixtures import capa.features.extractors.ida.idalib -import fixtures - logger = logging.getLogger(__name__) idalib_present = capa.features.extractors.ida.idalib.has_idalib() @@ -38,6 +36,7 @@ def test_idalib_features(sample, scope, feature, expected): finally: logger.debug("closing database...") import idapro + idapro.close_database(save=False) logger.debug("opened database.") @@ -54,5 +53,6 @@ def test_idalib_feature_counts(sample, scope, feature, expected): finally: logger.debug("closing database...") import idapro + idapro.close_database(save=False) logger.debug("closed database.") From cf463676b2126e699426a50edf50c593de310efb Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Mon, 3 Nov 2025 12:47:12 +0100 Subject: [PATCH 04/23] fixtures: remove dups --- tests/fixtures.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/tests/fixtures.py b/tests/fixtures.py index 70fa4897..f1906cf1 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -932,20 +932,8 @@ FEATURE_PRESENCE_TESTS = sorted( ("mimikatz", "function=0x4556E5", capa.features.insn.API("advapi32.LsaQueryInformationPolicy"), False), ("mimikatz", "function=0x4556E5", capa.features.insn.API("LsaQueryInformationPolicy"), True), # insn/api: x64 - ( - "kernel32-64", - "function=0x180001010", - capa.features.insn.API("RtlVirtualUnwind"), - True, - ), ("kernel32-64", "function=0x180001010", capa.features.insn.API("RtlVirtualUnwind"), True), # insn/api: x64 thunk - ( - "kernel32-64", - "function=0x1800202B0", - capa.features.insn.API("RtlCaptureContext"), - True, - ), ("kernel32-64", "function=0x1800202B0", capa.features.insn.API("RtlCaptureContext"), True), # insn/api: x64 nested thunk ("al-khaser x64", "function=0x14004B4F0", capa.features.insn.API("__vcrt_GetModuleHandle"), True), From dc08843e2df0f9996a21ef4a8aaab1bb27856c98 Mon Sep 17 00:00:00 2001 From: mr-tz Date: Thu, 11 Dec 2025 14:18:13 +0000 Subject: [PATCH 05/23] address idalib-based test fails --- capa/features/extractors/ida/function.py | 16 ++++++++++- capa/features/extractors/ida/helpers.py | 21 ++++++++++++++ capa/features/extractors/ida/insn.py | 10 +++++-- tests/fixtures.py | 35 ++++++++++++++++++++---- 4 files changed, 73 insertions(+), 9 deletions(-) diff --git a/capa/features/extractors/ida/function.py b/capa/features/extractors/ida/function.py index 30c16a1e..5fd6c84a 100644 --- a/capa/features/extractors/ida/function.py +++ b/capa/features/extractors/ida/function.py @@ -18,6 +18,8 @@ import idaapi import idautils import capa.features.extractors.ida.helpers +from capa.features.file import FunctionName +from capa.features.insn import API from capa.features.common import Feature, Characteristic from capa.features.address import Address, AbsoluteVirtualAddress from capa.features.extractors import loops @@ -50,10 +52,22 @@ def extract_recursive_call(fh: FunctionHandle): yield Characteristic("recursive call"), fh.address +def extract_function_alternative_names(fh: FunctionHandle): + """Get all alternative names for an address.""" + + for aname in capa.features.extractors.ida.helpers.get_function_alternative_names(fh.inner.start_ea): + yield FunctionName(aname), fh.address + + def extract_features(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]: for func_handler in FUNCTION_HANDLERS: for feature, addr in func_handler(fh): yield feature, addr -FUNCTION_HANDLERS = (extract_function_calls_to, extract_function_loop, extract_recursive_call) +FUNCTION_HANDLERS = ( + extract_function_calls_to, + extract_function_loop, + extract_recursive_call, + extract_function_alternative_names, +) diff --git a/capa/features/extractors/ida/helpers.py b/capa/features/extractors/ida/helpers.py index 365a2067..d3837e9f 100644 --- a/capa/features/extractors/ida/helpers.py +++ b/capa/features/extractors/ida/helpers.py @@ -20,6 +20,7 @@ import idaapi import ida_nalt import idautils import ida_bytes +import ida_funcs import ida_segment from capa.features.address import AbsoluteVirtualAddress @@ -436,3 +437,23 @@ def is_basic_block_return(bb: idaapi.BasicBlock) -> bool: def has_sib(oper: idaapi.op_t) -> bool: # via: https://reverseengineering.stackexchange.com/a/14300 return oper.specflag1 == 1 + + +def get_function_alternative_names(fva: int): + """Get all alternative names for an address.""" + + # Check indented comment + cmt = ida_bytes.get_cmt(fva, False) # False = non-repeatable + if cmt: + for line in cmt.split("\n"): + if line.startswith("Alternative name is '") and line.endswith("'"): + name = line[len("Alternative name is '") : -1] # Extract name between quotes + yield name + + # Check function comment + func_cmt = ida_funcs.get_func_cmt(idaapi.get_func(fva), False) + if func_cmt: + for line in func_cmt.split("\n"): + if line.startswith("Alternative name is '") and line.endswith("'"): + name = line[len("Alternative name is '") : -1] + yield name diff --git a/capa/features/extractors/ida/insn.py b/capa/features/extractors/ida/insn.py index 0e92b21f..4d06d99c 100644 --- a/capa/features/extractors/ida/insn.py +++ b/capa/features/extractors/ida/insn.py @@ -22,9 +22,11 @@ import idautils import capa.features.extractors.helpers import capa.features.extractors.ida.helpers +from capa.features.file import FunctionName from capa.features.insn import API, MAX_STRUCTURE_SIZE, Number, Offset, Mnemonic, OperandNumber, OperandOffset from capa.features.common import MAX_BYTES_FEATURE_SIZE, THUNK_CHAIN_DEPTH_DELTA, Bytes, String, Feature, Characteristic from capa.features.address import Address, AbsoluteVirtualAddress +from capa.features.extractors.ida.function import extract_function_alternative_names from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle # security cookie checks may perform non-zeroing XORs, these are expected within a certain @@ -129,8 +131,8 @@ def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) # not a function (start) return - if target_func.flags & idaapi.FUNC_LIB: - name = idaapi.get_name(target_func.start_ea) + name = idaapi.get_name(target_func.start_ea) + if target_func.flags & idaapi.FUNC_LIB or not name.startswith("sub_"): yield API(name), ih.address if name.startswith("_"): # some linkers may prefix linked routines with a `_` to avoid name collisions. @@ -139,6 +141,10 @@ def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) # see: https://stackoverflow.com/a/2628384/87207 yield API(name[1:]), ih.address + for altname in capa.features.extractors.ida.helpers.get_function_alternative_names(target_func.start_ea): + yield FunctionName(altname), ih.address + yield API(altname), ih.address + def extract_insn_number_features( fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle diff --git a/tests/fixtures.py b/tests/fixtures.py index f1906cf1..7d1de651 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -235,7 +235,30 @@ def get_idalib_extractor(path: Path): ida_auto.auto_wait() logger.debug("idalib: opened database.") - return capa.features.extractors.ida.extractor.IdaFeatureExtractor() + extractor = capa.features.extractors.ida.extractor.IdaFeatureExtractor() + fixup_idalib(path, extractor) + return extractor + + +def fixup_idalib(path: Path, extractor): + """ + IDA fixups to overcome differences between backends + """ + import idaapi + import ida_funcs + + def remove_library_id_flag(fva): + f = idaapi.get_func(fva) + f.flags &= ~ida_funcs.FUNC_LIB + ida_funcs.update_func(f) + + if "kernel32-64" in path.name: + # remove (correct) library function id, so we can test x64 thunk + remove_library_id_flag(0x1800202B0) + + if "al-khaser_x64" in path.name: + # remove (correct) library function id, so we can test x64 nested thunk + remove_library_id_flag(0x14004B4F0) @lru_cache(maxsize=1) @@ -1021,20 +1044,20 @@ FEATURE_PRESENCE_TESTS = sorted( ("pma16-01", "file", OS(OS_WINDOWS), True), ("pma16-01", "file", OS(OS_LINUX), False), ("mimikatz", "file", OS(OS_WINDOWS), True), - ("pma16-01", "function=0x404356", OS(OS_WINDOWS), True), - ("pma16-01", "function=0x404356,bb=0x4043B9", OS(OS_WINDOWS), True), + ("pma16-01", "function=0x401100", OS(OS_WINDOWS), True), + ("pma16-01", "function=0x401100,bb=0x401130", OS(OS_WINDOWS), True), ("mimikatz", "function=0x40105D", OS(OS_WINDOWS), True), ("pma16-01", "file", Arch(ARCH_I386), True), ("pma16-01", "file", Arch(ARCH_AMD64), False), ("mimikatz", "file", Arch(ARCH_I386), True), - ("pma16-01", "function=0x404356", Arch(ARCH_I386), True), - ("pma16-01", "function=0x404356,bb=0x4043B9", Arch(ARCH_I386), True), + ("pma16-01", "function=0x401100", Arch(ARCH_I386), True), + ("pma16-01", "function=0x401100,bb=0x401130", Arch(ARCH_I386), True), ("mimikatz", "function=0x40105D", Arch(ARCH_I386), True), ("pma16-01", "file", Format(FORMAT_PE), True), ("pma16-01", "file", Format(FORMAT_ELF), False), ("mimikatz", "file", Format(FORMAT_PE), True), # format is also a global feature - ("pma16-01", "function=0x404356", Format(FORMAT_PE), True), + ("pma16-01", "function=0x401100", Format(FORMAT_PE), True), ("mimikatz", "function=0x456BB9", Format(FORMAT_PE), True), # elf support ("7351f.elf", "file", OS(OS_LINUX), True), From 9c6db00775e5555cbf6fe689355567f9b105eb75 Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Tue, 16 Dec 2025 13:11:21 +0100 Subject: [PATCH 06/23] ci: add configuration for idalib tests --- .github/workflows/tests.yml | 49 +++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 4d14d41a..020addc9 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -206,3 +206,52 @@ jobs: GHIDRA_INSTALL_DIR: ${{ github.workspace }}/.github/ghidra/ghidra_${{ matrix.ghidra-version }}_PUBLIC run: pytest -v tests/test_ghidra_features.py + idalib-tests: + name: IDA tests for ${{ matrix.python-version }} + runs-on: ubuntu-22.04 + needs: [tests] + env: + IDA_LICENSE_ID: ${{ secrets.IDA_LICENSE_ID }} + strategy: + fail-fast: false + matrix: + python-version: ["3.10", "3.13"] + ida: + - version: 9.0 + slug: "release/9.0/ida-essential/ida-essential_90_x64linux.run" + - version: 9.2 + slug: "release/9.2/ida-essential/ida-essential_92_x64linux.run" + steps: + - name: Checkout capa with submodules + # do only run if IDA_LICENSE_ID is available, have to do this in every step, see https://github.com/orgs/community/discussions/26726#discussioncomment-3253118 + if: ${{ env.IDA_LICENSE_ID != 0 }} + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + with: + submodules: recursive + - name: Set up Python ${{ matrix.python-version }} + if: ${{ env.IDA_LICENSE_ID != 0 }} + uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0 + with: + python-version: ${{ matrix.python-version }} + - name: Setup uv + if: ${{ env.IDA_LICENSE_ID != 0 }} + uses: astral-sh/setup-uv@v6 + - name: Install dependencies + if: ${{ env.IDA_LICENSE_ID != 0 }} + run: sudo apt-get install -y libyaml-dev + - name: Install capa + if: ${{ env.IDA_LICENSE_ID != 0 }} + run: | + pip install -r requirements.txt + pip install -e .[dev,scripts] + pip install idapro + - name: Install IDA ${{ matrix.ida.version }} + if: ${{ env.IDA_LICENSE_ID != 0 }} + run: | + uv run hcli --disable-updates ida install --download-id ${{ matrix.ida.slug }} --license-id ${{ secrets.IDA_LICENSE_ID }} --set-default --yes + env: + HCLI_API_KEY: ${{ secrets.HCLI_API_KEY }} + IDA_LICENSE_ID: ${{ secrets.IDA_LICENSE_ID }} + - name: Run tests + if: ${{ env.IDA_LICENSE_ID != 0 }} + run: pytest -v tests/test_idalib_features.py # explicitly refer to the idalib tests for performance. other tests run above. From 132e64a99170b181e2c3040590c030e83e80e19e Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Tue, 16 Dec 2025 13:51:21 +0100 Subject: [PATCH 07/23] tests: idalib: better detect missing idapro package --- tests/test_idalib_features.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/test_idalib_features.py b/tests/test_idalib_features.py index ebefb8eb..016f450a 100644 --- a/tests/test_idalib_features.py +++ b/tests/test_idalib_features.py @@ -22,6 +22,11 @@ import capa.features.extractors.ida.idalib logger = logging.getLogger(__name__) idalib_present = capa.features.extractors.ida.idalib.has_idalib() +if idalib_present: + try: + import idapro # noqa: F401 [imported but unused] + except ImportError: + idalib_present = False @pytest.mark.skipif(idalib_present is False, reason="Skip idalib tests if the idalib Python API is not installed") From 82be20be64c8ad6da6f4a5538573c6c13eb88bbd Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Tue, 16 Dec 2025 16:54:15 +0100 Subject: [PATCH 08/23] loader: idalib: disable lumina see #2742 in which Lumina names overwrote names provided by debug info --- capa/loader.py | 35 +++++++++++++++++------------------ tests/fixtures.py | 22 +++++++++++++--------- 2 files changed, 30 insertions(+), 27 deletions(-) diff --git a/capa/loader.py b/capa/loader.py index c5446897..a79f444f 100644 --- a/capa/loader.py +++ b/capa/loader.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import io import os import logging import datetime @@ -23,24 +22,13 @@ from pathlib import Path from rich.console import Console from typing_extensions import assert_never -import capa.perf import capa.rules -import capa.engine -import capa.helpers import capa.version -import capa.render.json -import capa.rules.cache -import capa.render.default -import capa.render.verbose import capa.features.common import capa.features.freeze as frz -import capa.render.vverbose import capa.features.extractors -import capa.render.result_document import capa.render.result_document as rdoc import capa.features.extractors.common -import capa.features.extractors.base_extractor -import capa.features.extractors.cape.extractor from capa.rules import RuleSet from capa.engine import MatchResults from capa.exceptions import UnsupportedOSError, UnsupportedArchError, UnsupportedFormatError @@ -346,12 +334,23 @@ def get_extractor( import capa.features.extractors.ida.extractor logger.debug("idalib: opening database...") - # idalib writes to stdout (ugh), so we have to capture that - # so as not to screw up structured output. - with capa.helpers.stdout_redirector(io.BytesIO()): - with console.status("analyzing program...", spinner="dots"): - if idapro.open_database(str(input_path), run_auto_analysis=True): - raise RuntimeError("failed to analyze input file") + idapro.enable_console_messages(False) + with console.status("analyzing program...", spinner="dots"): + # we set the primary and secondary Lumina servers to 0.0.0.0 to disable Lumina, + # which sometimes provides bad names, including overwriting names from debug info. + # + # return values from open_database: + # 0 - Success (database not packed) + # 1 - Success (database was packed) + # 2 - User cancelled or 32-64 bit conversion failed + # 4 - Database initialization failed + # -1 - Generic errors (database already open, auto-analysis failed, etc.) + # -2 - User cancelled operation + ret = idapro.open_database( + str(input_path), run_auto_analysis=True, args="-Olumina:host=0.0.0.0 -Osecondary_lumina:host=0.0.0.0" + ) + if ret not in (0, 1): + raise RuntimeError("failed to analyze input file") logger.debug("idalib: waiting for analysis...") ida_auto.auto_wait() diff --git a/tests/fixtures.py b/tests/fixtures.py index daf55388..c660977e 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -20,8 +20,6 @@ from functools import lru_cache import pytest -import capa.main -import capa.helpers import capa.features.file import capa.features.insn import capa.features.common @@ -221,13 +219,19 @@ def get_idalib_extractor(path: Path): logger.debug("idalib: opening database...") idapro.enable_console_messages(False) - # - 0 - Success (database not packed) - # - 1 - Success (database was packed) - # - 2 - User cancelled or 32-64 bit conversion failed - # - 4 - Database initialization failed - # - -1 - Generic errors (database already open, auto-analysis failed, etc.) - # - -2 - User cancelled operation - ret = idapro.open_database(str(path), run_auto_analysis=True) + # we set the primary and secondary Lumina servers to 0.0.0.0 to disable Lumina, + # which sometimes provides bad names, including overwriting names from debug info. + # + # return values from open_database: + # 0 - Success (database not packed) + # 1 - Success (database was packed) + # 2 - User cancelled or 32-64 bit conversion failed + # 4 - Database initialization failed + # -1 - Generic errors (database already open, auto-analysis failed, etc.) + # -2 - User cancelled operation + ret = idapro.open_database( + str(path), run_auto_analysis=True, args="-Olumina:host=0.0.0.0 -Osecondary_lumina:host=0.0.0.0" + ) if ret not in (0, 1): raise RuntimeError("failed to analyze input file") From e1fd18480541ef2d74bea26ddbe67a9705fab61b Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Tue, 16 Dec 2025 17:24:39 +0100 Subject: [PATCH 09/23] ida: function: extract function name somehow we were extracting alternate names but not function names --- capa/features/extractors/ida/function.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/capa/features/extractors/ida/function.py b/capa/features/extractors/ida/function.py index 5fd6c84a..956db439 100644 --- a/capa/features/extractors/ida/function.py +++ b/capa/features/extractors/ida/function.py @@ -52,6 +52,18 @@ def extract_recursive_call(fh: FunctionHandle): yield Characteristic("recursive call"), fh.address +def extract_function_name(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]: + ea = fh.inner.start_ea + name = idaapi.get_name(ea) + yield FunctionName(name), fh.address + if name.startswith("_"): + # some linkers may prefix linked routines with a `_` to avoid name collisions. + # extract features for both the mangled and un-mangled representations. + # e.g. `_fwrite` -> `fwrite` + # see: https://stackoverflow.com/a/2628384/87207 + yield FunctionName(name[1:]), fh.address + + def extract_function_alternative_names(fh: FunctionHandle): """Get all alternative names for an address.""" @@ -69,5 +81,6 @@ FUNCTION_HANDLERS = ( extract_function_calls_to, extract_function_loop, extract_recursive_call, + extract_function_name, extract_function_alternative_names, ) From 87fb96d08b72fa10e59a72a7dd848f91d182527d Mon Sep 17 00:00:00 2001 From: mr-tz Date: Wed, 17 Dec 2025 19:29:19 +0000 Subject: [PATCH 10/23] load resource for test sample --- tests/fixtures.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/fixtures.py b/tests/fixtures.py index c660977e..9dd2f24d 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -219,6 +219,12 @@ def get_idalib_extractor(path: Path): logger.debug("idalib: opening database...") idapro.enable_console_messages(False) + + # load resource if explicitly needed for test + load_resource = "" + if "Lab 12-04.exe_" in path.name: + load_resource = " -R" + # we set the primary and secondary Lumina servers to 0.0.0.0 to disable Lumina, # which sometimes provides bad names, including overwriting names from debug info. # @@ -230,7 +236,7 @@ def get_idalib_extractor(path: Path): # -1 - Generic errors (database already open, auto-analysis failed, etc.) # -2 - User cancelled operation ret = idapro.open_database( - str(path), run_auto_analysis=True, args="-Olumina:host=0.0.0.0 -Osecondary_lumina:host=0.0.0.0" + str(path), run_auto_analysis=True, args=f"-Olumina:host=0.0.0.0 -Osecondary_lumina:host=0.0.0.0{load_resource}" ) if ret not in (0, 1): raise RuntimeError("failed to analyze input file") From 4fb6ac0d1b453be86badfe6d1bffe188ce4e952a Mon Sep 17 00:00:00 2001 From: mr-tz Date: Wed, 17 Dec 2025 19:29:38 +0000 Subject: [PATCH 11/23] add ida version to test matrix name --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 020addc9..c8d8de63 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -207,7 +207,7 @@ jobs: run: pytest -v tests/test_ghidra_features.py idalib-tests: - name: IDA tests for ${{ matrix.python-version }} + name: IDA ${{ matrix.ida.version }} tests for ${{ matrix.python-version }} runs-on: ubuntu-22.04 needs: [tests] env: From 200c8037dd991e8cdcf2bb9bc63ee590b6c9aaa1 Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Thu, 18 Dec 2025 13:59:50 +0100 Subject: [PATCH 12/23] tests: fix logging message --- tests/test_idalib_features.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_idalib_features.py b/tests/test_idalib_features.py index 016f450a..954cf6b8 100644 --- a/tests/test_idalib_features.py +++ b/tests/test_idalib_features.py @@ -43,7 +43,7 @@ def test_idalib_features(sample, scope, feature, expected): import idapro idapro.close_database(save=False) - logger.debug("opened database.") + logger.debug("closed database.") @pytest.mark.skipif(idalib_present is False, reason="Skip idalib tests if the idalib Python API is not installed") From c5808c4c41d87220bddc3b066a2cb931ef0ba6a2 Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Thu, 18 Dec 2025 14:00:50 +0100 Subject: [PATCH 13/23] tests: idalib: use 9.1 instead of 9.0 as min ver 9.0 doesn't support disabling lumina (or loading resources, for that matter, too) --- .github/workflows/tests.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index c8d8de63..d155c659 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -217,8 +217,8 @@ jobs: matrix: python-version: ["3.10", "3.13"] ida: - - version: 9.0 - slug: "release/9.0/ida-essential/ida-essential_90_x64linux.run" + - version: 9.1 + slug: "release/9.1/ida-essential/ida-essential_91_x64linux.run" - version: 9.2 slug: "release/9.2/ida-essential/ida-essential_92_x64linux.run" steps: From 1d561bd0382c5cd483473532bff9b9665b2435b8 Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Tue, 6 Jan 2026 10:04:22 +0100 Subject: [PATCH 14/23] tests: idalib: xfail two tests on 9.0 and 9.1 --- tests/test_idalib_features.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/tests/test_idalib_features.py b/tests/test_idalib_features.py index 954cf6b8..a16b7eca 100644 --- a/tests/test_idalib_features.py +++ b/tests/test_idalib_features.py @@ -13,11 +13,14 @@ # limitations under the License. import logging +from pathlib import Path import pytest import fixtures import capa.features.extractors.ida.idalib +from capa.features.file import FunctionName +from capa.features.insn import API logger = logging.getLogger(__name__) @@ -25,8 +28,12 @@ idalib_present = capa.features.extractors.ida.idalib.has_idalib() if idalib_present: try: import idapro # noqa: F401 [imported but unused] + import ida_kernwin + + kernel_version: str = ida_kernwin.get_kernel_version() except ImportError: idalib_present = False + kernel_version = "0.0" @pytest.mark.skipif(idalib_present is False, reason="Skip idalib tests if the idalib Python API is not installed") @@ -35,7 +42,16 @@ if idalib_present: fixtures.FEATURE_PRESENCE_TESTS + fixtures.FEATURE_SYMTAB_FUNC_TESTS, indirect=["sample", "scope"], ) -def test_idalib_features(sample, scope, feature, expected): +def test_idalib_features(sample: Path, scope, feature, expected): + if kernel_version in {"9.0", "9.1"} and sample.name.startswith("2bf18d"): + if isinstance(feature, (API, FunctionName)) and feature.value == "__libc_connect": + # see discussion here: https://github.com/mandiant/capa/pull/2742#issuecomment-3674146335 + # + # > i confirmed that there were changes in 9.2 related to the ELF loader handling names, + # > so I think its reasonable to conclude that 9.1 and older had a bug that + # > prevented this name from surfacing. + pytest.xfail(f"IDA {kernel_version} does not extract all ELF symbols") + try: fixtures.do_test_feature_presence(fixtures.get_idalib_extractor, sample, scope, feature, expected) finally: From f72bd49a5f31f1bb16087f092a0f69c93677e565 Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Tue, 6 Jan 2026 10:10:06 +0100 Subject: [PATCH 15/23] ci: enable testing of IDA 9.0 --- .github/workflows/tests.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index d155c659..95155b4f 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -217,6 +217,8 @@ jobs: matrix: python-version: ["3.10", "3.13"] ida: + - version: 9.0 + slug: "release/9.0/ida-essential/ida-essential_90_x64linux.run" - version: 9.1 slug: "release/9.1/ida-essential/ida-essential_91_x64linux.run" - version: 9.2 From 412ab62c421c52015cb481ace9fbad2dc93abcd3 Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Tue, 6 Jan 2026 13:37:53 +0100 Subject: [PATCH 16/23] ida: pep8 --- capa/features/extractors/ida/function.py | 1 - capa/features/extractors/ida/insn.py | 1 - 2 files changed, 2 deletions(-) diff --git a/capa/features/extractors/ida/function.py b/capa/features/extractors/ida/function.py index 956db439..28b77259 100644 --- a/capa/features/extractors/ida/function.py +++ b/capa/features/extractors/ida/function.py @@ -19,7 +19,6 @@ import idautils import capa.features.extractors.ida.helpers from capa.features.file import FunctionName -from capa.features.insn import API from capa.features.common import Feature, Characteristic from capa.features.address import Address, AbsoluteVirtualAddress from capa.features.extractors import loops diff --git a/capa/features/extractors/ida/insn.py b/capa/features/extractors/ida/insn.py index 4d06d99c..86fd14b8 100644 --- a/capa/features/extractors/ida/insn.py +++ b/capa/features/extractors/ida/insn.py @@ -26,7 +26,6 @@ from capa.features.file import FunctionName from capa.features.insn import API, MAX_STRUCTURE_SIZE, Number, Offset, Mnemonic, OperandNumber, OperandOffset from capa.features.common import MAX_BYTES_FEATURE_SIZE, THUNK_CHAIN_DEPTH_DELTA, Bytes, String, Feature, Characteristic from capa.features.address import Address, AbsoluteVirtualAddress -from capa.features.extractors.ida.function import extract_function_alternative_names from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle # security cookie checks may perform non-zeroing XORs, these are expected within a certain From 03cc901f7ba82d1e7b5f22801c6dcbf34f783469 Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Tue, 6 Jan 2026 14:42:16 +0100 Subject: [PATCH 17/23] tests: idalib: xfail resource test on 9.0 --- tests/test_idalib_features.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/test_idalib_features.py b/tests/test_idalib_features.py index a16b7eca..8604b94e 100644 --- a/tests/test_idalib_features.py +++ b/tests/test_idalib_features.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - import logging from pathlib import Path @@ -21,6 +20,7 @@ import fixtures import capa.features.extractors.ida.idalib from capa.features.file import FunctionName from capa.features.insn import API +from capa.features.common import Characteristic logger = logging.getLogger(__name__) @@ -52,6 +52,13 @@ def test_idalib_features(sample: Path, scope, feature, expected): # > prevented this name from surfacing. pytest.xfail(f"IDA {kernel_version} does not extract all ELF symbols") + if kernel_version in {"9.0"} and sample.name.startswith("Practical Malware Analysis Lab 12-04.exe_"): + if isinstance(feature, Characteristic) and feature.value == "embedded pe": + # see discussion here: https://github.com/mandiant/capa/pull/2742#issuecomment-3667086165 + # + # idalib for IDA 9.0 doesn't support argv arguments, so we can't ask that resources are loaded + pytest.xfail("idalib 9.0 does not support loading resource segments") + try: fixtures.do_test_feature_presence(fixtures.get_idalib_extractor, sample, scope, feature, expected) finally: From c89871f257cbc95d387272677b9a4e6973e7d5a0 Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Tue, 13 Jan 2026 13:24:18 +0100 Subject: [PATCH 18/23] ci: pin setup-uv --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 95155b4f..94032cff 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -237,7 +237,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Setup uv if: ${{ env.IDA_LICENSE_ID != 0 }} - uses: astral-sh/setup-uv@v6 + uses: astral-sh/setup-uv@61cb8a9741eeb8a550a1b8544337180c0fc8476b # v7.2.0 - name: Install dependencies if: ${{ env.IDA_LICENSE_ID != 0 }} run: sudo apt-get install -y libyaml-dev From ce67d99e49408358dfc33f13af8b6bdb9fc7f0b3 Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Tue, 13 Jan 2026 13:36:51 +0100 Subject: [PATCH 19/23] ida: skip function-name features for default names (sub_*) --- capa/features/extractors/ida/function.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/capa/features/extractors/ida/function.py b/capa/features/extractors/ida/function.py index 28b77259..6ff1f28d 100644 --- a/capa/features/extractors/ida/function.py +++ b/capa/features/extractors/ida/function.py @@ -54,6 +54,10 @@ def extract_recursive_call(fh: FunctionHandle): def extract_function_name(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]: ea = fh.inner.start_ea name = idaapi.get_name(ea) + if name.startswith("sub_"): + # skip default names, like "sub_401000" + return + yield FunctionName(name), fh.address if name.startswith("_"): # some linkers may prefix linked routines with a `_` to avoid name collisions. From 3646fcefa2c3093f794ca7cf11d593406064682a Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Tue, 13 Jan 2026 13:39:56 +0100 Subject: [PATCH 20/23] ida: helpers: refactor discovery of alternative names --- capa/features/extractors/ida/helpers.py | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/capa/features/extractors/ida/helpers.py b/capa/features/extractors/ida/helpers.py index d3837e9f..ea0b21c8 100644 --- a/capa/features/extractors/ida/helpers.py +++ b/capa/features/extractors/ida/helpers.py @@ -439,21 +439,14 @@ def has_sib(oper: idaapi.op_t) -> bool: return oper.specflag1 == 1 +def find_alternative_names(cmt: str): + for line in cmt.split("\n"): + if line.startswith("Alternative name is '") and line.endswith("'"): + name = line[len("Alternative name is '") : -1] # Extract name between quotes + yield name + + def get_function_alternative_names(fva: int): """Get all alternative names for an address.""" - - # Check indented comment - cmt = ida_bytes.get_cmt(fva, False) # False = non-repeatable - if cmt: - for line in cmt.split("\n"): - if line.startswith("Alternative name is '") and line.endswith("'"): - name = line[len("Alternative name is '") : -1] # Extract name between quotes - yield name - - # Check function comment - func_cmt = ida_funcs.get_func_cmt(idaapi.get_func(fva), False) - if func_cmt: - for line in func_cmt.split("\n"): - if line.startswith("Alternative name is '") and line.endswith("'"): - name = line[len("Alternative name is '") : -1] - yield name + yield from find_alternative_names(ida_bytes.get_cmt(fva, False) or "") + yield from find_alternative_names(ida_funcs.get_func_cmt(idaapi.get_func(fva), False) or "") From 8d6b878e793136849d40b19a104ec53e47a20dc1 Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Tue, 13 Jan 2026 13:41:24 +0100 Subject: [PATCH 21/23] ida: fix return value from open_database --- capa/loader.py | 5 ++--- tests/fixtures.py | 5 ++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/capa/loader.py b/capa/loader.py index a79f444f..daabaac1 100644 --- a/capa/loader.py +++ b/capa/loader.py @@ -340,8 +340,7 @@ def get_extractor( # which sometimes provides bad names, including overwriting names from debug info. # # return values from open_database: - # 0 - Success (database not packed) - # 1 - Success (database was packed) + # 0 - Success # 2 - User cancelled or 32-64 bit conversion failed # 4 - Database initialization failed # -1 - Generic errors (database already open, auto-analysis failed, etc.) @@ -349,7 +348,7 @@ def get_extractor( ret = idapro.open_database( str(input_path), run_auto_analysis=True, args="-Olumina:host=0.0.0.0 -Osecondary_lumina:host=0.0.0.0" ) - if ret not in (0, 1): + if ret != 0: raise RuntimeError("failed to analyze input file") logger.debug("idalib: waiting for analysis...") diff --git a/tests/fixtures.py b/tests/fixtures.py index 9dd2f24d..cd5214cb 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -229,8 +229,7 @@ def get_idalib_extractor(path: Path): # which sometimes provides bad names, including overwriting names from debug info. # # return values from open_database: - # 0 - Success (database not packed) - # 1 - Success (database was packed) + # 0 - Success # 2 - User cancelled or 32-64 bit conversion failed # 4 - Database initialization failed # -1 - Generic errors (database already open, auto-analysis failed, etc.) @@ -238,7 +237,7 @@ def get_idalib_extractor(path: Path): ret = idapro.open_database( str(path), run_auto_analysis=True, args=f"-Olumina:host=0.0.0.0 -Osecondary_lumina:host=0.0.0.0{load_resource}" ) - if ret not in (0, 1): + if ret != 0: raise RuntimeError("failed to analyze input file") logger.debug("idalib: waiting for analysis...") From 0686305f43920275c1c7ad606cacb45b5c373cb5 Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Tue, 13 Jan 2026 13:42:57 +0100 Subject: [PATCH 22/23] ida: loader: load resource sections to help discovery of embedded files --- capa/loader.py | 4 +++- tests/fixtures.py | 9 +++------ 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/capa/loader.py b/capa/loader.py index daabaac1..c0996610 100644 --- a/capa/loader.py +++ b/capa/loader.py @@ -339,6 +339,8 @@ def get_extractor( # we set the primary and secondary Lumina servers to 0.0.0.0 to disable Lumina, # which sometimes provides bad names, including overwriting names from debug info. # + # use -R to load resources, which can help us embedded PE files. + # # return values from open_database: # 0 - Success # 2 - User cancelled or 32-64 bit conversion failed @@ -346,7 +348,7 @@ def get_extractor( # -1 - Generic errors (database already open, auto-analysis failed, etc.) # -2 - User cancelled operation ret = idapro.open_database( - str(input_path), run_auto_analysis=True, args="-Olumina:host=0.0.0.0 -Osecondary_lumina:host=0.0.0.0" + str(input_path), run_auto_analysis=True, args="-Olumina:host=0.0.0.0 -Osecondary_lumina:host=0.0.0.0 -R" ) if ret != 0: raise RuntimeError("failed to analyze input file") diff --git a/tests/fixtures.py b/tests/fixtures.py index cd5214cb..bbe51e77 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -220,14 +220,11 @@ def get_idalib_extractor(path: Path): idapro.enable_console_messages(False) - # load resource if explicitly needed for test - load_resource = "" - if "Lab 12-04.exe_" in path.name: - load_resource = " -R" - # we set the primary and secondary Lumina servers to 0.0.0.0 to disable Lumina, # which sometimes provides bad names, including overwriting names from debug info. # + # use -R to load resources, which can help us embedded PE files. + # # return values from open_database: # 0 - Success # 2 - User cancelled or 32-64 bit conversion failed @@ -235,7 +232,7 @@ def get_idalib_extractor(path: Path): # -1 - Generic errors (database already open, auto-analysis failed, etc.) # -2 - User cancelled operation ret = idapro.open_database( - str(path), run_auto_analysis=True, args=f"-Olumina:host=0.0.0.0 -Osecondary_lumina:host=0.0.0.0{load_resource}" + str(path), run_auto_analysis=True, args="-Olumina:host=0.0.0.0 -Osecondary_lumina:host=0.0.0.0 -R" ) if ret != 0: raise RuntimeError("failed to analyze input file") From d1fc8446f6975f975c16248a2aeaf25c58acf454 Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Tue, 13 Jan 2026 14:06:43 +0100 Subject: [PATCH 23/23] pyproject: ida: silence SWIG related warnings from IDA bindings --- pyproject.toml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 3f39a439..36c53c5c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -109,6 +109,13 @@ dependencies = [ ] dynamic = ["version"] +[tool.pytest.ini_options] +filterwarnings = [ + "ignore:builtin type SwigPyPacked has no __module__ attribute:DeprecationWarning", + "ignore:builtin type SwigPyObject has no __module__ attribute:DeprecationWarning", + "ignore:builtin type swigvarlink has no __module__ attribute:DeprecationWarning", +] + [tool.setuptools.dynamic] version = {attr = "capa.version.__version__"}