diff --git a/.github/pyinstaller/pyinstaller.spec b/.github/pyinstaller/pyinstaller.spec index e546ce1a..641da859 100644 --- a/.github/pyinstaller/pyinstaller.spec +++ b/.github/pyinstaller/pyinstaller.spec @@ -74,6 +74,7 @@ a = Analysis( # only be installed locally. "binaryninja", "ida", + "ghidra", # remove once https://github.com/mandiant/capa/issues/2681 has # been addressed by PyInstaller "pkg_resources", diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 4a0da8c4..a7ee9663 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -28,6 +28,10 @@ jobs: artifact_name: capa asset_name: linux python_version: '3.10' + # for Ghidra + java-version: '21' + ghidra-version: '12.0' + public-version: 'PUBLIC_20251205' - os: ubuntu-22.04-arm artifact_name: capa asset_name: linux-arm64 @@ -106,6 +110,23 @@ jobs: run: | 7z e "tests/data/dynamic/cape/v2.2/d46900384c78863420fb3e297d0a2f743cd2b6b3f7f82bf64059a168e07aceb7.json.gz" dist/capa -d "d46900384c78863420fb3e297d0a2f743cd2b6b3f7f82bf64059a168e07aceb7.json" + - name: Set up Java ${{ matrix.java-version }} + if: matrix.os == 'ubuntu-22.04' && matrix.python_version == '3.10' + uses: actions/setup-java@387ac29b308b003ca37ba93a6cab5eb57c8f5f93 # v4.0.0 + with: + distribution: 'temurin' + java-version: ${{ matrix.java-version }} + - name: Install Ghidra ${{ matrix.ghidra-version }} + if: matrix.os == 'ubuntu-22.04' && matrix.python_version == '3.10' + run: | + mkdir ./.github/ghidra + wget "https://github.com/NationalSecurityAgency/ghidra/releases/download/Ghidra_${{ matrix.ghidra-version }}_build/ghidra_${{ matrix.ghidra-version }}_${{ matrix.public-version }}.zip" -O ./.github/ghidra/ghidra_${{ matrix.ghidra-version }}_PUBLIC.zip + unzip .github/ghidra/ghidra_${{ matrix.ghidra-version }}_PUBLIC.zip -d .github/ghidra/ + - name: Does it run (Ghidra)? 
+ if: matrix.os == 'ubuntu-22.04' && matrix.python_version == '3.10' + env: + GHIDRA_INSTALL_DIR: ${{ github.workspace }}/.github/ghidra/ghidra_${{ matrix.ghidra-version }}_PUBLIC + run: dist/capa -b ghidra -d "tests/data/Practical Malware Analysis Lab 01-01.dll_" - uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4.3.1 with: name: ${{ matrix.asset_name }} diff --git a/README.md b/README.md index 59ca03de..830b6739 100644 --- a/README.md +++ b/README.md @@ -291,9 +291,20 @@ It also uses your local changes to the .idb to extract better features, such as ![capa + IDA Pro integration](https://github.com/mandiant/capa/blob/master/doc/img/explorer_expanded.png) # Ghidra integration -If you use Ghidra, then you can use the [capa + Ghidra integration](/capa/ghidra/) to run capa's analysis directly on your Ghidra database and render the results in Ghidra's user interface. - +![capa + Ghidra integration](https://github.com/mandiant/capa/blob/master/doc/img/ghidra_backend_logo.png) + +If you use Ghidra, then you can instruct capa to analyze your samples using Ghidra. capa creates a temporary Ghidra project and uses PyGhidra to import the sample, analyze it, and extract features. The temporary project is deleted after analysis. 
+ +## Prerequisites + +- Ghidra >= 12.0 must be installed and available to PyGhidra (e.g. set the `GHIDRA_INSTALL_DIR` environment variable to your Ghidra installation directory) + +## Usage + +```bash +$ capa -b ghidra /path/to/sample +``` # blog posts - [Dynamic capa: Exploring Executable Run-Time Behavior with the CAPE Sandbox](https://www.mandiant.com/resources/blog/dynamic-capa-executable-behavior-cape-sandbox) diff --git a/capa/features/extractors/ghidra/extractor.py b/capa/features/extractors/ghidra/extractor.py index f189e31b..7aa6f027 100644 --- a/capa/features/extractors/ghidra/extractor.py +++ b/capa/features/extractors/ghidra/extractor.py @@ -19,6 +19,7 @@ from typing import Iterator import capa.features.extractors.ghidra.file import capa.features.extractors.ghidra.insn import capa.features.extractors.ghidra.global_ +import capa.features.extractors.ghidra.helpers as ghidra_helpers import capa.features.extractors.ghidra.function import capa.features.extractors.ghidra.basicblock from capa.features.common import Feature @@ -36,7 +37,6 @@ class GhidraFeatureExtractor(StaticFeatureExtractor): def __init__(self, ctx_manager=None, tmpdir=None): self.ctx_manager = ctx_manager self.tmpdir = tmpdir - import capa.features.extractors.ghidra.helpers as ghidra_helpers super().__init__( SampleHashes( @@ -66,8 +66,6 @@ class GhidraFeatureExtractor(StaticFeatureExtractor): weakref.finalize(self, cleanup, self.ctx_manager, self.tmpdir) def get_base_address(self): - import capa.features.extractors.ghidra.helpers as ghidra_helpers - return AbsoluteVirtualAddress(ghidra_helpers.get_current_program().getImageBase().getOffset()) def extract_global_features(self): @@ -77,7 +75,6 @@ class GhidraFeatureExtractor(StaticFeatureExtractor): yield from capa.features.extractors.ghidra.file.extract_features() def get_functions(self) -> Iterator[FunctionHandle]: - import capa.features.extractors.ghidra.helpers as ghidra_helpers for fhandle in ghidra_helpers.get_function_symbols(): fh: FunctionHandle = FunctionHandle( @@ -89,7 +86,6 @@ class 
GhidraFeatureExtractor(StaticFeatureExtractor): @staticmethod def get_function(addr: int) -> FunctionHandle: - import capa.features.extractors.ghidra.helpers as ghidra_helpers func = ghidra_helpers.get_flat_api().getFunctionContaining(ghidra_helpers.get_flat_api().toAddr(addr)) return FunctionHandle(address=AbsoluteVirtualAddress(func.getEntryPoint().getOffset()), inner=func) @@ -98,7 +94,6 @@ class GhidraFeatureExtractor(StaticFeatureExtractor): yield from capa.features.extractors.ghidra.function.extract_features(fh) def get_basic_blocks(self, fh: FunctionHandle) -> Iterator[BBHandle]: - import capa.features.extractors.ghidra.helpers as ghidra_helpers yield from ghidra_helpers.get_function_blocks(fh) @@ -106,7 +101,6 @@ class GhidraFeatureExtractor(StaticFeatureExtractor): yield from capa.features.extractors.ghidra.basicblock.extract_features(fh, bbh) def get_instructions(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[InsnHandle]: - import capa.features.extractors.ghidra.helpers as ghidra_helpers yield from ghidra_helpers.get_insn_in_range(bbh) diff --git a/capa/ghidra/README.md b/capa/ghidra/README.md deleted file mode 100644 index 4f6035ad..00000000 --- a/capa/ghidra/README.md +++ /dev/null @@ -1,17 +0,0 @@ -
- -
- -# capa + Ghidra - -[capa](https://github.com/mandiant/capa) is the FLARE team’s open-source tool that detects capabilities in executable files. [Ghidra](https://github.com/NationalSecurityAgency/ghidra) is an open-source software reverse engineering framework. capa + Ghidra brings capa’s detection capabilities to Ghidra using [PyGhidra](https://github.com/NationalSecurityAgency/ghidra/tree/master/Ghidra/Features/PyGhidra). - -## Prerequisites - -- Ghidra >= 12.0 must be installed and available to PyGhidra (e.g. set `GHIDRA_INSTALL_DIR` environment variable) - -## Usage - -```bash -$ capa -b ghidra /path/to/sample -``` diff --git a/doc/img/ghidra_headless_analyzer.png b/doc/img/ghidra_headless_analyzer.png deleted file mode 100644 index 3f3c68ed..00000000 Binary files a/doc/img/ghidra_headless_analyzer.png and /dev/null differ diff --git a/doc/img/ghidra_script_mngr_output.png b/doc/img/ghidra_script_mngr_output.png deleted file mode 100755 index 6d3024c9..00000000 Binary files a/doc/img/ghidra_script_mngr_output.png and /dev/null differ diff --git a/doc/img/ghidra_script_mngr_rules.png b/doc/img/ghidra_script_mngr_rules.png deleted file mode 100755 index 7bce6247..00000000 Binary files a/doc/img/ghidra_script_mngr_rules.png and /dev/null differ diff --git a/doc/img/ghidra_script_mngr_verbosity.png b/doc/img/ghidra_script_mngr_verbosity.png deleted file mode 100755 index ae23246c..00000000 Binary files a/doc/img/ghidra_script_mngr_verbosity.png and /dev/null differ