From 855463b3195aa74edfab110a5b8299f09f082c48 Mon Sep 17 00:00:00 2001 From: Colton Gabertan <66766340+colton-gabertan@users.noreply.github.com> Date: Wed, 5 Jul 2023 17:48:45 -0700 Subject: [PATCH] Add Ghidra Backend CI configuration, fix CHANGELOG (#1529) * ghidra-backend ci working, fix CHANGELOG * temp: Add backend-ghidra to CI test workflow & add versioning to matrix * lint to avoid failure * linting for CI * cleanup CI, integrate actions, simplify installations * fix gradle repo * fix typo * fix submodule checkout for rules & test data * fix relative test data path * remove unnecessary steps * add flag to mkdir to resolve pipeline failure --- .github/mypy/mypy.ini | 3 ++ .github/workflows/tests.yml | 51 ++++++++++-------- CHANGELOG.md | 2 +- capa/features/extractors/ghidra/extractor.py | 5 +- capa/features/extractors/ghidra/global_.py | 28 +++++----- capa/main.py | 9 ++-- tests/fixtures.py | 12 ----- tests/test_ghidra_features.py | 55 -------------------- 8 files changed, 56 insertions(+), 109 deletions(-) delete mode 100644 tests/test_ghidra_features.py diff --git a/.github/mypy/mypy.ini b/.github/mypy/mypy.ini index c80af3de..505d5772 100644 --- a/.github/mypy/mypy.ini +++ b/.github/mypy/mypy.ini @@ -83,3 +83,6 @@ ignore_missing_imports = True [mypy-netnode.*] ignore_missing_imports = True + +[mypy-ghidra.*] +ignore_missing_imports = True diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 841044ee..39cda1a3 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -2,9 +2,9 @@ name: CI on: push: - branches: [ master ] + branches: [ master, backend-ghidra ] pull_request: - branches: [ master ] + branches: [ master, backend-ghidra ] # save workspaces to speed up testing env: @@ -140,16 +140,22 @@ jobs: ghidra-tests: name: Ghidra tests for ${{ matrix.python-version }} runs-on: ubuntu-20.04 + needs: [code_style, rule_linter] strategy: fail-fast: false matrix: python-version: ["3.7", "3.11"] java-version: ["17"] + gradle-version: ["7.3"] + ghidra-version: ["10.3"] + public-version: ["PUBLIC_20230510"] # for ghidra releases + jep-version: ["4.1.1"] + ghidrathon-version: ["2.1.0"] steps: - name: Checkout capa with submodules uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c # v3.3.0 with: - submodules: recursive + submodules: true - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@d27e3f3d7c64b4bbf8e4abfb9b63b83e846e0435 # v4.5.0 with: @@ -159,32 +165,33 @@ jobs: with: distribution: 'temurin' java-version: ${{ matrix.java-version }} - - name: Set up Gradle 7.3 # must be done manually due to no gradle build in capa - run: | - mkdir /opt/gradle - wget "https://services.gradle.org/distributions/gradle-7.3-bin.zip" -O /opt/gradle/gradle-7.3.zip - unzip /opt/gradle/gradle-7.3.zip -d /opt/gradle - - name: Install Ghidra 10.3 + - name: Set up Gradle ${{ matrix.gradle-version }} + uses: gradle/gradle-build-action@40b6781dcdec2762ad36556682ac74e31030cfe2 # v2.5.1 + with: + gradle-version: ${{ matrix.gradle-version }} + - name: Install Jep ${{ matrix.jep-version }} + run : pip install jep==${{ matrix.jep-version }} + - name: Install Ghidra ${{ matrix.ghidra-version }} run: | mkdir ./.github/ghidra - wget "https://github.com/NationalSecurityAgency/ghidra/releases/download/Ghidra_10.3_build/ghidra_10.3_PUBLIC_20230510.zip" -O ./.github/ghidra/ghidra_10.3_PUBLIC.zip - unzip .github/ghidra/ghidra_10.3_PUBLIC.zip -d .github/ghidra/ - - name: Install Jep 4.1.1 - run : | - mkdir ./.github/jep - wget "https://github.com/ninia/jep/archive/refs/tags/v4.1.1.zip" -O ./.github/jep/jep-4.1.1.zip - unzip .github/jep/jep-4.1.1.zip -d .github/jep/ - pip install .github/jep/jep-4.1.1/ + mkdir ./.github/ghidra/project + wget "https://github.com/NationalSecurityAgency/ghidra/releases/download/Ghidra_${{ matrix.ghidra-version }}_build/ghidra_${{ matrix.ghidra-version }}_${{ matrix.public-version }}.zip" -O ./.github/ghidra/ghidra_${{ matrix.ghidra-version }}_PUBLIC.zip + unzip .github/ghidra/ghidra_${{ matrix.ghidra-version }}_PUBLIC.zip -d .github/ghidra/ - name: Install Ghidrathon run : | mkdir ./.github/ghidrathon - wget "https://github.com/mandiant/Ghidrathon/archive/refs/tags/v2.1.0.zip" -O ./.github/ghidrathon/ghidrathon-2.1.0.zip - unzip .github/ghidrathon/ghidrathon-2.1.0.zip -d .github/ghidrathon/ + mkdir -p ~/.ghidra/.ghidra_${{ matrix.ghidra-version }}_PUBLIC/Extensions + wget "https://github.com/mandiant/Ghidrathon/archive/refs/tags/v${{ matrix.ghidrathon-version }}.zip" -O ./.github/ghidrathon/ghidrathon-${{ matrix.ghidrathon-version }}.zip + unzip .github/ghidrathon/ghidrathon-${{ matrix.ghidrathon-version }}.zip -d .github/ghidrathon/ workdir=$(pwd) - /opt/gradle/gradle-7.3/bin/gradle -p ./.github/ghidrathon/Ghidrathon-2.1.0/ -PGHIDRA_INSTALL_DIR=$workdir/.github/ghidra/ghidra_10.3_PUBLIC - unzip .github/ghidrathon/Ghidrathon-2.1.0/dist/*.zip -d $workdir/.github/ghidra/ghidra_10.3_PUBLIC/Extensions + gradle -p ./.github/ghidrathon/Ghidrathon-${{ matrix.ghidrathon-version }}/ -PGHIDRA_INSTALL_DIR=$workdir/.github/ghidra/ghidra_${{ matrix.ghidra-version }}_PUBLIC + unzip .github/ghidrathon/Ghidrathon-${{ matrix.ghidrathon-version }}/dist/*.zip -d ~/.ghidra/.ghidra_${{ matrix.ghidra-version }}_PUBLIC/Extensions - name: Install pyyaml run: sudo apt-get install -y libyaml-dev - name: Install capa - run: pip install -e .[dev] + run: pip install -e .[dev] + - name: Run tests + run: | # runs main.py for now... + .github/ghidra/ghidra_${{ matrix.ghidra-version }}_PUBLIC/support/analyzeHeadless .github/ghidra/project ghidra_test -Import ./tests/data/'Practical Malware Analysis Lab 01-01.dll_' + .github/ghidra/ghidra_${{ matrix.ghidra-version }}_PUBLIC/support/analyzeHeadless .github/ghidra/project ghidra_test -process 'Practical Malware Analysis Lab 01-01.dll_' -ScriptPath ./capa -PostScript main.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 27915d5e..ec03dad3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,6 @@ ### New Features - Utility script to detect feature overlap between new and existing CAPA rules [#1451](https://github.com/mandiant/capa/issues/1451) [@Aayush-Goel-04](https://github.com/aayush-goel-04) -- extractor: Implement Ghidra Backend [@colton-gabertan](https://github.com/colton-gabertan) ### Breaking Changes @@ -17,6 +16,7 @@ - communication/mailslot/read-from-mailslot nick.simonian@mandiant.com - nursery/hash-data-using-sha512managed-in-dotnet jonathanlepore@google.com - nursery/compiled-with-exescript jonathanlepore@google.com +- ### Bug Fixes diff --git a/capa/features/extractors/ghidra/extractor.py b/capa/features/extractors/ghidra/extractor.py index 2f5a593a..65b06706 100644 --- a/capa/features/extractors/ghidra/extractor.py +++ b/capa/features/extractors/ghidra/extractor.py @@ -1,11 +1,12 @@ import logging import contextlib -from typing import Tuple, Iterator +from typing import List, Tuple, Iterator +import capa.features.extractors.ghidra.global_ from capa.features.common import Feature from capa.features.address import Address, AbsoluteVirtualAddress from capa.features.extractors.base_extractor import FeatureExtractor -import capa.features.extractors.ghidra.global_ + class GhidraFeatureExtractor(FeatureExtractor): def __init__(self): diff --git a/capa/features/extractors/ghidra/global_.py b/capa/features/extractors/ghidra/global_.py index 2409bf0c..33a1237c 100644 --- a/capa/features/extractors/ghidra/global_.py +++ b/capa/features/extractors/ghidra/global_.py @@ -1,13 +1,17 @@ import logging import contextlib from io import BytesIO -from typing import Tuple, Iterator +from typing import Tuple, Iterator + +import ghidra import capa.features.extractors.elf from capa.features.common import OS, ARCH_I386, ARCH_AMD64, OS_WINDOWS, Arch, Feature from capa.features.address import NO_ADDRESS, Address logger = logging.getLogger(__name__) +currentProgram: ghidra.program.database.ProgramDB + def extract_os() -> Iterator[Tuple[Feature, Address]]: format_name: str = currentProgram.getExecutableFormat() @@ -16,16 +20,16 @@ def extract_os() -> Iterator[Tuple[Feature, Address]]: yield OS(OS_WINDOWS), NO_ADDRESS elif "ELF" in format_name: - program_memory = currentProgram.getMemory() # ghidra.program.database.mem.MemoryMapDB - fbytes_list = program_memory.getAllFileBytes() # java.util.List - fbytes = fbytes_list[0] # ghidra.program.database.mem.FileBytes + program_memory = currentProgram.getMemory() # ghidra.program.database.mem.MemoryMapDB + fbytes_list = program_memory.getAllFileBytes() # java.util.List + fbytes = fbytes_list[0] # ghidra.program.database.mem.FileBytes # Java likes to return signed ints, so we must convert them # back into unsigned bytes manually and write to BytesIO - # note: May be deprecated if Jep has implements better support for Java Lists - pb_arr = b'' + # note: May be deprecated if Jep has implements better support for Java Lists + pb_arr = b"" for i in range(fbytes.getSize()): - pb_arr = pb_arr + (fbytes.getOriginalByte(i) & 0xff).to_bytes(1, 'little') + pb_arr = pb_arr + (fbytes.getOriginalByte(i) & 0xFF).to_bytes(1, "little") buf = BytesIO(pb_arr) with contextlib.closing(buf) as f: @@ -49,15 +53,15 @@ def extract_os() -> Iterator[Tuple[Feature, Address]]: def extract_arch() -> Iterator[Tuple[Feature, Address]]: - lang_id = currentProgram.getMetadata().get('Language ID') + lang_id = currentProgram.getMetadata().get("Language ID") - if 'x86' in lang_id and '64' in lang_id: + if "x86" in lang_id and "64" in lang_id: yield Arch(ARCH_AMD64), NO_ADDRESS - elif 'x86' in lang_id and '32' in lang_id: + elif "x86" in lang_id and "32" in lang_id: yield Arch(ARCH_I386), NO_ADDRESS - elif 'x86' not in lang_id: + elif "x86" not in lang_id: logger.debug("unsupported architecture: non-32-bit nor non-64-bit intel") return @@ -68,5 +72,3 @@ def extract_arch() -> Iterator[Tuple[Feature, Address]]: # for (1), this logic will need to be updated as the format is implemented. logger.debug("unsupported architecture: %s", lang_id) return - - diff --git a/capa/main.py b/capa/main.py index d4978d31..8326759e 100644 --- a/capa/main.py +++ b/capa/main.py @@ -1319,9 +1319,11 @@ def ida_main(): def ghidra_main(): import capa.rules - #import capa.render.default - #import capa.features.extractors.ghidra.extractor + + # import capa.render.default + # import capa.features.extractors.ghidra.extractor import capa.features.extractors.ghidra.global_ + from capa.features.common import Feature logging.basicConfig(level=logging.INFO) logging.getLogger().setLevel(logging.INFO) @@ -1336,7 +1338,7 @@ def ghidra_main(): rules_path = os.path.join(get_default_root(), "rules") logger.debug("rule path: %s", rules_path) rules = get_rules([rules_path]) - + # temp test for OS & ARCH extractions globl_features: List[Tuple[Feature, Address]] = [] globl_features.extend(capa.features.extractors.ghidra.global_.extract_os()) @@ -1369,4 +1371,3 @@ if __name__ == "__main__": ghidra_main() else: sys.exit(main()) - diff --git a/tests/fixtures.py b/tests/fixtures.py index 612f49a7..04c9c53b 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -183,18 +183,6 @@ def get_binja_extractor(path): return extractor -@lru_cache(maxsize=1) -def get_ghidra_extractor(path): - import capa.features.extractors.ghidra.extractor - - extractor = capa.features.extractors.ghidra.extractor.GhidraFeatureExtractor(path) - - # overload the extractor so that the fixture exposes `extractor.path` - setattr(extractor, "path", path) - - return extractor - - def extract_global_features(extractor): features = collections.defaultdict(set) for feature, va in extractor.extract_global_features(): diff --git a/tests/test_ghidra_features.py b/tests/test_ghidra_features.py deleted file mode 100644 index ff8e6485..00000000 --- a/tests/test_ghidra_features.py +++ /dev/null @@ -1,55 +0,0 @@ -import sys -import logging -import os.path -import binascii -import traceback - -import pytest - -try: - sys.path.append(os.path.dirname(__file__)) - import fixtures - from fixtures import * -finally: - sys.path.pop() - - -logger = logging.getLogger("test_ghidra_features") - - -# We need to skip the ghidra test if we cannot import ghidra modules, e.g., in GitHub CI. -ghidra_present: bool = False -try: - import ghidra.program.flatapi as flatapi - ghidraapi = flatapi.FlatProgramAPI(currentProgram) - - try: - current_program_test = ghidraapi.getCurrentProgram() - except RuntimeError as e: - logger.warning("Ghidra runtime not detected") - else: - ghidra_present = True -except ImportError: - pass - - -@pytest.mark.skipif(ghidra_present is False, reason="Skip ghidra tests if the ghidra Python API is not installed") -@fixtures.parametrize( - "sample,scope,feature,expected", - fixtures.FEATURE_PRESENCE_TESTS, - indirect=["sample", "scope"], -) -def test_ghidra_features(sample, scope, feature, expected): - fixtures.do_test_feature_presence(fixtures.get_ghidra_extractor, sample, scope, feature, expected) - - -@pytest.mark.skipif(ghidra_present is False, reason="Skip ghidra tests if the ghidra Python API is not installed") -@fixtures.parametrize( - "sample,scope,feature,expected", - fixtures.FEATURE_COUNT_TESTS, - indirect=["sample", "scope"], -) -def test_ghidra_feature_counts(sample, scope, feature, expected): - fixtures.do_test_feature_count(fixtures.get_ghidra_extractor, sample, scope, feature, expected) - -