mirror of
https://github.com/mandiant/capa.git
synced 2025-12-12 15:49:46 -08:00
Merge pull request #1 from colton-gabertan/ghidra_backend
Ghidra backend
This commit is contained in:
54
.github/workflows/tests.yml
vendored
54
.github/workflows/tests.yml
vendored
@@ -1,6 +1,6 @@
|
||||
name: CI
|
||||
|
||||
on:
|
||||
on:
|
||||
push:
|
||||
branches: [ master ]
|
||||
pull_request:
|
||||
@@ -136,3 +136,55 @@ jobs:
|
||||
env:
|
||||
BN_LICENSE: ${{ secrets.BN_LICENSE }}
|
||||
run: pytest -v tests/test_binja_features.py # explicitly refer to the binja tests for performance. other tests run above.
|
||||
|
||||
ghidra-tests:
|
||||
name: Ghidra tests for ${{ matrix.python-version }}
|
||||
runs-on: ubuntu-20.04
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
python-version: ["3.7", "3.11"]
|
||||
java-version: ["17"]
|
||||
steps:
|
||||
- name: Checkout capa with submodules
|
||||
uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c # v3.3.0
|
||||
with:
|
||||
submodules: recursive
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@d27e3f3d7c64b4bbf8e4abfb9b63b83e846e0435 # v4.5.0
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Set up Java ${{ matrix.java-version }}
|
||||
uses: actions/setup-java@5ffc13f4174014e2d4d4572b3d74c3fa61aeb2c2 # v3
|
||||
with:
|
||||
distribution: 'temurin'
|
||||
java-version: ${{ matrix.java-version }}
|
||||
- name: Set up Gradle 7.3 # must be done manually due to no gradle build in capa
|
||||
run: |
|
||||
mkdir /opt/gradle
|
||||
wget "https://services.gradle.org/distributions/gradle-7.3-bin.zip" -O /opt/gradle/gradle-7.3.zip
|
||||
unzip /opt/gradle/gradle-7.3.zip -d /opt/gradle
|
||||
- name: Install Ghidra 10.3
|
||||
run: |
|
||||
mkdir ./.github/ghidra
|
||||
wget "https://github.com/NationalSecurityAgency/ghidra/releases/download/Ghidra_10.3_build/ghidra_10.3_PUBLIC_20230510.zip" -O ./.github/ghidra/ghidra_10.3_PUBLIC.zip
|
||||
unzip .github/ghidra/ghidra_10.3_PUBLIC.zip -d .github/ghidra/
|
||||
- name: Install Jep 4.1.1
|
||||
run : |
|
||||
mkdir ./.github/jep
|
||||
wget "https://github.com/ninia/jep/archive/refs/tags/v4.1.1.zip" -O ./.github/jep/jep-4.1.1.zip
|
||||
unzip .github/jep/jep-4.1.1.zip -d .github/jep/
|
||||
pip install .github/jep/jep-4.1.1/
|
||||
- name: Install Ghidrathon
|
||||
run : |
|
||||
mkdir ./.github/ghidrathon
|
||||
wget "https://github.com/mandiant/Ghidrathon/archive/refs/tags/v2.1.0.zip" -O ./.github/ghidrathon/ghidrathon-2.1.0.zip
|
||||
unzip .github/ghidrathon/ghidrathon-2.1.0.zip -d .github/ghidrathon/
|
||||
workdir=$(pwd)
|
||||
/opt/gradle/gradle-7.3/bin/gradle -p ./.github/ghidrathon/Ghidrathon-2.1.0/ -PGHIDRA_INSTALL_DIR=$workdir/.github/ghidra/ghidra_10.3_PUBLIC
|
||||
unzip .github/ghidrathon/Ghidrathon-2.1.0/dist/*.zip -d $workdir/.github/ghidra/ghidra_10.3_PUBLIC/Extensions
|
||||
- name: Install pyyaml
|
||||
run: sudo apt-get install -y libyaml-dev
|
||||
- name: Install capa
|
||||
run: pip install -e .[dev]
|
||||
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
|
||||
### New Features
|
||||
- Utility script to detect feature overlap between new and existing CAPA rules [#1451](https://github.com/mandiant/capa/issues/1451) [@Aayush-Goel-04](https://github.com/aayush-goel-04)
|
||||
- extractor: Implement Ghidra Backend [@colton-gabertan](https://github.com/colton-gabertan)
|
||||
|
||||
### Breaking Changes
|
||||
|
||||
@@ -16,7 +17,7 @@
|
||||
- communication/mailslot/read-from-mailslot nick.simonian@mandiant.com
|
||||
- nursery/hash-data-using-sha512managed-in-dotnet jonathanlepore@google.com
|
||||
- nursery/compiled-with-exescript jonathanlepore@google.com
|
||||
-
|
||||
|
||||
|
||||
### Bug Fixes
|
||||
- extractor: update vivisect Arch extraction #1334 @mr-tz
|
||||
|
||||
0
capa/features/extractors/ghidra/__init__.py
Normal file
0
capa/features/extractors/ghidra/__init__.py
Normal file
15
capa/features/extractors/ghidra/extractor.py
Normal file
15
capa/features/extractors/ghidra/extractor.py
Normal file
@@ -0,0 +1,15 @@
|
||||
import logging
|
||||
import contextlib
|
||||
from typing import List, Tuple, Iterator
|
||||
|
||||
from capa.features.common import Feature
|
||||
from capa.features.address import Address, AbsoluteVirtualAddress
|
||||
from capa.features.extractors.base_extractor import FeatureExtractor
|
||||
import capa.features.extractors.ghidra.global_
|
||||
|
||||
class GhidraFeatureExtractor(FeatureExtractor):
    """Feature extractor for the Ghidra backend.

    The OS and architecture features are computed once, when the extractor is
    constructed, and replayed by `extract_global_features()`.

    NOTE(review): only global features are implemented in this class so far.
    """

    def __init__(self):
        super().__init__()
        # global features describe the whole program, so collect them a single
        # time up front rather than on every request.
        self.global_features: List[Tuple[Feature, Address]] = []
        self.global_features.extend(capa.features.extractors.ghidra.global_.extract_os())
        self.global_features.extend(capa.features.extractors.ghidra.global_.extract_arch())

    def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]:
        """Yield the (feature, address) pairs collected in `__init__`."""
        yield from self.global_features
|
||||
0
capa/features/extractors/ghidra/file.py
Normal file
0
capa/features/extractors/ghidra/file.py
Normal file
81
capa/features/extractors/ghidra/global_.py
Normal file
81
capa/features/extractors/ghidra/global_.py
Normal file
@@ -0,0 +1,81 @@
|
||||
import logging
|
||||
import contextlib
|
||||
from io import BytesIO
|
||||
from typing import Tuple, Iterator
|
||||
|
||||
# imports for clarity
|
||||
# note: currentProgram is a static variable accessible in
|
||||
# the specific ghidra runtime environment
|
||||
import ghidra.program.database.mem
|
||||
import ghidra.program.flatapi as flatapi
|
||||
ghidraapi = flatapi.FlatProgramAPI(currentProgram) # Ghidrathon hacks :)
|
||||
|
||||
import capa.features.extractors.elf
|
||||
from capa.features.common import OS, ARCH_I386, ARCH_AMD64, OS_WINDOWS, Arch, Feature
|
||||
from capa.features.address import NO_ADDRESS, Address
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
def extract_os() -> Iterator[Tuple[Feature, Address]]:
    """Yield the OS feature for the program currently loaded in Ghidra.

    - PE programs are reported as Windows.
    - For ELF programs, the original file bytes are read back out of Ghidra
      and handed to `capa.features.extractors.elf.detect_elf_os`.
    - Any other executable format yields nothing.

    Yields:
      Tuple[Feature, Address]: at most one `OS` feature, at `NO_ADDRESS`.
    """
    current_program = ghidraapi.getCurrentProgram()
    format_name: str = current_program.getExecutableFormat()

    if "PE" in format_name:
        yield OS(OS_WINDOWS), NO_ADDRESS

    elif "ELF" in format_name:
        program_memory = current_program.getMemory()  # ghidra.program.database.mem.MemoryMapDB
        fbytes_list = program_memory.getAllFileBytes()  # java.util.List<FileBytes>

        if len(fbytes_list) == 0:
            # without the original file bytes there is nothing to parse.
            logger.debug("no file bytes available for %s program, will not guess OS", format_name)
            return

        fbytes = fbytes_list[0]  # ghidra.program.database.mem.FileBytes

        # Java bytes are signed, so mask each one back into the range 0..255.
        # build the buffer in a single pass: appending to an immutable `bytes`
        # object one byte at a time is quadratic in the size of the file.
        # note: may become unnecessary if Jep gains better support for Java byte arrays.
        data = bytes(fbytes.getOriginalByte(i) & 0xFF for i in range(fbytes.getSize()))

        with BytesIO(data) as f:
            elf_os = capa.features.extractors.elf.detect_elf_os(f)

        yield OS(elf_os), NO_ADDRESS

    else:
        # we likely end up here:
        #  1. handling shellcode, or
        #  2. handling a new file format (e.g. macho)
        #
        # for (1) we can't do much - it's shellcode and all bets are off.
        # we could maybe accept a further CLI argument to specify the OS,
        # but i think this would be rarely used.
        # rules that rely on OS conditions will fail to match on shellcode.
        #
        # for (2), this logic will need to be updated as the format is implemented.
        logger.debug("unsupported file format: %s, will not guess OS", format_name)
|
||||
|
||||
|
||||
def extract_arch() -> Iterator[Tuple[Feature, Address]]:
    """Yield the architecture feature for the program currently loaded in Ghidra.

    The decision is based on the "Language ID" entry of the program metadata:
    an ID containing both "x86" and "64" is reported as AMD64, one containing
    both "x86" and "32" as i386. Anything else yields nothing.

    Yields:
      Tuple[Feature, Address]: at most one `Arch` feature, at `NO_ADDRESS`.
    """
    current_program = ghidraapi.getCurrentProgram()
    lang_id = current_program.getMetadata().get("Language ID")

    if lang_id is None:
        # a missing entry would otherwise raise TypeError on the `in` checks below.
        logger.debug("unsupported architecture: program metadata has no Language ID")
        return

    if "x86" not in lang_id:
        # we likely end up here when handling a new architecture (e.g. aarch64).
        # this logic will need to be updated as the architecture is implemented.
        logger.debug("unsupported architecture: %s", lang_id)
        return

    if "64" in lang_id:
        yield Arch(ARCH_AMD64), NO_ADDRESS

    elif "32" in lang_id:
        yield Arch(ARCH_I386), NO_ADDRESS

    else:
        # x86, but neither a 32-bit nor a 64-bit language.
        logger.debug("unsupported architecture: non-32-bit nor non-64-bit intel: %s", lang_id)
|
||||
|
||||
|
||||
39
capa/main.py
39
capa/main.py
@@ -1317,6 +1317,33 @@ def ida_main():
|
||||
print(capa.render.default.render(meta, rules, capabilities))
|
||||
|
||||
|
||||
def ghidra_main():
    """Entry point used when capa is run from within Ghidra.

    Configures logging, loads the default embedded rule set, then extracts the
    global (OS and architecture) features via the Ghidra backend and prints them.
    """
    import capa.rules

    # not enabled yet:
    #   import capa.render.default
    #   import capa.features.extractors.ghidra.extractor
    import capa.features.extractors.ghidra.global_

    logging.basicConfig(level=logging.INFO)
    logging.getLogger().setLevel(logging.INFO)

    rule_banner = (
        "-" * 80,
        " Using default embedded rules.",
        " ",
        " You can see the current default rule set here:",
        " https://github.com/mandiant/capa-rules",
        "-" * 80,
    )
    for banner_line in rule_banner:
        logger.debug(banner_line)

    rules_path = os.path.join(get_default_root(), "rules")
    logger.debug("rule path: %s", rules_path)
    rules = get_rules([rules_path])

    # temp test for OS & ARCH extractions
    ghidra_global = capa.features.extractors.ghidra.global_
    globl_features: List[Tuple[Feature, Address]] = [
        *ghidra_global.extract_os(),
        *ghidra_global.extract_arch(),
    ]
    print(globl_features)
|
||||
|
||||
|
||||
def is_runtime_ida():
|
||||
try:
|
||||
import idc
|
||||
@@ -1326,8 +1353,20 @@ def is_runtime_ida():
|
||||
return True
|
||||
|
||||
|
||||
def is_runtime_ghidra():
    """Report whether the Ghidra Python API is importable.

    Returns:
      bool: True if `ghidra.program.flatapi` can be imported, False otherwise.
    """
    try:
        # imported purely as a probe; the module itself is not used here.
        import ghidra.program.flatapi
    except ImportError:
        return False
    return True
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
if is_runtime_ida():
|
||||
ida_main()
|
||||
elif is_runtime_ghidra():
|
||||
ghidra_main()
|
||||
else:
|
||||
sys.exit(main())
|
||||
|
||||
|
||||
2
rules
2
rules
Submodule rules updated: 188e65528e...312d4cad89
@@ -183,6 +183,18 @@ def get_binja_extractor(path):
|
||||
return extractor
|
||||
|
||||
|
||||
@lru_cache(maxsize=1)
def get_ghidra_extractor(path):
    """Build (and cache) a Ghidra feature extractor for the test fixtures.

    Args:
      path: path of the sample under test; it is only recorded on the
        returned extractor as `extractor.path`.

    Returns:
      a `GhidraFeatureExtractor` instance with a `path` attribute attached.
    """
    import capa.features.extractors.ghidra.extractor

    # GhidraFeatureExtractor.__init__ takes no arguments, so passing `path`
    # to the constructor would raise TypeError.
    extractor = capa.features.extractors.ghidra.extractor.GhidraFeatureExtractor()

    # overload the extractor so that the fixture exposes `extractor.path`
    setattr(extractor, "path", path)

    return extractor
|
||||
|
||||
|
||||
def extract_global_features(extractor):
|
||||
features = collections.defaultdict(set)
|
||||
for feature, va in extractor.extract_global_features():
|
||||
|
||||
55
tests/test_ghidra_features.py
Normal file
55
tests/test_ghidra_features.py
Normal file
@@ -0,0 +1,55 @@
|
||||
import sys
|
||||
import logging
|
||||
import os.path
|
||||
import binascii
|
||||
import traceback
|
||||
|
||||
import pytest
|
||||
|
||||
try:
|
||||
sys.path.append(os.path.dirname(__file__))
|
||||
import fixtures
|
||||
from fixtures import *
|
||||
finally:
|
||||
sys.path.pop()
|
||||
|
||||
|
||||
logger = logging.getLogger("test_ghidra_features")
|
||||
|
||||
|
||||
# We need to skip the ghidra tests if we cannot import the ghidra modules, e.g., in GitHub CI.
ghidra_present: bool = False
try:
    import ghidra.program.flatapi as flatapi

    # `currentProgram` is a bare global here; if it is not defined, the lookup
    # raises NameError, which must lead to a skip rather than a collection error.
    ghidraapi = flatapi.FlatProgramAPI(currentProgram)
    current_program_test = ghidraapi.getCurrentProgram()
except ImportError:
    # the ghidra Python API is not installed at all.
    pass
except (NameError, RuntimeError):
    logger.warning("Ghidra runtime not detected")
else:
    ghidra_present = True
|
||||
|
||||
|
||||
@pytest.mark.skipif(not ghidra_present, reason="Skip ghidra tests if the ghidra Python API is not installed")
@fixtures.parametrize(
    "sample,scope,feature,expected",
    fixtures.FEATURE_PRESENCE_TESTS,
    indirect=["sample", "scope"],
)
def test_ghidra_features(sample, scope, feature, expected):
    """Run one shared feature-presence case against the Ghidra extractor."""
    extractor_factory = fixtures.get_ghidra_extractor
    fixtures.do_test_feature_presence(extractor_factory, sample, scope, feature, expected)
|
||||
|
||||
|
||||
@pytest.mark.skipif(not ghidra_present, reason="Skip ghidra tests if the ghidra Python API is not installed")
@fixtures.parametrize(
    "sample,scope,feature,expected",
    fixtures.FEATURE_COUNT_TESTS,
    indirect=["sample", "scope"],
)
def test_ghidra_feature_counts(sample, scope, feature, expected):
    """Run one shared feature-count case against the Ghidra extractor."""
    extractor_factory = fixtures.get_ghidra_extractor
    fixtures.do_test_feature_count(extractor_factory, sample, scope, feature, expected)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user