Merge pull request #1 from colton-gabertan/ghidra_backend

Ghidra backend
This commit is contained in:
Colton Gabertan
2023-06-02 23:24:43 -07:00
committed by GitHub
10 changed files with 258 additions and 3 deletions

View File

@@ -1,6 +1,6 @@
name: CI
on:
on:
push:
branches: [ master ]
pull_request:
@@ -136,3 +136,55 @@ jobs:
env:
BN_LICENSE: ${{ secrets.BN_LICENSE }}
run: pytest -v tests/test_binja_features.py # explicitly refer to the binja tests for performance. other tests run above.
# CI job that provisions a Ghidra + Ghidrathon environment for each Python
# version in the matrix, then installs capa in editable mode.
# NOTE(review): no step visible in this job invokes pytest - confirm whether
# a test-running step is still to be added.
ghidra-tests:
name: Ghidra tests for ${{ matrix.python-version }}
runs-on: ubuntu-20.04
strategy:
# let the remaining matrix entries finish even if one of them fails
fail-fast: false
matrix:
python-version: ["3.7", "3.11"]
java-version: ["17"]
steps:
# actions are pinned to full commit SHAs; the trailing comment records the tag
- name: Checkout capa with submodules
uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c # v3.3.0
with:
submodules: recursive
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@d27e3f3d7c64b4bbf8e4abfb9b63b83e846e0435 # v4.5.0
with:
python-version: ${{ matrix.python-version }}
- name: Set up Java ${{ matrix.java-version }}
uses: actions/setup-java@5ffc13f4174014e2d4d4572b3d74c3fa61aeb2c2 # v3
with:
distribution: 'temurin'
java-version: ${{ matrix.java-version }}
# Gradle is downloaded and unpacked under /opt/gradle; the Ghidrathon step
# below invokes it by absolute path.
- name: Set up Gradle 7.3 # must be done manually due to no gradle build in capa
run: |
mkdir /opt/gradle
wget "https://services.gradle.org/distributions/gradle-7.3-bin.zip" -O /opt/gradle/gradle-7.3.zip
unzip /opt/gradle/gradle-7.3.zip -d /opt/gradle
# Ghidra is unpacked inside the checkout at .github/ghidra/ghidra_10.3_PUBLIC
- name: Install Ghidra 10.3
run: |
mkdir ./.github/ghidra
wget "https://github.com/NationalSecurityAgency/ghidra/releases/download/Ghidra_10.3_build/ghidra_10.3_PUBLIC_20230510.zip" -O ./.github/ghidra/ghidra_10.3_PUBLIC.zip
unzip .github/ghidra/ghidra_10.3_PUBLIC.zip -d .github/ghidra/
# Jep is installed with pip from the unpacked v4.1.1 source archive
- name: Install Jep 4.1.1
run : |
mkdir ./.github/jep
wget "https://github.com/ninia/jep/archive/refs/tags/v4.1.1.zip" -O ./.github/jep/jep-4.1.1.zip
unzip .github/jep/jep-4.1.1.zip -d .github/jep/
pip install .github/jep/jep-4.1.1/
# Ghidrathon is built with the Gradle install from above against the Ghidra
# install dir, and the resulting dist zip is unpacked into Ghidra's Extensions dir.
- name: Install Ghidrathon
run : |
mkdir ./.github/ghidrathon
wget "https://github.com/mandiant/Ghidrathon/archive/refs/tags/v2.1.0.zip" -O ./.github/ghidrathon/ghidrathon-2.1.0.zip
unzip .github/ghidrathon/ghidrathon-2.1.0.zip -d .github/ghidrathon/
workdir=$(pwd)
/opt/gradle/gradle-7.3/bin/gradle -p ./.github/ghidrathon/Ghidrathon-2.1.0/ -PGHIDRA_INSTALL_DIR=$workdir/.github/ghidra/ghidra_10.3_PUBLIC
unzip .github/ghidrathon/Ghidrathon-2.1.0/dist/*.zip -d $workdir/.github/ghidra/ghidra_10.3_PUBLIC/Extensions
# despite the step name, this installs the libyaml development package via apt
- name: Install pyyaml
run: sudo apt-get install -y libyaml-dev
# editable install of capa with its "dev" extras
- name: Install capa
run: pip install -e .[dev]

View File

@@ -4,6 +4,7 @@
### New Features
- Utility script to detect feature overlap between new and existing CAPA rules [#1451](https://github.com/mandiant/capa/issues/1451) [@Aayush-Goel-04](https://github.com/aayush-goel-04)
- extractor: Implement Ghidra Backend [@colton-gabertan](https://github.com/colton-gabertan)
### Breaking Changes
@@ -16,7 +17,7 @@
- communication/mailslot/read-from-mailslot nick.simonian@mandiant.com
- nursery/hash-data-using-sha512managed-in-dotnet jonathanlepore@google.com
- nursery/compiled-with-exescript jonathanlepore@google.com
-
### Bug Fixes
- extractor: update vivisect Arch extraction #1334 @mr-tz

View File

@@ -0,0 +1,15 @@
import logging
import contextlib
from typing import List, Tuple, Iterator

from capa.features.common import Feature
from capa.features.address import Address, AbsoluteVirtualAddress
from capa.features.extractors.base_extractor import FeatureExtractor
import capa.features.extractors.ghidra.global_
class GhidraFeatureExtractor(FeatureExtractor):
    """Feature extractor for the Ghidra backend.

    On construction, the OS and architecture features produced by
    `capa.features.extractors.ghidra.global_` are collected into
    `self.global_features`; `extract_global_features` replays them.
    """

    def __init__(self):
        super().__init__()
        # materialize the generators once so the results can be yielded repeatedly
        self.global_features: List[Tuple[Feature, Address]] = []
        self.global_features.extend(capa.features.extractors.ghidra.global_.extract_os())
        self.global_features.extend(capa.features.extractors.ghidra.global_.extract_arch())

    def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]:
        """Yield the (feature, address) pairs gathered in `__init__`.

        Without this method the collected features are never exposed to callers.
        """
        yield from self.global_features

View File

View File

@@ -0,0 +1,81 @@
import logging
import contextlib
from io import BytesIO
from typing import Tuple, Iterator
# imports for clarity
# note: currentProgram is a static variable accessible in
# the specific ghidra runtime environment
import ghidra.program.database.mem
import ghidra.program.flatapi as flatapi
ghidraapi = flatapi.FlatProgramAPI(currentProgram) # Ghidrathon hacks :)
import capa.features.extractors.elf
from capa.features.common import OS, ARCH_I386, ARCH_AMD64, OS_WINDOWS, Arch, Feature
from capa.features.address import NO_ADDRESS, Address
logger = logging.getLogger(__name__)
def extract_os() -> Iterator[Tuple[Feature, Address]]:
    """Yield the OS feature for the program currently loaded in Ghidra.

    - PE executables yield `OS(OS_WINDOWS)`.
    - ELF executables are re-read from the first `FileBytes` entry of the
      program memory and handed to `capa.features.extractors.elf.detect_elf_os`.
    - any other executable format yields nothing (a debug message is logged).

    All yielded features use `NO_ADDRESS`.
    """
    current_program = ghidraapi.getCurrentProgram()
    format_name: str = current_program.getExecutableFormat()

    if "PE" in format_name:
        yield OS(OS_WINDOWS), NO_ADDRESS

    elif "ELF" in format_name:
        program_memory = current_program.getMemory()  # ghidra.program.database.mem.MemoryMapDB
        fbytes_list = program_memory.getAllFileBytes()  # java.util.List<FileBytes>
        if fbytes_list.isEmpty():
            # nothing to parse; indexing [0] below would raise otherwise
            logger.debug("no file bytes available for %s, will not guess OS", format_name)
            return
        fbytes = fbytes_list[0]  # ghidra.program.database.mem.FileBytes

        # Java returns signed bytes, so mask each one back into the 0..255 range.
        # The buffer is built in a single pass: repeated `bytes + bytes`
        # concatenation copies the whole buffer on every iteration.
        # note: may become unnecessary if Jep gains better support for Java Lists
        raw = bytes(fbytes.getOriginalByte(i) & 0xFF for i in range(fbytes.getSize()))

        with contextlib.closing(BytesIO(raw)) as f:
            detected_os = capa.features.extractors.elf.detect_elf_os(f)

        yield OS(detected_os), NO_ADDRESS

    else:
        # we likely end up here:
        #  1. handling shellcode, or
        #  2. handling a new file format (e.g. macho)
        #
        # for (1) we can't do much - it's shellcode and all bets are off.
        # we could maybe accept a further CLI argument to specify the OS,
        # but i think this would be rarely used.
        # rules that rely on OS conditions will fail to match on shellcode.
        #
        # for (2), this logic will need to be updated as the format is implemented.
        logger.debug("unsupported file format: %s, will not guess OS", format_name)
        return
def extract_arch() -> Iterator[Tuple[Feature, Address]]:
    """Yield the architecture feature for the program currently loaded in Ghidra.

    The decision is based on the "Language ID" entry of the program metadata:
    an ID containing both "x86" and "64" yields `Arch(ARCH_AMD64)`, one
    containing "x86" and "32" yields `Arch(ARCH_I386)`. Anything else
    (including a missing ID) yields nothing and is logged at debug level.

    All yielded features use `NO_ADDRESS`.
    """
    current_program = ghidraapi.getCurrentProgram()
    lang_id = current_program.getMetadata().get("Language ID")

    if not lang_id:
        # a missing entry would make the substring checks below raise TypeError
        logger.debug("unsupported architecture: %s", lang_id)
        return

    if "x86" in lang_id and "64" in lang_id:
        yield Arch(ARCH_AMD64), NO_ADDRESS

    elif "x86" in lang_id and "32" in lang_id:
        yield Arch(ARCH_I386), NO_ADDRESS

    else:
        # we likely end up here:
        #  1. handling a new architecture (e.g. aarch64), or
        #  2. handling an x86 variant that is neither 32-bit nor 64-bit
        #
        # in both cases this logic will need to be updated as support is implemented.
        # always report the actual language ID so the log is actionable.
        logger.debug("unsupported architecture: %s", lang_id)
        return

View File

@@ -1317,6 +1317,33 @@ def ida_main():
print(capa.render.default.render(meta, rules, capabilities))
def ghidra_main():
    """Entry point used when capa runs inside the Ghidra runtime.

    Configures logging, loads the default embedded rules, then extracts the
    OS and architecture features via the Ghidra global extractor and prints
    them (temporary smoke test of the extraction).
    """
    import capa.rules
    import capa.features.extractors.ghidra.global_

    logging.basicConfig(level=logging.INFO)
    logging.getLogger().setLevel(logging.INFO)

    banner_lines = (
        "-" * 80,
        " Using default embedded rules.",
        " ",
        " You can see the current default rule set here:",
        " https://github.com/mandiant/capa-rules",
        "-" * 80,
    )
    for banner_line in banner_lines:
        logger.debug(banner_line)

    rules_path = os.path.join(get_default_root(), "rules")
    logger.debug("rule path: %s", rules_path)
    rules = get_rules([rules_path])

    # temp test for OS & ARCH extractions
    ghidra_global = capa.features.extractors.ghidra.global_
    global_features: List[Tuple[Feature, Address]] = [
        *ghidra_global.extract_os(),
        *ghidra_global.extract_arch(),
    ]
    print(global_features)
def is_runtime_ida():
try:
import idc
@@ -1326,8 +1353,20 @@ def is_runtime_ida():
return True
def is_runtime_ghidra():
    """Return True when `ghidra.program.flatapi` can be imported, else False."""
    try:
        import ghidra.program.flatapi  # noqa: F401 - imported only to probe availability
    except ImportError:
        return False
    return True
# Script entry point: dispatch to the main routine matching the hosting runtime.
if __name__ == "__main__":
# the IDA check runs first, so ida_main() wins if both runtime probes succeed
if is_runtime_ida():
ida_main()
# is_runtime_ghidra() reports whether ghidra.program.flatapi is importable
elif is_runtime_ghidra():
ghidra_main()
else:
# neither runtime detected: run main() and use its return value as the exit status
sys.exit(main())

2
rules

Submodule rules updated: 188e65528e...312d4cad89

View File

@@ -183,6 +183,18 @@ def get_binja_extractor(path):
return extractor
@lru_cache(maxsize=1)
def get_ghidra_extractor(path):
    """Build (and cache) a GhidraFeatureExtractor tagged with the sample `path`.

    Only the most recently requested extractor is cached (`maxsize=1`).

    Args:
        path: path of the sample under test; stored on the returned extractor.

    Returns:
        the extractor instance, with an extra `path` attribute.
    """
    import capa.features.extractors.ghidra.extractor

    # GhidraFeatureExtractor.__init__ accepts no arguments, so passing `path`
    # to the constructor would raise TypeError.
    extractor = capa.features.extractors.ghidra.extractor.GhidraFeatureExtractor()

    # overload the extractor so that the fixture exposes `extractor.path`
    extractor.path = path
    return extractor
def extract_global_features(extractor):
features = collections.defaultdict(set)
for feature, va in extractor.extract_global_features():

View File

@@ -0,0 +1,55 @@
import sys
import logging
import os.path
import binascii
import traceback
import pytest
try:
sys.path.append(os.path.dirname(__file__))
import fixtures
from fixtures import *
finally:
sys.path.pop()
logger = logging.getLogger("test_ghidra_features")

# We need to skip the ghidra tests if we cannot import ghidra modules, e.g., in GitHub CI,
# or if the modules import but no Ghidra program is available to query.
ghidra_present: bool = False
try:
    import ghidra.program.flatapi as flatapi

    try:
        # `currentProgram` is only defined inside the Ghidra runtime; keep its use
        # inside the guarded block so an undefined name skips the tests instead of
        # aborting test collection with a NameError.
        ghidraapi = flatapi.FlatProgramAPI(currentProgram)  # noqa: F821
        current_program_test = ghidraapi.getCurrentProgram()
    except (NameError, RuntimeError):
        logger.warning("Ghidra runtime not detected")
    else:
        ghidra_present = True
except ImportError:
    pass
@pytest.mark.skipif(not ghidra_present, reason="Skip ghidra tests if the ghidra Python API is not installed")
@fixtures.parametrize(
    "sample,scope,feature,expected", fixtures.FEATURE_PRESENCE_TESTS, indirect=["sample", "scope"]
)
def test_ghidra_features(sample, scope, feature, expected):
    """Run the shared feature-presence cases against the Ghidra extractor."""
    make_extractor = fixtures.get_ghidra_extractor
    fixtures.do_test_feature_presence(make_extractor, sample, scope, feature, expected)
@pytest.mark.skipif(not ghidra_present, reason="Skip ghidra tests if the ghidra Python API is not installed")
@fixtures.parametrize(
    "sample,scope,feature,expected", fixtures.FEATURE_COUNT_TESTS, indirect=["sample", "scope"]
)
def test_ghidra_feature_counts(sample, scope, feature, expected):
    """Run the shared feature-count cases against the Ghidra extractor."""
    make_extractor = fixtures.get_ghidra_extractor
    fixtures.do_test_feature_count(make_extractor, sample, scope, feature, expected)