mirror of
https://github.com/mandiant/capa.git
synced 2025-12-12 07:40:38 -08:00
Compare commits
28 Commits
v9.3.0
...
0ba5f9664a
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
0ba5f9664a | ||
|
|
98873c8570 | ||
|
|
3687bb95e9 | ||
|
|
7175714f9e | ||
|
|
32c201d9b1 | ||
|
|
784e0346d9 | ||
|
|
be1ccb0776 | ||
|
|
9b42b45d21 | ||
|
|
d17264c928 | ||
|
|
f313852e70 | ||
|
|
c0ae1352c6 | ||
|
|
ccb3e6de74 | ||
|
|
26c6ffd62d | ||
|
|
18923601c7 | ||
|
|
1568ce4832 | ||
|
|
ffce77b13d | ||
|
|
895b2440c0 | ||
|
|
c901f809a2 | ||
|
|
308b3e5c1c | ||
|
|
7844ebb144 | ||
|
|
e393cff0e1 | ||
|
|
7780b9e8a8 | ||
|
|
8d39765e7b | ||
|
|
dec0bcfe79 | ||
|
|
99ccecba4e | ||
|
|
af27463c37 | ||
|
|
f4f47b4d55 | ||
|
|
adc2401136 |
@@ -1,5 +1,5 @@
|
||||
[tool.bumpversion]
|
||||
current_version = "9.3.0"
|
||||
current_version = "9.3.1"
|
||||
|
||||
[[tool.bumpversion.files]]
|
||||
filename = "capa/version.py"
|
||||
|
||||
4
.github/workflows/build.yml
vendored
4
.github/workflows/build.yml
vendored
@@ -46,8 +46,8 @@ jobs:
|
||||
# artifact_name: capa.exe
|
||||
# asset_name: windows-arm64
|
||||
# python_version: '3.12'
|
||||
- os: macos-13
|
||||
# use older macOS for assumed better portability
|
||||
- os: macos-15-intel
|
||||
# macos-15-intel is the lowest native intel build
|
||||
artifact_name: capa
|
||||
asset_name: macos
|
||||
python_version: '3.10'
|
||||
|
||||
45
.github/workflows/tests.yml
vendored
45
.github/workflows/tests.yml
vendored
@@ -42,10 +42,10 @@ jobs:
|
||||
- name: Checkout capa
|
||||
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
|
||||
# use latest available python to take advantage of best performance
|
||||
- name: Set up Python 3.12
|
||||
- name: Set up Python 3.13
|
||||
uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
|
||||
with:
|
||||
python-version: "3.12"
|
||||
python-version: "3.13"
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
pip install -r requirements.txt
|
||||
@@ -70,10 +70,10 @@ jobs:
|
||||
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
|
||||
with:
|
||||
submodules: recursive
|
||||
- name: Set up Python 3.12
|
||||
- name: Set up Python 3.13
|
||||
uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
|
||||
with:
|
||||
python-version: "3.12"
|
||||
python-version: "3.13"
|
||||
- name: Install capa
|
||||
run: |
|
||||
pip install -r requirements.txt
|
||||
@@ -88,13 +88,11 @@ jobs:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os: [ubuntu-22.04, windows-2022, macos-13]
|
||||
os: [ubuntu-22.04, ubuntu-22.04-arm, windows-2022, macos-15-intel, macos-14]
|
||||
# across all operating systems
|
||||
python-version: ["3.10", "3.11"]
|
||||
python-version: ["3.10", "3.13"]
|
||||
include:
|
||||
# on Ubuntu run these as well
|
||||
- os: ubuntu-22.04
|
||||
python-version: "3.10"
|
||||
- os: ubuntu-22.04
|
||||
python-version: "3.11"
|
||||
- os: ubuntu-22.04
|
||||
@@ -131,7 +129,7 @@ jobs:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
python-version: ["3.10", "3.11"]
|
||||
python-version: ["3.10", "3.13"]
|
||||
steps:
|
||||
- name: Checkout capa with submodules
|
||||
# only run if BN_SERIAL is available; this check has to be repeated in every step, see https://github.com/orgs/community/discussions/26726#discussioncomment-3253118
|
||||
@@ -173,11 +171,10 @@ jobs:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
python-version: ["3.10", "3.11"]
|
||||
java-version: ["17"]
|
||||
ghidra-version: ["11.0.1"]
|
||||
public-version: ["PUBLIC_20240130"] # for ghidra releases
|
||||
ghidrathon-version: ["4.0.0"]
|
||||
python-version: ["3.10", "3.13"]
|
||||
java-version: ["21"]
|
||||
ghidra-version: ["12.0"]
|
||||
public-version: ["PUBLIC_20251205"] # for ghidra releases
|
||||
steps:
|
||||
- name: Checkout capa with submodules
|
||||
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
|
||||
@@ -197,25 +194,13 @@ jobs:
|
||||
mkdir ./.github/ghidra
|
||||
wget "https://github.com/NationalSecurityAgency/ghidra/releases/download/Ghidra_${{ matrix.ghidra-version }}_build/ghidra_${{ matrix.ghidra-version }}_${{ matrix.public-version }}.zip" -O ./.github/ghidra/ghidra_${{ matrix.ghidra-version }}_PUBLIC.zip
|
||||
unzip .github/ghidra/ghidra_${{ matrix.ghidra-version }}_PUBLIC.zip -d .github/ghidra/
|
||||
- name: Install Ghidrathon
|
||||
run : |
|
||||
mkdir ./.github/ghidrathon
|
||||
wget "https://github.com/mandiant/Ghidrathon/releases/download/v${{ matrix.ghidrathon-version }}/Ghidrathon-v${{ matrix.ghidrathon-version}}.zip" -O ./.github/ghidrathon/ghidrathon-v${{ matrix.ghidrathon-version }}.zip
|
||||
unzip .github/ghidrathon/ghidrathon-v${{ matrix.ghidrathon-version }}.zip -d .github/ghidrathon/
|
||||
python -m pip install -r .github/ghidrathon/requirements.txt
|
||||
python .github/ghidrathon/ghidrathon_configure.py $(pwd)/.github/ghidra/ghidra_${{ matrix.ghidra-version }}_PUBLIC
|
||||
unzip .github/ghidrathon/Ghidrathon-v${{ matrix.ghidrathon-version }}.zip -d .github/ghidra/ghidra_${{ matrix.ghidra-version }}_PUBLIC/Ghidra/Extensions
|
||||
- name: Install pyyaml
|
||||
run: sudo apt-get install -y libyaml-dev
|
||||
- name: Install capa
|
||||
run: |
|
||||
pip install -r requirements.txt
|
||||
pip install -e .[dev,scripts]
|
||||
pip install -e .[dev]
|
||||
- name: Run tests
|
||||
run: |
|
||||
mkdir ./.github/ghidra/project
|
||||
.github/ghidra/ghidra_${{ matrix.ghidra-version }}_PUBLIC/support/analyzeHeadless .github/ghidra/project ghidra_test -Import ./tests/data/mimikatz.exe_ -ScriptPath ./tests/ -PostScript test_ghidra_features.py > ../output.log
|
||||
cat ../output.log
|
||||
exit_code=$(cat ../output.log | grep exit | awk '{print $NF}')
|
||||
exit $exit_code
|
||||
env:
|
||||
GHIDRA_INSTALL_DIR: ${{ github.workspace }}/.github/ghidra/ghidra_${{ matrix.ghidra-version }}_PUBLIC
|
||||
run: pytest -v tests/test_ghidra_features.py
|
||||
|
||||
|
||||
27
CHANGELOG.md
27
CHANGELOG.md
@@ -4,13 +4,20 @@
|
||||
|
||||
### New Features
|
||||
|
||||
- ghidra: support PyGhidra @mike-hunhoff #2788
|
||||
|
||||
### Breaking Changes
|
||||
|
||||
### New Rules (0)
|
||||
### New Rules (4)
|
||||
|
||||
- nursery/run-as-nodejs-native-module mehunhoff@google.com
|
||||
- nursery/inject-shellcode-using-thread-pool-work-insertion-with-tp_io still@teamt5.org
|
||||
- nursery/inject-shellcode-using-thread-pool-work-insertion-with-tp_timer still@teamt5.org
|
||||
- nursery/inject-shellcode-using-thread-pool-work-insertion-with-tp_work still@teamt5.org
|
||||
-
|
||||
|
||||
### Bug Fixes
|
||||
- Fixed insecure deserialization vulnerability in YAML loading @0x1622 (#2770)
|
||||
|
||||
### capa Explorer Web
|
||||
|
||||
@@ -18,6 +25,24 @@
|
||||
|
||||
### Development
|
||||
|
||||
- ci: deprecate macos-13 runner and use Python v3.13 for testing @mike-hunhoff #2777
|
||||
|
||||
### Raw diffs
|
||||
- [capa v9.3.1...master](https://github.com/mandiant/capa/compare/v9.3.1...master)
|
||||
- [capa-rules v9.3.1...master](https://github.com/mandiant/capa-rules/compare/v9.3.1...master)
|
||||
|
||||
## v9.3.1
|
||||
|
||||
This patch release fixes a missing import for the capa explorer plugin for IDA Pro.
|
||||
|
||||
### Bug Fixes
|
||||
|
||||
- add missing ida-netnode dependency to pyproject.toml @mike-hunhoff #2765
|
||||
|
||||
### Development
|
||||
|
||||
- ci: bump binja min version @mike-hunhoff #2763
|
||||
|
||||
### Raw diffs
|
||||
- [capa v9.3.0...master](https://github.com/mandiant/capa/compare/v9.3.0...master)
|
||||
- [capa-rules v9.3.0...master](https://github.com/mandiant/capa-rules/compare/v9.3.0...master)
|
||||
|
||||
@@ -83,7 +83,7 @@ def bb_contains_stackstring(bb: ghidra.program.model.block.CodeBlock) -> bool:
|
||||
true if basic block contains enough moves of constant bytes to the stack
|
||||
"""
|
||||
count = 0
|
||||
for insn in currentProgram().getListing().getInstructions(bb, True): # type: ignore [name-defined] # noqa: F821
|
||||
for insn in capa.features.extractors.ghidra.helpers.get_current_program().getListing().getInstructions(bb, True):
|
||||
if is_mov_imm_to_stack(insn):
|
||||
count += get_printable_len(insn.getScalar(1))
|
||||
if count > MIN_STACKSTRING_LEN:
|
||||
@@ -96,7 +96,9 @@ def _bb_has_tight_loop(bb: ghidra.program.model.block.CodeBlock):
|
||||
parse tight loops, true if last instruction in basic block branches to bb start
|
||||
"""
|
||||
# Reverse Ordered, first InstructionDB
|
||||
last_insn = currentProgram().getListing().getInstructions(bb, False).next() # type: ignore [name-defined] # noqa: F821
|
||||
last_insn = (
|
||||
capa.features.extractors.ghidra.helpers.get_current_program().getListing().getInstructions(bb, False).next()
|
||||
)
|
||||
|
||||
if last_insn.getFlowType().isJump():
|
||||
return last_insn.getAddress(0) == bb.getMinAddress()
|
||||
@@ -140,20 +142,3 @@ def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Featur
|
||||
for bb_handler in BASIC_BLOCK_HANDLERS:
|
||||
for feature, addr in bb_handler(fh, bbh):
|
||||
yield feature, addr
|
||||
|
||||
|
||||
def main():
|
||||
features = []
|
||||
from capa.features.extractors.ghidra.extractor import GhidraFeatureExtractor
|
||||
|
||||
for fh in GhidraFeatureExtractor().get_functions():
|
||||
for bbh in capa.features.extractors.ghidra.helpers.get_function_blocks(fh):
|
||||
features.extend(list(extract_features(fh, bbh)))
|
||||
|
||||
import pprint
|
||||
|
||||
pprint.pprint(features) # noqa: T203
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
||||
36
capa/features/extractors/ghidra/context.py
Normal file
36
capa/features/extractors/ghidra/context.py
Normal file
@@ -0,0 +1,36 @@
|
||||
# Copyright 2023 Google LLC
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from typing import Optional
|
||||
|
||||
|
||||
class GhidraContext:
|
||||
def __init__(self, program, flat_api, monitor):
|
||||
self.program = program
|
||||
self.flat_api = flat_api
|
||||
self.monitor = monitor
|
||||
|
||||
|
||||
_context: Optional[GhidraContext] = None
|
||||
|
||||
|
||||
def set_context(program, flat_api, monitor):
|
||||
global _context
|
||||
_context = GhidraContext(program, flat_api, monitor)
|
||||
|
||||
|
||||
def get_context() -> GhidraContext:
|
||||
if _context is None:
|
||||
raise RuntimeError("GhidraContext not initialized")
|
||||
return _context
|
||||
@@ -12,6 +12,8 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import weakref
|
||||
import contextlib
|
||||
from typing import Iterator
|
||||
|
||||
import capa.features.extractors.ghidra.file
|
||||
@@ -31,19 +33,21 @@ from capa.features.extractors.base_extractor import (
|
||||
|
||||
|
||||
class GhidraFeatureExtractor(StaticFeatureExtractor):
|
||||
def __init__(self):
|
||||
def __init__(self, ctx_manager=None, tmpdir=None):
|
||||
self.ctx_manager = ctx_manager
|
||||
self.tmpdir = tmpdir
|
||||
import capa.features.extractors.ghidra.helpers as ghidra_helpers
|
||||
|
||||
super().__init__(
|
||||
SampleHashes(
|
||||
md5=capa.ghidra.helpers.get_file_md5(),
|
||||
md5=ghidra_helpers.get_current_program().getExecutableMD5(),
|
||||
# ghidra doesn't expose this hash.
|
||||
# https://ghidra.re/ghidra_docs/api/ghidra/program/model/listing/Program.html
|
||||
#
|
||||
# the hashes are stored in the database, not computed on the fly,
|
||||
# so it's probably not trivial to add SHA1.
|
||||
sha1="",
|
||||
sha256=capa.ghidra.helpers.get_file_sha256(),
|
||||
sha256=ghidra_helpers.get_current_program().getExecutableSHA256(),
|
||||
)
|
||||
)
|
||||
|
||||
@@ -55,8 +59,16 @@ class GhidraFeatureExtractor(StaticFeatureExtractor):
|
||||
self.externs = ghidra_helpers.get_file_externs()
|
||||
self.fakes = ghidra_helpers.map_fake_import_addrs()
|
||||
|
||||
# Register cleanup to run when the extractor is garbage collected or when the program exits.
|
||||
# We use weakref.finalize instead of __del__ to avoid issues with reference cycles and
|
||||
# to ensure deterministic cleanup on interpreter shutdown.
|
||||
if self.ctx_manager or self.tmpdir:
|
||||
weakref.finalize(self, cleanup, self.ctx_manager, self.tmpdir)
|
||||
|
||||
def get_base_address(self):
|
||||
return AbsoluteVirtualAddress(currentProgram().getImageBase().getOffset()) # type: ignore [name-defined] # noqa: F821
|
||||
import capa.features.extractors.ghidra.helpers as ghidra_helpers
|
||||
|
||||
return AbsoluteVirtualAddress(ghidra_helpers.get_current_program().getImageBase().getOffset())
|
||||
|
||||
def extract_global_features(self):
|
||||
yield from self.global_features
|
||||
@@ -77,7 +89,9 @@ class GhidraFeatureExtractor(StaticFeatureExtractor):
|
||||
|
||||
@staticmethod
|
||||
def get_function(addr: int) -> FunctionHandle:
|
||||
func = getFunctionContaining(toAddr(addr)) # type: ignore [name-defined] # noqa: F821
|
||||
import capa.features.extractors.ghidra.helpers as ghidra_helpers
|
||||
|
||||
func = ghidra_helpers.get_flat_api().getFunctionContaining(ghidra_helpers.get_flat_api().toAddr(addr))
|
||||
return FunctionHandle(address=AbsoluteVirtualAddress(func.getEntryPoint().getOffset()), inner=func)
|
||||
|
||||
def extract_function_features(self, fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
@@ -98,3 +112,12 @@ class GhidraFeatureExtractor(StaticFeatureExtractor):
|
||||
|
||||
def extract_insn_features(self, fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle):
|
||||
yield from capa.features.extractors.ghidra.insn.extract_features(fh, bbh, ih)
|
||||
|
||||
|
||||
def cleanup(ctx_manager, tmpdir):
|
||||
if ctx_manager:
|
||||
with contextlib.suppress(Exception):
|
||||
ctx_manager.__exit__(None, None, None)
|
||||
if tmpdir:
|
||||
with contextlib.suppress(Exception):
|
||||
tmpdir.cleanup()
|
||||
|
||||
@@ -80,7 +80,7 @@ def extract_file_embedded_pe() -> Iterator[tuple[Feature, Address]]:
|
||||
for i in range(256)
|
||||
]
|
||||
|
||||
for block in currentProgram().getMemory().getBlocks(): # type: ignore [name-defined] # noqa: F821
|
||||
for block in capa.features.extractors.ghidra.helpers.get_current_program().getMemory().getBlocks():
|
||||
if not all((block.isLoaded(), block.isInitialized(), "Headers" not in block.getName())):
|
||||
continue
|
||||
|
||||
@@ -93,9 +93,37 @@ def extract_file_embedded_pe() -> Iterator[tuple[Feature, Address]]:
|
||||
|
||||
def extract_file_export_names() -> Iterator[tuple[Feature, Address]]:
|
||||
"""extract function exports"""
|
||||
st = currentProgram().getSymbolTable() # type: ignore [name-defined] # noqa: F821
|
||||
program = capa.features.extractors.ghidra.helpers.get_current_program()
|
||||
st = program.getSymbolTable()
|
||||
|
||||
for addr in st.getExternalEntryPointIterator():
|
||||
yield Export(st.getPrimarySymbol(addr).getName()), AbsoluteVirtualAddress(addr.getOffset())
|
||||
sym = st.getPrimarySymbol(addr)
|
||||
name = sym.getName()
|
||||
|
||||
# Check for forwarded export
|
||||
is_forwarded = False
|
||||
refs = program.getReferenceManager().getReferencesFrom(addr)
|
||||
for ref in refs:
|
||||
if ref.getToAddress().isExternalAddress():
|
||||
ext_sym = st.getPrimarySymbol(ref.getToAddress())
|
||||
if ext_sym:
|
||||
ext_loc = program.getExternalManager().getExternalLocation(ext_sym)
|
||||
if ext_loc:
|
||||
# It is a forwarded export
|
||||
libname = ext_loc.getLibraryName()
|
||||
if libname.lower().endswith(".dll"):
|
||||
libname = libname[:-4]
|
||||
|
||||
forwarded_name = f"{libname}.{ext_loc.getLabel()}"
|
||||
forwarded_name = capa.features.extractors.helpers.reformat_forwarded_export_name(forwarded_name)
|
||||
|
||||
yield Export(forwarded_name), AbsoluteVirtualAddress(addr.getOffset())
|
||||
yield Characteristic("forwarded export"), AbsoluteVirtualAddress(addr.getOffset())
|
||||
is_forwarded = True
|
||||
break
|
||||
|
||||
if not is_forwarded:
|
||||
yield Export(name), AbsoluteVirtualAddress(addr.getOffset())
|
||||
|
||||
|
||||
def extract_file_import_names() -> Iterator[tuple[Feature, Address]]:
|
||||
@@ -110,7 +138,7 @@ def extract_file_import_names() -> Iterator[tuple[Feature, Address]]:
|
||||
- importname
|
||||
"""
|
||||
|
||||
for f in currentProgram().getFunctionManager().getExternalFunctions(): # type: ignore [name-defined] # noqa: F821
|
||||
for f in capa.features.extractors.ghidra.helpers.get_current_program().getFunctionManager().getExternalFunctions():
|
||||
for r in f.getSymbol().getReferences():
|
||||
if r.getReferenceType().isData():
|
||||
addr = r.getFromAddress().getOffset() # gets pointer to fake external addr
|
||||
@@ -126,14 +154,14 @@ def extract_file_import_names() -> Iterator[tuple[Feature, Address]]:
|
||||
def extract_file_section_names() -> Iterator[tuple[Feature, Address]]:
|
||||
"""extract section names"""
|
||||
|
||||
for block in currentProgram().getMemory().getBlocks(): # type: ignore [name-defined] # noqa: F821
|
||||
for block in capa.features.extractors.ghidra.helpers.get_current_program().getMemory().getBlocks():
|
||||
yield Section(block.getName()), AbsoluteVirtualAddress(block.getStart().getOffset())
|
||||
|
||||
|
||||
def extract_file_strings() -> Iterator[tuple[Feature, Address]]:
|
||||
"""extract ASCII and UTF-16 LE strings"""
|
||||
|
||||
for block in currentProgram().getMemory().getBlocks(): # type: ignore [name-defined] # noqa: F821
|
||||
for block in capa.features.extractors.ghidra.helpers.get_current_program().getMemory().getBlocks():
|
||||
if not block.isInitialized():
|
||||
continue
|
||||
|
||||
@@ -153,7 +181,8 @@ def extract_file_function_names() -> Iterator[tuple[Feature, Address]]:
|
||||
extract the names of statically-linked library functions.
|
||||
"""
|
||||
|
||||
for sym in currentProgram().getSymbolTable().getAllSymbols(True): # type: ignore [name-defined] # noqa: F821
|
||||
for sym in capa.features.extractors.ghidra.helpers.get_current_program().getSymbolTable().getAllSymbols(True):
|
||||
|
||||
# .isExternal() misses more than this config for the function symbols
|
||||
if sym.getSymbolType() == SymbolType.FUNCTION and sym.getSource() == SourceType.ANALYSIS and sym.isGlobal():
|
||||
name = sym.getName() # starts to resolve names based on Ghidra's FidDB
|
||||
@@ -170,7 +199,7 @@ def extract_file_function_names() -> Iterator[tuple[Feature, Address]]:
|
||||
|
||||
|
||||
def extract_file_format() -> Iterator[tuple[Feature, Address]]:
|
||||
ef = currentProgram().getExecutableFormat() # type: ignore [name-defined] # noqa: F821
|
||||
ef = capa.features.extractors.ghidra.helpers.get_current_program().getExecutableFormat()
|
||||
if "PE" in ef:
|
||||
yield Format(FORMAT_PE), NO_ADDRESS
|
||||
elif "ELF" in ef:
|
||||
|
||||
@@ -26,18 +26,22 @@ from capa.features.extractors.base_extractor import FunctionHandle
|
||||
|
||||
def extract_function_calls_to(fh: FunctionHandle):
|
||||
"""extract callers to a function"""
|
||||
f: ghidra.program.database.function.FunctionDB = fh.inner
|
||||
f: "ghidra.program.database.function.FunctionDB" = fh.inner
|
||||
for ref in f.getSymbol().getReferences():
|
||||
if ref.getReferenceType().isCall():
|
||||
yield Characteristic("calls to"), AbsoluteVirtualAddress(ref.getFromAddress().getOffset())
|
||||
|
||||
|
||||
def extract_function_loop(fh: FunctionHandle):
|
||||
f: ghidra.program.database.function.FunctionDB = fh.inner
|
||||
f: "ghidra.program.database.function.FunctionDB" = fh.inner
|
||||
|
||||
edges = []
|
||||
for block in SimpleBlockIterator(BasicBlockModel(currentProgram()), f.getBody(), monitor()): # type: ignore [name-defined] # noqa: F821
|
||||
dests = block.getDestinations(monitor()) # type: ignore [name-defined] # noqa: F821
|
||||
for block in SimpleBlockIterator(
|
||||
BasicBlockModel(capa.features.extractors.ghidra.helpers.get_current_program()),
|
||||
f.getBody(),
|
||||
capa.features.extractors.ghidra.helpers.get_monitor(),
|
||||
):
|
||||
dests = block.getDestinations(capa.features.extractors.ghidra.helpers.get_monitor())
|
||||
s_addrs = block.getStartAddresses()
|
||||
|
||||
while dests.hasNext(): # For loop throws Python TypeError
|
||||
@@ -49,16 +53,17 @@ def extract_function_loop(fh: FunctionHandle):
|
||||
|
||||
|
||||
def extract_recursive_call(fh: FunctionHandle):
|
||||
f: ghidra.program.database.function.FunctionDB = fh.inner
|
||||
f: "ghidra.program.database.function.FunctionDB" = fh.inner
|
||||
|
||||
for func in f.getCalledFunctions(monitor()): # type: ignore [name-defined] # noqa: F821
|
||||
for func in f.getCalledFunctions(capa.features.extractors.ghidra.helpers.get_monitor()):
|
||||
if func.getEntryPoint().getOffset() == f.getEntryPoint().getOffset():
|
||||
yield Characteristic("recursive call"), AbsoluteVirtualAddress(f.getEntryPoint().getOffset())
|
||||
|
||||
|
||||
def extract_features(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
|
||||
for func_handler in FUNCTION_HANDLERS:
|
||||
for feature, addr in func_handler(fh):
|
||||
"""extract function features"""
|
||||
for function_handler in FUNCTION_HANDLERS:
|
||||
for feature, addr in function_handler(fh):
|
||||
yield feature, addr
|
||||
|
||||
|
||||
|
||||
@@ -26,7 +26,7 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def extract_os() -> Iterator[tuple[Feature, Address]]:
|
||||
format_name: str = currentProgram().getExecutableFormat() # type: ignore [name-defined] # noqa: F821
|
||||
format_name: str = capa.features.extractors.ghidra.helpers.get_current_program().getExecutableFormat()
|
||||
|
||||
if "PE" in format_name:
|
||||
yield OS(OS_WINDOWS), NO_ADDRESS
|
||||
@@ -53,7 +53,7 @@ def extract_os() -> Iterator[tuple[Feature, Address]]:
|
||||
|
||||
|
||||
def extract_arch() -> Iterator[tuple[Feature, Address]]:
|
||||
lang_id = currentProgram().getMetadata().get("Language ID") # type: ignore [name-defined] # noqa: F821
|
||||
lang_id = capa.features.extractors.ghidra.helpers.get_current_program().getMetadata().get("Language ID")
|
||||
|
||||
if "x86" in lang_id and "64" in lang_id:
|
||||
yield Arch(ARCH_AMD64), NO_ADDRESS
|
||||
|
||||
@@ -22,9 +22,22 @@ from ghidra.program.model.symbol import SourceType, SymbolType
|
||||
from ghidra.program.model.address import AddressSpace
|
||||
|
||||
import capa.features.extractors.helpers
|
||||
import capa.features.extractors.ghidra.context as ghidra_context
|
||||
from capa.features.common import THUNK_CHAIN_DEPTH_DELTA
|
||||
from capa.features.address import AbsoluteVirtualAddress
|
||||
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle
|
||||
from capa.features.extractors.base_extractor import BBHandle, InsnHandle
|
||||
|
||||
|
||||
def get_current_program():
|
||||
return ghidra_context.get_context().program
|
||||
|
||||
|
||||
def get_monitor():
|
||||
return ghidra_context.get_context().monitor
|
||||
|
||||
|
||||
def get_flat_api():
|
||||
return ghidra_context.get_context().flat_api
|
||||
|
||||
|
||||
def ints_to_bytes(bytez: list[int]) -> bytes:
|
||||
@@ -36,7 +49,7 @@ def ints_to_bytes(bytez: list[int]) -> bytes:
|
||||
return bytes([b & 0xFF for b in bytez])
|
||||
|
||||
|
||||
def find_byte_sequence(addr: ghidra.program.model.address.Address, seq: bytes) -> Iterator[int]:
|
||||
def find_byte_sequence(addr: "ghidra.program.model.address.Address", seq: bytes) -> Iterator[int]:
|
||||
"""yield all ea of a given byte sequence
|
||||
|
||||
args:
|
||||
@@ -44,12 +57,12 @@ def find_byte_sequence(addr: ghidra.program.model.address.Address, seq: bytes) -
|
||||
seq: bytes to search e.g. b"\x01\x03"
|
||||
"""
|
||||
seqstr = "".join([f"\\x{b:02x}" for b in seq])
|
||||
eas = findBytes(addr, seqstr, java.lang.Integer.MAX_VALUE, 1) # type: ignore [name-defined] # noqa: F821
|
||||
eas = get_flat_api().findBytes(addr, seqstr, java.lang.Integer.MAX_VALUE, 1)
|
||||
|
||||
yield from eas
|
||||
|
||||
|
||||
def get_bytes(addr: ghidra.program.model.address.Address, length: int) -> bytes:
|
||||
def get_bytes(addr: "ghidra.program.model.address.Address", length: int) -> bytes:
|
||||
"""return length bytes at addr
|
||||
|
||||
args:
|
||||
@@ -57,12 +70,12 @@ def get_bytes(addr: ghidra.program.model.address.Address, length: int) -> bytes:
|
||||
length: length of bytes to pull
|
||||
"""
|
||||
try:
|
||||
return ints_to_bytes(getBytes(addr, length)) # type: ignore [name-defined] # noqa: F821
|
||||
except RuntimeError:
|
||||
return ints_to_bytes(get_flat_api().getBytes(addr, int(length)))
|
||||
except Exception:
|
||||
return b""
|
||||
|
||||
|
||||
def get_block_bytes(block: ghidra.program.model.mem.MemoryBlock) -> bytes:
|
||||
def get_block_bytes(block: "ghidra.program.model.mem.MemoryBlock") -> bytes:
|
||||
"""yield all bytes in a given block
|
||||
|
||||
args:
|
||||
@@ -73,20 +86,21 @@ def get_block_bytes(block: ghidra.program.model.mem.MemoryBlock) -> bytes:
|
||||
|
||||
def get_function_symbols():
|
||||
"""yield all non-external function symbols"""
|
||||
yield from currentProgram().getFunctionManager().getFunctionsNoStubs(True) # type: ignore [name-defined] # noqa: F821
|
||||
yield from get_current_program().getFunctionManager().getFunctionsNoStubs(True)
|
||||
|
||||
|
||||
def get_function_blocks(fh: FunctionHandle) -> Iterator[BBHandle]:
|
||||
"""yield BBHandle for each bb in a given function"""
|
||||
def get_function_blocks(fh: "capa.features.extractors.base_extractor.FunctionHandle") -> Iterator[BBHandle]:
|
||||
"""
|
||||
yield the basic blocks of the function
|
||||
"""
|
||||
|
||||
func: ghidra.program.database.function.FunctionDB = fh.inner
|
||||
for bb in SimpleBlockIterator(BasicBlockModel(currentProgram()), func.getBody(), monitor()): # type: ignore [name-defined] # noqa: F821
|
||||
yield BBHandle(address=AbsoluteVirtualAddress(bb.getMinAddress().getOffset()), inner=bb)
|
||||
for block in SimpleBlockIterator(BasicBlockModel(get_current_program()), fh.inner.getBody(), get_monitor()):
|
||||
yield BBHandle(address=AbsoluteVirtualAddress(block.getMinAddress().getOffset()), inner=block)
|
||||
|
||||
|
||||
def get_insn_in_range(bbh: BBHandle) -> Iterator[InsnHandle]:
|
||||
"""yield InsnHandle for each insn in a given basic block"""
|
||||
for insn in currentProgram().getListing().getInstructions(bbh.inner, True): # type: ignore [name-defined] # noqa: F821
|
||||
for insn in get_current_program().getListing().getInstructions(bbh.inner, True):
|
||||
yield InsnHandle(address=AbsoluteVirtualAddress(insn.getAddress().getOffset()), inner=insn)
|
||||
|
||||
|
||||
@@ -95,7 +109,7 @@ def get_file_imports() -> dict[int, list[str]]:
|
||||
|
||||
import_dict: dict[int, list[str]] = {}
|
||||
|
||||
for f in currentProgram().getFunctionManager().getExternalFunctions(): # type: ignore [name-defined] # noqa: F821
|
||||
for f in get_current_program().getFunctionManager().getExternalFunctions():
|
||||
for r in f.getSymbol().getReferences():
|
||||
if r.getReferenceType().isData():
|
||||
addr = r.getFromAddress().getOffset() # gets pointer to fake external addr
|
||||
@@ -133,7 +147,7 @@ def get_file_externs() -> dict[int, list[str]]:
|
||||
|
||||
extern_dict: dict[int, list[str]] = {}
|
||||
|
||||
for sym in currentProgram().getSymbolTable().getAllSymbols(True): # type: ignore [name-defined] # noqa: F821
|
||||
for sym in get_current_program().getSymbolTable().getAllSymbols(True):
|
||||
# .isExternal() misses more than this config for the function symbols
|
||||
if sym.getSymbolType() == SymbolType.FUNCTION and sym.getSource() == SourceType.ANALYSIS and sym.isGlobal():
|
||||
name = sym.getName() # starts to resolve names based on Ghidra's FidDB
|
||||
@@ -171,7 +185,7 @@ def map_fake_import_addrs() -> dict[int, list[int]]:
|
||||
"""
|
||||
fake_dict: dict[int, list[int]] = {}
|
||||
|
||||
for f in currentProgram().getFunctionManager().getExternalFunctions(): # type: ignore [name-defined] # noqa: F821
|
||||
for f in get_current_program().getFunctionManager().getExternalFunctions():
|
||||
for r in f.getSymbol().getReferences():
|
||||
if r.getReferenceType().isData():
|
||||
fake_dict.setdefault(f.getEntryPoint().getOffset(), []).append(r.getFromAddress().getOffset())
|
||||
@@ -180,7 +194,7 @@ def map_fake_import_addrs() -> dict[int, list[int]]:
|
||||
|
||||
|
||||
def check_addr_for_api(
|
||||
addr: ghidra.program.model.address.Address,
|
||||
addr: "ghidra.program.model.address.Address",
|
||||
fakes: dict[int, list[int]],
|
||||
imports: dict[int, list[str]],
|
||||
externs: dict[int, list[str]],
|
||||
@@ -202,18 +216,18 @@ def check_addr_for_api(
|
||||
return False
|
||||
|
||||
|
||||
def is_call_or_jmp(insn: ghidra.program.database.code.InstructionDB) -> bool:
|
||||
def is_call_or_jmp(insn: "ghidra.program.database.code.InstructionDB") -> bool:
|
||||
return any(mnem in insn.getMnemonicString() for mnem in ["CALL", "J"]) # JMP, JNE, JNZ, etc
|
||||
|
||||
|
||||
def is_sp_modified(insn: ghidra.program.database.code.InstructionDB) -> bool:
|
||||
def is_sp_modified(insn: "ghidra.program.database.code.InstructionDB") -> bool:
|
||||
for i in range(insn.getNumOperands()):
|
||||
if insn.getOperandType(i) == OperandType.REGISTER:
|
||||
return "SP" in insn.getRegister(i).getName() and insn.getOperandRefType(i).isWrite()
|
||||
return False
|
||||
|
||||
|
||||
def is_stack_referenced(insn: ghidra.program.database.code.InstructionDB) -> bool:
|
||||
def is_stack_referenced(insn: "ghidra.program.database.code.InstructionDB") -> bool:
|
||||
"""generic catch-all for stack references"""
|
||||
for i in range(insn.getNumOperands()):
|
||||
if insn.getOperandType(i) == OperandType.REGISTER:
|
||||
@@ -225,7 +239,7 @@ def is_stack_referenced(insn: ghidra.program.database.code.InstructionDB) -> boo
|
||||
return any(ref.isStackReference() for ref in insn.getReferencesFrom())
|
||||
|
||||
|
||||
def is_zxor(insn: ghidra.program.database.code.InstructionDB) -> bool:
|
||||
def is_zxor(insn: "ghidra.program.database.code.InstructionDB") -> bool:
|
||||
# assume XOR insn
|
||||
# XORs against the same operand zero out
|
||||
ops = []
|
||||
@@ -241,29 +255,29 @@ def is_zxor(insn: ghidra.program.database.code.InstructionDB) -> bool:
|
||||
return all(n == operands[0] for n in operands)
|
||||
|
||||
|
||||
def handle_thunk(addr: ghidra.program.model.address.Address):
|
||||
def handle_thunk(addr: "ghidra.program.model.address.Address"):
|
||||
"""Follow thunk chains down to a reasonable depth"""
|
||||
ref = addr
|
||||
for _ in range(THUNK_CHAIN_DEPTH_DELTA):
|
||||
thunk_jmp = getInstructionAt(ref) # type: ignore [name-defined] # noqa: F821
|
||||
thunk_jmp = get_flat_api().getInstructionAt(ref)
|
||||
if thunk_jmp and is_call_or_jmp(thunk_jmp):
|
||||
if OperandType.isAddress(thunk_jmp.getOperandType(0)):
|
||||
ref = thunk_jmp.getAddress(0)
|
||||
else:
|
||||
thunk_dat = getDataContaining(ref) # type: ignore [name-defined] # noqa: F821
|
||||
thunk_dat = get_flat_api().getDataContaining(ref)
|
||||
if thunk_dat and thunk_dat.isDefined() and thunk_dat.isPointer():
|
||||
ref = thunk_dat.getValue()
|
||||
break # end of thunk chain reached
|
||||
return ref
|
||||
|
||||
|
||||
def dereference_ptr(insn: ghidra.program.database.code.InstructionDB):
|
||||
def dereference_ptr(insn: "ghidra.program.database.code.InstructionDB"):
|
||||
addr_code = OperandType.ADDRESS | OperandType.CODE
|
||||
to_deref = insn.getAddress(0)
|
||||
dat = getDataContaining(to_deref) # type: ignore [name-defined] # noqa: F821
|
||||
dat = get_flat_api().getDataContaining(to_deref)
|
||||
|
||||
if insn.getOperandType(0) == addr_code:
|
||||
thfunc = getFunctionContaining(to_deref) # type: ignore [name-defined] # noqa: F821
|
||||
thfunc = get_flat_api().getFunctionContaining(to_deref)
|
||||
if thfunc and thfunc.isThunk():
|
||||
return handle_thunk(to_deref)
|
||||
else:
|
||||
@@ -294,7 +308,7 @@ def find_data_references_from_insn(insn, max_depth: int = 10):
|
||||
to_addr = reference.getToAddress()
|
||||
|
||||
for _ in range(max_depth - 1):
|
||||
data = getDataAt(to_addr) # type: ignore [name-defined] # noqa: F821
|
||||
data = get_flat_api().getDataAt(to_addr)
|
||||
if data and data.isPointer():
|
||||
ptr_value = data.getValue()
|
||||
|
||||
|
||||
@@ -234,7 +234,7 @@ def extract_insn_bytes_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle
|
||||
push offset iid_004118d4_IShellLinkA ; riid
|
||||
"""
|
||||
for addr in capa.features.extractors.ghidra.helpers.find_data_references_from_insn(ih.inner):
|
||||
data = getDataAt(addr) # type: ignore [name-defined] # noqa: F821
|
||||
data = capa.features.extractors.ghidra.helpers.get_flat_api().getDataAt(addr)
|
||||
if data and not data.hasStringValue():
|
||||
extracted_bytes = capa.features.extractors.ghidra.helpers.get_bytes(addr, MAX_BYTES_FEATURE_SIZE)
|
||||
if extracted_bytes and not capa.features.extractors.helpers.all_zeros(extracted_bytes):
|
||||
@@ -249,9 +249,9 @@ def extract_insn_string_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandl
|
||||
push offset aAcr ; "ACR > "
|
||||
"""
|
||||
for addr in capa.features.extractors.ghidra.helpers.find_data_references_from_insn(ih.inner):
|
||||
data = getDataAt(addr) # type: ignore [name-defined] # noqa: F821
|
||||
data = capa.features.extractors.ghidra.helpers.get_flat_api().getDataAt(addr)
|
||||
if data and data.hasStringValue():
|
||||
yield String(data.getValue()), ih.address
|
||||
yield String(str(data.getValue())), ih.address
|
||||
|
||||
|
||||
def extract_insn_mnemonic_features(
|
||||
@@ -361,8 +361,8 @@ def extract_insn_cross_section_cflow(
|
||||
if capa.features.extractors.ghidra.helpers.check_addr_for_api(ref, fakes, imports, externs):
|
||||
return
|
||||
|
||||
this_mem_block = getMemoryBlock(insn.getAddress()) # type: ignore [name-defined] # noqa: F821
|
||||
ref_block = getMemoryBlock(ref) # type: ignore [name-defined] # noqa: F821
|
||||
this_mem_block = capa.features.extractors.ghidra.helpers.get_flat_api().getMemoryBlock(insn.getAddress())
|
||||
ref_block = capa.features.extractors.ghidra.helpers.get_flat_api().getMemoryBlock(ref)
|
||||
if ref_block != this_mem_block:
|
||||
yield Characteristic("cross section flow"), ih.address
|
||||
|
||||
@@ -425,19 +425,19 @@ def check_nzxor_security_cookie_delta(
|
||||
Check if insn within last addr of last bb - delta
|
||||
"""
|
||||
|
||||
model = SimpleBlockModel(currentProgram()) # type: ignore [name-defined] # noqa: F821
|
||||
model = SimpleBlockModel(capa.features.extractors.ghidra.helpers.get_current_program())
|
||||
insn_addr = insn.getAddress()
|
||||
func_asv = fh.getBody()
|
||||
|
||||
first_addr = func_asv.getMinAddress()
|
||||
if insn_addr < first_addr.add(SECURITY_COOKIE_BYTES_DELTA):
|
||||
first_bb = model.getFirstCodeBlockContaining(first_addr, monitor()) # type: ignore [name-defined] # noqa: F821
|
||||
first_bb = model.getFirstCodeBlockContaining(first_addr, capa.features.extractors.ghidra.helpers.get_monitor())
|
||||
if first_bb.contains(insn_addr):
|
||||
return True
|
||||
|
||||
last_addr = func_asv.getMaxAddress()
|
||||
if insn_addr > last_addr.add(SECURITY_COOKIE_BYTES_DELTA * -1):
|
||||
last_bb = model.getFirstCodeBlockContaining(last_addr, monitor()) # type: ignore [name-defined] # noqa: F821
|
||||
last_bb = model.getFirstCodeBlockContaining(last_addr, capa.features.extractors.ghidra.helpers.get_monitor())
|
||||
if last_bb.contains(insn_addr):
|
||||
return True
|
||||
|
||||
|
||||
@@ -4,104 +4,14 @@
|
||||
|
||||
# capa + Ghidra
|
||||
|
||||
[capa](https://github.com/mandiant/capa) is the FLARE team’s open-source tool that detects capabilities in executable files. [Ghidra](https://github.com/NationalSecurityAgency/ghidra) is an open-source software reverse engineering framework created and maintained by the National Security Agency Research Directorate. capa + Ghidra brings capa’s detection capabilities directly to Ghidra’s user interface, helping speed up your reverse engineering tasks by identifying what parts of a program suggest interesting behavior, such as setting a registry value. You can execute the included Python 3 scripts [capa_explorer.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/capa_explorer.py) or [capa_ghidra.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/capa_ghidra.py) to run capa’s analysis and view the results in Ghidra. You may be asking yourself, “Python 3 scripts in Ghidra?” You read that correctly. This integration is written entirely in Python 3 and relies on [Ghidrathon](https://github.com/mandiant/ghidrathon), an open-source Ghidra extension that adds Python 3 scripting to Ghidra.
|
||||
[capa](https://github.com/mandiant/capa) is the FLARE team’s open-source tool that detects capabilities in executable files. [Ghidra](https://github.com/NationalSecurityAgency/ghidra) is an open-source software reverse engineering framework. capa + Ghidra brings capa’s detection capabilities to Ghidra using [PyGhidra](https://github.com/NationalSecurityAgency/ghidra/tree/master/Ghidra/Features/PyGhidra).
|
||||
|
||||
Check out our capa + Ghidra blog posts:
|
||||
* [Riding Dragons: capa Harnesses Ghidra](https://www.mandiant.com/resources/blog/capa-harnesses-ghidra)
|
||||
## Prerequisites
|
||||
|
||||
## UI Integration
|
||||
[capa_explorer.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/capa_explorer.py) renders capa results in Ghidra's UI to help you quickly navigate them. This includes adding matched functions to Ghidra’s Symbol Tree and Bookmarks windows and adding comments to functions that indicate matched capabilities and features. You can execute this script using Ghidra’s Script Manager window.
|
||||
|
||||
### Symbol Tree Window
|
||||
Matched functions are added to Ghidra's Symbol Tree window under a custom namespace that maps to the capabilities' [capa namespace](https://github.com/mandiant/capa-rules/blob/master/doc/format.md#rule-namespace).
|
||||
<div align="center">
|
||||
<img src="https://github.com/mandiant/capa/assets/66766340/eeae33f4-99d4-42dc-a5e8-4c1b8c661492" width=300>
|
||||
</div>
|
||||
|
||||
### Comments
|
||||
|
||||
Comments are added at the beginning of matched functions indicating matched capabilities and inline comments are added to functions indicating matched features. You can view these comments in Ghidra’s Disassembly Listing and Decompile windows.
|
||||
<div align="center">
|
||||
<img src="https://github.com/mandiant/capa/assets/66766340/bb2b4170-7fd4-45fc-8c7b-ff8f2e2f101b" width=1000>
|
||||
</div>
|
||||
|
||||
### Bookmarks
|
||||
|
||||
Bookmarks are added to functions that matched a capability that is mapped to a MITRE ATT&CK and/or Malware Behavior Catalog (MBC) technique. You can view these bookmarks in Ghidra's Bookmarks window.
|
||||
<div align="center">
|
||||
<img src="https://github.com/mandiant/capa/assets/66766340/7f9a66a9-7be7-4223-91c6-4b8fc4651336" width=825>
|
||||
</div>
|
||||
|
||||
## Text-based Integration
|
||||
|
||||
[capa_ghidra.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/capa_ghidra.py) outputs text-based capa results that mirror the output of capa’s standalone tool. You can execute this script using Ghidra’s Script Manager and view its output in Ghidra’s Console window.
|
||||
|
||||
<div align="center">
|
||||
<img src="../../doc/img/ghidra_script_mngr_output.png" width=700>
|
||||
</div>
|
||||
|
||||
You can also execute [capa_ghidra.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/capa_ghidra.py) using Ghidra's Headless Analyzer to view its output in a terminal window.
|
||||
|
||||
<div align="center">
|
||||
<img src="../../doc/img/ghidra_headless_analyzer.png">
|
||||
</div>
|
||||
|
||||
# Getting Started
|
||||
|
||||
## Requirements
|
||||
|
||||
| Tool | Version | Source |
|
||||
|------------|---------|--------|
|
||||
| capa | `>= 7.0.0` | https://github.com/mandiant/capa/releases |
|
||||
| Ghidrathon | `>= 3.0.0` | https://github.com/mandiant/Ghidrathon/releases |
|
||||
| Ghidra | `>= 10.3.2` | https://github.com/NationalSecurityAgency/ghidra/releases |
|
||||
| Python | `>= 3.10.0` | https://www.python.org/downloads |
|
||||
|
||||
## Installation
|
||||
|
||||
**Note**: capa + Ghidra relies on [Ghidrathon](https://github.com/mandiant/ghidrathon) to execute Python 3 code in Ghidra. You must first install and configure Ghidrathon using the [steps outlined in its README](https://github.com/mandiant/ghidrathon?tab=readme-ov-file#installing-ghidrathon). Then, you must use the Python 3 interpreter that you configured with Ghidrathon to complete the following steps:
|
||||
|
||||
1. Install capa and its dependencies from PyPI using the following command:
|
||||
```bash
|
||||
$ pip install flare-capa
|
||||
```
|
||||
|
||||
2. Download and extract the [official capa rules](https://github.com/mandiant/capa-rules/releases) that match the capa version you have installed. You can use the following command to view the version of capa you have installed:
|
||||
```bash
|
||||
$ pip show flare-capa
|
||||
OR
|
||||
$ capa --version
|
||||
```
|
||||
|
||||
3. Copy [capa_explorer.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/capa_explorer.py) and [capa_ghidra.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/capa_ghidra.py) to your `ghidra_scripts` directory or manually add the parent directory of each script using Ghidra’s Script Manager.
|
||||
- Ghidra >= 12.0 must be installed and available to PyGhidra (e.g. set `GHIDRA_INSTALL_DIR` environment variable)
|
||||
|
||||
## Usage
|
||||
|
||||
You can execute [capa_explorer.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/capa_explorer.py) and [capa_ghidra.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/capa_ghidra.py) using Ghidra’s Script Manager. [capa_ghidra.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/capa_ghidra.py) can also be executed using Ghidra's Headless Analyzer.
|
||||
|
||||
### Execution using Ghidra’s Script Manager
|
||||
|
||||
You can execute [capa_explorer.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/capa_explorer.py) and [capa_ghidra.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/capa_ghidra.py) using Ghidra's Script Manager as follows:
|
||||
1. Navigate to `Window > Script Manager`
|
||||
2. Expand the `Python 3 > capa` category
|
||||
3. Double-click a script to execute it
|
||||
|
||||
Both scripts ask you to provide the path of your capa rules directory (see installation step 2). [capa_ghidra.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/capa_ghidra.py) also has you choose one of `default`, `verbose`, and `vverbose` output formats which mirror the output formats of capa’s standalone tool.
|
||||
|
||||
### Execution using Ghidra’s Headless Analyzer
|
||||
|
||||
You can execute [capa_ghidra.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/capa_ghidra.py) using Ghidra’s Headless Analyzer by invoking the `analyzeHeadless` script included with Ghidra in its `support` directory. The following arguments must be provided:
|
||||
|
||||
| Argument | Description |
|
||||
|----|----|
|
||||
|`<project_path>`| Path to Ghidra project|
|
||||
| `<project_name>`| Name of Ghidra Project|
|
||||
| `-Process <sample_name>` OR `-Import <sample_path>`| Name of sample `<sample_name>` already imported into `<project_name>` OR absolute path of sample `<sample_path>` to import into `<project_name>`|
|
||||
| `-ScriptPath <script_path>`| OPTIONAL parent directory `<script_path>` of [capa_ghidra.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/capa_ghidra.py)|
|
||||
| `-PostScript capa_ghidra.py`| Execute [capa_ghidra.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/capa_ghidra.py) after Ghidra analysis|
|
||||
| `"<script_args>"`| Quoted string `"<script_args>"` containing script arguments passed to [capa_ghidra.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/capa_ghidra.py) that must specify a capa rules path and optionally the output format (`--verbose`, `--vverbose`, `--json`) – you can specify `"help"` to view the script’s help message |
|
||||
|
||||
The following is an example of combining these arguments into a single `analyzeHeadless` script command:
|
||||
```bash
|
||||
$ analyzeHeadless /home/wumbo/demo demo -Import /home/wumbo/capa/tests/data/Practical\ Malware\ Analysis\ Lab\ 01-01.dll_ -PostScript capa_ghidra.py "/home/wumbo/capa/rules --verbose"
|
||||
$ capa -b ghidra /path/to/sample
|
||||
```
|
||||
|
||||
@@ -1,385 +0,0 @@
|
||||
# Run capa against loaded Ghidra database and render results in Ghidra UI
|
||||
# @author Colton Gabertan (gabertan.colton@gmail.com)
|
||||
# @category Python 3.capa
|
||||
|
||||
# Copyright 2024 Google LLC
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import sys
|
||||
import json
|
||||
import logging
|
||||
import pathlib
|
||||
from typing import Any
|
||||
|
||||
from ghidra.app.cmd.label import AddLabelCmd, CreateNamespacesCmd
|
||||
from ghidra.program.model.symbol import Namespace, SourceType, SymbolType
|
||||
|
||||
import capa
|
||||
import capa.main
|
||||
import capa.rules
|
||||
import capa.render.json
|
||||
import capa.ghidra.helpers
|
||||
import capa.capabilities.common
|
||||
import capa.features.extractors.ghidra.extractor
|
||||
|
||||
logger = logging.getLogger("capa_explorer")
|
||||
|
||||
|
||||
def add_bookmark(addr, txt, category="CapaExplorer"):
    """Set an "Info" bookmark with text `txt` at `addr` in the current program.

    `category` is passed through as the bookmark category and defaults to
    "CapaExplorer".
    """
    bookmark_manager = currentProgram().getBookmarkManager()  # type: ignore [name-defined] # noqa: F821
    bookmark_manager.setBookmark(addr, "Info", category, txt)
|
||||
|
||||
|
||||
def create_namespace(namespace_str):
    """Apply a user-defined CreateNamespacesCmd for `namespace_str`.

    Returns the namespace reported by the command after it has been applied
    to the current program.
    """
    namespace_cmd = CreateNamespacesCmd(namespace_str, SourceType.USER_DEFINED)
    program = currentProgram()  # type: ignore [name-defined] # noqa: F821
    namespace_cmd.applyTo(program)
    return namespace_cmd.getNamespace()
|
||||
|
||||
|
||||
def create_label(ghidra_addr, name, capa_namespace):
    """Add a user-defined label `name` at `ghidra_addr` and move it into `capa_namespace`.

    Nothing is created when a symbol at that address already carries the fully
    qualified name "<capa_namespace>::<name>", so repeated runs do not stack
    duplicate labels.
    """
    symbols_here = currentProgram().getSymbolTable().getSymbols(ghidra_addr)  # type: ignore [name-defined] # noqa: F821
    already_labeled = any(
        existing.getName(True) == capa_namespace.getName(True) + Namespace.DELIMITER + name
        for existing in symbols_here
    )
    if already_labeled:
        return

    # create SymbolType.LABEL at addr
    # prioritize capa-generated namespace (duplicate match @ new addr), else put under global Ghidra one (new match)
    label_cmd = AddLabelCmd(ghidra_addr, name, True, SourceType.USER_DEFINED)
    label_cmd.applyTo(currentProgram())  # type: ignore [name-defined] # noqa: F821

    # re-home the freshly created label under the capa-generated namespace
    label_cmd.getSymbol().setNamespace(capa_namespace)
|
||||
|
||||
|
||||
class CapaMatchData:
    """Matches of a single capa rule plus the helpers that annotate them in Ghidra.

    The constructor stores its arguments unchanged:
      namespace:  Ghidra namespace path for the rule (parts joined by Namespace.DELIMITER)
      scope:      the rule's static scope; label_matches() special-cases "function"
      capability: rule name, embedded in the pre comments
      matches:    {rule match addr: [{feature addr: node dict}]}
      attack:     MITRE ATT&CK entries, dicts read for their "parts" and "id" keys
      mbc:        MBC entries, read the same way as `attack`
    """

    def __init__(
        self,
        namespace,
        scope,
        capability,
        matches,
        attack: list[dict[Any, Any]],
        mbc: list[dict[Any, Any]],
    ):
        self.namespace = namespace
        self.scope = scope
        self.capability = capability
        self.matches = matches
        self.attack = attack
        self.mbc = mbc

    @staticmethod
    def _mapping_text(item, delimiter):
        """Render one ATT&CK/MBC entry as "<part><delimiter>...<id>".

        A missing "id" contributes an empty string. The previous default was a
        dict, which raised TypeError when concatenated to the text.
        """
        parts = item.get("parts") or []
        return "".join(part + delimiter for part in parts) + (item.get("id") or "")

    def bookmark_functions(self):
        """create bookmarks for MITRE ATT&CK & MBC mappings"""

        if self.attack == [] and self.mbc == []:
            return

        for key in self.matches.keys():
            addr = toAddr(hex(key))  # type: ignore [name-defined] # noqa: F821
            func = getFunctionContaining(addr)  # type: ignore [name-defined] # noqa: F821

            # bookmark & tag MITRE ATT&CK tactics & MBC @ function scope
            if func is None:
                continue
            func_addr = func.getEntryPoint()

            for item in self.attack:
                attack_txt = self._mapping_text(item, Namespace.DELIMITER)
                add_bookmark(func_addr, attack_txt, "CapaExplorer::MITRE ATT&CK")

            for item in self.mbc:
                mbc_txt = self._mapping_text(item, Namespace.DELIMITER)
                add_bookmark(func_addr, mbc_txt, "CapaExplorer::MBC")

    def set_plate_comment(self, ghidra_addr):
        """set plate comments at matched functions"""
        comment = getPlateComment(ghidra_addr)  # type: ignore [name-defined] # noqa: F821
        rule_path = self.namespace.replace(Namespace.DELIMITER, "/")
        # the "not in" check avoids duplicate comments via subsequent script runs
        if comment is None:
            # first comment @ function
            setPlateComment(ghidra_addr, rule_path + "\n")  # type: ignore [name-defined] # noqa: F821
        elif rule_path not in comment:
            setPlateComment(ghidra_addr, comment + rule_path + "\n")  # type: ignore [name-defined] # noqa: F821

    def set_pre_comment(self, ghidra_addr, sub_type, description):
        """set pre comments at subscoped matches of main rules"""
        comment = getPreComment(ghidra_addr)  # type: ignore [name-defined] # noqa: F821
        if comment is not None and self.capability in comment:
            # this capability is already mentioned at the address
            return

        line = "capa: " + sub_type + "(" + description + ")" + ' matched in "' + self.capability + '"\n'
        if comment is not None:
            line = comment + line
        setPreComment(ghidra_addr, line)  # type: ignore [name-defined] # noqa: F821

    def _comment_node(self, ghidra_addr, node):
        """Add one pre comment at ghidra_addr for every item parse_node() yields."""
        for sub_type, description in parse_node(node):
            self.set_pre_comment(ghidra_addr, sub_type, description)

    def label_matches(self):
        """label findings at function scopes and comment on subscope matches"""
        capa_namespace = create_namespace(self.namespace)
        symbol_table = currentProgram().getSymbolTable()  # type: ignore [name-defined] # noqa: F821

        # handle function main scope of matched rule
        # these will typically contain further matches within
        if self.scope == "function":
            for addr in self.matches.keys():
                ghidra_addr = toAddr(hex(addr))  # type: ignore [name-defined] # noqa: F821

                # classify new function label under capa-generated namespace
                sym = symbol_table.getPrimarySymbol(ghidra_addr)
                if sym is None or sym.getSymbolType() != SymbolType.FUNCTION:
                    continue

                create_label(ghidra_addr, sym.getName(), capa_namespace)
                self.set_plate_comment(ghidra_addr)

                # parse the corresponding nodes, and pre-comment subscope matched features
                # under the encompassing function(s)
                for sub_match in self.matches.get(addr):
                    for loc, node in sub_match.items():
                        sub_ghidra_addr = toAddr(hex(loc))  # type: ignore [name-defined] # noqa: F821
                        if sub_ghidra_addr == ghidra_addr:
                            # skip duplicates
                            continue

                        # precomment subscope matches under the function
                        if node != {}:
                            self._comment_node(sub_ghidra_addr, node)
            return

        # resolve the encompassing function for the capa namespace
        # of non-function scoped main matches
        for addr in self.matches.keys():
            ghidra_addr = toAddr(hex(addr))  # type: ignore [name-defined] # noqa: F821

            # basic block / insn scoped main matches
            # Ex. See "Create Process on Windows" Rule
            func = getFunctionContaining(ghidra_addr)  # type: ignore [name-defined] # noqa: F821
            if func is not None:
                func_addr = func.getEntryPoint()
                create_label(func_addr, func.getName(), capa_namespace)
                self.set_plate_comment(func_addr)

            # create subscope match precomments
            for sub_match in self.matches.get(addr):
                for loc, node in sub_match.items():
                    sub_ghidra_addr = toAddr(hex(loc))  # type: ignore [name-defined] # noqa: F821

                    if node == {}:
                        continue

                    if func is not None:
                        # basic block/ insn scope under resolved function
                        self._comment_node(sub_ghidra_addr, node)
                        continue

                    # this would be a global/file scoped main match
                    # try to resolve the encompassing function via the subscope match, instead
                    # Ex. "run as service" rule
                    sub_func = getFunctionContaining(sub_ghidra_addr)  # type: ignore [name-defined] # noqa: F821
                    if sub_func is not None:
                        sub_func_addr = sub_func.getEntryPoint()
                        # place function in capa namespace & create the subscope match label in Ghidra's global namespace
                        create_label(sub_func_addr, sub_func.getName(), capa_namespace)
                        self.set_plate_comment(sub_func_addr)
                        self._comment_node(sub_ghidra_addr, node)
                    else:
                        # addr is in some other file section like .data
                        # represent this location with a label symbol under the capa namespace
                        # Ex. See "Reference Base64 String" rule
                        for sub_type, description in parse_node(node):
                            # in many cases, these will be ghidra-labeled data, so just add the existing
                            # label symbol to the capa namespace
                            for sym in symbol_table.getSymbols(sub_ghidra_addr):
                                if sym.getSymbolType() == SymbolType.LABEL:
                                    sym.setNamespace(capa_namespace)
                            self.set_pre_comment(sub_ghidra_addr, sub_type, description)
|
||||
|
||||
|
||||
def get_capabilities():
    """Ask for a rules directory, run capa on the current program, return the JSON report.

    Returns an empty string (after logging a hint) when no directory was
    chosen; otherwise returns the output of capa.render.json.render().
    """
    try:
        chosen = askDirectory("Choose capa rules directory", "Ok")  # type: ignore [name-defined] # noqa: F821
    except RuntimeError:
        # RuntimeError thrown when user selects "Cancel"
        chosen = None

    rules_dir: str = chosen.getPath() if chosen else ""
    if not rules_dir:
        logger.info("You must choose a capa rules directory before running capa.")
        return ""  # return empty str to avoid handling both int and str types

    rules_path: pathlib.Path = pathlib.Path(rules_dir)
    logger.info("running capa using rules from %s", str(rules_path))

    rules = capa.rules.get_rules([rules_path])
    meta = capa.ghidra.helpers.collect_metadata([rules_path])
    extractor = capa.features.extractors.ghidra.extractor.GhidraFeatureExtractor()

    capabilities = capa.capabilities.common.find_capabilities(rules, extractor, True)

    limited = capa.capabilities.common.has_static_limitation(rules, capabilities, is_standalone=False)
    if limited:
        popup("capa explorer encountered warnings during analysis. Please check the console output for more information.")  # type: ignore [name-defined] # noqa: F821
        logger.info("capa encountered warnings during analysis")

    return capa.render.json.render(meta, rules, capabilities.matches)
|
||||
|
||||
|
||||
def get_locations(match_dict):
    """Walk a match tree depth-first, yielding (location value, node) pairs.

    A pair is produced for every entry of `match_dict["locations"]` whose
    "type" is "absolute" (an rva) or "file" (an offset into a file); the node
    is the one stored on the same dict. Children are visited afterwards.
    """
    addressable_types = ("absolute", "file")
    node = match_dict.get("node")

    yield from (
        (location.get("value"), node)
        for location in match_dict.get("locations", {})
        if location.get("type", "") in addressable_types
    )

    for child_match in match_dict.get("children", {}):
        yield from get_locations(child_match)
|
||||
|
||||
|
||||
def parse_node(node_data):
    """pull match descriptions and sub features by parsing node dicts

    `node_data` is expected to look like {"type": T, T: {...}}. The inner dict
    is inspected and up to two pairs are yielded:
      ("description", <text>)   when the inner dict has a "description" key
      (<inner type>, <value>)   when the value stored under the inner dict's
                                own "type" key is a str or an int (ints are
                                rendered with hex())

    Nothing is yielded when `node_data` is not a dict or has no inner dict
    under its "type" key. This previously raised TypeError.
    """
    if not isinstance(node_data, dict):
        return

    node = node_data.get(node_data.get("type"))
    if not isinstance(node, dict):
        # no payload for the declared type, so there is nothing to describe
        return

    if "description" in node:
        yield "description", node.get("description")

    feat_type = node.get("type")
    data = node.get(feat_type)
    if isinstance(data, (str, int)):
        if isinstance(data, int):
            data = hex(data)
        yield feat_type, data
|
||||
|
||||
|
||||
def parse_json(capa_data):
    """Turn capa's JSON result document into CapaMatchData objects, one per rule.

    For each entry of `capa_data["rules"]` this collects the match locations
    with their feature nodes, reads the ATT&CK/MBC lists and the static scope
    from the rule's "meta", and derives a Ghidra namespace path for the rule.
    """
    delimiter = Namespace.DELIMITER

    for rule, capability in capa_data.get("rules", {}).items():
        # structure to contain rule match address & supporting feature data
        # {rule match addr:[{feature addr:{node_data}}]}
        rule_matches: dict[Any, list[Any]] = {}
        for index, match in enumerate(capability.get("matches")):
            # match[0]: rule match location, match[1]: tree of matched features
            match_loc = match[0].get("value")
            if match_loc is None:
                # Ex. See "Reference Base64 string"
                # {'type':'no address'}
                match_loc = index

            # map every extracted feature location to its node
            rule_matches[match_loc] = [dict(get_locations(match[1]))]

        # dict data of currently matched rule
        meta = capability["meta"]

        # get MITRE ATT&CK and MBC
        attack = meta.get("attack")
        attack = [] if attack is None else attack
        mbc = meta.get("mbc")
        mbc = [] if mbc is None else mbc

        # scope match for the rule
        scope = meta["scopes"].get("static")

        if "namespace" in meta:
            # split into list to help define child namespaces
            # this requires the correct delimiter used by Ghidra
            # Ex. 'communication/named-pipe/create/create pipe' -> capa::communication::named-pipe::create::create-pipe
            parent = delimiter.join(meta["namespace"].split("/"))
        else:
            # lib rules via the official rules repo will not contain data
            # for the "namespaces" key, so format using rule itself
            # Ex. 'contain loop' -> capa::lib::contain-loop
            parent = "lib"
        namespace = "capa" + delimiter + parent + delimiter + rule.replace(" ", "-")

        yield CapaMatchData(namespace, scope, rule, rule_matches, attack, mbc)
|
||||
|
||||
|
||||
def main():
    """Run capa explorer inside Ghidra's GUI and annotate the current program.

    Returns 0 on success, otherwise one of the capa.main.E_* codes:
    unsupported execution mode, Ghidra version, file type or architecture,
    or E_EMPTY_REPORT when no report was produced or it holds no rule matches.
    """
    logging.basicConfig(level=logging.INFO)
    logging.getLogger().setLevel(logging.INFO)

    if isRunningHeadless():  # type: ignore [name-defined] # noqa: F821
        logger.error("unsupported Ghidra execution mode")
        return capa.main.E_UNSUPPORTED_GHIDRA_EXECUTION_MODE

    if not capa.ghidra.helpers.is_supported_ghidra_version():
        logger.error("unsupported Ghidra version")
        return capa.main.E_UNSUPPORTED_GHIDRA_VERSION

    if not capa.ghidra.helpers.is_supported_file_type():
        logger.error("unsupported file type")
        return capa.main.E_INVALID_FILE_TYPE

    if not capa.ghidra.helpers.is_supported_arch_type():
        logger.error("unsupported file architecture")
        return capa.main.E_INVALID_FILE_ARCH

    capa_json = get_capabilities()
    if not capa_json:
        # get_capabilities() returns "" (and logs why) when no rules directory
        # was chosen; json.loads("") would raise, so stop here instead
        return capa.main.E_EMPTY_REPORT

    # capa_data will always contain {'meta':..., 'rules':...}
    # if the 'rules' key contains no values, then there were no matches
    capa_data = json.loads(capa_json)
    if not capa_data.get("rules"):
        logger.info("capa explorer found no matches")
        popup("capa explorer found no matches.")  # type: ignore [name-defined] # noqa: F821
        return capa.main.E_EMPTY_REPORT

    for item in parse_json(capa_data):
        item.bookmark_functions()
        item.label_matches()
    logger.info("capa explorer analysis complete")
    popup("capa explorer analysis complete.\nPlease see results in the Bookmarks Window and Namespaces section of the Symbol Tree Window.")  # type: ignore [name-defined] # noqa: F821
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # capa requires Python 3.10 or newer; fail loudly on anything older
    if not sys.version_info >= (3, 10):
        from capa.exceptions import UnsupportedRuntimeError

        raise UnsupportedRuntimeError("This version of capa can only be used with Python 3.10+")

    status = main()
    if status != 0:
        # any non-zero status is surfaced to the user before exiting
        popup("capa explorer encountered errors during analysis. Please check the console output for more information.")  # type: ignore [name-defined] # noqa: F821
    sys.exit(status)
|
||||
@@ -1,174 +0,0 @@
|
||||
# Run capa against loaded Ghidra database and render results in Ghidra Console window
|
||||
# @author Mike Hunhoff (mehunhoff@google.com)
|
||||
# @category Python 3.capa
|
||||
|
||||
# Copyright 2023 Google LLC
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import sys
|
||||
import logging
|
||||
import pathlib
|
||||
import argparse
|
||||
|
||||
import capa
|
||||
import capa.main
|
||||
import capa.rules
|
||||
import capa.ghidra.helpers
|
||||
import capa.render.default
|
||||
import capa.capabilities.common
|
||||
import capa.features.extractors.ghidra.extractor
|
||||
|
||||
logger = logging.getLogger("capa_ghidra")
|
||||
|
||||
|
||||
def run_headless():
    """Run capa from a headless Ghidra script invocation and print the rendered results.

    The options are read from `getScriptArgs()`: when exactly one script argument
    is supplied it is split on whitespace and parsed with argparse; any other
    number of script arguments is treated as an empty command line.

    Returns:
        0 after the result document has been printed.
    """
    parser = argparse.ArgumentParser(description="The FLARE team's open-source tool to integrate capa with Ghidra.")
    parser.add_argument("rules", type=str, help="path to rule file or directory")
    parser.add_argument(
        "-v", "--verbose", action="store_true", help="enable verbose result document (no effect with --json)"
    )
    parser.add_argument(
        "-vv", "--vverbose", action="store_true", help="enable very verbose result document (no effect with --json)"
    )
    parser.add_argument("-d", "--debug", action="store_true", help="enable debugging output on STDERR")
    parser.add_argument("-q", "--quiet", action="store_true", help="disable all output but errors")
    parser.add_argument("-j", "--json", action="store_true", help="emit JSON instead of text")

    raw_args = list(getScriptArgs())  # type: ignore [name-defined] # noqa: F821
    if len(raw_args) == 1:
        # a bare "help" token (any case) is rewritten to argparse's --help flag
        script_args = ["--help" if token.lower() == "help" else token for token in raw_args[0].split()]
    else:
        script_args = []

    args = parser.parse_args(args=script_args)

    # --quiet takes precedence over --debug; INFO is the default level
    if args.quiet:
        level = logging.WARNING
    elif args.debug:
        level = logging.DEBUG
    else:
        level = logging.INFO
    logging.basicConfig(level=level)
    logging.getLogger().setLevel(level)

    logger.debug("running in Ghidra headless mode")

    rules_path = pathlib.Path(args.rules)
    logger.debug("rule path: %s", rules_path)

    rules = capa.rules.get_rules([rules_path])
    meta = capa.ghidra.helpers.collect_metadata([rules_path])
    extractor = capa.features.extractors.ghidra.extractor.GhidraFeatureExtractor()

    capabilities = capa.capabilities.common.find_capabilities(rules, extractor, False)

    analysis = meta.analysis
    analysis.feature_counts = capabilities.feature_counts
    analysis.library_functions = capabilities.library_functions
    analysis.layout = capa.loader.compute_layout(rules, extractor, capabilities.matches)

    if capa.capabilities.common.has_static_limitation(rules, capabilities, is_standalone=True):
        logger.info("capa encountered warnings during analysis")

    # pick the renderer module: --json wins over -vv, which wins over -v
    if args.json:
        renderer = capa.render.json
    elif args.vverbose:
        renderer = capa.render.vverbose
    elif args.verbose:
        renderer = capa.render.verbose
    else:
        renderer = capa.render.default

    print(renderer.render(meta, rules, capabilities.matches))  # noqa: T201

    return 0
|
||||
|
||||
|
||||
def run_ui():
    """Run capa interactively: prompt for a rules directory and verbosity, then print results.

    Returns:
        `capa.main.E_MISSING_RULES` when no rules directory was chosen,
        otherwise 0 after the result document has been printed.
    """
    logging.basicConfig(level=logging.INFO)
    logging.getLogger().setLevel(logging.INFO)

    try:
        chosen = askDirectory("Choose capa rules directory", "Ok")  # type: ignore [name-defined] # noqa: F821
        rules_dir: str = chosen.getPath() if chosen else ""
    except RuntimeError:
        # RuntimeError thrown when user selects "Cancel"
        rules_dir = ""

    if not rules_dir:
        logger.info("You must choose a capa rules directory before running capa.")
        return capa.main.E_MISSING_RULES

    verbose = askChoice(  # type: ignore [name-defined] # noqa: F821
        "capa output verbosity", "Choose capa output verbosity", ["default", "verbose", "vverbose"], "default"
    )

    rules_path: pathlib.Path = pathlib.Path(rules_dir)
    logger.info("running capa using rules from %s", str(rules_path))

    rules = capa.rules.get_rules([rules_path])
    meta = capa.ghidra.helpers.collect_metadata([rules_path])
    extractor = capa.features.extractors.ghidra.extractor.GhidraFeatureExtractor()

    capabilities = capa.capabilities.common.find_capabilities(rules, extractor, True)

    analysis = meta.analysis
    analysis.feature_counts = capabilities.feature_counts
    analysis.library_functions = capabilities.library_functions
    analysis.layout = capa.loader.compute_layout(rules, extractor, capabilities.matches)

    if capa.capabilities.common.has_static_limitation(rules, capabilities, is_standalone=False):
        logger.info("capa encountered warnings during analysis")

    # map the chosen verbosity onto a renderer module; anything else uses the default
    if verbose == "vverbose":
        renderer = capa.render.vverbose
    elif verbose == "verbose":
        renderer = capa.render.verbose
    else:
        renderer = capa.render.default

    print(renderer.render(meta, rules, capabilities.matches))  # noqa: T201

    return 0
|
||||
|
||||
|
||||
def main():
    """Validate the Ghidra environment, then dispatch to the headless or UI runner.

    Returns:
        One of the `capa.main.E_*` codes when the Ghidra version, file type, or
        architecture check fails; otherwise the value returned by
        `run_headless()` or `run_ui()`.
    """
    helpers = capa.ghidra.helpers

    if not helpers.is_supported_ghidra_version():
        return capa.main.E_UNSUPPORTED_GHIDRA_VERSION

    if not helpers.is_supported_file_type():
        return capa.main.E_INVALID_FILE_TYPE

    if not helpers.is_supported_arch_type():
        return capa.main.E_INVALID_FILE_ARCH

    headless = isRunningHeadless()  # type: ignore [name-defined] # noqa: F821
    return run_headless() if headless else run_ui()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Refuse to run on interpreters older than Python 3.10.
    if sys.version_info < (3, 10):
        from capa.exceptions import UnsupportedRuntimeError

        raise UnsupportedRuntimeError("This version of capa can only be used with Python 3.10+")

    # main()'s return value becomes the process exit status.
    status = main()
    sys.exit(status)
|
||||
@@ -22,6 +22,7 @@ import capa.version
|
||||
import capa.features.common
|
||||
import capa.features.freeze
|
||||
import capa.render.result_document as rdoc
|
||||
import capa.features.extractors.ghidra.context as ghidra_context
|
||||
import capa.features.extractors.ghidra.helpers
|
||||
from capa.features.address import AbsoluteVirtualAddress
|
||||
|
||||
@@ -31,6 +32,14 @@ logger = logging.getLogger("capa")
|
||||
SUPPORTED_FILE_TYPES = ("Executable and Linking Format (ELF)", "Portable Executable (PE)", "Raw Binary")
|
||||
|
||||
|
||||
def get_current_program():
    """Return the `program` attribute of the context held by `ghidra_context`."""
    context = ghidra_context.get_context()
    return context.program
|
||||
|
||||
|
||||
def get_flat_api():
    """Return the `flat_api` attribute of the context held by `ghidra_context`."""
    context = ghidra_context.get_context()
    return context.flat_api
|
||||
|
||||
|
||||
class GHIDRAIO:
|
||||
"""
|
||||
An object that acts as a file-like object,
|
||||
@@ -48,7 +57,12 @@ class GHIDRAIO:
|
||||
self.offset = offset
|
||||
|
||||
def read(self, size):
|
||||
logger.debug("reading 0x%x bytes at 0x%x (ea: 0x%x)", size, self.offset, currentProgram().getImageBase().add(self.offset).getOffset()) # type: ignore [name-defined] # noqa: F821
|
||||
logger.debug(
|
||||
"reading 0x%x bytes at 0x%x (ea: 0x%x)",
|
||||
size,
|
||||
self.offset,
|
||||
get_current_program().getImageBase().add(self.offset).getOffset(),
|
||||
)
|
||||
|
||||
if size > len(self.bytes_) - self.offset:
|
||||
logger.debug("cannot read 0x%x bytes at 0x%x (ea: BADADDR)", size, self.offset)
|
||||
@@ -60,7 +74,7 @@ class GHIDRAIO:
|
||||
return
|
||||
|
||||
def get_bytes(self):
|
||||
file_bytes = currentProgram().getMemory().getAllFileBytes()[0] # type: ignore [name-defined] # noqa: F821
|
||||
file_bytes = get_current_program().getMemory().getAllFileBytes()[0]
|
||||
|
||||
# getOriginalByte() allows for raw file parsing on the Ghidra side
|
||||
# other functions will fail as Ghidra will think that it's reading uninitialized memory
|
||||
@@ -70,21 +84,32 @@ class GHIDRAIO:
|
||||
|
||||
|
||||
def is_supported_ghidra_version():
|
||||
version = float(getGhidraVersion()[:4]) # type: ignore [name-defined] # noqa: F821
|
||||
if version < 10.2:
|
||||
warning_msg = "capa does not support this Ghidra version"
|
||||
logger.warning(warning_msg)
|
||||
logger.warning("Your Ghidra version is: %s. Supported versions are: Ghidra >= 10.2", version)
|
||||
import ghidra.framework
|
||||
|
||||
version = ghidra.framework.Application.getApplicationVersion()
|
||||
try:
|
||||
# version format example: "11.1.2" or "11.4"
|
||||
major, minor = map(int, version.split(".")[:2])
|
||||
if major < 12:
|
||||
logger.error("-" * 80)
|
||||
logger.error(" Ghidra version %s is not supported.", version)
|
||||
logger.error(" ")
|
||||
logger.error(" capa requires Ghidra 12.0 or higher.")
|
||||
logger.error("-" * 80)
|
||||
return False
|
||||
except ValueError:
|
||||
logger.warning("could not parse Ghidra version: %s", version)
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
|
||||
def is_running_headless():
|
||||
return isRunningHeadless() # type: ignore [name-defined] # noqa: F821
|
||||
return True # PyGhidra is always headless in this context
|
||||
|
||||
|
||||
def is_supported_file_type():
|
||||
file_info = currentProgram().getExecutableFormat() # type: ignore [name-defined] # noqa: F821
|
||||
file_info = get_current_program().getExecutableFormat()
|
||||
if file_info not in SUPPORTED_FILE_TYPES:
|
||||
logger.error("-" * 80)
|
||||
logger.error(" Input file does not appear to be a supported file type.")
|
||||
@@ -99,7 +124,7 @@ def is_supported_file_type():
|
||||
|
||||
|
||||
def is_supported_arch_type():
|
||||
lang_id = str(currentProgram().getLanguageID()).lower() # type: ignore [name-defined] # noqa: F821
|
||||
lang_id = str(get_current_program().getLanguageID()).lower()
|
||||
|
||||
if not all((lang_id.startswith("x86"), any(arch in lang_id for arch in ("32", "64")))):
|
||||
logger.error("-" * 80)
|
||||
@@ -112,18 +137,18 @@ def is_supported_arch_type():
|
||||
|
||||
|
||||
def get_file_md5():
|
||||
return currentProgram().getExecutableMD5() # type: ignore [name-defined] # noqa: F821
|
||||
return get_current_program().getExecutableMD5()
|
||||
|
||||
|
||||
def get_file_sha256():
|
||||
return currentProgram().getExecutableSHA256() # type: ignore [name-defined] # noqa: F821
|
||||
return get_current_program().getExecutableSHA256()
|
||||
|
||||
|
||||
def collect_metadata(rules: list[Path]):
|
||||
md5 = get_file_md5()
|
||||
sha256 = get_file_sha256()
|
||||
|
||||
info = currentProgram().getLanguageID().toString() # type: ignore [name-defined] # noqa: F821
|
||||
info = get_current_program().getLanguageID().toString()
|
||||
if "x86" in info and "64" in info:
|
||||
arch = "x86_64"
|
||||
elif "x86" in info and "32" in info:
|
||||
@@ -131,11 +156,11 @@ def collect_metadata(rules: list[Path]):
|
||||
else:
|
||||
arch = "unknown arch"
|
||||
|
||||
format_name: str = currentProgram().getExecutableFormat() # type: ignore [name-defined] # noqa: F821
|
||||
format_name: str = get_current_program().getExecutableFormat()
|
||||
if "PE" in format_name:
|
||||
os = "windows"
|
||||
elif "ELF" in format_name:
|
||||
with contextlib.closing(capa.ghidra.helpers.GHIDRAIO()) as f:
|
||||
with contextlib.closing(GHIDRAIO()) as f:
|
||||
os = capa.features.extractors.elf.detect_elf_os(f)
|
||||
else:
|
||||
os = "unknown os"
|
||||
@@ -148,16 +173,18 @@ def collect_metadata(rules: list[Path]):
|
||||
md5=md5,
|
||||
sha1="",
|
||||
sha256=sha256,
|
||||
path=currentProgram().getExecutablePath(), # type: ignore [name-defined] # noqa: F821
|
||||
path=get_current_program().getExecutablePath(),
|
||||
),
|
||||
flavor=rdoc.Flavor.STATIC,
|
||||
analysis=rdoc.StaticAnalysis(
|
||||
format=currentProgram().getExecutableFormat(), # type: ignore [name-defined] # noqa: F821
|
||||
format=get_current_program().getExecutableFormat(),
|
||||
arch=arch,
|
||||
os=os,
|
||||
extractor="ghidra",
|
||||
rules=tuple(r.resolve().absolute().as_posix() for r in rules),
|
||||
base_address=capa.features.freeze.Address.from_capa(AbsoluteVirtualAddress(currentProgram().getImageBase().getOffset())), # type: ignore [name-defined] # noqa: F821
|
||||
base_address=capa.features.freeze.Address.from_capa(
|
||||
AbsoluteVirtualAddress(get_current_program().getImageBase().getOffset())
|
||||
),
|
||||
layout=rdoc.StaticLayout(
|
||||
functions=(),
|
||||
),
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
"plugin": {
|
||||
"name": "capa",
|
||||
"entryPoint": "capa_explorer.py",
|
||||
"version": "9.3.0",
|
||||
"version": "9.3.1",
|
||||
"idaVersions": ">=7.4",
|
||||
"description": "Identify capabilities in executable files using FLARE's capa framework",
|
||||
"license": "Apache-2.0",
|
||||
@@ -12,7 +12,7 @@
|
||||
"api-scripting-and-automation",
|
||||
"ui-ux-and-visualization"
|
||||
],
|
||||
"pythonDependencies": ["flare-capa==9.3.0"],
|
||||
"pythonDependencies": ["flare-capa==9.3.1"],
|
||||
"urls": {
|
||||
"repository": "https://github.com/mandiant/capa"
|
||||
},
|
||||
|
||||
@@ -79,6 +79,7 @@ BACKEND_VMRAY = "vmray"
|
||||
BACKEND_FREEZE = "freeze"
|
||||
BACKEND_BINEXPORT2 = "binexport2"
|
||||
BACKEND_IDA = "ida"
|
||||
BACKEND_GHIDRA = "ghidra"
|
||||
|
||||
|
||||
class CorruptFile(ValueError):
|
||||
@@ -351,6 +352,69 @@ def get_extractor(
|
||||
|
||||
return capa.features.extractors.ida.extractor.IdaFeatureExtractor()
|
||||
|
||||
elif backend == BACKEND_GHIDRA:
|
||||
import pyghidra
|
||||
|
||||
with console.status("analyzing program...", spinner="dots"):
|
||||
if not pyghidra.started():
|
||||
pyghidra.start()
|
||||
|
||||
import capa.ghidra.helpers
|
||||
|
||||
if not capa.ghidra.helpers.is_supported_ghidra_version():
|
||||
raise RuntimeError("unsupported Ghidra version")
|
||||
|
||||
import tempfile
|
||||
|
||||
tmpdir = tempfile.TemporaryDirectory()
|
||||
|
||||
project_cm = pyghidra.open_project(tmpdir.name, "CapaProject", create=True)
|
||||
project = project_cm.__enter__()
|
||||
try:
|
||||
from ghidra.util.task import TaskMonitor
|
||||
|
||||
monitor = TaskMonitor.DUMMY
|
||||
|
||||
# Import file
|
||||
loader = pyghidra.program_loader().project(project).source(str(input_path)).name(input_path.name)
|
||||
with loader.load() as load_results:
|
||||
load_results.save(monitor)
|
||||
|
||||
# Open program
|
||||
program, consumer = pyghidra.consume_program(project, "/" + input_path.name)
|
||||
|
||||
# Analyze
|
||||
pyghidra.analyze(program, monitor)
|
||||
|
||||
from ghidra.program.flatapi import FlatProgramAPI
|
||||
|
||||
flat_api = FlatProgramAPI(program)
|
||||
|
||||
import capa.features.extractors.ghidra.context as ghidra_context
|
||||
|
||||
ghidra_context.set_context(program, flat_api, monitor)
|
||||
|
||||
# Wrapper to handle cleanup of program (consumer) and project
|
||||
class GhidraContextWrapper:
|
||||
def __init__(self, project_cm, program, consumer):
|
||||
self.project_cm = project_cm
|
||||
self.program = program
|
||||
self.consumer = consumer
|
||||
|
||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||
self.program.release(self.consumer)
|
||||
self.project_cm.__exit__(exc_type, exc_val, exc_tb)
|
||||
|
||||
cm = GhidraContextWrapper(project_cm, program, consumer)
|
||||
|
||||
except Exception:
|
||||
project_cm.__exit__(None, None, None)
|
||||
tmpdir.cleanup()
|
||||
raise
|
||||
|
||||
import capa.features.extractors.ghidra.extractor
|
||||
|
||||
return capa.features.extractors.ghidra.extractor.GhidraFeatureExtractor(ctx_manager=cm, tmpdir=tmpdir)
|
||||
else:
|
||||
raise ValueError("unexpected backend: " + backend)
|
||||
|
||||
|
||||
@@ -55,6 +55,7 @@ from capa.loader import (
|
||||
BACKEND_VMRAY,
|
||||
BACKEND_DOTNET,
|
||||
BACKEND_FREEZE,
|
||||
BACKEND_GHIDRA,
|
||||
BACKEND_PEFILE,
|
||||
BACKEND_DRAKVUF,
|
||||
BACKEND_BINEXPORT2,
|
||||
@@ -298,6 +299,7 @@ def install_common_args(parser, wanted=None):
|
||||
(BACKEND_BINJA, "Binary Ninja"),
|
||||
(BACKEND_DOTNET, ".NET"),
|
||||
(BACKEND_BINEXPORT2, "BinExport2"),
|
||||
(BACKEND_GHIDRA, "Ghidra"),
|
||||
(BACKEND_FREEZE, "capa freeze"),
|
||||
(BACKEND_CAPE, "CAPE"),
|
||||
(BACKEND_DRAKVUF, "DRAKVUF"),
|
||||
@@ -392,6 +394,7 @@ class ShouldExitError(Exception):
|
||||
"""raised when a main-related routine indicates the program should exit."""
|
||||
|
||||
def __init__(self, status_code: int):
|
||||
super().__init__(status_code)
|
||||
self.status_code = status_code
|
||||
|
||||
|
||||
|
||||
@@ -274,12 +274,8 @@ SUPPORTED_FEATURES[Scope.FUNCTION].update(SUPPORTED_FEATURES[Scope.BASIC_BLOCK])
|
||||
|
||||
|
||||
class InvalidRule(ValueError):
|
||||
def __init__(self, msg):
|
||||
super().__init__()
|
||||
self.msg = msg
|
||||
|
||||
def __str__(self):
|
||||
return f"invalid rule: {self.msg}"
|
||||
return f"invalid rule: {super().__str__()}"
|
||||
|
||||
def __repr__(self):
|
||||
return str(self)
|
||||
@@ -289,20 +285,15 @@ class InvalidRuleWithPath(InvalidRule):
|
||||
def __init__(self, path, msg):
|
||||
super().__init__(msg)
|
||||
self.path = path
|
||||
self.msg = msg
|
||||
self.__cause__ = None
|
||||
|
||||
def __str__(self):
|
||||
return f"invalid rule: {self.path}: {self.msg}"
|
||||
return f"invalid rule: {self.path}: {super(InvalidRule, self).__str__()}"
|
||||
|
||||
|
||||
class InvalidRuleSet(ValueError):
|
||||
def __init__(self, msg):
|
||||
super().__init__()
|
||||
self.msg = msg
|
||||
|
||||
def __str__(self):
|
||||
return f"invalid rule set: {self.msg}"
|
||||
return f"invalid rule set: {super().__str__()}"
|
||||
|
||||
def __repr__(self):
|
||||
return str(self)
|
||||
@@ -1102,15 +1093,15 @@ class Rule:
|
||||
@lru_cache()
|
||||
def _get_yaml_loader():
|
||||
try:
|
||||
# prefer to use CLoader to be fast, see #306
|
||||
# prefer to use CLoader to be fast, see #306 / CSafeLoader is the same as CLoader but with safe loading
|
||||
# on Linux, make sure you install libyaml-dev or similar
|
||||
# on Windows, get WHLs from pyyaml.org/pypi
|
||||
logger.debug("using libyaml CLoader.")
|
||||
return yaml.CLoader
|
||||
logger.debug("using libyaml CSafeLoader.")
|
||||
return yaml.CSafeLoader
|
||||
except Exception:
|
||||
logger.debug("unable to import libyaml CLoader, falling back to Python yaml parser.")
|
||||
logger.debug("unable to import libyaml CSafeLoader, falling back to Python yaml parser.")
|
||||
logger.debug("this will be slower to load rules.")
|
||||
return yaml.Loader
|
||||
return yaml.SafeLoader
|
||||
|
||||
@staticmethod
|
||||
def _get_ruamel_yaml_parser():
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
__version__ = "9.3.0"
|
||||
__version__ = "9.3.1"
|
||||
|
||||
|
||||
def get_major_version():
|
||||
|
||||
@@ -74,10 +74,12 @@ dependencies = [
|
||||
# comments and context.
|
||||
"pyyaml>=6",
|
||||
"colorama>=0.4",
|
||||
"ida-netnode>=3.0",
|
||||
"ida-settings>=3.1.0",
|
||||
"ruamel.yaml>=0.18",
|
||||
"pefile>=2023.2.7",
|
||||
"pyelftools>=0.31",
|
||||
"pyghidra>=3.0.0",
|
||||
"pydantic>=2",
|
||||
"rich>=13",
|
||||
"humanize>=4",
|
||||
@@ -121,14 +123,14 @@ dev = [
|
||||
# we want all developer environments to be consistent.
|
||||
# These dependencies are not used in production environments
|
||||
# and should not conflict with other libraries/tooling.
|
||||
"pre-commit==4.2.0",
|
||||
"pre-commit==4.5.0",
|
||||
"pytest==8.0.0",
|
||||
"pytest-sugar==1.1.1",
|
||||
"pytest-instafail==0.5.0",
|
||||
"flake8==7.3.0",
|
||||
"flake8-bugbear==24.12.12",
|
||||
"flake8-bugbear==25.10.21",
|
||||
"flake8-encodings==0.5.1",
|
||||
"flake8-comprehensions==3.16.0",
|
||||
"flake8-comprehensions==3.17.0",
|
||||
"flake8-logging-format==0.9.0",
|
||||
"flake8-no-implicit-concat==0.3.5",
|
||||
"flake8-print==5.0.0",
|
||||
@@ -136,8 +138,8 @@ dev = [
|
||||
"flake8-simplify==0.22.0",
|
||||
"flake8-use-pathlib==0.3.0",
|
||||
"flake8-copyright==0.2.4",
|
||||
"ruff==0.12.0",
|
||||
"black==25.1.0",
|
||||
"ruff==0.14.7",
|
||||
"black==25.11.0",
|
||||
"isort==6.0.0",
|
||||
"mypy==1.17.1",
|
||||
"mypy-protobuf==3.6.0",
|
||||
@@ -157,7 +159,7 @@ build = [
|
||||
# we want all developer environments to be consistent.
|
||||
# These dependencies are not used in production environments
|
||||
# and should not conflict with other libraries/tooling.
|
||||
"pyinstaller==6.14.1",
|
||||
"pyinstaller==6.16.0",
|
||||
"setuptools==80.9.0",
|
||||
"build==1.3.0"
|
||||
]
|
||||
|
||||
@@ -12,7 +12,7 @@ cxxfilt==0.3.0
|
||||
dncil==1.0.2
|
||||
dnfile==0.17.0
|
||||
funcy==2.0
|
||||
humanize==4.13.0
|
||||
humanize==4.14.0
|
||||
ida-netnode==3.0
|
||||
ida-settings==3.2.2
|
||||
intervaltree==3.1.0
|
||||
@@ -22,18 +22,19 @@ msgpack==1.0.8
|
||||
networkx==3.4.2
|
||||
pefile==2024.8.26
|
||||
pip==25.3
|
||||
protobuf==6.31.1
|
||||
protobuf==6.33.1
|
||||
pyasn1==0.5.1
|
||||
pyasn1-modules==0.3.0
|
||||
pycparser==2.22
|
||||
pycparser==2.23
|
||||
pydantic==2.12.4
|
||||
# pydantic pins pydantic-core,
|
||||
# but dependabot updates these separately (which is broken) and is annoying,
|
||||
# so we rely on pydantic to pull in the right version of pydantic-core.
|
||||
# pydantic-core==2.23.4
|
||||
xmltodict==0.14.2
|
||||
xmltodict==1.0.2
|
||||
pyelftools==0.32
|
||||
pygments==2.19.1
|
||||
pyghidra==3.0.0
|
||||
python-flirt==0.9.2
|
||||
pyyaml==6.0.2
|
||||
rich==14.2.0
|
||||
|
||||
2
rules
2
rules
Submodule rules updated: b0b486fe0c...6120dfb6e0
Submodule tests/data updated: 5ea5d9f572...cfca4022ee
@@ -227,13 +227,33 @@ def get_vmray_extractor(path):
|
||||
return VMRayExtractor.from_zipfile(path)
|
||||
|
||||
|
||||
@lru_cache(maxsize=1)
|
||||
GHIDRA_CACHE: dict[Path, tuple] = {}
|
||||
|
||||
|
||||
def get_ghidra_extractor(path: Path):
|
||||
# we need to start PyGhidra before importing the extractor
|
||||
# because the extractor imports Ghidra modules that are only available after PyGhidra is started
|
||||
import pyghidra
|
||||
|
||||
if not pyghidra.started():
|
||||
pyghidra.start()
|
||||
|
||||
import capa.features.extractors.ghidra.context
|
||||
import capa.features.extractors.ghidra.extractor
|
||||
|
||||
extractor = capa.features.extractors.ghidra.extractor.GhidraFeatureExtractor()
|
||||
setattr(extractor, "path", path.as_posix())
|
||||
if path in GHIDRA_CACHE:
|
||||
extractor, program, flat_api, monitor = GHIDRA_CACHE[path]
|
||||
capa.features.extractors.ghidra.context.set_context(program, flat_api, monitor)
|
||||
return extractor
|
||||
|
||||
# We use a larger cache size to avoid re-opening the same file multiple times
|
||||
# which is very slow with Ghidra.
|
||||
extractor = capa.loader.get_extractor(
|
||||
path, FORMAT_AUTO, OS_AUTO, capa.loader.BACKEND_GHIDRA, [], disable_progress=True
|
||||
)
|
||||
|
||||
ctx = capa.features.extractors.ghidra.context.get_context()
|
||||
GHIDRA_CACHE[path] = (extractor, ctx.program, ctx.flat_api, ctx.monitor)
|
||||
return extractor
|
||||
|
||||
|
||||
|
||||
@@ -70,4 +70,4 @@ def test_standalone_binja_backend():
|
||||
@pytest.mark.skipif(binja_present is False, reason="Skip binja tests if the binaryninja Python API is not installed")
|
||||
def test_binja_version():
|
||||
version = binaryninja.core_version_info()
|
||||
assert version.major == 5 and version.minor == 1
|
||||
assert version.major == 5 and version.minor == 2
|
||||
|
||||
@@ -11,95 +11,35 @@
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""
|
||||
Must invoke this script from within the Ghidra Runtime Environment
|
||||
"""
|
||||
import sys
|
||||
import logging
|
||||
from pathlib import Path
|
||||
import os
|
||||
import importlib.util
|
||||
|
||||
import pytest
|
||||
import fixtures
|
||||
|
||||
try:
|
||||
sys.path.append(str(Path(__file__).parent))
|
||||
import fixtures
|
||||
finally:
|
||||
sys.path.pop()
|
||||
import capa.features.common
|
||||
|
||||
ghidra_present = importlib.util.find_spec("pyghidra") is not None and "GHIDRA_INSTALL_DIR" in os.environ
|
||||
|
||||
|
||||
logger = logging.getLogger("test_ghidra_features")
|
||||
|
||||
ghidra_present: bool = False
|
||||
try:
|
||||
import ghidra # noqa: F401
|
||||
|
||||
ghidra_present = True
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
|
||||
def standardize_posix_str(psx_str):
    """Convert a test-data path into the sample name used by the test ids.

    params: psx_str - PosixPath() to the test data
    return: string that matches test-id sample name
    """
    pma_prefix = "Practical Malware Analysis Lab "
    full_path = str(psx_str)
    file_name = full_path.split("/")[-1]

    if "Practical Malware Analysis Lab" in full_path:
        # <PosixPath>/'Practical Malware Analysis Lab 16-01.exe_' -> 'pma16-01'
        wanted_str = "pma" + file_name[len(pma_prefix) : -5]
    else:
        # <PosixPath>/mimikatz.exe_ -> mimikatz
        wanted_str = file_name[:-5]

    # al-khaser_x86 -> al-khaser x86
    return wanted_str.replace("_", " ")
|
||||
|
||||
|
||||
def check_input_file(wanted):
    """check that test is running on the loaded sample

    params: wanted - PosixPath() passed from test arg
    raises: RuntimeError when the MD5 of the loaded program does not start
            with the MD5 registered in fixtures for the wanted sample
    """
    import capa.ghidra.helpers as ghidra_helpers

    found = ghidra_helpers.get_file_md5()
    sample_name = standardize_posix_str(wanted)
    # NOTE(review): compared with startswith(), so the fixture value may be an MD5 prefix - confirm
    expected = fixtures.get_sample_md5_by_name(sample_name)

    if not found.startswith(expected):
        # report the MD5 the test expects; the original message printed the MD5
        # of the (wrong) sample that is currently loaded
        raise RuntimeError(
            f"please run the tests against sample with MD5: `{expected}` (loaded sample has MD5: `{found}`)"
        )
|
||||
|
||||
|
||||
@pytest.mark.skipif(ghidra_present is False, reason="Ghidra tests must be ran within Ghidra")
|
||||
@fixtures.parametrize("sample,scope,feature,expected", fixtures.FEATURE_PRESENCE_TESTS, indirect=["sample", "scope"])
|
||||
@pytest.mark.skipif(ghidra_present is False, reason="PyGhidra not installed")
|
||||
@fixtures.parametrize(
|
||||
"sample,scope,feature,expected",
|
||||
[
|
||||
t
|
||||
for t in fixtures.FEATURE_PRESENCE_TESTS
|
||||
# this test case is specific to Vivisect and its basic blocks do not align with Ghidra's analysis
|
||||
if t[0] != "294b8d..." or t[2] != capa.features.common.String("\r\n\x00:ht")
|
||||
],
|
||||
indirect=["sample", "scope"],
|
||||
)
|
||||
def test_ghidra_features(sample, scope, feature, expected):
|
||||
try:
|
||||
check_input_file(sample)
|
||||
except RuntimeError:
|
||||
pytest.skip(reason="Test must be ran against sample loaded in Ghidra")
|
||||
|
||||
fixtures.do_test_feature_presence(fixtures.get_ghidra_extractor, sample, scope, feature, expected)
|
||||
|
||||
|
||||
@pytest.mark.skipif(ghidra_present is False, reason="Ghidra tests must be ran within Ghidra")
|
||||
@pytest.mark.skipif(ghidra_present is False, reason="PyGhidra not installed")
|
||||
@fixtures.parametrize(
|
||||
"sample,scope,feature,expected", fixtures.FEATURE_COUNT_TESTS_GHIDRA, indirect=["sample", "scope"]
|
||||
)
|
||||
def test_ghidra_feature_counts(sample, scope, feature, expected):
|
||||
try:
|
||||
check_input_file(sample)
|
||||
except RuntimeError:
|
||||
pytest.skip(reason="Test must be ran against sample loaded in Ghidra")
|
||||
|
||||
fixtures.do_test_feature_count(fixtures.get_ghidra_extractor, sample, scope, feature, expected)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# No support for faulthandler module in Ghidrathon, see:
|
||||
# https://github.com/mandiant/Ghidrathon/issues/70
|
||||
sys.exit(pytest.main(["--pyargs", "-p no:faulthandler", "test_ghidra_features"]))
|
||||
|
||||
17
web/explorer/package-lock.json
generated
17
web/explorer/package-lock.json
generated
@@ -2272,10 +2272,11 @@
|
||||
}
|
||||
},
|
||||
"node_modules/glob": {
|
||||
"version": "10.4.2",
|
||||
"resolved": "https://registry.npmjs.org/glob/-/glob-10.4.2.tgz",
|
||||
"integrity": "sha512-GwMlUF6PkPo3Gk21UxkCohOv0PLcIXVtKyLlpEI28R/cO/4eNOdmLk3CMW1wROV/WR/EsZOWAfBbBOqYvs88/w==",
|
||||
"version": "10.5.0",
|
||||
"resolved": "https://registry.npmjs.org/glob/-/glob-10.5.0.tgz",
|
||||
"integrity": "sha512-DfXN8DfhJ7NH3Oe7cFmu3NCu1wKbkReJ8TorzSAFbSKrlNaQSKfIzqYqVY8zlbs2NLBbWpRiU52GX2PbaBVNkg==",
|
||||
"dev": true,
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"foreground-child": "^3.1.0",
|
||||
"jackspeak": "^3.1.2",
|
||||
@@ -2287,9 +2288,6 @@
|
||||
"bin": {
|
||||
"glob": "dist/esm/bin.mjs"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=16 || 14 >=14.18"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/isaacs"
|
||||
}
|
||||
@@ -2641,10 +2639,11 @@
|
||||
}
|
||||
},
|
||||
"node_modules/js-yaml": {
|
||||
"version": "4.1.0",
|
||||
"resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.0.tgz",
|
||||
"integrity": "sha512-wpxZs9NoxZaJESJGIZTyDEaYpl0FKSA+FB9aJiyemKhMwkxQg63h4T1KJgUGHpTqPDNRcmmYLugrRjJlBtWvRA==",
|
||||
"version": "4.1.1",
|
||||
"resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.1.tgz",
|
||||
"integrity": "sha512-qQKT4zQxXl8lLwBtHMWwaTcGfFOZviOJet3Oy/xmGk2gZH677CJM9EvtfdSkgWcATZhj/55JZ0rmy3myCT5lsA==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"argparse": "^2.0.1"
|
||||
},
|
||||
|
||||
@@ -212,6 +212,11 @@
|
||||
|
||||
<h2 class="mt-3">Tool Updates</h2>
|
||||
|
||||
<h3 class="mt-2">v9.3.1 (<em>2025-11-19</em>)</h3>
|
||||
<p class="mt-0">
|
||||
This patch release fixes a missing import for the capa explorer plugin for IDA Pro.
|
||||
</p>
|
||||
|
||||
<h3 class="mt-2">v9.3.0 (<em>2025-11-12</em>)</h3>
|
||||
<p class="mt-0">
|
||||
capa v9.3.0 comes with over 20 new and/or improved rules.
|
||||
|
||||
Reference in New Issue
Block a user