mirror of
https://github.com/mandiant/capa.git
synced 2025-12-12 07:40:38 -08:00
* elf: os: detect Android via clang compiler .ident note * elf: os: detect Android via dependency on liblog.so * main: split main into a bunch of "main routines" [wip] since there are a few references to BinExport2 that are in progress elsewhre. Next commit will remove them. * features: add BinExport2 declarations * BinExport2: initial skeleton of feature extraction * main: remove references to wip BinExport2 code * changelog * main: rename first position argument "input_file" closes #1946 * main: linters * main: move rule-related routines to capa.rules ref #1821 * main: extract routines to capa.loader module closes #1821 * add loader module * loader: learn to load freeze format * freeze: use new cli arg handling * Update capa/loader.py Co-authored-by: Moritz <mr-tz@users.noreply.github.com> * main: remove duplicate documentation * main: add doc about where some functions live * scripts: migrate to new main wrapper helper functions * scripts: port to main routines * main: better handle auto-detection of backend * scripts: migrate bulk-process to main wrappers * scripts: migrate scripts to main wrappers * main: rename *_from_args to *_from_cli * changelog * cache-ruleset: remove duplication * main: fix tag handling * cache-ruleset: fix cli args * cache-ruleset: fix special rule cli handling * scripts: fix type bytes * main: nicely format debug messages * helpers: ensure log messages aren't very long * flake8 config * binexport2: formatting * loader: learn to load BinExport2 files * main: debug log the format and backend * elf: add more arch constants * binexport: parse global features * binexport: extract file features * binexport2: begin to enumerate function/bb/insns * binexport: pass context to function/bb/insn extractors * binexport: linters * binexport: linters * scripts: add script to inspect binexport2 file * inspect-binexport: fix xref symbols * inspect-binexport: factor out the index building * binexport: move index to binexport extractor module * 
binexport: implement ELF/aarch64 GOT/thunk analyzer * binexport: implement API features * binexport: record the full vertex for a thunk * binexport: learn to extract numbers * binexport: number: skipped mapped numbers * binexport: fix basic block address indexing * binexport: rename function * binexport: extract operand numbers * binexport: learn to extract calls from characteristics * binexport: learn to extract mnemonics * pre-commit: skip protobuf file * binexport: better search for sample file * loader: add file extractors for BinExport2 * binexport: remove extra parameter * new black config * binexport: index string xrefs * binexport: learn to extract bytes and strings * binexport: cache parsed PE/ELF * binexport: handle Ghidra SYMBOL numbers * binexport2: handle binexport#78 (Ghidra only uses SYMBOL expresssions) * main: write error output to stderr, not stdout * scripts: add example detect-binexport2-capabilities.py * detect-binexport2-capabilities: more documentation/examples * elffile: recognize more architectures * binexport: handle read_memory errors * binexport: index flow graphs by address * binexport: cleanup logging * binexport: learn to extract function names * binexport: learn to extract all function features * binexport: learn to extract bb tight loops * elf: don't require vivisect just for type annotations * main: remove unused imports * rules: don't eagerly import ruamel until needed * loader: avoid eager imports of some backend-related code * changelog * fmt * binexport: better render optional fields * fix merge conflicts * fix formatting * remove Ghidra data reference madness * handle PermissionError when searching sample file for BinExport2 file * handle PermissionError when searching sample file for BinExport2 file * add Android as valid OS * inspect-binexport: strip strings * inspect-binexport: render operands * fix lints * ruff: update config layout * inspect-binexport: better align comments/xrefs * use explicit search paths to get sample 
for BinExport file * add initial BinExport tests * add/update BinExport tests and minor fixes * inspect-binexport: add perf tracking * inspect-binexport: cache rendered operands * lints * do not extract number features for ret instructions * Fix BinExport's "tight loop" feature extraction. `idx.target_edges_by_basic_block_index[basic_block_index]` is of type `List[Edges]`. The index `basic_block_index` was definitely not an element. * inspect-binexport: better render data section * linters * main: accept --format=binexport2 * binexport: insn: add support for parsing bare immediate int operands * binexport2: bb: fix tight loop detection ref #2050 * binexport: api: generate variations of Win32 APIs * lints * binexport: index: don't assume instruction index is 1:1 with address * be2: index instruction addresses * be2: temp remove bytes feature processing * binexport: read memory from an address space extracted from PE/ELF closes #2061 * be2: resolve thunks to imported functions * be2: check for be2 string reference before bytes/string extraction overhead * be2: remove unneeded check * be2: do not process thunks * be2: insn: polish thunk handling a bit * be2: pre-compute thunk targets * parse negative numbers * update tests to use Ghidra-generated BinExport file * remove unused import * black reformat * run tests always (for now) * binexport: tests: fix test case * binexport: extractor: fix insn lint * binexport: addressspace: use base address recovered from binexport file * Add nzxor charecteristic in BinExport extractor. by referencing vivisect implementation. 
* add tests, fix stack cookie detection * test BinExport feature PRs * reformat and fix * complete TODO descriptions * wip tests * binexport: add typing where applicable (#2106) * binexport2: revert import names from BinExport2 proto binexport2_pb.BinExport2 isnt a package so we can't import it like: from ...binexport2_pb.BinExport2 import CallGraph * fix stack offset numbers and disable offset tests * xfail OperandOffset * generate symbol variants * wip: read negative numbers * update tight loop tests * binexport: fix function loop feature detection * binexport: update binexport function loop tests * binexport: fix lints and imports * binexport: add back assert statement to thunk calculation * binexport: update tests to use Ghidra binexport file * binexport: add additional debug info to thunk calculation assert * binexport: update unit tests to focus on Ghidra * binexport: fix lints * binexport: remove Ghidra symbol madness and fix x86/amd64 stack offset number tests * binexport: use masking for Number features * binexport: ignore call/jmp immediates for intel architecture * binexport: check if immediate is a mapped address * binexport: emit offset features for immediates likely structure offsets * binexport: add twos complement wrapper insn.py * binexport: add support for x86 offset features * binexport: code refactor * binexport: init refactor for multi-arch instruction feature parsing * binexport: intel: emit indirect call characteristic * binexport: use helper method for instruction mnemonic * binexport: arm: emit offset features from stp instruction * binexport: arm: emit indirect call characteristic * binexport: arm: improve offset feature extraction * binexport: add workaroud for Ghidra bug that results in empty operands (no expressions) * binexport: skip x86 stack string tests * binexport: update mimikatz.exe_ feature count tests for Ghidra * core: loader: update binja import * core: loader: update binja imports * binexport: arm: ignore number features for 
add instruction manipulating stack * binexport: update unit tests * binexport: arm: ignore number features for sub instruction manipulating stack * binexport: arm: emit offset features for add instructions * binexport: remove TODO from tests workflow * binexport: update CHANGELOG * binexport: remove outdated TODOs * binexport: re-enable support for data references in inspect-binexport2.py * binexport: skip data references to code * binexport: remove outdated TODOs * Update scripts/inspect-binexport2.py * Update CHANGELOG.md * Update capa/helpers.py * Update capa/features/extractors/common.py * Update capa/features/extractors/binexport2/extractor.py * Update capa/features/extractors/binexport2/arch/arm/insn.py Co-authored-by: Moritz <mr-tz@users.noreply.github.com> * initial add * test binexport scripts * add tests using small ARM ELF * add method to get instruction by address * index instructions by address * adjust and extend tests * handle operator with no children bug * binexport: use instruction address index ref: https://github.com/mandiant/capa/pull/1950/files#r1728570811 * inspect binexport: handle lsl with no children add pruning phase to expression tree building to remove known-bad branches. This might address some of the data we're seeing due to: https://github.com/NationalSecurityAgency/ghidra/issues/6821 Also introduces a --instruction optional argument to dump the details of a specific instruction. * binexport: consolidate expression tree logic into helpers * binexport: index instruction indices by address * binexport: introduce instruction pattern matching Introduce intruction pattern matching to declaratively describe the instructions and operands that we want to extract. While there's a bit more code, its much more thoroughly tested, and is less brittle than the prior if/else/if/else/if/else implementation. 
* binexport: helpers: fix missing comment words * binexport: update tests to reflect updated test files * remove testing of feature branch --------- Co-authored-by: Moritz <mr-tz@users.noreply.github.com> Co-authored-by: Mike Hunhoff <mike.hunhoff@gmail.com> Co-authored-by: mr-tz <moritz.raabe@mandiant.com> Co-authored-by: Lin Chen <larch.lin.chen@gmail.com>
113 lines
4.5 KiB
Python
113 lines
4.5 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at: [package root]/LICENSE.txt
|
|
Unless required by applicable law or agreed to in writing, software distributed under the License
|
|
is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and limitations under the License.
|
|
|
|
detect-binexport2-capabilities.py
|
|
|
|
Detect capabilities in a BinExport2 file and write the results into the protobuf format.
|
|
|
|
Example:
|
|
|
|
$ python detect-binexport2-capabilities.py suspicious.BinExport2 | xxd | head
|
|
┌────────┬─────────────────────────┬─────────────────────────┬────────┬────────┐
|
|
│00000000│ 0a d4 05 0a 1a 32 30 32 ┊ 33 2d 30 32 2d 31 30 20 │_.•_•202┊3-02-10 │
|
|
│00000010│ 31 31 3a 34 39 3a 35 32 ┊ 2e 36 39 33 34 30 30 12 │11:49:52┊.693400•│
|
|
│00000020│ 05 35 2e 30 2e 30 1a 34 ┊ 74 65 73 74 73 2f 64 61 │•5.0.0•4┊tests/da│
|
|
│00000030│ 74 61 2f 50 72 61 63 74 ┊ 69 63 61 6c 20 4d 61 6c │ta/Pract┊ical Mal│
|
|
│00000040│ 77 61 72 65 20 41 6e 61 ┊ 6c 79 73 69 73 20 4c 61 │ware Ana┊lysis La│
|
|
│00000050│ 62 20 30 31 2d 30 31 2e ┊ 64 6c 6c 5f 1a 02 2d 6a │b 01-01.┊dll_••-j│
|
|
│00000060│ 22 c4 01 0a 20 32 39 30 ┊ 39 33 34 63 36 31 64 65 │".•_ 290┊934c61de│
|
|
│00000070│ 39 31 37 36 61 64 36 38 ┊ 32 66 66 64 64 36 35 66 │9176ad68┊2ffdd65f│
|
|
│00000080│ 30 61 36 36 39 12 28 61 ┊ 34 62 33 35 64 65 37 31 │0a669•(a┊4b35de71│
|
|
"""
|
|
import sys
|
|
import logging
|
|
import argparse
|
|
|
|
import capa.main
|
|
import capa.rules
|
|
import capa.engine
|
|
import capa.loader
|
|
import capa.helpers
|
|
import capa.features
|
|
import capa.exceptions
|
|
import capa.render.proto
|
|
import capa.render.verbose
|
|
import capa.features.freeze
|
|
import capa.capabilities.common
|
|
import capa.render.result_document as rd
|
|
from capa.loader import FORMAT_BINEXPORT2, BACKEND_BINEXPORT2
|
|
|
|
# script-level logger; the dotted name makes it a child of the "capa" logger in the logging hierarchy.
logger = logging.getLogger("capa.detect-binexport2-capabilities")
|
|
|
|
|
|
def main(argv=None):
    """
    Detect capabilities in a BinExport2 file and write the results to stdout
    as a serialized protobuf message.

    args:
      argv: command line arguments, excluding the program name.
        defaults to `sys.argv[1:]` when None.

    returns:
      0 on success.
      1 when the input does not resolve to the BinExport2 format or backend,
        or when no sample path could be resolved for it.
      otherwise, the status code carried by `capa.main.ShouldExitError`.
    """
    if argv is None:
        argv = sys.argv[1:]

    parser = argparse.ArgumentParser(description="detect capabilities in programs.")
    capa.main.install_common_args(
        parser,
        wanted={"format", "os", "backend", "input_file", "signatures", "rules", "tag"},
    )
    args = parser.parse_args(args=argv)

    try:
        capa.main.handle_common_args(args)
        capa.main.ensure_input_exists_from_cli(args)

        # these values derive from user input, so validate them with explicit checks
        # rather than `assert`: assertions are stripped when Python runs with -O,
        # and otherwise surface to the user as a bare AssertionError traceback.
        input_format = capa.main.get_input_format_from_cli(args)
        if input_format != FORMAT_BINEXPORT2:
            logger.error("unexpected input format: %s (expected: %s)", input_format, FORMAT_BINEXPORT2)
            return 1

        backend = capa.main.get_backend_from_cli(args, input_format)
        if backend != BACKEND_BINEXPORT2:
            logger.error("unexpected backend: %s (expected: %s)", backend, BACKEND_BINEXPORT2)
            return 1

        sample_path = capa.main.get_sample_path_from_cli(args, backend)
        if sample_path is None:
            logger.error("failed to find the sample file for: %s", args.input_file)
            return 1
        os_ = capa.loader.get_os(sample_path)

        rules = capa.main.get_rules_from_cli(args)

        extractor = capa.main.get_extractor_from_cli(args, input_format, backend)
        # alternatively, if you have all this handy in your library code:
        #
        #     extractor = capa.loader.get_extractor(
        #         args.input_file,
        #         FORMAT_BINEXPORT2,
        #         os_,
        #         BACKEND_BINEXPORT2,
        #         sig_paths=[],
        #         sample_path=sample_path,
        #     )
        #
        # or even more concisely:
        #
        #     be2 = capa.features.extractors.binexport2.get_binexport2(input_path)
        #     buf = sample_path.read_bytes()
        #     extractor = capa.features.extractors.binexport2.extractor.BinExport2FeatureExtractor(be2, buf)
    except capa.main.ShouldExitError as e:
        return e.status_code

    capabilities, counts = capa.capabilities.common.find_capabilities(rules, extractor)

    meta = capa.loader.collect_metadata(argv, args.input_file, input_format, os_, args.rules, extractor, counts)
    meta.analysis.layout = capa.loader.compute_layout(rules, extractor, capabilities)

    doc = rd.ResultDocument.from_capa(meta, rules, capabilities)
    pb = capa.render.proto.doc_to_pb2(doc)

    # the serialized message is binary, so write it to the underlying byte stream of stdout.
    sys.stdout.buffer.write(pb.SerializeToString(deterministic=True))
    sys.stdout.flush()

    return 0
|
|
|
|
|
|
if __name__ == "__main__":
    # only run when executed as a script; main()'s return value becomes the process exit status.
    exit_status = main()
    sys.exit(exit_status)
|