Mirror of https://github.com/mandiant/capa.git (synced 2025-12-12 23:59:48 -08:00)

Compare commits: push-trmuz...object-lay (89 commits)
Commits (abbreviated SHA1):

2ec979469e 2db0cc457f 3cad8d12af 5be96d7ddc a3b6aef67f 077fa2e7e1 c3b8e7c638 4346922b9a
d652192af1 d83750c901 8394b81841 febda7d0e2 f9abb5e83f f69602d085 ad187fc3bd 637926e0b6
03ce40e781 1f7f24c467 f2c329b768 22368fbe6f 6a12ab8598 a4fdb0a3ef c7bb8b8e67 41c5194693
8c8b67a6ea f0cc0fb2b8 fc8089c248 d795db9017 544e3eee5b dfc304d9f6 54688517c4 21fc77ea28
2976974009 030954d556 389a5eb84f 6d3b96f0b0 2a13bf6c0b e9f4f5bc31 e7400be99a 591a1e8fbb
2f5a227fb0 931ff62421 3037307ee8 d6c1725d7e 16eae70c17 9e7e6be374 3e8bed1db2 e4ac02a968
eff358980a 108bd7f224 ab43c8c0c2 585dff8b48 cb09041387 80899f3f70 00d2bb06fd ff1043e976
51a4eb46b8 558bf0fbf2 76aff57467 f82fc1902c e9e8fe42ed 80e007787c bfcc705117 834150ad1d
31ec208a9b a5d9459c42 06271a88d4 c48bccf623 9975f769f9 c5d8f99d6f bcd57a9af1 12337be2b7
25c4902c21 f024e1d54c bab7ed9188 6eda8c9713 22e88c860f 7884248022 4891fd750f 783e14b949
74777ad23e 01b35e7582 e29288cc8d c4c35ca6e9 3b1e0284c0 7b61d28dd2 e3267df5b1 9076e5475d
84d2a18b52
.github/dependabot.yml (vendored; 3 changed lines)

```diff
@@ -4,3 +4,6 @@ updates:
     directory: "/"
     schedule:
       interval: "weekly"
+    ignore:
+      - dependency-name: "*"
+        update-types: ["version-update:semver-patch"]
```
.github/mypy/mypy.ini (vendored; 3 changed lines)

```diff
@@ -1,8 +1,5 @@
 [mypy]

-[mypy-tqdm.*]
-ignore_missing_imports = True
-
 [mypy-ruamel.*]
 ignore_missing_imports = True
```
.github/pyinstaller/pyinstaller.spec (vendored; 16 changed lines)

```diff
@@ -2,7 +2,6 @@
 # Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
 import sys

-import wcwidth
 import capa.rules.cache

 from pathlib import Path
@@ -29,13 +28,6 @@ a = Analysis(
         ("../../rules", "rules"),
         ("../../sigs", "sigs"),
         ("../../cache", "cache"),
-        # capa.render.default uses tabulate that depends on wcwidth.
-        # it seems wcwidth uses a json file `version.json`
-        # and this doesn't get picked up by pyinstaller automatically.
-        # so we manually embed the wcwidth resources here.
-        #
-        # ref: https://stackoverflow.com/a/62278462/87207
-        (Path(wcwidth.__file__).parent, "wcwidth"),
     ],
     # when invoking pyinstaller from the project root,
     # this gets run from the project root.
@@ -48,11 +40,6 @@ a = Analysis(
         "tkinter",
         "_tkinter",
         "Tkinter",
-        # tqdm provides renderers for ipython,
-        # however, this drags in a lot of dependencies.
-        # since we don't spawn a notebook, we can safely remove these.
-        "IPython",
-        "ipywidgets",
         # these are pulled in by networkx
         # but we don't need to compute the strongly connected components.
         "numpy",
@@ -70,7 +57,10 @@ a = Analysis(
         "qt5",
         "pyqtwebengine",
         "pyasn1",
+        # don't pull in Binary Ninja/IDA bindings that should
+        # only be installed locally.
+        "binaryninja",
+        "ida",
     ],
 )
```
.github/workflows/build.yml (vendored; 8 changed lines)

```diff
@@ -30,8 +30,8 @@ jobs:
             python_version: 3.8
           - os: ubuntu-20.04
             artifact_name: capa
-            asset_name: linux-py311
-            python_version: 3.11
+            asset_name: linux-py312
+            python_version: 3.12
           - os: windows-2019
             artifact_name: capa.exe
             asset_name: windows
@@ -88,7 +88,7 @@
             asset_name: linux
           - os: ubuntu-22.04
             artifact_name: capa
-            asset_name: linux-py311
+            asset_name: linux-py312
           - os: windows-2022
             artifact_name: capa.exe
             asset_name: windows
@@ -114,7 +114,7 @@
         include:
           - asset_name: linux
            artifact_name: capa
-          - asset_name: linux-py311
+          - asset_name: linux-py312
            artifact_name: capa
          - asset_name: windows
            artifact_name: capa.exe
```
.github/workflows/web-deploy.yml (vendored; 2 changed lines)

```diff
@@ -43,7 +43,7 @@ jobs:
           fetch-depth: 1
           show-progress: true
       - name: Set up Node
-        uses: actions/setup-node@v4
+        uses: actions/setup-node@0a44ba7841725637a19e28fa30b79a866c81b0a6 # v4.0.4
         with:
           node-version: 20
           cache: 'npm'
```
.github/workflows/web-tests.yml (vendored; 2 changed lines)

```diff
@@ -19,7 +19,7 @@ jobs:
           show-progress: true

       - name: Set up Node
-        uses: actions/setup-node@v3
+        uses: actions/setup-node@0a44ba7841725637a19e28fa30b79a866c81b0a6 # v4.0.4
         with:
           node-version: 20
           cache: 'npm'
```
.gitignore (vendored; 1 changed line)

```diff
@@ -127,3 +127,4 @@ Pipfile.lock
 .github/binja/download_headless.py
 .github/binja/BinaryNinja-headless.zip
 justfile
+data/
```
(file name not shown in capture)

```diff
@@ -108,6 +108,7 @@ repos:
           - "--check-untyped-defs"
           - "--ignore-missing-imports"
           - "--config-file=.github/mypy/mypy.ini"
+          - "--enable-incomplete-feature=NewGenericSyntax"
           - "capa/"
           - "scripts/"
           - "tests/"
```
CHANGELOG.md (1004 changed lines; diff suppressed because it is too large)
capa/analysis/__init__.py (new, empty file)
capa/analysis/flirt.py (new file, 38 lines)

```python
# Copyright (C) 2024 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.

from pydantic import BaseModel

import capa.features.extractors.ida.idalib as idalib

if not idalib.has_idalib():
    raise RuntimeError("cannot find IDA idalib module.")

if not idalib.load_idalib():
    raise RuntimeError("failed to load IDA idalib module.")

import idaapi
import idautils


class FunctionId(BaseModel):
    va: int
    is_library: bool
    name: str


def get_flirt_matches(lib_only=True):
    for fva in idautils.Functions():
        f = idaapi.get_func(fva)
        is_lib = bool(f.flags & idaapi.FUNC_LIB)
        fname = idaapi.get_func_name(fva)

        if lib_only and not is_lib:
            continue

        yield FunctionId(va=fva, is_library=is_lib, name=fname)
```
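This module only reads the classifications that IDA's FLIRT signatures already made during auto-analysis, so it must run against an open, fully analyzed database. A minimal driving sketch, assuming idalib is installed; the input path `sample.exe_` is illustrative:

```python
# minimal usage sketch (assumes idalib is available; the sample path is hypothetical).
# importing capa.analysis.flirt already verifies and loads idalib (see the module top above).
import capa.analysis.flirt

import idapro
import ida_auto

idapro.open_database("sample.exe_", run_auto_analysis=True)
ida_auto.auto_wait()  # FLIRT matches exist only after auto-analysis completes

for match in capa.analysis.flirt.get_flirt_matches():
    print(hex(match.va), match.name)

idapro.close_database()
```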
capa/analysis/libraries.py (new file, 240 lines)

```python
# Copyright (C) 2024 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import io
import sys
import logging
import argparse
import tempfile
import contextlib
from enum import Enum
from typing import List, Optional
from pathlib import Path

import rich
from pydantic import BaseModel
from rich.text import Text
from rich.console import Console

import capa.main
import capa.helpers
import capa.analysis.flirt
import capa.analysis.strings
import capa.features.extractors.ida.idalib as idalib

if not idalib.has_idalib():
    raise RuntimeError("cannot find IDA idalib module.")

if not idalib.load_idalib():
    raise RuntimeError("failed to load IDA idalib module.")

import idaapi
import idapro
import ida_auto
import idautils

logger = logging.getLogger(__name__)


class Classification(str, Enum):
    USER = "user"
    LIBRARY = "library"
    UNKNOWN = "unknown"


class Method(str, Enum):
    FLIRT = "flirt"
    STRINGS = "strings"
    THUNK = "thunk"
    ENTRYPOINT = "entrypoint"


class FunctionClassification(BaseModel):
    va: int
    classification: Classification
    # name per the disassembler/analysis tool;
    # may be combined with the recovered/suspected name (TODO below)
    name: str

    # if this is a library function, this must be provided
    method: Optional[Method]

    # TODO: if this is a library function, the recovered/suspected name?

    # if this is a library function, these can optionally be provided.
    library_name: Optional[str] = None
    library_version: Optional[str] = None


class FunctionIdResults(BaseModel):
    function_classifications: List[FunctionClassification]


@contextlib.contextmanager
def ida_session(input_path: Path, use_temp_dir=True):
    if use_temp_dir:
        t = Path(tempfile.mkdtemp(prefix="ida-")) / input_path.name
    else:
        t = input_path

    logger.debug("using %s", str(t))
    # stderr=True is used here to redirect the spinner banner to stderr,
    # so that users can redirect capa's output.
    console = Console(stderr=True, quiet=False)

    try:
        if use_temp_dir:
            t.write_bytes(input_path.read_bytes())

        # idalib writes to stdout (ugh), so we have to capture that
        # so as not to screw up structured output.
        with capa.helpers.stdout_redirector(io.BytesIO()):
            idapro.enable_console_messages(False)
            with capa.main.timing("analyze program"):
                with console.status("analyzing program...", spinner="dots"):
                    if idapro.open_database(str(t.absolute()), run_auto_analysis=True):
                        raise RuntimeError("failed to analyze input file")

            logger.debug("idalib: waiting for analysis...")
            ida_auto.auto_wait()
            logger.debug("idalib: opened database.")

        yield
    finally:
        idapro.close_database()
        if use_temp_dir:
            t.unlink()


def is_thunk_function(fva):
    f = idaapi.get_func(fva)
    return bool(f.flags & idaapi.FUNC_THUNK)


def main(argv=None):
    if argv is None:
        argv = sys.argv[1:]

    parser = argparse.ArgumentParser(description="Identify library functions using various strategies.")
    capa.main.install_common_args(parser, wanted={"input_file"})
    parser.add_argument("--store-idb", action="store_true", default=False, help="store IDA database file")
    parser.add_argument("--min-string-length", type=int, default=8, help="minimum string length")
    parser.add_argument("-j", "--json", action="store_true", help="emit JSON instead of text")
    args = parser.parse_args(args=argv)

    try:
        capa.main.handle_common_args(args)
    except capa.main.ShouldExitError as e:
        return e.status_code

    dbs = capa.analysis.strings.get_default_databases()
    capa.analysis.strings.prune_databases(dbs, n=args.min_string_length)

    function_classifications: List[FunctionClassification] = []
    with ida_session(args.input_file, use_temp_dir=not args.store_idb):
        with capa.main.timing("FLIRT-based library identification"):
            # TODO: add more signature (files)
            # TODO: apply more signatures
            for flirt_match in capa.analysis.flirt.get_flirt_matches():
                function_classifications.append(
                    FunctionClassification(
                        va=flirt_match.va,
                        name=flirt_match.name,
                        classification=Classification.LIBRARY,
                        method=Method.FLIRT,
                        # note: we cannot currently include which signature matched per function via the IDA API
                    )
                )

            # thunks
            for fva in idautils.Functions():
                if is_thunk_function(fva):
                    function_classifications.append(
                        FunctionClassification(
                            va=fva,
                            name=idaapi.get_func_name(fva),
                            classification=Classification.LIBRARY,
                            method=Method.THUNK,
                        )
                    )

        with capa.main.timing("string-based library identification"):
            for string_match in capa.analysis.strings.get_string_matches(dbs):
                function_classifications.append(
                    FunctionClassification(
                        va=string_match.va,
                        name=idaapi.get_func_name(string_match.va),
                        classification=Classification.LIBRARY,
                        method=Method.STRINGS,
                        library_name=string_match.metadata.library_name,
                        library_version=string_match.metadata.library_version,
                    )
                )

        for va in idautils.Functions():
            name = idaapi.get_func_name(va)
            if name not in {"WinMain"}:
                continue

            function_classifications.append(
                FunctionClassification(
                    va=va,
                    name=name,
                    classification=Classification.USER,
                    method=Method.ENTRYPOINT,
                )
            )

        doc = FunctionIdResults(function_classifications=[])
        classifications_by_va = capa.analysis.strings.create_index(function_classifications, "va")
        for va in idautils.Functions():
            if classifications := classifications_by_va.get(va):
                doc.function_classifications.extend(classifications)
            else:
                doc.function_classifications.append(
                    FunctionClassification(
                        va=va,
                        name=idaapi.get_func_name(va),
                        classification=Classification.UNKNOWN,
                        method=None,
                    )
                )

    if args.json:
        print(doc.model_dump_json())  # noqa: T201 print found

    else:
        table = rich.table.Table()
        table.add_column("FVA")
        table.add_column("CLASSIFICATION")
        table.add_column("METHOD")
        table.add_column("FNAME")
        table.add_column("EXTRA INFO")

        classifications_by_va = capa.analysis.strings.create_index(doc.function_classifications, "va", sorted_=True)
        for va, classifications in classifications_by_va.items():
            name = ", ".join({c.name for c in classifications})
            if "sub_" in name:
                name = Text(name, style="grey53")

            classification = {c.classification for c in classifications}
            method = {c.method for c in classifications if c.method}
            extra = {f"{c.library_name}@{c.library_version}" for c in classifications if c.library_name}

            table.add_row(
                hex(va),
                ", ".join(classification) if classification != {"unknown"} else Text("unknown", style="grey53"),
                ", ".join(method),
                name,
                ", ".join(extra),
            )

        rich.print(table)


if __name__ == "__main__":
    sys.exit(main())
```
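Since the module installs capa's common arguments and exposes a `main()`, it could plausibly be invoked directly; a hypothetical invocation (the sample path is illustrative, and idalib must be configured):

```console
$ python -m capa.analysis.libraries --json /tmp/sample.exe_ > classifications.json
```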
capa/analysis/requirements.txt (new file, 2 lines)

```
# temporary extra file to track dependencies of the analysis directory
nltk==3.9.1
```
capa/analysis/strings/__init__.py (new file, 269 lines)

```python
# Copyright (C) 2024 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.

"""
further requirements:
  - nltk
"""
import gzip
import logging
import collections
from typing import Any, Dict, Mapping
from pathlib import Path
from dataclasses import dataclass

import msgspec

import capa.features.extractors.strings

logger = logging.getLogger(__name__)


class LibraryString(msgspec.Struct):
    string: str
    library_name: str
    library_version: str
    file_path: str | None = None
    function_name: str | None = None
    line_number: int | None = None


@dataclass
class LibraryStringDatabase:
    metadata_by_string: Dict[str, LibraryString]

    def __len__(self) -> int:
        return len(self.metadata_by_string)

    @classmethod
    def from_file(cls, path: Path) -> "LibraryStringDatabase":
        metadata_by_string: Dict[str, LibraryString] = {}
        decoder = msgspec.json.Decoder(type=LibraryString)
        for line in gzip.decompress(path.read_bytes()).split(b"\n"):
            if not line:
                continue
            s = decoder.decode(line)
            metadata_by_string[s.string] = s

        return cls(metadata_by_string=metadata_by_string)


DEFAULT_FILENAMES = (
    "brotli.jsonl.gz",
    "bzip2.jsonl.gz",
    "cryptopp.jsonl.gz",
    "curl.jsonl.gz",
    "detours.jsonl.gz",
    "jemalloc.jsonl.gz",
    "jsoncpp.jsonl.gz",
    "kcp.jsonl.gz",
    "liblzma.jsonl.gz",
    "libsodium.jsonl.gz",
    "libpcap.jsonl.gz",
    "mbedtls.jsonl.gz",
    "openssl.jsonl.gz",
    "sqlite3.jsonl.gz",
    "tomcrypt.jsonl.gz",
    "wolfssl.jsonl.gz",
    "zlib.jsonl.gz",
)

DEFAULT_PATHS = tuple(Path(__file__).parent / "data" / "oss" / filename for filename in DEFAULT_FILENAMES) + (
    Path(__file__).parent / "data" / "crt" / "msvc_v143.jsonl.gz",
)


def get_default_databases() -> list[LibraryStringDatabase]:
    return [LibraryStringDatabase.from_file(path) for path in DEFAULT_PATHS]


@dataclass
class WindowsApiStringDatabase:
    dll_names: set[str]
    api_names: set[str]

    def __len__(self) -> int:
        return len(self.dll_names) + len(self.api_names)

    @classmethod
    def from_dir(cls, path: Path) -> "WindowsApiStringDatabase":
        dll_names: set[str] = set()
        api_names: set[str] = set()

        for line in gzip.decompress((path / "dlls.txt.gz").read_bytes()).decode("utf-8").splitlines():
            if not line:
                continue
            dll_names.add(line)

        for line in gzip.decompress((path / "apis.txt.gz").read_bytes()).decode("utf-8").splitlines():
            if not line:
                continue
            api_names.add(line)

        return cls(dll_names=dll_names, api_names=api_names)

    @classmethod
    def from_defaults(cls) -> "WindowsApiStringDatabase":
        return cls.from_dir(Path(__file__).parent / "data" / "winapi")


def extract_strings(buf, n=4):
    yield from capa.features.extractors.strings.extract_ascii_strings(buf, n=n)
    yield from capa.features.extractors.strings.extract_unicode_strings(buf, n=n)


def prune_databases(dbs: list[LibraryStringDatabase], n=8):
    """remove less trustworthy database entries,

    such as:
      - those found in multiple databases
      - those that are English words
      - those that are too short
      - Windows API and DLL names
    """

    # TODO: consider applying these filters directly to the persisted databases, not at load time.

    winapi = WindowsApiStringDatabase.from_defaults()

    try:
        from nltk.corpus import words as nltk_words

        nltk_words.words()
    except (ImportError, LookupError):
        # one-time download of dataset.
        # this probably doesn't work well for embedded use.
        import nltk

        nltk.download("words")
        from nltk.corpus import words as nltk_words
    words = set(nltk_words.words())

    counter: collections.Counter[str] = collections.Counter()
    to_remove = set()
    for db in dbs:
        for string in db.metadata_by_string.keys():
            counter[string] += 1

            if string in words:
                to_remove.add(string)
                continue

            if len(string) < n:
                to_remove.add(string)
                continue

            if string in winapi.api_names:
                to_remove.add(string)
                continue

            if string in winapi.dll_names:
                to_remove.add(string)
                continue

    for string, count in counter.most_common():
        if count <= 1:
            break

        # remove strings that are seen in more than one database
        to_remove.add(string)

    for db in dbs:
        for string in to_remove:
            if string in db.metadata_by_string:
                del db.metadata_by_string[string]


def get_function_strings():
    import idaapi
    import idautils

    import capa.features.extractors.ida.helpers as ida_helpers

    strings_by_function = collections.defaultdict(set)
    for ea in idautils.Functions():
        f = idaapi.get_func(ea)

        # ignore library functions and thunk functions as identified by IDA
        if f.flags & idaapi.FUNC_THUNK:
            continue
        if f.flags & idaapi.FUNC_LIB:
            continue

        for bb in ida_helpers.get_function_blocks(f):
            for insn in ida_helpers.get_instructions_in_range(bb.start_ea, bb.end_ea):
                ref = capa.features.extractors.ida.helpers.find_data_reference_from_insn(insn)
                if ref == insn.ea:
                    continue

                string = capa.features.extractors.ida.helpers.find_string_at(ref)
                if not string:
                    continue

                strings_by_function[ea].add(string)

    return strings_by_function


@dataclass
class LibraryStringClassification:
    va: int
    string: str
    library_name: str
    metadata: LibraryString


def create_index(s: list, k: str, sorted_: bool = False) -> Mapping[Any, list]:
    """create an index of the elements in `s` using the key `k`, optionally sorted by `k`"""
    if sorted_:
        s = sorted(s, key=lambda x: getattr(x, k))

    s_by_k = collections.defaultdict(list)
    for v in s:
        p = getattr(v, k)
        s_by_k[p].append(v)
    return s_by_k


def get_string_matches(dbs: list[LibraryStringDatabase]) -> list[LibraryStringClassification]:
    matches: list[LibraryStringClassification] = []

    for function, strings in sorted(get_function_strings().items()):
        for string in strings:
            for db in dbs:
                if metadata := db.metadata_by_string.get(string):
                    matches.append(
                        LibraryStringClassification(
                            va=function,
                            string=string,
                            library_name=metadata.library_name,
                            metadata=metadata,
                        )
                    )

    # if there are fewer than N strings per library, ignore that library
    matches_by_library = create_index(matches, "library_name")
    for library_name, library_matches in matches_by_library.items():
        if len(library_matches) > 5:
            continue

        logger.info("pruning library %s: only %d matched strings", library_name, len(library_matches))
        matches = [m for m in matches if m.library_name != library_name]

    # if there are conflicts within a single function, don't label it
    matches_by_function = create_index(matches, "va")
    for va, function_matches in matches_by_function.items():
        library_names = {m.library_name for m in function_matches}
        if len(library_names) == 1:
            continue

        logger.info("conflicting matches: 0x%x: %s", va, sorted(library_names))
        # this is potentially slow (O(n**2)) but hopefully fast enough in practice.
        matches = [m for m in matches if m.va != va]

    return matches
```
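The on-disk database consumed by `LibraryStringDatabase.from_file` is just gzipped JSONL of `LibraryString` records. A self-contained sketch that writes a tiny database and loads it back, assuming capa and msgspec are installed (the file name and the sample zlib banner string are illustrative):

```python
# sketch of the gzipped-JSONL database format used above; paths are arbitrary.
import gzip
import json
import tempfile
from pathlib import Path

records = [
    {
        "string": "inflate 1.2.13 Copyright 1995-2022 Mark Adler",
        "library_name": "zlib",
        "library_version": "1.2.13",
    },
]
path = Path(tempfile.mkdtemp()) / "zlib.jsonl.gz"
path.write_bytes(gzip.compress("\n".join(json.dumps(r) for r in records).encode("utf-8")))

# LibraryStringDatabase.from_file decodes exactly this layout with msgspec:
import capa.analysis.strings

db = capa.analysis.strings.LibraryStringDatabase.from_file(path)
assert "inflate 1.2.13 Copyright 1995-2022 Mark Adler" in db.metadata_by_string
```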
capa/analysis/strings/__main__.py (new file, 130 lines)

```python
# Copyright (C) 2024 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import sys
import logging
import collections
from pathlib import Path

import rich
from rich.text import Text

import capa.analysis.strings
import capa.features.extractors.strings
import capa.features.extractors.ida.helpers as ida_helpers

logger = logging.getLogger(__name__)


def open_ida(input_path: Path):
    import tempfile

    import idapro

    t = Path(tempfile.mkdtemp(prefix="ida-")) / input_path.name
    t.write_bytes(input_path.read_bytes())
    # resource leak: we should delete this upon exit

    idapro.enable_console_messages(False)
    idapro.open_database(str(t.absolute()), run_auto_analysis=True)

    import ida_auto

    ida_auto.auto_wait()


def main():
    logging.basicConfig(level=logging.DEBUG)

    # use n=8 to ignore common words
    N = 8

    input_path = Path(sys.argv[1])

    dbs = capa.analysis.strings.get_default_databases()
    capa.analysis.strings.prune_databases(dbs, n=N)

    strings_by_library = collections.defaultdict(set)
    for string in capa.analysis.strings.extract_strings(input_path.read_bytes(), n=N):
        for db in dbs:
            if metadata := db.metadata_by_string.get(string.s):
                strings_by_library[metadata.library_name].add(string.s)

    console = rich.get_console()
    console.print("found libraries:", style="bold")
    for library, strings in sorted(strings_by_library.items(), key=lambda p: len(p[1]), reverse=True):
        console.print(f"  - [b]{library}[/] ({len(strings)} strings)")

        for string in sorted(strings)[:10]:
            console.print(f"    - {string}", markup=False, style="grey37")

        if len(strings) > 10:
            console.print("      ...", style="grey37")

    if not strings_by_library:
        console.print("  (none)", style="grey37")
        # since we're not going to find any strings,
        # return early and don't do IDA analysis
        return

    open_ida(input_path)

    import idaapi
    import idautils
    import ida_funcs

    strings_by_function = collections.defaultdict(set)
    for ea in idautils.Functions():
        f = idaapi.get_func(ea)

        # ignore library functions and thunk functions as identified by IDA
        if f.flags & idaapi.FUNC_THUNK:
            continue
        if f.flags & idaapi.FUNC_LIB:
            continue

        for bb in ida_helpers.get_function_blocks(f):
            for insn in ida_helpers.get_instructions_in_range(bb.start_ea, bb.end_ea):
                ref = capa.features.extractors.ida.helpers.find_data_reference_from_insn(insn)
                if ref == insn.ea:
                    continue

                string = capa.features.extractors.ida.helpers.find_string_at(ref)
                if not string:
                    continue

                for db in dbs:
                    if metadata := db.metadata_by_string.get(string):
                        strings_by_function[ea].add(string)

    # ensure there are at least XXX functions renamed, or ignore those entries

    console.print("functions:", style="bold")
    for function, strings in sorted(strings_by_function.items()):
        if strings:
            name = ida_funcs.get_func_name(function)

            console.print(f"  [b]{name}[/]@{function:08x}:")

            for string in strings:
                for db in dbs:
                    if metadata := db.metadata_by_string.get(string):
                        location = Text(
                            f"{metadata.library_name}@{metadata.library_version}::{metadata.function_name}",
                            style="grey37",
                        )
                        console.print("    - ", location, ": ", string.rstrip())

            console.print()

    console.print(
        f"found {len(strings_by_function)} library functions across {len(list(idautils.Functions()))} functions"
    )


if __name__ == "__main__":
    main()
```
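Because the package ships this `__main__.py`, it is runnable directly; it takes a single positional path, shown here with an illustrative sample name:

```console
$ python -m capa.analysis.strings /tmp/sample.exe_
```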
New binary data files (contents not shown):

- capa/analysis/strings/data/crt/msvc_v143.jsonl.gz

capa/analysis/strings/data/oss/.gitignore (vendored; new file, 3 lines)

```
*.csv
*.jsonl
*.jsonl.gz
```

New binary data files (contents not shown):

- capa/analysis/strings/data/oss/brotli.jsonl.gz
- capa/analysis/strings/data/oss/bzip2.jsonl.gz
- capa/analysis/strings/data/oss/cryptopp.jsonl.gz
- capa/analysis/strings/data/oss/curl.jsonl.gz
- capa/analysis/strings/data/oss/detours.jsonl.gz
- capa/analysis/strings/data/oss/jemalloc.jsonl.gz
- capa/analysis/strings/data/oss/jsoncpp.jsonl.gz
- capa/analysis/strings/data/oss/kcp.jsonl.gz
- capa/analysis/strings/data/oss/liblzma.jsonl.gz
- capa/analysis/strings/data/oss/libpcap.jsonl.gz
- capa/analysis/strings/data/oss/libsodium.jsonl.gz
- capa/analysis/strings/data/oss/mbedtls.jsonl.gz
- capa/analysis/strings/data/oss/openssl.jsonl.gz
capa/analysis/strings/data/oss/readme.md (new file, 99 lines)

# Strings from Open Source libraries

This directory contains databases of strings extracted from open source software.
capa uses these databases to ignore functions that are likely library code.

There is one file for each database. Each database is a gzip-compressed, JSONL (one JSON document per line) file.
The JSON document looks like this:

```json
{
  "string": "1.0.8, 13-Jul-2019",
  "library_name": "bzip2",
  "library_version": "1.0.8#3",
  "file_path": "CMakeFiles/bz2.dir/bzlib.c.obj",
  "function_name": "BZ2_bzlibVersion",
  "line_number": null
}
```

The following databases were extracted via the vcpkg & jh technique:

- brotli 1.0.9#5
- bzip2 1.0.8#3
- cryptopp 8.7.0
- curl 7.86.0#1
- detours 4.0.1#7
- jemalloc 5.3.0#1
- jsoncpp 1.9.5
- kcp 1.7
- liblzma 5.2.5#6
- libsodium 1.0.18#8
- libpcap 1.10.1#3
- mbedtls 2.28.1
- openssl 3.0.7#1
- sqlite3 3.40.0#1
- tomcrypt 1.18.2#2
- wolfssl 5.5.0
- zlib 1.2.13

This code was originally developed in FLOSS and imported into capa.

## The vcpkg & jh technique

Major steps:

1. build static libraries via vcpkg
2. extract features via jh
3. convert to JSONL format with `jh_to_qs.py`
4. compress with gzip

### Build static libraries via vcpkg

[vcpkg](https://vcpkg.io/en/) is a free C/C++ package manager for acquiring and managing libraries.
We use it to easily build common open source libraries, like zlib.
Use the triplet `x64-windows-static` to build static archives (.lib files that are AR archives containing COFF object files):

```console
PS > C:\vcpkg\vcpkg.exe install --triplet x64-windows-static zlib
```

### Extract features via jh

[jh](https://github.com/williballenthin/lancelot/blob/master/bin/src/bin/jh.rs)
is a lancelot-based utility that parses AR archives containing COFF object files,
reconstructs their control flow, finds functions, and extracts features.
jh extracts numbers, API calls, and strings; we are only interested in the string features.

For each feature, jh emits a CSV line with the fields:

- target triplet
- compiler
- library
- version
- build profile
- path
- function
- feature type
- feature value

For example:

```csv
x64-windows-static,msvc143,bzip2,1.0.8#3,release,CMakeFiles/bz2.dir/bzlib.c.obj,BZ2_bzBuffToBuffCompress,number,0x00000100
```

For example, to invoke jh:

```console
$ ~/lancelot/target/release/jh x64-windows-static msvc143 zlib 1.2.13 release /mnt/c/vcpkg/installed/x64-windows-static/lib/zlib.lib > ~/flare-floss/floss/qs/db/data/oss/zlib.csv
```

### Convert to OSS database format

We use the script `jh_to_qs.py` to convert these CSV lines into a JSONL file prepared for FLOSS:

```console
$ python3 jh_to_qs.py zlib.csv > zlib.jsonl
```

These files are then gzip'd:

```console
$ gzip -c zlib.jsonl > zlib.jsonl.gz
```
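The `jh_to_qs.py` script itself is not reproduced in this diff; a rough sketch of the conversion it performs, under the assumption that it keeps only `string` features and maps the nine CSV columns above onto the JSONL fields described earlier:

```python
# rough sketch of the CSV -> JSONL conversion (jh_to_qs.py is not shown in this diff);
# assumes the nine jh CSV columns listed above, keeping only "string" features.
import csv
import json
import sys

for row in csv.reader(sys.stdin):
    triplet, compiler, library, version, profile, path, function, ftype, value = row
    if ftype != "string":
        continue
    print(
        json.dumps(
            {
                "string": value,
                "library_name": library,
                "library_version": version,
                "file_path": path,
                "function_name": function,
                "line_number": None,
            }
        )
    )
```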
More new binary data files (contents not shown):

- capa/analysis/strings/data/oss/sqlite3.jsonl.gz
- capa/analysis/strings/data/oss/tomcrypt.jsonl.gz
- capa/analysis/strings/data/oss/wolfssl.jsonl.gz
- capa/analysis/strings/data/oss/zlib.jsonl.gz
- capa/analysis/strings/data/winapi/apis.txt.gz
- capa/analysis/strings/data/winapi/dlls.txt.gz
(file name not shown in capture)

```diff
@@ -6,20 +6,16 @@
 # Unless required by applicable law or agreed to in writing, software distributed under the License
 # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
-import sys
 import logging
 import itertools
 import collections
-from typing import Any, Tuple
-
-import tqdm
+from typing import Any, List, Tuple

 import capa.perf
 import capa.features.freeze as frz
 import capa.render.result_document as rdoc
 from capa.rules import Scope, RuleSet
 from capa.engine import FeatureSet, MatchResults
-from capa.helpers import redirecting_print_to_tqdm
 from capa.capabilities.common import find_file_capabilities
 from capa.features.extractors.base_extractor import CallHandle, ThreadHandle, ProcessHandle, DynamicFeatureExtractor
@@ -139,38 +135,30 @@ def find_dynamic_capabilities(
     feature_counts = rdoc.DynamicFeatureCounts(file=0, processes=())

     assert isinstance(extractor, DynamicFeatureExtractor)
-    with redirecting_print_to_tqdm(disable_progress):
-        with tqdm.contrib.logging.logging_redirect_tqdm():
-            pbar = tqdm.tqdm
-            if disable_progress:
-                # do not use tqdm to avoid unnecessary side effects when caller intends
-                # to disable progress completely
-                def pbar(s, *args, **kwargs):
-                    return s
+    processes: List[ProcessHandle] = list(extractor.get_processes())
+    n_processes: int = len(processes)

-            elif not sys.stderr.isatty():
-                # don't display progress bar when stderr is redirected to a file
-                def pbar(s, *args, **kwargs):
-                    return s
+    with capa.helpers.CapaProgressBar(
+        console=capa.helpers.log_console, transient=True, disable=disable_progress
+    ) as pbar:
+        task = pbar.add_task("matching", total=n_processes, unit="processes")
+        for p in processes:
+            process_matches, thread_matches, call_matches, feature_count = find_process_capabilities(
+                ruleset, extractor, p
+            )
+            feature_counts.processes += (
+                rdoc.ProcessFeatureCount(address=frz.Address.from_capa(p.address), count=feature_count),
+            )
+            logger.debug("analyzed %s and extracted %d features", p.address, feature_count)

-            processes = list(extractor.get_processes())
+            for rule_name, res in process_matches.items():
+                all_process_matches[rule_name].extend(res)
+            for rule_name, res in thread_matches.items():
+                all_thread_matches[rule_name].extend(res)
+            for rule_name, res in call_matches.items():
+                all_call_matches[rule_name].extend(res)

-            pb = pbar(processes, desc="matching", unit=" processes", leave=False)
-            for p in pb:
-                process_matches, thread_matches, call_matches, feature_count = find_process_capabilities(
-                    ruleset, extractor, p
-                )
-                feature_counts.processes += (
-                    rdoc.ProcessFeatureCount(address=frz.Address.from_capa(p.address), count=feature_count),
-                )
-                logger.debug("analyzed %s and extracted %d features", p.address, feature_count)
-
-                for rule_name, res in process_matches.items():
-                    all_process_matches[rule_name].extend(res)
-                for rule_name, res in thread_matches.items():
-                    all_thread_matches[rule_name].extend(res)
-                for rule_name, res in call_matches.items():
-                    all_call_matches[rule_name].extend(res)
+            pbar.advance(task)

     # collection of features that captures the rule matches within process and thread scopes.
     # mapping from feature (matched rule) to set of addresses at which it matched.
```
(file name not shown in capture)

```diff
@@ -6,21 +6,18 @@
 # Unless required by applicable law or agreed to in writing, software distributed under the License
 # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
-import sys
 import time
 import logging
 import itertools
 import collections
-from typing import Any, Tuple
-
-import tqdm.contrib.logging
+from typing import Any, List, Tuple

 import capa.perf
+import capa.helpers
 import capa.features.freeze as frz
 import capa.render.result_document as rdoc
 from capa.rules import Scope, RuleSet
 from capa.engine import FeatureSet, MatchResults
-from capa.helpers import redirecting_print_to_tqdm
 from capa.capabilities.common import find_file_capabilities
 from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle, StaticFeatureExtractor
@@ -143,75 +140,58 @@ def find_static_capabilities(
     library_functions: Tuple[rdoc.LibraryFunction, ...] = ()

     assert isinstance(extractor, StaticFeatureExtractor)
-    with redirecting_print_to_tqdm(disable_progress):
-        with tqdm.contrib.logging.logging_redirect_tqdm():
-            pbar = tqdm.tqdm
-            if capa.helpers.is_runtime_ghidra():
-                # Ghidrathon interpreter cannot properly handle
-                # the TMonitor thread that is created via a monitor_interval
-                # > 0
-                pbar.monitor_interval = 0
-            if disable_progress:
-                # do not use tqdm to avoid unnecessary side effects when caller intends
-                # to disable progress completely
-                def pbar(s, *args, **kwargs):
-                    return s
+    functions: List[FunctionHandle] = list(extractor.get_functions())
+    n_funcs: int = len(functions)
+    n_libs: int = 0
+    percentage: float = 0

-            elif not sys.stderr.isatty():
-                # don't display progress bar when stderr is redirected to a file
-                def pbar(s, *args, **kwargs):
-                    return s
-
-            functions = list(extractor.get_functions())
-            n_funcs = len(functions)
-
-            pb = pbar(functions, desc="matching", unit=" functions", postfix="skipped 0 library functions", leave=False)
-            for f in pb:
-                t0 = time.time()
-                if extractor.is_library_function(f.address):
-                    function_name = extractor.get_function_name(f.address)
-                    logger.debug("skipping library function 0x%x (%s)", f.address, function_name)
-                    library_functions += (
-                        rdoc.LibraryFunction(address=frz.Address.from_capa(f.address), name=function_name),
-                    )
-                    n_libs = len(library_functions)
-                    percentage = round(100 * (n_libs / n_funcs))
-                    if isinstance(pb, tqdm.tqdm):
-                        pb.set_postfix_str(f"skipped {n_libs} library functions ({percentage}%)")
-                    continue
-
-                function_matches, bb_matches, insn_matches, feature_count = find_code_capabilities(
-                    ruleset, extractor, f
+    with capa.helpers.CapaProgressBar(
+        console=capa.helpers.log_console, transient=True, disable=disable_progress
+    ) as pbar:
+        task = pbar.add_task(
+            "matching", total=n_funcs, unit="functions", postfix=f"skipped {n_libs} library functions, {percentage}%"
         )
+        for f in functions:
+            t0 = time.time()
+            if extractor.is_library_function(f.address):
+                function_name = extractor.get_function_name(f.address)
+                logger.debug("skipping library function 0x%x (%s)", f.address, function_name)
+                library_functions += (
+                    rdoc.LibraryFunction(address=frz.Address.from_capa(f.address), name=function_name),
+                )
-                feature_counts.functions += (
-                    rdoc.FunctionFeatureCount(address=frz.Address.from_capa(f.address), count=feature_count),
-                )
-                t1 = time.time()
+                n_libs = len(library_functions)
+                percentage = round(100 * (n_libs / n_funcs))
+                pbar.update(task, postfix=f"skipped {n_libs} library functions, {percentage}%")
+                pbar.advance(task)
+                continue

-                match_count = 0
-                for name, matches_ in itertools.chain(
-                    function_matches.items(), bb_matches.items(), insn_matches.items()
-                ):
-                    # in practice, most matches are derived rules,
-                    # like "check OS version/5bf4c7f39fd4492cbed0f6dc7d596d49"
-                    # but when we log to the human, they really care about "real" rules.
-                    if not ruleset.rules[name].is_subscope_rule():
-                        match_count += len(matches_)
+            function_matches, bb_matches, insn_matches, feature_count = find_code_capabilities(ruleset, extractor, f)
+            feature_counts.functions += (
+                rdoc.FunctionFeatureCount(address=frz.Address.from_capa(f.address), count=feature_count),
+            )
+            t1 = time.time()

-                logger.debug(
-                    "analyzed function 0x%x and extracted %d features, %d matches in %0.02fs",
-                    f.address,
-                    feature_count,
-                    match_count,
-                    t1 - t0,
-                )
+            match_count = 0
+            for name, matches_ in itertools.chain(function_matches.items(), bb_matches.items(), insn_matches.items()):
+                if not ruleset.rules[name].is_subscope_rule():
+                    match_count += len(matches_)

-                for rule_name, res in function_matches.items():
-                    all_function_matches[rule_name].extend(res)
-                for rule_name, res in bb_matches.items():
-                    all_bb_matches[rule_name].extend(res)
-                for rule_name, res in insn_matches.items():
-                    all_insn_matches[rule_name].extend(res)
+            logger.debug(
+                "analyzed function 0x%x and extracted %d features, %d matches in %0.02fs",
+                f.address,
+                feature_count,
+                match_count,
+                t1 - t0,
+            )
+
+            for rule_name, res in function_matches.items():
+                all_function_matches[rule_name].extend(res)
+            for rule_name, res in bb_matches.items():
+                all_bb_matches[rule_name].extend(res)
+            for rule_name, res in insn_matches.items():
+                all_insn_matches[rule_name].extend(res)
+
+            pbar.advance(task)

     # collection of features that captures the rule matches within function, BB, and instruction scopes.
     # mapping from feature (matched rule) to set of addresses at which it matched.
```
(file name not shown in capture)

```diff
@@ -5,8 +5,6 @@
 # Unless required by applicable law or agreed to in writing, software distributed under the License
 # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
-
-import struct
 from typing import Tuple, Iterator

 from binaryninja import Segment, BinaryView, SymbolType, SymbolBinding
@@ -20,56 +18,24 @@ from capa.features.address import NO_ADDRESS, Address, FileOffsetAddress, Absolu
 from capa.features.extractors.binja.helpers import read_c_string, unmangle_c_name


-def check_segment_for_pe(bv: BinaryView, seg: Segment) -> Iterator[Tuple[int, int]]:
-    """check segment for embedded PE
-
-    adapted for binja from:
-    https://github.com/vivisect/vivisect/blob/7be4037b1cecc4551b397f840405a1fc606f9b53/PE/carve.py#L19
-    """
-    mz_xor = [
-        (
-            capa.features.extractors.helpers.xor_static(b"MZ", i),
-            capa.features.extractors.helpers.xor_static(b"PE", i),
-            i,
-        )
-        for i in range(256)
-    ]
-
-    todo = []
-    # If this is the first segment of the binary, skip the first bytes. Otherwise, there will always be a matched
-    # PE at the start of the binaryview.
-    start = seg.start
-    if bv.view_type == "PE" and start == bv.start:
+def check_segment_for_pe(bv: BinaryView, seg: Segment) -> Iterator[Tuple[Feature, Address]]:
+    """check segment for embedded PE"""
+    start = 0
+    if bv.view_type == "PE" and seg.start == bv.start:
+        # If this is the first segment of the binary, skip the first bytes.
+        # Otherwise, there will always be a matched PE at the start of the binaryview.
         start += 1

-    for mzx, pex, i in mz_xor:
-        for off, _ in bv.find_all_data(start, seg.end, mzx):
-            todo.append((off, mzx, pex, i))
+    buf = bv.read(seg.start, seg.length)

-    while len(todo):
-        off, mzx, pex, i = todo.pop()
-
-        # The MZ header has one field we will check e_lfanew is at 0x3c
-        e_lfanew = off + 0x3C
-
-        if seg.end < (e_lfanew + 4):
-            continue
-
-        newoff = struct.unpack("<I", capa.features.extractors.helpers.xor_static(bv.read(e_lfanew, 4), i))[0]
-
-        peoff = off + newoff
-        if seg.end < (peoff + 2):
-            continue
-
-        if bv.read(peoff, 2) == pex:
-            yield off, i
+    for offset, _ in capa.features.extractors.helpers.carve_pe(buf, start):
+        yield Characteristic("embedded pe"), FileOffsetAddress(seg.start + offset)


 def extract_file_embedded_pe(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
     """extract embedded PE features"""
     for seg in bv.segments:
-        for ea, _ in check_segment_for_pe(bv, seg):
-            yield Characteristic("embedded pe"), FileOffsetAddress(ea)
+        yield from check_segment_for_pe(bv, seg)


 def extract_file_export_names(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
```
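The removed code implements the classic XOR'd-MZ carving trick that `carve_pe` now centralizes: scan for every single-byte-XOR encoding of "MZ", follow `e_lfanew`, and confirm "PE" under the same key. A standalone sketch of that idea over a plain buffer (an illustration, not capa's exact `carve_pe` implementation):

```python
# standalone sketch of single-byte-XOR PE carving; illustrative only.
import struct
from typing import Iterator, Tuple


def xor_static(data: bytes, key: int) -> bytes:
    return bytes(c ^ key for c in data)


def carve(buf: bytes, offset: int = 0) -> Iterator[Tuple[int, int]]:
    """yield (offset, xor-key) for each embedded PE candidate in buf."""
    for key in range(256):
        mz = xor_static(b"MZ", key)
        pe = xor_static(b"PE", key)

        off = buf.find(mz, offset)
        while off != -1:
            e_lfanew = off + 0x3C  # location of the e_lfanew field in the MZ header
            if e_lfanew + 4 <= len(buf):
                newoff = struct.unpack("<I", xor_static(buf[e_lfanew : e_lfanew + 4], key))[0]
                peoff = off + newoff
                if peoff + 2 <= len(buf) and buf[peoff : peoff + 2] == pe:
                    yield off, key
            off = buf.find(mz, off + 1)
```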
@@ -5,31 +5,175 @@
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
import os
|
||||
import sys
|
||||
import logging
|
||||
import subprocess
|
||||
import importlib.util
|
||||
from typing import Optional
|
||||
from pathlib import Path
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# When the script gets executed as a standalone executable (via PyInstaller), `import binaryninja` does not work because
|
||||
# we have excluded the binaryninja module in `pyinstaller.spec`. The trick here is to call the system Python and try
|
||||
# to find out the path of the binaryninja module that has been installed.
|
||||
# Note, including the binaryninja module in the `pyinstaller.spec` would not work, since the binaryninja module tries to
|
||||
# find the binaryninja core e.g., `libbinaryninjacore.dylib`, using a relative path. And this does not work when the
|
||||
# binaryninja module is extracted by the PyInstaller.
|
||||
code = r"""
|
||||
CODE = r"""
|
||||
from pathlib import Path
|
||||
from importlib import util
|
||||
spec = util.find_spec('binaryninja')
|
||||
if spec is not None:
|
||||
if len(spec.submodule_search_locations) > 0:
|
||||
path = Path(spec.submodule_search_locations[0])
|
||||
# encode the path with utf8 then convert to hex, make sure it can be read and restored properly
|
||||
print(str(path.parent).encode('utf8').hex())
|
||||
path = Path(spec.submodule_search_locations[0])
|
||||
# encode the path with utf8 then convert to hex, make sure it can be read and restored properly
|
||||
print(str(path.parent).encode('utf8').hex())
|
||||
"""
|
||||
|
||||
|
||||
def find_binja_path() -> Path:
|
||||
raw_output = subprocess.check_output(["python", "-c", code]).decode("ascii").strip()
|
||||
return Path(bytes.fromhex(raw_output).decode("utf8"))
|
||||
def find_binaryninja_path_via_subprocess() -> Optional[Path]:
|
||||
raw_output = subprocess.check_output(["python", "-c", CODE]).decode("ascii").strip()
|
||||
output = bytes.fromhex(raw_output).decode("utf8")
|
||||
if not output.strip():
|
||||
return None
|
||||
return Path(output)
|
||||
|
||||
|
||||
def get_desktop_entry(name: str) -> Optional[Path]:
|
||||
"""
|
||||
Find the path for the given XDG Desktop Entry name.
|
||||
|
||||
Like:
|
||||
|
||||
>> get_desktop_entry("com.vector35.binaryninja.desktop")
|
||||
Path("~/.local/share/applications/com.vector35.binaryninja.desktop")
|
||||
"""
|
||||
assert sys.platform in ("linux", "linux2")
|
||||
assert name.endswith(".desktop")
|
||||
|
||||
data_dirs = os.environ.get("XDG_DATA_DIRS", "/usr/share") + f":{Path.home()}/.local/share"
|
||||
for data_dir in data_dirs.split(":"):
|
||||
applications = Path(data_dir) / "applications"
|
||||
for application in applications.glob("*.desktop"):
|
||||
if application.name == name:
|
||||
return application
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def get_binaryninja_path(desktop_entry: Path) -> Optional[Path]:
|
||||
# from: Exec=/home/wballenthin/software/binaryninja/binaryninja %u
|
||||
# to: /home/wballenthin/software/binaryninja/
|
||||
for line in desktop_entry.read_text(encoding="utf-8").splitlines():
|
||||
if not line.startswith("Exec="):
|
||||
continue
|
||||
|
||||
if not line.endswith("binaryninja %u"):
|
||||
continue
|
||||
|
||||
binaryninja_path = Path(line[len("Exec=") : -len("binaryninja %u")])
|
||||
if not binaryninja_path.exists():
|
||||
return None
|
||||
|
||||
return binaryninja_path
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def validate_binaryninja_path(binaryninja_path: Path) -> bool:
|
||||
if not binaryninja_path:
|
||||
return False
|
||||
|
||||
module_path = binaryninja_path / "python"
|
||||
if not module_path.is_dir():
|
||||
return False
|
||||
|
||||
if not (module_path / "binaryninja" / "__init__.py").is_file():
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
|
||||
def find_binaryninja() -> Optional[Path]:
|
||||
binaryninja_path = find_binaryninja_path_via_subprocess()
|
||||
if not binaryninja_path or not validate_binaryninja_path(binaryninja_path):
|
||||
if sys.platform == "linux" or sys.platform == "linux2":
|
||||
# ok
|
||||
logger.debug("detected OS: linux")
|
||||
elif sys.platform == "darwin":
|
||||
logger.warning("unsupported platform to find Binary Ninja: %s", sys.platform)
|
||||
return False
|
||||
elif sys.platform == "win32":
|
||||
logger.warning("unsupported platform to find Binary Ninja: %s", sys.platform)
|
||||
return False
|
||||
else:
|
||||
logger.warning("unsupported platform to find Binary Ninja: %s", sys.platform)
|
||||
return False
|
||||

        desktop_entry = get_desktop_entry("com.vector35.binaryninja.desktop")
        if not desktop_entry:
            logger.debug("failed to find Binary Ninja application")
            return None
        logger.debug("found Binary Ninja application: %s", desktop_entry)

        binaryninja_path = get_binaryninja_path(desktop_entry)
        if not binaryninja_path:
            logger.debug("failed to determine Binary Ninja installation path")
            return None

        if not validate_binaryninja_path(binaryninja_path):
            logger.debug("failed to validate Binary Ninja installation")
            return None

    logger.debug("found Binary Ninja installation: %s", binaryninja_path)

    return binaryninja_path / "python"


def is_binaryninja_installed() -> bool:
    """Is the binaryninja module ready to import?"""
    try:
        return importlib.util.find_spec("binaryninja") is not None
    except ModuleNotFoundError:
        return False


def has_binaryninja() -> bool:
    if is_binaryninja_installed():
        logger.debug("found installed Binary Ninja API")
        return True

    logger.debug("Binary Ninja API not installed, searching...")

    binaryninja_path = find_binaryninja()
    if not binaryninja_path:
        logger.debug("failed to find Binary Ninja installation")

    logger.debug("found Binary Ninja API: %s", binaryninja_path)
    return binaryninja_path is not None


def load_binaryninja() -> bool:
    try:
        import binaryninja

        return True
    except ImportError:
        binaryninja_path = find_binaryninja()
        if not binaryninja_path:
            return False

        sys.path.append(binaryninja_path.absolute().as_posix())
        try:
            import binaryninja  # noqa: F401 unused import

            return True
        except ImportError:
            return False


if __name__ == "__main__":
    print(find_binja_path())
    print(find_binaryninja_path_via_subprocess())
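
Taken together, these helpers form a small discovery chain: check whether `binaryninja` is already importable, otherwise locate the installation and extend `sys.path` before importing. A minimal usage sketch (mirroring how the loader changes later in this diff drive the module; the input path is hypothetical):

    import capa.features.extractors.binja.find_binja_api as finder

    if not finder.has_binaryninja():
        raise RuntimeError("cannot find Binary Ninja API module.")

    if not finder.load_binaryninja():
        raise RuntimeError("failed to load Binary Ninja API module.")

    # only importable once load_binaryninja() has extended sys.path
    import binaryninja

    bv = binaryninja.load("/path/to/sample.exe_")  # hypothetical input path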
@@ -9,6 +9,7 @@
import logging
from typing import Tuple, Iterator

import capa.features.extractors.helpers
from capa.helpers import assert_never
from capa.features.insn import API, Number
from capa.features.common import String, Feature
@@ -50,7 +51,8 @@ def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -
    else:
        assert_never(value)

    yield API(call.api), ch.address
    for name in capa.features.extractors.helpers.generate_symbols("", call.api):
        yield API(name), ch.address


def extract_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[Tuple[Feature, Address]]:
@@ -9,6 +9,7 @@
import logging
from typing import Tuple, Iterator

import capa.features.extractors.helpers
from capa.features.insn import API, Number
from capa.features.common import String, Feature
from capa.features.address import Address
@@ -44,7 +45,8 @@ def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -
    # but yielding the entire string would be helpful for an analyst looking at the verbose output
    yield String(arg_value), ch.address

    yield API(call.name), ch.address
    for name in capa.features.extractors.helpers.generate_symbols("", call.name):
        yield API(name), ch.address


def extract_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[Tuple[Feature, Address]]:
@@ -8,7 +8,6 @@
from typing import List, Tuple, Iterator

import idaapi
import ida_nalt

import capa.ida.helpers
import capa.features.extractors.elf
@@ -32,7 +31,9 @@ class IdaFeatureExtractor(StaticFeatureExtractor):
    def __init__(self):
        super().__init__(
            hashes=SampleHashes(
                md5=ida_nalt.retrieve_input_file_md5(), sha1="(unknown)", sha256=ida_nalt.retrieve_input_file_sha256()
                md5=capa.ida.helpers.retrieve_input_file_md5(),
                sha1="(unknown)",
                sha256=capa.ida.helpers.retrieve_input_file_sha256(),
            )
        )
        self.global_features: List[Tuple[Feature, Address]] = []

@@ -41,7 +41,7 @@ if hasattr(ida_bytes, "parse_binpat_str"):
        return

    while True:
        ea, _ = ida_bytes.bin_search3(start, end, patterns, ida_bytes.BIN_SEARCH_FORWARD)
        ea, _ = ida_bytes.bin_search(start, end, patterns, ida_bytes.BIN_SEARCH_FORWARD)
        if ea == idaapi.BADADDR:
            break
        start = ea + 1
117  capa/features/extractors/ida/idalib.py  Normal file
@@ -0,0 +1,117 @@
# Copyright (C) 2024 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import os
import sys
import json
import logging
import importlib.util
from typing import Optional
from pathlib import Path

logger = logging.getLogger(__name__)


def is_idalib_installed() -> bool:
    try:
        return importlib.util.find_spec("idapro") is not None
    except ModuleNotFoundError:
        return False


def get_idalib_user_config_path() -> Optional[Path]:
    """Get the path to the user's config file based on platform following IDA's user directories."""
    # derived from `py-activate-idalib.py` from IDA v9.0 Beta 4

    if sys.platform == "win32":
        # On Windows, use the %APPDATA%\Hex-Rays\IDA Pro directory
        config_dir = Path(os.getenv("APPDATA")) / "Hex-Rays" / "IDA Pro"
    else:
        # On macOS and Linux, use ~/.idapro
        config_dir = Path.home() / ".idapro"

    # Return the full path to the config file (now in JSON format)
    user_config_path = config_dir / "ida-config.json"
    if not user_config_path.exists():
        return None
    return user_config_path


def find_idalib() -> Optional[Path]:
    config_path = get_idalib_user_config_path()
    if not config_path:
        logger.error("IDA Pro user configuration does not exist, please make sure you've installed idalib properly.")
        return None

    config = json.loads(config_path.read_text(encoding="utf-8"))

    try:
        ida_install_dir = Path(config["Paths"]["ida-install-dir"])
    except KeyError:
        logger.error(
            "IDA Pro user configuration does not contain location of IDA Pro installation, please make sure you've installed idalib properly."
        )
        return None

    if not ida_install_dir.exists():
        return None

    libname = {
        "win32": "idalib.dll",
        "linux": "libidalib.so",
        "linux2": "libidalib.so",
        "darwin": "libidalib.dylib",
    }[sys.platform]

    if not (ida_install_dir / "ida.hlp").is_file():
        return None

    if not (ida_install_dir / libname).is_file():
        return None

    idalib_path = ida_install_dir / "idalib" / "python"
    if not idalib_path.exists():
        return None

    if not (idalib_path / "idapro" / "__init__.py").is_file():
        return None

    return idalib_path


def has_idalib() -> bool:
    if is_idalib_installed():
        logger.debug("found installed IDA idalib API")
        return True

    logger.debug("IDA idalib API not installed, searching...")

    idalib_path = find_idalib()
    if not idalib_path:
        logger.debug("failed to find IDA idalib installation")

    logger.debug("found IDA idalib API: %s", idalib_path)
    return idalib_path is not None


def load_idalib() -> bool:
    try:
        import idapro

        return True
    except ImportError:
        idalib_path = find_idalib()
        if not idalib_path:
            return False

        sys.path.append(idalib_path.absolute().as_posix())
        try:
            import idapro  # noqa: F401 unused import

            return True
        except ImportError:
            return False
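
For reference, a minimal sketch of how these helpers are meant to be driven, mirroring the loader changes later in this diff. The input path is hypothetical, and `idapro.close_database()` is an assumption based on the idalib API rather than something shown in this diff:

    import io

    import capa.helpers
    import capa.features.extractors.ida.idalib as idalib

    if not idalib.has_idalib():
        raise RuntimeError("cannot find IDA idalib module.")
    if not idalib.load_idalib():
        raise RuntimeError("failed to load IDA idalib module.")

    # only importable once load_idalib() has extended sys.path
    import idapro
    import ida_auto

    # idalib writes to stdout, so capture it to keep structured output clean
    with capa.helpers.stdout_redirector(io.BytesIO()):
        # open_database returns non-zero on failure
        if idapro.open_database("/path/to/sample.exe_", run_auto_analysis=True):
            raise RuntimeError("failed to analyze input file")
        ida_auto.auto_wait()

    idapro.close_database()  # assumed API; close the database when done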
@@ -130,7 +130,13 @@ def extract_file_arch(pe, **kwargs):
    elif pe.FILE_HEADER.Machine == pefile.MACHINE_TYPE["IMAGE_FILE_MACHINE_AMD64"]:
        yield Arch(ARCH_AMD64), NO_ADDRESS
    else:
        logger.warning("unsupported architecture: %s", pefile.MACHINE_TYPE[pe.FILE_HEADER.Machine])
        try:
            logger.warning(
                "unsupported architecture: %s",
                pefile.MACHINE_TYPE[pe.FILE_HEADER.Machine],
            )
        except KeyError:
            logger.warning("unknown architecture: %s", pe.FILE_HEADER.Machine)


def extract_file_features(pe, buf):
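
The reason for the new try/except: `pefile.MACHINE_TYPE` is a two-way dict, so mapping an unrecognized `FILE_HEADER.Machine` value back to a name raises `KeyError` inside the old logging call. A small sketch of the failure mode (the machine value 0x9999 is made up):

    import pefile

    machine = 0x9999  # hypothetical value not present in pefile.MACHINE_TYPE
    try:
        name = pefile.MACHINE_TYPE[machine]
    except KeyError:
        # the old code crashed here while formatting the warning;
        # the new code falls back to logging the raw numeric value
        name = hex(machine)
    print(name)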
@@ -10,6 +10,7 @@ from typing import Dict, List, Tuple, Optional
from pathlib import Path
from zipfile import ZipFile
from collections import defaultdict
from dataclasses import dataclass

from capa.exceptions import UnsupportedFormatError
from capa.features.extractors.vmray.models import File, Flog, SummaryV2, StaticData, FunctionCall, xml_to_dict
@@ -21,6 +22,21 @@ DEFAULT_ARCHIVE_PASSWORD = b"infected"
SUPPORTED_FLOG_VERSIONS = ("2",)


@dataclass
class VMRayMonitorThread:
    tid: int  # thread ID assigned by OS
    monitor_id: int  # unique ID assigned to thread by VMRay
    process_monitor_id: int  # unique ID assigned to containing process by VMRay


@dataclass
class VMRayMonitorProcess:
    pid: int  # process ID assigned by OS
    ppid: int  # parent process ID assigned by OS
    monitor_id: int  # unique ID assigned to process by VMRay
    image_name: str


class VMRayAnalysis:
    def __init__(self, zipfile_path: Path):
        self.zipfile = ZipFile(zipfile_path, "r")
@@ -45,9 +61,15 @@ class VMRayAnalysis:
        self.exports: Dict[int, str] = {}
        self.imports: Dict[int, Tuple[str, str]] = {}
        self.sections: Dict[int, str] = {}
        self.process_ids: Dict[int, int] = {}
        self.process_threads: Dict[int, List[int]] = defaultdict(list)
        self.process_calls: Dict[int, Dict[int, List[FunctionCall]]] = defaultdict(lambda: defaultdict(list))
        self.monitor_processes: Dict[int, VMRayMonitorProcess] = {}
        self.monitor_threads: Dict[int, VMRayMonitorThread] = {}

        # map monitor thread IDs to their associated monitor process ID
        self.monitor_threads_by_monitor_process: Dict[int, List[int]] = defaultdict(list)

        # map function calls to their associated monitor thread ID mapped to its associated monitor process ID
        self.monitor_process_calls: Dict[int, Dict[int, List[FunctionCall]]] = defaultdict(lambda: defaultdict(list))

        self.base_address: int

        self.sample_file_name: Optional[str] = None
@@ -79,13 +101,14 @@ class VMRayAnalysis:

        self.sample_file_buf: bytes = self.zipfile.read(sample_file_path, pwd=DEFAULT_ARCHIVE_PASSWORD)

        # do not change order, it matters
        self._compute_base_address()
        self._compute_imports()
        self._compute_exports()
        self._compute_sections()
        self._compute_process_ids()
        self._compute_process_threads()
        self._compute_process_calls()
        self._compute_monitor_processes()
        self._compute_monitor_threads()
        self._compute_monitor_process_calls()

    def _find_sample_file(self):
        for file_name, file_analysis in self.sv2.files.items():
@@ -128,34 +151,48 @@ class VMRayAnalysis:
        for elffile_section in self.sample_file_static_data.elf.sections:
            self.sections[elffile_section.header.sh_addr] = elffile_section.header.sh_name

    def _compute_process_ids(self):
    def _compute_monitor_processes(self):
        for process in self.sv2.processes.values():
            # we expect VMRay's monitor IDs to be unique, but OS PIDs may be reused
            assert process.monitor_id not in self.process_ids.keys()
            self.process_ids[process.monitor_id] = process.os_pid
            # we expect monitor IDs to be unique
            assert process.monitor_id not in self.monitor_processes

    def _compute_process_threads(self):
        # logs/flog.xml appears to be the only file that contains thread-related data
        # so we use it here to map processes to threads
            ppid: int = (
                self.sv2.processes[process.ref_parent_process.path[1]].os_pid if process.ref_parent_process else 0
            )
            self.monitor_processes[process.monitor_id] = VMRayMonitorProcess(
                process.os_pid, ppid, process.monitor_id, process.image_name
            )

        # not all processes are recorded in SummaryV2.json, get missing data from flog.xml, see #2394
        for monitor_process in self.flog.analysis.monitor_processes:
            vmray_monitor_process: VMRayMonitorProcess = VMRayMonitorProcess(
                monitor_process.os_pid,
                monitor_process.os_parent_pid,
                monitor_process.process_id,
                monitor_process.image_name,
            )

            if monitor_process.process_id not in self.monitor_processes:
                self.monitor_processes[monitor_process.process_id] = vmray_monitor_process
            else:
                # we expect monitor processes recorded in both SummaryV2.json and flog.xml to be equal
                assert self.monitor_processes[monitor_process.process_id] == vmray_monitor_process

    def _compute_monitor_threads(self):
        for monitor_thread in self.flog.analysis.monitor_threads:
            # we expect monitor IDs to be unique
            assert monitor_thread.thread_id not in self.monitor_threads

            self.monitor_threads[monitor_thread.thread_id] = VMRayMonitorThread(
                monitor_thread.os_tid, monitor_thread.thread_id, monitor_thread.process_id
            )

            # we expect each monitor thread ID to be unique for its associated monitor process ID, e.g. monitor
            # thread ID 10 should not be captured twice for monitor process ID 1
            assert monitor_thread.thread_id not in self.monitor_threads_by_monitor_process[monitor_thread.process_id]

            self.monitor_threads_by_monitor_process[monitor_thread.process_id].append(monitor_thread.thread_id)

    def _compute_monitor_process_calls(self):
        for function_call in self.flog.analysis.function_calls:
            pid: int = self.get_process_os_pid(function_call.process_id)  # flog.xml uses process monitor ID, not OS PID
            tid: int = function_call.thread_id

            assert isinstance(pid, int)
            assert isinstance(tid, int)

            if tid not in self.process_threads[pid]:
                self.process_threads[pid].append(tid)

    def _compute_process_calls(self):
        for function_call in self.flog.analysis.function_calls:
            pid: int = self.get_process_os_pid(function_call.process_id)  # flog.xml uses process monitor ID, not OS PID
            tid: int = function_call.thread_id

            assert isinstance(pid, int)
            assert isinstance(tid, int)

            self.process_calls[pid][tid].append(function_call)

    def get_process_os_pid(self, monitor_id: int) -> int:
        return self.process_ids[monitor_id]
            self.monitor_process_calls[function_call.process_id][function_call.thread_id].append(function_call)
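
The net effect is that the analysis now indexes everything by VMRay's stable monitor IDs rather than by OS PIDs, which the OS may reuse. A small illustrative sketch with made-up sample data:

    from collections import defaultdict
    from dataclasses import dataclass
    from typing import Dict, List


    @dataclass
    class VMRayMonitorProcess:
        pid: int  # OS process ID (may be reused by the OS)
        ppid: int  # OS parent process ID
        monitor_id: int  # stable, unique ID assigned by VMRay
        image_name: str


    # index by monitor ID, not PID: two processes can share a (reused) PID 1234,
    # but never a monitor ID
    monitor_processes: Dict[int, VMRayMonitorProcess] = {
        1: VMRayMonitorProcess(pid=1234, ppid=1, monitor_id=1, image_name="dropper.exe"),
        2: VMRayMonitorProcess(pid=1234, ppid=1234, monitor_id=2, image_name="payload.exe"),
    }

    # map monitor process ID -> list of monitor thread IDs
    monitor_threads_by_monitor_process: Dict[int, List[int]] = defaultdict(list)
    monitor_threads_by_monitor_process[1].append(10)
    monitor_threads_by_monitor_process[2].append(11)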
@@ -8,6 +8,7 @@
import logging
from typing import Tuple, Iterator

import capa.features.extractors.helpers
from capa.features.insn import API, Number
from capa.features.common import String, Feature
from capa.features.address import Address
@@ -26,7 +27,11 @@ def get_call_param_features(param: Param, ch: CallHandle) -> Iterator[Tuple[Feat
    if param.deref.type_ in PARAM_TYPE_INT:
        yield Number(hexint(param.deref.value)), ch.address
    elif param.deref.type_ in PARAM_TYPE_STR:
        yield String(param.deref.value), ch.address
        # TODO(mr-tz): remove FPs like " \\x01\\x02\\x03\\x04\\x05\\x06\\x07\\x08\\x09\\x0a\\x0b\\x0c\\x0d\\x0e\\x0f\\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a\\x1b\\x1c\\x1d\\x1e\..."
        # https://github.com/mandiant/capa/issues/2432

        # parsing the data up to here results in double-escaped backslashes, remove those here
        yield String(param.deref.value.replace("\\\\", "\\")), ch.address
    else:
        logger.debug("skipping deref param type %s", param.deref.type_)
elif param.value is not None:
@@ -41,7 +46,8 @@ def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -
    for param in call.params_in.params:
        yield from get_call_param_features(param, ch)

    yield API(call.name), ch.address
    for name in capa.features.extractors.helpers.generate_symbols("", call.name):
        yield API(name), ch.address


def extract_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[Tuple[Feature, Address]]:
@@ -15,9 +15,16 @@ import capa.features.extractors.vmray.call
import capa.features.extractors.vmray.file
import capa.features.extractors.vmray.global_
from capa.features.common import Feature, Characteristic
from capa.features.address import NO_ADDRESS, Address, ThreadAddress, DynamicCallAddress, AbsoluteVirtualAddress
from capa.features.extractors.vmray import VMRayAnalysis
from capa.features.extractors.vmray.models import PARAM_TYPE_STR, Process, ParamList, FunctionCall
from capa.features.address import (
    NO_ADDRESS,
    Address,
    ThreadAddress,
    ProcessAddress,
    DynamicCallAddress,
    AbsoluteVirtualAddress,
)
from capa.features.extractors.vmray import VMRayAnalysis, VMRayMonitorThread, VMRayMonitorProcess
from capa.features.extractors.vmray.models import PARAM_TYPE_STR, ParamList, FunctionCall
from capa.features.extractors.base_extractor import (
    CallHandle,
    SampleHashes,
@@ -69,20 +76,24 @@ class VMRayExtractor(DynamicFeatureExtractor):
        yield from self.global_features

    def get_processes(self) -> Iterator[ProcessHandle]:
        yield from capa.features.extractors.vmray.file.get_processes(self.analysis)
        for monitor_process in self.analysis.monitor_processes.values():
            address: ProcessAddress = ProcessAddress(pid=monitor_process.pid, ppid=monitor_process.ppid)
            yield ProcessHandle(address, inner=monitor_process)

    def extract_process_features(self, ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]:
        # we have not identified process-specific features for VMRay yet
        yield from []

    def get_process_name(self, ph) -> str:
        process: Process = ph.inner
        return process.image_name
        monitor_process: VMRayMonitorProcess = ph.inner
        return monitor_process.image_name

    def get_threads(self, ph: ProcessHandle) -> Iterator[ThreadHandle]:
        for thread in self.analysis.process_threads[ph.address.pid]:
            address: ThreadAddress = ThreadAddress(process=ph.address, tid=thread)
            yield ThreadHandle(address=address, inner={})
        for monitor_thread_id in self.analysis.monitor_threads_by_monitor_process[ph.inner.monitor_id]:
            monitor_thread: VMRayMonitorThread = self.analysis.monitor_threads[monitor_thread_id]

            address: ThreadAddress = ThreadAddress(process=ph.address, tid=monitor_thread.tid)
            yield ThreadHandle(address=address, inner=monitor_thread)

    def extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[Tuple[Feature, Address]]:
        if False:
@@ -92,7 +103,7 @@ class VMRayExtractor(DynamicFeatureExtractor):
            return

    def get_calls(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[CallHandle]:
        for function_call in self.analysis.process_calls[ph.address.pid][th.address.tid]:
        for function_call in self.analysis.monitor_process_calls[ph.inner.monitor_id][th.inner.monitor_id]:
            addr = DynamicCallAddress(thread=th.address, id=function_call.fncall_id)
            yield CallHandle(address=addr, inner=function_call)
@@ -6,37 +6,18 @@
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import logging
from typing import Dict, Tuple, Iterator
from typing import Tuple, Iterator

import capa.features.extractors.common
from capa.features.file import Export, Import, Section
from capa.features.common import String, Feature
from capa.features.address import NO_ADDRESS, Address, ProcessAddress, AbsoluteVirtualAddress
from capa.features.address import NO_ADDRESS, Address, AbsoluteVirtualAddress
from capa.features.extractors.vmray import VMRayAnalysis
from capa.features.extractors.helpers import generate_symbols
from capa.features.extractors.vmray.models import Process
from capa.features.extractors.base_extractor import ProcessHandle

logger = logging.getLogger(__name__)


def get_processes(analysis: VMRayAnalysis) -> Iterator[ProcessHandle]:
    processes: Dict[str, Process] = analysis.sv2.processes

    for process in processes.values():
        # we map VMRay's monitor ID to the OS PID to make it easier for users
        # to follow the processes in capa's output
        pid: int = analysis.get_process_os_pid(process.monitor_id)
        ppid: int = (
            analysis.get_process_os_pid(processes[process.ref_parent_process.path[1]].monitor_id)
            if process.ref_parent_process
            else 0
        )

        addr: ProcessAddress = ProcessAddress(pid=pid, ppid=ppid)
        yield ProcessHandle(address=addr, inner=process)


def extract_export_names(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]:
    for addr, name in analysis.exports.items():
        yield Export(name), AbsoluteVirtualAddress(addr)
@@ -87,7 +87,7 @@ class Param(BaseModel):
    deref: Optional[ParamDeref] = None


def validate_param_list(value: Union[List[Param], Param]) -> List[Param]:
def validate_ensure_is_list(value: Union[List[Param], Param]) -> List[Param]:
    if isinstance(value, list):
        return value
    else:
@@ -97,7 +97,7 @@ def validate_param_list(value: Union[List[Param], Param]) -> List[Param]:
# params may be stored as a list of Param or a single Param so we convert
# the input value to Python list type before the inner validation (List[Param])
# is called
ParamList = Annotated[List[Param], BeforeValidator(validate_param_list)]
ParamList = Annotated[List[Param], BeforeValidator(validate_ensure_is_list)]


class Params(BaseModel):
@@ -137,12 +137,46 @@ class FunctionReturn(BaseModel):
    from_addr: HexInt = Field(alias="from")


class MonitorProcess(BaseModel):
    ts: HexInt
    process_id: int
    image_name: str
    filename: str
    # page_root: HexInt
    os_pid: HexInt
    # os_integrity_level: HexInt
    # os_privileges: HexInt
    monitor_reason: str
    parent_id: int
    os_parent_pid: HexInt
    # cmd_line: str
    # cur_dir: str
    # os_username: str
    # bitness: int
    # os_groups: str


class MonitorThread(BaseModel):
    ts: HexInt
    thread_id: int
    process_id: int
    os_tid: HexInt


# handle if there's only a single entry, but the model expects a list
MonitorProcessList = Annotated[List[MonitorProcess], BeforeValidator(validate_ensure_is_list)]
MonitorThreadList = Annotated[List[MonitorThread], BeforeValidator(validate_ensure_is_list)]
FunctionCallList = Annotated[List[FunctionCall], BeforeValidator(validate_ensure_is_list)]


class Analysis(BaseModel):
    log_version: str  # tested 2
    analyzer_version: str  # tested 2024.2.1
    # analysis_date: str

    function_calls: List[FunctionCall] = Field(alias="fncall", default=[])
    monitor_processes: MonitorProcessList = Field(alias="monitor_process", default=[])
    monitor_threads: MonitorThreadList = Field(alias="monitor_thread", default=[])
    function_calls: FunctionCallList = Field(alias="fncall", default=[])
    # function_returns: List[FunctionReturn] = Field(alias="fnret", default=[])
@@ -372,6 +372,10 @@ if __name__ == "__main__":
        from capa.exceptions import UnsupportedRuntimeError

        raise UnsupportedRuntimeError("This version of capa can only be used with Python 3.8+")
    elif sys.version_info < (3, 10):
        from warnings import warn

        warn("This is the last capa version supporting Python 3.8 and 3.9.", DeprecationWarning, stacklevel=2)
    exit_code = main()
    if exit_code != 0:
        popup("capa explorer encountered errors during analysis. Please check the console output for more information.")  # type: ignore [name-defined] # noqa: F821

@@ -164,4 +164,8 @@ if __name__ == "__main__":
        from capa.exceptions import UnsupportedRuntimeError

        raise UnsupportedRuntimeError("This version of capa can only be used with Python 3.8+")
    elif sys.version_info < (3, 10):
        from warnings import warn

        warn("This is the last capa version supporting Python 3.8 and 3.9.", DeprecationWarning, stacklevel=2)
    sys.exit(main())
153  capa/helpers.py
@@ -5,11 +5,13 @@
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import io
import os
import sys
import gzip
import inspect
import ctypes
import logging
import tempfile
import contextlib
import importlib.util
from typing import Dict, List, Union, BinaryIO, Iterator, NoReturn
@@ -17,8 +19,21 @@ from pathlib import Path
from zipfile import ZipFile
from datetime import datetime

import tqdm
import msgspec.json
from rich.console import Console
from rich.progress import (
    Task,
    Text,
    Progress,
    BarColumn,
    TextColumn,
    SpinnerColumn,
    ProgressColumn,
    TimeElapsedColumn,
    MofNCompleteColumn,
    TaskProgressColumn,
    TimeRemainingColumn,
)

from capa.exceptions import UnsupportedFormatError
from capa.features.common import (
@@ -48,6 +63,10 @@ EXTENSIONS_FREEZE = "frz"
logger = logging.getLogger("capa")


# shared console used to redirect logging to stderr
log_console: Console = Console(stderr=True)


def hex(n: int) -> str:
    """render the given number using upper case hex, like: 0x123ABC"""
    if n < 0:
@@ -81,6 +100,59 @@ def assert_never(value) -> NoReturn:
    assert False, f"Unhandled value: {value} ({type(value).__name__})"  # noqa: B011


@contextlib.contextmanager
def stdout_redirector(stream):
    """
    Redirect stdout at the C runtime level,
    which lets us handle native libraries that spam stdout.

    *But*, this only works on Linux! Otherwise it will silently still write to stdout.
    So, try to upstream the fix when possible.

    Via: https://eli.thegreenplace.net/2015/redirecting-all-kinds-of-stdout-in-python/
    """
    if sys.platform not in ("linux", "linux2"):
        logger.warning("Unable to capture STDOUT on non-Linux (begin)")
        yield
        logger.warning("Unable to capture STDOUT on non-Linux (end)")
        return

    # libc is only on Linux
    LIBC = ctypes.CDLL(None)
    C_STDOUT = ctypes.c_void_p.in_dll(LIBC, "stdout")

    # The original fd stdout points to. Usually 1 on POSIX systems.
    original_stdout_fd = sys.stdout.fileno()

    def _redirect_stdout(to_fd):
        """Redirect stdout to the given file descriptor."""
        # Flush the C-level buffer stdout
        LIBC.fflush(C_STDOUT)
        # Flush and close sys.stdout - also closes the file descriptor (fd)
        sys.stdout.close()
        # Make original_stdout_fd point to the same file as to_fd
        os.dup2(to_fd, original_stdout_fd)
        # Create a new sys.stdout that points to the redirected fd
        sys.stdout = io.TextIOWrapper(os.fdopen(original_stdout_fd, "wb"))

    # Save a copy of the original stdout fd in saved_stdout_fd
    saved_stdout_fd = os.dup(original_stdout_fd)
    try:
        # Create a temporary file and redirect stdout to it
        tfile = tempfile.TemporaryFile(mode="w+b")
        _redirect_stdout(tfile.fileno())
        # Yield to caller, then redirect stdout back to the saved fd
        yield
        _redirect_stdout(saved_stdout_fd)
        # Copy contents of temporary file to the given stream
        tfile.flush()
        tfile.seek(0, io.SEEK_SET)
        stream.write(tfile.read())
    finally:
        tfile.close()
        os.close(saved_stdout_fd)
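
Typical use wraps a noisy call and then inspects whatever was captured; a minimal sketch (on non-Linux platforms the body still runs, but nothing is captured):

    import io

    import capa.helpers

    buf = io.BytesIO()
    with capa.helpers.stdout_redirector(buf):
        # anything written to the C-level stdout inside this block,
        # including output from native libraries, lands in `buf` on Linux
        print("captured")

    print("captured output:", buf.getvalue().decode("utf-8", errors="replace"))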
def load_json_from_path(json_path: Path):
    with gzip.open(json_path, "r") as compressed_report:
        try:
@@ -191,39 +263,6 @@ def get_format(sample: Path) -> str:
    return FORMAT_UNKNOWN


@contextlib.contextmanager
def redirecting_print_to_tqdm(disable_progress):
    """
    tqdm (progress bar) expects to have fairly tight control over console output.
    so calls to `print()` will break the progress bar and make things look bad.
    so, this context manager temporarily replaces the `print` implementation
    with one that is compatible with tqdm.
    via: https://stackoverflow.com/a/42424890/87207
    """
    old_print = print  # noqa: T202 [reserved word print used]

    def new_print(*args, **kwargs):
        # If tqdm.tqdm.write raises error, use builtin print
        if disable_progress:
            old_print(*args, **kwargs)
        else:
            try:
                tqdm.tqdm.write(*args, **kwargs)
            except Exception:
                old_print(*args, **kwargs)

    try:
        # Globally replace print with new_print.
        # Verified this works manually on Python 3.11:
        #   >>> import inspect
        #   >>> inspect.builtins
        #   <module 'builtins' (built-in)>
        inspect.builtins.print = new_print  # type: ignore
        yield
    finally:
        inspect.builtins.print = old_print  # type: ignore


def log_unsupported_format_error():
    logger.error("-" * 80)
    logger.error(" Input file does not appear to be a supported file.")
@@ -377,3 +416,47 @@ def is_cache_newer_than_rule_code(cache_dir: Path) -> bool:
        return False

    return True


class RateColumn(ProgressColumn):
    """Renders speed column in progress bar."""

    def render(self, task: "Task") -> Text:
        speed = f"{task.speed:>.1f}" if task.speed else "00.0"
        unit = task.fields.get("unit", "it")
        return Text.from_markup(f"[progress.data.speed]{speed} {unit}/s")


class PostfixColumn(ProgressColumn):
    """Renders a postfix column in progress bar."""

    def render(self, task: "Task") -> Text:
        return Text(task.fields.get("postfix", ""))


class MofNCompleteColumnWithUnit(MofNCompleteColumn):
    """Renders completed/total count column with a unit."""

    def render(self, task: "Task") -> Text:
        ret = super().render(task)
        unit = task.fields.get("unit")
        return ret.append(f" {unit}") if unit else ret


class CapaProgressBar(Progress):
    @classmethod
    def get_default_columns(cls):
        return (
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            TaskProgressColumn(),
            BarColumn(),
            MofNCompleteColumnWithUnit(),
            "•",
            TimeElapsedColumn(),
            "<",
            TimeRemainingColumn(),
            "•",
            RateColumn(),
            PostfixColumn(),
        )
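
These columns read per-task fields, so callers pass `unit` and `postfix` as extra keyword arguments to `add_task()` and `update()` (rich stores unknown kwargs in `task.fields`). A minimal sketch; the task description and field values are made up:

    import time

    from capa.helpers import CapaProgressBar, log_console

    # render to the shared stderr console defined above
    with CapaProgressBar(console=log_console, transient=True) as pbar:
        task = pbar.add_task("matching", total=100, unit="functions", postfix="")
        for i in range(100):
            time.sleep(0.01)  # stand-in for real work
            pbar.update(task, advance=1, postfix=f"fn_{i:04x}")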
@@ -14,6 +14,7 @@ from pathlib import Path
import idc
import idaapi
import ida_ida
import ida_nalt
import idautils
import ida_bytes
import ida_loader
@@ -64,6 +65,12 @@ if version < 9.0:
        info: idaapi.idainfo = idaapi.get_inf_structure()
        return info.is_64bit()

    def retrieve_input_file_md5() -> str:
        return ida_nalt.retrieve_input_file_md5()

    def retrieve_input_file_sha256() -> str:
        return ida_nalt.retrieve_input_file_sha256()

else:

    def get_filetype() -> "ida_ida.filetype_t":
@@ -78,6 +85,12 @@ else:
    def is_64bit() -> bool:
        return idaapi.inf_is_64bit()

    def retrieve_input_file_md5() -> str:
        return ida_nalt.retrieve_input_file_md5().hex()

    def retrieve_input_file_sha256() -> str:
        return ida_nalt.retrieve_input_file_sha256().hex()


def inform_user_ida_ui(message):
    # this isn't a logger, this is IDA's logging facility
@@ -5,8 +5,8 @@
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import io
import os
import sys
import logging
import datetime
import contextlib
@@ -69,6 +69,7 @@ BACKEND_DRAKVUF = "drakvuf"
BACKEND_VMRAY = "vmray"
BACKEND_FREEZE = "freeze"
BACKEND_BINEXPORT2 = "binexport2"
BACKEND_IDA = "ida"


class CorruptFile(ValueError):
@@ -170,6 +171,7 @@ def get_workspace(path: Path, input_format: str, sigpaths: List[Path]):
        # to do a subclass check via isinstance.
        if type(e) is Exception and "Couldn't convert rva" in e.args[0]:
            raise CorruptFile(e.args[0]) from e
        raise

    viv_utils.flirt.register_flirt_signature_analyzers(vw, [str(s) for s in sigpaths])

@@ -237,24 +239,15 @@ def get_extractor(
        return capa.features.extractors.dnfile.extractor.DnfileFeatureExtractor(input_path)

    elif backend == BACKEND_BINJA:
        import capa.helpers
        from capa.features.extractors.binja.find_binja_api import find_binja_path
        import capa.features.extractors.binja.find_binja_api as finder

        # When we are running as a standalone executable, we cannot directly import binaryninja
        # We need to first find the binja API installation path and add it into sys.path
        if capa.helpers.is_running_standalone():
            bn_api = find_binja_path()
            if bn_api.exists():
                sys.path.append(str(bn_api))
        if not finder.has_binaryninja():
            raise RuntimeError("cannot find Binary Ninja API module.")

        try:
            import binaryninja
            from binaryninja import BinaryView
        except ImportError:
            raise RuntimeError(
                "Cannot import binaryninja module. Please install the Binary Ninja Python API first: "
                + "https://docs.binary.ninja/dev/batch.html#install-the-api)."
            )
        if not finder.load_binaryninja():
            raise RuntimeError("failed to load Binary Ninja API module.")

        import binaryninja

        import capa.features.extractors.binja.extractor

@@ -269,7 +262,7 @@ def get_extractor(
            raise UnsupportedOSError()

        with console.status("analyzing program...", spinner="dots"):
            bv: BinaryView = binaryninja.load(str(input_path))
            bv: binaryninja.BinaryView = binaryninja.load(str(input_path))
            if bv is None:
                raise RuntimeError(f"Binary Ninja cannot open file {input_path}")

@@ -321,6 +314,34 @@ def get_extractor(

        return capa.features.extractors.binexport2.extractor.BinExport2FeatureExtractor(be2, buf)

    elif backend == BACKEND_IDA:
        import capa.features.extractors.ida.idalib as idalib

        if not idalib.has_idalib():
            raise RuntimeError("cannot find IDA idalib module.")

        if not idalib.load_idalib():
            raise RuntimeError("failed to load IDA idalib module.")

        import idapro
        import ida_auto

        import capa.features.extractors.ida.extractor

        logger.debug("idalib: opening database...")
        # idalib writes to stdout (ugh), so we have to capture that
        # so as not to screw up structured output.
        with capa.helpers.stdout_redirector(io.BytesIO()):
            with console.status("analyzing program...", spinner="dots"):
                if idapro.open_database(str(input_path), run_auto_analysis=True):
                    raise RuntimeError("failed to analyze input file")

                logger.debug("idalib: waiting for analysis...")
                ida_auto.auto_wait()
                logger.debug("idalib: opened database.")

        return capa.features.extractors.ida.extractor.IdaFeatureExtractor()

    else:
        raise ValueError("unexpected backend: " + backend)
21  capa/main.py
@@ -22,6 +22,7 @@ from pathlib import Path

import colorama
from pefile import PEFormatError
from rich.logging import RichHandler
from elftools.common.exceptions import ELFError

import capa.perf
@@ -43,6 +44,7 @@ import capa.features.extractors.common
from capa.rules import RuleSet
from capa.engine import MatchResults
from capa.loader import (
    BACKEND_IDA,
    BACKEND_VIV,
    BACKEND_CAPE,
    BACKEND_BINJA,
@@ -283,6 +285,7 @@ def install_common_args(parser, wanted=None):
    backends = [
        (BACKEND_AUTO, "(default) detect appropriate backend automatically"),
        (BACKEND_VIV, "vivisect"),
        (BACKEND_IDA, "IDA via idalib"),
        (BACKEND_PEFILE, "pefile (file features only)"),
        (BACKEND_BINJA, "Binary Ninja"),
        (BACKEND_DOTNET, ".NET"),
@@ -403,15 +406,23 @@ def handle_common_args(args):
        ShouldExitError: if the program is invoked incorrectly and should exit.
    """
    if args.quiet:
        logging.basicConfig(level=logging.WARNING)
        logging.getLogger().setLevel(logging.WARNING)
    elif args.debug:
        logging.basicConfig(level=logging.DEBUG)
        logging.getLogger().setLevel(logging.DEBUG)
    else:
        logging.basicConfig(level=logging.INFO)
        logging.getLogger().setLevel(logging.INFO)

    # use [/] after the logger name to reset any styling,
    # and prevent the color from carrying over to the message
    logformat = "[dim]%(name)s[/]: %(message)s"

    # set markup=True to allow the use of Rich's markup syntax in log messages
    rich_handler = RichHandler(markup=True, show_time=False, show_path=True, console=capa.helpers.log_console)
    rich_handler.setFormatter(logging.Formatter(logformat))

    # use RichHandler for root logger
    logging.getLogger().addHandler(rich_handler)

    # disable vivisect-related logging, it's verbose and not relevant for capa users
    set_vivisect_log_level(logging.CRITICAL)

@@ -892,6 +903,10 @@ def apply_extractor_filters(extractor: FeatureExtractor, extractor_filters: Filt
def main(argv: Optional[List[str]] = None):
    if sys.version_info < (3, 8):
        raise UnsupportedRuntimeError("This version of capa can only be used with Python 3.8+")
    elif sys.version_info < (3, 10):
        from warnings import warn

        warn("This is the last capa version supporting Python 3.8 and 3.9.", DeprecationWarning, stacklevel=2)

    if argv is None:
        argv = sys.argv[1:]
@@ -9,28 +9,29 @@
import io
from typing import Dict, List, Tuple, Union, Iterator, Optional

import termcolor
import rich.console
from rich.progress import Text

import capa.render.result_document as rd


def bold(s: str) -> str:
def bold(s: str) -> Text:
    """draw attention to the given string"""
    return termcolor.colored(s, "cyan")
    return Text.from_markup(f"[cyan]{s}")


def bold2(s: str) -> str:
def bold2(s: str) -> Text:
    """draw attention to the given string, within a `bold` section"""
    return termcolor.colored(s, "green")
    return Text.from_markup(f"[green]{s}")


def mute(s: str) -> str:
def mute(s: str) -> Text:
    """draw attention away from the given string"""
    return termcolor.colored(s, "dark_grey")
    return Text.from_markup(f"[dim]{s}")


def warn(s: str) -> str:
    return termcolor.colored(s, "yellow")
def warn(s: str) -> Text:
    return Text.from_markup(f"[yellow]{s}")


def format_parts_id(data: Union[rd.AttackSpec, rd.MBCSpec]):
@@ -85,3 +86,17 @@ class StringIO(io.StringIO):
    def writeln(self, s):
        self.write(s)
        self.write("\n")


class Console(rich.console.Console):
    def writeln(self, *args, **kwargs) -> None:
        """
        prints the text with a new line at the end.
        """
        return self.print(*args, **kwargs)

    def write(self, *args, **kwargs) -> None:
        """
        prints the text without a new line at the end.
        """
        return self.print(*args, **kwargs, end="")
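
Because these helpers now return `rich.text.Text` instead of ANSI-colored `str`, callers compose them with `Text.assemble` rather than f-strings, as the rendering hunks below do. A small illustrative sketch (the capability name is made up):

    from rich.text import Text

    import capa.render.utils as rutils

    console = rutils.Console(highlight=False)

    # f-string interpolation would flatten the Text object back to a plain str,
    # so compose styled and plain segments with Text.assemble instead
    capability = Text.assemble(rutils.bold("create TCP socket"), " (2 matches)")

    with console.capture() as capture:
        console.writeln(capability)
    print(capture.get(), end="")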
@@ -25,7 +25,8 @@ See the License for the specific language governing permissions and limitations

from typing import cast

import tabulate
from rich.text import Text
from rich.table import Table

import capa.rules
import capa.helpers
@@ -34,6 +35,7 @@ import capa.features.freeze as frz
import capa.render.result_document as rd
from capa.rules import RuleSet
from capa.engine import MatchResults
from capa.render.utils import Console


def format_address(address: frz.Address) -> str:
@@ -140,7 +142,7 @@ def render_call(layout: rd.DynamicLayout, addr: frz.Address) -> str:
    )


def render_static_meta(ostream, meta: rd.StaticMetadata):
def render_static_meta(console: Console, meta: rd.StaticMetadata):
    """
    like:

@@ -161,12 +163,16 @@ def render_static_meta(ostream, meta: rd.StaticMetadata):
        total feature count    1918
    """

    grid = Table.grid(padding=(0, 2))
    grid.add_column(style="dim")
    grid.add_column()

    rows = [
        ("md5", meta.sample.md5),
        ("sha1", meta.sample.sha1),
        ("sha256", meta.sample.sha256),
        ("path", meta.sample.path),
        ("timestamp", meta.timestamp),
        ("timestamp", str(meta.timestamp)),
        ("capa version", meta.version),
        ("os", meta.analysis.os),
        ("format", meta.analysis.format),
@@ -175,18 +181,21 @@ def render_static_meta(ostream, meta: rd.StaticMetadata):
        ("extractor", meta.analysis.extractor),
        ("base address", format_address(meta.analysis.base_address)),
        ("rules", "\n".join(meta.analysis.rules)),
        ("function count", len(meta.analysis.feature_counts.functions)),
        ("library function count", len(meta.analysis.library_functions)),
        ("function count", str(len(meta.analysis.feature_counts.functions))),
        ("library function count", str(len(meta.analysis.library_functions))),
        (
            "total feature count",
            meta.analysis.feature_counts.file + sum(f.count for f in meta.analysis.feature_counts.functions),
            str(meta.analysis.feature_counts.file + sum(f.count for f in meta.analysis.feature_counts.functions)),
        ),
    ]

    ostream.writeln(tabulate.tabulate(rows, tablefmt="plain"))
    for row in rows:
        grid.add_row(*row)

    console.print(grid)


def render_dynamic_meta(ostream, meta: rd.DynamicMetadata):
def render_dynamic_meta(console: Console, meta: rd.DynamicMetadata):
    """
    like:

@@ -205,12 +214,16 @@ def render_dynamic_meta(ostream, meta: rd.DynamicMetadata):
        total feature count    1918
    """

    table = Table.grid(padding=(0, 2))
    table.add_column(style="dim")
    table.add_column()

    rows = [
        ("md5", meta.sample.md5),
        ("sha1", meta.sample.sha1),
        ("sha256", meta.sample.sha256),
        ("path", meta.sample.path),
        ("timestamp", meta.timestamp),
        ("timestamp", str(meta.timestamp)),
        ("capa version", meta.version),
        ("os", meta.analysis.os),
        ("format", meta.analysis.format),
@@ -218,26 +231,29 @@ def render_dynamic_meta(ostream, meta: rd.DynamicMetadata):
        ("analysis", meta.flavor.value),
        ("extractor", meta.analysis.extractor),
        ("rules", "\n".join(meta.analysis.rules)),
        ("process count", len(meta.analysis.feature_counts.processes)),
        ("process count", str(len(meta.analysis.feature_counts.processes))),
        (
            "total feature count",
            meta.analysis.feature_counts.file + sum(p.count for p in meta.analysis.feature_counts.processes),
            str(meta.analysis.feature_counts.file + sum(p.count for p in meta.analysis.feature_counts.processes)),
        ),
    ]

    ostream.writeln(tabulate.tabulate(rows, tablefmt="plain"))
    for row in rows:
        table.add_row(*row)

    console.print(table)


def render_meta(osstream, doc: rd.ResultDocument):
def render_meta(console: Console, doc: rd.ResultDocument):
    if doc.meta.flavor == rd.Flavor.STATIC:
        render_static_meta(osstream, cast(rd.StaticMetadata, doc.meta))
        render_static_meta(console, cast(rd.StaticMetadata, doc.meta))
    elif doc.meta.flavor == rd.Flavor.DYNAMIC:
        render_dynamic_meta(osstream, cast(rd.DynamicMetadata, doc.meta))
        render_dynamic_meta(console, cast(rd.DynamicMetadata, doc.meta))
    else:
        raise ValueError("invalid meta analysis")


def render_rules(ostream, doc: rd.ResultDocument):
def render_rules(console: Console, doc: rd.ResultDocument):
    """
    like:

@@ -254,11 +270,15 @@ def render_rules(ostream, doc: rd.ResultDocument):
    if count == 1:
        capability = rutils.bold(rule.meta.name)
    else:
        capability = f"{rutils.bold(rule.meta.name)} ({count} matches)"
        capability = Text.assemble(rutils.bold(rule.meta.name), f" ({count} matches)")

    ostream.writeln(capability)
    console.print(capability)
    had_match = True

    table = Table.grid(padding=(0, 2))
    table.add_column(style="dim")
    table.add_column()

    rows = []

    ns = rule.meta.namespace
@@ -310,23 +330,26 @@ def render_rules(ostream, doc: rd.ResultDocument):

    rows.append(("matches", "\n".join(lines)))

    ostream.writeln(tabulate.tabulate(rows, tablefmt="plain"))
    ostream.write("\n")
    for row in rows:
        table.add_row(*row)

    console.print(table)
    console.print()

    if not had_match:
        ostream.writeln(rutils.bold("no capabilities found"))
        console.print(rutils.bold("no capabilities found"))


def render_verbose(doc: rd.ResultDocument):
    ostream = rutils.StringIO()
    console = Console(highlight=False)

    render_meta(ostream, doc)
    ostream.write("\n")
    with console.capture() as capture:
        render_meta(console, doc)
        console.print()
        render_rules(console, doc)
        console.print()

    render_rules(ostream, doc)
    ostream.write("\n")

    return ostream.getvalue()
    return capture.get()


def render(meta, rules: RuleSet, capabilities: MatchResults) -> str:
@@ -9,7 +9,8 @@ import logging
import textwrap
from typing import Dict, Iterable, Optional

import tabulate
from rich.text import Text
from rich.table import Table

import capa.rules
import capa.helpers
@@ -22,6 +23,7 @@ import capa.render.result_document as rd
import capa.features.freeze.features as frzf
from capa.rules import RuleSet
from capa.engine import MatchResults
from capa.render.utils import Console

logger = logging.getLogger(__name__)

@@ -45,7 +47,7 @@ def hanging_indent(s: str, indent: int) -> str:
    return textwrap.indent(s, prefix=prefix)[len(prefix) :]


def render_locations(ostream, layout: rd.Layout, locations: Iterable[frz.Address], indent: int):
def render_locations(console: Console, layout: rd.Layout, locations: Iterable[frz.Address], indent: int):
    import capa.render.verbose as v

    # it's possible to have an empty locations array here,
@@ -56,7 +58,7 @@ def render_locations(ostream, layout: rd.Layout, locations: Iterable[frz.Address
    if len(locations) == 0:
        return

    ostream.write(" @ ")
    console.write(" @ ")
    location0 = locations[0]

    if len(locations) == 1:
@@ -64,58 +66,58 @@ def render_locations(ostream, layout: rd.Layout, locations: Iterable[frz.Address

        if location.type == frz.AddressType.CALL:
            assert isinstance(layout, rd.DynamicLayout)
            ostream.write(hanging_indent(v.render_call(layout, location), indent + 1))
            console.write(hanging_indent(v.render_call(layout, location), indent + 1))
        else:
            ostream.write(v.format_address(locations[0]))
            console.write(v.format_address(locations[0]))

    elif location0.type == frz.AddressType.CALL and len(locations) > 1:
        location = locations[0]

        assert isinstance(layout, rd.DynamicLayout)
        s = f"{v.render_call(layout, location)}\nand {(len(locations) - 1)} more..."
        ostream.write(hanging_indent(s, indent + 1))
        console.write(hanging_indent(s, indent + 1))

    elif len(locations) > 4:
        # don't display too many locations, because it becomes very noisy.
        # probably only the first handful of locations will be useful for inspection.
        ostream.write(", ".join(map(v.format_address, locations[0:4])))
        ostream.write(f", and {(len(locations) - 4)} more...")
        console.write(", ".join(map(v.format_address, locations[0:4])))
        console.write(f", and {(len(locations) - 4)} more...")

    elif len(locations) > 1:
        ostream.write(", ".join(map(v.format_address, locations)))
        console.write(", ".join(map(v.format_address, locations)))

    else:
        raise RuntimeError("unreachable")


def render_statement(ostream, layout: rd.Layout, match: rd.Match, statement: rd.Statement, indent: int):
    ostream.write(" " * indent)
def render_statement(console: Console, layout: rd.Layout, match: rd.Match, statement: rd.Statement, indent: int):
    console.write(" " * indent)

    if isinstance(statement, rd.SubscopeStatement):
        # emit `basic block:`
        # rather than `subscope:`
        ostream.write(statement.scope)
        console.write(statement.scope)

        ostream.write(":")
        console.write(":")
        if statement.description:
            ostream.write(f" = {statement.description}")
        ostream.writeln("")
            console.write(f" = {statement.description}")
        console.writeln()

    elif isinstance(statement, (rd.CompoundStatement)):
        # emit `and:` `or:` `optional:` `not:`
        ostream.write(statement.type)
        console.write(statement.type)

        ostream.write(":")
        console.write(":")
        if statement.description:
            ostream.write(f" = {statement.description}")
        ostream.writeln("")
            console.write(f" = {statement.description}")
        console.writeln()

    elif isinstance(statement, rd.SomeStatement):
        ostream.write(f"{statement.count} or more:")
        console.write(f"{statement.count} or more:")

        if statement.description:
            ostream.write(f" = {statement.description}")
        ostream.writeln("")
            console.write(f" = {statement.description}")
        console.writeln()

    elif isinstance(statement, rd.RangeStatement):
        # `range` is a weird node, it's almost a hybrid of statement+feature.
@@ -133,25 +135,25 @@ def render_statement(ostream, layout: rd.Layout, match: rd.Match, statement: rd.
            value = rutils.bold2(value)

            if child.description:
                ostream.write(f"count({child.type}({value} = {child.description})): ")
                console.write(f"count({child.type}({value} = {child.description})): ")
            else:
                ostream.write(f"count({child.type}({value})): ")
                console.write(f"count({child.type}({value})): ")
        else:
            ostream.write(f"count({child.type}): ")
            console.write(f"count({child.type}): ")

        if statement.max == statement.min:
            ostream.write(f"{statement.min}")
            console.write(f"{statement.min}")
        elif statement.min == 0:
            ostream.write(f"{statement.max} or fewer")
            console.write(f"{statement.max} or fewer")
        elif statement.max == (1 << 64 - 1):
            ostream.write(f"{statement.min} or more")
            console.write(f"{statement.min} or more")
        else:
            ostream.write(f"between {statement.min} and {statement.max}")
            console.write(f"between {statement.min} and {statement.max}")

        if statement.description:
            ostream.write(f" = {statement.description}")
        render_locations(ostream, layout, match.locations, indent)
        ostream.writeln("")
            console.write(f" = {statement.description}")
        render_locations(console, layout, match.locations, indent)
        console.writeln()

    else:
        raise RuntimeError("unexpected match statement type: " + str(statement))
@@ -162,9 +164,9 @@ def render_string_value(s: str) -> str:


def render_feature(
    ostream, layout: rd.Layout, rule: rd.RuleMatches, match: rd.Match, feature: frzf.Feature, indent: int
    console: Console, layout: rd.Layout, rule: rd.RuleMatches, match: rd.Match, feature: frzf.Feature, indent: int
):
    ostream.write(" " * indent)
    console.write(" " * indent)

    key = feature.type
    value: Optional[str]
@@ -205,14 +207,14 @@ def render_feature(
    elif isinstance(feature, frzf.OperandOffsetFeature):
        key = f"operand[{feature.index}].offset"

    ostream.write(f"{key}: ")
    console.write(f"{key}: ")

    if value:
        ostream.write(rutils.bold2(value))
        console.write(rutils.bold2(value))

    if feature.description:
        ostream.write(capa.rules.DESCRIPTION_SEPARATOR)
        ostream.write(feature.description)
        console.write(capa.rules.DESCRIPTION_SEPARATOR)
        console.write(feature.description)

    if isinstance(feature, (frzf.OSFeature, frzf.ArchFeature, frzf.FormatFeature)):
        # don't show the location of these global features
@@ -224,35 +226,32 @@ def render_feature(
        elif isinstance(feature, (frzf.OSFeature, frzf.ArchFeature, frzf.FormatFeature)):
            pass
        else:
            render_locations(ostream, layout, match.locations, indent)
        ostream.write("\n")
            render_locations(console, layout, match.locations, indent)
        console.writeln()
    else:
        # like:
        #   regex: /blah/ = SOME_CONSTANT
        #     - "foo blah baz" @ 0x401000
        #     - "aaa blah bbb" @ 0x402000, 0x403400
        ostream.write(key)
        ostream.write(": ")
        ostream.write(value)
        ostream.write("\n")
        console.writeln(f"{key}: {value}")

        for capture, locations in sorted(match.captures.items()):
            ostream.write(" " * (indent + 1))
            ostream.write("- ")
            ostream.write(rutils.bold2(render_string_value(capture)))
            console.write(" " * (indent + 1))
            console.write("- ")
            console.write(rutils.bold2(render_string_value(capture)))
            if isinstance(layout, rd.DynamicLayout) and rule.meta.scopes.dynamic == capa.rules.Scope.CALL:
                # like above, don't re-render calls when in call scope.
                pass
            else:
                render_locations(ostream, layout, locations, indent=indent)
            ostream.write("\n")
                render_locations(console, layout, locations, indent=indent)
            console.writeln()


def render_node(ostream, layout: rd.Layout, rule: rd.RuleMatches, match: rd.Match, node: rd.Node, indent: int):
def render_node(console: Console, layout: rd.Layout, rule: rd.RuleMatches, match: rd.Match, node: rd.Node, indent: int):
    if isinstance(node, rd.StatementNode):
        render_statement(ostream, layout, match, node.statement, indent=indent)
        render_statement(console, layout, match, node.statement, indent=indent)
    elif isinstance(node, rd.FeatureNode):
        render_feature(ostream, layout, rule, match, node.feature, indent=indent)
        render_feature(console, layout, rule, match, node.feature, indent=indent)
    else:
        raise RuntimeError("unexpected node type: " + str(node))

@@ -265,7 +264,9 @@ MODE_SUCCESS = "success"
MODE_FAILURE = "failure"


def render_match(ostream, layout: rd.Layout, rule: rd.RuleMatches, match: rd.Match, indent=0, mode=MODE_SUCCESS):
def render_match(
    console: Console, layout: rd.Layout, rule: rd.RuleMatches, match: rd.Match, indent=0, mode=MODE_SUCCESS
):
    child_mode = mode
    if mode == MODE_SUCCESS:
        # display only nodes that evaluated successfully.
@@ -297,13 +298,13 @@ def render_match(ostream, layout: rd.Layout, rule: rd.RuleMatches, match: rd.Mat
    else:
        raise RuntimeError("unexpected mode: " + mode)

    render_node(ostream, layout, rule, match, match.node, indent=indent)
    render_node(console, layout, rule, match, match.node, indent=indent)

    for child in match.children:
        render_match(ostream, layout, rule, child, indent=indent + 1, mode=child_mode)
        render_match(console, layout, rule, child, indent=indent + 1, mode=child_mode)


def render_rules(ostream, doc: rd.ResultDocument):
def render_rules(console: Console, doc: rd.ResultDocument):
    """
    like:

@@ -350,13 +351,13 @@ def render_rules(ostream, doc: rd.ResultDocument):
    if count == 1:
        if rule.meta.lib:
            lib_info = " (library rule)"
        capability = f"{rutils.bold(rule.meta.name)}{lib_info}"
        capability = Text.assemble(rutils.bold(rule.meta.name), f"{lib_info}")
    else:
        if rule.meta.lib:
            lib_info = ", only showing first match of library rule"
        capability = f"{rutils.bold(rule.meta.name)} ({count} matches{lib_info})"
        capability = Text.assemble(rutils.bold(rule.meta.name), f" ({count} matches{lib_info})")

    ostream.writeln(capability)
    console.writeln(capability)
    had_match = True

    rows = []
@@ -402,7 +403,14 @@ def render_rules(ostream, doc: rd.ResultDocument):
    if rule.meta.description:
        rows.append(("description", rule.meta.description))

    ostream.writeln(tabulate.tabulate(rows, tablefmt="plain"))
    grid = Table.grid(padding=(0, 2))
    grid.add_column(style="dim")
    grid.add_column()

    for row in rows:
        grid.add_row(*row)

    console.writeln(grid)

    if capa.rules.Scope.FILE in rule.meta.scopes:
        matches = doc.rules[rule.meta.name].matches
@@ -413,61 +421,58 @@ def render_rules(ostream, doc: rd.ResultDocument):
            # so, let's be explicit about our assumptions and raise an exception if they fail.
            raise RuntimeError(f"unexpected file scope match count: {len(matches)}")
        _, first_match = matches[0]
        render_match(ostream, doc.meta.analysis.layout, rule, first_match, indent=0)
        render_match(console, doc.meta.analysis.layout, rule, first_match, indent=0)
    else:
        for location, match in sorted(doc.rules[rule.meta.name].matches):
            if doc.meta.flavor == rd.Flavor.STATIC:
                assert rule.meta.scopes.static is not None
|
||||
ostream.write(rule.meta.scopes.static.value)
|
||||
ostream.write(" @ ")
|
||||
ostream.write(capa.render.verbose.format_address(location))
|
||||
console.write(rule.meta.scopes.static.value + " @ ")
|
||||
console.write(capa.render.verbose.format_address(location))
|
||||
|
||||
if rule.meta.scopes.static == capa.rules.Scope.BASIC_BLOCK:
|
||||
func = frz.Address.from_capa(functions_by_bb[location.to_capa()])
|
||||
ostream.write(f" in function {capa.render.verbose.format_address(func)}")
|
||||
console.write(f" in function {capa.render.verbose.format_address(func)}")
|
||||
|
||||
elif doc.meta.flavor == rd.Flavor.DYNAMIC:
|
||||
assert rule.meta.scopes.dynamic is not None
|
||||
assert isinstance(doc.meta.analysis.layout, rd.DynamicLayout)
|
||||
|
||||
ostream.write(rule.meta.scopes.dynamic.value)
|
||||
|
||||
ostream.write(" @ ")
|
||||
console.write(rule.meta.scopes.dynamic.value + " @ ")
|
||||
|
||||
if rule.meta.scopes.dynamic == capa.rules.Scope.PROCESS:
|
||||
ostream.write(v.render_process(doc.meta.analysis.layout, location))
|
||||
console.write(v.render_process(doc.meta.analysis.layout, location))
|
||||
elif rule.meta.scopes.dynamic == capa.rules.Scope.THREAD:
|
||||
ostream.write(v.render_thread(doc.meta.analysis.layout, location))
|
||||
console.write(v.render_thread(doc.meta.analysis.layout, location))
|
||||
elif rule.meta.scopes.dynamic == capa.rules.Scope.CALL:
|
||||
ostream.write(hanging_indent(v.render_call(doc.meta.analysis.layout, location), indent=1))
|
||||
console.write(hanging_indent(v.render_call(doc.meta.analysis.layout, location), indent=1))
|
||||
else:
|
||||
capa.helpers.assert_never(rule.meta.scopes.dynamic)
|
||||
|
||||
else:
|
||||
capa.helpers.assert_never(doc.meta.flavor)
|
||||
|
||||
ostream.write("\n")
|
||||
render_match(ostream, doc.meta.analysis.layout, rule, match, indent=1)
|
||||
console.writeln()
|
||||
render_match(console, doc.meta.analysis.layout, rule, match, indent=1)
|
||||
if rule.meta.lib:
|
||||
# only show first match
|
||||
break
|
||||
|
||||
ostream.write("\n")
|
||||
console.writeln()
|
||||
|
||||
if not had_match:
|
||||
ostream.writeln(rutils.bold("no capabilities found"))
|
||||
console.writeln(rutils.bold("no capabilities found"))
|
||||
|
||||
|
||||
def render_vverbose(doc: rd.ResultDocument):
|
||||
ostream = rutils.StringIO()
|
||||
console = Console(highlight=False)
|
||||
|
||||
capa.render.verbose.render_meta(ostream, doc)
|
||||
ostream.write("\n")
|
||||
with console.capture() as capture:
|
||||
capa.render.verbose.render_meta(console, doc)
|
||||
console.writeln()
|
||||
render_rules(console, doc)
|
||||
console.writeln()
|
||||
|
||||
render_rules(ostream, doc)
|
||||
ostream.write("\n")
|
||||
|
||||
return ostream.getvalue()
|
||||
return capture.get()
|
||||
|
||||
|
||||
def render(meta, rules: RuleSet, capabilities: MatchResults) -> str:
|
||||
|
||||
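A minimal sketch of the rich capture pattern this migration adopts, using the stock rich.console.Console (capa's own Console wrapper in capa.render.utils additionally provides writeln, which is an assumption carried over from the diff above):

from rich.console import Console

console = Console(highlight=False)  # disable rich's automatic highlighting
with console.capture() as capture:
    # output is buffered while capturing, not written to stdout
    console.print("no capabilities found", style="bold")
rendered: str = capture.get()  # the rendered output, as a plain string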
@@ -5,7 +5,7 @@
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
__version__ = "7.3.0"
__version__ = "7.4.0"


def get_major_version():

@@ -26,7 +26,9 @@

### Bug Fixes

### capa explorer IDA Pro plugin
### capa Explorer Web

### capa Explorer IDA Pro plugin

### Development

@@ -42,5 +44,6 @@
- [ ] [publish to PyPI](https://pypi.org/project/flare-capa)
- [ ] [create tag in capa rules](https://github.com/mandiant/capa-rules/tags)
- [ ] [create release in capa rules](https://github.com/mandiant/capa-rules/releases)
- [ ] Update [homepage](https://github.com/mandiant/capa/blob/master/web/public/index.html)
- [ ] [Spread the word](https://twitter.com)
- [ ] Update internal service

@@ -20,7 +20,7 @@ authors = [
description = "The FLARE team's open-source tool to identify capabilities in executable files."
readme = {file = "README.md", content-type = "text/markdown"}
license = {file = "LICENSE.txt"}
requires-python = ">=3.8"
requires-python = ">=3.8.1"
keywords = ["malware analysis", "reverse engineering", "capability detection", "software behaviors", "capa", "FLARE"]
classifiers = [
"Development Status :: 5 - Production/Stable",
@@ -65,12 +65,8 @@ dependencies = [
# or minor otherwise).
# As specific constraints are identified, please provide
# comments and context.
"tqdm>=4",
"pyyaml>=6",
"tabulate>=0.9",
"colorama>=0.4",
"termcolor>=2",
"wcwidth>=0.2",
"ida-settings>=2",
"ruamel.yaml>=0.18",
"pefile>=2023.2.7",
@@ -81,6 +77,8 @@ dependencies = [
"protobuf>=5",
"msgspec>=0.18.6",
"xmltodict>=0.13.0",
# for library detection (in development)
"nltk>=3",

# ---------------------------------------
# Dependencies that we develop
@@ -146,11 +144,9 @@ dev = [
"types-backports==0.1.3",
"types-colorama==0.4.15.11",
"types-PyYAML==6.0.8",
"types-tabulate==0.9.0.20240106",
"types-termcolor==1.1.4",
"types-psutil==6.0.0.20240901",
"types_requests==2.32.0.20240712",
"types-protobuf==5.27.0.20240907",
"types-protobuf==5.28.0.20240924",
"deptry==0.20.0"
]
build = [
@@ -159,7 +155,7 @@ build = [
# These dependencies are not used in production environments
# and should not conflict with other libraries/tooling.
"pyinstaller==6.10.0",
"setuptools==70.0.0",
"setuptools==75.1.0",
"build==1.2.2"
]
scripts = [
@@ -183,7 +179,9 @@ known_first_party = [
"binaryninja",
"flirt",
"ghidra",
"idapro",
"ida_ida",
"ida_auto",
"ida_bytes",
"ida_entry",
"ida_funcs",
@@ -234,10 +232,7 @@ DEP002 = [
"types-protobuf",
"types-psutil",
"types-PyYAML",
"types-tabulate",
"types-termcolor",
"types_requests",
"wcwidth"
]

# dependencies imported but missing from definitions

@@ -20,29 +20,28 @@ markdown-it-py==3.0.0
mdurl==0.1.2
msgpack==1.0.8
networkx==3.1
pefile==2023.2.7
pefile==2024.8.26
pip==24.2
protobuf==5.27.3
protobuf==5.28.2
pyasn1==0.5.1
pyasn1-modules==0.3.0
pycparser==2.22
pydantic==2.9.1
pydantic-core==2.23.3
pydantic==2.9.2
# pydantic pins pydantic-core,
# but dependabot updates these separately (which is broken) and is annoying,
# so we rely on pydantic to pull in the right version of pydantic-core.
# pydantic-core==2.23.4
xmltodict==0.13.0
pyelftools==0.31
pygments==2.18.0
python-flirt==0.8.10
pyyaml==6.0.2
rich==13.8.0
rich==13.9.2
ruamel-yaml==0.18.6
ruamel-yaml-clib==0.2.8
setuptools==70.0.0
setuptools==75.1.0
six==1.16.0
sortedcontainers==2.4.0
tabulate==0.9.0
termcolor==2.4.0
tqdm==4.66.5
viv-utils==0.7.11
vivisect==1.2.1
wcwidth==0.2.13
msgspec==0.18.6

Submodule rules updated: dec3ded6f6...64b174e502

316 scripts/compare-backends.py (new file)
@@ -0,0 +1,316 @@
# Copyright (C) 2024 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.

import sys
import json
import time
import logging
import argparse
import contextlib
import statistics
import subprocess
import multiprocessing
from typing import Set, Dict, List, Optional
from pathlib import Path
from collections import Counter
from dataclasses import dataclass
from multiprocessing import Pool

import rich
import rich.box
import rich.table

import capa.main

logger = logging.getLogger("capa.compare-backends")

BACKENDS = ("vivisect", "ida", "binja")


@dataclass
class CapaInvocation:
path: Path
backend: str
duration: float
returncode: int
stdout: Optional[str]
stderr: Optional[str]
err: Optional[str]


def invoke_capa(file: Path, backend: str) -> CapaInvocation:
stdout = None
stderr = None
err = None
returncode: int
try:
logger.debug("run capa: %s: %s", backend, file.name)
t1 = time.time()
child = subprocess.run(
["python", "-m", "capa.main", "--json", "--backend=" + backend, str(file)],
capture_output=True,
check=True,
text=True,
encoding="utf-8",
)
returncode = child.returncode
stdout = child.stdout
stderr = child.stderr
except subprocess.CalledProcessError as e:
returncode = e.returncode
stdout = e.stdout
stderr = e.stderr

logger.debug("%s:%s: error", backend, file.name)
err = str(e)
else:
pass
finally:
t2 = time.time()

return CapaInvocation(
path=file,
backend=backend,
duration=t2 - t1,
returncode=returncode,
stdout=stdout,
stderr=stderr,
err=err,
)


def wrapper_invoke_capa(args):
file, backend = args
return invoke_capa(file, backend)


def collect(args):
results_path = args.results_path
if not results_path.is_file():
default_doc = {backend: {} for backend in BACKENDS}  # type: ignore
results_path.write_text(json.dumps(default_doc), encoding="utf-8")

testfiles = Path(__file__).parent.parent / "tests" / "data"

for file in sorted(p for p in testfiles.glob("*")):
# remove leftover analysis files
# because IDA doesn't cleanup after itself, currently.
if file.suffix in (".til", ".id0", ".id1", ".id2", ".nam", ".viv"):
logger.debug("removing: %s", file)
with contextlib.suppress(IOError):
file.unlink()

doc = json.loads(results_path.read_text(encoding="utf-8"))

plan = []
for file in sorted(p for p in testfiles.glob("*")):
if not file.is_file():
continue

if file.is_dir():
continue

if file.name.startswith("."):
continue

if file.suffix not in (".exe_", ".dll_", ".elf_", ""):
continue

logger.debug("%s", file.name)
key = str(file)

for backend in BACKENDS:

if (backend, file.name) in {
("binja", "0953cc3b77ed2974b09e3a00708f88de931d681e2d0cb64afbaf714610beabe6.exe_")
}:
# this file takes 38GB+ and 20hrs+
# https://github.com/Vector35/binaryninja-api/issues/5951
continue

if key in doc[backend]:
if not args.retry_failures:
continue

if not doc[backend][key]["err"]:
# didn't previously fail, don't repeat work
continue

else:
# want to retry this previous failure
pass

plan.append((file, backend))

pool_size = multiprocessing.cpu_count() // 2
logger.info("work pool size: %d", pool_size)
with Pool(processes=pool_size) as pool:
for i, result in enumerate(pool.imap_unordered(wrapper_invoke_capa, plan)):
doc[result.backend][str(result.path)] = {
"path": str(result.path),
"returncode": result.returncode,
"stdout": result.stdout,
"stderr": result.stderr,
"err": result.err,
"duration": result.duration,
}

if i % 8 == 0:
logger.info("syncing output database")
results_path.write_text(json.dumps(doc))

logger.info(
"%.1f\t%s %s %s",
result.duration,
"(err)" if result.err else " ",
result.backend.ljust(8),
result.path.name,
)

results_path.write_text(json.dumps(doc))
return


def report(args):
doc = json.loads(args.results_path.read_text(encoding="utf-8"))

samples = set()
for backend in BACKENDS:
samples.update(doc[backend].keys())

failures_by_backend: Dict[str, Set[str]] = {backend: set() for backend in BACKENDS}
durations_by_backend: Dict[str, List[float]] = {backend: [] for backend in BACKENDS}

console = rich.get_console()
for key in sorted(samples):
sample = Path(key).name
console.print(sample, style="bold")

seen_rules: Counter[str] = Counter()

rules_by_backend: Dict[str, Set[str]] = {backend: set() for backend in BACKENDS}

for backend in BACKENDS:
if key not in doc[backend]:
continue

entry = doc[backend][key]
duration = entry["duration"]

if not entry["err"]:
matches = json.loads(entry["stdout"])["rules"].keys()
seen_rules.update(matches)
rules_by_backend[backend].update(matches)
durations_by_backend[backend].append(duration)

console.print(f" {backend: >8}: {duration: >6.1f}s {len(matches): >3d} matches")

else:
failures_by_backend[backend].add(sample)
console.print(f" {backend: >8}: {duration: >6.1f}s (error)")

if not seen_rules:
console.print()
continue

t = rich.table.Table(box=rich.box.SIMPLE, header_style="default")
t.add_column("viv")
t.add_column("ida")
t.add_column("bn")
t.add_column("rule")

for rule, _ in seen_rules.most_common():
t.add_row(
"x" if rule in rules_by_backend["vivisect"] else " ",
"x" if rule in rules_by_backend["ida"] else " ",
"x" if rule in rules_by_backend["binja"] else " ",
rule,
)

console.print(t)

for backend in BACKENDS:
console.print(f"failures for {backend}:", style="bold")
for failure in sorted(failures_by_backend[backend]):
console.print(f" - {failure}")

if not failures_by_backend[backend]:
console.print(" (none)", style="green")
console.print()

console.print("durations:", style="bold")
console.print(" (10-quantiles, in seconds)", style="grey37")
for backend in BACKENDS:
q = statistics.quantiles(durations_by_backend[backend], n=10)
console.print(f" {backend: <8}: ", end="")
for i in range(9):
if i in (4, 8):
style = "bold"
else:
style = "default"
console.print(f"{q[i]: >6.1f}", style=style, end=" ")
console.print()
console.print(" ^-- 10% of samples took less than this ^", style="grey37")
console.print(" 10% of samples took more than this -----------------+", style="grey37")

console.print()
for backend in BACKENDS:
total = sum(durations_by_backend[backend])
successes = len(durations_by_backend[backend])
avg = statistics.mean(durations_by_backend[backend])
console.print(
f" {backend: <8}: {total: >7.0f} seconds across {successes: >4d} successful runs, {avg: >4.1f} average"
)
console.print()

console.print("slowest samples:", style="bold")
for backend in BACKENDS:
console.print(backend)
for duration, path in sorted(
((d["duration"], Path(d["path"]).name) for d in doc[backend].values()), reverse=True
)[:5]:
console.print(f" - {duration: >6.1f} {path}")

return


def main(argv=None):
if argv is None:
argv = sys.argv[1:]

default_samples_path = Path(__file__).resolve().parent.parent / "tests" / "data"

parser = argparse.ArgumentParser(description="Compare analysis backends.")
capa.main.install_common_args(
parser,
wanted=set(),
)

subparsers = parser.add_subparsers()
collect_parser = subparsers.add_parser("collect")
collect_parser.add_argument("results_path", type=Path, help="Path to output JSON file")
collect_parser.add_argument("--samples", type=Path, default=default_samples_path, help="Path to samples")
collect_parser.add_argument("--retry-failures", action="store_true", help="Retry previous failures")
collect_parser.set_defaults(func=collect)

report_parser = subparsers.add_parser("report")
report_parser.add_argument("results_path", type=Path, help="Path to JSON file")
report_parser.set_defaults(func=report)

args = parser.parse_args(args=argv)

try:
capa.main.handle_common_args(args)
except capa.main.ShouldExitError as e:
return e.status_code

args.func(args)


if __name__ == "__main__":
sys.exit(main())
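report() above prints 10-quantiles of the per-backend durations and bolds indexes 4 and 8. A small sketch of why those indexes matter (the durations are illustrative):

import statistics

durations = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0]
q = statistics.quantiles(durations, n=10)  # n=10 yields n - 1 == 9 cut points
# q[4] is the median (50th percentile); q[8] is the 90th percentile,
# i.e. 10% of samples took longer than q[8], matching the legend printed above.
print(f"median={q[4]:.1f}s p90={q[8]:.1f}s")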
106 scripts/detect-backends.py (new file)

@@ -0,0 +1,106 @@
# Copyright (C) 2024 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.

import sys
import logging
import argparse
import importlib.util

import rich
import rich.table

import capa.main
from capa.features.extractors.ida.idalib import find_idalib, load_idalib, is_idalib_installed
from capa.features.extractors.binja.find_binja_api import find_binaryninja, load_binaryninja, is_binaryninja_installed

logger = logging.getLogger(__name__)


def is_vivisect_installed() -> bool:
try:
return importlib.util.find_spec("vivisect") is not None
except ModuleNotFoundError:
return False


def load_vivisect() -> bool:
try:
import vivisect  # noqa: F401 unused import

return True
except ImportError:
return False


def main(argv=None):
if argv is None:
argv = sys.argv[1:]

parser = argparse.ArgumentParser(description="Detect analysis backends.")
capa.main.install_common_args(parser, wanted=set())
args = parser.parse_args(args=argv)

try:
capa.main.handle_common_args(args)
except capa.main.ShouldExitError as e:
return e.status_code

if args.debug:
logging.getLogger("capa").setLevel(logging.DEBUG)
logging.getLogger("viv_utils").setLevel(logging.DEBUG)
else:
logging.getLogger("capa").setLevel(logging.ERROR)
logging.getLogger("viv_utils").setLevel(logging.ERROR)

table = rich.table.Table()
table.add_column("backend")
table.add_column("already installed?")
table.add_column("found?")
table.add_column("loads?")

if True:
row = ["vivisect"]
if is_vivisect_installed():
row.append("True")
row.append("-")
else:
row.append("False")
row.append("False")

row.append(str(load_vivisect()))
table.add_row(*row)

if True:
row = ["Binary Ninja"]
if is_binaryninja_installed():
row.append("True")
row.append("-")
else:
row.append("False")
row.append(str(find_binaryninja() is not None))

row.append(str(load_binaryninja()))
table.add_row(*row)

if True:
row = ["IDA idalib"]
if is_idalib_installed():
row.append("True")
row.append("-")
else:
row.append("False")
row.append(str(find_idalib() is not None))

row.append(str(load_idalib()))
table.add_row(*row)

rich.print(table)


if __name__ == "__main__":
sys.exit(main())

@@ -31,11 +31,9 @@ from typing import Set, Dict, List
from pathlib import Path
from dataclasses import field, dataclass

import tqdm
import pydantic
import termcolor
import ruamel.yaml
import tqdm.contrib.logging
from rich import print

import capa.main
import capa.rules
@@ -51,18 +49,6 @@ from capa.render.result_document import RuleMetadata
logger = logging.getLogger("lint")


def red(s):
return termcolor.colored(s, "red")


def orange(s):
return termcolor.colored(s, "yellow")


def green(s):
return termcolor.colored(s, "green")


@dataclass
class Context:
"""
@@ -80,8 +66,8 @@ class Context:


class Lint:
WARN = orange("WARN")
FAIL = red("FAIL")
WARN = "[yellow]WARN[/yellow]"
FAIL = "[red]FAIL[/red]"

name = "lint"
level = FAIL
@@ -896,7 +882,7 @@ def lint_rule(ctx: Context, rule: Rule):
if (not lints_failed) and (not lints_warned) and has_examples:
print("")
print(f'{" (nursery) " if is_nursery_rule(rule) else ""} {rule.name}')
print(f" {Lint.WARN}: {green('no lint failures')}: Graduate the rule")
print(f" {Lint.WARN}: '[green]no lint failures[/green]': Graduate the rule")
print("")
else:
lints_failed = len(tuple(filter(lambda v: v.level == Lint.FAIL, violations)))
@@ -921,12 +907,15 @@ def lint(ctx: Context):
ret = {}

source_rules = [rule for rule in ctx.rules.rules.values() if not rule.is_subscope_rule()]
with tqdm.contrib.logging.tqdm_logging_redirect(source_rules, unit="rule", leave=False) as pbar:
with capa.helpers.redirecting_print_to_tqdm(False):
for rule in pbar:
name = rule.name
pbar.set_description(width(f"linting rule: {name}", 48))
ret[name] = lint_rule(ctx, rule)
n_rules: int = len(source_rules)

with capa.helpers.CapaProgressBar(transient=True, console=capa.helpers.log_console) as pbar:
task = pbar.add_task(description="linting", total=n_rules, unit="rule")
for rule in source_rules:
name = rule.name
pbar.update(task, description=width(f"linting rule: {name}", 48))
ret[name] = lint_rule(ctx, rule)
pbar.advance(task)

return ret

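The lint loop above swaps tqdm for capa.helpers.CapaProgressBar, a capa-specific wrapper around rich progress bars (its unit= argument is part of that wrapper). A minimal sketch of the same pattern with plain rich.progress; the rule names are illustrative:

from rich.progress import Progress

rules = ["create TCP socket", "encrypt data using RC4"]  # illustrative

with Progress(transient=True) as pbar:  # transient: clear the bar when done
    task = pbar.add_task(description="linting", total=len(rules))
    for name in rules:
        pbar.update(task, description=f"linting rule: {name}")
        # ... lint the rule here ...
        pbar.advance(task)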
@@ -1020,18 +1009,18 @@ def main(argv=None):
logger.debug("lints ran for ~ %02d:%02dm", min, sec)

if warned_rules:
print(orange("rules with WARN:"))
print("[yellow]rules with WARN:[/yellow]")
for warned_rule in sorted(warned_rules):
print(" - " + warned_rule)
print()

if failed_rules:
print(red("rules with FAIL:"))
print("[red]rules with FAIL:[/red]")
for failed_rule in sorted(failed_rules):
print(" - " + failed_rule)
return 1
else:
logger.info(green("no lints failed, nice!"))
logger.info("[green]no lints failed, nice![/green]")
return 0


@@ -42,9 +42,10 @@ import logging
import argparse
import subprocess

import tqdm
import humanize
import tabulate
from rich import box
from rich.table import Table
from rich.console import Console

import capa.main
import capa.perf
@@ -92,51 +93,61 @@ def main(argv=None):
except capa.main.ShouldExitError as e:
return e.status_code

with tqdm.tqdm(total=args.number * args.repeat, leave=False) as pbar:
with capa.helpers.CapaProgressBar(console=capa.helpers.log_console) as progress:
total_iterations = args.number * args.repeat
task = progress.add_task("profiling", total=total_iterations)

def do_iteration():
capa.perf.reset()
capa.capabilities.common.find_capabilities(rules, extractor, disable_progress=True)
pbar.update(1)

progress.advance(task)

samples = timeit.repeat(do_iteration, number=args.number, repeat=args.repeat)

logger.debug("perf: find capabilities: min: %0.2fs", (min(samples) / float(args.number)))
logger.debug("perf: find capabilities: avg: %0.2fs", (sum(samples) / float(args.repeat) / float(args.number)))
logger.debug(
"perf: find capabilities: avg: %0.2fs",
(sum(samples) / float(args.repeat) / float(args.number)),
)
logger.debug("perf: find capabilities: max: %0.2fs", (max(samples) / float(args.number)))

for counter, count in capa.perf.counters.most_common():
logger.debug("perf: counter: %s: %s", counter, count)

print(
tabulate.tabulate(
[(counter, humanize.intcomma(count)) for counter, count in capa.perf.counters.most_common()],
headers=["feature class", "evaluation count"],
tablefmt="github",
)
)
print()
console = Console()

print(
tabulate.tabulate(
[
(
args.label,
"{:,}".format(capa.perf.counters["evaluate.feature"]),
# python documentation indicates that min(samples) should be preferred,
# so lets put that first.
#
# https://docs.python.org/3/library/timeit.html#timeit.Timer.repeat
f"{(min(samples) / float(args.number)):.2f}s",
f"{(sum(samples) / float(args.repeat) / float(args.number)):.2f}s",
f"{(max(samples) / float(args.number)):.2f}s",
)
],
headers=["label", "count(evaluations)", "min(time)", "avg(time)", "max(time)"],
tablefmt="github",
)
table1 = Table(box=box.MARKDOWN)
table1.add_column("feature class")
table1.add_column("evaluation count")

for counter, count in capa.perf.counters.most_common():
table1.add_row(counter, humanize.intcomma(count))

console.print(table1)
console.print()

table2 = Table(box=box.MARKDOWN)
table2.add_column("label")
table2.add_column("count(evaluations)", style="magenta")
table2.add_column("min(time)", style="green")
table2.add_column("avg(time)", style="yellow")
table2.add_column("max(time)", style="red")

table2.add_row(
args.label,
# python documentation indicates that min(samples) should be preferred,
# so lets put that first.
#
# https://docs.python.org/3/library/timeit.html#timeit.Timer.repeat
"{:,}".format(capa.perf.counters["evaluate.feature"]),
f"{(min(samples) / float(args.number)):.2f}s",
f"{(sum(samples) / float(args.repeat) / float(args.number)):.2f}s",
f"{(max(samples) / float(args.number)):.2f}s",
)

console.print(table2)

return 0

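As the comment retained in table2 notes, the Python documentation recommends min() rather than the mean for timeit measurements, since slower runs usually reflect system interference rather than the code under test. A short sketch of how the samples map to per-iteration times; the workload is a stand-in:

import timeit

def do_iteration():
    sum(range(10_000))  # stand-in for find_capabilities()

number, repeat = 3, 5
samples = timeit.repeat(do_iteration, number=number, repeat=repeat)
# each of the `repeat` samples is the wall time of `number` iterations,
# so the best per-iteration estimate is:
best = min(samples) / number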
184 scripts/show-object-layout.py (new file)

@@ -0,0 +1,184 @@
import sys
import sqlite3
import argparse
from pathlib import Path
from dataclasses import dataclass

import pefile

import capa.main


@dataclass
class AssemblageRow:
# from table: binaries
binary_id: int
file_name: str
platform: str
build_mode: str
toolset_version: str
github_url: str
optimization: str
repo_last_update: int
size: int
path: str
license: str
binary_hash: str
repo_commit_hash: str
# from table: functions
function_id: int
function_name: str
function_hash: str
top_comments: str
source_codes: str
prototype: str
_source_file: str
# from table: rvas
rva_id: int
start_rva: int
end_rva: int

@property
def source_file(self):
# cleanup some extra metadata provided by assemblage
return self._source_file.partition(" (MD5: ")[0].partition(" (0x3: ")[0]


class Assemblage:
conn: sqlite3.Connection
samples: Path

def __init__(self, db: Path, samples: Path):
super().__init__()

self.db = db
self.samples = samples

self.conn = sqlite3.connect(self.db)
with self.conn:
self.conn.executescript("""
PRAGMA journal_mode = WAL;
PRAGMA synchronous = NORMAL;
PRAGMA busy_timeout = 5000;
PRAGMA cache_size = -20000; -- 20MB
PRAGMA foreign_keys = true;
PRAGMA temp_store = memory;

BEGIN IMMEDIATE TRANSACTION;
CREATE INDEX IF NOT EXISTS idx__functions__binary_id ON functions (binary_id);
CREATE INDEX IF NOT EXISTS idx__rvas__function_id ON rvas (function_id);

CREATE VIEW IF NOT EXISTS assemblage AS
SELECT
binaries.id AS binary_id,
binaries.file_name AS file_name,
binaries.platform AS platform,
binaries.build_mode AS build_mode,
binaries.toolset_version AS toolset_version,
binaries.github_url AS github_url,
binaries.optimization AS optimization,
binaries.repo_last_update AS repo_last_update,
binaries.size AS size,
binaries.path AS path,
binaries.license AS license,
binaries.hash AS hash,
binaries.repo_commit_hash AS repo_commit_hash,

functions.id AS function_id,
functions.name AS function_name,
functions.hash AS function_hash,
functions.top_comments AS top_comments,
functions.source_codes AS source_codes,
functions.prototype AS prototype,
functions.source_file AS source_file,

rvas.id AS rva_id,
rvas.start AS start_rva,
rvas.end AS end_rva
FROM binaries
JOIN functions ON binaries.id = functions.binary_id
JOIN rvas ON functions.id = rvas.function_id;
""")

def get_row_by_binary_id(self, binary_id: int) -> AssemblageRow:
with self.conn:
cur = self.conn.execute("SELECT * FROM assemblage WHERE binary_id = ? LIMIT 1;", (binary_id, ))
return AssemblageRow(*cur.fetchone())

def get_rows_by_binary_id(self, binary_id: int) -> AssemblageRow:
with self.conn:
cur = self.conn.execute("SELECT * FROM assemblage WHERE binary_id = ?;", (binary_id, ))
row = cur.fetchone()
while row:
yield AssemblageRow(*row)
row = cur.fetchone()

def get_path_by_binary_id(self, binary_id: int) -> Path:
with self.conn:
cur = self.conn.execute("""SELECT path FROM assemblage WHERE binary_id = ? LIMIT 1""", (binary_id, ))
return self.samples / cur.fetchone()[0]

def get_pe_by_binary_id(self, binary_id: int) -> pefile.PE:
path = self.get_path_by_binary_id(binary_id)
return pefile.PE(data=path.read_bytes(), fast_load=True)


def main(argv=None):
if argv is None:
argv = sys.argv[1:]

parser = argparse.ArgumentParser(description="Inspect object boundaries in compiled programs")
capa.main.install_common_args(parser, wanted={})
parser.add_argument("assemblage_database", type=Path, help="path to Assemblage database")
parser.add_argument("assemblage_directory", type=Path, help="path to Assemblage samples directory")
parser.add_argument("binary_id", type=int, help="primary key of binary to inspect")
args = parser.parse_args(args=argv)

try:
capa.main.handle_common_args(args)
except capa.main.ShouldExitError as e:
return e.status_code

if not args.assemblage_database.is_file():
raise ValueError("database doesn't exist")

db = Assemblage(args.assemblage_database, args.assemblage_directory)
# print(db.get_row_by_binary_id(args.binary_id))
# print(db.get_pe_by_binary_id(args.binary_id))

@dataclass
class Function:
file: str
name: str
start_rva: int
end_rva: int

functions = [
Function(
file=m.source_file,
name=m.function_name,
start_rva=m.start_rva,
end_rva=m.end_rva,
)
for m in db.get_rows_by_binary_id(args.binary_id)
]

import rich
import rich.table

print(db.get_path_by_binary_id(args.binary_id))

t = rich.table.Table()
t.add_column("rva")
t.add_column("filename")
t.add_column("name")

for function in sorted(functions, key=lambda f: f.start_rva):
t.add_row(hex(function.start_rva), function.file, function.name)

rich.print(t)

# db.conn.close()

if __name__ == "__main__":
sys.exit(main())
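A short usage sketch for the Assemblage helper above; the database and samples paths are hypothetical:

from pathlib import Path

db = Assemblage(Path("assemblage.sqlite"), Path("./samples"))
# enumerate function boundaries for one binary, ordered by RVA:
for row in sorted(db.get_rows_by_binary_id(1), key=lambda r: r.start_rva):
    print(hex(row.start_rva), row.source_file, row.function_name)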
@@ -12,11 +12,12 @@ import sys
import typing
import logging
import argparse
from typing import Set, Tuple
from typing import Set, List, Tuple
from collections import Counter

import tabulate
from termcolor import colored
from rich import print
from rich.text import Text
from rich.table import Table

import capa.main
import capa.rules
@@ -77,23 +78,30 @@ def get_file_features(
return feature_map


def get_colored(s: str):
def get_colored(s: str) -> Text:
if "(" in s and ")" in s:
s_split = s.split("(", 1)
s_color = colored(s_split[1][:-1], "cyan")
return f"{s_split[0]}({s_color})"
return Text.assemble(s_split[0], "(", (s_split[1][:-1], "cyan"), ")")
else:
return colored(s, "cyan")
return Text(s, style="cyan")


def print_unused_features(feature_map: typing.Counter[Feature], rules_feature_set: Set[Feature]):
unused_features = []
unused_features: List[Tuple[str, Text]] = []
for feature, count in reversed(feature_map.most_common()):
if feature in rules_feature_set:
continue
unused_features.append((str(count), get_colored(str(feature))))

table = Table(title="Unused Features", box=None)
table.add_column("Count", style="dim")
table.add_column("Feature")

for count_str, feature_text in unused_features:
table.add_row(count_str, feature_text)

print("\n")
print(tabulate.tabulate(unused_features, headers=["Count", "Feature"], tablefmt="plain"))
print(table)
print("\n")

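get_colored() now returns a rich Text object instead of a string with embedded ANSI escapes from termcolor. A minimal sketch of the Text.assemble pattern; the feature string is illustrative:

from rich.text import Text

# parts may be plain strings or (text, style) pairs:
t = Text.assemble("number(", ("0x4D5A", "cyan"), ")")
# renders as: number(0x4D5A), with the value styled cyan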
Submodule tests/data updated: 93dd0f904a...2de79e9dfb
@@ -431,6 +431,14 @@ def get_data_path_by_name(name) -> Path:
/ "vmray"
/ "93b2d1840566f45fab674ebc79a9d19c88993bcb645e0357f3cb584d16e7c795_min_archive.zip"
)
elif name.startswith("2f8a79-vmray"):
return (
CD
/ "data"
/ "dynamic"
/ "vmray"
/ "2f8a79b12a7a989ac7e5f6ec65050036588a92e65aeb6841e08dc228ff0e21b4_min_archive.zip"
)
elif name.startswith("ea2876"):
return CD / "data" / "ea2876e9175410b6f6719f80ee44b9553960758c7d0f7bed73c0fe9a78d8e669.dll_"
elif name.startswith("1038a2"):

@@ -37,6 +37,8 @@ DYNAMIC_CAPE_FEATURE_PRESENCE_TESTS = sorted(
),
("0000a657", "process=(1180:3052)", capa.features.common.String("nope"), False),
# thread/api calls
("0000a657", "process=(2900:2852),thread=2904", capa.features.insn.API("RegQueryValueExA"), True),
("0000a657", "process=(2900:2852),thread=2904", capa.features.insn.API("RegQueryValueEx"), True),
("0000a657", "process=(2852:3052),thread=2804", capa.features.insn.API("NtQueryValueKey"), True),
("0000a657", "process=(2852:3052),thread=2804", capa.features.insn.API("GetActiveWindow"), False),
# thread/number call argument

@@ -22,6 +22,8 @@ DYNAMIC_DRAKVUF_FEATURE_PRESENCE_TESTS = sorted(
("93b2d1-drakvuf", "process=(3564:4852),thread=6592", capa.features.insn.API("LdrLoadDll"), True),
("93b2d1-drakvuf", "process=(3564:4852),thread=6592", capa.features.insn.API("DoesNotExist"), False),
# call/api
("93b2d1-drakvuf", "process=(3564:4852),thread=4716,call=17", capa.features.insn.API("CreateWindowExW"), True),
("93b2d1-drakvuf", "process=(3564:4852),thread=4716,call=17", capa.features.insn.API("CreateWindowEx"), True),
("93b2d1-drakvuf", "process=(3564:4852),thread=6592,call=1", capa.features.insn.API("LdrLoadDll"), True),
("93b2d1-drakvuf", "process=(3564:4852),thread=6592,call=1", capa.features.insn.API("DoesNotExist"), False),
# call/string argument

@@ -10,7 +10,6 @@ import textwrap
from unittest.mock import Mock

import fixtures
import rich.console

import capa.rules
import capa.render.utils
@@ -24,6 +23,7 @@ import capa.features.basicblock
import capa.render.result_document
import capa.render.result_document as rd
import capa.features.freeze.features
from capa.render.utils import Console


def test_render_number():
@@ -154,7 +154,7 @@ def test_render_meta_maec():

# capture the output of render_maec
f = io.StringIO()
console = rich.console.Console(file=f)
console = Console(file=f)
capa.render.default.render_maec(mock_rd, console)
output = f.getvalue()

@@ -198,7 +198,7 @@ def test_render_meta_maec():
],
)
def test_render_vverbose_feature(feature, expected):
ostream = capa.render.utils.StringIO()
console = Console(highlight=False)

addr = capa.features.freeze.Address.from_capa(capa.features.address.AbsoluteVirtualAddress(0x401000))
feature = capa.features.freeze.features.feature_from_capa(feature)
@@ -240,6 +240,8 @@ def test_render_vverbose_feature(feature, expected):
matches=(),
)

capa.render.vverbose.render_feature(ostream, layout, rm, matches, feature, indent=0)
with console.capture() as capture:
capa.render.vverbose.render_feature(console, layout, rm, matches, feature, indent=0)

assert ostream.getvalue().strip() == expected
output = capture.get().strip()
assert output == expected

@@ -19,22 +19,51 @@ DYNAMIC_VMRAY_FEATURE_PRESENCE_TESTS = sorted(
("93b2d1-vmray", "file", capa.features.common.String("\\Program Files\\WindowsApps\\does_not_exist"), False),
# file/imports
("93b2d1-vmray", "file", capa.features.file.Import("GetAddrInfoW"), True),
("93b2d1-vmray", "file", capa.features.file.Import("GetAddrInfo"), True),
# thread/api calls
("93b2d1-vmray", "process=(2176:0),thread=7", capa.features.insn.API("GetAddrInfoW"), True),
("93b2d1-vmray", "process=(2176:0),thread=7", capa.features.insn.API("DoesNotExist"), False),
("93b2d1-vmray", "process=(2176:0),thread=2180", capa.features.insn.API("LoadLibraryExA"), True),
("93b2d1-vmray", "process=(2176:0),thread=2180", capa.features.insn.API("LoadLibraryEx"), True),
("93b2d1-vmray", "process=(2176:0),thread=2420", capa.features.insn.API("GetAddrInfoW"), True),
("93b2d1-vmray", "process=(2176:0),thread=2420", capa.features.insn.API("GetAddrInfo"), True),
("93b2d1-vmray", "process=(2176:0),thread=2420", capa.features.insn.API("DoesNotExist"), False),
# call/api
("93b2d1-vmray", "process=(2176:0),thread=7,call=2361", capa.features.insn.API("GetAddrInfoW"), True),
("93b2d1-vmray", "process=(2176:0),thread=2420,call=2361", capa.features.insn.API("GetAddrInfoW"), True),
# call/string argument
(
"93b2d1-vmray",
"process=(2176:0),thread=7,call=10323",
"process=(2176:0),thread=2420,call=10323",
capa.features.common.String("raw.githubusercontent.com"),
True,
),
# backslashes in paths; see #2428
(
"93b2d1-vmray",
"process=(2176:0),thread=2180,call=267",
capa.features.common.String("C:\\Users\\WhuOXYsD\\Desktop\\filename.exe"),
True,
),
(
"93b2d1-vmray",
"process=(2176:0),thread=2180,call=267",
capa.features.common.String("C:\\\\Users\\\\WhuOXYsD\\\\Desktop\\\\filename.exe"),
False,
),
(
"93b2d1-vmray",
"process=(2176:0),thread=2204,call=2395",
capa.features.common.String("Software\\Microsoft\\Windows\\CurrentVersion\\Policies\\System"),
True,
),
(
"93b2d1-vmray",
"process=(2176:0),thread=2204,call=2395",
capa.features.common.String("Software\\\\Microsoft\\\\Windows\\\\CurrentVersion\\\\Policies\\\\System"),
False,
),
# call/number argument
# VirtualAlloc(4096, 4)
("93b2d1-vmray", "process=(2176:0),thread=7,call=2358", capa.features.insn.Number(4096), True),
("93b2d1-vmray", "process=(2176:0),thread=7,call=2358", capa.features.insn.Number(4), True),
("93b2d1-vmray", "process=(2176:0),thread=2420,call=2358", capa.features.insn.Number(4096), True),
("93b2d1-vmray", "process=(2176:0),thread=2420,call=2358", capa.features.insn.Number(4), True),
],
# order tests by (file, item)
# so that our LRU cache is most effective.
@@ -46,24 +75,24 @@ DYNAMIC_VMRAY_FEATURE_COUNT_TESTS = sorted(
# file/imports
("93b2d1-vmray", "file", capa.features.file.Import("GetAddrInfoW"), 1),
# thread/api calls
("93b2d1-vmray", "process=(2176:0),thread=7", capa.features.insn.API("free"), 1),
("93b2d1-vmray", "process=(2176:0),thread=7", capa.features.insn.API("GetAddrInfoW"), 5),
("93b2d1-vmray", "process=(2176:0),thread=2420", capa.features.insn.API("free"), 1),
("93b2d1-vmray", "process=(2176:0),thread=2420", capa.features.insn.API("GetAddrInfoW"), 5),
# call/api
("93b2d1-vmray", "process=(2176:0),thread=7,call=2345", capa.features.insn.API("free"), 1),
("93b2d1-vmray", "process=(2176:0),thread=7,call=2345", capa.features.insn.API("GetAddrInfoW"), 0),
("93b2d1-vmray", "process=(2176:0),thread=7,call=2361", capa.features.insn.API("GetAddrInfoW"), 1),
("93b2d1-vmray", "process=(2176:0),thread=2420,call=2345", capa.features.insn.API("free"), 1),
("93b2d1-vmray", "process=(2176:0),thread=2420,call=2345", capa.features.insn.API("GetAddrInfoW"), 0),
("93b2d1-vmray", "process=(2176:0),thread=2420,call=2361", capa.features.insn.API("GetAddrInfoW"), 1),
# call/string argument
(
"93b2d1-vmray",
"process=(2176:0),thread=7,call=10323",
"process=(2176:0),thread=2420,call=10323",
capa.features.common.String("raw.githubusercontent.com"),
1,
),
("93b2d1-vmray", "process=(2176:0),thread=7,call=10323", capa.features.common.String("non_existant"), 0),
("93b2d1-vmray", "process=(2176:0),thread=2420,call=10323", capa.features.common.String("non_existant"), 0),
# call/number argument
("93b2d1-vmray", "process=(2176:0),thread=7,call=10315", capa.features.insn.Number(4096), 1),
("93b2d1-vmray", "process=(2176:0),thread=7,call=10315", capa.features.insn.Number(4), 1),
("93b2d1-vmray", "process=(2176:0),thread=7,call=10315", capa.features.insn.Number(404), 0),
("93b2d1-vmray", "process=(2176:0),thread=2420,call=10315", capa.features.insn.Number(4096), 1),
("93b2d1-vmray", "process=(2176:0),thread=2420,call=10315", capa.features.insn.Number(4), 1),
("93b2d1-vmray", "process=(2176:0),thread=2420,call=10315", capa.features.insn.Number(404), 0),
],
# order tests by (file, item)
# so that our LRU cache is most effective.
@@ -87,3 +116,10 @@ def test_vmray_features(sample, scope, feature, expected):
)
def test_vmray_feature_counts(sample, scope, feature, expected):
fixtures.do_test_feature_count(fixtures.get_vmray_extractor, sample, scope, feature, expected)


def test_vmray_processes():
# see #2394
path = fixtures.get_data_path_by_name("2f8a79-vmray")
vmre = fixtures.get_vmray_extractor(path)
assert len(vmre.analysis.monitor_processes) == 9

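The new tests around issue #2428 hinge on Python string escaping: in test source, "\\" denotes a single literal backslash. A tiny sketch of the distinction the tests assert:

# single backslashes, as captured from the sandbox: C:\Users\...
path = "C:\\Users\\WhuOXYsD\\Desktop\\filename.exe"
# doubled backslashes, an over-escaped form: C:\\Users\\...
wrong = "C:\\\\Users\\\\WhuOXYsD\\\\Desktop\\\\filename.exe"

assert "\\" in path and "\\\\" not in path
assert "\\\\" in wrong  # must NOT match the captured path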
8262 web/explorer/package-lock.json (generated; diff suppressed because it is too large)
@@ -33,7 +33,7 @@
"eslint-plugin-vue": "^9.23.0",
"jsdom": "^24.1.0",
"prettier": "^3.2.5",
"vite": "^5.3.1",
"vite": "^5.4.6",
"vite-plugin-singlefile": "^2.0.2",
"vitest": "^1.6.0"
}

@@ -1,6 +1,7 @@
<script setup>
import Menubar from "primevue/menubar";
import { RouterLink } from "vue-router";
import Button from "primevue/button";

const isBundle = import.meta.env.MODE === "bundle";
</script>
@@ -14,6 +15,9 @@ const isBundle = import.meta.env.MODE === "bundle";
</template>
<template #end>
<div class="flex align-items-center gap-3">
<a href="https://github.com/mandiant/capa/issues/new/choose" target="_blank" rel="noopener noreferrer">
<Button severity="contrast" size="small" outlined label="Provide feedback" />
</a>
<a
v-if="!isBundle"
v-ripple

@@ -5,7 +5,7 @@
mode="basic"
name="model[]"
accept=".json,.gz"
:max-file-size="10000000"
:max-file-size="100000000"
:auto="true"
:custom-upload="true"
choose-label="Upload from local"

@@ -18,12 +18,20 @@ const router = createRouter({
name: "analysis",
component: AnalysisView,
beforeEnter: (to, from, next) => {
if (rdocStore.data.value === null) {
// No rdoc loaded, redirect to home page
next({ name: "home" });
} else {
// rdoc is loaded, proceed to analysis page
// check if rdoc is loaded
if (rdocStore.data.value !== null) {
// rdocStore.data already contains the rdoc json - continue
next();
} else {
// rdoc is not loaded, check if the rdoc query param is set in the URL
const rdocUrl = to.query.rdoc;
if (rdocUrl) {
// query param is set - try to load the rdoc from the homepage
next({ name: "home", query: { rdoc: rdocUrl } });
} else {
// no query param is set - go back home
next({ name: "home" });
}
}
}
},

@@ -492,6 +492,8 @@ function getFeatureName(feature) {
return `operand[${feature.index}].offset: 0x${feature.operand_offset.toString(16).toUpperCase()}`;
case "class":
return `${feature.class_}`;
case "import":
return `${feature.import_}`;
default:
return `${feature[feature.type]}`;
}

@@ -88,7 +88,7 @@
box-shadow: 0 0.5rem 1rem rgba(0,0,0,0.05),inset 0 -1px 0 rgba(0,0,0,0.15);"
>
<a href="/" class="d-flex align-items-center mb-3 mb-md-0 me-md-auto">
<img src="./img/logo.png" height=48 />
<img src="./img/logo.png" alt="capa logo" height=48 />
</a>

<ul class="nav nav-pills">
@@ -118,7 +118,7 @@
references.
</p>
<div class="d-grid gap-2 d-md-flex justify-content-md-start mb-4 mb-lg-3">
<a href="#download" type="button" class="btn btn-primary bs-primary btn-lg px-4 me-md-2 fw-bold">Download</button>
<a href="#download" type="button" class="btn btn-primary bs-primary btn-lg px-4 me-md-2 fw-bold">Download</a>
<a href="./rules/" type="button" class="btn btn-outline-secondary btn-lg px-4">Browse Rules</a>
</div>
</div>
@@ -194,7 +194,7 @@
<div class="row flex-lg-row-reverse align-items-center g-5">
<h1>What's New</h1>

<h3 class="mt-3">Rule Updates</h3>
<h2 class="mt-3">Rule Updates</h2>

<ul class="mt-2 ps-5">
<!-- TODO(williballenthin): add date -->
@@ -213,10 +213,22 @@
</li>
</ul>

<h3 class="mt-3">Tool Updates</h3>
<h2 class="mt-3">Tool Updates</h2>

<h5 class="mt-2">v7.2.0</h5>
<!-- TODO(williballenthin): add date -->
<h3 class="mt-2">v7.3.0 (<em>2024-09-20</em>)</h3>
<div class="mt-0">
The <a href="https://github.com/mandiant/capa/releases/tag/v7.3.0">capa v7.3.0</a> release comes with the following three major enhancements:
<p><strong>1. Support for VMRay sandbox analysis archives</strong>: Unlock powerful malware analysis with capa's new <a href="https://www.vmray.com/">VMRay sandbox</a> integration!
Simply provide a VMRay analysis archive, and capa will automatically extract and match capabilities to streamline your workflow. This is the second dynamic
analysis format that capa supports, after <a href="https://www.mandiant.com/resources/blog/dynamic-capa-executable-behavior-cape-sandbox">CAPE</a>.</p>
<p><strong>2. Support for BinExport files generated by Ghidra</strong>: <a href="https://github.com/google/binexport">BinExport</a> files store disassembled data into a Protocol Buffer format.
capa now supports the analysis of BinExport files generated by Ghidra. Using Ghidra and the BinExport file format users can now analyze ARM (AARCH64) ELF files targeting Android.</p>
<p><strong>3. Introducing the capa rules website</strong>: You can now browse capa's default rule set at <a href="https://mandiant.github.io/capa/rules">https://mandiant.github.io/capa/rules</a>.
In modern terminals the CLI capa tool hyperlinks to resources on the web, including entries on the capa rules website.
Furthermore, <a href="https://mandiant.github.io/capa">https://mandiant.github.io/capa</a> provides a landing page for the capa tool project.</p>
</div>

<h3 class="mt-2">v7.2.0 (<em>2024-08-20</em>)</h3>
<p class="mt-0">
<a href="https://github.com/mandiant/capa/releases/tag/v7.2.0">capa v7.2.0</a>
introduces a first version of capa Explorer Web: a web-based user interface to inspect capa results using your browser.
@@ -254,9 +266,9 @@
<div class="col">
<div class="row row-cols-1 row-cols-sm-2 g-4">
<div class="col d-flex flex-column gap-2">
<h4 class="fw-semibold mb-0 text-body-emphasis">
<h3 class="fw-semibold mb-0 text-body-emphasis">
IDA Pro
</h4>
</h3>
<p class="text-body-secondary">
<!-- TODO(williballenthin): add link to find out more -->
Use the capa Explorer IDA Plugin to guide your reverse engineering, zeroing in on the interesting functions by behavior.
@@ -264,9 +276,9 @@
</div>

<div class="col d-flex flex-column gap-2">
<h4 class="fw-semibold mb-0 text-body-emphasis">
<h3 class="fw-semibold mb-0 text-body-emphasis">
Ghidra
</h4>
</h3>
<p class="text-body-secondary">
<!-- TODO(williballenthin): add link to find out more -->
Invoke Ghidra in headless mode to collect features for capa, or use the capa Explorer Ghidra plugin to understand key functions.
@@ -274,9 +286,9 @@
</div>

<div class="col d-flex flex-column gap-2">
<h4 class="fw-semibold mb-0 text-body-emphasis">
<h3 class="fw-semibold mb-0 text-body-emphasis">
Binary Ninja
</h4>
</h3>
<p class="text-body-secondary">
<!-- TODO(williballenthin): add link to find out more -->
Use Binary Ninja as the disassembler backend, relying on its state-of-the-art code analysis to recover capabilities.
@@ -284,9 +296,9 @@
</div>

<div class="col d-flex flex-column gap-2">
<h4 class="fw-semibold mb-0 text-body-emphasis">
<h3 class="fw-semibold mb-0 text-body-emphasis">
CAPE
</h4>
</h3>
<p class="text-body-secondary">
<!-- TODO(williballenthin): add link to find out more -->
Analyze the API trace captured by CAPE as it detonates malware, summarizing the behaviors seen across thousands of function calls.
@@ -356,10 +368,10 @@

<div class="bg-dark text-secondary px-4 pt-5 text-center">
<div class="py-5">
<img src="./img/icon.png" />
<h3 class="display-5 fw-bold text-white">
<img src="./img/icon.png" alt="capa icon"/>
<h2 class="display-5 fw-bold text-white">
capa
</h3>
</h2>

<div class="col-lg-6 mx-auto">
<p class="fs-5 my-4">
@@ -379,7 +391,7 @@
</div>
</div>
</div>
</main>
</main>

<script>
window.addEventListener('DOMContentLoaded', (event) => {