mirror of
https://github.com/mandiant/capa.git
synced 2025-12-13 08:00:44 -08:00
Compare commits
50 Commits
fix/be2/im
...
add-codema
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d0bafd6ab7 | ||
|
|
9d3d3be21d | ||
|
|
8251a4c16f | ||
|
|
7407cb39ca | ||
|
|
0162e447fd | ||
|
|
829dae388f | ||
|
|
2a4d0ae080 | ||
|
|
d9a754730c | ||
|
|
4acacba9d6 | ||
|
|
d00f172973 | ||
|
|
1572dd87ed | ||
|
|
23a88fae70 | ||
|
|
474e64cd32 | ||
|
|
c664dc662f | ||
|
|
c1c71613a9 | ||
|
|
fa90aae3dc | ||
|
|
7ba02c424e | ||
|
|
f238708ab8 | ||
|
|
9c639005ee | ||
|
|
c37b04fa5f | ||
|
|
dadd536498 | ||
|
|
f3b07dba14 | ||
|
|
66158db197 | ||
|
|
a4285c013e | ||
|
|
6924974b6b | ||
|
|
dc153c4763 | ||
|
|
71a28e4482 | ||
|
|
f6ed36fa0f | ||
|
|
6e68034d57 | ||
|
|
0df50f5d54 | ||
|
|
f1131750cc | ||
|
|
077082a376 | ||
|
|
86318093da | ||
|
|
4ee8a7c6b1 | ||
|
|
151d30bec6 | ||
|
|
3bd339522e | ||
|
|
7ecf292095 | ||
|
|
45ea683d19 | ||
|
|
2b95fa089d | ||
|
|
d3d71f97c8 | ||
|
|
4c9d81072a | ||
|
|
a94c68377a | ||
|
|
14e076864c | ||
|
|
6684f9f890 | ||
|
|
e622989eeb | ||
|
|
9c9dd15bf9 | ||
|
|
06fad4a89e | ||
|
|
e06a0ab75f | ||
|
|
0371ade358 | ||
|
|
80b5a116a5 |
46
CHANGELOG.md
46
CHANGELOG.md
@@ -6,12 +6,28 @@
|
||||
|
||||
### Breaking Changes
|
||||
|
||||
### New Rules (0)
|
||||
### New Rules (15)
|
||||
|
||||
- communication/socket/connect-socket moritz.raabe@mandiant.com joakim@intezer.com mrhafizfarhad@gmail.com
|
||||
- communication/socket/udp/connect-udp-socket mrhafizfarhad@gmail.com
|
||||
- nursery/enter-debug-mode-in-dotnet @v1bh475u
|
||||
- nursery/decrypt-data-using-tripledes-in-dotnet 0xRavenspar
|
||||
- nursery/encrypt-data-using-tripledes-in-dotnet 0xRavenspar
|
||||
- nursery/disable-system-features-via-registry-on-windows mehunhoff@google.com
|
||||
- data-manipulation/encryption/chaskey/encrypt-data-using-chaskey still@teamt5.org
|
||||
- data-manipulation/encryption/speck/encrypt-data-using-speck still@teamt5.org
|
||||
- load-code/dotnet/load-assembly-via-iassembly still@teamt5.org
|
||||
- malware-family/donut-loader/load-shellcode-via-donut still@teamt5.org
|
||||
- nursery/disable-device-guard-features-via-registry-on-windows mehunhoff@google.com
|
||||
- nursery/disable-firewall-features-via-registry-on-windows mehunhoff@google.com
|
||||
- nursery/disable-system-restore-features-via-registry-on-windows mehunhoff@google.com
|
||||
- nursery/disable-windows-defender-features-via-registry-on-windows mehunhoff@google.com
|
||||
-
|
||||
|
||||
### Bug Fixes
|
||||
- only parse CAPE fields required for analysis @mike-hunhoff #2607
|
||||
- cape: make some fields optional @williballenthin #2631 #2632
|
||||
- lint: add WARN for regex features that contain unescaped dot #2635
|
||||
- lint: add ERROR for incomplete registry control set regex #2643
|
||||
|
||||
### capa Explorer Web
|
||||
|
||||
@@ -20,8 +36,30 @@
|
||||
### Development
|
||||
|
||||
### Raw diffs
|
||||
- [capa v9.0.0...master](https://github.com/mandiant/capa/compare/v9.0.0...master)
|
||||
- [capa-rules v9.0.0...master](https://github.com/mandiant/capa-rules/compare/v9.0.0...master)
|
||||
- [capa v9.1.0...master](https://github.com/mandiant/capa/compare/v9.1.0...master)
|
||||
- [capa-rules v9.1.0...master](https://github.com/mandiant/capa-rules/compare/v9.1.0...master)
|
||||
|
||||
## v9.1.0
|
||||
|
||||
This release improves a few aspects of dynamic analysis, relaxing our validation on fields across many CAPE versions, for example.
|
||||
It also includes an updated rule pack in which many dynamic rules make better use of the "span of calls" scope.
|
||||
|
||||
|
||||
### New Rules (3)
|
||||
|
||||
- host-interaction/registry/change-registry-key-timestamp wballenthin@google.com
|
||||
- host-interaction/mutex/check-mutex-and-terminate-process-on-windows @_re_fox moritz.raabe@mandiant.com mehunhoff@google.com
|
||||
- anti-analysis/anti-forensic/clear-logs/clear-windows-event-logs-remotely 99.elad.levi@gmail.com
|
||||
|
||||
### Bug Fixes
|
||||
- only parse CAPE fields required for analysis @mike-hunhoff #2607
|
||||
- main: render result document without needing associated rules @williballenthin #2610
|
||||
- vmray: only verify process OS and monitor IDs match @mike-hunhoff #2613
|
||||
- render: don't assume prior matches exist within a thread @mike-hunhoff #2612
|
||||
|
||||
### Raw diffs
|
||||
- [capa v9.0.0...v9.1.0](https://github.com/mandiant/capa/compare/v9.0.0...v9.1.0)
|
||||
- [capa-rules v9.0.0...v9.1.0](https://github.com/mandiant/capa-rules/compare/v9.0.0...v9.1.0)
|
||||
|
||||
## v9.0.0
|
||||
|
||||
|
||||
@@ -54,7 +54,8 @@ class CapeExtractor(DynamicFeatureExtractor):
|
||||
|
||||
def get_base_address(self) -> Union[AbsoluteVirtualAddress, _NoAddress, None]:
|
||||
# value according to the PE header, the actual trace may use a different imagebase
|
||||
assert self.report.static is not None and self.report.static.pe is not None
|
||||
assert self.report.static is not None
|
||||
assert self.report.static.pe is not None
|
||||
return AbsoluteVirtualAddress(self.report.static.pe.imagebase)
|
||||
|
||||
def extract_global_features(self) -> Iterator[tuple[Feature, Address]]:
|
||||
|
||||
@@ -88,31 +88,49 @@ def extract_file_strings(report: CapeReport) -> Iterator[tuple[Feature, Address]
|
||||
|
||||
|
||||
def extract_used_regkeys(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
    """Yield a String feature (at NO_ADDRESS) for each registry key in the behavior summary.

    Yields nothing when the report's behavior has no (or an empty/falsy) summary.
    """
    summary = report.behavior.summary
    if summary:
        for key_path in summary.keys:
            yield String(key_path), NO_ADDRESS
|
||||
|
||||
|
||||
def extract_used_files(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
    """Yield a String feature (at NO_ADDRESS) for each file listed in the behavior summary.

    Yields nothing when the report's behavior has no (or an empty/falsy) summary.
    """
    summary = report.behavior.summary
    if summary:
        for file_path in summary.files:
            yield String(file_path), NO_ADDRESS
|
||||
|
||||
|
||||
def extract_used_mutexes(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
    """Yield a String feature (at NO_ADDRESS) for each mutex listed in the behavior summary.

    Yields nothing when the report's behavior has no (or an empty/falsy) summary.
    """
    summary = report.behavior.summary
    if summary:
        for mutex_name in summary.mutexes:
            yield String(mutex_name), NO_ADDRESS
|
||||
|
||||
|
||||
def extract_used_commands(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
    """Yield a String feature (at NO_ADDRESS) for each executed command in the behavior summary.

    Yields nothing when the report's behavior has no (or an empty/falsy) summary.
    """
    summary = report.behavior.summary
    if summary:
        for command_line in summary.executed_commands:
            yield String(command_line), NO_ADDRESS
|
||||
|
||||
|
||||
def extract_used_apis(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
    """Yield a String feature (at NO_ADDRESS) for each resolved API in the behavior summary.

    Yields nothing when the report's behavior has no (or an empty/falsy) summary.
    """
    summary = report.behavior.summary
    if summary:
        for api_symbol in summary.resolved_apis:
            yield String(api_symbol), NO_ADDRESS
|
||||
|
||||
|
||||
def extract_used_services(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
|
||||
if not report.behavior.summary:
|
||||
return
|
||||
|
||||
for svc in report.behavior.summary.created_services:
|
||||
yield String(svc), NO_ADDRESS
|
||||
for svc in report.behavior.summary.started_services:
|
||||
|
||||
@@ -188,15 +188,15 @@ class PE(FlexibleModel):
|
||||
# timestamp: str
|
||||
|
||||
# list[ImportedDll], or dict[basename(dll), ImportedDll]
|
||||
imports: Union[list[ImportedDll], dict[str, ImportedDll]]
|
||||
imports: list[ImportedDll] | dict[str, ImportedDll] = Field(default_factory=list) # type: ignore
|
||||
# imported_dll_count: Optional[int] = None
|
||||
# imphash: str
|
||||
|
||||
# exported_dll_name: Optional[str] = None
|
||||
exports: list[ExportedSymbol]
|
||||
exports: list[ExportedSymbol] = Field(default_factory=list)
|
||||
|
||||
# dirents: list[DirectoryEntry]
|
||||
sections: list[Section]
|
||||
sections: list[Section] = Field(default_factory=list)
|
||||
|
||||
# ep_bytes: Optional[HexBytes] = None
|
||||
|
||||
@@ -364,7 +364,7 @@ class EncryptedBuffer(FlexibleModel):
|
||||
|
||||
|
||||
class Behavior(FlexibleModel):
|
||||
summary: Summary
|
||||
summary: Summary | None = None
|
||||
|
||||
# list of processes, of threads, of calls
|
||||
processes: list[Process]
|
||||
|
||||
@@ -223,16 +223,15 @@ class VMRayAnalysis:
|
||||
# we expect monitor processes recorded in both SummaryV2.json and flog.xml to be equal
|
||||
# to ensure this, we compare the pid, monitor_id, and origin_monitor_id
|
||||
# for the other fields we've observed cases with slight deviations, e.g.,
|
||||
# the ppid for a process in flog.xml is not set correctly, all other data is equal
|
||||
# the ppid, origin monitor id, etc. for a process in flog.xml is not set correctly, all other
|
||||
# data is equal
|
||||
sv2p = self.monitor_processes[monitor_process.process_id]
|
||||
if self.monitor_processes[monitor_process.process_id] != vmray_monitor_process:
|
||||
logger.debug("processes differ: %s (sv2) vs. %s (flog)", sv2p, vmray_monitor_process)
|
||||
|
||||
assert (sv2p.pid, sv2p.monitor_id, sv2p.origin_monitor_id) == (
|
||||
vmray_monitor_process.pid,
|
||||
vmray_monitor_process.monitor_id,
|
||||
vmray_monitor_process.origin_monitor_id,
|
||||
)
|
||||
# we need, at a minimum, for the process id and monitor id to match, otherwise there is likely a bug
|
||||
# in the way that VMRay tracked one of the processes
|
||||
assert (sv2p.pid, sv2p.monitor_id) == (vmray_monitor_process.pid, vmray_monitor_process.monitor_id)
|
||||
|
||||
def _compute_monitor_threads(self):
|
||||
for monitor_thread in self.flog.analysis.monitor_threads:
|
||||
|
||||
75
capa/main.py
75
capa/main.py
@@ -995,7 +995,27 @@ def main(argv: Optional[list[str]] = None):
|
||||
handle_common_args(args)
|
||||
ensure_input_exists_from_cli(args)
|
||||
input_format = get_input_format_from_cli(args)
|
||||
rules = get_rules_from_cli(args)
|
||||
except ShouldExitError as e:
|
||||
return e.status_code
|
||||
|
||||
if input_format == FORMAT_RESULT:
|
||||
# render the result document immediately,
|
||||
# no need to load the rules or do other processing.
|
||||
result_doc = capa.render.result_document.ResultDocument.from_file(args.input_file)
|
||||
|
||||
if args.json:
|
||||
print(result_doc.model_dump_json(exclude_none=True))
|
||||
elif args.vverbose:
|
||||
print(capa.render.vverbose.render_vverbose(result_doc))
|
||||
elif args.verbose:
|
||||
print(capa.render.verbose.render_verbose(result_doc))
|
||||
else:
|
||||
print(capa.render.default.render_default(result_doc))
|
||||
return 0
|
||||
|
||||
try:
|
||||
rules: RuleSet = get_rules_from_cli(args)
|
||||
|
||||
found_limitation = False
|
||||
file_extractors = get_file_extractors_from_cli(args, input_format)
|
||||
if input_format in STATIC_FORMATS:
|
||||
@@ -1003,45 +1023,30 @@ def main(argv: Optional[list[str]] = None):
|
||||
found_limitation = find_static_limitations_from_cli(args, rules, file_extractors)
|
||||
if input_format in DYNAMIC_FORMATS:
|
||||
found_limitation = find_dynamic_limitations_from_cli(args, rules, file_extractors)
|
||||
|
||||
backend = get_backend_from_cli(args, input_format)
|
||||
sample_path = get_sample_path_from_cli(args, backend)
|
||||
if sample_path is None:
|
||||
os_ = "unknown"
|
||||
else:
|
||||
os_ = capa.loader.get_os(sample_path)
|
||||
extractor: FeatureExtractor = get_extractor_from_cli(args, input_format, backend)
|
||||
except ShouldExitError as e:
|
||||
return e.status_code
|
||||
|
||||
meta: rdoc.Metadata
|
||||
capabilities: Capabilities
|
||||
capabilities: Capabilities = find_capabilities(rules, extractor, disable_progress=args.quiet)
|
||||
|
||||
if input_format == FORMAT_RESULT:
|
||||
# result document directly parses into meta, capabilities
|
||||
result_doc = capa.render.result_document.ResultDocument.from_file(args.input_file)
|
||||
meta, capabilities = result_doc.to_capa()
|
||||
meta: rdoc.Metadata = capa.loader.collect_metadata(
|
||||
argv, args.input_file, input_format, os_, args.rules, extractor, capabilities
|
||||
)
|
||||
meta.analysis.layout = capa.loader.compute_layout(rules, extractor, capabilities.matches)
|
||||
|
||||
else:
|
||||
# all other formats we must create an extractor
|
||||
# and use that to extract meta and capabilities
|
||||
|
||||
try:
|
||||
backend = get_backend_from_cli(args, input_format)
|
||||
sample_path = get_sample_path_from_cli(args, backend)
|
||||
if sample_path is None:
|
||||
os_ = "unknown"
|
||||
else:
|
||||
os_ = capa.loader.get_os(sample_path)
|
||||
extractor = get_extractor_from_cli(args, input_format, backend)
|
||||
except ShouldExitError as e:
|
||||
return e.status_code
|
||||
|
||||
capabilities = find_capabilities(rules, extractor, disable_progress=args.quiet)
|
||||
|
||||
meta = capa.loader.collect_metadata(
|
||||
argv, args.input_file, input_format, os_, args.rules, extractor, capabilities
|
||||
)
|
||||
meta.analysis.layout = capa.loader.compute_layout(rules, extractor, capabilities.matches)
|
||||
|
||||
if found_limitation:
|
||||
# bail if capa's static feature extractor encountered file limitation e.g. a packed binary
|
||||
# or capa's dynamic feature extractor encountered some limitation e.g. a dotnet sample
|
||||
# do show the output in verbose mode, though.
|
||||
if not (args.verbose or args.vverbose or args.json):
|
||||
return E_FILE_LIMITATION
|
||||
if found_limitation:
|
||||
# bail if capa's static feature extractor encountered file limitation e.g. a packed binary
|
||||
# or capa's dynamic feature extractor encountered some limitation e.g. a dotnet sample
|
||||
# do show the output in verbose mode, though.
|
||||
if not (args.verbose or args.vverbose or args.json):
|
||||
return E_FILE_LIMITATION
|
||||
|
||||
if args.json:
|
||||
print(capa.render.json.render(meta, rules, capabilities.matches))
|
||||
|
||||
@@ -418,8 +418,9 @@ class Match(FrozenModel):
|
||||
and a.id <= location.id
|
||||
]
|
||||
)
|
||||
_, most_recent_match = matches_in_thread[-1]
|
||||
children.append(Match.from_capa(rules, capabilities, most_recent_match))
|
||||
if matches_in_thread:
|
||||
_, most_recent_match = matches_in_thread[-1]
|
||||
children.append(Match.from_capa(rules, capabilities, most_recent_match))
|
||||
|
||||
else:
|
||||
children.append(Match.from_capa(rules, capabilities, rule_matches[location]))
|
||||
@@ -478,8 +479,11 @@ class Match(FrozenModel):
|
||||
and a.id <= location.id
|
||||
]
|
||||
)
|
||||
_, most_recent_match = matches_in_thread[-1]
|
||||
children.append(Match.from_capa(rules, capabilities, most_recent_match))
|
||||
# namespace matches may not occur within the same thread as the result, so only
|
||||
# proceed if a match within the same thread is found
|
||||
if matches_in_thread:
|
||||
_, most_recent_match = matches_in_thread[-1]
|
||||
children.append(Match.from_capa(rules, capabilities, most_recent_match))
|
||||
else:
|
||||
if location in rule_matches:
|
||||
children.append(Match.from_capa(rules, capabilities, rule_matches[location]))
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
__version__ = "9.0.0"
|
||||
__version__ = "9.1.0"
|
||||
|
||||
|
||||
def get_major_version():
|
||||
|
||||
@@ -136,17 +136,17 @@ dev = [
|
||||
"flake8-simplify==0.21.0",
|
||||
"flake8-use-pathlib==0.3.0",
|
||||
"flake8-copyright==0.2.4",
|
||||
"ruff==0.9.2",
|
||||
"ruff==0.11.0",
|
||||
"black==25.1.0",
|
||||
"isort==6.0.0",
|
||||
"mypy==1.15.0",
|
||||
"mypy-protobuf==3.6.0",
|
||||
"PyGithub==2.5.0",
|
||||
"PyGithub==2.6.0",
|
||||
# type stubs for mypy
|
||||
"types-backports==0.1.3",
|
||||
"types-colorama==0.4.15.11",
|
||||
"types-PyYAML==6.0.8",
|
||||
"types-psutil==6.1.0.20241102",
|
||||
"types-psutil==7.0.0.20250218",
|
||||
"types_requests==2.32.0.20240712",
|
||||
"types-protobuf==5.29.1.20241207",
|
||||
"deptry==0.23.0"
|
||||
@@ -157,12 +157,12 @@ build = [
|
||||
# These dependencies are not used in production environments
|
||||
# and should not conflict with other libraries/tooling.
|
||||
"pyinstaller==6.12.0",
|
||||
"setuptools==75.8.0",
|
||||
"setuptools==76.0.0",
|
||||
"build==1.2.2"
|
||||
]
|
||||
scripts = [
|
||||
"jschema_to_python==1.2.3",
|
||||
"psutil==6.1.0",
|
||||
"psutil==7.0.0",
|
||||
"stix2==3.0.1",
|
||||
"sarif_om==1.0.4",
|
||||
"requests==2.32.3",
|
||||
|
||||
@@ -12,7 +12,7 @@ cxxfilt==0.3.0
|
||||
dncil==1.0.2
|
||||
dnfile==0.15.0
|
||||
funcy==2.0
|
||||
humanize==4.10.0
|
||||
humanize==4.12.0
|
||||
ida-netnode==3.0
|
||||
ida-settings==2.1.0
|
||||
intervaltree==3.1.0
|
||||
@@ -22,7 +22,7 @@ msgpack==1.0.8
|
||||
networkx==3.4.2
|
||||
pefile==2024.8.26
|
||||
pip==25.0
|
||||
protobuf==5.29.3
|
||||
protobuf==6.30.1
|
||||
pyasn1==0.5.1
|
||||
pyasn1-modules==0.3.0
|
||||
pycparser==2.22
|
||||
@@ -32,14 +32,14 @@ pydantic==2.10.1
|
||||
# so we rely on pydantic to pull in the right version of pydantic-core.
|
||||
# pydantic-core==2.23.4
|
||||
xmltodict==0.14.2
|
||||
pyelftools==0.31
|
||||
pyelftools==0.32
|
||||
pygments==2.19.1
|
||||
python-flirt==0.9.2
|
||||
pyyaml==6.0.2
|
||||
rich==13.9.2
|
||||
ruamel-yaml==0.18.6
|
||||
ruamel-yaml-clib==0.2.8
|
||||
setuptools==75.8.0
|
||||
setuptools==76.0.0
|
||||
six==1.17.0
|
||||
sortedcontainers==2.4.0
|
||||
viv-utils==0.8.0
|
||||
|
||||
2
rules
2
rules
Submodule rules updated: c0aa922f20...d64c2c91ea
490
scripts/codemap.py
Normal file
490
scripts/codemap.py
Normal file
@@ -0,0 +1,490 @@
|
||||
#!/usr/bin/env python
|
||||
# /// script
|
||||
# requires-python = ">=3.12"
|
||||
# dependencies = [
|
||||
# "protobuf",
|
||||
# "python-lancelot",
|
||||
# "rich",
|
||||
# ]
|
||||
# ///
|
||||
#
|
||||
# TODO:
|
||||
# - ignore stack cookie check
|
||||
import sys
|
||||
import json
|
||||
import time
|
||||
import logging
|
||||
import argparse
|
||||
import contextlib
|
||||
from typing import Any
|
||||
from pathlib import Path
|
||||
from collections import defaultdict
|
||||
from dataclasses import dataclass
|
||||
|
||||
import lancelot
|
||||
import rich.padding
|
||||
import lancelot.be2utils
|
||||
import google.protobuf.message
|
||||
from rich.text import Text
|
||||
from rich.theme import Theme
|
||||
from rich.markup import escape
|
||||
from rich.console import Console
|
||||
from lancelot.be2utils.binexport2_pb2 import BinExport2
|
||||
|
||||
logger = logging.getLogger("codemap")
|
||||
|
||||
|
||||
@contextlib.contextmanager
def timing(msg: str):
    """Log, at debug level, how long the enclosed ``with`` body took to run.

    Args:
        msg: short label for the timed step; it is interpolated into the
            "perf: <msg>: <seconds>s" log line.

    The duration is logged even when the body raises, so a failing step still
    shows up in the perf output; the exception itself is propagated unchanged.
    """
    # perf_counter is monotonic, so the measured interval cannot go negative
    # or jump if the system clock is adjusted while the body runs.
    t0 = time.perf_counter()
    try:
        yield
    finally:
        logger.debug("perf: %s: %0.2fs", msg, time.perf_counter() - t0)
|
||||
|
||||
|
||||
class Renderer:
    """Indentation-aware wrapper around a rich Console.

    Output is left-padded by two columns per indentation level; the level is
    raised and lowered with the `indenting()` / `section()` context managers.
    """

    def __init__(self, console: Console):
        self.console: Console = console
        # current nesting depth; each level adds two columns of left padding.
        self.indent: int = 0

    @contextlib.contextmanager
    def indenting(self):
        """Increase the indentation level for the duration of the `with` body."""
        self.indent += 1
        try:
            yield
        finally:
            # always restore the level, even if the body raised.
            self.indent -= 1

    @staticmethod
    def markup(s: str, **kwargs) -> Text:
        """Format the markup template `s` with `kwargs` and parse it into a Text.

        String arguments are escaped first so that their content is rendered
        literally rather than being interpreted as markup tags.
        """
        safe_args = {}
        for key, value in kwargs.items():
            safe_args[key] = escape(value) if isinstance(value, str) else value

        formatted = s.format(**safe_args)
        return Text.from_markup(formatted)

    def print(self, renderable, **kwargs):
        """Print `renderable` at the current indentation.

        When keyword arguments are given, `renderable` must be a markup
        template string; it is formatted via `markup()` and then printed.
        """
        if kwargs:
            assert isinstance(renderable, str)
            return self.print(self.markup(renderable, **kwargs))

        left_padding = self.indent * 2
        padded = rich.padding.Padding(renderable, (0, 0, 0, left_padding))
        return self.console.print(padded)

    def writeln(self, s: str):
        """Print the string `s` as-is at the current indentation."""
        self.print(s)

    @contextlib.contextmanager
    def section(self, name):
        """Print `name` as a title, then indent everything in the `with` body.

        `name` is either a plain string or a Text instance.

        Raises:
            ValueError: if `name` is neither a str nor a Text.
        """
        if isinstance(name, str):
            self.print("[title]{name}", name=name)
        elif isinstance(name, Text):
            # style a copy so the caller's Text instance is left untouched.
            title = name.copy()
            title.stylize_before(self.console.get_style("title"))
            self.print(title)
        else:
            raise ValueError("unexpected section name")

        with self.indenting():
            yield
|
||||
|
||||
|
||||
@dataclass
class AssemblageLocation:
    """One function record loaded from an Assemblage JSONL file."""

    # function name
    name: str
    # the record's "file" value; may carry a trailing " (...)" suffix, see `path`
    file: str
    # function prototype text
    prototype: str
    # the record's "function_start" value
    rva: int

    @property
    def path(self) -> str:
        """Return `file` with a trailing parenthesized suffix removed.

        If `file` does not end with ")" or contains no " (" separator,
        it is returned unchanged.
        """
        if not self.file.endswith(")"):
            return self.file

        head, separator, _ = self.file.rpartition(" (")
        # rpartition yields an empty head when the separator is absent;
        # fall back to the full value rather than returning "".
        return head if separator else self.file

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "AssemblageLocation":
        """Build an instance from a decoded Assemblage record.

        Raises:
            KeyError: if "name", "file", "prototype" or "function_start" is missing.
        """
        return cls(
            name=data["name"],
            file=data["file"],
            prototype=data["prototype"],
            rva=data["function_start"],
        )

    @classmethod
    def from_json(cls, doc: str) -> "AssemblageLocation":
        """Build an instance from a JSON document (e.g. one line of a JSONL file).

        Raises:
            json.JSONDecodeError: if `doc` is not valid JSON.
            KeyError: if a required field is missing.
        """
        return cls.from_dict(json.loads(doc))
|
||||
|
||||
|
||||
def main(argv: list[str] | None = None):
    """Render a textual "code map" of a binary from its BinExport2 representation.

    The input is first decoded as a BinExport2 protobuf; if that fails, it is
    analyzed with lancelot to produce one. Optionally, capa results (`--capa`)
    and Assemblage function metadata (`--assemblage`) are merged in. The map
    (meta, modules, sections, libraries, and per-function xrefs/calls/APIs/strings)
    is printed to the console.

    Args:
        argv: command line arguments, excluding the program name;
            defaults to `sys.argv[1:]`.

    Returns:
        None, which `sys.exit()` treats as a successful exit.
    """
    if argv is None:
        argv = sys.argv[1:]

    parser = argparse.ArgumentParser(description="Inspect BinExport2 files")
    parser.add_argument("input_file", type=Path, help="path to input file")
    parser.add_argument("--capa", type=Path, help="path to capa JSON results file")
    parser.add_argument("--assemblage", type=Path, help="path to Assemblage JSONL file")
    parser.add_argument("-d", "--debug", action="store_true", help="enable debugging output on STDERR")
    parser.add_argument("-q", "--quiet", action="store_true", help="disable all output but errors")
    args = parser.parse_args(args=argv)

    logging.basicConfig()
    if args.quiet:
        logging.getLogger().setLevel(logging.WARNING)
    elif args.debug:
        logging.getLogger().setLevel(logging.DEBUG)
    else:
        logging.getLogger().setLevel(logging.INFO)

    theme = Theme(
        {
            "decoration": "grey54",
            "title": "yellow",
            "key": "black",
            "value": "blue",
            "default": "black",
        },
        inherit=False,
    )
    console = Console(theme=theme, markup=False, emoji=False)
    o = Renderer(console)

    be2: BinExport2
    buf: bytes
    try:
        # easiest way to determine if this is a BinExport2 proto is...
        # to just try to decode it.
        buf = args.input_file.read_bytes()
        with timing("loading BinExport2"):
            be2 = BinExport2()
            be2.ParseFromString(buf)

    except google.protobuf.message.DecodeError:
        with timing("analyzing file"):
            input_file: Path = args.input_file
            buf = lancelot.get_binexport2_bytes_from_bytes(input_file.read_bytes())

        with timing("loading BinExport2"):
            be2 = BinExport2()
            be2.ParseFromString(buf)

    with timing("indexing BinExport2"):
        idx = lancelot.be2utils.BinExport2Index(be2)

    matches_by_function: defaultdict[int, set[str]] = defaultdict(set)
    if args.capa:
        with timing("loading capa"):
            # decode explicitly as UTF-8 rather than relying on the platform default encoding.
            doc = json.loads(args.capa.read_text(encoding="utf-8"))

        functions_by_basic_block: dict[int, int] = {}
        for function in doc["meta"]["analysis"]["layout"]["functions"]:
            for basic_block in function["matched_basic_blocks"]:
                functions_by_basic_block[basic_block["address"]["value"]] = function["address"]["value"]

        matches_by_address: defaultdict[int, set[str]] = defaultdict(set)
        for rule_name, results in doc["rules"].items():
            for location, _ in results["matches"]:
                if location["type"] != "absolute":
                    continue
                address = location["value"]
                matches_by_address[address].add(rule_name)

        for address, matches in matches_by_address.items():
            # compare against None explicitly so a falsy (zero) address is not mistaken for "missing".
            if (function := functions_by_basic_block.get(address)) is not None:
                if function in idx.thunks:
                    # forward any capa for a thunk to its target
                    # since viv may not recognize the thunk as a separate function.
                    logger.debug("forwarding capa matches from thunk 0x%x to 0x%x", function, idx.thunks[function])
                    function = idx.thunks[function]

                matches_by_function[function].update(matches)
                for match in matches:
                    logger.info("capa: 0x%x: %s", function, match)
            else:
                # we don't know which function this is.
                # hopefully it's a function recognized in our BinExport analysis.
                # *shrug*
                #
                # apparently viv doesn't emit function entries for thunks?
                # or somehow our layout is messed up.

                if address in idx.thunks:
                    # forward any capa for a thunk to its target
                    # since viv may not recognize the thunk as a separate function.
                    logger.debug("forwarding capa matches from thunk 0x%x to 0x%x", address, idx.thunks[address])
                    address = idx.thunks[address]
                    # since we found the thunk, we know this is a BinExport-recognized function.
                    # so that's nice.
                    for match in matches:
                        logger.info("capa: 0x%x: %s", address, match)
                else:
                    logger.warning("unknown address: 0x%x: %s", address, matches)

                matches_by_function[address].update(matches)

    # guess the base address (which BinExport2 does not track explicitly),
    # by assuming it is the lowest mapped page.
    # when there are no sections at all, fall back to 0 instead of crashing.
    base_address = min((section.address for section in be2.section), default=0)
    logger.info("guessed base address: 0x%x", base_address)

    assemblage_locations_by_va: dict[int, AssemblageLocation] = {}
    if args.assemblage:
        with timing("loading assemblage"):
            with args.assemblage.open("rt", encoding="utf-8") as f:
                for line in f:
                    # lines keep their trailing newline, so strip before testing for blank lines.
                    if not line.strip():
                        continue
                    location = AssemblageLocation.from_json(line)
                    assemblage_locations_by_va[base_address + location.rva] = location

        # update function names for the in-memory BinExport2 using Assemblage data.
        # this won't affect the be2 on disk, because we don't serialize it back out.
        for address, location in assemblage_locations_by_va.items():
            if not location.name:
                continue

            # vertex index 0 is a valid index, so test for None rather than truthiness.
            vertex_index = idx.vertex_index_by_address.get(address)
            if vertex_index is None:
                continue

            be2.call_graph.vertex[vertex_index].demangled_name = location.name

    # index all the callers of each function, resolving thunks.
    # idx.callers_by_vertex_id does not resolve thunks.
    resolved_callers_by_vertex_id = defaultdict(set)
    for edge in be2.call_graph.edge:
        source_index = edge.source_vertex_index

        if lancelot.be2utils.is_thunk_vertex(be2.call_graph.vertex[source_index]):
            # we don't care about the callers that are thunks.
            continue

        if lancelot.be2utils.is_thunk_vertex(be2.call_graph.vertex[edge.target_vertex_index]):
            thunk_vertex = be2.call_graph.vertex[edge.target_vertex_index]
            thunk_address = thunk_vertex.address

            target_address = idx.thunks[thunk_address]
            target_index = idx.vertex_index_by_address[target_address]
            logger.debug(
                "call %s -(thunk)-> %s",
                idx.get_function_name_by_vertex(source_index),
                idx.get_function_name_by_vertex(target_index),
            )
        else:
            target_index = edge.target_vertex_index
            logger.debug(
                "call %s -> %s",
                idx.get_function_name_by_vertex(source_index),
                idx.get_function_name_by_vertex(target_index),
            )
        resolved_callers_by_vertex_id[target_index].add(source_index)

    t0 = time.time()

    with o.section("meta"):
        o.writeln(f"name: {be2.meta_information.executable_name}")
        o.writeln(f"sha256: {be2.meta_information.executable_id}")
        o.writeln(f"arch: {be2.meta_information.architecture_name}")
        o.writeln(f"ts: {be2.meta_information.timestamp}")

    with o.section("modules"):
        for module in be2.module:
            o.writeln(f"- {module.name}")
        if not be2.module:
            o.writeln("(none)")

    with o.section("sections"):
        for section in be2.section:
            perms = ""
            perms += "r" if section.flag_r else "-"
            perms += "w" if section.flag_w else "-"
            perms += "x" if section.flag_x else "-"
            o.writeln(f"- {hex(section.address)} {perms} {hex(section.size)}")

    with o.section("libraries"):
        for library in be2.library:
            o.writeln(
                f"- {library.name:<12s} {'(static)' if library.is_static else ''}{(' at ' + hex(library.load_address)) if library.HasField('load_address') else ''}"
            )
        if not be2.library:
            o.writeln("(none)")

    # position of each function within the index's iteration order,
    # used to show how far away (and in which direction) callers/callees are.
    vertex_order_by_address = {address: i for (i, address) in enumerate(idx.vertex_index_by_address.keys())}

    with o.section("functions"):
        last_address = None
        for vertex_index in idx.vertex_index_by_address.values():
            vertex = be2.call_graph.vertex[vertex_index]
            vertex_order = vertex_order_by_address[vertex.address]

            if vertex.HasField("library_index"):
                continue

            if vertex.HasField("module_index"):
                continue

            function_name = idx.get_function_name_by_vertex(vertex_index)

            if last_address:
                # emit a separator whenever the Assemblage source path changes between
                # consecutive functions; functions without Assemblage data are skipped.
                try:
                    last_path = assemblage_locations_by_va[last_address].path
                    path = assemblage_locations_by_va[vertex.address].path
                    if last_path != path:
                        o.print(o.markup("[blue]~~~~~~~~~~~~~~~~~~~~~~~~~~~~~[/] [title]file[/] {path}\n", path=path))
                except KeyError:
                    pass
            last_address = vertex.address

            if lancelot.be2utils.is_thunk_vertex(vertex):
                # thunks only get a title line, no body.
                with o.section(
                    o.markup(
                        "thunk [default]{function_name}[/] [decoration]@ {function_address}[/]",
                        function_name=function_name,
                        function_address=hex(vertex.address),
                    )
                ):
                    continue

            with o.section(
                o.markup(
                    "function [default]{function_name}[/] [decoration]@ {function_address}[/]",
                    function_name=function_name,
                    function_address=hex(vertex.address),
                )
            ):
                if vertex.address in idx.thunks:
                    o.writeln("")
                    continue

                # keep the xrefs separate from the calls, since they're visually hard to distinguish.
                # use local index of callers that has resolved intermediate thunks,
                # since they are sometimes stored in a physically distant location.
                for caller_index in resolved_callers_by_vertex_id.get(vertex_index, []):
                    caller_vertex = be2.call_graph.vertex[caller_index]
                    caller_order = vertex_order_by_address[caller_vertex.address]
                    caller_delta = caller_order - vertex_order
                    if caller_delta < 0:
                        direction = "↑"
                    else:
                        direction = "↓"

                    o.print(
                        "xref: [decoration]{direction}[/] {name} [decoration]({delta:+})[/]",
                        direction=direction,
                        name=idx.get_function_name_by_vertex(caller_index),
                        delta=caller_delta,
                    )

                if vertex.address not in idx.flow_graph_index_by_address:
                    num_basic_blocks = 0
                    num_instructions = 0
                    num_edges = 0
                    total_instruction_size = 0
                else:
                    flow_graph_index = idx.flow_graph_index_by_address[vertex.address]
                    flow_graph = be2.flow_graph[flow_graph_index]
                    num_basic_blocks = len(flow_graph.basic_block_index)
                    num_instructions = sum(
                        len(list(idx.instruction_indices(be2.basic_block[bb_idx])))
                        for bb_idx in flow_graph.basic_block_index
                    )
                    num_edges = len(flow_graph.edge)
                    total_instruction_size = 0
                    for bb_idx in flow_graph.basic_block_index:
                        basic_block = be2.basic_block[bb_idx]
                        for _, instruction, _ in idx.basic_block_instructions(basic_block):
                            total_instruction_size += len(instruction.raw_bytes)

                o.writeln(
                    f"B/E/I: {num_basic_blocks} / {num_edges} / {num_instructions} ({total_instruction_size} bytes)"
                )

                for match in matches_by_function.get(vertex.address, []):
                    o.writeln(f"capa: {match}")

                if vertex.address in idx.flow_graph_index_by_address:
                    flow_graph_index = idx.flow_graph_index_by_address[vertex.address]
                    flow_graph = be2.flow_graph[flow_graph_index]

                    seen_callees = set()

                    # first pass: calls to non-library functions, with thunks resolved to their targets.
                    for basic_block_index in flow_graph.basic_block_index:
                        basic_block = be2.basic_block[basic_block_index]

                        for instruction_index, instruction, _ in idx.basic_block_instructions(basic_block):
                            if instruction.call_target:
                                for call_target_address in instruction.call_target:
                                    if call_target_address in idx.thunks:
                                        call_target_address = idx.thunks[call_target_address]

                                    call_target_index = idx.vertex_index_by_address[call_target_address]
                                    call_target_vertex = be2.call_graph.vertex[call_target_index]

                                    if call_target_vertex.HasField("library_index"):
                                        continue

                                    if call_target_vertex.address in seen_callees:
                                        continue
                                    seen_callees.add(call_target_vertex.address)

                                    call_target_order = vertex_order_by_address[call_target_address]
                                    call_target_delta = call_target_order - vertex_order
                                    call_target_name = idx.get_function_name_by_address(call_target_address)
                                    if call_target_delta < 0:
                                        direction = "↑"
                                    else:
                                        direction = "↓"

                                    o.print(
                                        "calls: [decoration]{direction}[/] {name} [decoration]({delta:+})[/]",
                                        direction=direction,
                                        name=call_target_name,
                                        delta=call_target_delta,
                                    )

                    # second pass: calls to library functions, reported as APIs.
                    for basic_block_index in flow_graph.basic_block_index:
                        basic_block = be2.basic_block[basic_block_index]

                        for instruction_index, instruction, _ in idx.basic_block_instructions(basic_block):
                            if instruction.call_target:
                                for call_target_address in instruction.call_target:
                                    call_target_index = idx.vertex_index_by_address[call_target_address]
                                    call_target_vertex = be2.call_graph.vertex[call_target_index]

                                    if not call_target_vertex.HasField("library_index"):
                                        continue

                                    if call_target_vertex.address in seen_callees:
                                        continue
                                    seen_callees.add(call_target_vertex.address)

                                    call_target_name = idx.get_function_name_by_address(call_target_address)
                                    o.print(
                                        "api: {name}",
                                        name=call_target_name,
                                    )

                    # third pass: strings referenced by the function's instructions, de-duplicated.
                    seen_strings = set()
                    for basic_block_index in flow_graph.basic_block_index:
                        basic_block = be2.basic_block[basic_block_index]

                        for instruction_index, instruction, _ in idx.basic_block_instructions(basic_block):
                            if instruction_index in idx.string_reference_index_by_source_instruction_index:
                                for string_reference_index in idx.string_reference_index_by_source_instruction_index[
                                    instruction_index
                                ]:
                                    string_reference = be2.string_reference[string_reference_index]
                                    string_index = string_reference.string_table_index
                                    string = be2.string_table[string_index]

                                    if string in seen_strings:
                                        continue
                                    seen_strings.add(string)

                                    o.print(
                                        'string: [decoration]"[/]{string}[decoration]"[/]',
                                        string=string.rstrip(),
                                    )

                o.print("")

    t1 = time.time()
    logger.debug("perf: rendering BinExport2: %0.2fs", t1 - t0)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
@@ -49,7 +49,7 @@ import capa.helpers
|
||||
import capa.features.insn
|
||||
import capa.capabilities.common
|
||||
from capa.rules import Rule, RuleSet
|
||||
from capa.features.common import OS_AUTO, String, Feature, Substring
|
||||
from capa.features.common import OS_AUTO, Regex, String, Feature, Substring
|
||||
from capa.render.result_document import RuleMetadata
|
||||
|
||||
logger = logging.getLogger("lint")
|
||||
@@ -721,6 +721,76 @@ class FeatureStringTooShort(Lint):
|
||||
return False
|
||||
|
||||
|
||||
class FeatureRegexRegistryControlSetMatchIncomplete(Lint):
    """Lint for regex features that name a registry control set incompletely.

    A regex is considered incomplete when it mentions a control set but does
    not contain the combined alternation given in ``recommendation``, which
    covers both "CurrentControlSet" and the numbered "ControlSetXXX" form.
    """

    name = "feature regex registry control set match incomplete"
    recommendation = (
        'use "(ControlSet\\d{3}|CurrentControlSet)" to match both indirect references '
        + 'via "CurrentControlSet" and direct references via "ControlSetXXX"'
    )

    def check_features(self, ctx: Context, features: list[Feature]):
        """Return True if any regex feature references a control set incompletely.

        Args:
            ctx: lint context; not used by this check.
            features: the features to inspect; only ``Regex`` instances are considered.

        Returns:
            True as soon as one regex mentions a control set without the
            recommended alternation, otherwise False.
        """
        for feature in features:
            if not isinstance(feature, Regex):
                continue

            assert isinstance(feature.value, str)

            # compare case-insensitively by lowering the pattern text once.
            pat = feature.value.lower()

            # `and` binds tighter than `or`; the parentheses spell out the
            # grouping the expression always had: either a "system\\" path
            # that mentions a control set, or any mention of "currentcontrolset".
            mentions_control_set = ("system\\\\" in pat and "controlset" in pat) or "currentcontrolset" in pat
            if not mentions_control_set:
                continue

            if "system\\\\(controlset\\d{3}|currentcontrolset)" not in pat:
                return True

        # only report a clean result after every regex feature has been examined.
        return False
|
||||
|
||||
|
||||
class FeatureRegexContainsUnescapedPeriod(Lint):
    """Warn when the first period in a regex feature is not escaped.

    An unescaped period matches any character, which is usually not what is
    meant in patterns such as "/test.exe/". A period directly followed by a
    quantifier (like "/VB5!.*/") is accepted as a deliberate dot operator.
    Only the first period of each pattern is examined.
    """

    name = "feature regex contains unescaped period"
    recommendation_template = 'escape the period in "{:s}" unless it should be treated as a regex dot operator'
    level = Lint.WARN

    @staticmethod
    def _first_period_is_suspicious(pat: str) -> bool:
        """Return True if the first period in `pat` is unescaped and unquantified."""
        index = pat.find(".")
        if index == -1:
            return False

        # like "/VB5!.*/": a quantified dot is clearly the regex operator.
        # the slice is empty (and so never matches) when the period is last.
        if pat[index + 1 : index + 2] in ("*", "+", "?", "{"):
            return False

        # count the run of backslashes immediately before the period.
        # an even count (including zero) leaves the period unescaped:
        #   "/.exe/"                     -> 0 backslashes, should be "/\.exe/"
        #   "/test.exe/"                 -> 0 backslashes, should be "/test\.exe/"
        #   "/\\\\.\\pipe\\VBoxTrayIPC/" -> 4 backslashes, i.e. two escaped
        #                                   backslashes followed by a bare period
        # counting the run directly also handles a run that reaches the very
        # start of the pattern.
        prefix = pat[:index]
        backslash_count = len(prefix) - len(prefix.rstrip("\\"))
        return backslash_count % 2 == 0

    def check_features(self, ctx: Context, features: list[Feature]):
        """Return True if any regex feature has a suspicious first period.

        On a hit, `self.recommendation` is set from `recommendation_template`
        using the offending feature's value.

        Args:
            ctx: lint context; not used by this check.
            features: the features to inspect; only ``Regex`` instances are considered.

        Returns:
            True for the first offending regex feature, otherwise False.
        """
        for feature in features:
            if not isinstance(feature, Regex):
                continue

            assert isinstance(feature.value, str)

            # strip the surrounding "/.../" or "/.../i" delimiters.
            pat = feature.value.removeprefix("/")
            pat = pat.removesuffix("/i").removesuffix("/")

            # a clean pattern must not end the scan: keep checking the
            # remaining features instead of returning early.
            if self._first_period_is_suspicious(pat):
                self.recommendation = self.recommendation_template.format(feature.value)
                return True

        return False
|
||||
|
||||
|
||||
class FeatureNegativeNumber(Lint):
|
||||
name = "feature value is negative"
|
||||
recommendation = "specify the number's two's complement representation"
|
||||
@@ -931,7 +1001,13 @@ def lint_meta(ctx: Context, rule: Rule):
|
||||
return run_lints(META_LINTS, ctx, rule)
|
||||
|
||||
|
||||
FEATURE_LINTS = (FeatureStringTooShort(), FeatureNegativeNumber(), FeatureNtdllNtoskrnlApi())
|
||||
FEATURE_LINTS = (
|
||||
FeatureStringTooShort(),
|
||||
FeatureNegativeNumber(),
|
||||
FeatureNtdllNtoskrnlApi(),
|
||||
FeatureRegexContainsUnescapedPeriod(),
|
||||
FeatureRegexRegistryControlSetMatchIncomplete(),
|
||||
)
|
||||
|
||||
|
||||
def lint_features(ctx: Context, rule: Rule):
|
||||
|
||||
Submodule tests/data updated: 6cf615dd01...6cb0838954
1281
web/explorer/package-lock.json
generated
1281
web/explorer/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@@ -26,15 +26,15 @@
|
||||
},
|
||||
"devDependencies": {
|
||||
"@rushstack/eslint-patch": "^1.8.0",
|
||||
"@vitejs/plugin-vue": "^5.0.5",
|
||||
"@vitejs/plugin-vue": "^5.2.3",
|
||||
"@vue/eslint-config-prettier": "^9.0.0",
|
||||
"@vue/test-utils": "^2.4.6",
|
||||
"eslint": "^8.57.0",
|
||||
"eslint-plugin-vue": "^9.23.0",
|
||||
"jsdom": "^24.1.0",
|
||||
"prettier": "^3.2.5",
|
||||
"vite": "^5.4.14",
|
||||
"vite-plugin-singlefile": "^2.0.2",
|
||||
"vitest": "^1.6.0"
|
||||
"vite": "^6.2.3",
|
||||
"vite-plugin-singlefile": "^2.2.0",
|
||||
"vitest": "^3.0.9"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -214,22 +214,36 @@
|
||||
|
||||
<ul class="mt-2 ps-5">
|
||||
<!-- TODO(williballenthin): add date -->
|
||||
|
||||
<li>
|
||||
added:
|
||||
<a href="./rules/use bigint function/">
|
||||
use bigint function
|
||||
<a href="./rules/change registry key timestamp/">
|
||||
change registry key timestamp
|
||||
</a>
|
||||
</li>
|
||||
|
||||
<li>
|
||||
added:
|
||||
<a href="./rules/encrypt data using RSA via embedded library/">
|
||||
encrypt data using RSA via embedded library
|
||||
<a href="./rules/check mutex and terminate process on windows/">
|
||||
check mutex and terminate process on Windows
|
||||
</a>
|
||||
</li>
|
||||
|
||||
<li>
|
||||
added:
|
||||
<a href="./rules/clear windows event logs remotely/">
|
||||
clear windows event logs remotely
|
||||
</a>
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
<h2 class="mt-3">Tool Updates</h2>
|
||||
|
||||
<h3 class="mt-2">v9.1.0 (<em>2025-03-02</em>)</h3>
|
||||
<p class="mt-0">
|
||||
This release improves a few aspects of dynamic analysis; for example, it relaxes our validation of fields across many CAPE versions.
|
||||
It also includes an updated rule pack in which many dynamic rules make better use of the "span of calls" scope.
|
||||
</p>
|
||||
|
||||
<h3 class="mt-2">v9.0.0 (<em>2025-02-05</em>)</h3>
|
||||
<p class="mt-0">
|
||||
|
||||
Reference in New Issue
Block a user