mirror of
https://github.com/mandiant/capa.git
synced 2025-12-13 08:00:44 -08:00
Compare commits
8 Commits
add-codema
...
feat/2620
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c8131bd35b | ||
|
|
81419db62a | ||
|
|
664a6d8043 | ||
|
|
4008775786 | ||
|
|
7d28cf8016 | ||
|
|
c057a3b927 | ||
|
|
02405e2159 | ||
|
|
3acc0fe147 |
14
CHANGELOG.md
14
CHANGELOG.md
@@ -6,28 +6,16 @@
|
||||
|
||||
### Breaking Changes
|
||||
|
||||
### New Rules (15)
|
||||
### New Rules (4)
|
||||
|
||||
- communication/socket/connect-socket moritz.raabe@mandiant.com joakim@intezer.com mrhafizfarhad@gmail.com
|
||||
- communication/socket/udp/connect-udp-socket mrhafizfarhad@gmail.com
|
||||
- nursery/enter-debug-mode-in-dotnet @v1bh475u
|
||||
- nursery/decrypt-data-using-tripledes-in-dotnet 0xRavenspar
|
||||
- nursery/encrypt-data-using-tripledes-in-dotnet 0xRavenspar
|
||||
- nursery/disable-system-features-via-registry-on-windows mehunhoff@google.com
|
||||
- data-manipulation/encryption/chaskey/encrypt-data-using-chaskey still@teamt5.org
|
||||
- data-manipulation/encryption/speck/encrypt-data-using-speck still@teamt5.org
|
||||
- load-code/dotnet/load-assembly-via-iassembly still@teamt5.org
|
||||
- malware-family/donut-loader/load-shellcode-via-donut still@teamt5.org
|
||||
- nursery/disable-device-guard-features-via-registry-on-windows mehunhoff@google.com
|
||||
- nursery/disable-firewall-features-via-registry-on-windows mehunhoff@google.com
|
||||
- nursery/disable-system-restore-features-via-registry-on-windows mehunhoff@google.com
|
||||
- nursery/disable-windows-defender-features-via-registry-on-windows mehunhoff@google.com
|
||||
-
|
||||
|
||||
### Bug Fixes
|
||||
- cape: make some fields optional @williballenthin #2631 #2632
|
||||
- lint: add WARN for regex features that contain unescaped dot #2635
|
||||
- lint: add ERROR for incomplete registry control set regex #2643
|
||||
|
||||
### capa Explorer Web
|
||||
|
||||
|
||||
@@ -21,9 +21,9 @@ import capa.features.extractors.cape.file
|
||||
import capa.features.extractors.cape.thread
|
||||
import capa.features.extractors.cape.global_
|
||||
import capa.features.extractors.cape.process
|
||||
from capa.exceptions import EmptyReportError, UnsupportedFormatError
|
||||
from capa.exceptions import EmptyReportError
|
||||
from capa.features.common import Feature
|
||||
from capa.features.address import Address, AbsoluteVirtualAddress, _NoAddress
|
||||
from capa.features.address import NO_ADDRESS, Address, AbsoluteVirtualAddress, _NoAddress
|
||||
from capa.features.extractors.cape.models import Call, Static, Process, CapeReport
|
||||
from capa.features.extractors.base_extractor import (
|
||||
CallHandle,
|
||||
@@ -53,9 +53,14 @@ class CapeExtractor(DynamicFeatureExtractor):
|
||||
self.global_features = list(capa.features.extractors.cape.global_.extract_features(self.report))
|
||||
|
||||
def get_base_address(self) -> Union[AbsoluteVirtualAddress, _NoAddress, None]:
|
||||
if self.report.static is None:
|
||||
return NO_ADDRESS
|
||||
|
||||
if self.report.static.pe is None:
|
||||
# TODO: handle ELF
|
||||
return NO_ADDRESS
|
||||
|
||||
# value according to the PE header, the actual trace may use a different imagebase
|
||||
assert self.report.static is not None
|
||||
assert self.report.static.pe is not None
|
||||
return AbsoluteVirtualAddress(self.report.static.pe.imagebase)
|
||||
|
||||
def extract_global_features(self) -> Iterator[tuple[Feature, Address]]:
|
||||
@@ -120,8 +125,10 @@ class CapeExtractor(DynamicFeatureExtractor):
|
||||
parts.append(" -> ")
|
||||
if call.pretty_return:
|
||||
parts.append(call.pretty_return)
|
||||
else:
|
||||
elif call.return_:
|
||||
parts.append(hex(call.return_))
|
||||
else:
|
||||
parts.append("?")
|
||||
|
||||
return "".join(parts)
|
||||
|
||||
@@ -132,25 +139,11 @@ class CapeExtractor(DynamicFeatureExtractor):
|
||||
if cr.info.version not in TESTED_VERSIONS:
|
||||
logger.warning("CAPE version '%s' not tested/supported yet", cr.info.version)
|
||||
|
||||
# TODO(mr-tz): support more file types
|
||||
# https://github.com/mandiant/capa/issues/1933
|
||||
if "PE" not in cr.target.file.type:
|
||||
logger.error(
|
||||
"capa currently only supports PE target files, this target file's type is: '%s'.\nPlease report this at: https://github.com/mandiant/capa/issues/1933",
|
||||
cr.target.file.type,
|
||||
)
|
||||
|
||||
# observed in 2.4-CAPE reports from capesandbox.com
|
||||
if cr.static is None and cr.target.file.pe is not None:
|
||||
cr.static = Static()
|
||||
cr.static.pe = cr.target.file.pe
|
||||
|
||||
if cr.static is None:
|
||||
raise UnsupportedFormatError("CAPE report missing static analysis")
|
||||
|
||||
if cr.static.pe is None:
|
||||
raise UnsupportedFormatError("CAPE report missing PE analysis")
|
||||
|
||||
if len(cr.behavior.processes) == 0:
|
||||
raise EmptyReportError("CAPE did not capture any processes")
|
||||
|
||||
|
||||
@@ -32,7 +32,13 @@ def get_processes(report: CapeReport) -> Iterator[ProcessHandle]:
|
||||
"""
|
||||
seen_processes = {}
|
||||
for process in report.behavior.processes:
|
||||
addr = ProcessAddress(pid=process.process_id, ppid=process.parent_id)
|
||||
if process.parent_id is None:
|
||||
# on CAPE for Linux, the root process may have no parent id, so we set that to 0
|
||||
ppid = 0
|
||||
else:
|
||||
ppid = process.parent_id
|
||||
|
||||
addr = ProcessAddress(pid=process.process_id, ppid=ppid)
|
||||
yield ProcessHandle(address=addr, inner=process)
|
||||
|
||||
# check for pid and ppid reuse
|
||||
@@ -52,7 +58,13 @@ def extract_import_names(report: CapeReport) -> Iterator[tuple[Feature, Address]
|
||||
"""
|
||||
extract imported function names
|
||||
"""
|
||||
assert report.static is not None and report.static.pe is not None
|
||||
if report.static is None:
|
||||
return
|
||||
|
||||
if report.static.pe is None:
|
||||
# TODO: elf
|
||||
return
|
||||
|
||||
imports = report.static.pe.imports
|
||||
|
||||
if isinstance(imports, dict):
|
||||
@@ -70,13 +82,25 @@ def extract_import_names(report: CapeReport) -> Iterator[tuple[Feature, Address]
|
||||
|
||||
|
||||
def extract_export_names(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
|
||||
assert report.static is not None and report.static.pe is not None
|
||||
if report.static is None:
|
||||
return
|
||||
|
||||
if report.static.pe is None:
|
||||
# TODO: elf
|
||||
return
|
||||
|
||||
for function in report.static.pe.exports:
|
||||
yield Export(function.name), AbsoluteVirtualAddress(function.address)
|
||||
|
||||
|
||||
def extract_section_names(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
|
||||
assert report.static is not None and report.static.pe is not None
|
||||
if report.static is None:
|
||||
return
|
||||
|
||||
if report.static.pe is None:
|
||||
# TODO: elf
|
||||
return
|
||||
|
||||
for section in report.static.pe.sections:
|
||||
yield Section(section.name), AbsoluteVirtualAddress(section.virtual_address)
|
||||
|
||||
|
||||
@@ -42,9 +42,6 @@ def extract_arch(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
|
||||
yield Arch(ARCH_AMD64), NO_ADDRESS
|
||||
else:
|
||||
logger.warning("unrecognized Architecture: %s", report.target.file.type)
|
||||
raise ValueError(
|
||||
f"unrecognized Architecture from the CAPE report; output of file command: {report.target.file.type}"
|
||||
)
|
||||
|
||||
|
||||
def extract_format(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
|
||||
@@ -54,9 +51,6 @@ def extract_format(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
|
||||
yield Format(FORMAT_ELF), NO_ADDRESS
|
||||
else:
|
||||
logger.warning("unknown file format, file command output: %s", report.target.file.type)
|
||||
raise ValueError(
|
||||
f"unrecognized file format from the CAPE report; output of file command: {report.target.file.type}"
|
||||
)
|
||||
|
||||
|
||||
def extract_os(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
|
||||
@@ -80,7 +74,10 @@ def extract_os(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
|
||||
else:
|
||||
# if the operating system information is missing from the cape report, it's likely a bug
|
||||
logger.warning("unrecognized OS: %s", file_output)
|
||||
raise ValueError(f"unrecognized OS from the CAPE report; output of file command: {file_output}")
|
||||
elif report.info.machine and report.info.machine.platform == "windows":
|
||||
yield OS(OS_WINDOWS), NO_ADDRESS
|
||||
elif report.info.machine and report.info.machine.platform == "linux":
|
||||
yield OS(OS_LINUX), NO_ADDRESS
|
||||
else:
|
||||
# the sample is shellcode
|
||||
logger.debug("unsupported file format, file command output: %s", file_output)
|
||||
|
||||
@@ -29,8 +29,26 @@ def validate_hex_bytes(value):
|
||||
return bytes.fromhex(value) if isinstance(value, str) else value
|
||||
|
||||
|
||||
def validate_status_code(value):
|
||||
if isinstance(value, str):
|
||||
if value == "?":
|
||||
# TODO: check for this in the return handling
|
||||
return None
|
||||
|
||||
# like: -1 EINVAL (Invalid argument)
|
||||
# like: 0 (Timeout)
|
||||
# like: 0x8002 (flags O_RDWR|O_LARGEFILE)
|
||||
assert value.endswith(")")
|
||||
num = value.partition(" ")[0]
|
||||
return int(num, 16) if num.startswith("0x") else int(num, 10)
|
||||
else:
|
||||
return value
|
||||
|
||||
|
||||
HexInt = Annotated[int, BeforeValidator(validate_hex_int)]
|
||||
HexBytes = Annotated[bytes, BeforeValidator(validate_hex_bytes)]
|
||||
# this is a status code, such as returned by CAPE for Linux, like: "0 (Timeout)" or "0x8002 (flags O_RDWR|O_LARGEFILE)
|
||||
StatusCode = Annotated[int | None, BeforeValidator(validate_status_code)]
|
||||
|
||||
|
||||
# a model that *cannot* have extra fields
|
||||
@@ -71,8 +89,13 @@ Emptydict: TypeAlias = BaseModel
|
||||
EmptyList: TypeAlias = list[Any]
|
||||
|
||||
|
||||
class Machine(FlexibleModel):
|
||||
platform: Optional[str] = None
|
||||
|
||||
|
||||
class Info(FlexibleModel):
|
||||
version: str
|
||||
machine: Optional[Machine] = None
|
||||
|
||||
|
||||
class ImportedSymbol(FlexibleModel):
|
||||
@@ -287,16 +310,38 @@ class Argument(FlexibleModel):
|
||||
pretty_value: Optional[str] = None
|
||||
|
||||
|
||||
def validate_argument(value):
|
||||
if isinstance(value, str):
|
||||
# for a few calls on CAPE for Linux, we see arguments like in this call:
|
||||
#
|
||||
# timestamp: "18:12:17.199276"
|
||||
# category: "misc"
|
||||
# api: "uname"
|
||||
# return: "0"
|
||||
# ▽ arguments:
|
||||
# [0]: "{sysname=\"Linux\", nodename=\"laptop\", ...}"
|
||||
#
|
||||
# which is just a string with a JSON-like thing inside,
|
||||
# that we want to map a default unnamed argument.
|
||||
return Argument(name="", value=value)
|
||||
else:
|
||||
return value
|
||||
|
||||
|
||||
# mypy isn't happy about assigning to type
|
||||
Argument = Annotated[Argument, BeforeValidator(validate_argument)] # type: ignore
|
||||
|
||||
|
||||
class Call(FlexibleModel):
|
||||
# timestamp: str
|
||||
thread_id: int
|
||||
thread_id: int | None = None
|
||||
# category: str
|
||||
|
||||
api: str
|
||||
|
||||
arguments: list[Argument]
|
||||
# status: bool
|
||||
return_: HexInt = Field(alias="return")
|
||||
return_: HexInt | StatusCode = Field(alias="return")
|
||||
pretty_return: Optional[str] = None
|
||||
|
||||
# repeated: int
|
||||
@@ -315,12 +360,12 @@ class Call(FlexibleModel):
|
||||
class Process(FlexibleModel):
|
||||
process_id: int
|
||||
process_name: str
|
||||
parent_id: int
|
||||
parent_id: int | None
|
||||
# module_path: str
|
||||
# first_seen: str
|
||||
calls: list[Call]
|
||||
threads: list[int]
|
||||
environ: dict[str, str]
|
||||
threads: list[int] | None = None # this can be None for CAPE for Linux, which doesn't track threads.
|
||||
environ: dict[str, str] = Field(default_factory=dict) # type: ignore
|
||||
|
||||
|
||||
"""
|
||||
|
||||
@@ -29,6 +29,13 @@ def get_threads(ph: ProcessHandle) -> Iterator[ThreadHandle]:
|
||||
get the threads associated with a given process
|
||||
"""
|
||||
process: Process = ph.inner
|
||||
|
||||
if not process.threads:
|
||||
# CAPE for linux doesn't record threads
|
||||
# so we return a default 0 value
|
||||
yield ThreadHandle(address=ThreadAddress(process=ph.address, tid=0), inner={})
|
||||
return
|
||||
|
||||
threads: list[int] = process.threads
|
||||
|
||||
for thread in threads:
|
||||
@@ -42,6 +49,9 @@ def extract_environ_strings(ph: ProcessHandle) -> Iterator[tuple[Feature, Addres
|
||||
"""
|
||||
process: Process = ph.inner
|
||||
|
||||
if not process.environ:
|
||||
return
|
||||
|
||||
for value in (value for value in process.environ.values() if value):
|
||||
yield String(value), ph.address
|
||||
|
||||
|
||||
@@ -29,8 +29,16 @@ def get_calls(ph: ProcessHandle, th: ThreadHandle) -> Iterator[CallHandle]:
|
||||
|
||||
tid = th.address.tid
|
||||
for call_index, call in enumerate(process.calls):
|
||||
if call.thread_id != tid:
|
||||
continue
|
||||
|
||||
if call.thread_id is None:
|
||||
# CAPE for linux doesn't record threads
|
||||
# so this must be the 0 value
|
||||
# and we'll enumerate all the calls in this process
|
||||
assert tid == 0
|
||||
|
||||
else:
|
||||
if call.thread_id != tid:
|
||||
continue
|
||||
|
||||
for symbol in generate_symbols("", call.api):
|
||||
call.api = symbol
|
||||
|
||||
2
rules
2
rules
Submodule rules updated: d64c2c91ea...e85887a875
@@ -1,490 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# /// script
|
||||
# requires-python = ">=3.12"
|
||||
# dependencies = [
|
||||
# "protobuf",
|
||||
# "python-lancelot",
|
||||
# "rich",
|
||||
# ]
|
||||
# ///
|
||||
#
|
||||
# TODO:
|
||||
# - ignore stack cookie check
|
||||
import sys
|
||||
import json
|
||||
import time
|
||||
import logging
|
||||
import argparse
|
||||
import contextlib
|
||||
from typing import Any
|
||||
from pathlib import Path
|
||||
from collections import defaultdict
|
||||
from dataclasses import dataclass
|
||||
|
||||
import lancelot
|
||||
import rich.padding
|
||||
import lancelot.be2utils
|
||||
import google.protobuf.message
|
||||
from rich.text import Text
|
||||
from rich.theme import Theme
|
||||
from rich.markup import escape
|
||||
from rich.console import Console
|
||||
from lancelot.be2utils.binexport2_pb2 import BinExport2
|
||||
|
||||
logger = logging.getLogger("codemap")
|
||||
|
||||
|
||||
@contextlib.contextmanager
|
||||
def timing(msg: str):
|
||||
t0 = time.time()
|
||||
yield
|
||||
t1 = time.time()
|
||||
logger.debug("perf: %s: %0.2fs", msg, t1 - t0)
|
||||
|
||||
|
||||
class Renderer:
|
||||
def __init__(self, console: Console):
|
||||
self.console: Console = console
|
||||
self.indent: int = 0
|
||||
|
||||
@contextlib.contextmanager
|
||||
def indenting(self):
|
||||
self.indent += 1
|
||||
try:
|
||||
yield
|
||||
finally:
|
||||
self.indent -= 1
|
||||
|
||||
@staticmethod
|
||||
def markup(s: str, **kwargs) -> Text:
|
||||
escaped_args = {k: (escape(v) if isinstance(v, str) else v) for k, v in kwargs.items()}
|
||||
return Text.from_markup(s.format(**escaped_args))
|
||||
|
||||
def print(self, renderable, **kwargs):
|
||||
if not kwargs:
|
||||
return self.console.print(rich.padding.Padding(renderable, (0, 0, 0, self.indent * 2)))
|
||||
|
||||
assert isinstance(renderable, str)
|
||||
return self.print(self.markup(renderable, **kwargs))
|
||||
|
||||
def writeln(self, s: str):
|
||||
self.print(s)
|
||||
|
||||
@contextlib.contextmanager
|
||||
def section(self, name):
|
||||
if isinstance(name, str):
|
||||
self.print("[title]{name}", name=name)
|
||||
elif isinstance(name, Text):
|
||||
name = name.copy()
|
||||
name.stylize_before(self.console.get_style("title"))
|
||||
self.print(name)
|
||||
else:
|
||||
raise ValueError("unexpected section name")
|
||||
|
||||
with self.indenting():
|
||||
yield
|
||||
|
||||
|
||||
@dataclass
|
||||
class AssemblageLocation:
|
||||
name: str
|
||||
file: str
|
||||
prototype: str
|
||||
rva: int
|
||||
|
||||
@property
|
||||
def path(self):
|
||||
if not self.file.endswith(")"):
|
||||
return self.file
|
||||
|
||||
return self.file.rpartition(" (")[0]
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: dict[str, Any]):
|
||||
return cls(
|
||||
name=data["name"],
|
||||
file=data["file"],
|
||||
prototype=data["prototype"],
|
||||
rva=data["function_start"],
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def from_json(doc: str):
|
||||
return AssemblageLocation.from_dict(json.loads(doc))
|
||||
|
||||
|
||||
def main(argv: list[str] | None = None):
|
||||
if argv is None:
|
||||
argv = sys.argv[1:]
|
||||
|
||||
parser = argparse.ArgumentParser(description="Inspect BinExport2 files")
|
||||
parser.add_argument("input_file", type=Path, help="path to input file")
|
||||
parser.add_argument("--capa", type=Path, help="path to capa JSON results file")
|
||||
parser.add_argument("--assemblage", type=Path, help="path to Assemblage JSONL file")
|
||||
parser.add_argument("-d", "--debug", action="store_true", help="enable debugging output on STDERR")
|
||||
parser.add_argument("-q", "--quiet", action="store_true", help="disable all output but errors")
|
||||
args = parser.parse_args(args=argv)
|
||||
|
||||
logging.basicConfig()
|
||||
if args.quiet:
|
||||
logging.getLogger().setLevel(logging.WARNING)
|
||||
elif args.debug:
|
||||
logging.getLogger().setLevel(logging.DEBUG)
|
||||
else:
|
||||
logging.getLogger().setLevel(logging.INFO)
|
||||
|
||||
theme = Theme(
|
||||
{
|
||||
"decoration": "grey54",
|
||||
"title": "yellow",
|
||||
"key": "black",
|
||||
"value": "blue",
|
||||
"default": "black",
|
||||
},
|
||||
inherit=False,
|
||||
)
|
||||
console = Console(theme=theme, markup=False, emoji=False)
|
||||
o = Renderer(console)
|
||||
|
||||
be2: BinExport2
|
||||
buf: bytes
|
||||
try:
|
||||
# easiest way to determine if this is a BinExport2 proto is...
|
||||
# to just try to decode it.
|
||||
buf = args.input_file.read_bytes()
|
||||
with timing("loading BinExport2"):
|
||||
be2 = BinExport2()
|
||||
be2.ParseFromString(buf)
|
||||
|
||||
except google.protobuf.message.DecodeError:
|
||||
with timing("analyzing file"):
|
||||
input_file: Path = args.input_file
|
||||
buf = lancelot.get_binexport2_bytes_from_bytes(input_file.read_bytes())
|
||||
|
||||
with timing("loading BinExport2"):
|
||||
be2 = BinExport2()
|
||||
be2.ParseFromString(buf)
|
||||
|
||||
with timing("indexing BinExport2"):
|
||||
idx = lancelot.be2utils.BinExport2Index(be2)
|
||||
|
||||
matches_by_function: defaultdict[int, set[str]] = defaultdict(set)
|
||||
if args.capa:
|
||||
with timing("loading capa"):
|
||||
doc = json.loads(args.capa.read_text())
|
||||
|
||||
functions_by_basic_block: dict[int, int] = {}
|
||||
for function in doc["meta"]["analysis"]["layout"]["functions"]:
|
||||
for basic_block in function["matched_basic_blocks"]:
|
||||
functions_by_basic_block[basic_block["address"]["value"]] = function["address"]["value"]
|
||||
|
||||
matches_by_address: defaultdict[int, set[str]] = defaultdict(set)
|
||||
for rule_name, results in doc["rules"].items():
|
||||
for location, _ in results["matches"]:
|
||||
if location["type"] != "absolute":
|
||||
continue
|
||||
address = location["value"]
|
||||
matches_by_address[location["value"]].add(rule_name)
|
||||
|
||||
for address, matches in matches_by_address.items():
|
||||
if function := functions_by_basic_block.get(address):
|
||||
if function in idx.thunks:
|
||||
# forward any capa for a thunk to its target
|
||||
# since viv may not recognize the thunk as a separate function.
|
||||
logger.debug("forwarding capa matches from thunk 0x%x to 0x%x", function, idx.thunks[function])
|
||||
function = idx.thunks[function]
|
||||
|
||||
matches_by_function[function].update(matches)
|
||||
for match in matches:
|
||||
logger.info("capa: 0x%x: %s", function, match)
|
||||
else:
|
||||
# we don't know which function this is.
|
||||
# hopefully its a function recognized in our BinExport analysis.
|
||||
# *shrug*
|
||||
#
|
||||
# apparently viv doesn't emit function entries for thunks?
|
||||
# or somehow our layout is messed up.
|
||||
|
||||
if address in idx.thunks:
|
||||
# forward any capa for a thunk to its target
|
||||
# since viv may not recognize the thunk as a separate function.
|
||||
logger.debug("forwarding capa matches from thunk 0x%x to 0x%x", address, idx.thunks[address])
|
||||
address = idx.thunks[address]
|
||||
# since we found the thunk, we know this is a BinExport-recognized function.
|
||||
# so thats nice.
|
||||
for match in matches:
|
||||
logger.info("capa: 0x%x: %s", address, match)
|
||||
else:
|
||||
logger.warning("unknown address: 0x%x: %s", address, matches)
|
||||
|
||||
matches_by_function[address].update(matches)
|
||||
|
||||
# guess the base address (which BinExport2) does not track explicitly,
|
||||
# by assuming it is the lowest mapped page.
|
||||
base_address = min(map(lambda section: section.address, be2.section))
|
||||
logging.info("guessed base address: 0x%x", base_address)
|
||||
|
||||
assemblage_locations_by_va: dict[int, AssemblageLocation] = {}
|
||||
if args.assemblage:
|
||||
with timing("loading assemblage"):
|
||||
with args.assemblage.open("rt", encoding="utf-8") as f:
|
||||
for line in f:
|
||||
if not line:
|
||||
continue
|
||||
location = AssemblageLocation.from_json(line)
|
||||
assemblage_locations_by_va[base_address + location.rva] = location
|
||||
|
||||
# update function names for the in-memory BinExport2 using Assemblage data.
|
||||
# this won't affect the be2 on disk, because we don't serialize it back out.
|
||||
for address, location in assemblage_locations_by_va.items():
|
||||
if not location.name:
|
||||
continue
|
||||
|
||||
if vertex_index := idx.vertex_index_by_address.get(address):
|
||||
vertex = be2.call_graph.vertex[vertex_index].demangled_name = location.name
|
||||
|
||||
# index all the callers of each function, resolving thunks.
|
||||
# idx.callers_by_vertex_id does not resolve thunks.
|
||||
resolved_callers_by_vertex_id = defaultdict(set)
|
||||
for edge in be2.call_graph.edge:
|
||||
source_index = edge.source_vertex_index
|
||||
|
||||
if lancelot.be2utils.is_thunk_vertex(be2.call_graph.vertex[source_index]):
|
||||
# we don't care about the callers that are thunks.
|
||||
continue
|
||||
|
||||
if lancelot.be2utils.is_thunk_vertex(be2.call_graph.vertex[edge.target_vertex_index]):
|
||||
thunk_vertex = be2.call_graph.vertex[edge.target_vertex_index]
|
||||
thunk_address = thunk_vertex.address
|
||||
|
||||
target_address = idx.thunks[thunk_address]
|
||||
target_index = idx.vertex_index_by_address[target_address]
|
||||
logger.debug(
|
||||
"call %s -(thunk)-> %s",
|
||||
idx.get_function_name_by_vertex(source_index),
|
||||
idx.get_function_name_by_vertex(target_index),
|
||||
)
|
||||
else:
|
||||
target_index = edge.target_vertex_index
|
||||
logger.debug(
|
||||
"call %s -> %s",
|
||||
idx.get_function_name_by_vertex(source_index),
|
||||
idx.get_function_name_by_vertex(target_index),
|
||||
)
|
||||
resolved_callers_by_vertex_id[target_index].add(source_index)
|
||||
|
||||
t0 = time.time()
|
||||
|
||||
with o.section("meta"):
|
||||
o.writeln(f"name: {be2.meta_information.executable_name}")
|
||||
o.writeln(f"sha256: {be2.meta_information.executable_id}")
|
||||
o.writeln(f"arch: {be2.meta_information.architecture_name}")
|
||||
o.writeln(f"ts: {be2.meta_information.timestamp}")
|
||||
|
||||
with o.section("modules"):
|
||||
for module in be2.module:
|
||||
o.writeln(f"- {module.name}")
|
||||
if not be2.module:
|
||||
o.writeln("(none)")
|
||||
|
||||
with o.section("sections"):
|
||||
for section in be2.section:
|
||||
perms = ""
|
||||
perms += "r" if section.flag_r else "-"
|
||||
perms += "w" if section.flag_w else "-"
|
||||
perms += "x" if section.flag_x else "-"
|
||||
o.writeln(f"- {hex(section.address)} {perms} {hex(section.size)}")
|
||||
|
||||
with o.section("libraries"):
|
||||
for library in be2.library:
|
||||
o.writeln(
|
||||
f"- {library.name:<12s} {'(static)' if library.is_static else ''}{(' at ' + hex(library.load_address)) if library.HasField('load_address') else ''}"
|
||||
)
|
||||
if not be2.library:
|
||||
o.writeln("(none)")
|
||||
|
||||
vertex_order_by_address = {address: i for (i, address) in enumerate(idx.vertex_index_by_address.keys())}
|
||||
|
||||
with o.section("functions"):
|
||||
last_address = None
|
||||
for _, vertex_index in idx.vertex_index_by_address.items():
|
||||
vertex = be2.call_graph.vertex[vertex_index]
|
||||
vertex_order = vertex_order_by_address[vertex.address]
|
||||
|
||||
if vertex.HasField("library_index"):
|
||||
continue
|
||||
|
||||
if vertex.HasField("module_index"):
|
||||
continue
|
||||
|
||||
function_name = idx.get_function_name_by_vertex(vertex_index)
|
||||
|
||||
if last_address:
|
||||
try:
|
||||
last_path = assemblage_locations_by_va[last_address].path
|
||||
path = assemblage_locations_by_va[vertex.address].path
|
||||
if last_path != path:
|
||||
o.print(o.markup("[blue]~~~~~~~~~~~~~~~~~~~~~~~~~~~~~[/] [title]file[/] {path}\n", path=path))
|
||||
except KeyError:
|
||||
pass
|
||||
last_address = vertex.address
|
||||
|
||||
if lancelot.be2utils.is_thunk_vertex(vertex):
|
||||
with o.section(
|
||||
o.markup(
|
||||
"thunk [default]{function_name}[/] [decoration]@ {function_address}[/]",
|
||||
function_name=function_name,
|
||||
function_address=hex(vertex.address),
|
||||
)
|
||||
):
|
||||
continue
|
||||
|
||||
with o.section(
|
||||
o.markup(
|
||||
"function [default]{function_name}[/] [decoration]@ {function_address}[/]",
|
||||
function_name=function_name,
|
||||
function_address=hex(vertex.address),
|
||||
)
|
||||
):
|
||||
if vertex.address in idx.thunks:
|
||||
o.writeln("")
|
||||
continue
|
||||
|
||||
# keep the xrefs separate from the calls, since they're visually hard to distinguish.
|
||||
# use local index of callers that has resolved intermediate thunks,
|
||||
# since they are sometimes stored in a physically distant location.
|
||||
for caller_index in resolved_callers_by_vertex_id.get(vertex_index, []):
|
||||
caller_vertex = be2.call_graph.vertex[caller_index]
|
||||
caller_order = vertex_order_by_address[caller_vertex.address]
|
||||
caller_delta = caller_order - vertex_order
|
||||
if caller_delta < 0:
|
||||
direction = "↑"
|
||||
else:
|
||||
direction = "↓"
|
||||
|
||||
o.print(
|
||||
"xref: [decoration]{direction}[/] {name} [decoration]({delta:+})[/]",
|
||||
direction=direction,
|
||||
name=idx.get_function_name_by_vertex(caller_index),
|
||||
delta=caller_delta,
|
||||
)
|
||||
|
||||
if vertex.address not in idx.flow_graph_index_by_address:
|
||||
num_basic_blocks = 0
|
||||
num_instructions = 0
|
||||
num_edges = 0
|
||||
total_instruction_size = 0
|
||||
else:
|
||||
flow_graph_index = idx.flow_graph_index_by_address[vertex.address]
|
||||
flow_graph = be2.flow_graph[flow_graph_index]
|
||||
num_basic_blocks = len(flow_graph.basic_block_index)
|
||||
num_instructions = sum(
|
||||
len(list(idx.instruction_indices(be2.basic_block[bb_idx])))
|
||||
for bb_idx in flow_graph.basic_block_index
|
||||
)
|
||||
num_edges = len(flow_graph.edge)
|
||||
total_instruction_size = 0
|
||||
for bb_idx in flow_graph.basic_block_index:
|
||||
basic_block = be2.basic_block[bb_idx]
|
||||
for _, instruction, _ in idx.basic_block_instructions(basic_block):
|
||||
total_instruction_size += len(instruction.raw_bytes)
|
||||
|
||||
o.writeln(
|
||||
f"B/E/I: {num_basic_blocks} / {num_edges} / {num_instructions} ({total_instruction_size} bytes)"
|
||||
)
|
||||
|
||||
for match in matches_by_function.get(vertex.address, []):
|
||||
o.writeln(f"capa: {match}")
|
||||
|
||||
if vertex.address in idx.flow_graph_index_by_address:
|
||||
flow_graph_index = idx.flow_graph_index_by_address[vertex.address]
|
||||
flow_graph = be2.flow_graph[flow_graph_index]
|
||||
|
||||
seen_callees = set()
|
||||
|
||||
for basic_block_index in flow_graph.basic_block_index:
|
||||
basic_block = be2.basic_block[basic_block_index]
|
||||
|
||||
for instruction_index, instruction, _ in idx.basic_block_instructions(basic_block):
|
||||
if instruction.call_target:
|
||||
for call_target_address in instruction.call_target:
|
||||
if call_target_address in idx.thunks:
|
||||
call_target_address = idx.thunks[call_target_address]
|
||||
|
||||
call_target_index = idx.vertex_index_by_address[call_target_address]
|
||||
call_target_vertex = be2.call_graph.vertex[call_target_index]
|
||||
|
||||
if call_target_vertex.HasField("library_index"):
|
||||
continue
|
||||
|
||||
if call_target_vertex.address in seen_callees:
|
||||
continue
|
||||
seen_callees.add(call_target_vertex.address)
|
||||
|
||||
call_target_order = vertex_order_by_address[call_target_address]
|
||||
call_target_delta = call_target_order - vertex_order
|
||||
call_target_name = idx.get_function_name_by_address(call_target_address)
|
||||
if call_target_delta < 0:
|
||||
direction = "↑"
|
||||
else:
|
||||
direction = "↓"
|
||||
|
||||
o.print(
|
||||
"calls: [decoration]{direction}[/] {name} [decoration]({delta:+})[/]",
|
||||
direction=direction,
|
||||
name=call_target_name,
|
||||
delta=call_target_delta,
|
||||
)
|
||||
|
||||
for basic_block_index in flow_graph.basic_block_index:
|
||||
basic_block = be2.basic_block[basic_block_index]
|
||||
|
||||
for instruction_index, instruction, _ in idx.basic_block_instructions(basic_block):
|
||||
if instruction.call_target:
|
||||
for call_target_address in instruction.call_target:
|
||||
call_target_index = idx.vertex_index_by_address[call_target_address]
|
||||
call_target_vertex = be2.call_graph.vertex[call_target_index]
|
||||
|
||||
if not call_target_vertex.HasField("library_index"):
|
||||
continue
|
||||
|
||||
if call_target_vertex.address in seen_callees:
|
||||
continue
|
||||
seen_callees.add(call_target_vertex.address)
|
||||
|
||||
call_target_name = idx.get_function_name_by_address(call_target_address)
|
||||
o.print(
|
||||
"api: {name}",
|
||||
name=call_target_name,
|
||||
)
|
||||
|
||||
seen_strings = set()
|
||||
for basic_block_index in flow_graph.basic_block_index:
|
||||
basic_block = be2.basic_block[basic_block_index]
|
||||
|
||||
for instruction_index, instruction, _ in idx.basic_block_instructions(basic_block):
|
||||
if instruction_index in idx.string_reference_index_by_source_instruction_index:
|
||||
for string_reference_index in idx.string_reference_index_by_source_instruction_index[
|
||||
instruction_index
|
||||
]:
|
||||
string_reference = be2.string_reference[string_reference_index]
|
||||
string_index = string_reference.string_table_index
|
||||
string = be2.string_table[string_index]
|
||||
|
||||
if string in seen_strings:
|
||||
continue
|
||||
seen_strings.add(string)
|
||||
|
||||
o.print(
|
||||
'string: [decoration]"[/]{string}[decoration]"[/]',
|
||||
string=string.rstrip(),
|
||||
)
|
||||
|
||||
o.print("")
|
||||
|
||||
t1 = time.time()
|
||||
logger.debug("perf: rendering BinExport2: %0.2fs", t1 - t0)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
@@ -721,29 +721,6 @@ class FeatureStringTooShort(Lint):
|
||||
return False
|
||||
|
||||
|
||||
class FeatureRegexRegistryControlSetMatchIncomplete(Lint):
|
||||
name = "feature regex registry control set match incomplete"
|
||||
recommendation = (
|
||||
'use "(ControlSet\\d{3}|CurrentControlSet)" to match both indirect references '
|
||||
+ 'via "CurrentControlSet" and direct references via "ControlSetXXX"'
|
||||
)
|
||||
|
||||
def check_features(self, ctx: Context, features: list[Feature]):
|
||||
for feature in features:
|
||||
if not isinstance(feature, (Regex,)):
|
||||
continue
|
||||
|
||||
assert isinstance(feature.value, str)
|
||||
|
||||
pat = feature.value.lower()
|
||||
|
||||
if "system\\\\" in pat and "controlset" in pat or "currentcontrolset" in pat:
|
||||
if "system\\\\(controlset\\d{3}|currentcontrolset)" not in pat:
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
class FeatureRegexContainsUnescapedPeriod(Lint):
|
||||
name = "feature regex contains unescaped period"
|
||||
recommendation_template = 'escape the period in "{:s}" unless it should be treated as a regex dot operator'
|
||||
@@ -1006,7 +983,6 @@ FEATURE_LINTS = (
|
||||
FeatureNegativeNumber(),
|
||||
FeatureNtdllNtoskrnlApi(),
|
||||
FeatureRegexContainsUnescapedPeriod(),
|
||||
FeatureRegexRegistryControlSetMatchIncomplete(),
|
||||
)
|
||||
|
||||
|
||||
|
||||
8
web/explorer/package-lock.json
generated
8
web/explorer/package-lock.json
generated
@@ -27,7 +27,7 @@
|
||||
"eslint-plugin-vue": "^9.23.0",
|
||||
"jsdom": "^24.1.0",
|
||||
"prettier": "^3.2.5",
|
||||
"vite": "^6.2.3",
|
||||
"vite": "^6.2.2",
|
||||
"vite-plugin-singlefile": "^2.2.0",
|
||||
"vitest": "^3.0.9"
|
||||
}
|
||||
@@ -3561,9 +3561,9 @@
|
||||
"dev": true
|
||||
},
|
||||
"node_modules/vite": {
|
||||
"version": "6.2.3",
|
||||
"resolved": "https://registry.npmjs.org/vite/-/vite-6.2.3.tgz",
|
||||
"integrity": "sha512-IzwM54g4y9JA/xAeBPNaDXiBF8Jsgl3VBQ2YQ/wOY6fyW3xMdSoltIV3Bo59DErdqdE6RxUfv8W69DvUorE4Eg==",
|
||||
"version": "6.2.2",
|
||||
"resolved": "https://registry.npmjs.org/vite/-/vite-6.2.2.tgz",
|
||||
"integrity": "sha512-yW7PeMM+LkDzc7CgJuRLMW2Jz0FxMOsVJ8Lv3gpgW9WLcb9cTW+121UEr1hvmfR7w3SegR5ItvYyzVz1vxNJgQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
|
||||
@@ -33,7 +33,7 @@
|
||||
"eslint-plugin-vue": "^9.23.0",
|
||||
"jsdom": "^24.1.0",
|
||||
"prettier": "^3.2.5",
|
||||
"vite": "^6.2.3",
|
||||
"vite": "^6.2.2",
|
||||
"vite-plugin-singlefile": "^2.2.0",
|
||||
"vitest": "^3.0.9"
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user