mirror of
https://github.com/mandiant/capa.git
synced 2025-12-12 07:40:38 -08:00
Compare commits
6 Commits
v9.0.0
...
fix/be2/im
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
7a66dfc025 | ||
|
|
f5db5fd5cf | ||
|
|
9a270e6bdd | ||
|
|
8773bc77ab | ||
|
|
a278bf593a | ||
|
|
f85cd80d90 |
@@ -11,6 +11,8 @@
|
||||
-
|
||||
|
||||
### Bug Fixes
|
||||
- only parse CAPE fields required for analysis @mike-hunhoff #2607
|
||||
- improve _number_ feature extraction for BinExport @mike-hunhoff #2609
|
||||
|
||||
### capa Explorer Web
|
||||
|
||||
|
||||
@@ -349,30 +349,9 @@ def get_operand_register_expression(be2: BinExport2, operand: BinExport2.Operand
|
||||
|
||||
|
||||
def get_operand_immediate_expression(be2: BinExport2, operand: BinExport2.Operand) -> Optional[BinExport2.Expression]:
|
||||
if len(operand.expression_index) == 1:
|
||||
# - type: IMMEDIATE_INT
|
||||
# immediate: 20588728364
|
||||
# parent_index: 0
|
||||
expression: BinExport2.Expression = be2.expression[operand.expression_index[0]]
|
||||
for expression in get_operand_expressions(be2, operand):
|
||||
if expression.type == BinExport2.Expression.IMMEDIATE_INT:
|
||||
return expression
|
||||
|
||||
elif len(operand.expression_index) == 2:
|
||||
# from IDA, which provides a size hint for every operand,
|
||||
# we get the following pattern for immediate constants:
|
||||
#
|
||||
# - type: SIZE_PREFIX
|
||||
# symbol: "b8"
|
||||
# - type: IMMEDIATE_INT
|
||||
# immediate: 20588728364
|
||||
# parent_index: 0
|
||||
expression0: BinExport2.Expression = be2.expression[operand.expression_index[0]]
|
||||
expression1: BinExport2.Expression = be2.expression[operand.expression_index[1]]
|
||||
|
||||
if expression0.type == BinExport2.Expression.SIZE_PREFIX:
|
||||
if expression1.type == BinExport2.Expression.IMMEDIATE_INT:
|
||||
return expression1
|
||||
|
||||
return None
|
||||
|
||||
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from typing import Any, Union, Literal, Optional, Annotated, TypeAlias
|
||||
from typing import Any, Union, Optional, Annotated, TypeAlias
|
||||
|
||||
from pydantic import Field, BaseModel, ConfigDict
|
||||
from pydantic.functional_validators import BeforeValidator
|
||||
@@ -75,34 +75,37 @@ class Info(FlexibleModel):
|
||||
version: str
|
||||
|
||||
|
||||
class ImportedSymbol(ExactModel):
|
||||
class ImportedSymbol(FlexibleModel):
|
||||
address: HexInt
|
||||
name: Optional[str] = None
|
||||
|
||||
|
||||
class ImportedDll(ExactModel):
|
||||
class ImportedDll(FlexibleModel):
|
||||
dll: str
|
||||
imports: list[ImportedSymbol]
|
||||
|
||||
|
||||
class DirectoryEntry(ExactModel):
|
||||
"""
|
||||
class DirectoryEntry(FlexibleModel):
|
||||
name: str
|
||||
virtual_address: HexInt
|
||||
size: HexInt
|
||||
"""
|
||||
|
||||
|
||||
class Section(ExactModel):
|
||||
class Section(FlexibleModel):
|
||||
name: str
|
||||
raw_address: HexInt
|
||||
# raw_address: HexInt
|
||||
virtual_address: HexInt
|
||||
virtual_size: HexInt
|
||||
size_of_data: HexInt
|
||||
characteristics: str
|
||||
characteristics_raw: HexInt
|
||||
entropy: float
|
||||
# virtual_size: HexInt
|
||||
# size_of_data: HexInt
|
||||
# characteristics: str
|
||||
# characteristics_raw: HexInt
|
||||
# entropy: float
|
||||
|
||||
|
||||
class Resource(ExactModel):
|
||||
"""
|
||||
class Resource(FlexibleModel):
|
||||
name: str
|
||||
language: Optional[str] = None
|
||||
sublanguage: str
|
||||
@@ -140,7 +143,7 @@ class DigitalSigner(FlexibleModel):
|
||||
extensions_subjectKeyIdentifier: Optional[str] = None
|
||||
|
||||
|
||||
class AuxSigner(ExactModel):
|
||||
class AuxSigner(FlexibleModel):
|
||||
name: str
|
||||
issued_to: str = Field(alias="Issued to")
|
||||
issued_by: str = Field(alias="Issued by")
|
||||
@@ -148,7 +151,7 @@ class AuxSigner(ExactModel):
|
||||
sha1_hash: str = Field(alias="SHA1 hash")
|
||||
|
||||
|
||||
class Signer(ExactModel):
|
||||
class Signer(FlexibleModel):
|
||||
aux_sha1: Optional[str] = None
|
||||
aux_timestamp: Optional[str] = None
|
||||
aux_valid: Optional[bool] = None
|
||||
@@ -157,60 +160,61 @@ class Signer(ExactModel):
|
||||
aux_signers: Optional[list[AuxSigner]] = None
|
||||
|
||||
|
||||
class Overlay(ExactModel):
|
||||
class Overlay(FlexibleModel):
|
||||
offset: HexInt
|
||||
size: HexInt
|
||||
|
||||
|
||||
class KV(ExactModel):
|
||||
class KV(FlexibleModel):
|
||||
name: str
|
||||
value: str
|
||||
"""
|
||||
|
||||
|
||||
class ExportedSymbol(ExactModel):
|
||||
class ExportedSymbol(FlexibleModel):
|
||||
address: HexInt
|
||||
name: str
|
||||
ordinal: int
|
||||
# ordinal: int
|
||||
|
||||
|
||||
class PE(ExactModel):
|
||||
peid_signatures: TODO
|
||||
class PE(FlexibleModel):
|
||||
# peid_signatures: TODO
|
||||
imagebase: HexInt
|
||||
entrypoint: HexInt
|
||||
reported_checksum: HexInt
|
||||
actual_checksum: HexInt
|
||||
osversion: str
|
||||
pdbpath: Optional[str] = None
|
||||
timestamp: str
|
||||
# entrypoint: HexInt
|
||||
# reported_checksum: HexInt
|
||||
# actual_checksum: HexInt
|
||||
# osversion: str
|
||||
# pdbpath: Optional[str] = None
|
||||
# timestamp: str
|
||||
|
||||
# list[ImportedDll], or dict[basename(dll), ImportedDll]
|
||||
imports: Union[list[ImportedDll], dict[str, ImportedDll]]
|
||||
imported_dll_count: Optional[int] = None
|
||||
imphash: str
|
||||
# imported_dll_count: Optional[int] = None
|
||||
# imphash: str
|
||||
|
||||
exported_dll_name: Optional[str] = None
|
||||
# exported_dll_name: Optional[str] = None
|
||||
exports: list[ExportedSymbol]
|
||||
|
||||
dirents: list[DirectoryEntry]
|
||||
# dirents: list[DirectoryEntry]
|
||||
sections: list[Section]
|
||||
|
||||
ep_bytes: Optional[HexBytes] = None
|
||||
# ep_bytes: Optional[HexBytes] = None
|
||||
|
||||
overlay: Optional[Overlay] = None
|
||||
resources: list[Resource]
|
||||
versioninfo: list[KV]
|
||||
# overlay: Optional[Overlay] = None
|
||||
# resources: list[Resource]
|
||||
# versioninfo: list[KV]
|
||||
|
||||
# base64 encoded data
|
||||
icon: Optional[str] = None
|
||||
# icon: Optional[str] = None
|
||||
# MD5-like hash
|
||||
icon_hash: Optional[str] = None
|
||||
# icon_hash: Optional[str] = None
|
||||
# MD5-like hash
|
||||
icon_fuzzy: Optional[str] = None
|
||||
# icon_fuzzy: Optional[str] = None
|
||||
# short hex string
|
||||
icon_dhash: Optional[str] = None
|
||||
# icon_dhash: Optional[str] = None
|
||||
|
||||
digital_signers: list[DigitalSigner]
|
||||
guest_signers: Signer
|
||||
# digital_signers: list[DigitalSigner]
|
||||
# guest_signers: Signer
|
||||
|
||||
|
||||
# TODO(mr-tz): target.file.dotnet, target.file.extracted_files, target.file.extracted_files_tool,
|
||||
@@ -218,48 +222,49 @@ class PE(ExactModel):
|
||||
# https://github.com/mandiant/capa/issues/1814
|
||||
class File(FlexibleModel):
|
||||
type: str
|
||||
cape_type_code: Optional[int] = None
|
||||
cape_type: Optional[str] = None
|
||||
# cape_type_code: Optional[int] = None
|
||||
# cape_type: Optional[str] = None
|
||||
|
||||
pid: Optional[Union[int, Literal[""]]] = None
|
||||
name: Union[list[str], str]
|
||||
path: str
|
||||
guest_paths: Union[list[str], str, None]
|
||||
timestamp: Optional[str] = None
|
||||
# pid: Optional[Union[int, Literal[""]]] = None
|
||||
# name: Union[list[str], str]
|
||||
# path: str
|
||||
# guest_paths: Union[list[str], str, None]
|
||||
# timestamp: Optional[str] = None
|
||||
|
||||
#
|
||||
# hashes
|
||||
#
|
||||
crc32: str
|
||||
# crc32: str
|
||||
md5: str
|
||||
sha1: str
|
||||
sha256: str
|
||||
sha512: str
|
||||
sha3_384: Optional[str] = None
|
||||
ssdeep: str
|
||||
# sha512: str
|
||||
# sha3_384: Optional[str] = None
|
||||
# ssdeep: str
|
||||
# unsure why this would ever be "False"
|
||||
tlsh: Optional[Union[str, bool]] = None
|
||||
rh_hash: Optional[str] = None
|
||||
# tlsh: Optional[Union[str, bool]] = None
|
||||
# rh_hash: Optional[str] = None
|
||||
|
||||
#
|
||||
# other metadata, static analysis
|
||||
#
|
||||
size: int
|
||||
# size: int
|
||||
pe: Optional[PE] = None
|
||||
ep_bytes: Optional[HexBytes] = None
|
||||
entrypoint: Optional[int] = None
|
||||
data: Optional[str] = None
|
||||
strings: Optional[list[str]] = None
|
||||
# ep_bytes: Optional[HexBytes] = None
|
||||
# entrypoint: Optional[int] = None
|
||||
# data: Optional[str] = None
|
||||
# strings: Optional[list[str]] = None
|
||||
|
||||
#
|
||||
# detections (skip)
|
||||
#
|
||||
yara: Skip = None
|
||||
cape_yara: Skip = None
|
||||
clamav: Skip = None
|
||||
virustotal: Skip = None
|
||||
# yara: Skip = None
|
||||
# cape_yara: Skip = None
|
||||
# clamav: Skip = None
|
||||
# virustotal: Skip = None
|
||||
|
||||
|
||||
"""
|
||||
class ProcessFile(File):
|
||||
#
|
||||
# like a File, but also has dynamic analysis results
|
||||
@@ -272,35 +277,36 @@ class ProcessFile(File):
|
||||
target_pid: Optional[Union[int, str]] = None
|
||||
target_path: Optional[str] = None
|
||||
target_process: Optional[str] = None
|
||||
"""
|
||||
|
||||
|
||||
class Argument(ExactModel):
|
||||
class Argument(FlexibleModel):
|
||||
name: str
|
||||
# unsure why empty list is provided here
|
||||
value: Union[HexInt, int, str, EmptyList]
|
||||
pretty_value: Optional[str] = None
|
||||
|
||||
|
||||
class Call(ExactModel):
|
||||
timestamp: str
|
||||
class Call(FlexibleModel):
|
||||
# timestamp: str
|
||||
thread_id: int
|
||||
category: str
|
||||
# category: str
|
||||
|
||||
api: str
|
||||
|
||||
arguments: list[Argument]
|
||||
status: bool
|
||||
# status: bool
|
||||
return_: HexInt = Field(alias="return")
|
||||
pretty_return: Optional[str] = None
|
||||
|
||||
repeated: int
|
||||
# repeated: int
|
||||
|
||||
# virtual addresses
|
||||
caller: HexInt
|
||||
parentcaller: HexInt
|
||||
# caller: HexInt
|
||||
# parentcaller: HexInt
|
||||
|
||||
# index into calls array
|
||||
id: int
|
||||
# id: int
|
||||
|
||||
|
||||
# FlexibleModel to account for extended fields
|
||||
@@ -310,14 +316,15 @@ class Process(FlexibleModel):
|
||||
process_id: int
|
||||
process_name: str
|
||||
parent_id: int
|
||||
module_path: str
|
||||
first_seen: str
|
||||
# module_path: str
|
||||
# first_seen: str
|
||||
calls: list[Call]
|
||||
threads: list[int]
|
||||
environ: dict[str, str]
|
||||
|
||||
|
||||
class ProcessTree(ExactModel):
|
||||
"""
|
||||
class ProcessTree(FlexibleModel):
|
||||
name: str
|
||||
pid: int
|
||||
parent_id: int
|
||||
@@ -325,17 +332,18 @@ class ProcessTree(ExactModel):
|
||||
threads: list[int]
|
||||
environ: dict[str, str]
|
||||
children: list["ProcessTree"]
|
||||
"""
|
||||
|
||||
|
||||
class Summary(ExactModel):
|
||||
class Summary(FlexibleModel):
|
||||
files: list[str]
|
||||
read_files: list[str]
|
||||
write_files: list[str]
|
||||
delete_files: list[str]
|
||||
# read_files: list[str]
|
||||
# write_files: list[str]
|
||||
# delete_files: list[str]
|
||||
keys: list[str]
|
||||
read_keys: list[str]
|
||||
write_keys: list[str]
|
||||
delete_keys: list[str]
|
||||
# read_keys: list[str]
|
||||
# write_keys: list[str]
|
||||
# delete_keys: list[str]
|
||||
executed_commands: list[str]
|
||||
resolved_apis: list[str]
|
||||
mutexes: list[str]
|
||||
@@ -343,7 +351,8 @@ class Summary(ExactModel):
|
||||
started_services: list[str]
|
||||
|
||||
|
||||
class EncryptedBuffer(ExactModel):
|
||||
"""
|
||||
class EncryptedBuffer(FlexibleModel):
|
||||
process_name: str
|
||||
pid: int
|
||||
|
||||
@@ -351,38 +360,41 @@ class EncryptedBuffer(ExactModel):
|
||||
buffer: str
|
||||
buffer_size: Optional[int] = None
|
||||
crypt_key: Optional[Union[HexInt, str]] = None
|
||||
"""
|
||||
|
||||
|
||||
class Behavior(ExactModel):
|
||||
class Behavior(FlexibleModel):
|
||||
summary: Summary
|
||||
|
||||
# list of processes, of threads, of calls
|
||||
processes: list[Process]
|
||||
# tree of processes
|
||||
processtree: list[ProcessTree]
|
||||
# processtree: list[ProcessTree]
|
||||
|
||||
anomaly: list[str]
|
||||
encryptedbuffers: list[EncryptedBuffer]
|
||||
# anomaly: list[str]
|
||||
# encryptedbuffers: list[EncryptedBuffer]
|
||||
# these are small objects that describe atomic events,
|
||||
# like file move, registry access.
|
||||
# we'll detect the same with our API call analysis.
|
||||
enhanced: Skip = None
|
||||
# enhanced: Skip = None
|
||||
|
||||
|
||||
class Target(ExactModel):
|
||||
category: str
|
||||
class Target(FlexibleModel):
|
||||
# category: str
|
||||
file: File
|
||||
# pe: Optional[PE] = None
|
||||
|
||||
|
||||
class Static(FlexibleModel):
|
||||
pe: Optional[PE] = None
|
||||
# flare_capa: Skip = None
|
||||
|
||||
|
||||
class Static(ExactModel):
|
||||
pe: Optional[PE] = None
|
||||
flare_capa: Skip = None
|
||||
|
||||
|
||||
class Cape(ExactModel):
|
||||
"""
|
||||
class Cape(FlexibleModel):
|
||||
payloads: list[ProcessFile]
|
||||
configs: Skip = None
|
||||
"""
|
||||
|
||||
|
||||
# flexible because there may be more sorts of analysis
|
||||
@@ -405,15 +417,14 @@ class CapeReport(FlexibleModel):
|
||||
# post-processed results: process tree, anomalies, etc
|
||||
behavior: Behavior
|
||||
|
||||
# post-processed results: payloads and extracted configs
|
||||
CAPE: Optional[Union[Cape, list]] = None
|
||||
dropped: Optional[list[File]] = None
|
||||
procdump: Optional[list[ProcessFile]] = None
|
||||
procmemory: Optional[ListTODO] = None
|
||||
|
||||
# =========================================================================
|
||||
# information we won't use in capa
|
||||
#
|
||||
# post-processed results: payloads and extracted configs
|
||||
# CAPE: Optional[Union[Cape, list]] = None
|
||||
# dropped: Optional[list[File]] = None
|
||||
# procdump: Optional[list[ProcessFile]] = None
|
||||
# procmemory: Optional[ListTODO] = None
|
||||
|
||||
#
|
||||
# NBIs and HBIs
|
||||
@@ -422,32 +433,32 @@ class CapeReport(FlexibleModel):
|
||||
#
|
||||
# if we come up with a future use for this, go ahead and re-enable!
|
||||
#
|
||||
network: Skip = None
|
||||
suricata: Skip = None
|
||||
curtain: Skip = None
|
||||
sysmon: Skip = None
|
||||
url_analysis: Skip = None
|
||||
# network: Skip = None
|
||||
# suricata: Skip = None
|
||||
# curtain: Skip = None
|
||||
# sysmon: Skip = None
|
||||
# url_analysis: Skip = None
|
||||
|
||||
# screenshot hash values
|
||||
deduplicated_shots: Skip = None
|
||||
# deduplicated_shots: Skip = None
|
||||
# k-v pairs describing the time it took to run each stage.
|
||||
statistics: Skip = None
|
||||
# statistics: Skip = None
|
||||
# k-v pairs of ATT&CK ID to signature name or similar.
|
||||
ttps: Skip = None
|
||||
# ttps: Skip = None
|
||||
# debug log messages
|
||||
debug: Skip = None
|
||||
# debug: Skip = None
|
||||
|
||||
# various signature matches
|
||||
# we could potentially extend capa to use this info one day,
|
||||
# though it would be quite sandbox-specific,
|
||||
# and more detection-oriented than capability detection.
|
||||
signatures: Skip = None
|
||||
malfamily_tag: Optional[str] = None
|
||||
malscore: float
|
||||
detections: Skip = None
|
||||
detections2pid: Optional[dict[int, list[str]]] = None
|
||||
# signatures: Skip = None
|
||||
# malfamily_tag: Optional[str] = None
|
||||
# malscore: float
|
||||
# detections: Skip = None
|
||||
# detections2pid: Optional[dict[int, list[str]]] = None
|
||||
# AV detections for the sample.
|
||||
virustotal: Skip = None
|
||||
# virustotal: Skip = None
|
||||
|
||||
@classmethod
|
||||
def from_buf(cls, buf: bytes) -> "CapeReport":
|
||||
|
||||
@@ -139,7 +139,7 @@ dev = [
|
||||
"ruff==0.9.2",
|
||||
"black==25.1.0",
|
||||
"isort==6.0.0",
|
||||
"mypy==1.14.1",
|
||||
"mypy==1.15.0",
|
||||
"mypy-protobuf==3.6.0",
|
||||
"PyGithub==2.5.0",
|
||||
# type stubs for mypy
|
||||
@@ -156,7 +156,7 @@ build = [
|
||||
# we want all developer environments to be consistent.
|
||||
# These dependencies are not used in production environments
|
||||
# and should not conflict with other libraries/tooling.
|
||||
"pyinstaller==6.11.1",
|
||||
"pyinstaller==6.12.0",
|
||||
"setuptools==75.8.0",
|
||||
"build==1.2.2"
|
||||
]
|
||||
|
||||
2
rules
2
rules
Submodule rules updated: 79afc557f1...c0aa922f20
Reference in New Issue
Block a user