Merge branch 'master' into Aayush-Goel-04/Issue#331

This commit is contained in:
Aayush Goel
2023-08-07 21:02:42 +05:30
committed by GitHub
17 changed files with 79 additions and 91 deletions

View File

@@ -41,7 +41,7 @@
// "forwardPorts": [],
// Use 'postCreateCommand' to run commands after the container is created.
"postCreateCommand": "git submodule update --init && pip3 install --user -e .[dev]",
"postCreateCommand": "git submodule update --init && pip3 install --user -e .[dev] && pre-commit install",
// Comment out to connect as root instead. More info: https://aka.ms/vscode-remote/containers/non-root.
"remoteUser": "vscode",

View File

@@ -3,21 +3,24 @@
## master (unreleased)
### New Features
- ELF: implement file import and export name extractor #1607 @Aayush-Goel-04
- ELF: implement file import and export name extractor #1607 #1608 @Aayush-Goel-04
- bump pydantic from 1.10.9 to 2.1.1 #1582 @Aayush-Goel-04
- develop script to highlight the features that are not used during matching #331 @Aayush-Goel-04
### Breaking Changes
### New Rules (4)
### New Rules (5)
- executable/pe/export/forwarded-export ronnie.salomonsen@mandiant.com
- host-interaction/bootloader/get-uefi-variable jakub.jozwiak@mandiant.com
- host-interaction/bootloader/set-uefi-variable jakub.jozwiak@mandiant.com
- nursery/enumerate-device-drivers-on-linux @mr-tz
-
### Bug Fixes
- Fix binja backend stack string detection. [#1473](https://github.com/mandiant/capa/issues/1473) [@xusheng6](https://github.com/xusheng6)
- Fix binja backend stack string detection. #1473 @xusheng6
- linter: skip native API check for NtProtectVirtualMemory #1675 @williballenthin
### capa explorer IDA Pro plugin

View File

@@ -2,7 +2,7 @@
[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/flare-capa)](https://pypi.org/project/flare-capa)
[![Last release](https://img.shields.io/github/v/release/mandiant/capa)](https://github.com/mandiant/capa/releases)
[![Number of rules](https://img.shields.io/badge/rules-826-blue.svg)](https://github.com/mandiant/capa-rules)
[![Number of rules](https://img.shields.io/badge/rules-828-blue.svg)](https://github.com/mandiant/capa-rules)
[![CI status](https://github.com/mandiant/capa/workflows/CI/badge.svg)](https://github.com/mandiant/capa/actions?query=workflow%3ACI+event%3Apush+branch%3Amaster)
[![Downloads](https://img.shields.io/github/downloads/mandiant/capa/total)](https://github.com/mandiant/capa/releases)
[![License](https://img.shields.io/badge/license-Apache--2.0-green.svg)](LICENSE.txt)

View File

@@ -136,8 +136,8 @@ class Feature(abc.ABC): # noqa: B024
import capa.features.freeze.features
return (
capa.features.freeze.features.feature_from_capa(self).json()
< capa.features.freeze.features.feature_from_capa(other).json()
capa.features.freeze.features.feature_from_capa(self).model_dump_json()
< capa.features.freeze.features.feature_from_capa(other).model_dump_json()
)
def get_name_str(self) -> str:

View File

@@ -14,7 +14,7 @@ import logging
from enum import Enum
from typing import List, Tuple, Union
from pydantic import Field, BaseModel
from pydantic import Field, BaseModel, ConfigDict
import capa.helpers
import capa.version
@@ -31,8 +31,7 @@ logger = logging.getLogger(__name__)
class HashableModel(BaseModel):
class Config:
frozen = True
model_config = ConfigDict(frozen=True)
class AddressType(str, Enum):
@@ -46,7 +45,7 @@ class AddressType(str, Enum):
class Address(HashableModel):
type: AddressType
value: Union[int, Tuple[int, int], None]
value: Union[int, Tuple[int, int], None] = None # None default value to support deserialization of NO_ADDRESS
@classmethod
def from_capa(cls, a: capa.features.address.Address) -> "Address":
@@ -159,9 +158,7 @@ class BasicBlockFeature(HashableModel):
basic_block: Address = Field(alias="basic block")
address: Address
feature: Feature
class Config:
allow_population_by_field_name = True
model_config = ConfigDict(populate_by_name=True)
class InstructionFeature(HashableModel):
@@ -194,26 +191,20 @@ class FunctionFeatures(BaseModel):
address: Address
features: Tuple[FunctionFeature, ...]
basic_blocks: Tuple[BasicBlockFeatures, ...] = Field(alias="basic blocks")
class Config:
allow_population_by_field_name = True
model_config = ConfigDict(populate_by_name=True)
class Features(BaseModel):
global_: Tuple[GlobalFeature, ...] = Field(alias="global")
file: Tuple[FileFeature, ...]
functions: Tuple[FunctionFeatures, ...]
class Config:
allow_population_by_field_name = True
model_config = ConfigDict(populate_by_name=True)
class Extractor(BaseModel):
name: str
version: str = capa.version.__version__
class Config:
allow_population_by_field_name = True
model_config = ConfigDict(populate_by_name=True)
class Freeze(BaseModel):
@@ -221,9 +212,7 @@ class Freeze(BaseModel):
base_address: Address = Field(alias="base address")
extractor: Extractor
features: Features
class Config:
allow_population_by_field_name = True
model_config = ConfigDict(populate_by_name=True)
def dumps(extractor: capa.features.extractors.base_extractor.FeatureExtractor) -> str:
@@ -324,7 +313,7 @@ def dumps(extractor: capa.features.extractors.base_extractor.FeatureExtractor) -
) # type: ignore
# Mypy is unable to recognise `base_address` as an argument due to alias
return freeze.json()
return freeze.model_dump_json()
def loads(s: str) -> capa.features.extractors.base_extractor.FeatureExtractor:

View File

@@ -8,7 +8,7 @@
import binascii
from typing import Union, Optional
from pydantic import Field, BaseModel
from pydantic import Field, BaseModel, ConfigDict
import capa.features.file
import capa.features.insn
@@ -17,9 +17,7 @@ import capa.features.basicblock
class FeatureModel(BaseModel):
class Config:
frozen = True
allow_population_by_field_name = True
model_config = ConfigDict(frozen=True, populate_by_name=True)
def to_capa(self) -> capa.features.common.Feature:
if isinstance(self, OSFeature):
@@ -213,141 +211,141 @@ def feature_from_capa(f: capa.features.common.Feature) -> "Feature":
class OSFeature(FeatureModel):
type: str = "os"
os: str
description: Optional[str]
description: Optional[str] = None
class ArchFeature(FeatureModel):
type: str = "arch"
arch: str
description: Optional[str]
description: Optional[str] = None
class FormatFeature(FeatureModel):
type: str = "format"
format: str
description: Optional[str]
description: Optional[str] = None
class MatchFeature(FeatureModel):
type: str = "match"
match: str
description: Optional[str]
description: Optional[str] = None
class CharacteristicFeature(FeatureModel):
type: str = "characteristic"
characteristic: str
description: Optional[str]
description: Optional[str] = None
class ExportFeature(FeatureModel):
type: str = "export"
export: str
description: Optional[str]
description: Optional[str] = None
class ImportFeature(FeatureModel):
type: str = "import"
import_: str = Field(alias="import")
description: Optional[str]
description: Optional[str] = None
class SectionFeature(FeatureModel):
type: str = "section"
section: str
description: Optional[str]
description: Optional[str] = None
class FunctionNameFeature(FeatureModel):
type: str = "function name"
function_name: str = Field(alias="function name")
description: Optional[str]
description: Optional[str] = None
class SubstringFeature(FeatureModel):
type: str = "substring"
substring: str
description: Optional[str]
description: Optional[str] = None
class RegexFeature(FeatureModel):
type: str = "regex"
regex: str
description: Optional[str]
description: Optional[str] = None
class StringFeature(FeatureModel):
type: str = "string"
string: str
description: Optional[str]
description: Optional[str] = None
class ClassFeature(FeatureModel):
type: str = "class"
class_: str = Field(alias="class")
description: Optional[str]
description: Optional[str] = None
class NamespaceFeature(FeatureModel):
type: str = "namespace"
namespace: str
description: Optional[str]
description: Optional[str] = None
class BasicBlockFeature(FeatureModel):
type: str = "basic block"
description: Optional[str]
description: Optional[str] = None
class APIFeature(FeatureModel):
type: str = "api"
api: str
description: Optional[str]
description: Optional[str] = None
class PropertyFeature(FeatureModel):
type: str = "property"
access: Optional[str]
access: Optional[str] = None
property: str
description: Optional[str]
description: Optional[str] = None
class NumberFeature(FeatureModel):
type: str = "number"
number: Union[int, float]
description: Optional[str]
description: Optional[str] = None
class BytesFeature(FeatureModel):
type: str = "bytes"
bytes: str
description: Optional[str]
description: Optional[str] = None
class OffsetFeature(FeatureModel):
type: str = "offset"
offset: int
description: Optional[str]
description: Optional[str] = None
class MnemonicFeature(FeatureModel):
type: str = "mnemonic"
mnemonic: str
description: Optional[str]
description: Optional[str] = None
class OperandNumberFeature(FeatureModel):
type: str = "operand number"
index: int
operand_number: int = Field(alias="operand number")
description: Optional[str]
description: Optional[str] = None
class OperandOffsetFeature(FeatureModel):
type: str = "operand offset"
index: int
operand_offset: int = Field(alias="operand offset")
description: Optional[str]
description: Optional[str] = None
Feature = Union[

View File

@@ -1304,7 +1304,7 @@ class CapaExplorerForm(idaapi.PluginForm):
idaapi.info("No program analysis to save.")
return
s = self.resdoc_cache.json().encode("utf-8")
s = self.resdoc_cache.model_dump_json().encode("utf-8")
path = Path(self.ask_user_capa_json_file())
if not path.exists():

View File

@@ -11,4 +11,4 @@ from capa.engine import MatchResults
def render(meta, rules: RuleSet, capabilities: MatchResults) -> str:
return rd.ResultDocument.from_capa(meta, rules, capabilities).json(exclude_none=True)
return rd.ResultDocument.from_capa(meta, rules, capabilities).model_dump_json(exclude_none=True)

View File

@@ -126,7 +126,7 @@ def metadata_to_pb2(meta: rd.Metadata) -> capa_pb2.Metadata:
timestamp=str(meta.timestamp),
version=meta.version,
argv=meta.argv,
sample=google.protobuf.json_format.ParseDict(meta.sample.dict(), capa_pb2.Sample()),
sample=google.protobuf.json_format.ParseDict(meta.sample.model_dump(), capa_pb2.Sample()),
analysis=capa_pb2.Analysis(
format=meta.analysis.format,
arch=meta.analysis.arch,
@@ -393,7 +393,7 @@ def match_to_pb2(match: rd.Match) -> capa_pb2.Match:
def rule_metadata_to_pb2(rule_metadata: rd.RuleMetadata) -> capa_pb2.RuleMetadata:
# after manual type conversions to the RuleMetadata, we can rely on the protobuf json parser
# conversions include tuple -> list and rd.Enum -> proto.enum
meta = dict_tuple_to_list_values(rule_metadata.dict())
meta = dict_tuple_to_list_values(rule_metadata.model_dump())
meta["scope"] = scope_to_pb2(meta["scope"])
meta["attack"] = list(map(dict_tuple_to_list_values, meta.get("attack", [])))
meta["mbc"] = list(map(dict_tuple_to_list_values, meta.get("mbc", [])))

View File

@@ -7,9 +7,9 @@
# See the License for the specific language governing permissions and limitations under the License.
import datetime
import collections
from typing import Dict, List, Tuple, Union, Optional
from typing import Dict, List, Tuple, Union, Literal, Optional
from pydantic import Field, BaseModel
from pydantic import Field, BaseModel, ConfigDict
import capa.rules
import capa.engine
@@ -23,14 +23,11 @@ from capa.helpers import assert_never
class FrozenModel(BaseModel):
class Config:
frozen = True
extra = "forbid"
model_config = ConfigDict(frozen=True, extra="forbid")
class Model(BaseModel):
class Config:
extra = "forbid"
model_config = ConfigDict(extra="forbid")
class Sample(Model):
@@ -105,13 +102,13 @@ class CompoundStatement(StatementModel):
class SomeStatement(StatementModel):
type = "some"
type: Literal["some"] = "some"
description: Optional[str] = None
count: int
class RangeStatement(StatementModel):
type = "range"
type: Literal["range"] = "range"
description: Optional[str] = None
min: int
max: int
@@ -119,7 +116,7 @@ class RangeStatement(StatementModel):
class SubscopeStatement(StatementModel):
type = "subscope"
type: Literal["subscope"] = "subscope"
description: Optional[str] = None
scope: capa.rules.Scope
@@ -134,7 +131,7 @@ Statement = Union[
class StatementNode(FrozenModel):
type = "statement"
type: Literal["statement"] = "statement"
statement: Statement
@@ -171,7 +168,7 @@ def statement_from_capa(node: capa.engine.Statement) -> Statement:
class FeatureNode(FrozenModel):
type = "feature"
type: Literal["feature"] = "feature"
feature: frz.Feature
@@ -500,15 +497,12 @@ class MaecMetadata(FrozenModel):
malware_family: Optional[str] = Field(None, alias="malware-family")
malware_category: Optional[str] = Field(None, alias="malware-category")
malware_category_ov: Optional[str] = Field(None, alias="malware-category-ov")
class Config:
frozen = True
allow_population_by_field_name = True
model_config = ConfigDict(frozen=True, populate_by_name=True)
class RuleMetadata(FrozenModel):
name: str
namespace: Optional[str]
namespace: Optional[str] = None
authors: Tuple[str, ...]
scope: capa.rules.Scope
attack: Tuple[AttackSpec, ...] = Field(alias="att&ck")
@@ -546,9 +540,7 @@ class RuleMetadata(FrozenModel):
) # type: ignore
# Mypy is unable to recognise arguments due to alias
class Config:
frozen = True
allow_population_by_field_name = True
model_config = ConfigDict(frozen=True, populate_by_name=True)
class RuleMatches(FrozenModel):

View File

@@ -88,7 +88,7 @@ def render_statement(ostream, match: rd.Match, statement: rd.Statement, indent=0
# so, we have to inline some of the feature rendering here.
child = statement.child
value = child.dict(by_alias=True).get(child.type)
value = child.model_dump(by_alias=True).get(child.type)
if value:
if isinstance(child, frzf.StringFeature):
@@ -141,7 +141,7 @@ def render_feature(ostream, match: rd.Match, feature: frzf.Feature, indent=0):
value = feature.class_
else:
# convert attributes to dictionary using aliased names, if applicable
value = feature.dict(by_alias=True).get(key)
value = feature.model_dump(by_alias=True).get(key)
if value is None:
raise ValueError(f"{key} contains None")

View File

@@ -48,7 +48,7 @@ dependencies = [
"pyelftools==0.29",
"dnfile==0.13.0",
"dncil==1.0.2",
"pydantic==1.10.9",
"pydantic==2.1.1",
"protobuf==4.23.4",
]
dynamic = ["version"]

2
rules

Submodule rules updated: 7685a232d9...149cf2d133

View File

@@ -144,8 +144,7 @@ def get_capa_results(args):
meta.analysis.layout = capa.main.compute_layout(rules, extractor, capabilities)
doc = rd.ResultDocument.from_capa(meta, rules, capabilities)
return {"path": path, "status": "ok", "ok": doc.dict(exclude_none=True)}
return {"path": path, "status": "ok", "ok": doc.model_dump()}
def main(argv=None):
@@ -214,7 +213,9 @@ def main(argv=None):
if result["status"] == "error":
logger.warning(result["error"])
elif result["status"] == "ok":
results[result["path"].as_posix()] = rd.ResultDocument.parse_obj(result["ok"]).json(exclude_none=True)
results[result["path"].as_posix()] = rd.ResultDocument.model_validate(result["ok"]).model_dump_json(
exclude_none=True
)
else:
raise ValueError(f"unexpected status: {result['status']}")

View File

@@ -309,7 +309,7 @@ def get_sample_capabilities(ctx: Context, path: Path) -> Set[str]:
logger.debug("analyzing sample: %s", nice_path)
extractor = capa.main.get_extractor(
nice_path, format_, OS_AUTO, "", DEFAULT_SIGNATURES, False, disable_progress=True
nice_path, format_, OS_AUTO, capa.main.BACKEND_VIV, DEFAULT_SIGNATURES, False, disable_progress=True
)
capabilities, _ = capa.main.find_capabilities(ctx.rules, extractor, disable_progress=True)
@@ -569,6 +569,10 @@ class FeatureNtdllNtoskrnlApi(Lint):
"ZwCreateProcess",
"ZwCreateUserProcess",
"RtlCreateUserProcess",
"NtProtectVirtualMemory",
"NtEnumerateSystemEnvironmentValuesEx",
"NtQuerySystemEnvironmentValueEx",
"NtQuerySystemEnvironmentValue",
):
# ntoskrnl.exe does not export these routines
continue
@@ -579,6 +583,7 @@ class FeatureNtdllNtoskrnlApi(Lint):
"KeStackAttachProcess",
"ObfDereferenceObject",
"KeUnstackDetachProcess",
"ExGetFirmwareEnvironmentVariable",
):
# ntdll.dll does not export these routines
continue

View File

@@ -78,7 +78,7 @@ def main(argv=None):
rdpb.ParseFromString(pb)
rd = capa.render.proto.doc_from_pb2(rdpb)
print(rd.json(exclude_none=True, indent=2, sort_keys=True))
print(rd.model_dump_json(exclude_none=True, indent=2))
if __name__ == "__main__":

View File

@@ -236,7 +236,7 @@ def test_basic_block_node_from_capa():
def assert_round_trip(rd: rdoc.ResultDocument):
one = rd
doc = one.json(exclude_none=True)
doc = one.model_dump_json(exclude_none=True)
two = rdoc.ResultDocument.parse_raw(doc)
# show the round trip works
@@ -244,14 +244,14 @@ def assert_round_trip(rd: rdoc.ResultDocument):
# which works thanks to pydantic model equality.
assert one == two
# second by showing their json representations are the same.
assert one.json(exclude_none=True) == two.json(exclude_none=True)
assert one.model_dump_json(exclude_none=True) == two.model_dump_json(exclude_none=True)
# now show that two different versions are not equal.
three = copy.deepcopy(two)
three.meta.__dict__.update({"version": "0.0.0"})
assert one.meta.version != three.meta.version
assert one != three
assert one.json(exclude_none=True) != three.json(exclude_none=True)
assert one.model_dump_json(exclude_none=True) != three.model_dump_json(exclude_none=True)
@pytest.mark.parametrize(