Merge pull request #830 from mandiant/perf/rule-selection

perf: don't try to match rules that will never match
This commit is contained in:
Willi Ballenthin
2021-11-12 11:54:29 -07:00
committed by GitHub
9 changed files with 821 additions and 474 deletions

View File

@@ -6,6 +6,7 @@
- engine: short circuit logic nodes for better performance #824 @williballenthin
- engine: add optimizer to order faster nodes first #829 @williballenthin
- engine: optimize rule evaluation by skipping rules that can't match #830 @williballenthin
### Breaking Changes

View File

@@ -8,13 +8,17 @@
import copy
import collections
from typing import Set, Dict, List, Tuple, Union, Mapping, Iterable
from typing import TYPE_CHECKING, Set, Dict, List, Tuple, Mapping, Iterable
import capa.perf
import capa.rules
import capa.features.common
from capa.features.common import Result, Feature
if TYPE_CHECKING:
# circular import, otherwise
import capa.rules
# a collection of features and the locations at which they are found.
#
# used throughout matching as the context in which features are searched:
@@ -275,15 +279,20 @@ def index_rule_matches(features: FeatureSet, rule: "capa.rules.Rule", locations:
def match(rules: List["capa.rules.Rule"], features: FeatureSet, va: int) -> Tuple[FeatureSet, MatchResults]:
"""
Args:
rules (List[capa.rules.Rule]): these must already be ordered topologically by dependency.
features (Mapping[capa.features.Feature, int]):
va (int): location of the features
match the given rules against the given features,
returning an updated set of features and the matches.
Returns:
Tuple[FeatureSet, MatchResults]: two-tuple with entries:
- set of features used for matching (which may be a superset of the given `features` argument, due to rule match features), and
- mapping from rule name to [(location of match, result object)]
the updated features are just like the input,
but extended to include the match features (e.g. names of rules that matched).
the given feature set is not modified; an updated copy is returned.
the given list of rules must be ordered topologically by dependency,
or else `match` statements will not be handled correctly.
this routine should be fairly optimized, but is not guaranteed to be the fastest matcher possible.
it has a particularly convenient signature: (rules, features) -> matches
other strategies can be imagined that match differently; implement these elsewhere.
specifically, this routine does "top down" matching of the given rules against the feature set.
"""
results = collections.defaultdict(list) # type: MatchResults

View File

@@ -42,7 +42,7 @@ import capa.features.extractors
import capa.features.extractors.common
import capa.features.extractors.pefile
import capa.features.extractors.elffile
from capa.rules import Rule, RuleSet
from capa.rules import Rule, Scope, RuleSet
from capa.engine import FeatureSet, MatchResults
from capa.helpers import get_file_taste
from capa.features.extractors.base_extractor import FunctionHandle, FeatureExtractor
@@ -114,7 +114,7 @@ def find_function_capabilities(ruleset: RuleSet, extractor: FeatureExtractor, f:
bb_features[feature].add(va)
function_features[feature].add(va)
_, matches = capa.engine.match(ruleset.basic_block_rules, bb_features, int(bb))
_, matches = ruleset.match(Scope.BASIC_BLOCK, bb_features, int(bb))
for rule_name, res in matches.items():
bb_matches[rule_name].extend(res)
@@ -122,7 +122,7 @@ def find_function_capabilities(ruleset: RuleSet, extractor: FeatureExtractor, f:
for va, _ in res:
capa.engine.index_rule_matches(function_features, rule, [va])
_, function_matches = capa.engine.match(ruleset.function_rules, function_features, int(f))
_, function_matches = ruleset.match(Scope.FUNCTION, function_features, int(f))
return function_matches, bb_matches, len(function_features)
@@ -143,7 +143,7 @@ def find_file_capabilities(ruleset: RuleSet, extractor: FeatureExtractor, functi
file_features.update(function_features)
_, matches = capa.engine.match(ruleset.file_rules, file_features, 0x0)
_, matches = ruleset.match(Scope.FILE, file_features, 0x0)
return matches, len(file_features)

View File

@@ -14,6 +14,7 @@ import logging
import binascii
import functools
import collections
from enum import Enum
try:
from functools import lru_cache
@@ -22,7 +23,7 @@ except ImportError:
# https://github.com/python/mypy/issues/1153
from backports.functools_lru_cache import lru_cache # type: ignore
from typing import Any, Dict, List, Union, Iterator
from typing import Any, Set, Dict, List, Tuple, Union, Iterator
import yaml
import ruamel.yaml
@@ -66,9 +67,15 @@ META_KEYS = (
HIDDEN_META_KEYS = ("capa/nursery", "capa/path")
FILE_SCOPE = "file"
FUNCTION_SCOPE = "function"
BASIC_BLOCK_SCOPE = "basic block"
class Scope(str, Enum):
    """
    the scopes at which a rule can be matched.

    members are also `str` instances, so each one compares equal
    to its plain string value (e.g. `Scope.FILE == "file"`).
    """

    FILE = "file"
    FUNCTION = "function"
    BASIC_BLOCK = "basic block"


# plain-string aliases for the scope values,
# for code that refers to scopes by these module-level names.
FILE_SCOPE = Scope.FILE.value
FUNCTION_SCOPE = Scope.FUNCTION.value
BASIC_BLOCK_SCOPE = Scope.BASIC_BLOCK.value
SUPPORTED_FEATURES = {
@@ -970,6 +977,15 @@ class RuleSet:
self.rules = {rule.name: rule for rule in rules}
self.rules_by_namespace = index_rules_by_namespace(rules)
# unstable
(self._easy_file_rules_by_feature, self._hard_file_rules) = self._index_rules_by_feature(self.file_rules)
(self._easy_function_rules_by_feature, self._hard_function_rules) = self._index_rules_by_feature(
self.function_rules
)
(self._easy_basic_block_rules_by_feature, self._hard_basic_block_rules) = self._index_rules_by_feature(
self.basic_block_rules
)
def __len__(self):
return len(self.rules)
@@ -979,6 +995,126 @@ class RuleSet:
def __contains__(self, rulename):
return rulename in self.rules
@staticmethod
def _index_rules_by_feature(rules) -> Tuple[Dict[Feature, Set[str]], List[str]]:
    """
    partition the given rules into an "easy" index and a "hard" list.

    - "easy rules" are indexed by feature: given a feature,
      you can quickly find the names of the easy rules that reference it.
    - "hard rules" contain substring/regex/bytes features, match statements,
      `not:`, `optional:`/`0 or more:`, or `count(...)` ranges with a minimum of zero.
      their names are returned in the same (topological) order as the input.

    a rule evaluator can use the easy index to narrow down the candidate rules
    that might match a given feature set.
    it can't do anything clever with the hard rules:
    each one still needs a full top-down match, in topological order.

    Args:
      rules: the rules to index, already ordered topologically by dependency.

    Returns:
      two-tuple with entries:
        - mapping from feature to the names of the easy rules that reference it, and
        - list of hard rule names, in input order.

    Raises:
      Exception: if a rule's logic tree contains an unexpected node type.
    """
    # phases:
    #
    #  1. walk every node of every rule, recording
    #     a. which rules reference each feature, and
    #     b. which rules contain easy and/or hard constructs
    #  2. drop the hard rules from the by-feature index
    #  3. collect the hard rule names, preserving the input order

    easy_names: Set[str] = set()
    hard_names: Set[str] = set()
    index: Dict[Feature, Set[str]] = collections.defaultdict(set)

    # features that can't be resolved by a simple hash lookup:
    #  - substring, regex, bytes: scanning features
    #  - match: depends on another rule, which has to be evaluated first
    hard_feature_types = (
        capa.features.common.Substring,
        capa.features.common.Regex,
        capa.features.common.Bytes,
        capa.features.common.MatchedRule,
    )

    def visit(name: str, node: Union[Feature, Statement]):
        """
        walk one node of the logic tree for the rule called `name`,
        updating the easy/hard name sets and the by-feature index.
        """
        if isinstance(node, hard_feature_types):
            # hard feature: requires a scan or a match lookup.
            hard_names.add(name)
            return

        if isinstance(node, capa.features.common.Feature):
            # easy feature: a hash lookup is enough.
            easy_names.add(name)
            index[node].add(name)
            return

        # statements that make the whole rule hard:
        #
        #  - `not:` - features beneath it are *not* wanted, so must not be indexed;
        #    it can be nested under further `not:` statements;
        #    and at the root or directly under `or:` the rule matches anything
        #    not listed, which is a difficult set to compute and index.
        #    as of writing this is uncommon: 6 instances in 740 rules.
        #  - `optional:` / `0 or more:` - the subtree may match, but not matching
        #    has no impact either. rule authors shouldn't place this under `or:`
        #    (the linter checks for it), but an unlinted private rule set
        #    could still do so, and that would be hard to track down.
        #  - `count(foo): 0 or more` - with a minimum of zero the subtree can match
        #    just about any feature except the given one.
        #
        # better safe than sorry: don't descend, just mark the rule as hard.
        makes_rule_hard = (
            isinstance(node, ceng.Not)
            or (isinstance(node, ceng.Some) and node.count == 0)
            or (isinstance(node, ceng.Range) and node.min == 0)
        )
        if makes_rule_hard:
            hard_names.add(name)
            return

        if isinstance(node, ceng.Range):
            # a range (with non-zero minimum) wraps a single child.
            visit(name, node.child)
            return

        if isinstance(node, (ceng.And, ceng.Or, ceng.Some)):
            for subnode in node.children:
                visit(name, subnode)
            return

        # programming error
        raise Exception("programming error: unexpected node type: %s" % (node))

    for rule in rules:
        visit(rule.meta["name"], rule.statement)

    # a rule with any hard construct is not easy,
    # so none of its features may stay in the index.
    for names in index.values():
        names.difference_update(hard_names)

    # `rules` is already topologically ordered,
    # so filtering it keeps the hard rules in that order.
    hard_rules = [rule.meta["name"] for rule in rules if rule.meta["name"] in hard_names]

    return (index, hard_rules)
@staticmethod
def _get_rules_for_scope(rules, scope):
"""
@@ -1041,3 +1177,65 @@ class RuleSet:
rules_filtered.update(set(capa.rules.get_rules_and_dependencies(rules, rule.name)))
break
return RuleSet(list(rules_filtered))
def match(self, scope: Scope, features: FeatureSet, va: int) -> Tuple[FeatureSet, ceng.MatchResults]:
    """
    match rules from this ruleset at the given scope against the given features.

    this routine should act just like `capa.engine.match`,
    except that it may be more performant:
    "easy" rules that share no feature with `features` are never evaluated.

    Args:
      scope: the scope whose rules should be matched (file, function, or basic block).
      features: the features to match against, and the locations at which they are found.
      va: location of the features.

    Returns:
      two-tuple with entries:
        - the feature set, extended with the features added by easy and hard rule matches, and
        - a dict holding the results of both the easy and the hard rule matching.

    Raises:
      Exception: if `scope` is not one of the `Scope` members.
    """
    # look the members up on the `Scope` class rather than on the `scope` argument.
    # `scope.FILE` only resolves when the argument is itself a `Scope` member,
    # and it relies on member-to-member attribute access, which the enum docs discourage.
    # since `Scope` mixes in `str`, a plain string such as "file" also compares equal here.
    if scope == Scope.FILE:
        easy_rules_by_feature = self._easy_file_rules_by_feature
        hard_rule_names = self._hard_file_rules
    elif scope == Scope.FUNCTION:
        easy_rules_by_feature = self._easy_function_rules_by_feature
        hard_rule_names = self._hard_function_rules
    elif scope == Scope.BASIC_BLOCK:
        easy_rules_by_feature = self._easy_basic_block_rules_by_feature
        hard_rule_names = self._hard_basic_block_rules
    else:
        raise Exception("programming error: unexpected scope")

    # collect the easy rules that share at least one feature with our feature set.
    # the index may hold empty sets (features only referenced by hard rules),
    # which contribute nothing.
    candidate_rule_names = set()
    for feature in features:
        easy_rule_names = easy_rules_by_feature.get(feature)
        if easy_rule_names:
            candidate_rule_names.update(easy_rule_names)

    # first, match against the set of rules that have at least one
    # feature shared with our feature set.
    candidate_rules = [self.rules[name] for name in candidate_rule_names]
    features2, easy_matches = ceng.match(candidate_rules, features, va)

    # note that we've stored the updated feature set in `features2`.
    # this contains a superset of the features in `features`;
    # it contains additional features for any easy rule matches.
    # we'll pass this feature set to hard rule matching, since one
    # of those rules might rely on an easy rule match.
    #
    # the updated feature set from hard matching will go into `features3`.
    # this is a superset of `features2`, which is a superset of `features`.
    # ultimately, this is what we'll return to the caller.
    #
    # in each case, we could have assigned the updated feature set back to `features`,
    # but this is slightly more explicit about how we're tracking the data.

    # now, match against the (topologically ordered) list of rules
    # that we can't really make any guesses about.
    # these are the rules that were classified as hard when the ruleset was indexed,
    # e.g. those with substring/regex/bytes features or match statements.
    hard_rules = [self.rules[name] for name in hard_rule_names]
    features3, hard_matches = ceng.match(hard_rules, features2, va)

    # note that above, we are probably skipping a bunch of
    # rules that definitely would never hit:
    # specifically, "easy rules" that don't share any features with
    # the feature set.

    # merge both result sets into a plain dict,
    # since MatchResults doesn't technically have an .update() method.
    matches = dict(easy_matches)
    matches.update(hard_matches)

    return (features3, matches)

View File

@@ -339,6 +339,52 @@ class OrStatementWithAlwaysTrueChild(Lint):
return self.violation
class NotNotUnderAnd(Lint):
    """
    lint: flag rules in which a `not` statement is a direct child
    of a statement other than `and`.
    """

    name = "rule contains a `not` statement that's not found under an `and` statement"
    recommendation = "clarify the rule logic and ensure `not` is always found under `and`"
    violation = False

    def check_rule(self, ctx: Context, rule: Rule):
        """
        walk the rule's logic tree and report whether any non-`and` statement
        has a `not` statement among its direct children.

        the result is also stored in `self.violation`.
        """
        self.violation = False

        # explicit worklist instead of recursion; visiting order doesn't affect the result.
        pending = [rule.statement]
        while pending:
            node = pending.pop()
            if not isinstance(node, capa.engine.Statement):
                # features are leaves: nothing to check, nothing to descend into.
                continue

            children = list(node.get_children())
            if not isinstance(node, capa.engine.And):
                if any(isinstance(child, capa.engine.Not) for child in children):
                    self.violation = True

            pending.extend(children)

        return self.violation
class OptionalNotUnderAnd(Lint):
    """
    lint: flag rules in which a `Some` statement with a count of zero
    (`optional` / `0 or more`) is a direct child of a statement other than `and`.
    """

    name = "rule contains an `optional` or `0 or more` statement that's not found under an `and` statement"
    recommendation = "clarify the rule logic and ensure `optional` and `0 or more` is always found under `and`"
    violation = False

    def check_rule(self, ctx: Context, rule: Rule):
        """
        walk the rule's logic tree and report whether any non-`and` statement
        has a zero-count `Some` statement among its direct children.

        the result is also stored in `self.violation`.
        """
        self.violation = False

        # explicit worklist instead of recursion; visiting order doesn't affect the result.
        pending = [rule.statement]
        while pending:
            node = pending.pop()
            if not isinstance(node, capa.engine.Statement):
                # features are leaves: nothing to check, nothing to descend into.
                continue

            children = list(node.get_children())
            if not isinstance(node, capa.engine.And):
                if any(isinstance(child, capa.engine.Some) and child.count == 0 for child in children):
                    self.violation = True

            pending.extend(children)

        return self.violation
class UnusualMetaField(Lint):
name = "unusual meta field"
recommendation = "Remove the meta field"
@@ -660,6 +706,8 @@ LOGIC_LINTS = (
DoesntMatchExample(),
StatementWithSingleChildStatement(),
OrStatementWithAlwaysTrueChild(),
NotNotUnderAnd(),
OptionalNotUnderAnd(),
)

View File

@@ -5,13 +5,6 @@
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import textwrap
import capa.rules
import capa.engine
import capa.features.insn
import capa.features.common
from capa.engine import *
from capa.features import *
from capa.features.insn import *
@@ -117,424 +110,6 @@ def test_range():
assert Range(Number(1), min=1, max=3).evaluate({Number(1): {1, 2, 3, 4}}) == False
def test_range_exact():
rule = textwrap.dedent(
"""
rule:
meta:
name: test rule
features:
- count(number(100)): 2
"""
)
r = capa.rules.Rule.from_yaml(rule)
# just enough matches
features, matches = capa.engine.match([r], {capa.features.insn.Number(100): {1, 2}}, 0x0)
assert "test rule" in matches
# not enough matches
features, matches = capa.engine.match([r], {capa.features.insn.Number(100): {1}}, 0x0)
assert "test rule" not in matches
# too many matches
features, matches = capa.engine.match([r], {capa.features.insn.Number(100): {1, 2, 3}}, 0x0)
assert "test rule" not in matches
def test_range_range():
rule = textwrap.dedent(
"""
rule:
meta:
name: test rule
features:
- count(number(100)): (2, 3)
"""
)
r = capa.rules.Rule.from_yaml(rule)
# just enough matches
features, matches = capa.engine.match([r], {capa.features.insn.Number(100): {1, 2}}, 0x0)
assert "test rule" in matches
# enough matches
features, matches = capa.engine.match([r], {capa.features.insn.Number(100): {1, 2, 3}}, 0x0)
assert "test rule" in matches
# not enough matches
features, matches = capa.engine.match([r], {capa.features.insn.Number(100): {1}}, 0x0)
assert "test rule" not in matches
# too many matches
features, matches = capa.engine.match([r], {capa.features.insn.Number(100): {1, 2, 3, 4}}, 0x0)
assert "test rule" not in matches
def test_range_exact_zero():
rule = textwrap.dedent(
"""
rule:
meta:
name: test rule
features:
- count(number(100)): 0
"""
)
r = capa.rules.Rule.from_yaml(rule)
# feature isn't indexed - good.
features, matches = capa.engine.match([r], {}, 0x0)
assert "test rule" in matches
# feature is indexed, but no matches.
# i don't think we should ever really have this case, but good to check anyways.
features, matches = capa.engine.match([r], {capa.features.insn.Number(100): {}}, 0x0)
assert "test rule" in matches
# too many matches
features, matches = capa.engine.match([r], {capa.features.insn.Number(100): {1}}, 0x0)
assert "test rule" not in matches
def test_range_with_zero():
rule = textwrap.dedent(
"""
rule:
meta:
name: test rule
features:
- count(number(100)): (0, 1)
"""
)
r = capa.rules.Rule.from_yaml(rule)
# ok
features, matches = capa.engine.match([r], {}, 0x0)
assert "test rule" in matches
features, matches = capa.engine.match([r], {capa.features.insn.Number(100): {}}, 0x0)
assert "test rule" in matches
features, matches = capa.engine.match([r], {capa.features.insn.Number(100): {1}}, 0x0)
assert "test rule" in matches
# too many matches
features, matches = capa.engine.match([r], {capa.features.insn.Number(100): {1, 2}}, 0x0)
assert "test rule" not in matches
def test_match_adds_matched_rule_feature():
"""show that using `match` adds a feature for matched rules."""
rule = textwrap.dedent(
"""
rule:
meta:
name: test rule
features:
- number: 100
"""
)
r = capa.rules.Rule.from_yaml(rule)
features, matches = capa.engine.match([r], {capa.features.insn.Number(100): {1}}, 0x0)
assert capa.features.common.MatchedRule("test rule") in features
def test_match_matched_rules():
"""show that using `match` adds a feature for matched rules."""
rules = [
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: test rule1
features:
- number: 100
"""
)
),
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: test rule2
features:
- match: test rule1
"""
)
),
]
features, matches = capa.engine.match(
capa.rules.topologically_order_rules(rules),
{capa.features.insn.Number(100): {1}},
0x0,
)
assert capa.features.common.MatchedRule("test rule1") in features
assert capa.features.common.MatchedRule("test rule2") in features
# the ordering of the rules must not matter,
# the engine should match rules in an appropriate order.
features, matches = capa.engine.match(
capa.rules.topologically_order_rules(reversed(rules)),
{capa.features.insn.Number(100): {1}},
0x0,
)
assert capa.features.common.MatchedRule("test rule1") in features
assert capa.features.common.MatchedRule("test rule2") in features
def test_substring():
rules = [
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: test rule
features:
- and:
- substring: abc
"""
)
),
]
features, matches = capa.engine.match(
capa.rules.topologically_order_rules(rules),
{capa.features.common.String("aaaa"): {1}},
0x0,
)
assert capa.features.common.MatchedRule("test rule") not in features
features, matches = capa.engine.match(
capa.rules.topologically_order_rules(rules),
{capa.features.common.String("abc"): {1}},
0x0,
)
assert capa.features.common.MatchedRule("test rule") in features
features, matches = capa.engine.match(
capa.rules.topologically_order_rules(rules),
{capa.features.common.String("111abc222"): {1}},
0x0,
)
assert capa.features.common.MatchedRule("test rule") in features
features, matches = capa.engine.match(
capa.rules.topologically_order_rules(rules),
{capa.features.common.String("111abc"): {1}},
0x0,
)
assert capa.features.common.MatchedRule("test rule") in features
features, matches = capa.engine.match(
capa.rules.topologically_order_rules(rules),
{capa.features.common.String("abc222"): {1}},
0x0,
)
assert capa.features.common.MatchedRule("test rule") in features
def test_regex():
rules = [
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: test rule
features:
- and:
- string: /.*bbbb.*/
"""
)
),
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: rule with implied wildcards
features:
- and:
- string: /bbbb/
"""
)
),
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: rule with anchor
features:
- and:
- string: /^bbbb/
"""
)
),
]
features, matches = capa.engine.match(
capa.rules.topologically_order_rules(rules),
{capa.features.insn.Number(100): {1}},
0x0,
)
assert capa.features.common.MatchedRule("test rule") not in features
features, matches = capa.engine.match(
capa.rules.topologically_order_rules(rules),
{capa.features.common.String("aaaa"): {1}},
0x0,
)
assert capa.features.common.MatchedRule("test rule") not in features
features, matches = capa.engine.match(
capa.rules.topologically_order_rules(rules),
{capa.features.common.String("aBBBBa"): {1}},
0x0,
)
assert capa.features.common.MatchedRule("test rule") not in features
features, matches = capa.engine.match(
capa.rules.topologically_order_rules(rules),
{capa.features.common.String("abbbba"): {1}},
0x0,
)
assert capa.features.common.MatchedRule("test rule") in features
assert capa.features.common.MatchedRule("rule with implied wildcards") in features
assert capa.features.common.MatchedRule("rule with anchor") not in features
def test_regex_ignorecase():
rules = [
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: test rule
features:
- and:
- string: /.*bbbb.*/i
"""
)
),
]
features, matches = capa.engine.match(
capa.rules.topologically_order_rules(rules),
{capa.features.common.String("aBBBBa"): {1}},
0x0,
)
assert capa.features.common.MatchedRule("test rule") in features
def test_regex_complex():
rules = [
capa.rules.Rule.from_yaml(
textwrap.dedent(
r"""
rule:
meta:
name: test rule
features:
- or:
- string: /.*HARDWARE\\Key\\key with spaces\\.*/i
"""
)
),
]
features, matches = capa.engine.match(
capa.rules.topologically_order_rules(rules),
{capa.features.common.String(r"Hardware\Key\key with spaces\some value"): {1}},
0x0,
)
assert capa.features.common.MatchedRule("test rule") in features
def test_match_namespace():
rules = [
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: CreateFile API
namespace: file/create/CreateFile
features:
- api: CreateFile
"""
)
),
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: WriteFile API
namespace: file/write
features:
- api: WriteFile
"""
)
),
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: file-create
features:
- match: file/create
"""
)
),
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: filesystem-any
features:
- match: file
"""
)
),
]
features, matches = capa.engine.match(
capa.rules.topologically_order_rules(rules),
{capa.features.insn.API("CreateFile"): {1}},
0x0,
)
assert "CreateFile API" in matches
assert "file-create" in matches
assert "filesystem-any" in matches
assert capa.features.common.MatchedRule("file") in features
assert capa.features.common.MatchedRule("file/create") in features
assert capa.features.common.MatchedRule("file/create/CreateFile") in features
features, matches = capa.engine.match(
capa.rules.topologically_order_rules(rules),
{capa.features.insn.API("WriteFile"): {1}},
0x0,
)
assert "WriteFile API" in matches
assert "file-create" not in matches
assert "filesystem-any" in matches
def test_render_number():
assert str(capa.features.insn.Number(1)) == "number(0x1)"
assert str(capa.features.insn.Number(1, bitness=capa.features.common.BITNESS_X32)) == "number/x32(0x1)"
assert str(capa.features.insn.Number(1, bitness=capa.features.common.BITNESS_X64)) == "number/x64(0x1)"
def test_render_offset():
assert str(capa.features.insn.Offset(1)) == "offset(0x1)"
assert str(capa.features.insn.Offset(1, bitness=capa.features.common.BITNESS_X32)) == "offset/x32(0x1)"
assert str(capa.features.insn.Offset(1, bitness=capa.features.common.BITNESS_X64)) == "offset/x64(0x1)"
def test_short_circuit():
assert Or([Number(1), Number(2)]).evaluate({Number(1): {1}}) == True

533
tests/test_match.py Normal file
View File

@@ -0,0 +1,533 @@
# Copyright (C) 2020 FireEye, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import textwrap
import capa.rules
import capa.engine
import capa.features.insn
import capa.features.common
from capa.rules import Scope
from capa.features import *
from capa.features.insn import *
from capa.features.common import *
def match(rules, features, va, scope=Scope.FUNCTION):
    """
    run every matching algorithm over the same input and cross-check them,
    then hand the reference results back so the caller can make its own asserts.

    the reference is `capa.engine.match`; `RuleSet.match` must reproduce
    every feature (with identical locations) and every rule match
    (with the same number of results) that the reference found.
    """
    reference_features, reference_matches = capa.engine.match(rules, features, va)

    optimized_features, optimized_matches = capa.rules.RuleSet(rules).match(scope, features, va)

    for feature in reference_features:
        assert feature in optimized_features
        assert reference_features[feature] == optimized_features[feature]

    for rulename in reference_matches:
        assert rulename in optimized_matches
        assert len(reference_matches[rulename]) == len(optimized_matches[rulename])

    return reference_features, reference_matches
# a single `number: 100` rule must match when that number is present.
# the resulting feature set must contain MatchedRule features for the rule name
# and for each prefix of its namespace ("testns1" and "testns1/testns2").
def test_match_simple():
rule = textwrap.dedent(
"""
rule:
meta:
name: test rule
namespace: testns1/testns2
features:
- number: 100
"""
)
r = capa.rules.Rule.from_yaml(rule)
features, matches = match([r], {capa.features.insn.Number(100): {1, 2}}, 0x0)
assert "test rule" in matches
assert MatchedRule("test rule") in features
assert MatchedRule("testns1") in features
assert MatchedRule("testns1/testns2") in features
# `count(number(100)): 2` must match only when the feature is found
# at exactly two locations: one location or three locations must not match.
def test_match_range_exact():
rule = textwrap.dedent(
"""
rule:
meta:
name: test rule
features:
- count(number(100)): 2
"""
)
r = capa.rules.Rule.from_yaml(rule)
# just enough matches
_, matches = match([r], {capa.features.insn.Number(100): {1, 2}}, 0x0)
assert "test rule" in matches
# not enough matches
_, matches = match([r], {capa.features.insn.Number(100): {1}}, 0x0)
assert "test rule" not in matches
# too many matches
_, matches = match([r], {capa.features.insn.Number(100): {1, 2, 3}}, 0x0)
assert "test rule" not in matches
# `count(number(100)): (2, 3)` must match when the feature is found
# at two or three locations, and must not match at one or four locations.
def test_match_range_range():
rule = textwrap.dedent(
"""
rule:
meta:
name: test rule
features:
- count(number(100)): (2, 3)
"""
)
r = capa.rules.Rule.from_yaml(rule)
# just enough matches
_, matches = match([r], {capa.features.insn.Number(100): {1, 2}}, 0x0)
assert "test rule" in matches
# enough matches
_, matches = match([r], {capa.features.insn.Number(100): {1, 2, 3}}, 0x0)
assert "test rule" in matches
# not enough matches
_, matches = match([r], {capa.features.insn.Number(100): {1}}, 0x0)
assert "test rule" not in matches
# too many matches
_, matches = match([r], {capa.features.insn.Number(100): {1, 2, 3, 4}}, 0x0)
assert "test rule" not in matches
# `count(number(100)): 0` must match when the feature is absent from the feature set,
# or present with no locations, and must not match once it has a location.
# this rule shares no feature with an empty feature set,
# so it also checks that the ruleset matcher doesn't skip it.
def test_match_range_exact_zero():
rule = textwrap.dedent(
"""
rule:
meta:
name: test rule
features:
- count(number(100)): 0
"""
)
r = capa.rules.Rule.from_yaml(rule)
# feature isn't indexed - good.
_, matches = match([r], {}, 0x0)
assert "test rule" in matches
# feature is indexed, but no matches.
# i don't think we should ever really have this case, but good to check anyways.
_, matches = match([r], {capa.features.insn.Number(100): {}}, 0x0)
assert "test rule" in matches
# too many matches
_, matches = match([r], {capa.features.insn.Number(100): {1}}, 0x0)
assert "test rule" not in matches
# `count(number(100)): (0, 1)` must match when the feature is absent,
# present with no locations, or present at one location,
# and must not match at two locations.
def test_match_range_with_zero():
rule = textwrap.dedent(
"""
rule:
meta:
name: test rule
features:
- count(number(100)): (0, 1)
"""
)
r = capa.rules.Rule.from_yaml(rule)
# ok
_, matches = match([r], {}, 0x0)
assert "test rule" in matches
_, matches = match([r], {capa.features.insn.Number(100): {}}, 0x0)
assert "test rule" in matches
_, matches = match([r], {capa.features.insn.Number(100): {1}}, 0x0)
assert "test rule" in matches
# too many matches
_, matches = match([r], {capa.features.insn.Number(100): {1, 2}}, 0x0)
assert "test rule" not in matches
# when a rule matches, the returned feature set must contain
# a MatchedRule feature carrying that rule's name.
def test_match_adds_matched_rule_feature():
"""show that using `match` adds a feature for matched rules."""
rule = textwrap.dedent(
"""
rule:
meta:
name: test rule
features:
- number: 100
"""
)
r = capa.rules.Rule.from_yaml(rule)
features, _ = match([r], {capa.features.insn.Number(100): {1}}, 0x0)
assert capa.features.common.MatchedRule("test rule") in features
# NOTE: beyond what the docstring says, this test covers rule dependencies:
# "test rule2" consists only of `match: test rule1`, so it can only match
# after "test rule1" has matched and contributed its MatchedRule feature.
# both orderings of the input list are passed through `topologically_order_rules`
# and must give the same outcome.
def test_match_matched_rules():
"""show that using `match` adds a feature for matched rules."""
rules = [
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: test rule1
features:
- number: 100
"""
)
),
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: test rule2
features:
- match: test rule1
"""
)
),
]
features, _ = match(
capa.rules.topologically_order_rules(rules),
{capa.features.insn.Number(100): {1}},
0x0,
)
assert capa.features.common.MatchedRule("test rule1") in features
assert capa.features.common.MatchedRule("test rule2") in features
# the ordering of the rules must not matter,
# the engine should match rules in an appropriate order.
features, _ = match(
capa.rules.topologically_order_rules(reversed(rules)),
{capa.features.insn.Number(100): {1}},
0x0,
)
assert capa.features.common.MatchedRule("test rule1") in features
assert capa.features.common.MatchedRule("test rule2") in features
# `match:` statements may name a namespace (or a namespace prefix) instead of a rule:
#  - with only CreateFile present, "file-create" (`match: file/create`) and
#    "filesystem-any" (`match: file`) must both match, and MatchedRule features
#    must exist for "file", "file/create" and "file/create/CreateFile".
#  - with only WriteFile present, "filesystem-any" must still match,
#    but "file-create" must not.
def test_match_namespace():
rules = [
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: CreateFile API
namespace: file/create/CreateFile
features:
- api: CreateFile
"""
)
),
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: WriteFile API
namespace: file/write
features:
- api: WriteFile
"""
)
),
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: file-create
features:
- match: file/create
"""
)
),
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: filesystem-any
features:
- match: file
"""
)
),
]
features, matches = match(
capa.rules.topologically_order_rules(rules),
{capa.features.insn.API("CreateFile"): {1}},
0x0,
)
assert "CreateFile API" in matches
assert "file-create" in matches
assert "filesystem-any" in matches
assert capa.features.common.MatchedRule("file") in features
assert capa.features.common.MatchedRule("file/create") in features
assert capa.features.common.MatchedRule("file/create/CreateFile") in features
features, matches = match(
capa.rules.topologically_order_rules(rules),
{capa.features.insn.API("WriteFile"): {1}},
0x0,
)
assert "WriteFile API" in matches
assert "file-create" not in matches
assert "filesystem-any" in matches
# `substring: abc` must match any string feature that contains "abc"
# (the exact string, in the middle, at the end, or at the start),
# and must not match a string that doesn't contain it.
def test_match_substring():
rules = [
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: test rule
features:
- and:
- substring: abc
"""
)
),
]
features, _ = match(
capa.rules.topologically_order_rules(rules),
{capa.features.common.String("aaaa"): {1}},
0x0,
)
assert capa.features.common.MatchedRule("test rule") not in features
features, _ = match(
capa.rules.topologically_order_rules(rules),
{capa.features.common.String("abc"): {1}},
0x0,
)
assert capa.features.common.MatchedRule("test rule") in features
features, _ = match(
capa.rules.topologically_order_rules(rules),
{capa.features.common.String("111abc222"): {1}},
0x0,
)
assert capa.features.common.MatchedRule("test rule") in features
features, _ = match(
capa.rules.topologically_order_rules(rules),
{capa.features.common.String("111abc"): {1}},
0x0,
)
assert capa.features.common.MatchedRule("test rule") in features
features, _ = match(
capa.rules.topologically_order_rules(rules),
{capa.features.common.String("abc222"): {1}},
0x0,
)
assert capa.features.common.MatchedRule("test rule") in features
def test_match_regex():
    """
    exercise regular expression string features (`string: /.../`) with three rules:

      - "test rule": explicit wildcards on both sides (`/.*bbbb.*/`)
      - "rule with implied wildcards": bare pattern (`/bbbb/`)
      - "rule with anchor": pattern anchored at the start (`/^bbbb/`)
    """
    rules = [
        capa.rules.Rule.from_yaml(
            textwrap.dedent(
                """
                rule:
                    meta:
                        name: test rule
                    features:
                        - and:
                            - string: /.*bbbb.*/
                """
            )
        ),
        capa.rules.Rule.from_yaml(
            textwrap.dedent(
                """
                rule:
                    meta:
                        name: rule with implied wildcards
                    features:
                        - and:
                            - string: /bbbb/
                """
            )
        ),
        capa.rules.Rule.from_yaml(
            textwrap.dedent(
                """
                rule:
                    meta:
                        name: rule with anchor
                    features:
                        - and:
                            - string: /^bbbb/
                """
            )
        ),
    ]

    # only a number feature is present: the regex has nothing to match against.
    features, _ = match(
        capa.rules.topologically_order_rules(rules),
        {capa.features.insn.Number(100): {1}},
        0x0,
    )
    assert capa.features.common.MatchedRule("test rule") not in features

    # a string feature is present, but it does not contain "bbbb".
    features, _ = match(
        capa.rules.topologically_order_rules(rules),
        {capa.features.common.String("aaaa"): {1}},
        0x0,
    )
    assert capa.features.common.MatchedRule("test rule") not in features

    # the string differs from the pattern only by case: without the /i flag this is not a match.
    features, _ = match(
        capa.rules.topologically_order_rules(rules),
        {capa.features.common.String("aBBBBa"): {1}},
        0x0,
    )
    assert capa.features.common.MatchedRule("test rule") not in features

    # "bbbb" occurs in the middle of the string:
    # the explicit-wildcard and bare patterns match, the start-anchored pattern does not.
    features, _ = match(
        capa.rules.topologically_order_rules(rules),
        {capa.features.common.String("abbbba"): {1}},
        0x0,
    )
    assert capa.features.common.MatchedRule("test rule") in features
    assert capa.features.common.MatchedRule("rule with implied wildcards") in features
    assert capa.features.common.MatchedRule("rule with anchor") not in features
def test_match_regex_ignorecase():
    """
    a regex string feature with the `/i` suffix matches a string
    that differs from the pattern only by letter case.
    """
    rules = [
        capa.rules.Rule.from_yaml(
            textwrap.dedent(
                """
                rule:
                    meta:
                        name: test rule
                    features:
                        - and:
                            - string: /.*bbbb.*/i
                """
            )
        ),
    ]

    # pattern is lowercase "bbbb", the feature contains uppercase "BBBB".
    features, _ = match(
        capa.rules.topologically_order_rules(rules),
        {capa.features.common.String("aBBBBa"): {1}},
        0x0,
    )
    assert capa.features.common.MatchedRule("test rule") in features
def test_match_regex_complex():
    """
    a case-insensitive regex containing escaped backslashes and spaces
    matches a registry-path-like string.
    """
    rules = [
        capa.rules.Rule.from_yaml(
            textwrap.dedent(
                # raw string: each `\\` below reaches the rule text as two characters,
                # i.e. a regex escape for one literal backslash.
                r"""
                rule:
                    meta:
                        name: test rule
                    features:
                        - or:
                            - string: /.*HARDWARE\\Key\\key with spaces\\.*/i
                """
            )
        ),
    ]

    # the feature uses different letter case ("Hardware") and single backslashes.
    features, _ = match(
        capa.rules.topologically_order_rules(rules),
        {capa.features.common.String(r"Hardware\Key\key with spaces\some value"): {1}},
        0x0,
    )
    assert capa.features.common.MatchedRule("test rule") in features
def test_match_regex_values_always_string():
    """
    regex patterns that look like numbers (`/123/`, `/0x123/`) match
    string features carrying that same text.

    NOTE(review): presumably this guards against number-like regex values being
    interpreted as integers when the rule is parsed; confirm against the rule parser.
    """
    rules = [
        capa.rules.Rule.from_yaml(
            textwrap.dedent(
                """
                rule:
                    meta:
                        name: test rule
                    features:
                        - or:
                            - string: /123/
                            - string: /0x123/
                """
            )
        ),
    ]

    # one string feature per run; either branch of the `or` is enough for the rule to match.
    for text in ("123", "0x123"):
        features, _ = match(
            capa.rules.topologically_order_rules(rules),
            {capa.features.common.String(text): {1}},
            0x0,
        )
        assert capa.features.common.MatchedRule("test rule") in features
def test_match_not():
    """
    a `not` node matches when its child feature is absent:
    the rule excludes number 99 and the feature set only contains number 100.
    """
    rule = textwrap.dedent(
        """
        rule:
            meta:
                name: test rule
                namespace: testns1/testns2
            features:
                - not:
                    - number: 99
        """
    )
    r = capa.rules.Rule.from_yaml(rule)

    _, matches = match([r], {capa.features.insn.Number(100): {1, 2}}, 0x0)
    assert "test rule" in matches
def test_match_not_not():
    """
    a doubly-negated feature matches when the feature is present:
    `not: not: number: 100` against a feature set containing number 100.
    """
    rule = textwrap.dedent(
        """
        rule:
            meta:
                name: test rule
                namespace: testns1/testns2
            features:
                - not:
                    - not:
                        - number: 100
        """
    )
    r = capa.rules.Rule.from_yaml(rule)

    _, matches = match([r], {capa.features.insn.Number(100): {1, 2}}, 0x0)
    assert "test rule" in matches

View File

@@ -2,9 +2,23 @@ import textwrap
import capa.rules
import capa.render.utils
import capa.features.insn
import capa.features.common
import capa.render.result_document
def test_render_number():
    """
    check how a Number feature renders via `str()`:
    without a bitness, and with the x32 and x64 bitness constants.
    """
    make_number = capa.features.insn.Number
    common = capa.features.common

    plain = str(make_number(1))
    assert plain == "number(0x1)"

    with_x32 = str(make_number(1, bitness=common.BITNESS_X32))
    assert with_x32 == "number/x32(0x1)"

    with_x64 = str(make_number(1, bitness=common.BITNESS_X64))
    assert with_x64 == "number/x64(0x1)"
def test_render_offset():
    """
    check how an Offset feature renders via `str()`:
    without a bitness, and with the x32 and x64 bitness constants.
    """
    make_offset = capa.features.insn.Offset
    common = capa.features.common

    plain = str(make_offset(1))
    assert plain == "offset(0x1)"

    with_x32 = str(make_offset(1, bitness=common.BITNESS_X32))
    assert with_x32 == "offset/x32(0x1)"

    with_x64 = str(make_offset(1, bitness=common.BITNESS_X64))
    assert with_x64 == "offset/x64(0x1)"
def test_render_meta_attack():
# Persistence::Boot or Logon Autostart Execution::Registry Run Keys / Startup Folder [T1547.001]
id = "T1543.003"

View File

@@ -785,37 +785,6 @@ def test_substring_description():
assert (Substring("abc") in children) == True
def test_regex_values_always_string():
    """
    regex patterns that look like numbers (`/123/`, `/0x123/`) match
    string features carrying that same text.

    NOTE(review): presumably this guards against number-like regex values being
    interpreted as integers when the rule is parsed; confirm against the rule parser.
    """
    rules = [
        capa.rules.Rule.from_yaml(
            textwrap.dedent(
                """
                rule:
                    meta:
                        name: test rule
                    features:
                        - or:
                            - string: /123/
                            - string: /0x123/
                """
            )
        ),
    ]

    # one string feature per run; either branch of the `or` is enough for the rule to match.
    # the match results are not inspected here, only the returned feature set.
    for text in ("123", "0x123"):
        features, _ = capa.engine.match(
            capa.rules.topologically_order_rules(rules),
            {capa.features.common.String(text): {1}},
            0x0,
        )
        assert capa.features.common.MatchedRule("test rule") in features
def test_filter_rules():
rules = capa.rules.RuleSet(
[