Merge pull request #830 from mandiant/perf/rule-selection

perf: don't try to match rules that will never match
2025-12-12 15:49:46 -08:00 · 2021-11-12 11:54:29 -07:00
parent 10d747cc8c 83253eb7d0
commit 57fe1e27b6
9 changed files with 821 additions and 474 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,7 @@

 - engine: short circuit logic nodes for better performance #824 @williballenthin
 - engine: add optimizer the order faster nodes first #829 @williballenthin
+- engine: optimize rule evaluation by skipping rules that can't match #830 @williballenthin

 ### Breaking Changes

--- a/capa/engine.py
+++ b/capa/engine.py
@@ -8,13 +8,17 @@

 import copy
 import collections
-from typing import Set, Dict, List, Tuple, Union, Mapping, Iterable
+from typing import TYPE_CHECKING, Set, Dict, List, Tuple, Mapping, Iterable

 import capa.perf
-import capa.rules
 import capa.features.common
 from capa.features.common import Result, Feature

+if TYPE_CHECKING:
+    # only needed for type annotations; importing at runtime would be circular
+    import capa.rules
+
+
 # a collection of features and the locations at which they are found.
 #
 # used throughout matching as the context in which features are searched:
@@ -275,15 +279,20 @@ def index_rule_matches(features: FeatureSet, rule: "capa.rules.Rule", locations:

 def match(rules: List["capa.rules.Rule"], features: FeatureSet, va: int) -> Tuple[FeatureSet, MatchResults]:
    """
-    Args:
-      rules (List[capa.rules.Rule]): these must already be ordered topologically by dependency.
-      features (Mapping[capa.features.Feature, int]):
-      va (int): location of the features
+    match the given rules against the given features,
+    returning an updated set of features and the matches.

-    Returns:
-      Tuple[FeatureSet, MatchResults]: two-tuple with entries:
-        - set of features used for matching (which may be a superset of the given `features` argument, due to rule match features), and
-        - mapping from rule name to [(location of match, result object)]
+    the updated features are just like the input,
+    but extended to include the match features (e.g. names of rules that matched).
+    the given feature set is not modified; an updated copy is returned.
+
+    the given list of rules must be ordered topologically by dependency,
+    or else `match` statements will not be handled correctly.
+
+    this routine should be fairly optimized, but is not guaranteed to be the fastest matcher possible.
+    it has a particularly convenient signature: (rules, features) -> matches
+    other strategies can be imagined that match differently; implement these elsewhere.
+    specifically, this routine does "top down" matching of the given rules against the feature set.
    """
    results = collections.defaultdict(list)  # type: MatchResults

--- a/capa/main.py
+++ b/capa/main.py
@@ -42,7 +42,7 @@ import capa.features.extractors
 import capa.features.extractors.common
 import capa.features.extractors.pefile
 import capa.features.extractors.elffile
-from capa.rules import Rule, RuleSet
+from capa.rules import Rule, Scope, RuleSet
 from capa.engine import FeatureSet, MatchResults
 from capa.helpers import get_file_taste
 from capa.features.extractors.base_extractor import FunctionHandle, FeatureExtractor
@@ -114,7 +114,7 @@ def find_function_capabilities(ruleset: RuleSet, extractor: FeatureExtractor, f:
                bb_features[feature].add(va)
                function_features[feature].add(va)

-        _, matches = capa.engine.match(ruleset.basic_block_rules, bb_features, int(bb))
+        _, matches = ruleset.match(Scope.BASIC_BLOCK, bb_features, int(bb))

        for rule_name, res in matches.items():
            bb_matches[rule_name].extend(res)
@@ -122,7 +122,7 @@ def find_function_capabilities(ruleset: RuleSet, extractor: FeatureExtractor, f:
            for va, _ in res:
                capa.engine.index_rule_matches(function_features, rule, [va])

-    _, function_matches = capa.engine.match(ruleset.function_rules, function_features, int(f))
+    _, function_matches = ruleset.match(Scope.FUNCTION, function_features, int(f))
    return function_matches, bb_matches, len(function_features)


@@ -143,7 +143,7 @@ def find_file_capabilities(ruleset: RuleSet, extractor: FeatureExtractor, functi

    file_features.update(function_features)

-    _, matches = capa.engine.match(ruleset.file_rules, file_features, 0x0)
+    _, matches = ruleset.match(Scope.FILE, file_features, 0x0)
    return matches, len(file_features)


--- a/capa/rules.py
+++ b/capa/rules.py
@@ -14,6 +14,7 @@ import logging
 import binascii
 import functools
 import collections
+from enum import Enum

 try:
    from functools import lru_cache
@@ -22,7 +23,7 @@ except ImportError:
    # https://github.com/python/mypy/issues/1153
    from backports.functools_lru_cache import lru_cache  # type: ignore

-from typing import Any, Dict, List, Union, Iterator
+from typing import Any, Set, Dict, List, Tuple, Union, Iterator

 import yaml
 import ruamel.yaml
@@ -66,9 +67,15 @@ META_KEYS = (
 HIDDEN_META_KEYS = ("capa/nursery", "capa/path")


-FILE_SCOPE = "file"
-FUNCTION_SCOPE = "function"
-BASIC_BLOCK_SCOPE = "basic block"
+class Scope(str, Enum):
+    FILE = "file"
+    FUNCTION = "function"
+    BASIC_BLOCK = "basic block"
+
+
+FILE_SCOPE = Scope.FILE.value
+FUNCTION_SCOPE = Scope.FUNCTION.value
+BASIC_BLOCK_SCOPE = Scope.BASIC_BLOCK.value


 SUPPORTED_FEATURES = {
@@ -970,6 +977,15 @@ class RuleSet:
        self.rules = {rule.name: rule for rule in rules}
        self.rules_by_namespace = index_rules_by_namespace(rules)

+        # unstable
+        (self._easy_file_rules_by_feature, self._hard_file_rules) = self._index_rules_by_feature(self.file_rules)
+        (self._easy_function_rules_by_feature, self._hard_function_rules) = self._index_rules_by_feature(
+            self.function_rules
+        )
+        (self._easy_basic_block_rules_by_feature, self._hard_basic_block_rules) = self._index_rules_by_feature(
+            self.basic_block_rules
+        )
+
    def __len__(self):
        return len(self.rules)

@@ -979,6 +995,126 @@ class RuleSet:
    def __contains__(self, rulename):
        return rulename in self.rules

+    @staticmethod
+    def _index_rules_by_feature(rules) -> Tuple[Dict[Feature, Set[str]], List[str]]:
+        """
+        split the given rules into two structures:
+          - "easy rules" are indexed by feature,
+            such that you can quickly find the rules that contain a given feature.
+          - "hard rules" are those that contain substring/regex/bytes features or match statements.
+            these continue to be ordered topologically.
+
+        a rule evaluator can use the "easy rule" index to restrict the
+        candidate rules that might match a given set of features.
+
+        at this time, a rule evaluator can't do anything special with
+        the "hard rules". it must still do a full top-down match of each
+        rule, in topological order.
+        """
+
+        # we'll do a couple phases:
+        #
+        #  1. recursively visit all nodes in all rules,
+        #    a. indexing all features
+        #    b. recording the types of features found per rule
+        #  2. compute the easy and hard rule sets
+        #  3. remove hard rules from the rules-by-feature index
+        #  4. construct the topologically ordered list of hard rules
+        rules_with_easy_features: Set[str] = set()
+        rules_with_hard_features: Set[str] = set()
+        rules_by_feature: Dict[Feature, Set[str]] = collections.defaultdict(set)
+
+        def rec(rule_name: str, node: Union[Feature, Statement]):
+            """
+            walk through a rule's logic tree, indexing the easy and hard rules,
+            and the features referenced by easy rules.
+            """
+            if isinstance(
+                node,
+                (
+                    # these are the "hard features"
+                    # substring: scanning feature
+                    capa.features.common.Substring,
+                    # regex: scanning feature
+                    capa.features.common.Regex,
+                    # bytes: scanning feature
+                    capa.features.common.Bytes,
+                    # match: dependency on another rule,
+                    # which we have to evaluate first,
+                    # and is therefore tricky.
+                    capa.features.common.MatchedRule,
+                ),
+            ):
+                # hard feature: requires scan or match lookup
+                rules_with_hard_features.add(rule_name)
+            elif isinstance(node, capa.features.common.Feature):
+                # easy feature: hash lookup
+                rules_with_easy_features.add(rule_name)
+                rules_by_feature[node].add(rule_name)
+            elif isinstance(node, (ceng.Not)):
+                # `not:` statements are tricky to deal with.
+                #
+                # first, features found under a `not:` should not be indexed,
+                # because they're not wanted to be found.
+                # second, `not:` can be nested under another `not:`, or two, etc.
+                # third, `not:` at the root or directly under an `or:`
+                # means the rule will match against *anything* not specified there,
+                # which is a difficult set of things to compute and index.
+                #
+                # so, if a rule has a `not:` statement, it's hard.
+                # as of writing, this is an uncommon statement, with only 6 instances in 740 rules.
+                rules_with_hard_features.add(rule_name)
+            elif isinstance(node, (ceng.Some)) and node.count == 0:
+                # `optional:` and `0 or more:` are tricky to deal with.
+                #
+                # when a subtree is optional, it may match, but not matching
+                # doesn't have any impact either.
+                # now, our rule authors *should* not put this under `or:`
+                # and this is checked by the linter,
+                # but this could still happen (e.g. private rule set without linting)
+                # and would be hard to trace down.
+                #
+                # so better to be safe than sorry and consider this a hard case.
+                rules_with_hard_features.add(rule_name)
+            elif isinstance(node, (ceng.Range)) and node.min == 0:
+                # `count(foo): 0 or more` is tricky to deal with.
+                # because the min is 0,
+                # this subtree *can* match just about any feature
+                # (except the given one)
+                # which is a difficult set of things to compute and index.
+                rules_with_hard_features.add(rule_name)
+            elif isinstance(node, (ceng.Range)):
+                rec(rule_name, node.child)
+            elif isinstance(node, (ceng.And, ceng.Or, ceng.Some)):
+                for child in node.children:
+                    rec(rule_name, child)
+            else:
+                # programming error
+                raise Exception("programming error: unexpected node type: %s" % (node))
+
+        for rule in rules:
+            rule_name = rule.meta["name"]
+            root = rule.statement
+            rec(rule_name, root)
+
+        # if a rule has a hard feature,
+        # don't consider it easy, and therefore,
+        # don't index any of its features.
+        #
+        # otherwise, it's an easy rule, and we index its features
+        for rules_with_feature in rules_by_feature.values():
+            rules_with_feature.difference_update(rules_with_hard_features)
+        easy_rules_by_feature = rules_by_feature
+
+        # `rules` is already topologically ordered,
+        # so extract our hard set into the topological ordering.
+        hard_rules = []
+        for rule in rules:
+            if rule.meta["name"] in rules_with_hard_features:
+                hard_rules.append(rule.meta["name"])
+
+        return (easy_rules_by_feature, hard_rules)
+
    @staticmethod
    def _get_rules_for_scope(rules, scope):
        """
@@ -1041,3 +1177,65 @@ class RuleSet:
                    rules_filtered.update(set(capa.rules.get_rules_and_dependencies(rules, rule.name)))
                    break
        return RuleSet(list(rules_filtered))
+
+    def match(self, scope: Scope, features: FeatureSet, va: int) -> Tuple[FeatureSet, ceng.MatchResults]:
+        """
+        match rules from this ruleset at the given scope against the given features.
+
+        this routine should act just like `capa.engine.match`,
+        except that it may be more performant.
+        """
+        if scope == scope.FILE:
+            easy_rules_by_feature = self._easy_file_rules_by_feature
+            hard_rule_names = self._hard_file_rules
+        elif scope == scope.FUNCTION:
+            easy_rules_by_feature = self._easy_function_rules_by_feature
+            hard_rule_names = self._hard_function_rules
+        elif scope == scope.BASIC_BLOCK:
+            easy_rules_by_feature = self._easy_basic_block_rules_by_feature
+            hard_rule_names = self._hard_basic_block_rules
+        else:
+            raise Exception("programming error: unexpected scope")
+
+        candidate_rule_names = set()
+        for feature in features:
+            easy_rule_names = easy_rules_by_feature.get(feature)
+            if easy_rule_names:
+                candidate_rule_names.update(easy_rule_names)
+
+        # first, match against the set of rules that have at least one
+        # feature shared with our feature set.
+        candidate_rules = [self.rules[name] for name in candidate_rule_names]
+        features2, easy_matches = ceng.match(candidate_rules, features, va)
+
+        # note that we've stored the updated feature set in `features2`.
+        # this contains a superset of the features in `features`;
+        # it contains additional features for any easy rule matches.
+        # we'll pass this feature set to hard rule matching, since one
+        # of those rules might rely on an easy rule match.
+        #
+        # the updated feature set from hard matching will go into `features3`.
+        # this is a superset of `features2`, which is a superset of `features`.
+        # ultimately, this is what we'll return to the caller.
+        #
+        # in each case, we could have assigned the updated feature set back to `features`,
+        # but this makes it slightly more explicit how we're tracking the data.
+
+        # now, match against (topologically ordered) list of rules
+        # that we can't really make any guesses about.
+        # these are rules with hard features, like substring/regex/bytes and match statements.
+        hard_rules = [self.rules[name] for name in hard_rule_names]
+        features3, hard_matches = ceng.match(hard_rules, features2, va)
+
+        # note that above, we probably are skipping matching a bunch of
+        # rules that definitely would never hit.
+        # specifically, "easy rules" that don't share any features with
+        # the given feature set.
+
+        # MatchResults doesn't technically have an .update() method
+        # but a dict does.
+        matches = {}  # type: ignore
+        matches.update(easy_matches)
+        matches.update(hard_matches)
+
+        return (features3, matches)
--- a/scripts/lint.py
+++ b/scripts/lint.py
@@ -339,6 +339,52 @@ class OrStatementWithAlwaysTrueChild(Lint):
        return self.violation


+class NotNotUnderAnd(Lint):
+    name = "rule contains a `not` statement that's not found under an `and` statement"
+    recommendation = "clarify the rule logic and ensure `not` is always found under `and`"
+    violation = False
+
+    def check_rule(self, ctx: Context, rule: Rule):
+        self.violation = False
+
+        def rec(statement):
+            if isinstance(statement, capa.engine.Statement):
+                if not isinstance(statement, capa.engine.And):
+                    for child in statement.get_children():
+                        if isinstance(child, capa.engine.Not):
+                            self.violation = True
+
+                for child in statement.get_children():
+                    rec(child)
+
+        rec(rule.statement)
+
+        return self.violation
+
+
+class OptionalNotUnderAnd(Lint):
+    name = "rule contains an `optional` or `0 or more` statement that's not found under an `and` statement"
+    recommendation = "clarify the rule logic and ensure `optional` and `0 or more` is always found under `and`"
+    violation = False
+
+    def check_rule(self, ctx: Context, rule: Rule):
+        self.violation = False
+
+        def rec(statement):
+            if isinstance(statement, capa.engine.Statement):
+                if not isinstance(statement, capa.engine.And):
+                    for child in statement.get_children():
+                        if isinstance(child, capa.engine.Some) and child.count == 0:
+                            self.violation = True
+
+                for child in statement.get_children():
+                    rec(child)
+
+        rec(rule.statement)
+
+        return self.violation
+
+
 class UnusualMetaField(Lint):
    name = "unusual meta field"
    recommendation = "Remove the meta field"
@@ -660,6 +706,8 @@ LOGIC_LINTS = (
    DoesntMatchExample(),
    StatementWithSingleChildStatement(),
    OrStatementWithAlwaysTrueChild(),
+    NotNotUnderAnd(),
+    OptionalNotUnderAnd(),
 )


--- a/tests/test_engine.py
+++ b/tests/test_engine.py
@@ -5,13 +5,6 @@
 # Unless required by applicable law or agreed to in writing, software distributed under the License
 #  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
-
-import textwrap
-
-import capa.rules
-import capa.engine
-import capa.features.insn
-import capa.features.common
 from capa.engine import *
 from capa.features import *
 from capa.features.insn import *
@@ -117,424 +110,6 @@ def test_range():
    assert Range(Number(1), min=1, max=3).evaluate({Number(1): {1, 2, 3, 4}}) == False


-def test_range_exact():
-    rule = textwrap.dedent(
-        """
-        rule:
-            meta:
-                name: test rule
-            features:
-                - count(number(100)): 2
-        """
-    )
-    r = capa.rules.Rule.from_yaml(rule)
-
-    # just enough matches
-    features, matches = capa.engine.match([r], {capa.features.insn.Number(100): {1, 2}}, 0x0)
-    assert "test rule" in matches
-
-    # not enough matches
-    features, matches = capa.engine.match([r], {capa.features.insn.Number(100): {1}}, 0x0)
-    assert "test rule" not in matches
-
-    # too many matches
-    features, matches = capa.engine.match([r], {capa.features.insn.Number(100): {1, 2, 3}}, 0x0)
-    assert "test rule" not in matches
-
-
-def test_range_range():
-    rule = textwrap.dedent(
-        """
-         rule:
-             meta:
-                 name: test rule
-             features:
-                 - count(number(100)): (2, 3)
-         """
-    )
-    r = capa.rules.Rule.from_yaml(rule)
-
-    # just enough matches
-    features, matches = capa.engine.match([r], {capa.features.insn.Number(100): {1, 2}}, 0x0)
-    assert "test rule" in matches
-
-    # enough matches
-    features, matches = capa.engine.match([r], {capa.features.insn.Number(100): {1, 2, 3}}, 0x0)
-    assert "test rule" in matches
-
-    # not enough matches
-    features, matches = capa.engine.match([r], {capa.features.insn.Number(100): {1}}, 0x0)
-    assert "test rule" not in matches
-
-    # too many matches
-    features, matches = capa.engine.match([r], {capa.features.insn.Number(100): {1, 2, 3, 4}}, 0x0)
-    assert "test rule" not in matches
-
-
-def test_range_exact_zero():
-    rule = textwrap.dedent(
-        """
-        rule:
-            meta:
-                name: test rule
-            features:
-                - count(number(100)): 0
-        """
-    )
-    r = capa.rules.Rule.from_yaml(rule)
-
-    # feature isn't indexed - good.
-    features, matches = capa.engine.match([r], {}, 0x0)
-    assert "test rule" in matches
-
-    # feature is indexed, but no matches.
-    # i don't think we should ever really have this case, but good to check anyways.
-    features, matches = capa.engine.match([r], {capa.features.insn.Number(100): {}}, 0x0)
-    assert "test rule" in matches
-
-    # too many matches
-    features, matches = capa.engine.match([r], {capa.features.insn.Number(100): {1}}, 0x0)
-    assert "test rule" not in matches
-
-
-def test_range_with_zero():
-    rule = textwrap.dedent(
-        """
-         rule:
-             meta:
-                 name: test rule
-             features:
-                 - count(number(100)): (0, 1)
-         """
-    )
-    r = capa.rules.Rule.from_yaml(rule)
-
-    # ok
-    features, matches = capa.engine.match([r], {}, 0x0)
-    assert "test rule" in matches
-    features, matches = capa.engine.match([r], {capa.features.insn.Number(100): {}}, 0x0)
-    assert "test rule" in matches
-    features, matches = capa.engine.match([r], {capa.features.insn.Number(100): {1}}, 0x0)
-    assert "test rule" in matches
-
-    # too many matches
-    features, matches = capa.engine.match([r], {capa.features.insn.Number(100): {1, 2}}, 0x0)
-    assert "test rule" not in matches
-
-
-def test_match_adds_matched_rule_feature():
-    """show that using `match` adds a feature for matched rules."""
-    rule = textwrap.dedent(
-        """
-        rule:
-            meta:
-                name: test rule
-            features:
-                - number: 100
-        """
-    )
-    r = capa.rules.Rule.from_yaml(rule)
-    features, matches = capa.engine.match([r], {capa.features.insn.Number(100): {1}}, 0x0)
-    assert capa.features.common.MatchedRule("test rule") in features
-
-
-def test_match_matched_rules():
-    """show that using `match` adds a feature for matched rules."""
-    rules = [
-        capa.rules.Rule.from_yaml(
-            textwrap.dedent(
-                """
-                rule:
-                    meta:
-                        name: test rule1
-                    features:
-                        - number: 100
-                """
-            )
-        ),
-        capa.rules.Rule.from_yaml(
-            textwrap.dedent(
-                """
-                rule:
-                    meta:
-                        name: test rule2
-                    features:
-                        - match: test rule1
-                """
-            )
-        ),
-    ]
-
-    features, matches = capa.engine.match(
-        capa.rules.topologically_order_rules(rules),
-        {capa.features.insn.Number(100): {1}},
-        0x0,
-    )
-    assert capa.features.common.MatchedRule("test rule1") in features
-    assert capa.features.common.MatchedRule("test rule2") in features
-
-    # the ordering of the rules must not matter,
-    # the engine should match rules in an appropriate order.
-    features, matches = capa.engine.match(
-        capa.rules.topologically_order_rules(reversed(rules)),
-        {capa.features.insn.Number(100): {1}},
-        0x0,
-    )
-    assert capa.features.common.MatchedRule("test rule1") in features
-    assert capa.features.common.MatchedRule("test rule2") in features
-
-
-def test_substring():
-    rules = [
-        capa.rules.Rule.from_yaml(
-            textwrap.dedent(
-                """
-                rule:
-                    meta:
-                        name: test rule
-                    features:
-                        - and:
-                            - substring: abc
-                """
-            )
-        ),
-    ]
-    features, matches = capa.engine.match(
-        capa.rules.topologically_order_rules(rules),
-        {capa.features.common.String("aaaa"): {1}},
-        0x0,
-    )
-    assert capa.features.common.MatchedRule("test rule") not in features
-
-    features, matches = capa.engine.match(
-        capa.rules.topologically_order_rules(rules),
-        {capa.features.common.String("abc"): {1}},
-        0x0,
-    )
-    assert capa.features.common.MatchedRule("test rule") in features
-
-    features, matches = capa.engine.match(
-        capa.rules.topologically_order_rules(rules),
-        {capa.features.common.String("111abc222"): {1}},
-        0x0,
-    )
-    assert capa.features.common.MatchedRule("test rule") in features
-
-    features, matches = capa.engine.match(
-        capa.rules.topologically_order_rules(rules),
-        {capa.features.common.String("111abc"): {1}},
-        0x0,
-    )
-    assert capa.features.common.MatchedRule("test rule") in features
-
-    features, matches = capa.engine.match(
-        capa.rules.topologically_order_rules(rules),
-        {capa.features.common.String("abc222"): {1}},
-        0x0,
-    )
-    assert capa.features.common.MatchedRule("test rule") in features
-
-
-def test_regex():
-    rules = [
-        capa.rules.Rule.from_yaml(
-            textwrap.dedent(
-                """
-                rule:
-                    meta:
-                        name: test rule
-                    features:
-                        - and:
-                            - string: /.*bbbb.*/
-                """
-            )
-        ),
-        capa.rules.Rule.from_yaml(
-            textwrap.dedent(
-                """
-                rule:
-                    meta:
-                        name: rule with implied wildcards
-                    features:
-                        - and:
-                            - string: /bbbb/
-                """
-            )
-        ),
-        capa.rules.Rule.from_yaml(
-            textwrap.dedent(
-                """
-                rule:
-                    meta:
-                        name: rule with anchor
-                    features:
-                        - and:
-                            - string: /^bbbb/
-                """
-            )
-        ),
-    ]
-    features, matches = capa.engine.match(
-        capa.rules.topologically_order_rules(rules),
-        {capa.features.insn.Number(100): {1}},
-        0x0,
-    )
-    assert capa.features.common.MatchedRule("test rule") not in features
-
-    features, matches = capa.engine.match(
-        capa.rules.topologically_order_rules(rules),
-        {capa.features.common.String("aaaa"): {1}},
-        0x0,
-    )
-    assert capa.features.common.MatchedRule("test rule") not in features
-
-    features, matches = capa.engine.match(
-        capa.rules.topologically_order_rules(rules),
-        {capa.features.common.String("aBBBBa"): {1}},
-        0x0,
-    )
-    assert capa.features.common.MatchedRule("test rule") not in features
-
-    features, matches = capa.engine.match(
-        capa.rules.topologically_order_rules(rules),
-        {capa.features.common.String("abbbba"): {1}},
-        0x0,
-    )
-    assert capa.features.common.MatchedRule("test rule") in features
-    assert capa.features.common.MatchedRule("rule with implied wildcards") in features
-    assert capa.features.common.MatchedRule("rule with anchor") not in features
-
-
-def test_regex_ignorecase():
-    rules = [
-        capa.rules.Rule.from_yaml(
-            textwrap.dedent(
-                """
-                rule:
-                    meta:
-                        name: test rule
-                    features:
-                        - and:
-                            - string: /.*bbbb.*/i
-                """
-            )
-        ),
-    ]
-    features, matches = capa.engine.match(
-        capa.rules.topologically_order_rules(rules),
-        {capa.features.common.String("aBBBBa"): {1}},
-        0x0,
-    )
-    assert capa.features.common.MatchedRule("test rule") in features
-
-
-def test_regex_complex():
-    rules = [
-        capa.rules.Rule.from_yaml(
-            textwrap.dedent(
-                r"""
-                rule:
-                    meta:
-                        name: test rule
-                    features:
-                        - or:
-                            - string: /.*HARDWARE\\Key\\key with spaces\\.*/i
-                """
-            )
-        ),
-    ]
-    features, matches = capa.engine.match(
-        capa.rules.topologically_order_rules(rules),
-        {capa.features.common.String(r"Hardware\Key\key with spaces\some value"): {1}},
-        0x0,
-    )
-    assert capa.features.common.MatchedRule("test rule") in features
-
-
-def test_match_namespace():
-    rules = [
-        capa.rules.Rule.from_yaml(
-            textwrap.dedent(
-                """
-                rule:
-                    meta:
-                        name: CreateFile API
-                        namespace: file/create/CreateFile
-                    features:
-                        - api: CreateFile
-                """
-            )
-        ),
-        capa.rules.Rule.from_yaml(
-            textwrap.dedent(
-                """
-                rule:
-                    meta:
-                        name: WriteFile API
-                        namespace: file/write
-                    features:
-                        - api: WriteFile
-                """
-            )
-        ),
-        capa.rules.Rule.from_yaml(
-            textwrap.dedent(
-                """
-                rule:
-                    meta:
-                        name: file-create
-                    features:
-                        - match: file/create
-                """
-            )
-        ),
-        capa.rules.Rule.from_yaml(
-            textwrap.dedent(
-                """
-                rule:
-                    meta:
-                        name: filesystem-any
-                    features:
-                        - match: file
-                """
-            )
-        ),
-    ]
-
-    features, matches = capa.engine.match(
-        capa.rules.topologically_order_rules(rules),
-        {capa.features.insn.API("CreateFile"): {1}},
-        0x0,
-    )
-    assert "CreateFile API" in matches
-    assert "file-create" in matches
-    assert "filesystem-any" in matches
-    assert capa.features.common.MatchedRule("file") in features
-    assert capa.features.common.MatchedRule("file/create") in features
-    assert capa.features.common.MatchedRule("file/create/CreateFile") in features
-
-    features, matches = capa.engine.match(
-        capa.rules.topologically_order_rules(rules),
-        {capa.features.insn.API("WriteFile"): {1}},
-        0x0,
-    )
-    assert "WriteFile API" in matches
-    assert "file-create" not in matches
-    assert "filesystem-any" in matches
-
-
-def test_render_number():
-    assert str(capa.features.insn.Number(1)) == "number(0x1)"
-    assert str(capa.features.insn.Number(1, bitness=capa.features.common.BITNESS_X32)) == "number/x32(0x1)"
-    assert str(capa.features.insn.Number(1, bitness=capa.features.common.BITNESS_X64)) == "number/x64(0x1)"
-
-
-def test_render_offset():
-    assert str(capa.features.insn.Offset(1)) == "offset(0x1)"
-    assert str(capa.features.insn.Offset(1, bitness=capa.features.common.BITNESS_X32)) == "offset/x32(0x1)"
-    assert str(capa.features.insn.Offset(1, bitness=capa.features.common.BITNESS_X64)) == "offset/x64(0x1)"
-
-
 def test_short_circuit():
    assert Or([Number(1), Number(2)]).evaluate({Number(1): {1}}) == True

--- a/tests/test_match.py
+++ b/tests/test_match.py
@@ -0,0 +1,533 @@
+# Copyright (C) 2020 FireEye, Inc. All Rights Reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at: [package root]/LICENSE.txt
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+#  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and limitations under the License.
+
+import textwrap
+
+import capa.rules
+import capa.engine
+import capa.features.insn
+import capa.features.common
+from capa.rules import Scope
+from capa.features import *
+from capa.features.insn import *
+from capa.features.common import *
+
+
+def match(rules, features, va, scope=Scope.FUNCTION):
+    """
+    match via capa.engine.match and RuleSet.match; assert each feature (same locations) and rule match (same
+    result count) from the former is in the latter (one-directional). returns the capa.engine.match results.
+    """
+    features1, matches1 = capa.engine.match(rules, features, va)
+
+    ruleset = capa.rules.RuleSet(rules)
+    features2, matches2 = ruleset.match(scope, features, va)
+
+    for feature, locations in features1.items():
+        assert feature in features2
+        assert locations == features2[feature]
+
+    for rulename, results in matches1.items():
+        assert rulename in matches2
+        assert len(results) == len(matches2[rulename])
+
+    return features1, matches1
+
+
+def test_match_simple():
+    rule = textwrap.dedent(
+        """
+        rule:
+            meta:
+                name: test rule
+                namespace: testns1/testns2
+            features:
+                - number: 100
+        """
+    )
+    r = capa.rules.Rule.from_yaml(rule)
+
+    features, matches = match([r], {capa.features.insn.Number(100): {1, 2}}, 0x0)
+    assert "test rule" in matches
+    assert MatchedRule("test rule") in features
+    assert MatchedRule("testns1") in features
+    assert MatchedRule("testns1/testns2") in features
+
+
+def test_match_range_exact():
+    rule = textwrap.dedent(
+        """
+        rule:
+            meta:
+                name: test rule
+            features:
+                - count(number(100)): 2
+        """
+    )
+    r = capa.rules.Rule.from_yaml(rule)
+
+    # exactly two locations: the count matches
+    _, matches = match([r], {capa.features.insn.Number(100): {1, 2}}, 0x0)
+    assert "test rule" in matches
+
+    # only one location: too few
+    _, matches = match([r], {capa.features.insn.Number(100): {1}}, 0x0)
+    assert "test rule" not in matches
+
+    # three locations: too many for an exact count of two
+    _, matches = match([r], {capa.features.insn.Number(100): {1, 2, 3}}, 0x0)
+    assert "test rule" not in matches
+
+
+def test_match_range_range():
+    rule = textwrap.dedent(
+        """
+         rule:
+             meta:
+                 name: test rule
+             features:
+                 - count(number(100)): (2, 3)
+         """
+    )
+    r = capa.rules.Rule.from_yaml(rule)
+
+    # two locations: the lower bound is inclusive
+    _, matches = match([r], {capa.features.insn.Number(100): {1, 2}}, 0x0)
+    assert "test rule" in matches
+
+    # three locations: the upper bound is inclusive
+    _, matches = match([r], {capa.features.insn.Number(100): {1, 2, 3}}, 0x0)
+    assert "test rule" in matches
+
+    # one location: below the range
+    _, matches = match([r], {capa.features.insn.Number(100): {1}}, 0x0)
+    assert "test rule" not in matches
+
+    # four locations: above the range
+    _, matches = match([r], {capa.features.insn.Number(100): {1, 2, 3, 4}}, 0x0)
+    assert "test rule" not in matches
+
+
+def test_match_range_exact_zero():
+    rule = textwrap.dedent(
+        """
+        rule:
+            meta:
+                name: test rule
+            features:
+                - count(number(100)): 0
+        """
+    )
+    r = capa.rules.Rule.from_yaml(rule)
+
+    # feature isn't indexed - good.
+    _, matches = match([r], {}, 0x0)
+    assert "test rule" in matches
+
+    # feature is indexed, but with an empty set of locations (`set()`, since `{}` would be an empty dict).
+    # i don't think we should ever really have this case, but good to check anyways.
+    _, matches = match([r], {capa.features.insn.Number(100): set()}, 0x0)
+    assert "test rule" in matches
+
+    # one location: too many for an exact count of zero
+    _, matches = match([r], {capa.features.insn.Number(100): {1}}, 0x0)
+    assert "test rule" not in matches
+
+
+def test_match_range_with_zero():
+    rule = textwrap.dedent(
+        """
+         rule:
+             meta:
+                 name: test rule
+             features:
+                 - count(number(100)): (0, 1)
+         """
+    )
+    r = capa.rules.Rule.from_yaml(rule)
+
+    # ok: not indexed, indexed with an empty set of locations (`set()`, not the dict `{}`), and one location
+    _, matches = match([r], {}, 0x0)
+    assert "test rule" in matches
+    _, matches = match([r], {capa.features.insn.Number(100): set()}, 0x0)
+    assert "test rule" in matches
+    _, matches = match([r], {capa.features.insn.Number(100): {1}}, 0x0)
+    assert "test rule" in matches
+
+    # two locations: above the range
+    _, matches = match([r], {capa.features.insn.Number(100): {1, 2}}, 0x0)
+    assert "test rule" not in matches
+
+
+def test_match_adds_matched_rule_feature():
+    """show that using `match` adds a feature for matched rules."""
+    rule = textwrap.dedent(
+        """
+        rule:
+            meta:
+                name: test rule
+            features:
+                - number: 100
+        """
+    )
+    r = capa.rules.Rule.from_yaml(rule)
+    features, _ = match([r], {capa.features.insn.Number(100): {1}}, 0x0)
+    assert capa.features.common.MatchedRule("test rule") in features
+
+
+def test_match_matched_rules():
+    """show that a rule can match on another rule's match, whichever order the rules are supplied in."""
+    rules = [
+        capa.rules.Rule.from_yaml(
+            textwrap.dedent(
+                """
+                rule:
+                    meta:
+                        name: test rule1
+                    features:
+                        - number: 100
+                """
+            )
+        ),
+        capa.rules.Rule.from_yaml(
+            textwrap.dedent(
+                """
+                rule:
+                    meta:
+                        name: test rule2
+                    features:
+                        - match: test rule1
+                """
+            )
+        ),
+    ]
+
+    features, _ = match(
+        capa.rules.topologically_order_rules(rules),
+        {capa.features.insn.Number(100): {1}},
+        0x0,
+    )
+    assert capa.features.common.MatchedRule("test rule1") in features
+    assert capa.features.common.MatchedRule("test rule2") in features
+
+    # the ordering of the rules must not matter,
+    # the engine should match rules in an appropriate order.
+    features, _ = match(
+        capa.rules.topologically_order_rules(reversed(rules)),
+        {capa.features.insn.Number(100): {1}},
+        0x0,
+    )
+    assert capa.features.common.MatchedRule("test rule1") in features
+    assert capa.features.common.MatchedRule("test rule2") in features
+
+
+def test_match_namespace():
+    rules = [
+        capa.rules.Rule.from_yaml(
+            textwrap.dedent(
+                """
+                rule:
+                    meta:
+                        name: CreateFile API
+                        namespace: file/create/CreateFile
+                    features:
+                        - api: CreateFile
+                """
+            )
+        ),
+        capa.rules.Rule.from_yaml(
+            textwrap.dedent(
+                """
+                rule:
+                    meta:
+                        name: WriteFile API
+                        namespace: file/write
+                    features:
+                        - api: WriteFile
+                """
+            )
+        ),
+        capa.rules.Rule.from_yaml(
+            textwrap.dedent(
+                """
+                rule:
+                    meta:
+                        name: file-create
+                    features:
+                        - match: file/create
+                """
+            )
+        ),
+        capa.rules.Rule.from_yaml(
+            textwrap.dedent(
+                """
+                rule:
+                    meta:
+                        name: filesystem-any
+                    features:
+                        - match: file
+                """
+            )
+        ),
+    ]
+
+    features, matches = match(
+        capa.rules.topologically_order_rules(rules),
+        {capa.features.insn.API("CreateFile"): {1}},
+        0x0,
+    )
+    assert "CreateFile API" in matches
+    assert "file-create" in matches
+    assert "filesystem-any" in matches
+    assert capa.features.common.MatchedRule("file") in features
+    assert capa.features.common.MatchedRule("file/create") in features
+    assert capa.features.common.MatchedRule("file/create/CreateFile") in features
+
+    features, matches = match(
+        capa.rules.topologically_order_rules(rules),
+        {capa.features.insn.API("WriteFile"): {1}},
+        0x0,
+    )
+    assert "WriteFile API" in matches
+    assert "file-create" not in matches
+    assert "filesystem-any" in matches
+
+
+def test_match_substring():
+    rules = [
+        capa.rules.Rule.from_yaml(
+            textwrap.dedent(
+                """
+                rule:
+                    meta:
+                        name: test rule
+                    features:
+                        - and:
+                            - substring: abc
+                """
+            )
+        ),
+    ]
+    features, _ = match(
+        capa.rules.topologically_order_rules(rules),
+        {capa.features.common.String("aaaa"): {1}},
+        0x0,
+    )
+    assert capa.features.common.MatchedRule("test rule") not in features
+
+    features, _ = match(
+        capa.rules.topologically_order_rules(rules),
+        {capa.features.common.String("abc"): {1}},
+        0x0,
+    )
+    assert capa.features.common.MatchedRule("test rule") in features
+
+    features, _ = match(
+        capa.rules.topologically_order_rules(rules),
+        {capa.features.common.String("111abc222"): {1}},
+        0x0,
+    )
+    assert capa.features.common.MatchedRule("test rule") in features
+
+    features, _ = match(
+        capa.rules.topologically_order_rules(rules),
+        {capa.features.common.String("111abc"): {1}},
+        0x0,
+    )
+    assert capa.features.common.MatchedRule("test rule") in features
+
+    features, _ = match(
+        capa.rules.topologically_order_rules(rules),
+        {capa.features.common.String("abc222"): {1}},
+        0x0,
+    )
+    assert capa.features.common.MatchedRule("test rule") in features
+
+
+def test_match_regex():
+    rules = [
+        capa.rules.Rule.from_yaml(
+            textwrap.dedent(
+                """
+                rule:
+                    meta:
+                        name: test rule
+                    features:
+                        - and:
+                            - string: /.*bbbb.*/
+                """
+            )
+        ),
+        capa.rules.Rule.from_yaml(
+            textwrap.dedent(
+                """
+                rule:
+                    meta:
+                        name: rule with implied wildcards
+                    features:
+                        - and:
+                            - string: /bbbb/
+                """
+            )
+        ),
+        capa.rules.Rule.from_yaml(
+            textwrap.dedent(
+                """
+                rule:
+                    meta:
+                        name: rule with anchor
+                    features:
+                        - and:
+                            - string: /^bbbb/
+                """
+            )
+        ),
+    ]
+    features, _ = match(
+        capa.rules.topologically_order_rules(rules),
+        {capa.features.insn.Number(100): {1}},
+        0x0,
+    )
+    assert capa.features.common.MatchedRule("test rule") not in features
+
+    features, _ = match(
+        capa.rules.topologically_order_rules(rules),
+        {capa.features.common.String("aaaa"): {1}},
+        0x0,
+    )
+    assert capa.features.common.MatchedRule("test rule") not in features
+
+    features, _ = match(
+        capa.rules.topologically_order_rules(rules),
+        {capa.features.common.String("aBBBBa"): {1}},
+        0x0,
+    )
+    assert capa.features.common.MatchedRule("test rule") not in features
+
+    features, _ = match(
+        capa.rules.topologically_order_rules(rules),
+        {capa.features.common.String("abbbba"): {1}},
+        0x0,
+    )
+    assert capa.features.common.MatchedRule("test rule") in features
+    assert capa.features.common.MatchedRule("rule with implied wildcards") in features
+    assert capa.features.common.MatchedRule("rule with anchor") not in features
+
+
+def test_match_regex_ignorecase():
+    rules = [
+        capa.rules.Rule.from_yaml(
+            textwrap.dedent(
+                """
+                rule:
+                    meta:
+                        name: test rule
+                    features:
+                        - and:
+                            - string: /.*bbbb.*/i
+                """
+            )
+        ),
+    ]
+    features, _ = match(
+        capa.rules.topologically_order_rules(rules),
+        {capa.features.common.String("aBBBBa"): {1}},
+        0x0,
+    )
+    assert capa.features.common.MatchedRule("test rule") in features
+
+
+def test_match_regex_complex():
+    rules = [
+        capa.rules.Rule.from_yaml(
+            textwrap.dedent(
+                r"""
+                rule:
+                    meta:
+                        name: test rule
+                    features:
+                        - or:
+                            - string: /.*HARDWARE\\Key\\key with spaces\\.*/i
+                """
+            )
+        ),
+    ]
+    features, _ = match(
+        capa.rules.topologically_order_rules(rules),
+        {capa.features.common.String(r"Hardware\Key\key with spaces\some value"): {1}},
+        0x0,
+    )
+    assert capa.features.common.MatchedRule("test rule") in features
+
+
+def test_match_regex_values_always_string():
+    rules = [
+        capa.rules.Rule.from_yaml(
+            textwrap.dedent(
+                """
+                rule:
+                    meta:
+                        name: test rule
+                    features:
+                        - or:
+                            - string: /123/
+                            - string: /0x123/
+                """
+            )
+        ),
+    ]
+    features, _ = match(
+        capa.rules.topologically_order_rules(rules),
+        {capa.features.common.String("123"): {1}},
+        0x0,
+    )
+    assert capa.features.common.MatchedRule("test rule") in features
+
+    features, _ = match(
+        capa.rules.topologically_order_rules(rules),
+        {capa.features.common.String("0x123"): {1}},
+        0x0,
+    )
+    assert capa.features.common.MatchedRule("test rule") in features
+
+
+def test_match_not():
+    rule = textwrap.dedent(
+        """
+        rule:
+            meta:
+                name: test rule
+                namespace: testns1/testns2
+            features:
+                - not:
+                    - number: 99
+        """
+    )
+    r = capa.rules.Rule.from_yaml(rule)
+
+    _, matches = match([r], {capa.features.insn.Number(100): {1, 2}}, 0x0)
+    assert "test rule" in matches
+
+
+def test_match_not_not():
+    rule = textwrap.dedent(
+        """
+        rule:
+            meta:
+                name: test rule
+                namespace: testns1/testns2
+            features:
+                - not:
+                    - not:
+                        - number: 100
+        """
+    )
+    r = capa.rules.Rule.from_yaml(rule)
+
+    _, matches = match([r], {capa.features.insn.Number(100): {1, 2}}, 0x0)
+    assert "test rule" in matches
--- a/tests/test_render.py
+++ b/tests/test_render.py
@@ -2,9 +2,23 @@ import textwrap

 import capa.rules
 import capa.render.utils
+import capa.features.insn
+import capa.features.common
 import capa.render.result_document


+def test_render_number():
+    assert str(capa.features.insn.Number(1)) == "number(0x1)"
+    assert str(capa.features.insn.Number(1, bitness=capa.features.common.BITNESS_X32)) == "number/x32(0x1)"
+    assert str(capa.features.insn.Number(1, bitness=capa.features.common.BITNESS_X64)) == "number/x64(0x1)"
+
+
+def test_render_offset():
+    assert str(capa.features.insn.Offset(1)) == "offset(0x1)"
+    assert str(capa.features.insn.Offset(1, bitness=capa.features.common.BITNESS_X32)) == "offset/x32(0x1)"
+    assert str(capa.features.insn.Offset(1, bitness=capa.features.common.BITNESS_X64)) == "offset/x64(0x1)"
+
+
 def test_render_meta_attack():
    # Persistence::Boot or Logon Autostart Execution::Registry Run Keys / Startup Folder [T1547.001]
    id = "T1543.003"
--- a/tests/test_rules.py
+++ b/tests/test_rules.py
@@ -785,37 +785,6 @@ def test_substring_description():
    assert (Substring("abc") in children) == True


-def test_regex_values_always_string():
-    rules = [
-        capa.rules.Rule.from_yaml(
-            textwrap.dedent(
-                """
-                rule:
-                    meta:
-                        name: test rule
-                    features:
-                        - or:
-                            - string: /123/
-                            - string: /0x123/
-                """
-            )
-        ),
-    ]
-    features, matches = capa.engine.match(
-        capa.rules.topologically_order_rules(rules),
-        {capa.features.common.String("123"): {1}},
-        0x0,
-    )
-    assert capa.features.common.MatchedRule("test rule") in features
-
-    features, matches = capa.engine.match(
-        capa.rules.topologically_order_rules(rules),
-        {capa.features.common.String("0x123"): {1}},
-        0x0,
-    )
-    assert capa.features.common.MatchedRule("test rule") in features
-
-
 def test_filter_rules():
    rules = capa.rules.RuleSet(
        [