Merge pull request #591 from fireeye/feature-590

main: use rule scope internal/limitation/file for file limitations, not code
2025-12-12 15:49:46 -08:00 · 2021-06-01 11:50:56 -06:00
parent 9968d16f21 8c3605c886
commit 37208aabd3
5 changed files with 82 additions and 62 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -93,7 +93,6 @@ It includes many new rules, including all new techniques introduced in MITRE ATT
 - nursery/list-udp-connections-and-listeners michael.hunhoff@fireeye.com
 - nursery/log-keystrokes-via-raw-input-data michael.hunhoff@fireeye.com
 - nursery/register-http-server-url michael.hunhoff@fireeye.com
-

 ### Bug Fixes

@@ -113,6 +112,7 @@ It includes many new rules, including all new techniques introduced in MITRE ATT
 - linter: check for `or` with always true child statement, e.g. `optional`, colors #348 @mr-tz
 - json: breaking change in results document; now contains parsed MBC fields instead of canonical representation #526 @mr-tz
 - json: breaking change: record all matching strings for regex #159 @williballenthin
+- main: implement file limitations via rules not code #390 @williballenthin

 ### Development

--- a/capa/main.py
+++ b/capa/main.py
@@ -189,57 +189,33 @@ def has_rule_with_namespace(rules, capabilities, rule_cat):
    return False


-def has_file_limitation(rules, capabilities, is_standalone=True):
-    file_limitations = {
-        # capa will likely detect installer specific functionality.
-        # this is probably not what the user wants.
-        "executable/installer": [
-            " This sample appears to be an installer.",
-            " ",
-            " capa cannot handle installers well. This means the results may be misleading or incomplete."
-            " You should try to understand the install mechanism and analyze created files with capa.",
-        ],
-        # capa won't detect much in .NET samples.
-        # it might match some file-level things.
-        # for consistency, bail on things that we don't support.
-        "runtime/dotnet": [
-            " This sample appears to be a .NET module.",
-            " ",
-            " .NET is a cross-platform framework for running managed applications.",
-            " capa cannot handle non-native files. This means that the results may be misleading or incomplete.",
-            " You may have to analyze the file manually, using a tool like the .NET decompiler dnSpy.",
-        ],
-        # capa will detect dozens of capabilities for AutoIt samples,
-        # but these are due to the AutoIt runtime, not the payload script.
-        # so, don't confuse the user with FP matches - bail instead
-        "compiler/autoit": [
-            " This sample appears to be compiled with AutoIt.",
-            " ",
-            " AutoIt is a freeware BASIC-like scripting language designed for automating the Windows GUI.",
-            " capa cannot handle AutoIt scripts. This means that the results will be misleading or incomplete.",
-            " You may have to analyze the file manually, using a tool like the AutoIt decompiler MyAut2Exe.",
-        ],
-        # capa won't detect much in packed samples
-        "anti-analysis/packer/": [
-            " This sample appears to be packed.",
-            " ",
-            " Packed samples have often been obfuscated to hide their logic.",
-            " capa cannot handle obfuscation well. This means the results may be misleading or incomplete.",
-            " If possible, you should try to unpack this input file before analyzing it with capa.",
-        ],
-    }
+def is_internal_rule(rule):
+    return rule.meta.get("namespace", "").startswith("internal/")

-    for category, dialogue in file_limitations.items():
-        if not has_rule_with_namespace(rules, capabilities, category):
+
+def is_file_limitation_rule(rule):
+    return rule.meta.get("namespace", "") == "internal/limitation/file"
+
+
+def has_file_limitation(rules, capabilities, is_standalone=True):
+    file_limitation_rules = list(filter(is_file_limitation_rule, rules.rules.values()))
+
+    for file_limitation_rule in file_limitation_rules:
+        if file_limitation_rule.name not in capabilities:
            continue
+
        logger.warning("-" * 80)
-        for line in dialogue:
-            logger.warning(line)
+        for line in file_limitation_rule.meta.get("description", "").split("\n"):
+            logger.warning(" " + line)
+        logger.warning(" Identified via rule: %s", file_limitation_rule.name)
        if is_standalone:
            logger.warning(" ")
            logger.warning(" Use -v or -vv if you really want to see the capabilities identified by capa.")
        logger.warning("-" * 80)
+
+        # bail on first file limitation
        return True
+
    return False


--- a/capa/render/__init__.py
+++ b/capa/render/__init__.py
@@ -123,7 +123,7 @@ def convert_match_to_result_document(rules, capabilities, result):
        if bool(result.success):
            doc["locations"] = result.locations

-    # if we have a `match` statement, then we're referencing another rule.
+    # if we have a `match` statement, then we're referencing another rule or namespace.
    # this could be an external rule (written by a human), or
    #  rule generated to support a subscope (basic block, etc.)
    # we still want to include the matching logic in this tree.
@@ -139,25 +139,65 @@ def convert_match_to_result_document(rules, capabilities, result):
        and doc["success"]
    ):

-        rule_name = doc["node"]["feature"]["match"]
-        rule = rules[rule_name]
-        rule_matches = {address: result for (address, result) in capabilities[rule_name]}
+        name = doc["node"]["feature"]["match"]

-        if rule.meta.get("capa/subscope-rule"):
-            # for a subscope rule, fixup the node to be a scope node, rather than a match feature node.
+        if name in rules:
+            # this is a rule that we're matching
            #
-            # e.g. `contain loop/30c4c78e29bf4d54894fc74f664c62e8` -> `basic block`
-            scope = rule.meta["scope"]
-            doc["node"] = {
-                "type": "statement",
-                "statement": {
-                    "type": "subscope",
-                    "subscope": scope,
-                },
-            }
+            # pull matches from the referenced rule into our tree here.
+            rule_name = doc["node"]["feature"]["match"]
+            rule = rules[rule_name]
+            rule_matches = {address: result for (address, result) in capabilities[rule_name]}

-        for location in doc["locations"]:
-            doc["children"].append(convert_match_to_result_document(rules, capabilities, rule_matches[location]))
+            if rule.meta.get("capa/subscope-rule"):
+                # for a subscope rule, fixup the node to be a scope node, rather than a match feature node.
+                #
+                # e.g. `contain loop/30c4c78e29bf4d54894fc74f664c62e8` -> `basic block`
+                scope = rule.meta["scope"]
+                doc["node"] = {
+                    "type": "statement",
+                    "statement": {
+                        "type": "subscope",
+                        "subscope": scope,
+                    },
+                }
+
+            for location in doc["locations"]:
+                doc["children"].append(convert_match_to_result_document(rules, capabilities, rule_matches[location]))
+        else:
+            # this is a namespace that we're matching
+            #
+            # check for all rules in the namespace,
+            # seeing if they matched.
+            # if so, pull their matches into our match tree here.
+            ns_name = doc["node"]["feature"]["match"]
+            ns_rules = rules.rules_by_namespace[ns_name]
+
+            for rule in ns_rules:
+                if rule.name in capabilities:
+                    # the rule matched, so splice results into our tree here.
+                    #
+                    # note, there's a shortcoming in our result document schema here:
+                    # we lose the name of the rule that matched in a namespace.
+                    # for example, if we have a statement: `match: runtime/dotnet`
+                    # and we get matches, we can say the following:
+                    #
+                    #     match: runtime/dotnet @ 0x0
+                    #       or:
+                    #         import: mscoree._CorExeMain @ 0x402000
+                    #
+                    # however, we lose the fact that it was the rule
+                    #   "compiled to the .NET platform"
+                    # that contained this logic and did the match.
+                    #
+                    # we could introduce an intermediate node here.
+                    # this would be a breaking change and require updates to the renderers.
+                    # in the meantime, the above might be sufficient.
+                    rule_matches = {address: result for (address, result) in capabilities[rule.name]}
+                    for location in doc["locations"]:
+                        doc["children"].append(
+                            convert_match_to_result_document(rules, capabilities, rule_matches[location])
+                        )

    return doc

--- a/capa/rules.py
+++ b/capa/rules.py
@@ -884,6 +884,7 @@ class RuleSet(object):
        self.function_rules = self._get_rules_for_scope(rules, FUNCTION_SCOPE)
        self.basic_block_rules = self._get_rules_for_scope(rules, BASIC_BLOCK_SCOPE)
        self.rules = {rule.name: rule for rule in rules}
+        self.rules_by_namespace = index_rules_by_namespace(rules)

    def __len__(self):
        return len(self.rules)
@@ -891,6 +892,9 @@ class RuleSet(object):
    def __getitem__(self, rulename):
        return self.rules[rulename]

+    def __contains__(self, rulename):
+        return rulename in self.rules
+
    @staticmethod
    def _get_rules_for_scope(rules, scope):
        """
--- a/scripts/lint.py
+++ b/scripts/lint.py
@@ -511,7 +511,7 @@ def get_normpath(path):
 def get_features(ctx, rule):
    # get features from rule and all dependencies including subscopes and matched rules
    features = []
-    namespaces = capa.rules.index_rules_by_namespace([rule])
+    namespaces = ctx["rules"].rules_by_namespace
    deps = [ctx["rules"].rules[dep] for dep in rule.get_dependencies(namespaces)]
    for r in [rule] + deps:
        features.extend(get_rule_features(r))