Merge branch 'master' into feature-159

2025-12-12 15:49:46 -08:00 · 2021-06-01 11:17:41 -06:00
parent 3c90e909a1 b8a67553d0
commit 2756c05889
9 changed files with 71 additions and 23 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -111,6 +111,7 @@ It includes many new rules, including all new techniques introduced in MITRE ATT
 - linter: summarize results at the end #571 @williballenthin
 - meta: added `library_functions` field, `feature_counts.functions` does not include library functions any more #562 @mr-tz
 - linter: check for `or` with always true child statement, e.g. `optional`, colors #348 @mr-tz
+- json: breaking change in results document; now contains parsed MBC fields instead of canonical representation #526 @mr-tz
 - json: breaking change: record all matching strings for regex #159 @williballenthin

 ### Development
--- a/capa/render/init.py
+++ b/capa/render/init.py
@@ -162,6 +162,43 @@ def convert_match_to_result_document(rules, capabilities, result):
    return doc


+def convert_meta_to_result_document(meta):
+    """
+    convert a rule's metadata to the form stored in the result document.
+
+    returns a shallow copy of `meta` in which the "mbc" entry is replaced by the list of
+    parsed MBC mappings (see `parse_canonical_mbc`); a missing "mbc" entry becomes an empty list.
+
+    the given `meta` is not modified: the caller passes `rule.meta`, and rewriting it in place
+    would leave parsed dicts where canonical strings are expected the next time the rule is converted.
+    """
+    mbcs = meta.get("mbc", [])
+
+    doc_meta = dict(meta)
+    doc_meta["mbc"] = [parse_canonical_mbc(mbc) for mbc in mbcs]
+    return doc_meta
+
+
+def parse_canonical_mbc(mbc):
+    """
+    parse capa's canonical MBC representation: `Objective::Behavior::Method [Identifier]`
+
+    the method is optional, and so is the trailing ` [Identifier]` token:
+    when the last space-separated token is not wrapped in square brackets it is kept as
+    part of the name rather than being mistaken for the identifier.
+
+    returns a dict with the keys `parts` (list of all `::`-separated names, identifier removed),
+    `id`, `objective`, `behavior`, and `method`; fields that are absent are empty strings.
+    """
+    parts = mbc.split("::")
+
+    # `str.split` with a separator always yields at least one element,
+    # so there is always a final segment; only that segment may carry the identifier.
+    tail = parts.pop()
+    name, _, token = tail.rpartition(" ")
+    if token.startswith("[") and token.endswith("]"):
+        identifier = token.lstrip("[").rstrip("]")
+        parts.append(name)
+    else:
+        identifier = ""
+        parts.append(tail)
+
+    # pad so that a missing behavior and/or method comes out as an empty string.
+    objective, behavior, method = (parts + ["", ""])[:3]
+
+    return {
+        "parts": parts,
+        "id": identifier,
+        "objective": objective,
+        "behavior": behavior,
+        "method": method,
+    }
+
+
 def convert_capabilities_to_result_document(meta, rules, capabilities):
    """
    convert the given rule set and capabilities result to a common, Python-native data structure.
@@ -204,8 +241,10 @@ def convert_capabilities_to_result_document(meta, rules, capabilities):
        if rule.meta.get("capa/subscope-rule"):
            continue

+        rule_meta = convert_meta_to_result_document(rule.meta)
+
        doc["rules"][rule_name] = {
-            "meta": dict(rule.meta),
+            "meta": rule_meta,
            "source": rule.definition,
            "matches": {
                addr: convert_match_to_result_document(rules, capabilities, match) for (addr, match) in matches
--- a/capa/render/default.py
+++ b/capa/render/default.py
@@ -179,19 +179,11 @@ def render_mbc(doc, ostream):
        if not rule["meta"].get("mbc"):
            continue

-        mbcs = rule["meta"]["mbc"]
-        if not isinstance(mbcs, list):
-            raise ValueError("invalid rule: MBC mapping is not a list")
-
-        for mbc in mbcs:
-            objective, _, rest = mbc.partition("::")
-            if "::" in rest:
-                behavior, _, rest = rest.partition("::")
-                method, _, id = rest.rpartition(" ")
-                objectives[objective].add((behavior, method, id))
+        for mbc in rule["meta"]["mbc"]:
+            if mbc.get("method"):
+                objectives[mbc["objective"]].add((mbc["behavior"], mbc["method"], mbc["id"]))
            else:
-                behavior, _, id = rest.rpartition(" ")
-                objectives[objective].add((behavior, id))
+                objectives[mbc["objective"]].add((mbc["behavior"], mbc["id"]))

    rows = []
    for objective, behaviors in sorted(objectives.items()):
@@ -199,10 +191,10 @@ def render_mbc(doc, ostream):
        for spec in sorted(behaviors):
            if len(spec) == 2:
                behavior, id = spec
-                inner_rows.append("%s %s" % (rutils.bold(behavior), id))
+                inner_rows.append("%s [%s]" % (rutils.bold(behavior), id))
            elif len(spec) == 3:
                behavior, method, id = spec
-                inner_rows.append("%s::%s %s" % (rutils.bold(behavior), method, id))
+                inner_rows.append("%s::%s [%s]" % (rutils.bold(behavior), method, id))
            else:
                raise RuntimeError("unexpected MBC spec format")
        rows.append(
--- a/capa/render/utils.py
+++ b/capa/render/utils.py
@@ -29,6 +29,10 @@ def hex(n):
        return "0x%X" % n


+def format_mbc(mbc):
+    """
+    render a parsed MBC mapping as one string:
+    its `parts` joined by `::`, followed by its `id` in square brackets.
+    """
+    name = "::".join(mbc["parts"])
+    return "%s [%s]" % (name, mbc["id"])
+
+
 def capability_rules(doc):
    """enumerate the rules in (namespace, name) order that are 'capability' rules (not lib/subscope/disposition/etc)."""
    for (_, _, rule) in sorted(
--- a/capa/render/vverbose.py
+++ b/capa/render/vverbose.py
@@ -216,6 +216,12 @@ def render_rules(ostream, doc):
                continue

            v = rule["meta"][key]
+            if not v:
+                continue
+
+            if key == "mbc":
+                v = [rutils.format_mbc(mbc) for mbc in v]
+
            if isinstance(v, list) and len(v) == 1:
                v = v[0]
            elif isinstance(v, list) and len(v) > 1:
--- a/capa/rules.py
+++ b/capa/rules.py
@@ -560,10 +560,11 @@ class Rule(object):

    @classmethod
    def from_dict(cls, d, definition):
-        name = d["rule"]["meta"]["name"]
+        meta = d["rule"]["meta"]
+        name = meta["name"]
        # if scope is not specified, default to function scope.
        # this is probably the mode that rule authors will start with.
-        scope = d["rule"]["meta"].get("scope", FUNCTION_SCOPE)
+        scope = meta.get("scope", FUNCTION_SCOPE)
        statements = d["rule"]["features"]

        # the rule must start with a single logic node.
@@ -577,7 +578,12 @@ class Rule(object):
        if scope not in SUPPORTED_FEATURES.keys():
            raise InvalidRule("{:s} is not a supported scope".format(scope))

-        return cls(name, scope, build_statements(statements[0], scope), d["rule"]["meta"], definition)
+        meta = d["rule"]["meta"]
+        mbcs = meta.get("mbc", [])
+        if not isinstance(mbcs, list):
+            raise InvalidRule("MBC mapping must be a list")
+
+        return cls(name, scope, build_statements(statements[0], scope), meta, definition)

    @staticmethod
    @lru_cache()
--- a/2
+++ b/2
--- a/setup.py
+++ b/setup.py
@@ -11,7 +11,7 @@ import os
 import setuptools

 requirements = [
-    "tqdm==4.60.0",
+    "tqdm==4.61.0",
    "pyyaml==5.4.1",
    "tabulate==0.8.9",
    "colorama==0.4.4",
@@ -21,7 +21,7 @@ requirements = [
    "viv-utils[flirt]==0.6.4",
    "halo==0.0.31",
    "networkx==2.5.1",
-    "ruamel.yaml==0.17.4",
+    "ruamel.yaml==0.17.5",
    "vivisect==1.0.3",
    "smda==1.5.17",
    "pefile==2021.5.24",
--- a/tests/test_smda_features.py
+++ b/tests/test_smda_features.py
@@ -23,10 +23,10 @@ def test_smda_features(sample, scope, feature, expected):
    if scope.__name__ == "file" and isinstance(feature, capa.features.file.FunctionName) and expected is True:
        pytest.xfail("SMDA has no function ID")

-    if sample == "a1982..." and sys.platform == "win32":
+    if "a198216798ca38f280dc413f8c57f2c2" in sample and sys.platform == "win32":
        pytest.xfail("SMDA bug tracked #585")

-    if sample == "al-khaser x64" and sys.platform == "win32":
+    if "al-khaser_x64" in sample and sys.platform == "win32":
        pytest.xfail("SMDA bug tracked #585")

    do_test_feature_presence(get_smda_extractor, sample, scope, feature, expected)