From 0a226e8b01c1758d5f74af18d28f77165b928f39 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Thu, 27 May 2021 09:18:55 -0600 Subject: [PATCH 1/8] main: use rule scope internal/limitation/file for file limitations, not code closes #390 --- CHANGELOG.md | 1 + capa/main.py | 59 +++++++++++++++------------------------------------- 2 files changed, 18 insertions(+), 42 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0d68bf3c..e396f4f3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -111,6 +111,7 @@ It includes many new rules, including all new techniques introduced in MITRE ATT - linter: summarize results at the end #571 @williballenthin - meta: added `library_functions` field, `feature_counts.functions` does not include library functions any more #562 @mr-tz - linter: check for `or` with always true child statement, e.g. `optional`, colors #348 @mr-tz +- main: implement file limitations via rules not code #390 @williballenthin ### Development diff --git a/capa/main.py b/capa/main.py index e25a533b..39906c56 100644 --- a/capa/main.py +++ b/capa/main.py @@ -189,57 +189,32 @@ def has_rule_with_namespace(rules, capabilities, rule_cat): return False -def has_file_limitation(rules, capabilities, is_standalone=True): - file_limitations = { - # capa will likely detect installer specific functionality. - # this is probably not what the user wants. - "executable/installer": [ - " This sample appears to be an installer.", - " ", - " capa cannot handle installers well. This means the results may be misleading or incomplete." - " You should try to understand the install mechanism and analyze created files with capa.", - ], - # capa won't detect much in .NET samples. - # it might match some file-level things. - # for consistency, bail on things that we don't support. - "runtime/dotnet": [ - " This sample appears to be a .NET module.", - " ", - " .NET is a cross-platform framework for running managed applications.", - " capa cannot handle non-native files. This means that the results may be misleading or incomplete.", - " You may have to analyze the file manually, using a tool like the .NET decompiler dnSpy.", - ], - # capa will detect dozens of capabilities for AutoIt samples, - # but these are due to the AutoIt runtime, not the payload script. - # so, don't confuse the user with FP matches - bail instead - "compiler/autoit": [ - " This sample appears to be compiled with AutoIt.", - " ", - " AutoIt is a freeware BASIC-like scripting language designed for automating the Windows GUI.", - " capa cannot handle AutoIt scripts. This means that the results will be misleading or incomplete.", - " You may have to analyze the file manually, using a tool like the AutoIt decompiler MyAut2Exe.", - ], - # capa won't detect much in packed samples - "anti-analysis/packer/": [ - " This sample appears to be packed.", - " ", - " Packed samples have often been obfuscated to hide their logic.", - " capa cannot handle obfuscation well. This means the results may be misleading or incomplete.", - " If possible, you should try to unpack this input file before analyzing it with capa.", - ], - } +def is_internal_rule(rule): + return rule.meta.get("namespace", "").startswith("internal/") - for category, dialogue in file_limitations.items(): - if not has_rule_with_namespace(rules, capabilities, category): + +def is_file_limitation_rule(rule): + return rule.meta.get("namespace", "") == "internal/limitation/file" + + +def has_file_limitation(rules, capabilities, is_standalone=True): + file_limitation_rules = list(filter(is_file_limitation_rule, rules.rules.values())) + + for file_limitation_rule in file_limitation_rules: + if file_limitation_rule.name not in capabilities: continue + logger.warning("-" * 80) - for line in dialogue: + for line in file_limitation_rule.meta.get("description", "").split("\n"): logger.warning(line) if is_standalone: logger.warning(" ") logger.warning(" Use -v or -vv if you really want to see the capabilities identified by capa.") logger.warning("-" * 80) + + # bail on first file limitation return True + return False From 66b2c07af41aa31efb1d0458679eec217b0fb4df Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Mon, 31 May 2021 09:53:19 -0600 Subject: [PATCH 2/8] main: show matching file limitation rule when showing warning --- capa/main.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/capa/main.py b/capa/main.py index 39906c56..1f6174b8 100644 --- a/capa/main.py +++ b/capa/main.py @@ -206,7 +206,8 @@ def has_file_limitation(rules, capabilities, is_standalone=True): logger.warning("-" * 80) for line in file_limitation_rule.meta.get("description", "").split("\n"): - logger.warning(line) + logger.warning(" " + line) + logger.warning(" Identified via rule: %s", file_limitation_rule.name) if is_standalone: logger.warning(" ") logger.warning(" Use -v or -vv if you really want to see the capabilities identified by capa.") From 3cd348e8f72b2359f3169cfe11f9fc5e2046da70 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Mon, 31 May 2021 10:27:44 -0600 Subject: [PATCH 3/8] rules: implement __contains__ for RuleSet --- capa/rules.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/capa/rules.py b/capa/rules.py index e19bd713..16e25e28 100644 --- a/capa/rules.py +++ b/capa/rules.py @@ -891,6 +891,9 @@ class RuleSet(object): def __getitem__(self, rulename): return self.rules[rulename] + def __contains__(self, rulename): + return rulename in self.rules + @staticmethod def _get_rules_for_scope(rules, scope): """ From 3b245ea20138f3a6d8e09fab759de610267c79d1 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Mon, 31 May 2021 10:28:00 -0600 Subject: [PATCH 4/8] rules: index rules by namespace --- capa/rules.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/capa/rules.py b/capa/rules.py index 16e25e28..7af6b9a4 100644 --- a/capa/rules.py +++ b/capa/rules.py @@ -884,6 +884,7 @@ class RuleSet(object): self.function_rules = self._get_rules_for_scope(rules, FUNCTION_SCOPE) self.basic_block_rules = self._get_rules_for_scope(rules, BASIC_BLOCK_SCOPE) self.rules = {rule.name: rule for rule in rules} + self.rules_by_namespace = self._index_rules_by_namespace(rules) def __len__(self): return len(self.rules) @@ -938,6 +939,23 @@ class RuleSet(object): return done + @staticmethod + def _index_rules_by_namespace(rules): + """ + index the given rules into a dictionary, mapping from namespace name to + list of rules in that namespace. + """ + index = collections.defaultdict(list) + + for rule in rules: + namespace = rule.meta.get("namespace") + if namespace: + while namespace: + index[namespace].append(rule) + namespace, _, _ = namespace.rpartition("/") + + return dict(index) + def filter_rules_by_meta(self, tag): """ return new rule set with rules filtered based on all meta field values, adds all dependency rules From 17eeecc5269837c52156ecad6510af3f9fa4af0f Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Mon, 31 May 2021 10:28:11 -0600 Subject: [PATCH 5/8] render: handle namespace matches in result document --- capa/render/__init__.py | 72 +++++++++++++++++++++++++++++++---------- 1 file changed, 55 insertions(+), 17 deletions(-) diff --git a/capa/render/__init__.py b/capa/render/__init__.py index 519a0638..f75f16a9 100644 --- a/capa/render/__init__.py +++ b/capa/render/__init__.py @@ -123,7 +123,7 @@ def convert_match_to_result_document(rules, capabilities, result): if bool(result.success): doc["locations"] = result.locations - # if we have a `match` statement, then we're referencing another rule. + # if we have a `match` statement, then we're referencing another rule or namespace. # this could an external rule (written by a human), or # rule generated to support a subscope (basic block, etc.) # we still want to include the matching logic in this tree. @@ -139,25 +139,63 @@ def convert_match_to_result_document(rules, capabilities, result): and doc["success"] ): - rule_name = doc["node"]["feature"]["match"] - rule = rules[rule_name] - rule_matches = {address: result for (address, result) in capabilities[rule_name]} + name = doc["node"]["feature"]["match"] - if rule.meta.get("capa/subscope-rule"): - # for a subscope rule, fixup the node to be a scope node, rather than a match feature node. + if name in rules: + # this is a rule that we're matching # - # e.g. `contain loop/30c4c78e29bf4d54894fc74f664c62e8` -> `basic block` - scope = rule.meta["scope"] - doc["node"] = { - "type": "statement", - "statement": { - "type": "subscope", - "subscope": scope, - }, - } + # pull matches from the referenced rule into our tree here. + rule_name = doc["node"]["feature"]["match"] + rule = rules[rule_name] + rule_matches = {address: result for (address, result) in capabilities[rule_name]} - for location in doc["locations"]: - doc["children"].append(convert_match_to_result_document(rules, capabilities, rule_matches[location])) + if rule.meta.get("capa/subscope-rule"): + # for a subscope rule, fixup the node to be a scope node, rather than a match feature node. + # + # e.g. `contain loop/30c4c78e29bf4d54894fc74f664c62e8` -> `basic block` + scope = rule.meta["scope"] + doc["node"] = { + "type": "statement", + "statement": { + "type": "subscope", + "subscope": scope, + }, + } + + for location in doc["locations"]: + doc["children"].append(convert_match_to_result_document(rules, capabilities, rule_matches[location])) + else: + # this is a namespace that we're matching + # + # check for all rules in the namespace, + # seeing if they matched. + # if so, pull their matches into our match tree here. + ns_name = doc["node"]["feature"]["match"] + ns_rules = rules.rules_by_namespace[ns_name] + + for rule in ns_rules: + if rule.name in capabilities: + # the rule matched, so splice results into our tree here. + # + # note, there's a shortcoming in our result document schema here: + # we lose the name of the rule that matched in a namespace. + # for example, if we have a statement: `match: runtime/dotnet` + # and we get matches, we can say the following: + # + # match: runtime/dotnet @ 0x0 + # or: + # import: mscoree._CorExeMain @ 0x402000 + # + # however, we lose the fact that it was rule + # "compiled to the .NET platform" + # that contained this logic and did the match. + # + # we could introduce an intermediate node here. + # this would be a breaking change and require updates to the renderers. + # in the meantime, the above might be sufficient. + rule_matches = {address: result for (address, result) in capabilities[rule.name]} + for location in doc["locations"]: + doc["children"].append(convert_match_to_result_document(rules, capabilities, rule_matches[location])) return doc From 8a65c565a510297ab4c70e352702360210b28e36 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Tue, 1 Jun 2021 11:06:12 -0600 Subject: [PATCH 6/8] pep8 --- capa/render/__init__.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/capa/render/__init__.py b/capa/render/__init__.py index f75f16a9..0f77c86f 100644 --- a/capa/render/__init__.py +++ b/capa/render/__init__.py @@ -186,7 +186,7 @@ def convert_match_to_result_document(rules, capabilities, result): # or: # import: mscoree._CorExeMain @ 0x402000 # - # however, we lose the fact that it was rule + # however, we lose the fact that it was rule # "compiled to the .NET platform" # that contained this logic and did the match. # @@ -195,7 +195,9 @@ def convert_match_to_result_document(rules, capabilities, result): # in the meantime, the above might be sufficient. rule_matches = {address: result for (address, result) in capabilities[rule.name]} for location in doc["locations"]: - doc["children"].append(convert_match_to_result_document(rules, capabilities, rule_matches[location])) + doc["children"].append( + convert_match_to_result_document(rules, capabilities, rule_matches[location]) + ) return doc From 8f3d44324708cfab380e6d0d9886c72a23480761 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Tue, 1 Jun 2021 11:25:38 -0600 Subject: [PATCH 7/8] rules: use existing code, dedup --- capa/rules.py | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/capa/rules.py b/capa/rules.py index 7af6b9a4..bc05e97f 100644 --- a/capa/rules.py +++ b/capa/rules.py @@ -884,7 +884,7 @@ class RuleSet(object): self.function_rules = self._get_rules_for_scope(rules, FUNCTION_SCOPE) self.basic_block_rules = self._get_rules_for_scope(rules, BASIC_BLOCK_SCOPE) self.rules = {rule.name: rule for rule in rules} - self.rules_by_namespace = self._index_rules_by_namespace(rules) + self.rules_by_namespace = index_rules_by_namespace(rules) def __len__(self): return len(self.rules) @@ -939,23 +939,6 @@ class RuleSet(object): return done - @staticmethod - def _index_rules_by_namespace(rules): - """ - index the given rules into a dictionary, mapping from namespace name to - list of rules in that namespace. - """ - index = collections.defaultdict(list) - - for rule in rules: - namespace = rule.meta.get("namespace") - if namespace: - while namespace: - index[namespace].append(rule) - namespace, _, _ = namespace.rpartition("/") - - return dict(index) - def filter_rules_by_meta(self, tag): """ return new rule set with rules filtered based on all meta field values, adds all dependency rules From 2706a7171e7dcd7dec689229d2ebe1bf4e735bbb Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Tue, 1 Jun 2021 11:38:05 -0600 Subject: [PATCH 8/8] linter: fix match namespace handling closes #601 --- scripts/lint.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/lint.py b/scripts/lint.py index e5c9ab28..9bd27c2e 100644 --- a/scripts/lint.py +++ b/scripts/lint.py @@ -511,7 +511,7 @@ def get_normpath(path): def get_features(ctx, rule): # get features from rule and all dependencies including subscopes and matched rules features = [] - namespaces = capa.rules.index_rules_by_namespace([rule]) + namespaces = ctx["rules"].rules_by_namespace deps = [ctx["rules"].rules[dep] for dep in rule.get_dependencies(namespaces)] for r in [rule] + deps: features.extend(get_rule_features(r))