From 36749df8480a825574ebee068a4c7d564d69dacb Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Fri, 26 Jun 2020 17:49:54 -0600 Subject: [PATCH 01/61] submodule: rules: bump to rule-organization PR --- rules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rules b/rules index 7f5fb71a..0d775647 160000 --- a/rules +++ b/rules @@ -1 +1 @@ -Subproject commit 7f5fb71a5d9de659733f20743851098d372a4d74 +Subproject commit 0d775647853659e77fefaeed35403771591475ce From 35faa20e9e1931441b803aa1dfb0eabf720fd9b2 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Fri, 26 Jun 2020 17:52:32 -0600 Subject: [PATCH 02/61] linter: check for namespace instead of rule-category --- scripts/lint.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/scripts/lint.py b/scripts/lint.py index 6b0e7b37..32cfa7c7 100644 --- a/scripts/lint.py +++ b/scripts/lint.py @@ -39,12 +39,13 @@ class NameCasing(Lint): rule.name[1] not in string.ascii_uppercase) -class MissingRuleCategory(Lint): - name = 'missing rule category' - recommendation = 'Add meta.rule-category so that the rule is emitted correctly' +class MissingNamespace(Lint): + name = 'missing rule namespace' + recommendation = 'Add meta.namespace so that the rule is emitted correctly' def check_rule(self, ctx, rule): - return ('rule-category' not in rule.meta and + return ('namespace' not in rule.meta and + 'nursery' not in rule.meta and 'maec/malware-category' not in rule.meta and 'lib' not in rule.meta) From d32c20d14046e9df7639b3955e89a2efafab8f7b Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Fri, 26 Jun 2020 17:53:37 -0600 Subject: [PATCH 03/61] linter: check for namespace instead of rule-category --- scripts/lint.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/lint.py b/scripts/lint.py index 32cfa7c7..ecb7d55f 100644 --- a/scripts/lint.py +++ b/scripts/lint.py @@ -190,7 +190,7 @@ def lint_scope(ctx, rule): META_LINTS = ( - MissingRuleCategory(), + MissingNamespace(), MissingAuthor(), MissingExamples(), MissingExampleOffset(), From 5de0884dd29c1b005b8d4d80075526bac85ac6fd Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Fri, 26 Jun 2020 18:09:51 -0600 Subject: [PATCH 04/61] main: learn to save off the path to each rule --- capa/main.py | 41 +++++++++++++++++++--------------------- capa/rules.py | 19 +++++++++++++++++++ scripts/lint.py | 4 ++-- scripts/migrate-rules.py | 6 ++---- 4 files changed, 42 insertions(+), 28 deletions(-) diff --git a/capa/main.py b/capa/main.py index 5600caab..63d13cfb 100644 --- a/capa/main.py +++ b/capa/main.py @@ -583,18 +583,9 @@ def get_rules(rule_path): if not os.path.exists(rule_path): raise IOError('%s does not exist or cannot be accessed' % rule_path) - rules = [] + rule_paths = [] if os.path.isfile(rule_path): - logger.info('reading rule file: %s', rule_path) - with open(rule_path, 'rb') as f: - rule = capa.rules.Rule.from_yaml(f.read().decode('utf-8')) - - if is_nursery_rule_path(rule_path): - rule.meta['nursery'] = True - - rules.append(rule) - logger.debug('rule: %s scope: %s', rule.name, rule.scope) - + rule_paths.append(rule_path) elif os.path.isdir(rule_path): logger.info('reading rules from directory %s', rule_path) for root, dirs, files in os.walk(rule_path): @@ -603,18 +594,24 @@ def get_rules(rule_path): logger.warning('skipping non-.yml file: %s', file) continue - path = os.path.join(root, file) - logger.debug('reading rule file: %s', path) - try: - rule = capa.rules.Rule.from_yaml_file(path) - except capa.rules.InvalidRule: - raise - else: - if is_nursery_rule_path(root): - rule.meta['nursery'] = True + rule_path = os.path.join(root, file) + rule_paths.append(rule_path) + + rules = [] + for rule_path in rule_paths: + logger.info('reading rule file: %s', rule_path) + try: + rule = capa.rules.Rule.from_yaml_file(rule_path) + except capa.rules.InvalidRule: + raise + else: + rule.meta['capa/path'] = rule_path + if is_nursery_rule_path(rule_path): + rule.meta['capa/nursery'] = True + + rules.append(rule) + logger.debug('rule: %s scope: %s', rule.name, rule.scope) - rules.append(rule) - logger.debug('rule: %s scope: %s', rule.name, rule.scope) return rules diff --git a/capa/rules.py b/capa/rules.py index a8e9b568..a742a40d 100644 --- a/capa/rules.py +++ b/capa/rules.py @@ -566,8 +566,27 @@ class Rule(object): continue move_to_end(meta, key) + # these are meta fields that are internal to capa, + # and added during rule reading/construction. + # they may help use manipulate or index rules, + # but should not be exposed to clients. + hidden_meta_keys = ("capa/nursery", "capa/path") + hidden_meta = { + key: meta.get(key) + for key in hidden_meta_keys + } + + for key in hidden_meta.keys(): + del meta[key] + ostream = six.BytesIO() yaml.dump(definition, ostream) + + for key, value in hidden_meta.items(): + if value is None: + continue + meta[key] = value + return ostream.getvalue().decode('utf-8').rstrip("\n") + "\n" diff --git a/scripts/lint.py b/scripts/lint.py index ecb7d55f..29e30b97 100644 --- a/scripts/lint.py +++ b/scripts/lint.py @@ -45,7 +45,7 @@ class MissingNamespace(Lint): def check_rule(self, ctx, rule): return ('namespace' not in rule.meta and - 'nursery' not in rule.meta and + not is_nursery_rule(rule) and 'maec/malware-category' not in rule.meta and 'lib' not in rule.meta) @@ -250,7 +250,7 @@ def is_nursery_rule(rule): For example, they may not have references to public example of a technique. Yet, we still want to capture and report on their matches. ''' - return rule.meta.get('nursery') + return rule.meta.get('capa/nursery') def lint_rule(ctx, rule): diff --git a/scripts/migrate-rules.py b/scripts/migrate-rules.py index 968e7cce..c8de7d2c 100644 --- a/scripts/migrate-rules.py +++ b/scripts/migrate-rules.py @@ -49,7 +49,7 @@ def read_rules(rule_directory): rules[rule.name] = rule if "nursery" in path: - rule.meta["nursery"] = True + rule.meta["capa/nursery"] = True return rules @@ -132,10 +132,8 @@ def main(argv=None): filename = filename + ".yml" try: - if rule.meta.get("nursery"): + if rule.meta.get("capa/nursery"): directory = os.path.join(args.destination, "nursery") - # this isn't meant to be written into the rule - del rule.meta["nursery"] elif rule.meta.get("lib"): directory = os.path.join(args.destination, "lib") else: From 22537eb93685e1441550f4793a37ffdba9fd45e9 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Fri, 26 Jun 2020 18:16:20 -0600 Subject: [PATCH 05/61] linter: learn to check filename to match rule name closes #7 --- scripts/lint.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/scripts/lint.py b/scripts/lint.py index 29e30b97..c6fa514b 100644 --- a/scripts/lint.py +++ b/scripts/lint.py @@ -6,6 +6,7 @@ Usage: $ python scripts/lint.py rules/ ''' import os +import os.path import sys import string import hashlib @@ -39,6 +40,25 @@ class NameCasing(Lint): rule.name[1] not in string.ascii_uppercase) +class FilenameDoesntMatchRuleName(Lint): + name = 'filename doesn\'t match the rule name' + recommendation = 'Rename rule file to match the rule name' + + def check_rule(self, ctx, rule): + expected = rule.name + expected = expected.lower() + expected = expected.replace(" ", "-") + expected = expected.replace("(", "") + expected = expected.replace(")", "") + expected = expected.replace("+", "") + expected = expected.replace("/", "") + expected = expected + ".yml" + + found = os.path.basename(rule.meta['capa/path']) + + return expected != found + + class MissingNamespace(Lint): name = 'missing rule namespace' recommendation = 'Add meta.namespace so that the rule is emitted correctly' @@ -172,6 +192,7 @@ def run_feature_lints(lints, ctx, features): NAME_LINTS = ( NameCasing(), + FilenameDoesntMatchRuleName(), ) From 8ab91269fa5cef4377241b1cc73644d691b6481e Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Fri, 26 Jun 2020 18:23:47 -0600 Subject: [PATCH 06/61] linter: learn to check namespace matches rule path closes #6 --- scripts/lint.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/scripts/lint.py b/scripts/lint.py index c6fa514b..78a22832 100644 --- a/scripts/lint.py +++ b/scripts/lint.py @@ -70,6 +70,24 @@ class MissingNamespace(Lint): 'lib' not in rule.meta) +class NamespaceDoesntMatchRulePath(Lint): + name = 'file path doesn\'t match rule namespace' + recommendation = 'Move rule to appropriate directory or update the namespace' + + def check_rule(self, ctx, rule): + # let the other lints catch namespace issues + if 'namespace' not in rule.meta: + return False + if is_nursery_rule(rule): + return False + if 'maec/malware-category' in rule.meta: + return False + if 'lib' in rule.meta: + return False + + return rule.meta["namespace"] not in rule.meta['capa/path'].replace('\\', '/') + + class MissingScope(Lint): name = 'missing scope' recommendation = 'Add meta.scope so that the scope is explicit (defaults to `function`)' @@ -212,6 +230,7 @@ def lint_scope(ctx, rule): META_LINTS = ( MissingNamespace(), + NamespaceDoesntMatchRulePath(), MissingAuthor(), MissingExamples(), MissingExampleOffset(), From 8f5f5b329debe4c77ee37691a0e0546f5301bf6a Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Fri, 26 Jun 2020 18:31:26 -0600 Subject: [PATCH 07/61] rule: declare the expected hidden meta fields --- capa/rules.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/capa/rules.py b/capa/rules.py index a742a40d..15b2d7a5 100644 --- a/capa/rules.py +++ b/capa/rules.py @@ -21,7 +21,12 @@ logger = logging.getLogger(__name__) # these are the standard metadata fields, in the preferred order. # when reformatted, any custom keys will come after these. -META_KEYS = ("name", "namespace", "rule-category", "author", "description", "lib", "scope", "att&ck", "mbc", "references", "examples") +META_KEYS = ("name", "namespace", "rule-category", "maec/malware-category", "author", "description", "lib", "scope", "att&ck", "mbc", "references", "examples") +# these are meta fields that are internal to capa, +# and added during rule reading/construction. +# they may help use manipulate or index rules, +# but should not be exposed to clients. +HIDDEN_META_KEYS = ("capa/nursery", "capa/path") FILE_SCOPE = 'file' @@ -566,14 +571,12 @@ class Rule(object): continue move_to_end(meta, key) - # these are meta fields that are internal to capa, - # and added during rule reading/construction. - # they may help use manipulate or index rules, - # but should not be exposed to clients. - hidden_meta_keys = ("capa/nursery", "capa/path") + # save off the existing hidden meta values, + # emit the document, + # and re-add the hidden meta. hidden_meta = { key: meta.get(key) - for key in hidden_meta_keys + for key in HIDDEN_META_KEYS } for key in hidden_meta.keys(): From f82e453440316c4cd12f248612bcfc36eee4f585 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Fri, 26 Jun 2020 18:31:52 -0600 Subject: [PATCH 08/61] linter: learn to check for unusual meta fields closes #24 --- scripts/lint.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/scripts/lint.py b/scripts/lint.py index 78a22832..5c1701ce 100644 --- a/scripts/lint.py +++ b/scripts/lint.py @@ -183,6 +183,22 @@ class DoesntMatchExample(Lint): return True +class UnusualMetaField(Lint): + name = 'unusual meta field' + recommendation = 'Remove the unusual meta field' + + def check_rule(self, ctx, rule): + for key in rule.meta.keys(): + if key in capa.rules.META_KEYS: + continue + if key in capa.rules.HIDDEN_META_KEYS: + continue + logger.debug("unusual meta field: %s", key) + return True + + return False + + class FeatureStringTooShort(Lint): name = 'feature string too short' recommendation = 'capa only extracts strings with length >= 4; will not match on "{:s}"' @@ -235,6 +251,7 @@ META_LINTS = ( MissingExamples(), MissingExampleOffset(), ExampleFileDNE(), + UnusualMetaField(), ) From 26fef7c6157c6588e6891c05a566c1db30160949 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Fri, 26 Jun 2020 18:44:19 -0600 Subject: [PATCH 09/61] *: formatting --- capa/rules.py | 32 +++++++++--- scripts/lint.py | 18 +++---- scripts/migrate-rules.py | 104 +++++++++++++++++++-------------------- 3 files changed, 85 insertions(+), 69 deletions(-) diff --git a/capa/rules.py b/capa/rules.py index 15b2d7a5..160eba90 100644 --- a/capa/rules.py +++ b/capa/rules.py @@ -21,12 +21,28 @@ logger = logging.getLogger(__name__) # these are the standard metadata fields, in the preferred order. # when reformatted, any custom keys will come after these. -META_KEYS = ("name", "namespace", "rule-category", "maec/malware-category", "author", "description", "lib", "scope", "att&ck", "mbc", "references", "examples") +META_KEYS = ( + 'name', + 'namespace', + 'rule-category', + 'maec/analysis-conclusion', + 'maec/analysis-conclusion-ov', + 'maec/malware-category', + 'maec/malware-category-ov', + 'author', + 'description', + 'lib', + 'scope', + 'att&ck', + 'mbc', + 'references', + 'examples' +) # these are meta fields that are internal to capa, # and added during rule reading/construction. # they may help use manipulate or index rules, # but should not be exposed to clients. -HIDDEN_META_KEYS = ("capa/nursery", "capa/path") +HIDDEN_META_KEYS = ('capa/nursery', 'capa/path') FILE_SCOPE = 'file' @@ -545,11 +561,11 @@ class Rule(object): definition = yaml.load(self.definition) # definition retains a reference to `meta`, # so we're updating that in place. - definition["rule"]["meta"] = self.meta + definition['rule']['meta'] = self.meta meta = self.meta - meta["name"] = self.name - meta["scope"] = self.scope + meta['name'] = self.name + meta['scope'] = self.scope def move_to_end(m, k): # ruamel.yaml uses an ordereddict-like structure to track maps (CommentedMap). @@ -559,8 +575,8 @@ class Rule(object): del m[k] m[k] = v - move_to_end(definition["rule"], "meta") - move_to_end(definition["rule"], "features") + move_to_end(definition['rule'], 'meta') + move_to_end(definition['rule'], 'features') for key in META_KEYS: if key in meta: @@ -590,7 +606,7 @@ class Rule(object): continue meta[key] = value - return ostream.getvalue().decode('utf-8').rstrip("\n") + "\n" + return ostream.getvalue().decode('utf-8').rstrip('\n') + '\n' def get_rules_with_scope(rules, scope): diff --git a/scripts/lint.py b/scripts/lint.py index 5c1701ce..def5aa0e 100644 --- a/scripts/lint.py +++ b/scripts/lint.py @@ -47,12 +47,12 @@ class FilenameDoesntMatchRuleName(Lint): def check_rule(self, ctx, rule): expected = rule.name expected = expected.lower() - expected = expected.replace(" ", "-") - expected = expected.replace("(", "") - expected = expected.replace(")", "") - expected = expected.replace("+", "") - expected = expected.replace("/", "") - expected = expected + ".yml" + expected = expected.replace(' ', '-') + expected = expected.replace('(', '') + expected = expected.replace(')', '') + expected = expected.replace('+', '') + expected = expected.replace('/', '') + expected = expected + '.yml' found = os.path.basename(rule.meta['capa/path']) @@ -85,7 +85,7 @@ class NamespaceDoesntMatchRulePath(Lint): if 'lib' in rule.meta: return False - return rule.meta["namespace"] not in rule.meta['capa/path'].replace('\\', '/') + return rule.meta['namespace'] not in rule.meta['capa/path'].replace('\\', '/') class MissingScope(Lint): @@ -185,7 +185,7 @@ class DoesntMatchExample(Lint): class UnusualMetaField(Lint): name = 'unusual meta field' - recommendation = 'Remove the unusual meta field' + recommendation = 'Remove the meta field: "{:s}"' def check_rule(self, ctx, rule): for key in rule.meta.keys(): @@ -193,7 +193,7 @@ class UnusualMetaField(Lint): continue if key in capa.rules.HIDDEN_META_KEYS: continue - logger.debug("unusual meta field: %s", key) + self.recommendation = self.recommendation.format(key) return True return False diff --git a/scripts/migrate-rules.py b/scripts/migrate-rules.py index c8de7d2c..1697c41a 100644 --- a/scripts/migrate-rules.py +++ b/scripts/migrate-rules.py @@ -24,15 +24,15 @@ logger = logging.getLogger('migrate-rules') def read_plan(plan_path): with open(plan_path, 'rb') as f: - return list(csv.DictReader(f, restkey="other", fieldnames=( - "existing path", - "existing name", - "existing rule-category", - "proposed name", - "proposed namespace", - "ATT&CK", - "MBC", - "comment1", + return list(csv.DictReader(f, restkey='other', fieldnames=( + 'existing path', + 'existing name', + 'existing rule-category', + 'proposed name', + 'proposed namespace', + 'ATT&CK', + 'MBC', + 'comment1', ))) @@ -48,8 +48,8 @@ def read_rules(rule_directory): rule = capa.rules.Rule.from_yaml_file(path) rules[rule.name] = rule - if "nursery" in path: - rule.meta["capa/nursery"] = True + if 'nursery' in path: + rule.meta['capa/nursery'] = True return rules @@ -70,89 +70,89 @@ def main(argv=None): logging.getLogger().setLevel(logging.INFO) plan = read_plan(args.plan) - logger.info("read %d plan entries", len(plan)) + logger.info('read %d plan entries', len(plan)) rules = read_rules(args.source) - logger.info("read %d rules", len(rules)) + logger.info('read %d rules', len(rules)) - planned_rules = set([row["existing name"] for row in plan]) + planned_rules = set([row['existing name'] for row in plan]) unplanned_rules = [rule for (name, rule) in rules.items() if name not in planned_rules] if unplanned_rules: - logger.error("plan does not account for %d rules:" % (len(unplanned_rules))) + logger.error('plan does not account for %d rules:' % (len(unplanned_rules))) for rule in unplanned_rules: - logger.error(" " + rule.name) + logger.error(' ' + rule.name) return -1 # pairs of strings (needle, replacement) match_translations = [] for row in plan: - if not row["existing name"]: + if not row['existing name']: continue - rule = rules[row["existing name"]] + rule = rules[row['existing name']] - if rule.meta["name"] != row["proposed name"]: - logger.info("renaming rule '%s' -> '%s'", rule.meta["name"], row["proposed name"]) + if rule.meta['name'] != row['proposed name']: + logger.info("renaming rule '%s' -> '%s'", rule.meta['name'], row['proposed name']) # assume the yaml is formatted like `- match: $rule-name`. # but since its been linted, this should be ok. match_translations.append( - ("- match: " + rule.meta["name"], - "- match: " + row["proposed name"])) + ('- match: ' + rule.meta['name'], + '- match: ' + row['proposed name'])) - rule.meta["name"] = row["proposed name"] - rule.name = row["proposed name"] + rule.meta['name'] = row['proposed name'] + rule.name = row['proposed name'] - if "rule-category" in rule.meta: - logger.info("deleting rule category '%s'", rule.meta["rule-category"]) - del rule.meta["rule-category"] + if 'rule-category' in rule.meta: + logger.info("deleting rule category '%s'", rule.meta['rule-category']) + del rule.meta['rule-category'] - rule.meta["namespace"] = row["proposed namespace"] + rule.meta['namespace'] = row['proposed namespace'] - if row["ATT&CK"] != 'n/a' and row["ATT&CK"] != "": - tag = row["ATT&CK"] - name, _, id = tag.rpartition(" ") - tag = "%s [%s]" % (name, id) - rule.meta["att&ck"] = [tag] + if row['ATT&CK'] != 'n/a' and row['ATT&CK'] != '': + tag = row['ATT&CK'] + name, _, id = tag.rpartition(' ') + tag = '%s [%s]' % (name, id) + rule.meta['att&ck'] = [tag] - if row["MBC"] != 'n/a' and row["MBC"] != "": - tag = row["MBC"] - rule.meta["mbc"] = [tag] + if row['MBC'] != 'n/a' and row['MBC'] != '': + tag = row['MBC'] + rule.meta['mbc'] = [tag] for rule in rules.values(): filename = rule.name filename = filename.lower() - filename = filename.replace(" ", "-") - filename = filename.replace("(", "") - filename = filename.replace(")", "") - filename = filename.replace("+", "") - filename = filename.replace("/", "") - filename = filename + ".yml" + filename = filename.replace(' ', '-') + filename = filename.replace('(', '') + filename = filename.replace(')', '') + filename = filename.replace('+', '') + filename = filename.replace('/', '') + filename = filename + '.yml' try: - if rule.meta.get("capa/nursery"): - directory = os.path.join(args.destination, "nursery") - elif rule.meta.get("lib"): - directory = os.path.join(args.destination, "lib") + if rule.meta.get('capa/nursery'): + directory = os.path.join(args.destination, 'nursery') + elif rule.meta.get('lib'): + directory = os.path.join(args.destination, 'lib') else: - directory = os.path.join(args.destination, rule.meta.get("namespace")) + directory = os.path.join(args.destination, rule.meta.get('namespace')) os.makedirs(directory) except OSError: pass else: - logger.info("created namespace: %s", directory) + logger.info('created namespace: %s', directory) path = os.path.join(directory, filename) - logger.info("writing rule %s", path) + logger.info('writing rule %s', path) - doc = rule.to_yaml().decode("utf-8") + doc = rule.to_yaml().decode('utf-8') for (needle, replacement) in match_translations: doc = doc.replace(needle, replacement) - with open(path, "wb") as f: - f.write(doc.encode("utf-8")) + with open(path, 'wb') as f: + f.write(doc.encode('utf-8')) return 0 From d1dd997b7bb58aedb059cdcd68b74117543c6088 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Fri, 26 Jun 2020 18:59:23 -0600 Subject: [PATCH 10/61] setup: add dev dependencies closes #21 --- scripts/setup-hooks.sh | 5 ----- setup.py | 7 +++++++ 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/scripts/setup-hooks.sh b/scripts/setup-hooks.sh index 941e0e58..5c5feae3 100755 --- a/scripts/setup-hooks.sh +++ b/scripts/setup-hooks.sh @@ -21,8 +21,3 @@ create_hook() { echo '\n#### Copying hooks into .git/hooks' create_hook 'post-commit' create_hook 'pre-push' - -echo '\n#### Installing linter/test dependencies\n' -pip install pycodestyle pytest-sugar -pip install https://github.com/williballenthin/vivisect/zipball/master -python setup.py develop diff --git a/setup.py b/setup.py index de1b8a04..317b1ab7 100644 --- a/setup.py +++ b/setup.py @@ -51,6 +51,13 @@ setuptools.setup( }, include_package_data=True, install_requires=requirements, + extras_require={ + 'dev': [ + 'pytest', + 'pytest-sugar', + 'pycodestyle', + ] + }, zip_safe=False, keywords='capa', classifiers=[ From 918a47cfb7d2a2bf5336debb8f09ecc5fc000489 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Fri, 26 Jun 2020 19:04:36 -0600 Subject: [PATCH 11/61] git hooks: address shellcheck issues --- scripts/hooks/post-commit | 32 ++++++++++++----------- scripts/hooks/pre-push | 53 +++++++++++++++++++++------------------ scripts/setup-hooks.sh | 20 +++++++-------- 3 files changed, 56 insertions(+), 49 deletions(-) diff --git a/scripts/hooks/post-commit b/scripts/hooks/post-commit index b2adcd02..ba85f102 100755 --- a/scripts/hooks/post-commit +++ b/scripts/hooks/post-commit @@ -1,34 +1,38 @@ +#!/usr/bin/env bash +# doesn't matter if this gets repeated later on in a hooks file # Use a console with emojis support for a better experience # Stash uncommited changes -MSG="post-commit-$(date +%s)" -git stash push -kqum $MSG -STASH_LIST=$(git stash list) +MSG="post-commit-$(date +%s)"; +git stash push -kqum "$MSG"; +STASH_LIST=$(git stash list); if [[ "$STASH_LIST" == *"$MSG"* ]]; then - echo "Uncommited changes stashed with message '$MSG', if you abort before they are restored run \`git stash pop\`" + echo "Uncommited changes stashed with message '$MSG', if you abort before they are restored run \`git stash pop\`"; fi # Run style checker and print state (it doesn't block the commit) -pycodestyle --config=./ci/tox.ini ./capa/ > style-checker-output.log 2>&1 +pycodestyle --config=./ci/tox.ini ./capa/ > style-checker-output.log 2>&1; if [ $? == 0 ]; then - echo 'Style checker succeeds!! 💘' + echo 'Style checker succeeds!! 💘'; else - echo 'Style checker failed 😭\nCheck style-checker-output.log for details' - exit 1 + echo 'Style checker failed 😭'; + echo 'Check style-checker-output.log for details'; + exit 1; fi # Run rule linter and print state (it doesn't block the commit) -python ./scripts/lint.py ./rules/ > rule-linter-output.log 2>&1 +python ./scripts/lint.py ./rules/ > rule-linter-output.log 2>&1; if [ $? == 0 ]; then - echo 'Rule linter succeeds!! 💖' + echo 'Rule linter succeeds!! 💖'; else - echo 'Rule linter failed 😭\nCheck rule-linter-output.log for details' - exit 2 + echo 'Rule linter failed 😭'; + echo 'Check rule-linter-output.log for details'; + exit 2; fi # Restore stashed changes if [[ "$STASH_LIST" == *"$MSG"* ]]; then - git stash pop -q --index - echo "Stashed changes '$MSG' restored" + git stash pop -q --index; + echo "Stashed changes '$MSG' restored"; fi diff --git a/scripts/hooks/pre-push b/scripts/hooks/pre-push index dd6ca612..edab4689 100755 --- a/scripts/hooks/pre-push +++ b/scripts/hooks/pre-push @@ -1,52 +1,57 @@ +#!/usr/bin/env bash +# doesn't matter if this gets repeated later on in a hooks file # Use a console with emojis support for a better experience # Stash uncommited changes -MSG="pre-push-$(date +%s)" -git stash push -kqum $MSG -STASH_LIST=$(git stash list) +MSG="pre-push-$(date +%s)"; +git stash push -kqum "$MSG"; +STASH_LIST=$(git stash list); if [[ "$STASH_LIST" == *"$MSG"* ]]; then - echo "Uncommited changes stashed with message '$MSG', if you abort before they are restored run \`git stash pop\`" + echo "Uncommited changes stashed with message '$MSG', if you abort before they are restored run \`git stash pop\`"; fi restore_stashed() { if [[ "$STASH_LIST" == *"$MSG"* ]]; then - git stash pop -q --index - echo "Stashed changes '$MSG' restored" + git stash pop -q --index; + echo "Stashed changes '$MSG' restored"; fi } # Run style checker and print state -pycodestyle --config=./ci/tox.ini ./capa/ > style-checker-output.log 2>&1 +pycodestyle --config=./ci/tox.ini ./capa/ > style-checker-output.log 2>&1; if [ $? == 0 ]; then - echo 'Style checker succeeds!! 💘' + echo 'Style checker succeeds!! 💘'; else - echo 'Style checker failed 😭 PUSH ABORTED\nCheck style-checker-output.log for details' - restore_stashed - exit 1 + echo 'Style checker failed 😭 PUSH ABORTED'; + echo 'Check style-checker-output.log for details'; + restore_stashed; + exit 1; fi # Run rule linter and print state -python ./scripts/lint.py ./rules/ > rule-linter-output.log 2>&1 +python ./scripts/lint.py ./rules/ > rule-linter-output.log 2>&1; if [ $? == 0 ]; then - echo 'Rule linter succeeds!! 💖' + echo 'Rule linter succeeds!! 💖'; else - echo 'Rule linter failed 😭 PUSH ABORTED\nCheck rule-linter-output.log for details' - restore_stashed - exit 2 + echo 'Rule linter failed 😭 PUSH ABORTED'; + echo 'Check rule-linter-output.log for details'; + restore_stashed; + exit 2; fi # Run tests -echo 'Running tests, please wait ⌛' -pytest tests/ --maxfail=1 +echo 'Running tests, please wait ⌛'; +pytest tests/ --maxfail=1; if [ $? == 0 ]; then - echo 'Tests succeed!! 🎉' + echo 'Tests succeed!! 🎉'; else - echo 'Tests failed 😓 PUSH ABORTED\nRun `pytest -v --cov=capa test/` if you need more details' - restore_stashed - exit 3 + echo 'Tests failed 😓 PUSH ABORTED'; + echo 'Run `pytest -v --cov=capa test/` if you need more details'; + restore_stashed; + exit 3; fi -echo 'PUSH SUCCEEDED 🎉🎉' +echo 'PUSH SUCCEEDED 🎉🎉'; -restore_stashed +restore_stashed; diff --git a/scripts/setup-hooks.sh b/scripts/setup-hooks.sh index 5c5feae3..95d35016 100755 --- a/scripts/setup-hooks.sh +++ b/scripts/setup-hooks.sh @@ -1,23 +1,21 @@ #!/usr/bin/env bash -set -e -set -u -set -o pipefail +set -euo pipefail -GIT_DIR=`git rev-parse --show-toplevel` -cd $GIT_DIR +GIT_DIR=$(git rev-parse --show-toplevel); +cd "$GIT_DIR"; # hooks may exist already (e.g. git-lfs configuration) # If the `.git/hooks/$arg` file doesn't exist it, initialize with `#!/bin/sh` # After that append `scripts/hooks/$arg` and ensure they can be run create_hook() { if [[ ! -e .git/hooks/$1 ]]; then - echo "#!/bin/sh" > ".git/hooks/$1" + echo "#!/bin/sh" > ".git/hooks/$1"; fi - cat scripts/hooks/$1 >> ".git/hooks/$1" - chmod +x .git/hooks/$1 + cat scripts/hooks/"$1" >> ".git/hooks/$1"; + chmod +x .git/hooks/"$1"; } -echo '\n#### Copying hooks into .git/hooks' -create_hook 'post-commit' -create_hook 'pre-push' +printf '\n#### Copying hooks into .git/hooks'; +create_hook 'post-commit'; +create_hook 'pre-push'; From 9d9d821c22584369a5bcddb9e0759ae5a1669b7d Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Fri, 26 Jun 2020 22:39:10 -0600 Subject: [PATCH 12/61] ruleset: add __getitem__ accessor to easily fetch rule by name --- capa/rules.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/capa/rules.py b/capa/rules.py index 160eba90..d60c17c9 100644 --- a/capa/rules.py +++ b/capa/rules.py @@ -706,6 +706,9 @@ class RuleSet(object): def __len__(self): return len(self.rules) + def __getitem__(self, rulename): + return self.rules[rulename] + @staticmethod def _get_rules_for_scope(rules, scope): ''' From d0345fcd539039da61c808f0819ced6260f54d28 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Fri, 26 Jun 2020 22:54:13 -0600 Subject: [PATCH 13/61] render: start to implement common result document format --- capa/main.py | 12 ++- capa/render/__init__.py | 222 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 231 insertions(+), 3 deletions(-) create mode 100644 capa/render/__init__.py diff --git a/capa/main.py b/capa/main.py index 63d13cfb..f7d2dc6e 100644 --- a/capa/main.py +++ b/capa/main.py @@ -13,6 +13,7 @@ import argparse import capa.rules import capa.engine +import capa.render import capa.features import capa.features.freeze import capa.features.extractors @@ -110,6 +111,7 @@ def find_capabilities(ruleset, extractor, disable_progress=None): matches.update(all_bb_matches) matches.update(all_function_matches) matches.update(all_file_matches) + return matches @@ -635,6 +637,8 @@ def main(argv=None): help='Path to rule file or directory, use embedded rules by default') parser.add_argument('-t', '--tag', type=str, help='Filter on rule meta field values') + parser.add_argument('--json', action='store_true', + help='Emit JSON instead of text') parser.add_argument('-v', '--verbose', action='store_true', help='Enable verbose output') parser.add_argument('-vv', '--vverbose', action='store_true', @@ -735,12 +739,14 @@ def main(argv=None): if not (args.verbose or args.vverbose): return -1 + if args.json: + print(capa.render.render_json(rules, capabilities)) if args.vverbose: - render_capabilities_vverbose(rules, capabilities) + print(capa.render.render_vverbose(rules, capabilities)) elif args.verbose: - render_capabilities_verbose(rules, capabilities) + print(capa.render.render_verbose(rules, capabilities)) else: - render_capabilities_default(rules, capabilities) + print(capa.render.render_default(rules, capabilities)) logger.info('done.') diff --git a/capa/render/__init__.py b/capa/render/__init__.py new file mode 100644 index 00000000..a4ed629e --- /dev/null +++ b/capa/render/__init__.py @@ -0,0 +1,222 @@ +import json + +import capa.engine + + +def convert_statement_to_result_document(rules, statement): + """ + args: + rules (RuleSet): + node (Statement): + + returns: Dict[str, Any] + """ + if isinstance(statement, capa.engine.And): + return { + 'type': 'and', + } + elif isinstance(statement, capa.engine.Or): + return { + 'type': 'or', + } + elif isinstance(statement, capa.engine.Not): + return { + 'type': 'not', + } + elif isinstance(statement, capa.engine.Or): + return { + 'type': 'or', + } + elif isinstance(statement, capa.engine.Some) and statement.count == 0: + return { + 'type': 'optional' + } + elif isinstance(statement, capa.engine.Some) and statement.count > 0: + return { + 'type': 'some', + 'count': statement.count, + } + elif isinstance(statement, capa.engine.Range): + return { + 'type': 'range', + 'min': statement.min, + 'max': statement.max, + } + elif isinstance(statement, capa.engine.Regex): + return { + 'type': 'regex', + 'pattern': statement.pattern, + } + elif isinstance(statement, capa.engine.Subscope): + return { + 'type': 'subscope', + 'scope': statement.scope, + } + else: + raise RuntimeError("unexpected match statement type: " + str(statement)) + + +def convert_feature_to_result_document(rules, feature): + """ + args: + rules (RuleSet): + node (Feature): + + returns: Dict[str, Any] + """ + name, value = feature.freeze_serialize() + + name = name.lower() + if name == 'matchedrule': + name = 'match' + + if isinstance(value, list) and len(value) == 1: + value = value[0] + + if name == 'match': + rule_name = value + rule = rules[rule_name] + if rule.meta.get('capa/subscope-rule'): + name = rule.meta['scope'] + # TODO: link this logic together, when present + + return { + 'type': name, + name: value, + } + + +def convert_node_to_result_document(rules, node): + """ + + args: + rules (RuleSet): + node (Statement|Feature): + + returns: Dict[str, Any] + """ + + if isinstance(node, capa.engine.Statement): + return { + 'type': 'statement', + 'statement': convert_statement_to_result_document(rules, node), + } + elif isinstance(node, capa.features.Feature): + return { + 'type': 'feature', + 'feature': convert_feature_to_result_document(rules, node), + } + else: + raise RuntimeError("unexpected match node type") + + +def convert_match_to_result_document(rules, result): + """ + convert the given rule set and Result instance into a common, Python-native data structure. + this will become part of the "result document" format that can be emitted to JSON. + + args: + rules (RuleSet): + result (Result): + + returns: Dict[str, Any] + """ + doc = { + 'success': bool(result.success), + 'node': convert_node_to_result_document(rules, result.statement), + 'children': [ + convert_match_to_result_document(rules, child) + for child in result.children + ], + } + + if isinstance(result.statement, capa.features.Feature): + if bool(result.success): + doc['locations'] = result.locations + + # TODO: can a feature ever have children? suspect so with `match`? + + return doc + + +def convert_capabilities_to_result_document(rules, capabilities): + """ + convert the given rule set and capabilties result to a common, Python-native data structure. + this format can be directly emitted to JSON, or passed to the other `render_*` routines + to render as text. + + TODO: document the structure and provide examples + + schema: + + ```json + { + $rule-name: { + "meta": {...copied from rule.meta...}, + "matches: { + $address: {...TODO: match details...}, + ... + } + }, + ... + } + ``` + + args: + rules (RuleSet): + capabilities (Dict[str, List[Tuple[int, Result]]]): + + returns: Dict[str, Any] + """ + doc = {} + + for rule_name, matches in capabilities.items(): + rule = rules[rule_name] + + if rule.meta.get('capa/subscope-rule'): + continue + + doc[rule_name] = { + 'meta': dict(rule.meta), + 'matches': { + addr: convert_match_to_result_document(rules, match) + for (addr, match) in matches + }, + } + + return doc + + +def render_vverbose(rules, capabilities): + doc = convert_capabilities_to_result_document(rules, capabilities) + return '' + + +def render_verbose(rules, capabilities): + doc = convert_capabilities_to_result_document(rules, capabilities) + return '' + + +def render_default(rules, capabilities): + doc = convert_capabilities_to_result_document(rules, capabilities) + return '' + + +class CapaJsonObjectEncoder(json.JSONEncoder): + def default(self, obj): + if isinstance(obj, (list, dict, str, unicode, int, float, bool, type(None))): + return json.JSONEncoder.default(self, obj) + elif isinstance(obj, set): + return list(sorted(obj)) + else: + # probably will TypeError + return json.JSONEncoder.default(self, obj) + + +def render_json(rules, capabilities): + return json.dumps( + convert_capabilities_to_result_document(rules, capabilities), + cls=CapaJsonObjectEncoder, + sort_keys=True, + indent=4, + ) From dd70e5a2f67784a849fd42a13d9d1dc7e0e09940 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Fri, 26 Jun 2020 22:54:48 -0600 Subject: [PATCH 14/61] main: tweak logging levels --- capa/main.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/capa/main.py b/capa/main.py index f7d2dc6e..8cd8a830 100644 --- a/capa/main.py +++ b/capa/main.py @@ -589,7 +589,7 @@ def get_rules(rule_path): if os.path.isfile(rule_path): rule_paths.append(rule_path) elif os.path.isdir(rule_path): - logger.info('reading rules from directory %s', rule_path) + logger.debug('reading rules from directory %s', rule_path) for root, dirs, files in os.walk(rule_path): for file in files: if not file.endswith('.yml'): @@ -601,7 +601,7 @@ def get_rules(rule_path): rules = [] for rule_path in rule_paths: - logger.info('reading rule file: %s', rule_path) + logger.debug('reading rule file: %s', rule_path) try: rule = capa.rules.Rule.from_yaml_file(rule_path) except capa.rules.InvalidRule: From d3b02a0b4d7138cb2252a338c8f431f9d19a9bb6 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Sat, 27 Jun 2020 08:04:35 -0600 Subject: [PATCH 15/61] render: splice in results for match statements --- capa/engine.py | 2 +- capa/main.py | 2 +- capa/render/__init__.py | 123 +++++++++++++++++++++++++++------------- 3 files changed, 87 insertions(+), 40 deletions(-) diff --git a/capa/engine.py b/capa/engine.py index 99512eb2..aa3d3527 100644 --- a/capa/engine.py +++ b/capa/engine.py @@ -216,7 +216,7 @@ class Subscope(Statement): def topologically_order_rules(rules): ''' order the given rules such that dependencies show up before dependents. - this means that as we match rules, we can add features, and these + this means that as we match rules, we can add features for the matches, and these will be matched by subsequent rules if they follow this order. assumes that the rule dependency graph is a DAG. diff --git a/capa/main.py b/capa/main.py index 8cd8a830..e5b27877 100644 --- a/capa/main.py +++ b/capa/main.py @@ -741,7 +741,7 @@ def main(argv=None): if args.json: print(capa.render.render_json(rules, capabilities)) - if args.vverbose: + elif args.vverbose: print(capa.render.render_vverbose(rules, capabilities)) elif args.verbose: print(capa.render.render_verbose(rules, capabilities)) diff --git a/capa/render/__init__.py b/capa/render/__init__.py index a4ed629e..8161fc79 100644 --- a/capa/render/__init__.py +++ b/capa/render/__init__.py @@ -3,13 +3,17 @@ import json import capa.engine -def convert_statement_to_result_document(rules, statement): +def convert_statement_to_result_document(statement): """ - args: - rules (RuleSet): - node (Statement): + "statement": { + "type": "or" + }, - returns: Dict[str, Any] + "statement": { + "max": 9223372036854775808, + "min": 2, + "type": "range" + }, """ if isinstance(statement, capa.engine.And): return { @@ -56,63 +60,80 @@ def convert_statement_to_result_document(rules, statement): raise RuntimeError("unexpected match statement type: " + str(statement)) -def convert_feature_to_result_document(rules, feature): +def convert_feature_to_result_document(feature): """ - args: - rules (RuleSet): - node (Feature): + "feature": { + "number": 6, + "type": "number" + }, - returns: Dict[str, Any] + "feature": { + "api": "ws2_32.WSASocket", + "type": "api" + }, + + "feature": { + "match": "create TCP socket", + "type": "match" + }, + + "feature": { + "characteristic": [ + "loop", + true + ], + "type": "characteristic" + }, """ name, value = feature.freeze_serialize() + # make the terms pretty name = name.lower() if name == 'matchedrule': name = 'match' + # in the common case, there's a single argument + # so use it directly. + # like: name=number value=1 if isinstance(value, list) and len(value) == 1: value = value[0] - if name == 'match': - rule_name = value - rule = rules[rule_name] - if rule.meta.get('capa/subscope-rule'): - name = rule.meta['scope'] - # TODO: link this logic together, when present - return { 'type': name, name: value, } -def convert_node_to_result_document(rules, node): +def convert_node_to_result_document(node): """ + "node": { + "type": "statement", + "statement": { ... } + }, - args: - rules (RuleSet): - node (Statement|Feature): - - returns: Dict[str, Any] + "node": { + "type": "feature", + "feature": { ... } + }, """ if isinstance(node, capa.engine.Statement): return { 'type': 'statement', - 'statement': convert_statement_to_result_document(rules, node), + 'statement': convert_statement_to_result_document(node), } elif isinstance(node, capa.features.Feature): return { 'type': 'feature', - 'feature': convert_feature_to_result_document(rules, node), + 'feature': convert_feature_to_result_document(node), } else: raise RuntimeError("unexpected match node type") -def convert_match_to_result_document(rules, result): +def convert_match_to_result_document(rules, capabilities, result): """ - convert the given rule set and Result instance into a common, Python-native data structure. + convert the given Result instance into a common, Python-native data structure. this will become part of the "result document" format that can be emitted to JSON. args: @@ -123,29 +144,58 @@ def convert_match_to_result_document(rules, result): """ doc = { 'success': bool(result.success), - 'node': convert_node_to_result_document(rules, result.statement), + 'node': convert_node_to_result_document(result.statement), 'children': [ - convert_match_to_result_document(rules, child) + convert_match_to_result_document(rules, capabilities, child) for child in result.children ], } + # logic expression, like `and`, don't have locations - their children do. + # so only add `locations` to feature nodes. if isinstance(result.statement, capa.features.Feature): if bool(result.success): doc['locations'] = result.locations - # TODO: can a feature ever have children? suspect so with `match`? + # if we have a `match` statement, then we're referencing another rule. + # this could an external rule (written by a human), or + # rule generated to support a subscope (basic block, etc.) + # we still want to include the matching logic in this tree. + # + # so, we need to lookup the other rule results + # and then filter those down to the address used here. + # finally, splice that logic into this tree. + if (doc['node']['type'] == 'feature' and + doc['node']['feature']['type'] == 'match' and + # only add subtree on success, + # because there won't be results for the other rule on failure. + doc['success']): + + rule_name = doc['node']['feature']['match'] + rule = rules[rule_name] + rule_matches = {address: result for (address, result) in capabilities[rule_name]} + + if rule.meta.get('capa/subscope-rule'): + # for a subscope rule, rename the rule name to the scope, + # which is consistent with the rule text. + # + # e.g. `contain loop/30c4c78e29bf4d54894fc74f664c62e8` -> `basic block` + scope = rule.meta['scope'] + doc['node']['feature']['match'] = scope + + for location in doc['locations']: + doc['children'].append(convert_match_to_result_document(rules, capabilities, rule_matches[location])) return doc def convert_capabilities_to_result_document(rules, capabilities): """ - convert the given rule set and capabilties result to a common, Python-native data structure. + convert the given rule set and capabilities result to a common, Python-native data structure. this format can be directly emitted to JSON, or passed to the other `render_*` routines to render as text. - TODO: document the structure and provide examples + see examples of substructures in above routines. schema: @@ -154,7 +204,7 @@ def convert_capabilities_to_result_document(rules, capabilities): $rule-name: { "meta": {...copied from rule.meta...}, "matches: { - $address: {...TODO: match details...}, + $address: {...match details...}, ... } }, @@ -162,11 +212,9 @@ def convert_capabilities_to_result_document(rules, capabilities): } ``` - args: + Args: rules (RuleSet): capabilities (Dict[str, List[Tuple[int, Result]]]): - - returns: Dict[str, Any] """ doc = {} @@ -179,7 +227,7 @@ def convert_capabilities_to_result_document(rules, capabilities): doc[rule_name] = { 'meta': dict(rule.meta), 'matches': { - addr: convert_match_to_result_document(rules, match) + addr: convert_match_to_result_document(rules, capabilities, match) for (addr, match) in matches }, } @@ -218,5 +266,4 @@ def render_json(rules, capabilities): convert_capabilities_to_result_document(rules, capabilities), cls=CapaJsonObjectEncoder, sort_keys=True, - indent=4, ) From 135329ed1ded62a56208e13d0cb4104fb2d4e5b8 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Sat, 27 Jun 2020 08:06:13 -0600 Subject: [PATCH 16/61] pep8 --- capa/render/__init__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/capa/render/__init__.py b/capa/render/__init__.py index 8161fc79..93b8544d 100644 --- a/capa/render/__init__.py +++ b/capa/render/__init__.py @@ -165,11 +165,11 @@ def convert_match_to_result_document(rules, capabilities, result): # so, we need to lookup the other rule results # and then filter those down to the address used here. # finally, splice that logic into this tree. - if (doc['node']['type'] == 'feature' and - doc['node']['feature']['type'] == 'match' and + if (doc['node']['type'] == 'feature' + and doc['node']['feature']['type'] == 'match' # only add subtree on success, # because there won't be results for the other rule on failure. - doc['success']): + and doc['success']): rule_name = doc['node']['feature']['match'] rule = rules[rule_name] From 1ea9b6e1e8851d35ed4d83d5dce726468deb9c2d Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Sat, 27 Jun 2020 09:05:43 -0600 Subject: [PATCH 17/61] render: implement default renderer --- capa/render/__init__.py | 3 +- capa/render/default.py | 69 +++++++++++++++++++++++++++++++++++++++++ setup.py | 2 ++ 3 files changed, 73 insertions(+), 1 deletion(-) create mode 100644 capa/render/default.py diff --git a/capa/render/__init__.py b/capa/render/__init__.py index 93b8544d..0aeb5226 100644 --- a/capa/render/__init__.py +++ b/capa/render/__init__.py @@ -1,6 +1,7 @@ import json import capa.engine +import capa.render.default def convert_statement_to_result_document(statement): @@ -247,7 +248,7 @@ def render_verbose(rules, capabilities): def render_default(rules, capabilities): doc = convert_capabilities_to_result_document(rules, capabilities) - return '' + return capa.render.default.render_default(doc) class CapaJsonObjectEncoder(json.JSONEncoder): diff --git a/capa/render/default.py b/capa/render/default.py new file mode 100644 index 00000000..2c087b5f --- /dev/null +++ b/capa/render/default.py @@ -0,0 +1,69 @@ +import collections + +import six +import tabulate +import termcolor + + +def bold(s): + """draw attention to the given string""" + return termcolor.colored(s, 'blue') + + +def render_capabilities(doc, ostream): + rows = [] + for (namespace, name, rule) in sorted(map(lambda rule: (rule['meta']['namespace'], rule['meta']['name'], rule), doc.values())): + if rule['meta'].get('lib'): + continue + if rule['meta'].get('capa/subscope'): + continue + + rows.append((bold(name), namespace)) + + ostream.write(tabulate.tabulate(rows, headers=['CAPABILITY', 'NAMESPACE'], tablefmt="psql")) + ostream.write("\n") + + +def render_attack(doc, ostream): + tactics = collections.defaultdict(set) + for rule in doc.values(): + if rule['meta'].get('lib'): + continue + if rule['meta'].get('capa/subscope'): + continue + if not rule['meta'].get('att&ck'): + continue + + for attack in rule['meta']['att&ck']: + tactic, _, rest = attack.partition('::') + if '::' in rest: + technique, _, rest = rest.partition('::') + subtechnique, _, id = rest.rpartition(' ') + tactics[tactic].add((technique, subtechnique, id)) + else: + technique, _, id = rest.rpartition(' ') + tactics[tactic].add((technique, id)) + + for tactic, techniques in sorted(tactics.items()): + rows = [] + for spec in sorted(techniques): + if len(spec) == 2: + technique, id = spec + rows.append(("%s %s" % (bold(technique), id), )) + elif len(spec) == 3: + technique, subtechnique, id = spec + rows.append(("%s::%s %s" % (bold(technique), subtechnique, id), )) + else: + raise RuntimeError("unexpected ATT&CK spec format") + ostream.write(tabulate.tabulate(rows, headers=['ATT&CK tactic: ' + bold(tactic.upper())], tablefmt="psql")) + ostream.write("\n") + + +def render_default(doc): + ostream = six.StringIO() + + render_attack(doc, ostream) + ostream.write("\n") + render_capabilities(doc, ostream) + + return ostream.getvalue() diff --git a/setup.py b/setup.py index 317b1ab7..29caa174 100644 --- a/setup.py +++ b/setup.py @@ -9,6 +9,8 @@ requirements = [ "tqdm", "pyyaml", "tabulate", + "colorama", + "termcolor", "ruamel.yaml" ] From 87999a820e11315e452ee2cdba80aef0b6165c70 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Sat, 27 Jun 2020 09:06:26 -0600 Subject: [PATCH 18/61] main: use colorama to switch of coloring, if necessary --- capa/main.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/capa/main.py b/capa/main.py index e5b27877..0bf68800 100644 --- a/capa/main.py +++ b/capa/main.py @@ -10,6 +10,7 @@ import collections import tqdm import argparse +import colorama import capa.rules import capa.engine @@ -739,6 +740,11 @@ def main(argv=None): if not (args.verbose or args.vverbose): return -1 + # colorama will detect: + # - when on Windows console, and fixup coloring, and + # - when not an interactive session, and disable coloring + # renderers should use coloring and assume it will be stripped out if necessary. + colorama.init() if args.json: print(capa.render.render_json(rules, capabilities)) elif args.vverbose: @@ -747,6 +753,7 @@ def main(argv=None): print(capa.render.render_verbose(rules, capabilities)) else: print(capa.render.render_default(rules, capabilities)) + colorama.deinit() logger.info('done.') From 9c0bba7021c290693f50206c1c8df0ca427ff4ff Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Sat, 27 Jun 2020 17:51:37 -0600 Subject: [PATCH 19/61] render: factor common routines into utils --- capa/render/default.py | 27 +++++++-------------------- capa/render/utils.py | 25 +++++++++++++++++++++++++ capa/render/verbose.py | 13 +++++++++++++ 3 files changed, 45 insertions(+), 20 deletions(-) create mode 100644 capa/render/utils.py create mode 100644 capa/render/verbose.py diff --git a/capa/render/default.py b/capa/render/default.py index 2c087b5f..f0294494 100644 --- a/capa/render/default.py +++ b/capa/render/default.py @@ -2,23 +2,14 @@ import collections import six import tabulate -import termcolor - -def bold(s): - """draw attention to the given string""" - return termcolor.colored(s, 'blue') +import capa.render.utils as rutils def render_capabilities(doc, ostream): rows = [] - for (namespace, name, rule) in sorted(map(lambda rule: (rule['meta']['namespace'], rule['meta']['name'], rule), doc.values())): - if rule['meta'].get('lib'): - continue - if rule['meta'].get('capa/subscope'): - continue - - rows.append((bold(name), namespace)) + for rule in rutils.capability_rules(doc): + rows.append((rutils.bold(rule['meta']['name']), rule['meta']['namespace'])) ostream.write(tabulate.tabulate(rows, headers=['CAPABILITY', 'NAMESPACE'], tablefmt="psql")) ostream.write("\n") @@ -26,11 +17,7 @@ def render_capabilities(doc, ostream): def render_attack(doc, ostream): tactics = collections.defaultdict(set) - for rule in doc.values(): - if rule['meta'].get('lib'): - continue - if rule['meta'].get('capa/subscope'): - continue + for rule in rutils.capability_rules(doc): if not rule['meta'].get('att&ck'): continue @@ -49,13 +36,13 @@ def render_attack(doc, ostream): for spec in sorted(techniques): if len(spec) == 2: technique, id = spec - rows.append(("%s %s" % (bold(technique), id), )) + rows.append(("%s %s" % (rutils.bold(technique), id), )) elif len(spec) == 3: technique, subtechnique, id = spec - rows.append(("%s::%s %s" % (bold(technique), subtechnique, id), )) + rows.append(("%s::%s %s" % (rutils.bold(technique), subtechnique, id), )) else: raise RuntimeError("unexpected ATT&CK spec format") - ostream.write(tabulate.tabulate(rows, headers=['ATT&CK tactic: ' + bold(tactic.upper())], tablefmt="psql")) + ostream.write(tabulate.tabulate(rows, headers=['ATT&CK tactic: ' + rutils.bold(tactic.upper())], tablefmt="psql")) ostream.write("\n") diff --git a/capa/render/utils.py b/capa/render/utils.py new file mode 100644 index 00000000..10c04607 --- /dev/null +++ b/capa/render/utils.py @@ -0,0 +1,25 @@ +import termcolor + + +def bold(s): + """draw attention to the given string""" + return termcolor.colored(s, 'blue') + + +def capability_rules(doc): + """enumerate the rules in (namespace, name) order that are 'capability' rules (not lib/subscope/disposition/etc).""" + for rule in sorted(map(lambda rule: (rule['meta']['namespace'], rule['meta']['name'], rule), doc.values())): + if rule['meta'].get('lib'): + continue + if rule['meta'].get('capa/subscope'): + continue + if rule['meta'].get('maec/analysis-conclusion'): + continue + if rule['meta'].get('maec/analysis-conclusion-ov'): + continue + if rule['meta'].get('maec/malware-category'): + continue + if rule['meta'].get('maec/malware-category-ov'): + continue + + yield rule diff --git a/capa/render/verbose.py b/capa/render/verbose.py new file mode 100644 index 00000000..2b2d222b --- /dev/null +++ b/capa/render/verbose.py @@ -0,0 +1,13 @@ +import collections + +import six +import tabulate + +import capa.render.utils + + +def render_verbose(doc): + ostream = six.StringIO() + + + return ostream.getvalue() From 1ac5813ed23daa7d5f2d63af3596a22669160d14 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Sat, 27 Jun 2020 18:03:34 -0600 Subject: [PATCH 20/61] main: add --debug flag to manipulate logging level closes #33 --- capa/main.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/capa/main.py b/capa/main.py index 0bf68800..ae10d50e 100644 --- a/capa/main.py +++ b/capa/main.py @@ -641,9 +641,11 @@ def main(argv=None): parser.add_argument('--json', action='store_true', help='Emit JSON instead of text') parser.add_argument('-v', '--verbose', action='store_true', - help='Enable verbose output') + help='Enable verbose result document (no effect with --json)') parser.add_argument('-vv', '--vverbose', action='store_true', - help='Enable very verbose output') + help='Enable very verbose result document (no effect with --json)') + parser.add_argument('-d', '--debug', action='store_true', + help='Enable debugging output on STDERR') parser.add_argument('-q', '--quiet', action='store_true', help='Disable all output but errors') parser.add_argument('-f', '--format', choices=[f[0] for f in formats], default='auto', @@ -653,7 +655,7 @@ def main(argv=None): if args.quiet: logging.basicConfig(level=logging.ERROR) logging.getLogger().setLevel(logging.ERROR) - elif args.verbose: + elif args.debug: logging.basicConfig(level=logging.DEBUG) logging.getLogger().setLevel(logging.DEBUG) else: From 0266d31b21fd1c9d5901e62ad97704c25b5940e5 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Sat, 27 Jun 2020 18:04:28 -0600 Subject: [PATCH 21/61] render: style --- capa/render/__init__.py | 5 +++-- capa/render/utils.py | 11 +++++++++-- capa/render/verbose.py | 11 ++++------- 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/capa/render/__init__.py b/capa/render/__init__.py index 0aeb5226..bc979d2b 100644 --- a/capa/render/__init__.py +++ b/capa/render/__init__.py @@ -1,7 +1,6 @@ import json import capa.engine -import capa.render.default def convert_statement_to_result_document(statement): @@ -242,11 +241,13 @@ def render_vverbose(rules, capabilities): def render_verbose(rules, capabilities): + import capa.render.verbose doc = convert_capabilities_to_result_document(rules, capabilities) - return '' + return capa.render.verbose.render_verbose(doc) def render_default(rules, capabilities): + import capa.render.default doc = convert_capabilities_to_result_document(rules, capabilities) return capa.render.default.render_default(doc) diff --git a/capa/render/utils.py b/capa/render/utils.py index 10c04607..09bdaf67 100644 --- a/capa/render/utils.py +++ b/capa/render/utils.py @@ -1,3 +1,4 @@ +import six import termcolor @@ -8,7 +9,7 @@ def bold(s): def capability_rules(doc): """enumerate the rules in (namespace, name) order that are 'capability' rules (not lib/subscope/disposition/etc).""" - for rule in sorted(map(lambda rule: (rule['meta']['namespace'], rule['meta']['name'], rule), doc.values())): + for (_, _, rule) in sorted(map(lambda rule: (rule['meta']['namespace'], rule['meta']['name'], rule), doc.values())): if rule['meta'].get('lib'): continue if rule['meta'].get('capa/subscope'): @@ -18,8 +19,14 @@ def capability_rules(doc): if rule['meta'].get('maec/analysis-conclusion-ov'): continue if rule['meta'].get('maec/malware-category'): - continue + continue if rule['meta'].get('maec/malware-category-ov'): continue yield rule + + +class StringIO(six.StringIO): + def writeln(self, s): + self.write(s) + self.write('\n') diff --git a/capa/render/verbose.py b/capa/render/verbose.py index 2b2d222b..ef5559a4 100644 --- a/capa/render/verbose.py +++ b/capa/render/verbose.py @@ -1,13 +1,10 @@ -import collections - -import six -import tabulate - -import capa.render.utils +import capa.render.utils as rutils def render_verbose(doc): - ostream = six.StringIO() + ostream = rutils.StringIO() + for rule in rutils.capability_rules(doc): + ostream.writeln(rutils.bold(rule['meta']['name'])) return ostream.getvalue() From 4479b9da967ff1bf0291b53cfc84619e22039321 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Sat, 27 Jun 2020 18:17:48 -0600 Subject: [PATCH 22/61] render: implement verbose mode --- capa/render/verbose.py | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/capa/render/verbose.py b/capa/render/verbose.py index ef5559a4..a4791ae2 100644 --- a/capa/render/verbose.py +++ b/capa/render/verbose.py @@ -1,3 +1,22 @@ +""" +example:: + + send data + namespace communication + author william.ballenthin@fireeye.com + description all known techniques for sending data to a potential C2 server + scope function + examples BFB9B5391A13D0AFD787E87AB90F14F5:0x13145D60 + matches 0x10004363 + 0x100046c9 + 0x1000454e + 0x10003a13 + 0x10003415 + 0x10003797 +""" +import tabulate + +import capa.rules import capa.render.utils as rutils @@ -7,4 +26,20 @@ def render_verbose(doc): for rule in rutils.capability_rules(doc): ostream.writeln(rutils.bold(rule['meta']['name'])) + rows = [] + for key in capa.rules.META_KEYS: + if key == 'name' or key not in rule['meta']: + continue + + v = rule['meta'][key] + if isinstance(v, list) and len(v) == 1: + v = v[0] + rows.append((key, v)) + + locations = doc[rule['meta']['name']]['matches'].keys() + rows.append(('matches', '\n'.join(map(hex, locations)))) + + ostream.writeln(tabulate.tabulate(rows, tablefmt='plain')) + ostream.write('\n') + return ostream.getvalue() From bee29e4d3db2b4a3b0712c74ad22c836dd5f3092 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Sat, 27 Jun 2020 18:21:22 -0600 Subject: [PATCH 23/61] render: add examples of output --- capa/render/default.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/capa/render/default.py b/capa/render/default.py index f0294494..68da804d 100644 --- a/capa/render/default.py +++ b/capa/render/default.py @@ -7,6 +7,17 @@ import capa.render.utils as rutils def render_capabilities(doc, ostream): + """ + example:: + + +-------------------------------------------------------+-------------------------------------------------+ + | CAPABILITY | NAMESPACE | + |-------------------------------------------------------+-------------------------------------------------| + | check for OutputDebugString error | anti-analysis/anti-debugging/debugger-detection | + | read and send data from client to server | c2/file-transfer | + | ... | ... | + +-------------------------------------------------------+-------------------------------------------------+ + """ rows = [] for rule in rutils.capability_rules(doc): rows.append((rutils.bold(rule['meta']['name']), rule['meta']['namespace'])) @@ -16,6 +27,17 @@ def render_capabilities(doc, ostream): def render_attack(doc, ostream): + """ + example:: + + +----------------------------------------------------------------------+ + | ATT&CK tactic: EXECUTION | + |----------------------------------------------------------------------| + | Command and Scripting Interpreter::Windows Command Shell [T1059.003] | + | Shared Modules [T1129] | + | ... | + +----------------------------------------------------------------------+ + """ tactics = collections.defaultdict(set) for rule in rutils.capability_rules(doc): if not rule['meta'].get('att&ck'): From cd8446b7cd2e2eeec8014dae394909c709bb1ead Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Sun, 28 Jun 2020 01:26:18 -0600 Subject: [PATCH 24/61] render: add vverbose mode impl --- capa/render/vverbose.py | 104 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 104 insertions(+) create mode 100644 capa/render/vverbose.py diff --git a/capa/render/vverbose.py b/capa/render/vverbose.py new file mode 100644 index 00000000..7af04e42 --- /dev/null +++ b/capa/render/vverbose.py @@ -0,0 +1,104 @@ +import tabulate + +import capa.rules +import capa.render.utils as rutils + + +def render_statement(ostream, statement, indent=0): + ostream.write(' ' * indent) + if statement['type'] in ('and', 'or', 'optional'): + ostream.write(statement['type']) + ostream.writeln(':') + elif statement['type'] == 'not': + # we won't have successful results for the children of a not + # so display a placeholder `...` + ostream.writeln('not: ...') + elif statement['type'] == 'some': + ostream.write(statement['count'] + ' or more') + ostream.writeln(':') + elif statement['type'] == 'range': + ostream.write('range(%d, %d)' % (statement['min'], statement['max'])) + ostream.writeln(':') + elif statement['type'] == 'subscope': + ostream.write(statement['subscope']) + ostream.writeln(':') + elif statement['type'] == 'regex': + ostream.writeln('string: /%s/' % (statement['pattern'])) + else: + raise RuntimeError("unexpected match statement type: " + str(statement)) + + +def render_feature(ostream, match, feature, indent=0): + ostream.write(' ' * indent) + + if feature['type'] in ('string', 'api', 'number', 'offset', 'mnemonic', 'basic block', 'export', 'import', 'section', 'match'): + ostream.write(feature['type']) + ostream.write(': ') + ostream.write(rutils.bold2(feature[feature['type']])) + elif feature['type'] == 'characteristic': + ostream.write('characteristic(%s): %s' % (rutils.bold2(feature['characteristic'][0]), feature['characteristic'][1])) + else: + raise RuntimeError('unexpected feature type: ' + str(feature)) + + if len(match['locations']) == 1: + ostream.write(' @ ') + ostream.write(hex(list(match['locations'])[0])) + elif len(match['locations']) > 1: + ostream.write(' @ ') + ostream.write(', '.join(map(hex, sorted(match['locations'])))) + + ostream.write('\n') + + +def render_node(ostream, match, node, indent=0): + if node['type'] == 'statement': + render_statement(ostream, node['statement'], indent=indent) + elif node['type'] == 'feature': + render_feature(ostream, match, node['feature'], indent=indent) + else: + raise RuntimeError('unexpected node type: ' + str(node)) + + +def render_match(ostream, match, indent=0): + if not match['success']: + return + + if match['node'].get('statement', {}).get('type') == 'optional' and not any(map(lambda m: m['success'], match['children'])): + return + + render_node(ostream, match, match['node'], indent=indent) + + for child in match['children']: + render_match(ostream, child, indent=indent+1) + + +def render_vverbose(doc): + ostream = rutils.StringIO() + + for rule in rutils.capability_rules(doc): + ostream.writeln(rutils.bold(rule['meta']['name'])) + + rows = [] + for key in capa.rules.META_KEYS: + if key == 'name' or key not in rule['meta']: + continue + + v = rule['meta'][key] + if isinstance(v, list) and len(v) == 1: + v = v[0] + rows.append((key, v)) + + ostream.writeln(tabulate.tabulate(rows, tablefmt='plain')) + + if rule['meta']['scope'] == capa.rules.FILE_SCOPE: + render_match(ostream, match, indent=0) + else: + for location, match in doc[rule['meta']['name']]['matches'].items(): + ostream.write(rule['meta']['scope']) + ostream.write(' @ ') + ostream.writeln(hex(location)) + render_match(ostream, match, indent=1) + + ostream.write('\n') + + return ostream.getvalue() \ No newline at end of file From 58f3d105c20782d13207cf45211a7349730fc5cf Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Sun, 28 Jun 2020 01:26:53 -0600 Subject: [PATCH 25/61] pep8 --- capa/render/vverbose.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/capa/render/vverbose.py b/capa/render/vverbose.py index 7af04e42..7649f103 100644 --- a/capa/render/vverbose.py +++ b/capa/render/vverbose.py @@ -69,7 +69,7 @@ def render_match(ostream, match, indent=0): render_node(ostream, match, match['node'], indent=indent) for child in match['children']: - render_match(ostream, child, indent=indent+1) + render_match(ostream, child, indent=indent + 1) def render_vverbose(doc): @@ -101,4 +101,4 @@ def render_vverbose(doc): ostream.write('\n') - return ostream.getvalue() \ No newline at end of file + return ostream.getvalue() From 70c590dfc6e6b8fcc9e3cd1bcfd0a6adafecfb45 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Sun, 28 Jun 2020 01:27:30 -0600 Subject: [PATCH 26/61] render: translate match nodes from features to statements --- capa/render/__init__.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/capa/render/__init__.py b/capa/render/__init__.py index bc979d2b..1f33b193 100644 --- a/capa/render/__init__.py +++ b/capa/render/__init__.py @@ -54,7 +54,7 @@ def convert_statement_to_result_document(statement): elif isinstance(statement, capa.engine.Subscope): return { 'type': 'subscope', - 'scope': statement.scope, + 'subscope': statement.scope, } else: raise RuntimeError("unexpected match statement type: " + str(statement)) @@ -176,12 +176,17 @@ def convert_match_to_result_document(rules, capabilities, result): rule_matches = {address: result for (address, result) in capabilities[rule_name]} if rule.meta.get('capa/subscope-rule'): - # for a subscope rule, rename the rule name to the scope, - # which is consistent with the rule text. + # for a subscope rule, fixup the node to be a scope node, rather than a match feature node. # # e.g. `contain loop/30c4c78e29bf4d54894fc74f664c62e8` -> `basic block` scope = rule.meta['scope'] - doc['node']['feature']['match'] = scope + doc['node'] = { + 'type': 'statement', + 'statement': { + 'type': 'subscope', + 'subscope': scope, + }, + } for location in doc['locations']: doc['children'].append(convert_match_to_result_document(rules, capabilities, rule_matches[location])) From 20dffcdd5b7492d4fb0cc1daad2d60b391101396 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Sun, 28 Jun 2020 01:28:13 -0600 Subject: [PATCH 27/61] render: verbose: don't display locations for file scope matches --- capa/render/verbose.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/capa/render/verbose.py b/capa/render/verbose.py index a4791ae2..7d916e16 100644 --- a/capa/render/verbose.py +++ b/capa/render/verbose.py @@ -36,8 +36,9 @@ def render_verbose(doc): v = v[0] rows.append((key, v)) - locations = doc[rule['meta']['name']]['matches'].keys() - rows.append(('matches', '\n'.join(map(hex, locations)))) + if rule['meta']['scope'] != capa.rules.FILE_SCOPE: + locations = doc[rule['meta']['name']]['matches'].keys() + rows.append(('matches', '\n'.join(map(hex, locations)))) ostream.writeln(tabulate.tabulate(rows, tablefmt='plain')) ostream.write('\n') From 49140195034a277b969d4c6397542fe5bb2f608e Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Sun, 28 Jun 2020 01:28:42 -0600 Subject: [PATCH 28/61] render: utils: add second level bold style --- capa/render/utils.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/capa/render/utils.py b/capa/render/utils.py index 09bdaf67..f279231a 100644 --- a/capa/render/utils.py +++ b/capa/render/utils.py @@ -7,6 +7,11 @@ def bold(s): return termcolor.colored(s, 'blue') +def bold2(s): + """draw attention to the given string, within a `bold` section""" + return termcolor.colored(s, 'green') + + def capability_rules(doc): """enumerate the rules in (namespace, name) order that are 'capability' rules (not lib/subscope/disposition/etc).""" for (_, _, rule) in sorted(map(lambda rule: (rule['meta']['namespace'], rule['meta']['name'], rule), doc.values())): From df333042bffe132830be85b105afb0459e6cef69 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Sun, 28 Jun 2020 01:29:03 -0600 Subject: [PATCH 29/61] render: wire up vverbose mode --- capa/render/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/capa/render/__init__.py b/capa/render/__init__.py index 1f33b193..82699d83 100644 --- a/capa/render/__init__.py +++ b/capa/render/__init__.py @@ -241,8 +241,9 @@ def convert_capabilities_to_result_document(rules, capabilities): def render_vverbose(rules, capabilities): + import capa.render.vverbose doc = convert_capabilities_to_result_document(rules, capabilities) - return '' + return capa.render.vverbose.render_vverbose(doc) def render_verbose(rules, capabilities): From cfb4a9177be69f4a3a5b05c0dab5dee514694d9b Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Sun, 28 Jun 2020 08:26:44 -0600 Subject: [PATCH 30/61] main: add -j for --json --- capa/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/capa/main.py b/capa/main.py index ae10d50e..0194ee95 100644 --- a/capa/main.py +++ b/capa/main.py @@ -638,7 +638,7 @@ def main(argv=None): help='Path to rule file or directory, use embedded rules by default') parser.add_argument('-t', '--tag', type=str, help='Filter on rule meta field values') - parser.add_argument('--json', action='store_true', + parser.add_argument('-j', '--json', action='store_true', help='Emit JSON instead of text') parser.add_argument('-v', '--verbose', action='store_true', help='Enable verbose result document (no effect with --json)') From 0f18ce23b866238c4272573896b93dec99164c8d Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Sun, 28 Jun 2020 08:28:23 -0600 Subject: [PATCH 31/61] render: remove some doc that wasn't useful --- capa/render/__init__.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/capa/render/__init__.py b/capa/render/__init__.py index 82699d83..b0f14120 100644 --- a/capa/render/__init__.py +++ b/capa/render/__init__.py @@ -135,12 +135,6 @@ def convert_match_to_result_document(rules, capabilities, result): """ convert the given Result instance into a common, Python-native data structure. this will become part of the "result document" format that can be emitted to JSON. - - args: - rules (RuleSet): - result (Result): - - returns: Dict[str, Any] """ doc = { 'success': bool(result.success), From 6bf63f72fd34cd11dc4b13ec7110d50f16a2f501 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Sun, 28 Jun 2020 08:30:43 -0600 Subject: [PATCH 32/61] render: document import loop and fix --- capa/render/__init__.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/capa/render/__init__.py b/capa/render/__init__.py index b0f14120..e79363dc 100644 --- a/capa/render/__init__.py +++ b/capa/render/__init__.py @@ -235,18 +235,25 @@ def convert_capabilities_to_result_document(rules, capabilities): def render_vverbose(rules, capabilities): + # there's an import loop here + # if capa.render imports capa.render.vverbose + # and capa.render.vverbose import capa.render (implicitly, as a submodule) + # so, defer the import until routine is called, breaking the import loop. import capa.render.vverbose doc = convert_capabilities_to_result_document(rules, capabilities) return capa.render.vverbose.render_vverbose(doc) def render_verbose(rules, capabilities): + # break import loop import capa.render.verbose doc = convert_capabilities_to_result_document(rules, capabilities) return capa.render.verbose.render_verbose(doc) def render_default(rules, capabilities): + # break import loop + import capa.render.verbose import capa.render.default doc = convert_capabilities_to_result_document(rules, capabilities) return capa.render.default.render_default(doc) From 21e3b75a3a4a09a317056d66d687c13914a303ba Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Sun, 28 Jun 2020 08:41:31 -0600 Subject: [PATCH 33/61] linter: show expected/found filenames --- scripts/lint.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/lint.py b/scripts/lint.py index def5aa0e..25b6b464 100644 --- a/scripts/lint.py +++ b/scripts/lint.py @@ -42,7 +42,7 @@ class NameCasing(Lint): class FilenameDoesntMatchRuleName(Lint): name = 'filename doesn\'t match the rule name' - recommendation = 'Rename rule file to match the rule name' + recommendation = 'Rename rule file to match the rule name, expected: "{:s}", found: "{:s}"' def check_rule(self, ctx, rule): expected = rule.name @@ -56,6 +56,8 @@ class FilenameDoesntMatchRuleName(Lint): found = os.path.basename(rule.meta['capa/path']) + self.recommendation = self.recommendation.format(expected, found) + return expected != found From 9f2c4248e5999c277b53affcf0930641a4aa3ae0 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Sun, 28 Jun 2020 08:44:32 -0600 Subject: [PATCH 34/61] render: display addresses as upper case hex --- capa/render/utils.py | 5 +++++ capa/render/verbose.py | 2 +- capa/render/vverbose.py | 6 +++--- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/capa/render/utils.py b/capa/render/utils.py index f279231a..b0c0abd8 100644 --- a/capa/render/utils.py +++ b/capa/render/utils.py @@ -12,6 +12,11 @@ def bold2(s): return termcolor.colored(s, 'green') +def hex(n): + """render the given number using upper case hex, like: 0x123ABC""" + return '0x%X' % n + + def capability_rules(doc): """enumerate the rules in (namespace, name) order that are 'capability' rules (not lib/subscope/disposition/etc).""" for (_, _, rule) in sorted(map(lambda rule: (rule['meta']['namespace'], rule['meta']['name'], rule), doc.values())): diff --git a/capa/render/verbose.py b/capa/render/verbose.py index 7d916e16..56cb899a 100644 --- a/capa/render/verbose.py +++ b/capa/render/verbose.py @@ -38,7 +38,7 @@ def render_verbose(doc): if rule['meta']['scope'] != capa.rules.FILE_SCOPE: locations = doc[rule['meta']['name']]['matches'].keys() - rows.append(('matches', '\n'.join(map(hex, locations)))) + rows.append(('matches', '\n'.join(map(rutils.hex, locations)))) ostream.writeln(tabulate.tabulate(rows, tablefmt='plain')) ostream.write('\n') diff --git a/capa/render/vverbose.py b/capa/render/vverbose.py index 7649f103..c34e9a78 100644 --- a/capa/render/vverbose.py +++ b/capa/render/vverbose.py @@ -42,10 +42,10 @@ def render_feature(ostream, match, feature, indent=0): if len(match['locations']) == 1: ostream.write(' @ ') - ostream.write(hex(list(match['locations'])[0])) + ostream.write(rutils.hex(list(match['locations'])[0])) elif len(match['locations']) > 1: ostream.write(' @ ') - ostream.write(', '.join(map(hex, sorted(match['locations'])))) + ostream.write(', '.join(map(rutils.hex, sorted(match['locations'])))) ostream.write('\n') @@ -96,7 +96,7 @@ def render_vverbose(doc): for location, match in doc[rule['meta']['name']]['matches'].items(): ostream.write(rule['meta']['scope']) ostream.write(' @ ') - ostream.writeln(hex(location)) + ostream.writeln(rutils.hex(location)) render_match(ostream, match, indent=1) ostream.write('\n') From 67c511e0850f92669062a7f9170512892c8af84d Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Sun, 28 Jun 2020 08:47:50 -0600 Subject: [PATCH 35/61] render: display lists of things as comma separated --- capa/render/vverbose.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/capa/render/vverbose.py b/capa/render/vverbose.py index c34e9a78..4119c955 100644 --- a/capa/render/vverbose.py +++ b/capa/render/vverbose.py @@ -86,6 +86,8 @@ def render_vverbose(doc): v = rule['meta'][key] if isinstance(v, list) and len(v) == 1: v = v[0] + elif isinstance(v, list) and len(v) > 1: + v = ', '.join(v) rows.append((key, v)) ostream.writeln(tabulate.tabulate(rows, tablefmt='plain')) From eca87ab976f0c05646155d48ab7bba400a1a0ccc Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Sun, 28 Jun 2020 08:50:05 -0600 Subject: [PATCH 36/61] render: verbose: only show some meta keys --- capa/render/verbose.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/capa/render/verbose.py b/capa/render/verbose.py index 56cb899a..fe002745 100644 --- a/capa/render/verbose.py +++ b/capa/render/verbose.py @@ -27,7 +27,7 @@ def render_verbose(doc): ostream.writeln(rutils.bold(rule['meta']['name'])) rows = [] - for key in capa.rules.META_KEYS: + for key in ('namespace', 'description', 'scope'): if key == 'name' or key not in rule['meta']: continue From e3245385c128281fafe6d684d3eefbc00fe2b53d Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Sun, 28 Jun 2020 08:52:30 -0600 Subject: [PATCH 37/61] linter: use posixpath to normalize slashes --- scripts/lint.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/lint.py b/scripts/lint.py index 25b6b464..0a25f731 100644 --- a/scripts/lint.py +++ b/scripts/lint.py @@ -13,6 +13,7 @@ import hashlib import logging import os.path import itertools +import posixpath import argparse @@ -87,7 +88,7 @@ class NamespaceDoesntMatchRulePath(Lint): if 'lib' in rule.meta: return False - return rule.meta['namespace'] not in rule.meta['capa/path'].replace('\\', '/') + return rule.meta['namespace'] not in posixpath.normpath(rule.meta['capa/path']) class MissingScope(Lint): From cad438a9bd3289bb2bb52d5f341934d00a36495f Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Sun, 28 Jun 2020 08:53:41 -0600 Subject: [PATCH 38/61] render: remove dead code --- capa/render/__init__.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/capa/render/__init__.py b/capa/render/__init__.py index e79363dc..dbe13c54 100644 --- a/capa/render/__init__.py +++ b/capa/render/__init__.py @@ -27,10 +27,6 @@ def convert_statement_to_result_document(statement): return { 'type': 'not', } - elif isinstance(statement, capa.engine.Or): - return { - 'type': 'or', - } elif isinstance(statement, capa.engine.Some) and statement.count == 0: return { 'type': 'optional' From 50dc945103a9a0e2c3756445f53aaa7b609b1195 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Sun, 28 Jun 2020 08:54:46 -0600 Subject: [PATCH 39/61] render: dont forget about bytes feature --- capa/render/vverbose.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/capa/render/vverbose.py b/capa/render/vverbose.py index 4119c955..58051acc 100644 --- a/capa/render/vverbose.py +++ b/capa/render/vverbose.py @@ -31,7 +31,7 @@ def render_statement(ostream, statement, indent=0): def render_feature(ostream, match, feature, indent=0): ostream.write(' ' * indent) - if feature['type'] in ('string', 'api', 'number', 'offset', 'mnemonic', 'basic block', 'export', 'import', 'section', 'match'): + if feature['type'] in ('string', 'bytes', 'api', 'number', 'offset', 'mnemonic', 'basic block', 'export', 'import', 'section', 'match'): ostream.write(feature['type']) ostream.write(': ') ostream.write(rutils.bold2(feature[feature['type']])) From bdc635a0f98d30670fdee9ceedd413062f757968 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Sun, 28 Jun 2020 09:19:53 -0600 Subject: [PATCH 40/61] render: capture and display matched regex string --- capa/render/__init__.py | 2 ++ capa/render/vverbose.py | 5 ++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/capa/render/__init__.py b/capa/render/__init__.py index dbe13c54..773984c2 100644 --- a/capa/render/__init__.py +++ b/capa/render/__init__.py @@ -46,6 +46,8 @@ def convert_statement_to_result_document(statement): return { 'type': 'regex', 'pattern': statement.pattern, + # the string that was matched + 'match': statement.match, } elif isinstance(statement, capa.engine.Subscope): return { diff --git a/capa/render/vverbose.py b/capa/render/vverbose.py index 58051acc..c5e3009a 100644 --- a/capa/render/vverbose.py +++ b/capa/render/vverbose.py @@ -23,7 +23,10 @@ def render_statement(ostream, statement, indent=0): ostream.write(statement['subscope']) ostream.writeln(':') elif statement['type'] == 'regex': - ostream.writeln('string: /%s/' % (statement['pattern'])) + # regex is a `Statement` not a `Feature` + # this is because it doesn't get extracted, but applies to all strings in scope. + # so we have to handle it here + ostream.writeln('string: %s' % (statement['match'])) else: raise RuntimeError("unexpected match statement type: " + str(statement)) From 9be448a900c0f58d040ac11b23b7d144b0e98b5d Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Sun, 28 Jun 2020 09:20:34 -0600 Subject: [PATCH 41/61] render: dont display implied True for characteristic --- capa/render/vverbose.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/capa/render/vverbose.py b/capa/render/vverbose.py index c5e3009a..b925cdfa 100644 --- a/capa/render/vverbose.py +++ b/capa/render/vverbose.py @@ -39,7 +39,8 @@ def render_feature(ostream, match, feature, indent=0): ostream.write(': ') ostream.write(rutils.bold2(feature[feature['type']])) elif feature['type'] == 'characteristic': - ostream.write('characteristic(%s): %s' % (rutils.bold2(feature['characteristic'][0]), feature['characteristic'][1])) + ostream.write('characteristic(%s)' % (rutils.bold2(feature['characteristic'][0]))) + # note that regex is found in `render_statement` else: raise RuntimeError('unexpected feature type: ' + str(feature)) From 4c4b538a6af95f6deca14e7bd4689792e9f4ea82 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Sun, 28 Jun 2020 09:31:15 -0600 Subject: [PATCH 42/61] render: emit bytes uppercase, separated --- capa/features/__init__.py | 2 +- capa/render/vverbose.py | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/capa/features/__init__.py b/capa/features/__init__.py index 368e3e5f..be4a7c70 100644 --- a/capa/features/__init__.py +++ b/capa/features/__init__.py @@ -106,7 +106,7 @@ class Bytes(Feature): def freeze_serialize(self): return (self.__class__.__name__, - map(lambda x: codecs.encode(x, 'hex'), self.args)) + map(lambda x: codecs.encode(x, 'hex').upper(), self.args)) @classmethod def freeze_deserialize(cls, args): diff --git a/capa/render/vverbose.py b/capa/render/vverbose.py index b925cdfa..3b2d24d0 100644 --- a/capa/render/vverbose.py +++ b/capa/render/vverbose.py @@ -34,10 +34,16 @@ def render_statement(ostream, statement, indent=0): def render_feature(ostream, match, feature, indent=0): ostream.write(' ' * indent) - if feature['type'] in ('string', 'bytes', 'api', 'number', 'offset', 'mnemonic', 'basic block', 'export', 'import', 'section', 'match'): + if feature['type'] in ('string', 'api', 'number', 'offset', 'mnemonic', 'basic block', 'export', 'import', 'section', 'match'): ostream.write(feature['type']) ostream.write(': ') ostream.write(rutils.bold2(feature[feature['type']])) + elif feature['type'] == 'bytes': + ostream.write('bytes: ') + bytes = feature['bytes'] + for i in range(len(bytes)//2): + ostream.write(rutils.bold2(bytes[i:i + 2])) + ostream.write(' ') elif feature['type'] == 'characteristic': ostream.write('characteristic(%s)' % (rutils.bold2(feature['characteristic'][0]))) # note that regex is found in `render_statement` From 71177c9d773ace241cb23567200eb13a0431237d Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Sun, 28 Jun 2020 09:32:16 -0600 Subject: [PATCH 43/61] pep8 --- capa/render/vverbose.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/capa/render/vverbose.py b/capa/render/vverbose.py index 3b2d24d0..1c26eb49 100644 --- a/capa/render/vverbose.py +++ b/capa/render/vverbose.py @@ -40,8 +40,10 @@ def render_feature(ostream, match, feature, indent=0): ostream.write(rutils.bold2(feature[feature['type']])) elif feature['type'] == 'bytes': ostream.write('bytes: ') + # bytes is the uppercase, hex-encoded string. + # it should always be an even number of characters (its hex). bytes = feature['bytes'] - for i in range(len(bytes)//2): + for i in range(len(bytes) // 2): ostream.write(rutils.bold2(bytes[i:i + 2])) ostream.write(' ') elif feature['type'] == 'characteristic': From 617b55ae3cf6e898c2c91f0fef004b8d9c4c56b6 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Sun, 28 Jun 2020 09:37:48 -0600 Subject: [PATCH 44/61] render: only show 4 locations before using an ellipsis --- capa/render/vverbose.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/capa/render/vverbose.py b/capa/render/vverbose.py index 1c26eb49..7cd74291 100644 --- a/capa/render/vverbose.py +++ b/capa/render/vverbose.py @@ -52,12 +52,19 @@ def render_feature(ostream, match, feature, indent=0): else: raise RuntimeError('unexpected feature type: ' + str(feature)) - if len(match['locations']) == 1: + locations = list(sorted(match['locations'])) + if len(locations) == 1: ostream.write(' @ ') - ostream.write(rutils.hex(list(match['locations'])[0])) - elif len(match['locations']) > 1: + ostream.write(rutils.hex(locations[0])) + elif len(locations) > 1: ostream.write(' @ ') - ostream.write(', '.join(map(rutils.hex, sorted(match['locations'])))) + if len(locations) > 4: + # don't display too many locations, because it becomes very noisy. + # probably only the first handful of locations will be useful for inspection. + ostream.write(', '.join(map(rutils.hex, locations[0:4]))) + ostream.write('...') + else: + ostream.write(', '.join(map(rutils.hex, locations))) ostream.write('\n') From d04c9db399322405a2f83ceb0188eb5ffc1656ac Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Sun, 28 Jun 2020 09:39:52 -0600 Subject: [PATCH 45/61] render: ellipsis formatting --- capa/render/vverbose.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/capa/render/vverbose.py b/capa/render/vverbose.py index 7cd74291..aa44a6b0 100644 --- a/capa/render/vverbose.py +++ b/capa/render/vverbose.py @@ -62,7 +62,7 @@ def render_feature(ostream, match, feature, indent=0): # don't display too many locations, because it becomes very noisy. # probably only the first handful of locations will be useful for inspection. ostream.write(', '.join(map(rutils.hex, locations[0:4]))) - ostream.write('...') + ostream.write(', ...') else: ostream.write(', '.join(map(rutils.hex, locations))) From 1d00f188f1f3af1bbf1220162932c987a5bffe2c Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Sun, 28 Jun 2020 09:53:14 -0600 Subject: [PATCH 46/61] render: format numbers and offsets as hex --- capa/render/vverbose.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/capa/render/vverbose.py b/capa/render/vverbose.py index aa44a6b0..2877e9ca 100644 --- a/capa/render/vverbose.py +++ b/capa/render/vverbose.py @@ -34,10 +34,14 @@ def render_statement(ostream, statement, indent=0): def render_feature(ostream, match, feature, indent=0): ostream.write(' ' * indent) - if feature['type'] in ('string', 'api', 'number', 'offset', 'mnemonic', 'basic block', 'export', 'import', 'section', 'match'): + if feature['type'] in ('string', 'api', 'mnemonic', 'basic block', 'export', 'import', 'section', 'match'): ostream.write(feature['type']) ostream.write(': ') ostream.write(rutils.bold2(feature[feature['type']])) + elif feature['type'] in ('number', 'offset'): + ostream.write(feature['type']) + ostream.write(': ') + ostream.write(rutils.bold2(rutils.hex(feature[feature['type']]))) elif feature['type'] == 'bytes': ostream.write('bytes: ') # bytes is the uppercase, hex-encoded string. From c55ce3c1f06ba4864d5ee1fa50da3e7227a538aa Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Sun, 28 Jun 2020 09:55:08 -0600 Subject: [PATCH 47/61] render: hint number of hidden locations --- capa/render/vverbose.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/capa/render/vverbose.py b/capa/render/vverbose.py index 2877e9ca..2f24f50a 100644 --- a/capa/render/vverbose.py +++ b/capa/render/vverbose.py @@ -66,7 +66,7 @@ def render_feature(ostream, match, feature, indent=0): # don't display too many locations, because it becomes very noisy. # probably only the first handful of locations will be useful for inspection. ostream.write(', '.join(map(rutils.hex, locations[0:4]))) - ostream.write(', ...') + ostream.write(', and %d more...' % (len(locations) - 4)) else: ostream.write(', '.join(map(rutils.hex, locations))) From a355f2f0c6964583fe435242a007c614df3eca3f Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Sun, 28 Jun 2020 10:33:28 -0600 Subject: [PATCH 48/61] render: hint the number of matches --- capa/render/default.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/capa/render/default.py b/capa/render/default.py index 68da804d..1a8ca444 100644 --- a/capa/render/default.py +++ b/capa/render/default.py @@ -20,7 +20,12 @@ def render_capabilities(doc, ostream): """ rows = [] for rule in rutils.capability_rules(doc): - rows.append((rutils.bold(rule['meta']['name']), rule['meta']['namespace'])) + count = len(rule['matches']) + if count == 1: + capability = rutils.bold(rule['meta']['name']) + else: + capability = '%s (%d matches)' % (rutils.bold(rule['meta']['name']), count) + rows.append((capability, rule['meta']['namespace'])) ostream.write(tabulate.tabulate(rows, headers=['CAPABILITY', 'NAMESPACE'], tablefmt="psql")) ostream.write("\n") From cb8fedc1b934684ce325e7b0d08bfd5802a13bb7 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Sun, 28 Jun 2020 11:29:36 -0600 Subject: [PATCH 49/61] engine: Range should never return children results --- capa/engine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/capa/engine.py b/capa/engine.py index aa3d3527..6922a0f8 100644 --- a/capa/engine.py +++ b/capa/engine.py @@ -155,7 +155,7 @@ class Range(Statement): def evaluate(self, ctx): if self.child not in ctx: - return Result(False, self, [self.child]) + return Result(False, self, []) count = len(ctx[self.child]) return Result(self.min <= count <= self.max, self, [], locations=ctx[self.child]) From 5f598e8a084a4cf19bff996476badc8366e214f6 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Sun, 28 Jun 2020 11:30:23 -0600 Subject: [PATCH 50/61] render: learn to render Range/Count statements --- capa/render/__init__.py | 1 + capa/render/vverbose.py | 27 +++++++++++++++++++++++++-- 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/capa/render/__init__.py b/capa/render/__init__.py index 773984c2..51793115 100644 --- a/capa/render/__init__.py +++ b/capa/render/__init__.py @@ -41,6 +41,7 @@ def convert_statement_to_result_document(statement): 'type': 'range', 'min': statement.min, 'max': statement.max, + 'child': convert_feature_to_result_document(statement.child), } elif isinstance(statement, capa.engine.Regex): return { diff --git a/capa/render/vverbose.py b/capa/render/vverbose.py index 2f24f50a..db03c3d8 100644 --- a/capa/render/vverbose.py +++ b/capa/render/vverbose.py @@ -17,8 +17,31 @@ def render_statement(ostream, statement, indent=0): ostream.write(statement['count'] + ' or more') ostream.writeln(':') elif statement['type'] == 'range': - ostream.write('range(%d, %d)' % (statement['min'], statement['max'])) - ostream.writeln(':') + # `range` is a weird node, its almost a hybrid of statement+feature. + # it is a specific feature repeated multiple times. + # there's no additional logic in the feature part, just the existence of a feature. + # so, we have to inline some of the feature rendering here. + + child = statement['child'] + if child['type'] in ('string', 'bytes', 'api', 'mnemonic', 'basic block', 'export', 'import', 'section', 'match'): + feature = '%s(%s)' % (child['type'], rutils.bold2(child[child['type']])) + elif child['type'] in ('number', 'offset'): + feature = '%s(%s)' % (child['type'], rutils.bold2(rutils.hex(child[child['type']]))) + elif child['type'] == 'characteristic': + feature = 'characteristic(%s)' % (rutils.bold2(child['characteristic'][0])) + else: + raise RuntimeError('unexpected feature type: ' + str(child)) + + ostream.write('count(%s): ' % feature) + + if statement['max'] == statement['min']: + ostream.writeln('%d' % (statement['min'])) + elif statement['min'] == 0: + ostream.writeln('%d or fewer' % (statement['max'])) + elif statement['max'] == (1 << 64 - 1): + ostream.writeln('%d or more' % (statement['min'])) + else: + ostream.writeln('between %d and %d' % (statement['min'], statement['max'])) elif statement['type'] == 'subscope': ostream.write(statement['subscope']) ostream.writeln(':') From ef4be10543b2fe2cf01c779f40bf4d1eb829af46 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Sun, 28 Jun 2020 11:48:23 -0600 Subject: [PATCH 51/61] render: show logic under `not` nodes --- capa/render/vverbose.py | 52 ++++++++++++++++++++++++++++++++--------- 1 file changed, 41 insertions(+), 11 deletions(-) diff --git a/capa/render/vverbose.py b/capa/render/vverbose.py index db03c3d8..492248c7 100644 --- a/capa/render/vverbose.py +++ b/capa/render/vverbose.py @@ -10,9 +10,8 @@ def render_statement(ostream, statement, indent=0): ostream.write(statement['type']) ostream.writeln(':') elif statement['type'] == 'not': - # we won't have successful results for the children of a not - # so display a placeholder `...` - ostream.writeln('not: ...') + # this statement is handled specially in `render_match` using the MODE_SUCCESS/MODE_FAILURE flags. + ostream.writeln('not:') elif statement['type'] == 'some': ostream.write(statement['count'] + ' or more') ostream.writeln(':') @@ -79,7 +78,10 @@ def render_feature(ostream, match, feature, indent=0): else: raise RuntimeError('unexpected feature type: ' + str(feature)) - locations = list(sorted(match['locations'])) + # its possible to have an empty locations array here, + # such as when we're in MODE_FAILURE and showing the logic + # under a `not` statement (which will have no matched locations). + locations = list(sorted(match.get('locations', []))) if len(locations) == 1: ostream.write(' @ ') ostream.write(rutils.hex(locations[0])) @@ -105,17 +107,45 @@ def render_node(ostream, match, node, indent=0): raise RuntimeError('unexpected node type: ' + str(node)) -def render_match(ostream, match, indent=0): - if not match['success']: - return +# display nodes that successfully evaluated against the sample. +MODE_SUCCESS = 'success' - if match['node'].get('statement', {}).get('type') == 'optional' and not any(map(lambda m: m['success'], match['children'])): - return +# display nodes that did not evaluate to True against the sample. +# this is useful when rendering the logic tree under a `not` node. +MODE_FAILURE = 'failure' + + +def render_match(ostream, match, indent=1, mode=MODE_SUCCESS): + child_mode = mode + if mode == MODE_SUCCESS: + # display only nodes that evaluated successfully. + if not match['success']: + return + # optional statement with no successful children is empty + if (match['node'].get('statement', {}).get('type') == 'optional' + and not any(map(lambda m: m['success'], match['children']))): + return + # not statement, so invert the child mode to show failed evaluations + if match['node'].get('statement', {}).get('type') == 'not': + child_mode = MODE_FAILURE + elif mode == MODE_FAILURE: + # display only nodes that did not evaluate to True + if match['success']: + return + # optional statement with successful children is not relevant + if (match['node'].get('statement', {}).get('type') == 'optional' + and any(map(lambda m: m['success'], match['children']))): + return + # not statement, so invert the child mode to show successful evaluations + if match['node'].get('statement', {}).get('type') == 'not': + child_mode = MODE_SUCCESS + else: + raise RuntimeError('unexpected mode: ' + mode) render_node(ostream, match, match['node'], indent=indent) for child in match['children']: - render_match(ostream, child, indent=indent + 1) + render_match(ostream, child, indent=indent + 1, mode=child_mode) def render_vverbose(doc): @@ -145,7 +175,7 @@ def render_vverbose(doc): ostream.write(rule['meta']['scope']) ostream.write(' @ ') ostream.writeln(rutils.hex(location)) - render_match(ostream, match, indent=1) + render_match(ostream, match) ostream.write('\n') From c34578b80c28eb39218938310e57e5bad6743bff Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Sun, 28 Jun 2020 11:52:03 -0600 Subject: [PATCH 52/61] render: hint number of matches in verbose and vverbose mode --- capa/render/verbose.py | 8 +++++++- capa/render/vverbose.py | 8 +++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/capa/render/verbose.py b/capa/render/verbose.py index fe002745..95261f1a 100644 --- a/capa/render/verbose.py +++ b/capa/render/verbose.py @@ -24,7 +24,13 @@ def render_verbose(doc): ostream = rutils.StringIO() for rule in rutils.capability_rules(doc): - ostream.writeln(rutils.bold(rule['meta']['name'])) + count = len(rule['matches']) + if count == 1: + capability = rutils.bold(rule['meta']['name']) + else: + capability = '%s (%d matches)' % (rutils.bold(rule['meta']['name']), count) + + ostream.writeln(capability) rows = [] for key in ('namespace', 'description', 'scope'): diff --git a/capa/render/vverbose.py b/capa/render/vverbose.py index 492248c7..135ee2cd 100644 --- a/capa/render/vverbose.py +++ b/capa/render/vverbose.py @@ -152,7 +152,13 @@ def render_vverbose(doc): ostream = rutils.StringIO() for rule in rutils.capability_rules(doc): - ostream.writeln(rutils.bold(rule['meta']['name'])) + count = len(rule['matches']) + if count == 1: + capability = rutils.bold(rule['meta']['name']) + else: + capability = '%s (%d matches)' % (rutils.bold(rule['meta']['name']), count) + + ostream.writeln(capability) rows = [] for key in capa.rules.META_KEYS: From f77e1f4946d51f0f03e0fcac422f7a169cc4920b Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Sun, 28 Jun 2020 11:55:33 -0600 Subject: [PATCH 53/61] render: include rule source in the json --- capa/render/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/capa/render/__init__.py b/capa/render/__init__.py index 51793115..36f39daf 100644 --- a/capa/render/__init__.py +++ b/capa/render/__init__.py @@ -224,6 +224,7 @@ def convert_capabilities_to_result_document(rules, capabilities): doc[rule_name] = { 'meta': dict(rule.meta), + 'source': rule.definition, 'matches': { addr: convert_match_to_result_document(rules, capabilities, match) for (addr, match) in matches From 7c51fd4890be2be1c9f9fa0e5616153ac6646281 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Sun, 28 Jun 2020 12:04:53 -0600 Subject: [PATCH 54/61] render: fix rendering of file scope results --- capa/render/vverbose.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/capa/render/vverbose.py b/capa/render/vverbose.py index 135ee2cd..c7114bbf 100644 --- a/capa/render/vverbose.py +++ b/capa/render/vverbose.py @@ -115,7 +115,7 @@ MODE_SUCCESS = 'success' MODE_FAILURE = 'failure' -def render_match(ostream, match, indent=1, mode=MODE_SUCCESS): +def render_match(ostream, match, indent=0, mode=MODE_SUCCESS): child_mode = mode if mode == MODE_SUCCESS: # display only nodes that evaluated successfully. @@ -174,14 +174,22 @@ def render_vverbose(doc): ostream.writeln(tabulate.tabulate(rows, tablefmt='plain')) + if rule['meta']['scope'] == capa.rules.FILE_SCOPE: - render_match(ostream, match, indent=0) + matches = list(doc[rule['meta']['name']]['matches'].values()) + if len(matches) != 1: + # i think there should only ever be one match per file-scope rule, + # because we do the file-scope evaluation a single time. + # but i'm not 100% sure if this is/will always be true. + # so, lets be explicit about our assumptions and raise an exception if they fail. + raise RuntimeError('unexpected file scope match count: ' + len(matches)) + render_match(ostream, matches[0], indent=0) else: - for location, match in doc[rule['meta']['name']]['matches'].items(): + for location, match in sorted(doc[rule['meta']['name']]['matches'].items()): ostream.write(rule['meta']['scope']) ostream.write(' @ ') ostream.writeln(rutils.hex(location)) - render_match(ostream, match) + render_match(ostream, match, indent=1) ostream.write('\n') From c9ab5f9dda20c4c2b75861cb718d425ced1f3914 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Sun, 28 Jun 2020 12:05:15 -0600 Subject: [PATCH 55/61] render: fix handling of file-scope rules --- capa/render/vverbose.py | 1 - 1 file changed, 1 deletion(-) diff --git a/capa/render/vverbose.py b/capa/render/vverbose.py index c7114bbf..d7fedb1f 100644 --- a/capa/render/vverbose.py +++ b/capa/render/vverbose.py @@ -174,7 +174,6 @@ def render_vverbose(doc): ostream.writeln(tabulate.tabulate(rows, tablefmt='plain')) - if rule['meta']['scope'] == capa.rules.FILE_SCOPE: matches = list(doc[rule['meta']['name']]['matches'].values()) if len(matches) != 1: From 112ddb43c93c42f7ce80862696f777bcbc4dc86d Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Sun, 28 Jun 2020 12:13:17 -0600 Subject: [PATCH 56/61] render: hint min width of tables --- capa/render/default.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/capa/render/default.py b/capa/render/default.py index 1a8ca444..73294c7c 100644 --- a/capa/render/default.py +++ b/capa/render/default.py @@ -6,6 +6,15 @@ import tabulate import capa.render.utils as rutils + +def width(s, character_count): + """pad the given string to at least `character_count`""" + if len(s) < character_count: + return s + ' ' * (character_count - len(s)) + else: + return s + + def render_capabilities(doc, ostream): """ example:: @@ -27,7 +36,7 @@ def render_capabilities(doc, ostream): capability = '%s (%d matches)' % (rutils.bold(rule['meta']['name']), count) rows.append((capability, rule['meta']['namespace'])) - ostream.write(tabulate.tabulate(rows, headers=['CAPABILITY', 'NAMESPACE'], tablefmt="psql")) + ostream.write(tabulate.tabulate(rows, headers=[width('CAPABILITY', 40), width('NAMESPACE', 40)], tablefmt="psql")) ostream.write("\n") @@ -69,7 +78,7 @@ def render_attack(doc, ostream): rows.append(("%s::%s %s" % (rutils.bold(technique), subtechnique, id), )) else: raise RuntimeError("unexpected ATT&CK spec format") - ostream.write(tabulate.tabulate(rows, headers=['ATT&CK tactic: ' + rutils.bold(tactic.upper())], tablefmt="psql")) + ostream.write(tabulate.tabulate(rows, headers=[width('ATT&CK tactic: ' + rutils.bold(tactic.upper()), 80)], tablefmt="psql")) ostream.write("\n") From a12bcf238bdce3d6d492e4b668875a7597fc4f9a Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Sun, 28 Jun 2020 12:13:31 -0600 Subject: [PATCH 57/61] pep8 --- capa/render/default.py | 1 - 1 file changed, 1 deletion(-) diff --git a/capa/render/default.py b/capa/render/default.py index 73294c7c..0eb5e0a7 100644 --- a/capa/render/default.py +++ b/capa/render/default.py @@ -6,7 +6,6 @@ import tabulate import capa.render.utils as rutils - def width(s, character_count): """pad the given string to at least `character_count`""" if len(s) < character_count: From ed12c4758ba0840e86a93beae2024a5ae088bc99 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Sun, 28 Jun 2020 12:23:07 -0600 Subject: [PATCH 58/61] render: group ATT&CK tags in a single table --- capa/render/default.py | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/capa/render/default.py b/capa/render/default.py index 0eb5e0a7..978eb1cc 100644 --- a/capa/render/default.py +++ b/capa/render/default.py @@ -21,7 +21,7 @@ def render_capabilities(doc, ostream): +-------------------------------------------------------+-------------------------------------------------+ | CAPABILITY | NAMESPACE | |-------------------------------------------------------+-------------------------------------------------| - | check for OutputDebugString error | anti-analysis/anti-debugging/debugger-detection | + | check for OutputDebugString error (2 matches) | anti-analysis/anti-debugging/debugger-detection | | read and send data from client to server | c2/file-transfer | | ... | ... | +-------------------------------------------------------+-------------------------------------------------+ @@ -35,21 +35,25 @@ def render_capabilities(doc, ostream): capability = '%s (%d matches)' % (rutils.bold(rule['meta']['name']), count) rows.append((capability, rule['meta']['namespace'])) - ostream.write(tabulate.tabulate(rows, headers=[width('CAPABILITY', 40), width('NAMESPACE', 40)], tablefmt="psql")) - ostream.write("\n") + ostream.write(tabulate.tabulate(rows, headers=[width('CAPABILITY', 40), width('NAMESPACE', 40)], tablefmt='psql')) + ostream.write('\n') def render_attack(doc, ostream): """ example:: - +----------------------------------------------------------------------+ - | ATT&CK tactic: EXECUTION | - |----------------------------------------------------------------------| - | Command and Scripting Interpreter::Windows Command Shell [T1059.003] | - | Shared Modules [T1129] | - | ... | - +----------------------------------------------------------------------+ + +------------------------+----------------------------------------------------------------------+ + | ATT&CK Tactic | ATT&CK Technique | + |------------------------+----------------------------------------------------------------------| + | DEFENSE EVASION | Obfuscated Files or Information [T1027] | + | DISCOVERY | Query Registry [T1012] | + | | System Information Discovery [T1082] | + | EXECUTION | Command and Scripting Interpreter::Windows Command Shell [T1059.003] | + | | Shared Modules [T1129] | + | EXFILTRATION | Exfiltration Over C2 Channel [T1041] | + | PERSISTENCE | Create or Modify System Process::Windows Service [T1543.003] | + +------------------------+----------------------------------------------------------------------+ """ tactics = collections.defaultdict(set) for rule in rutils.capability_rules(doc): @@ -66,19 +70,21 @@ def render_attack(doc, ostream): technique, _, id = rest.rpartition(' ') tactics[tactic].add((technique, id)) + rows = [] for tactic, techniques in sorted(tactics.items()): - rows = [] + inner_rows = [] for spec in sorted(techniques): if len(spec) == 2: technique, id = spec - rows.append(("%s %s" % (rutils.bold(technique), id), )) + inner_rows.append('%s %s' % (rutils.bold(technique), id)) elif len(spec) == 3: technique, subtechnique, id = spec - rows.append(("%s::%s %s" % (rutils.bold(technique), subtechnique, id), )) + inner_rows.append('%s::%s %s' % (rutils.bold(technique), subtechnique, id)) else: - raise RuntimeError("unexpected ATT&CK spec format") - ostream.write(tabulate.tabulate(rows, headers=[width('ATT&CK tactic: ' + rutils.bold(tactic.upper()), 80)], tablefmt="psql")) - ostream.write("\n") + raise RuntimeError('unexpected ATT&CK spec format') + rows.append((rutils.bold(tactic.upper()), '\n'.join(inner_rows), )) + ostream.write(tabulate.tabulate(rows, headers=[width('ATT&CK Tactic', 20), width('ATT&CK Technique', 60)], tablefmt='psql')) + ostream.write('\n') def render_default(doc): From 3b8f6862433978166b5b9ae34e6c04c097c4b9e4 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Sun, 28 Jun 2020 12:52:45 -0600 Subject: [PATCH 59/61] readme: update example and describe namespace, removing rule-category --- README.md | 216 ++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 143 insertions(+), 73 deletions(-) diff --git a/README.md b/README.md index f3fb3d16..b1883c18 100644 --- a/README.md +++ b/README.md @@ -5,23 +5,51 @@ You run it against a .exe or .dll and it tells you what it thinks the program ca For example, it might suggest that the file is a backdoor, is capable of installing services, or relies on HTTP to communicate. ``` -λ capa.exe suspicious.exe -q +$ capa.exe suspicious.exe -objectives: - communication - data manipulation - machine access control - -behaviors: - communication-via-http - encrypt data - load code functionality - -techniques: - send-http-request - encrypt data using rc4 - load pe ++------------------------+----------------------------------------------------------------------+ +| ATT&CK Tactic | ATT&CK Technique | +|------------------------+----------------------------------------------------------------------| +| DEFENSE EVASION | Obfuscated Files or Information [T1027] | +| DISCOVERY | Query Registry [T1012] | +| | System Information Discovery [T1082] | +| EXECUTION | Command and Scripting Interpreter::Windows Command Shell [T1059.003] | +| | Shared Modules [T1129] | +| EXFILTRATION | Exfiltration Over C2 Channel [T1041] | +| PERSISTENCE | Create or Modify System Process::Windows Service [T1543.003] | ++------------------------+----------------------------------------------------------------------+ ++-------------------------------------------------------+-------------------------------------------------+ +| CAPABILITY | NAMESPACE | +|-------------------------------------------------------+-------------------------------------------------| +| check for OutputDebugString error | anti-analysis/anti-debugging/debugger-detection | +| read and send data from client to server | c2/file-transfer | +| execute shell command and capture output | c2/shell | +| receive data (2 matches) | communication | +| send data (6 matches) | communication | +| connect to HTTP server (3 matches) | communication/http/client | +| send HTTP request (3 matches) | communication/http/client | +| create pipe | communication/named-pipe/create | +| get socket status (2 matches) | communication/socket | +| receive data on socket (2 matches) | communication/socket/receive | +| send data on socket (3 matches) | communication/socket/send | +| connect TCP socket | communication/socket/tcp | +| encode data using Base64 | data-manipulation/encoding/base64 | +| encode data using XOR (6 matches) | data-manipulation/encoding/xor | +| run as a service | executable/pe | +| get common file path (3 matches) | host-interaction/file-system | +| read file | host-interaction/file-system/read | +| write file (2 matches) | host-interaction/file-system/write | +| print debug messages (2 matches) | host-interaction/log/debug/write-event | +| resolve DNS | host-interaction/network/dns/resolve | +| get hostname | host-interaction/os/hostname | +| create a process with modified I/O handles and window | host-interaction/process/create | +| create process | host-interaction/process/create | +| create registry key | host-interaction/registry/create | +| create service | host-interaction/service/create | +| create thread | host-interaction/thread/create | +| persist via Windows service | persistence/service | ++-------------------------------------------------------+-------------------------------------------------+ ``` # download @@ -66,57 +94,92 @@ For more information about how to use capa, including running it as an IDA scrip # example -Here we run capa against an unknown binary (`level32.exe`), +Here we run capa against an unknown binary (`suspicious.exe`), and the tool reports that the program can decode data via XOR, -references data in its resource section, writes to a file, and spawns a new process. -Taken together, this makes us think that `level32.exe` could be a dropper. -Therefore, our next analysis step might be to run `level32.exe` in a sandbox and try to recover the payload. +contains an embedded PE, writes to a file, and spawns a new process. +Taken together, this makes us think that `suspicious.exe` could be a dropper or backdoor. +Therefore, our next analysis step might be to run `suspicious.exe` in a sandbox and try to recover the payload. ``` -λ capa.exe level32.exe -q -disposition: malicious -category: dropper +$ capa.exe suspicious.exe -objectives: - data manipulation - machine access control ++------------------------+----------------------------------------------------------------------+ +| ATT&CK Tactic | ATT&CK Technique | +|------------------------+----------------------------------------------------------------------| +| DEFENSE EVASION | Obfuscated Files or Information [T1027] | +| DISCOVERY | Query Registry [T1012] | +| | System Information Discovery [T1082] | +| EXECUTION | Command and Scripting Interpreter::Windows Command Shell [T1059.003] | +| | Shared Modules [T1129] | +| EXFILTRATION | Exfiltration Over C2 Channel [T1041] | +| PERSISTENCE | Create or Modify System Process::Windows Service [T1543.003] | ++------------------------+----------------------------------------------------------------------+ -behaviors: - encrypt data - load code functionality - -techniques: - encrypt data using rc4 - load pe - -anomalies: - embedded PE file ++-------------------------------------------------------+-------------------------------------------------+ +| CAPABILITY | NAMESPACE | +|-------------------------------------------------------+-------------------------------------------------| +| check for OutputDebugString error | anti-analysis/anti-debugging/debugger-detection | +| read and send data from client to server | c2/file-transfer | +| execute shell command and capture output | c2/shell | +| receive data (2 matches) | communication | +| send data (6 matches) | communication | +| connect to HTTP server (3 matches) | communication/http/client | +| send HTTP request (3 matches) | communication/http/client | +| create pipe | communication/named-pipe/create | +| get socket status (2 matches) | communication/socket | +| receive data on socket (2 matches) | communication/socket/receive | +| send data on socket (3 matches) | communication/socket/send | +| connect TCP socket | communication/socket/tcp | +| encode data using Base64 | data-manipulation/encoding/base64 | +| encode data using XOR (6 matches) | data-manipulation/encoding/xor | +| run as a service | executable/pe | +| contain an embedded PE file | executable/subfile/pe | +| get common file path (3 matches) | host-interaction/file-system | +| read file | host-interaction/file-system/read | +| write file (2 matches) | host-interaction/file-system/write | +| print debug messages (2 matches) | host-interaction/log/debug/write-event | +| resolve DNS | host-interaction/network/dns/resolve | +| get hostname | host-interaction/os/hostname | +| create a process with modified I/O handles and window | host-interaction/process/create | +| create process | host-interaction/process/create | +| create registry key | host-interaction/registry/create | +| create service | host-interaction/service/create | +| create thread | host-interaction/thread/create | +| persist via Windows service | persistence/service | ++-------------------------------------------------------+-------------------------------------------------+ ``` By passing the `-vv` flag (for Very Verbose), capa reports exactly where it found evidence of these capabilities. This is useful for at least two reasons: - - it helps explain why we should trust the results, and enables us to verify the conclusions + - it helps explain why we should trust the results, and enables us to verify the conclusions, and - it shows where within the binary an experienced analyst might study with IDA Pro ``` -λ capa.exe level32.exe -q -vv -rule load PE file: - - function 0x401c58: +λ capa.exe suspicious.exe -vv +execute shell command and capture output +namespace c2/shell +author matthew.williams@fireeye.com +scope function +att&ck Execution::Command and Scripting Interpreter::Windows Command Shell [T1059.003] +references https://docs.microsoft.com/en-us/windows/win32/api/processthreadsapi/ns-processthreadsapi-startupinfoa +examples Practical Malware Analysis Lab 14-02.exe_:0x4011C0 +function @ 0x10003A13 + and: + match: create a process with modified I/O handles and window @ 0x10003A13 + and: + or: + api: kernel32.CreateProcess @ 0x10003D6D + number: 0x101 @ 0x10003B03 + or: + number: 0x44 @ 0x10003ADC + optional: + api: kernel32.GetStartupInfo @ 0x10003AE4 + match: create pipe @ 0x10003A13 or: - and: - mnemonic(cmp): - - virtual address: 0x401c58 - - virtual address: 0x401c68 - - virtual address: 0x401c74 - - virtual address: 0x401c7f - - virtual address: 0x401c8a - or: - number(0x4550): - - virtual address: 0x401c68 - or: - number(0x5a4d): - - virtual address: 0x401c58 + api: kernel32.CreatePipe @ 0x10003ACB + or: + string: cmd.exe /c @ 0x10003AED ... ``` @@ -131,24 +194,27 @@ In some regards, capa rules are a mixture of the OpenIOC, Yara, and YAML formats Here's an example rule used by capa: ``` -───────┬──────────────────────────────────────────────────────── - │ File: rules/calculate-crc32.yml -───────┼──────────────────────────────────────────────────────── +───────┬────────────────────────────────────────────────────────────────────────── + │ File: rules/data-manipulation/checksum/crc32/chechsum-data-with-crc32.yml +───────┼────────────────────────────────────────────────────────────────────────── 1 │ rule: 2 │ meta: - 3 │ name: calculate CRC32 - 4 | rule-category: data-manipulation/hash-data/hash-data-using-crc32 + 3 │ name: checksum data with CRC32 + 4 │ namespace: data-manipulation/checksum/crc32 5 │ author: moritz.raabe@fireeye.com 6 │ scope: function 7 │ examples: 8 │ - 2D3EDC218A90F03089CC01715A9F047F:0x403CBD - 9 │ features: - 10 │ - and: - 11 │ - mnemonic: shr - 12 │ - number: 0xEDB88320 - 13 │ - number: 8 - 14 │ - characteristic(nzxor): True -───────┴──────────────────────────────────────────────────────── + 9 │ - 7D28CB106CB54876B2A5C111724A07CD:0x402350 # RtlComputeCrc32 + 10 │ features: + 11 │ - or: + 12 │ - and: + 13 │ - mnemonic: shr + 14 │ - number: 0xEDB88320 + 15 │ - number: 8 + 16 │ - characteristic(nzxor): true + 17 │ - api: RtlComputeCrc32 +────────────────────────────────────────────────────────────────────────────────── ``` Rules are yaml files that follow a certain schema. @@ -159,18 +225,22 @@ The top-level element is a dictionary named `rule` with two required children di ## meta block -The meta block contains metadata that identifies the rule, categorizes into behaviors, +The meta block contains metadata that identifies the rule, groups the technique, and provides references to additional documentation. Here are the common fields: - `name` is required. This string should uniquely identify the rule. - - `rule-category` is required when a rule describes a behavior (as opposed to matching a role or disposition). -The rule category specifies an objective, behavior, and technique matched by this rule, -using a format like `$objective/$behavior/$technique`. -An objective is a high-level goal of a program, such as "communication". -A behavior is something that a program may do, such as "communication via socket". -A technique is a way of implementing some behavior, such as "send-data". + - `namespace` is required when a rule describes a technique (as opposed to matching a role or disposition). +The namespace helps us group rules into buckets, such as `host-manipulation/file-system` or `impact/wipe-disk`. +When capa emits its final report, it orders the results by category, so related techniques show up together. + + - `att&ck` is an optional list of [ATT&CK framework](https://attack.mitre.org/) techniques that the rule implies, like +`Discovery::Query Registry [T1012]` or `Persistence::Create or Modify System Process::Windows Service [T1543.003]`. +These tags are used to derive the ATT&CK mapping for the sample when the report gets rendered. + + - `mbc` is an optional list of [Malware Behavior Catalog](https://github.com/MBCProject/mbc-markdown) techniques that the rule implies, +like the ATT&CK list. - `maec/malware-category` is required when the rule describes a role, such as `dropper` or `backdoor`. @@ -189,10 +259,10 @@ A technique is a way of implementing some behavior, such as "send-data". - `author` specifies the name or handle of the rule author. - - `examples` is a list of references to samples that should match the capability. + - `examples` is a required list of references to samples that should match the capability. When the rule scope is `function`, then the reference should be `:`. - - `reference` lists related information in a book, article, blog post, etc. + - `references` lists related information in a book, article, blog post, etc. Other fields are allowed but not defined in this specification. `description` is probably a good one. From 96158c6ca59ef9c4aa132a509d2fd37e80797f39 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Sun, 28 Jun 2020 12:58:34 -0600 Subject: [PATCH 60/61] main: update detection for unsupported files via namespace matches --- capa/main.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/capa/main.py b/capa/main.py index 0194ee95..b45b1e79 100644 --- a/capa/main.py +++ b/capa/main.py @@ -418,18 +418,18 @@ def render_capabilities_vverbose(ruleset, results): render_result(res, indent=' ') -def appears_rule_cat(rules, capabilities, rule_cat): +def has_rule_with_namespace(rules, capabilities, rule_cat): for rule_name in capabilities.keys(): - if rules.rules[rule_name].meta.get('rule-category', '').startswith(rule_cat): + if rules.rules[rule_name].meta.get('namespace', '').startswith(rule_cat): return True return False -def is_file_limitation(rules, capabilities, is_standalone=True): +def has_file_limitation(rules, capabilities, is_standalone=True): file_limitations = { # capa will likely detect installer specific functionality. # this is probably not what the user wants. - 'other-features/installer/': [ + 'executable/installer': [ ' This sample appears to be an installer.', ' ', ' capa cannot handle installers well. This means the results may be misleading or incomplete.' @@ -438,7 +438,7 @@ def is_file_limitation(rules, capabilities, is_standalone=True): # capa won't detect much in .NET samples. # it might match some file-level things. # for consistency, bail on things that we don't support. - 'other-features/compiled-to-dot-net': [ + 'runtime/dotnet': [ ' This sample appears to be a .NET module.', ' ', ' .NET is a cross-platform framework for running managed applications.', @@ -448,7 +448,7 @@ def is_file_limitation(rules, capabilities, is_standalone=True): # capa will detect dozens of capabilities for AutoIt samples, # but these are due to the AutoIt runtime, not the payload script. # so, don't confuse the user with FP matches - bail instead - 'other-features/compiled-with-autoit': [ + 'compiler/autoit': [ ' This sample appears to be compiled with AutoIt.', ' ', ' AutoIt is a freeware BASIC-like scripting language designed for automating the Windows GUI.', @@ -456,7 +456,7 @@ def is_file_limitation(rules, capabilities, is_standalone=True): ' You may have to analyze the file manually, using a tool like the AutoIt decompiler MyAut2Exe.' ], # capa won't detect much in packed samples - 'anti-analysis/packing/': [ + 'anti-analysis/packer/': [ ' This sample appears to be packed.', ' ', ' Packed samples have often been obfuscated to hide their logic.', @@ -466,7 +466,7 @@ def is_file_limitation(rules, capabilities, is_standalone=True): } for category, dialogue in file_limitations.items(): - if not appears_rule_cat(rules, capabilities, category): + if not has_rule_with_namespace(rules, capabilities, category): continue logger.warning('-' * 80) for line in dialogue: @@ -736,7 +736,7 @@ def main(argv=None): capabilities = find_capabilities(rules, extractor) - if is_file_limitation(rules, capabilities): + if has_file_limitation(rules, capabilities): # bail if capa encountered file limitation e.g. a packed binary # do show the output in verbose mode, though. if not (args.verbose or args.vverbose): @@ -793,7 +793,7 @@ def ida_main(): import capa.features.extractors.ida capabilities = find_capabilities(rules, capa.features.extractors.ida.IdaFeatureExtractor()) - if is_file_limitation(rules, capabilities, is_standalone=False): + if has_file_limitation(rules, capabilities, is_standalone=False): capa.ida.helpers.inform_user_ida_ui('capa encountered warnings during analysis') render_capabilities_default(rules, capabilities) From 39a49fb4b9da6702b83b02fa1fbf34ff2c548bd6 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Sun, 28 Jun 2020 13:01:56 -0600 Subject: [PATCH 61/61] submodule: rules: bump to master --- rules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rules b/rules index 0d775647..9f023a30 160000 --- a/rules +++ b/rules @@ -1 +1 @@ -Subproject commit 0d775647853659e77fefaeed35403771591475ce +Subproject commit 9f023a301ada34e43ce1f12d9f8d068545a7f85d