From 482bb4487653d58ac609235fb6c0d90e62ee08a0 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Thu, 2 Jul 2020 12:17:09 -0600 Subject: [PATCH 1/9] main: collect metadata for inclusion in json document closes #45 --- capa/ida/ida_capa_explorer.py | 22 +++++++++++++---- capa/main.py | 42 +++++++++++++++++++++++++++++--- capa/render/__init__.py | 46 ++++++++++++++++++++--------------- capa/render/utils.py | 2 +- capa/render/verbose.py | 2 +- capa/render/vverbose.py | 4 +-- 6 files changed, 86 insertions(+), 32 deletions(-) diff --git a/capa/ida/ida_capa_explorer.py b/capa/ida/ida_capa_explorer.py index ec55c34f..06f23b8b 100644 --- a/capa/ida/ida_capa_explorer.py +++ b/capa/ida/ida_capa_explorer.py @@ -1,8 +1,10 @@ import os import logging +import datetime import collections import idaapi +import idautils from PyQt5 import QtGui, QtCore, QtWidgets import capa.main @@ -362,12 +364,22 @@ class CapaExplorerForm(idaapi.PluginForm): logger.info("analysis completed.") - doc = capa.render.convert_capabilities_to_result_document(rules, capabilities) + meta = { + "timestamp": datetime.datetime.now().isoformat(), + # "argv" is not relevant here + "sample": { + "md5": idautils.GetInputFileMD5(), + # "sha1" not easily accessible + # "sha256" not easily accessible + "path": idaapi.get_input_file_path(), + }, + "analysis": { + # "format" is difficult to determine via IDAPython + "extractor": "ida", + } + } - import json - - with open("C:\\Users\\spring\\Desktop\\hmm.json", "w") as twitter_data_file: - json.dump(doc, twitter_data_file, indent=4, sort_keys=True, cls=capa.render.CapaJsonObjectEncoder) + doc = capa.render.convert_capabilities_to_result_document(meta, rules, capabilities) self.model_data.render_capa_doc(doc) self.render_capa_doc_summary(doc) diff --git a/capa/main.py b/capa/main.py index 34264ede..4d5850e6 100644 --- a/capa/main.py +++ b/capa/main.py @@ -4,8 +4,10 @@ capa - detect capabilities in programs. """ import os import sys +import hashlib import logging import os.path +import datetime import collections import tqdm @@ -320,6 +322,34 @@ def get_rules(rule_path): return rules +def collect_metadata(argv, path, format, extractor): + md5 = hashlib.md5() + sha1 = hashlib.sha1() + sha256 = hashlib.sha256() + + with open(path, 'rb') as f: + buf = f.read() + + md5.update(buf) + sha1.update(buf) + sha256.update(buf) + + return { + "timestamp": datetime.datetime.now().isoformat(), + "argv": argv, + "sample": { + "md5": md5.hexdigest(), + "sha1": sha1.hexdigest(), + "sha256": sha256.hexdigest(), + "path": os.path.normpath(path), + }, + "analysis": { + "format": format, + "extractor": extractor.__class__.__name__, + }, + } + + def main(argv=None): if argv is None: argv = sys.argv[1:] @@ -420,9 +450,11 @@ def main(argv=None): taste = f.read(8) if (args.format == "freeze") or (args.format == "auto" and capa.features.freeze.is_freeze(taste)): + format = "freeze" with open(args.sample, "rb") as f: extractor = capa.features.freeze.load(f.read()) else: + format = args.format try: extractor = get_extractor(args.sample, args.format) except UnsupportedFormatError: @@ -446,6 +478,8 @@ def main(argv=None): logger.error("-" * 80) return -1 + meta = collect_metadata(argv, args.sample, format, extractor) + capabilities = find_capabilities(rules, extractor) if has_file_limitation(rules, capabilities): @@ -460,13 +494,13 @@ def main(argv=None): # renderers should use coloring and assume it will be stripped out if necessary. colorama.init() if args.json: - print(capa.render.render_json(rules, capabilities)) + print(capa.render.render_json(meta, rules, capabilities)) elif args.vverbose: - print(capa.render.render_vverbose(rules, capabilities)) + print(capa.render.render_vverbose(meta, rules, capabilities)) elif args.verbose: - print(capa.render.render_verbose(rules, capabilities)) + print(capa.render.render_verbose(meta, rules, capabilities)) else: - print(capa.render.render_default(rules, capabilities)) + print(capa.render.render_default(meta, rules, capabilities)) colorama.deinit() logger.info("done.") diff --git a/capa/render/__init__.py b/capa/render/__init__.py index 2dabb022..a9ba1784 100644 --- a/capa/render/__init__.py +++ b/capa/render/__init__.py @@ -175,7 +175,7 @@ def convert_match_to_result_document(rules, capabilities, result): return doc -def convert_capabilities_to_result_document(rules, capabilities): +def convert_capabilities_to_result_document(meta, rules, capabilities): """ convert the given rule set and capabilities result to a common, Python-native data structure. this format can be directly emitted to JSON, or passed to the other `render_*` routines @@ -187,22 +187,29 @@ def convert_capabilities_to_result_document(rules, capabilities): ```json { - $rule-name: { - "meta": {...copied from rule.meta...}, - "matches: { - $address: {...match details...}, - ... - } - }, - ... + "meta": {...}, + "rules: { + $rule-name: { + "meta": {...copied from rule.meta...}, + "matches: { + $address: {...match details...}, + ... + } + }, + ... + } } ``` Args: + meta (Dict[str, Any]): rules (RuleSet): capabilities (Dict[str, List[Tuple[int, Result]]]): """ - doc = {} + doc = { + "meta": meta, + "rules": {}, + } for rule_name, matches in capabilities.items(): rule = rules[rule_name] @@ -210,7 +217,7 @@ def convert_capabilities_to_result_document(rules, capabilities): if rule.meta.get("capa/subscope-rule"): continue - doc[rule_name] = { + doc["rules"][rule_name] = { "meta": dict(rule.meta), "source": rule.definition, "matches": { @@ -221,35 +228,36 @@ def convert_capabilities_to_result_document(rules, capabilities): return doc -def render_vverbose(rules, capabilities): +def render_vverbose(meta, rules, capabilities): # there's an import loop here # if capa.render imports capa.render.vverbose # and capa.render.vverbose import capa.render (implicitly, as a submodule) # so, defer the import until routine is called, breaking the import loop. import capa.render.vverbose - doc = convert_capabilities_to_result_document(rules, capabilities) + doc = convert_capabilities_to_result_document(meta, rules, capabilities) return capa.render.vverbose.render_vverbose(doc) -def render_verbose(rules, capabilities): +def render_verbose(meta, rules, capabilities): # break import loop import capa.render.verbose - doc = convert_capabilities_to_result_document(rules, capabilities) + doc = convert_capabilities_to_result_document(meta, rules, capabilities) return capa.render.verbose.render_verbose(doc) -def render_default(rules, capabilities): +def render_default(meta, rules, capabilities): # break import loop import capa.render.verbose import capa.render.default - doc = convert_capabilities_to_result_document(rules, capabilities) + doc = convert_capabilities_to_result_document(meta, rules, capabilities) return capa.render.default.render_default(doc) class CapaJsonObjectEncoder(json.JSONEncoder): + """JSON encoder that emits Python sets as sorted lists""" def default(self, obj): if isinstance(obj, (list, dict, int, float, bool, type(None))) or isinstance(obj, six.string_types): return json.JSONEncoder.default(self, obj) @@ -260,7 +268,7 @@ class CapaJsonObjectEncoder(json.JSONEncoder): return json.JSONEncoder.default(self, obj) -def render_json(rules, capabilities): +def render_json(meta, rules, capabilities): return json.dumps( - convert_capabilities_to_result_document(rules, capabilities), cls=CapaJsonObjectEncoder, sort_keys=True, + convert_capabilities_to_result_document(meta, rules, capabilities), cls=CapaJsonObjectEncoder, sort_keys=True, ) diff --git a/capa/render/utils.py b/capa/render/utils.py index 670c1bcf..6ef1e854 100644 --- a/capa/render/utils.py +++ b/capa/render/utils.py @@ -20,7 +20,7 @@ def hex(n): def capability_rules(doc): """enumerate the rules in (namespace, name) order that are 'capability' rules (not lib/subscope/disposition/etc).""" for (_, _, rule) in sorted( - map(lambda rule: (rule["meta"].get("namespace", ""), rule["meta"]["name"], rule), doc.values()) + map(lambda rule: (rule["meta"].get("namespace", ""), rule["meta"]["name"], rule), doc["rules"].values()) ): if rule["meta"].get("lib"): continue diff --git a/capa/render/verbose.py b/capa/render/verbose.py index cf13402f..c89697bb 100644 --- a/capa/render/verbose.py +++ b/capa/render/verbose.py @@ -43,7 +43,7 @@ def render_verbose(doc): rows.append((key, v)) if rule["meta"]["scope"] != capa.rules.FILE_SCOPE: - locations = doc[rule["meta"]["name"]]["matches"].keys() + locations = doc["rules"][rule["meta"]["name"]]["matches"].keys() rows.append(("matches", "\n".join(map(rutils.hex, locations)))) ostream.writeln(tabulate.tabulate(rows, tablefmt="plain")) diff --git a/capa/render/vverbose.py b/capa/render/vverbose.py index a0182b11..d738223b 100644 --- a/capa/render/vverbose.py +++ b/capa/render/vverbose.py @@ -165,7 +165,7 @@ def render_vverbose(doc): ostream.writeln(tabulate.tabulate(rows, tablefmt="plain")) if rule["meta"]["scope"] == capa.rules.FILE_SCOPE: - matches = list(doc[rule["meta"]["name"]]["matches"].values()) + matches = list(doc["rules"][rule["meta"]["name"]]["matches"].values()) if len(matches) != 1: # i think there should only ever be one match per file-scope rule, # because we do the file-scope evaluation a single time. @@ -174,7 +174,7 @@ def render_vverbose(doc): raise RuntimeError("unexpected file scope match count: " + len(matches)) render_match(ostream, matches[0], indent=0) else: - for location, match in sorted(doc[rule["meta"]["name"]]["matches"].items()): + for location, match in sorted(doc["rules"][rule["meta"]["name"]]["matches"].items()): ostream.write(rule["meta"]["scope"]) ostream.write(" @ ") ostream.writeln(rutils.hex(location)) From cff7e9195a2180364348ce000deb2b3957415fb8 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Thu, 2 Jul 2020 12:17:53 -0600 Subject: [PATCH 2/9] pep8 --- capa/ida/ida_capa_explorer.py | 2 +- capa/main.py | 7 ++----- capa/render/__init__.py | 1 + 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/capa/ida/ida_capa_explorer.py b/capa/ida/ida_capa_explorer.py index 06f23b8b..f42c0978 100644 --- a/capa/ida/ida_capa_explorer.py +++ b/capa/ida/ida_capa_explorer.py @@ -376,7 +376,7 @@ class CapaExplorerForm(idaapi.PluginForm): "analysis": { # "format" is difficult to determine via IDAPython "extractor": "ida", - } + }, } doc = capa.render.convert_capabilities_to_result_document(meta, rules, capabilities) diff --git a/capa/main.py b/capa/main.py index 4d5850e6..9d3d96e0 100644 --- a/capa/main.py +++ b/capa/main.py @@ -327,7 +327,7 @@ def collect_metadata(argv, path, format, extractor): sha1 = hashlib.sha1() sha256 = hashlib.sha256() - with open(path, 'rb') as f: + with open(path, "rb") as f: buf = f.read() md5.update(buf) @@ -343,10 +343,7 @@ def collect_metadata(argv, path, format, extractor): "sha256": sha256.hexdigest(), "path": os.path.normpath(path), }, - "analysis": { - "format": format, - "extractor": extractor.__class__.__name__, - }, + "analysis": {"format": format, "extractor": extractor.__class__.__name__,}, } diff --git a/capa/render/__init__.py b/capa/render/__init__.py index a9ba1784..5b625217 100644 --- a/capa/render/__init__.py +++ b/capa/render/__init__.py @@ -258,6 +258,7 @@ def render_default(meta, rules, capabilities): class CapaJsonObjectEncoder(json.JSONEncoder): """JSON encoder that emits Python sets as sorted lists""" + def default(self, obj): if isinstance(obj, (list, dict, int, float, bool, type(None))) or isinstance(obj, six.string_types): return json.JSONEncoder.default(self, obj) From 2fd59f0674c440dc8373dbc073421cfc929dcca7 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Thu, 2 Jul 2020 12:18:03 -0600 Subject: [PATCH 3/9] setup: add pytest tweaks to dev deps --- setup.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 84ea4f26..3522e8e2 100644 --- a/setup.py +++ b/setup.py @@ -40,7 +40,9 @@ setuptools.setup( entry_points={"console_scripts": ["capa=capa.main:main",]}, include_package_data=True, install_requires=requirements, - extras_require={"dev": ["pytest", "pytest-sugar", "pycodestyle", "black", "isort"]}, + extras_require={ + "dev": ["pytest", "pytest-sugar", "pytest-instafail", "pytest-cov", "pycodestyle", "black", "isort"] + }, zip_safe=False, keywords="capa", classifiers=[ From 6575a019c3d12a6a153ff09c6d77c096dcefe8f7 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Thu, 2 Jul 2020 15:31:08 -0600 Subject: [PATCH 4/9] render: add report header and metadata closes #45 --- capa/main.py | 1 + capa/render/default.py | 21 ++++++++++++++++++++- capa/render/verbose.py | 16 ++++++++++++++++ capa/render/vverbose.py | 17 +++++++++++++++++ 4 files changed, 54 insertions(+), 1 deletion(-) diff --git a/capa/main.py b/capa/main.py index 9d3d96e0..d8584d9d 100644 --- a/capa/main.py +++ b/capa/main.py @@ -336,6 +336,7 @@ def collect_metadata(argv, path, format, extractor): return { "timestamp": datetime.datetime.now().isoformat(), + "version": capa.version.__version__, "argv": argv, "sample": { "md5": md5.hexdigest(), diff --git a/capa/render/default.py b/capa/render/default.py index a93bdda7..55cc8639 100644 --- a/capa/render/default.py +++ b/capa/render/default.py @@ -5,6 +5,7 @@ import tabulate import capa.render.utils as rutils +tabulate.PRESERVE_WHITESPACE = True def width(s, character_count): """pad the given string to at least `character_count`""" @@ -14,6 +15,22 @@ def width(s, character_count): return s +def render_meta(doc, ostream): + rows = [( + rutils.bold("Capa Report for"), + rutils.bold(doc["meta"]["sample"]["md5"]), + )] + + for k in ("timestamp", "version"): + rows.append((width(k, 22), width(doc["meta"][k], 60))) + + for k in ("path", "md5"): + rows.append((k, doc["meta"]["sample"][k])) + + ostream.write(tabulate.tabulate(rows, tablefmt="psql")) + ostream.write("\n") + + def render_capabilities(doc, ostream): """ example:: @@ -90,8 +107,10 @@ def render_attack(doc, ostream): def render_default(doc): - ostream = six.StringIO() + ostream = rutils.StringIO() + render_meta(doc, ostream) + ostream.write("\n") render_attack(doc, ostream) ostream.write("\n") render_capabilities(doc, ostream) diff --git a/capa/render/verbose.py b/capa/render/verbose.py index c89697bb..7aebd70b 100644 --- a/capa/render/verbose.py +++ b/capa/render/verbose.py @@ -23,6 +23,22 @@ import capa.render.utils as rutils def render_verbose(doc): ostream = rutils.StringIO() + rows = [( + rutils.bold("Capa Report for"), + rutils.bold(doc["meta"]["sample"]["md5"]), + )] + for k in ("timestamp", "version"): + rows.append((k,doc["meta"][k])) + + for k in ("path", "md5", "sha1", "sha256"): + rows.append((k, doc["meta"]["sample"][k])) + + for k in ("format", "extractor"): + rows.append((k, doc["meta"]["analysis"][k])) + + ostream.writeln(tabulate.tabulate(rows, tablefmt="plain")) + ostream.write("\n") + for rule in rutils.capability_rules(doc): count = len(rule["matches"]) if count == 1: diff --git a/capa/render/vverbose.py b/capa/render/vverbose.py index d738223b..ddfb7592 100644 --- a/capa/render/vverbose.py +++ b/capa/render/vverbose.py @@ -141,6 +141,23 @@ def render_match(ostream, match, indent=0, mode=MODE_SUCCESS): def render_vverbose(doc): ostream = rutils.StringIO() + rows = [( + rutils.bold("Capa Report for"), + rutils.bold(doc["meta"]["sample"]["md5"]), + )] + for k in ("timestamp", "version"): + rows.append((k,doc["meta"][k])) + + for k in ("path", "md5", "sha1", "sha256"): + rows.append((k, doc["meta"]["sample"][k])) + + for k in ("format", "extractor"): + rows.append((k, doc["meta"]["analysis"][k])) + + ostream.writeln(rutils.bold("Capa Report for " + doc["meta"]["sample"]["md5"])) + ostream.writeln(tabulate.tabulate(rows, tablefmt="plain")) + ostream.write("\n") + for rule in rutils.capability_rules(doc): count = len(rule["matches"]) if count == 1: From 267664934252c5b5b21f10d2fb03d0b1d71dc62f Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Thu, 2 Jul 2020 15:31:47 -0600 Subject: [PATCH 5/9] pep8 --- capa/render/default.py | 6 ++---- capa/render/verbose.py | 7 ++----- capa/render/vverbose.py | 7 ++----- 3 files changed, 6 insertions(+), 14 deletions(-) diff --git a/capa/render/default.py b/capa/render/default.py index 55cc8639..c46ef0cf 100644 --- a/capa/render/default.py +++ b/capa/render/default.py @@ -7,6 +7,7 @@ import capa.render.utils as rutils tabulate.PRESERVE_WHITESPACE = True + def width(s, character_count): """pad the given string to at least `character_count`""" if len(s) < character_count: @@ -16,10 +17,7 @@ def width(s, character_count): def render_meta(doc, ostream): - rows = [( - rutils.bold("Capa Report for"), - rutils.bold(doc["meta"]["sample"]["md5"]), - )] + rows = [(rutils.bold("Capa Report for"), rutils.bold(doc["meta"]["sample"]["md5"]),)] for k in ("timestamp", "version"): rows.append((width(k, 22), width(doc["meta"][k], 60))) diff --git a/capa/render/verbose.py b/capa/render/verbose.py index 7aebd70b..1190ea6c 100644 --- a/capa/render/verbose.py +++ b/capa/render/verbose.py @@ -23,12 +23,9 @@ import capa.render.utils as rutils def render_verbose(doc): ostream = rutils.StringIO() - rows = [( - rutils.bold("Capa Report for"), - rutils.bold(doc["meta"]["sample"]["md5"]), - )] + rows = [(rutils.bold("Capa Report for"), rutils.bold(doc["meta"]["sample"]["md5"]),)] for k in ("timestamp", "version"): - rows.append((k,doc["meta"][k])) + rows.append((k, doc["meta"][k])) for k in ("path", "md5", "sha1", "sha256"): rows.append((k, doc["meta"]["sample"][k])) diff --git a/capa/render/vverbose.py b/capa/render/vverbose.py index ddfb7592..79dd429e 100644 --- a/capa/render/vverbose.py +++ b/capa/render/vverbose.py @@ -141,12 +141,9 @@ def render_match(ostream, match, indent=0, mode=MODE_SUCCESS): def render_vverbose(doc): ostream = rutils.StringIO() - rows = [( - rutils.bold("Capa Report for"), - rutils.bold(doc["meta"]["sample"]["md5"]), - )] + rows = [(rutils.bold("Capa Report for"), rutils.bold(doc["meta"]["sample"]["md5"]),)] for k in ("timestamp", "version"): - rows.append((k,doc["meta"][k])) + rows.append((k, doc["meta"][k])) for k in ("path", "md5", "sha1", "sha256"): rows.append((k, doc["meta"]["sample"][k])) From ff44801e5c6a31c1e52b6886b4126114b306c6a4 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Thu, 2 Jul 2020 16:24:37 -0600 Subject: [PATCH 6/9] render: meta: add base address --- capa/features/extractors/__init__.py | 9 +++++++++ capa/features/extractors/ida/__init__.py | 3 +++ capa/features/extractors/viv/__init__.py | 4 ++++ capa/main.py | 6 +++++- capa/render/verbose.py | 4 +++- capa/render/vverbose.py | 4 +++- 6 files changed, 27 insertions(+), 3 deletions(-) diff --git a/capa/features/extractors/__init__.py b/capa/features/extractors/__init__.py index b44adbc6..85d1bd49 100644 --- a/capa/features/extractors/__init__.py +++ b/capa/features/extractors/__init__.py @@ -39,6 +39,15 @@ class FeatureExtractor(object): # super(FeatureExtractor, self).__init__() + @abc.abstractmethod + def get_base_address(self): + """ + fetch the preferred load address at which the sample was analyzed. + + returns: int + """ + raise NotImplemented + @abc.abstractmethod def extract_file_features(self): """ diff --git a/capa/features/extractors/ida/__init__.py b/capa/features/extractors/ida/__init__.py index 378a5c90..5091fb34 100644 --- a/capa/features/extractors/ida/__init__.py +++ b/capa/features/extractors/ida/__init__.py @@ -43,6 +43,9 @@ class IdaFeatureExtractor(FeatureExtractor): def __init__(self): super(IdaFeatureExtractor, self).__init__() + def get_base_address(self): + return idaapi.get_imagebase() + def extract_file_features(self): for feature, va in capa.features.extractors.ida.file.extract_features(): yield feature, va diff --git a/capa/features/extractors/viv/__init__.py b/capa/features/extractors/viv/__init__.py index 1e27083d..82950418 100644 --- a/capa/features/extractors/viv/__init__.py +++ b/capa/features/extractors/viv/__init__.py @@ -44,6 +44,10 @@ class VivisectFeatureExtractor(FeatureExtractor): self.vw = vw self.path = path + def get_base_address(self): + # assume there is only one file loaded into the vw + return list(self.vw.filemeta.values())[0]["imagebase"] + def extract_file_features(self): for feature, va in capa.features.extractors.viv.file.extract_features(self.vw, self.path): yield feature, va diff --git a/capa/main.py b/capa/main.py index d8584d9d..0e29e1dc 100644 --- a/capa/main.py +++ b/capa/main.py @@ -344,7 +344,11 @@ def collect_metadata(argv, path, format, extractor): "sha256": sha256.hexdigest(), "path": os.path.normpath(path), }, - "analysis": {"format": format, "extractor": extractor.__class__.__name__,}, + "analysis": { + "format": format, + "extractor": extractor.__class__.__name__, + "base_address": extractor.get_base_address(), + }, } diff --git a/capa/render/verbose.py b/capa/render/verbose.py index 1190ea6c..24e9cd3c 100644 --- a/capa/render/verbose.py +++ b/capa/render/verbose.py @@ -31,7 +31,9 @@ def render_verbose(doc): rows.append((k, doc["meta"]["sample"][k])) for k in ("format", "extractor"): - rows.append((k, doc["meta"]["analysis"][k])) + rows.append((k.replace("_", " "), doc["meta"]["analysis"][k])) + + rows.append(("base address", rutils.hex(doc["meta"]["analysis"]["base_address"]))) ostream.writeln(tabulate.tabulate(rows, tablefmt="plain")) ostream.write("\n") diff --git a/capa/render/vverbose.py b/capa/render/vverbose.py index 79dd429e..2fe96367 100644 --- a/capa/render/vverbose.py +++ b/capa/render/vverbose.py @@ -149,7 +149,9 @@ def render_vverbose(doc): rows.append((k, doc["meta"]["sample"][k])) for k in ("format", "extractor"): - rows.append((k, doc["meta"]["analysis"][k])) + rows.append((k.replace("_", " "), doc["meta"]["analysis"][k])) + + rows.append(("base address", rutils.hex(doc["meta"]["analysis"]["base_address"]))) ostream.writeln(rutils.bold("Capa Report for " + doc["meta"]["sample"]["md5"])) ostream.writeln(tabulate.tabulate(rows, tablefmt="plain")) From e13b5c77c6c64e0970de2670627db88c09200a17 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Thu, 2 Jul 2020 16:26:55 -0600 Subject: [PATCH 7/9] render: ida: add sha256, filetype --- capa/ida/ida_capa_explorer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/capa/ida/ida_capa_explorer.py b/capa/ida/ida_capa_explorer.py index f42c0978..ed0b54ee 100644 --- a/capa/ida/ida_capa_explorer.py +++ b/capa/ida/ida_capa_explorer.py @@ -370,11 +370,11 @@ class CapaExplorerForm(idaapi.PluginForm): "sample": { "md5": idautils.GetInputFileMD5(), # "sha1" not easily accessible - # "sha256" not easily accessible + "sha256": idaapi.retrieve_input_file_sha256(), "path": idaapi.get_input_file_path(), }, "analysis": { - # "format" is difficult to determine via IDAPython + "format": idaapi.get_file_type_name(), "extractor": "ida", }, } From 1ad8cefd0fb578e4a297144110e28c9134b8edbb Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Thu, 2 Jul 2020 16:31:21 -0600 Subject: [PATCH 8/9] render: ida: move collect_metadata into capa.ida.helpers --- capa/ida/helpers/__init__.py | 16 ++++++++++++++++ capa/ida/ida_capa_explorer.py | 18 +----------------- capa/main.py | 12 +++++------- 3 files changed, 22 insertions(+), 24 deletions(-) diff --git a/capa/ida/helpers/__init__.py b/capa/ida/helpers/__init__.py index 7a230000..f9e972b1 100644 --- a/capa/ida/helpers/__init__.py +++ b/capa/ida/helpers/__init__.py @@ -1,7 +1,9 @@ import logging +import datetime import idc import idaapi +import idautils logger = logging.getLogger("capa") @@ -48,3 +50,17 @@ def get_func_start_ea(ea): """ """ f = idaapi.get_func(ea) return f if f is None else f.start_ea + + +def collect_metadata(): + return { + "timestamp": datetime.datetime.now().isoformat(), + # "argv" is not relevant here + "sample": { + "md5": idautils.GetInputFileMD5(), + # "sha1" not easily accessible + "sha256": idaapi.retrieve_input_file_sha256(), + "path": idaapi.get_input_file_path(), + }, + "analysis": {"format": idaapi.get_file_type_name(), "extractor": "ida", }, + } diff --git a/capa/ida/ida_capa_explorer.py b/capa/ida/ida_capa_explorer.py index ed0b54ee..ec0f4274 100644 --- a/capa/ida/ida_capa_explorer.py +++ b/capa/ida/ida_capa_explorer.py @@ -1,10 +1,8 @@ import os import logging -import datetime import collections import idaapi -import idautils from PyQt5 import QtGui, QtCore, QtWidgets import capa.main @@ -364,21 +362,7 @@ class CapaExplorerForm(idaapi.PluginForm): logger.info("analysis completed.") - meta = { - "timestamp": datetime.datetime.now().isoformat(), - # "argv" is not relevant here - "sample": { - "md5": idautils.GetInputFileMD5(), - # "sha1" not easily accessible - "sha256": idaapi.retrieve_input_file_sha256(), - "path": idaapi.get_input_file_path(), - }, - "analysis": { - "format": idaapi.get_file_type_name(), - "extractor": "ida", - }, - } - + meta = capa.ida.helpers.collect_metadata() doc = capa.render.convert_capabilities_to_result_document(meta, rules, capabilities) self.model_data.render_capa_doc(doc) diff --git a/capa/main.py b/capa/main.py index 0e29e1dc..ee582724 100644 --- a/capa/main.py +++ b/capa/main.py @@ -511,11 +511,12 @@ def main(argv=None): def ida_main(): + import capa.ida.helpers + import capa.features.extractors.ida + logging.basicConfig(level=logging.INFO) logging.getLogger().setLevel(logging.INFO) - import capa.ida.helpers - if not capa.ida.helpers.is_supported_file_type(): return -1 @@ -536,18 +537,15 @@ def ida_main(): logger.debug("default rule path (source method): %s", rules_path) rules = get_rules(rules_path) - import capa.rules - rules = capa.rules.RuleSet(rules) - import capa.features.extractors.ida - capabilities = find_capabilities(rules, capa.features.extractors.ida.IdaFeatureExtractor()) if has_file_limitation(rules, capabilities, is_standalone=False): capa.ida.helpers.inform_user_ida_ui("capa encountered warnings during analysis") - render_capabilities_default(rules, capabilities) + meta = capa.ida.helpers.collect_metadata() + print(capa.render.render_default(meta, rules, capabilities)) def is_runtime_ida(): From 959abf960992652f1abc8ea3aa949519c85ac337 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Thu, 2 Jul 2020 16:31:48 -0600 Subject: [PATCH 9/9] pep8 --- capa/ida/helpers/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/capa/ida/helpers/__init__.py b/capa/ida/helpers/__init__.py index f9e972b1..06997137 100644 --- a/capa/ida/helpers/__init__.py +++ b/capa/ida/helpers/__init__.py @@ -62,5 +62,5 @@ def collect_metadata(): "sha256": idaapi.retrieve_input_file_sha256(), "path": idaapi.get_input_file_path(), }, - "analysis": {"format": idaapi.get_file_type_name(), "extractor": "ida", }, + "analysis": {"format": idaapi.get_file_type_name(), "extractor": "ida",}, }