From b31c76614fe1cb354a5d2a70444eed228a2de2cb Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Fri, 3 Jul 2020 01:21:58 -0600 Subject: [PATCH 01/32] submodule: rules: update --- rules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rules b/rules index 799b7bbf..ba7843e7 160000 --- a/rules +++ b/rules @@ -1 +1 @@ -Subproject commit 799b7bbf4bfe198194370a6cb86853882743ba56 +Subproject commit ba7843e7a8e1754f08225abf48dbb87626f82026 From 2f78c681e42293f8f7e1c0eec291e5e853a38a90 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ana=20Mar=C3=ADa=20Mart=C3=ADnez=20G=C3=B3mez?= Date: Fri, 3 Jul 2020 11:30:39 +0200 Subject: [PATCH 02/32] Document how to install development dependencies Introduced in: https://github.com/fireeye/capa/commit/d1dd997b7bb58aedb059cdcd68b74117543c6088 --- doc/installation.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/doc/installation.md b/doc/installation.md index 403574d5..d8a689b0 100644 --- a/doc/installation.md +++ b/doc/installation.md @@ -47,6 +47,14 @@ Next, use `pip` to install the source code in "editable" mode. This means that P You'll find that the `capa.exe` (Windows) or `capa` (Linux) executables in your path now invoke the capa binary from this directory. +If you want to install the development dependencies, which you need to run the code formatters, syntax checker, rule linter and tests (and for the [hooks](#4-setup-hooks-optional)) run: + +`$ pip install -e ./local/path/to/src[dev]` + +If you are using zsh, do not forget to escape the square brackets: + +`$ pip install -e ./local/path/to/src\[dev\]` + ### 4. Setup hooks [optional] If you plan to contribute to capa, you may want to setup the hooks. 
From 26d2f120381d6c8033e6237822a413081ced097a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ana=20Mar=C3=ADa=20Mart=C3=ADnez=20G=C3=B3mez?= Date: Fri, 3 Jul 2020 11:35:52 +0200 Subject: [PATCH 03/32] Add Python3 requirement for black When installing the development dependencies with Python2, it fails as black is not available for Python2. --- doc/installation.md | 2 ++ setup.py | 8 +++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/doc/installation.md b/doc/installation.md index d8a689b0..4d165f9d 100644 --- a/doc/installation.md +++ b/doc/installation.md @@ -55,6 +55,8 @@ If you are using zsh, do not forget to escape the square brackets: `$ pip install -e ./local/path/to/src\[dev\]` +Note that some development dependencies require Python3. + ### 4. Setup hooks [optional] If you plan to contribute to capa, you may want to setup the hooks. diff --git a/setup.py b/setup.py index 3522e8e2..b23bc084 100644 --- a/setup.py +++ b/setup.py @@ -41,7 +41,13 @@ setuptools.setup( include_package_data=True, install_requires=requirements, extras_require={ - "dev": ["pytest", "pytest-sugar", "pytest-instafail", "pytest-cov", "pycodestyle", "black", "isort"] + "dev": ["pytest", + "pytest-sugar", + "pytest-instafail", + "pytest-cov", + "pycodestyle", + "black ; python_version>'3.0'", + "isort"] }, zip_safe=False, keywords="capa", From ca7cf93d181df4674c5b6b37157f385aa4b6bb6a Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Fri, 3 Jul 2020 09:14:34 -0600 Subject: [PATCH 04/32] submodule: rules: update --- rules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rules b/rules index ba7843e7..54885300 160000 --- a/rules +++ b/rules @@ -1 +1 @@ -Subproject commit ba7843e7a8e1754f08225abf48dbb87626f82026 +Subproject commit 548853005591996b11d6b8d1140c9e353254e9f2 From 93e7206bb2a5105214f1e1e38f2fbd7f23233f20 Mon Sep 17 00:00:00 2001 From: Michael Hunhoff Date: Fri, 3 Jul 2020 14:32:41 -0600 Subject: [PATCH 05/32] removing circular import --- 
capa/features/extractors/__init__.py | 13 ------------- capa/features/extractors/ida/__init__.py | 8 +++++--- 2 files changed, 5 insertions(+), 16 deletions(-) diff --git a/capa/features/extractors/__init__.py b/capa/features/extractors/__init__.py index 85d1bd49..fed476f2 100644 --- a/capa/features/extractors/__init__.py +++ b/capa/features/extractors/__init__.py @@ -1,18 +1,5 @@ import abc -try: - import ida -except (ImportError, SyntaxError): - pass - -try: - import viv -except (ImportError, SyntaxError): - pass - -__all__ = ["ida", "viv"] - - class FeatureExtractor(object): """ FeatureExtractor defines the interface for fetching features from a sample. diff --git a/capa/features/extractors/ida/__init__.py b/capa/features/extractors/ida/__init__.py index 5091fb34..c89ac82f 100644 --- a/capa/features/extractors/ida/__init__.py +++ b/capa/features/extractors/ida/__init__.py @@ -5,9 +5,9 @@ import idaapi import capa.features.extractors.ida.file import capa.features.extractors.ida.insn -import capa.features.extractors.ida.helpers import capa.features.extractors.ida.function import capa.features.extractors.ida.basicblock + from capa.features.extractors import FeatureExtractor @@ -51,7 +51,8 @@ class IdaFeatureExtractor(FeatureExtractor): yield feature, va def get_functions(self): - for f in capa.features.extractors.ida.helpers.get_functions(ignore_thunks=True, ignore_libs=True): + import capa.features.extractors.ida.helpers as ida_helpers + for f in ida_helpers.get_functions(ignore_thunks=True, ignore_libs=True): yield add_va_int_cast(f) def extract_function_features(self, f): @@ -67,7 +68,8 @@ class IdaFeatureExtractor(FeatureExtractor): yield feature, va def get_instructions(self, f, bb): - for insn in capa.features.extractors.ida.helpers.get_instructions_in_range(bb.start_ea, bb.end_ea): + import capa.features.extractors.ida.helpers as ida_helpers + for insn in ida_helpers.get_instructions_in_range(bb.start_ea, bb.end_ea): yield add_va_int_cast(insn) def 
extract_insn_features(self, f, bb, insn): From 6dc75c5f29583d2b1e669d11b17d5f51423fe092 Mon Sep 17 00:00:00 2001 From: Michael Hunhoff Date: Fri, 3 Jul 2020 14:34:32 -0600 Subject: [PATCH 06/32] adding support for basicblock feature introduced by 78 --- capa/ida/explorer/model.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/capa/ida/explorer/model.py b/capa/ida/explorer/model.py index 4d876fee..3325e283 100644 --- a/capa/ida/explorer/model.py +++ b/capa/ida/explorer/model.py @@ -530,6 +530,9 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel): parent, display, source=doc["rules"].get(feature[feature["type"]], {}).get("source", "") ) + if feature["type"] == "basicblock": + return CapaExplorerBlockItem(parent, location) + if feature["type"] in instruction_view: return CapaExplorerInstructionViewItem(parent, display, location) From 5317e1e11e53b0070d59b791ff457eb93fabbb8c Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Fri, 3 Jul 2020 09:32:37 -0600 Subject: [PATCH 07/32] feature extractor: null: add get_base_address() closes #88 --- capa/features/extractors/__init__.py | 4 ++++ tests/test_freeze.py | 1 + 2 files changed, 5 insertions(+) diff --git a/capa/features/extractors/__init__.py b/capa/features/extractors/__init__.py index fed476f2..75426041 100644 --- a/capa/features/extractors/__init__.py +++ b/capa/features/extractors/__init__.py @@ -180,6 +180,7 @@ class NullFeatureExtractor(FeatureExtractor): example:: extractor = NullFeatureExtractor({ + 'base address': 0x401000, 'file features': [ (0x402345, capa.features.Characteristic('embedded pe')), ], @@ -214,6 +215,9 @@ class NullFeatureExtractor(FeatureExtractor): super(NullFeatureExtractor, self).__init__() self.features = features + def get_base_address(self): + return self.features["base address"] + def extract_file_features(self): for p in self.features.get("file features", []): va, feature = p diff --git a/tests/test_freeze.py b/tests/test_freeze.py index 7b251264..f81114ca 100644 ---
a/tests/test_freeze.py +++ b/tests/test_freeze.py @@ -10,6 +10,7 @@ from fixtures import * EXTRACTOR = capa.features.extractors.NullFeatureExtractor( { + "base address": 0x401000, "file features": [(0x402345, capa.features.Characteristic("embedded pe")),], "functions": { 0x401000: { From 80bdb4a54586ba42dc11c0deeb9ec1a7a3f066ee Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Fri, 3 Jul 2020 09:32:58 -0600 Subject: [PATCH 08/32] pep8 --- capa/features/extractors/__init__.py | 1 + capa/features/extractors/ida/__init__.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/capa/features/extractors/__init__.py b/capa/features/extractors/__init__.py index 75426041..091c1830 100644 --- a/capa/features/extractors/__init__.py +++ b/capa/features/extractors/__init__.py @@ -1,5 +1,6 @@ import abc + class FeatureExtractor(object): """ FeatureExtractor defines the interface for fetching features from a sample. diff --git a/capa/features/extractors/ida/__init__.py b/capa/features/extractors/ida/__init__.py index c89ac82f..c2bc79f5 100644 --- a/capa/features/extractors/ida/__init__.py +++ b/capa/features/extractors/ida/__init__.py @@ -7,7 +7,6 @@ import capa.features.extractors.ida.file import capa.features.extractors.ida.insn import capa.features.extractors.ida.function import capa.features.extractors.ida.basicblock - from capa.features.extractors import FeatureExtractor @@ -52,6 +51,7 @@ class IdaFeatureExtractor(FeatureExtractor): def get_functions(self): import capa.features.extractors.ida.helpers as ida_helpers + for f in ida_helpers.get_functions(ignore_thunks=True, ignore_libs=True): yield add_va_int_cast(f) @@ -69,6 +69,7 @@ class IdaFeatureExtractor(FeatureExtractor): def get_instructions(self, f, bb): import capa.features.extractors.ida.helpers as ida_helpers + for insn in ida_helpers.get_instructions_in_range(bb.start_ea, bb.end_ea): yield add_va_int_cast(insn) From 0692e940e97e45a779f13d479d39a281b74e3af6 Mon Sep 17 00:00:00 2001 From: Willi 
Ballenthin Date: Sun, 5 Jul 2020 14:29:17 -0600 Subject: [PATCH 09/32] usage: more words --- doc/usage.md | 72 +++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 51 insertions(+), 21 deletions(-) diff --git a/doc/usage.md b/doc/usage.md index dfd6f06d..7e37ad9e 100644 --- a/doc/usage.md +++ b/doc/usage.md @@ -1,35 +1,65 @@ # capa usage - -# command line -After you have downloaded the standalone version of capa or installed it via `pip` (see the [installation](installation.md) documentation) you can run capa directly from your terminal shell. - -- `$ capa -h` -- `$ capa malware.exe` - -In this mode capa relies on vivisect which only runs under Python 2. - -## only run selected rules -Use the `-t` option to only run selected rules. This is the preferred method over specifying a rule path which fails if dependent rules reside in other directories. - ``` -$ capa -t communication malware.exe +usage: capa [-h] [-r RULES] [-t TAG] [--version] [-j] [-v] [-vv] [-d] [-q] + [-f {auto,pe,sc32,sc64,freeze}] + sample + +detect capabilities in programs. + +positional arguments: + sample Path to sample to analyze + +optional arguments: + -h, --help show this help message and exit + -r RULES, --rules RULES + Path to rule file or directory, use embedded rules by + default + -t TAG, --tag TAG Filter on rule meta field values + --version Print the executable version and exit + -j, --json Emit JSON instead of text + -v, --verbose Enable verbose result document (no effect with --json) + -vv, --vverbose Enable very verbose result document (no effect with + --json) + -d, --debug Enable debugging output on STDERR + -q, --quiet Disable all output but errors + -f {auto,pe,sc32,sc64,freeze}, --format {auto,pe,sc32,sc64,freeze} + Select sample format, auto: (default) detect file type + automatically, pe: Windows PE file, sc32: 32-bit + shellcode, sc64: 64-bit shellcode, freeze: features + previously frozen by capa ``` -# IDA Pro -capa runs from within IDA Pro. 
Run `capa/main.py` via File - Script file... (ALT + F7). +## tips and tricks -When running in IDA, capa uses IDA's disassembly and file analysis as its backend. These results may vary from the standalone version that uses vivisect. + - [match only rules by given author or namespace](#only-run-selected-rules) + - [IDA Pro capa explorer](#capa-explorer) + - [IDA Pro rule generator](#rule-generator) -In IDA, capa supports Python 2 and Python 3. If you encounter issues with your specific setup, please open a new [Issue](https://github.com/fireeye/capa/issues). +### only run selected rules +Use the `-t` option to run rules with the given metadata value (see the rule fields `rule.meta.*`). +For example, `capa -t william.ballenthin@mandiant.com` runs rules that reference Willi's email address (probably as the author), or +`capa -t communication` runs rules with the namespace `communication`. -## IDA Pro plugins -capa comes with two IDA Pro plugins located in the `capa/ida` directory. +### IDA Pro integrations +You can run capa from within IDA Pro. Run `capa/main.py` via `File - Script file...` (or ALT + F7). +When running in IDA, capa uses IDA's disassembly and file analysis as its backend. +These results may vary from the standalone version that uses vivisect. +IDA's analysis is generally a bit faster and more thorough than vivisect's, so you might prefer this mode. -### capa explorer +When run under IDA, capa supports both Python 2 and Python 3 interpreters. +If you encounter issues with your specific setup, please open a new [Issue](https://github.com/fireeye/capa/issues). + +Additionally, capa comes with two IDA Pro plugins located in the `capa/ida` directory: the explorer and the rule generator. + +#### capa explorer The capa explorer allows you to interactively display and browse capabilities capa identified in a binary. +As you select rules or logic, capa will highlight the addresses that support its analysis conclusions.
+We like to use capa to help find the most interesting parts of a program, such as where the C2 mechanism might be. ![capa explorer](capa_explorer.png) -### rule generator +#### rule generator The rule generator helps you to easily write new rules based on the function you are currently analyzing in your IDA disassembly view. +It shows the features that capa can extract from the function, and lets you quickly pull these into a rule template. +You'll still have to provide the logic structures (`and`, `or`, `not`, etc.) but the features will be prepared for you. From 5cd10bfc75ae07dabee02e117ca8f479db50ae44 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Fri, 3 Jul 2020 09:38:48 -0600 Subject: [PATCH 10/32] main: load shellcode at 0x690000 closes #94 --- capa/main.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/capa/main.py b/capa/main.py index ca90b24c..52516a8f 100644 --- a/capa/main.py +++ b/capa/main.py @@ -193,6 +193,9 @@ def is_supported_file_type(sample): return False +SHELLCODE_BASE = 0x690000 + + def get_shellcode_vw(sample, arch="auto"): """ Return shellcode workspace using explicit arch or via auto detect @@ -205,13 +208,12 @@ def get_shellcode_vw(sample, arch="auto"): # choose arch with most functions, idea by Jay G. 
vw_cands = [] for arch in ["i386", "amd64"]: - vw_cands.append(viv_utils.getShellcodeWorkspace(sample_bytes, arch)) + vw_cands.append(viv_utils.getShellcodeWorkspace(sample_bytes, arch, base=SHELLCODE_BASE)) if not vw_cands: raise ValueError("could not generate vivisect workspace") vw = max(vw_cands, key=lambda vw: len(vw.getFunctions())) else: - vw = viv_utils.getShellcodeWorkspace(sample_bytes, arch) - vw.setMeta("Format", "blob") # TODO fix in viv_utils + vw = viv_utils.getShellcodeWorkspace(sample_bytes, arch, base=SHELLCODE_BASE) return vw From ff639737b8198b0bf12937643662a0dd0d2c1493 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Fri, 3 Jul 2020 09:49:41 -0600 Subject: [PATCH 11/32] render: simplify metadata display closes #91 --- capa/render/default.py | 14 ++++++-------- capa/render/verbose.py | 22 ++++++++++------------ capa/render/vverbose.py | 23 ++++++++++------------- 3 files changed, 26 insertions(+), 33 deletions(-) diff --git a/capa/render/default.py b/capa/render/default.py index c46ef0cf..d7971124 100644 --- a/capa/render/default.py +++ b/capa/render/default.py @@ -17,13 +17,11 @@ def width(s, character_count): def render_meta(doc, ostream): - rows = [(rutils.bold("Capa Report for"), rutils.bold(doc["meta"]["sample"]["md5"]),)] - - for k in ("timestamp", "version"): - rows.append((width(k, 22), width(doc["meta"][k], 60))) - - for k in ("path", "md5"): - rows.append((k, doc["meta"]["sample"][k])) + rows = [] + rows.append((width("md5", 22), width(doc["meta"]["sample"]["md5"], 82))) + rows.append(("path", doc["meta"]["sample"]["path"])) + rows.append(("timestamp", doc["meta"]["timestamp"])) + rows.append(("capa version", doc["meta"]["version"])) ostream.write(tabulate.tabulate(rows, tablefmt="psql")) ostream.write("\n") @@ -99,7 +97,7 @@ def render_attack(doc, ostream): raise RuntimeError("unexpected ATT&CK spec format") rows.append((rutils.bold(tactic.upper()), "\n".join(inner_rows),)) ostream.write( - tabulate.tabulate(rows, 
headers=[width("ATT&CK Tactic", 20), width("ATT&CK Technique", 60)], tablefmt="psql") + tabulate.tabulate(rows, headers=[width("ATT&CK Tactic", 20), width("ATT&CK Technique", 80)], tablefmt="psql") ) ostream.write("\n") diff --git a/capa/render/verbose.py b/capa/render/verbose.py index 24e9cd3c..c2576155 100644 --- a/capa/render/verbose.py +++ b/capa/render/verbose.py @@ -23,18 +23,16 @@ import capa.render.utils as rutils def render_verbose(doc): ostream = rutils.StringIO() - rows = [(rutils.bold("Capa Report for"), rutils.bold(doc["meta"]["sample"]["md5"]),)] - for k in ("timestamp", "version"): - rows.append((k, doc["meta"][k])) - - for k in ("path", "md5", "sha1", "sha256"): - rows.append((k, doc["meta"]["sample"][k])) - - for k in ("format", "extractor"): - rows.append((k.replace("_", " "), doc["meta"]["analysis"][k])) - - rows.append(("base address", rutils.hex(doc["meta"]["analysis"]["base_address"]))) - + rows = [] + rows.append(("md5", doc["meta"]["sample"]["md5"])) + rows.append(("sha1", doc["meta"]["sample"]["sha1"])) + rows.append(("sha256", doc["meta"]["sample"]["sha256"])) + rows.append(("path", doc["meta"]["sample"]["path"])) + rows.append(("timestamp", doc["meta"]["timestamp"])) + rows.append(("capa version", doc["meta"]["version"])) + rows.append(("format", doc["meta"]["analysis"]["format"])) + rows.append(("extractor", doc["meta"]["analysis"]["extractor"])) + rows.append(("base address", hex(doc["meta"]["analysis"]["base_address"]))) ostream.writeln(tabulate.tabulate(rows, tablefmt="plain")) ostream.write("\n") diff --git a/capa/render/vverbose.py b/capa/render/vverbose.py index 2fe96367..342a9dd4 100644 --- a/capa/render/vverbose.py +++ b/capa/render/vverbose.py @@ -141,19 +141,16 @@ def render_match(ostream, match, indent=0, mode=MODE_SUCCESS): def render_vverbose(doc): ostream = rutils.StringIO() - rows = [(rutils.bold("Capa Report for"), rutils.bold(doc["meta"]["sample"]["md5"]),)] - for k in ("timestamp", "version"): - rows.append((k, 
doc["meta"][k])) - - for k in ("path", "md5", "sha1", "sha256"): - rows.append((k, doc["meta"]["sample"][k])) - - for k in ("format", "extractor"): - rows.append((k.replace("_", " "), doc["meta"]["analysis"][k])) - - rows.append(("base address", rutils.hex(doc["meta"]["analysis"]["base_address"]))) - - ostream.writeln(rutils.bold("Capa Report for " + doc["meta"]["sample"]["md5"])) + rows = [] + rows.append(("md5", doc["meta"]["sample"]["md5"])) + rows.append(("sha1", doc["meta"]["sample"]["sha1"])) + rows.append(("sha256", doc["meta"]["sample"]["sha256"])) + rows.append(("path", doc["meta"]["sample"]["path"])) + rows.append(("timestamp", doc["meta"]["timestamp"])) + rows.append(("capa version", doc["meta"]["version"])) + rows.append(("format", doc["meta"]["analysis"]["format"])) + rows.append(("extractor", doc["meta"]["analysis"]["extractor"])) + rows.append(("base address", hex(doc["meta"]["analysis"]["base_address"]))) ostream.writeln(tabulate.tabulate(rows, tablefmt="plain")) ostream.write("\n") From 867de57062713aef2fc522fb2937560bcb81e82a Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Fri, 3 Jul 2020 10:12:03 -0600 Subject: [PATCH 12/32] main: find_capabilities: extract feature counts per item, too closes #95 closes #96 --- capa/ida/ida_capa_explorer.py | 7 ++++-- capa/main.py | 32 ++++++++++++++++++-------- scripts/lint.py | 2 +- scripts/testbed/run_rule_on_testbed.py | 3 ++- tests/test_freeze.py | 2 +- tests/test_main.py | 10 ++++---- 6 files changed, 37 insertions(+), 19 deletions(-) diff --git a/capa/ida/ida_capa_explorer.py b/capa/ida/ida_capa_explorer.py index 970e8a62..801985bf 100644 --- a/capa/ida/ida_capa_explorer.py +++ b/capa/ida/ida_capa_explorer.py @@ -339,7 +339,11 @@ class CapaExplorerForm(idaapi.PluginForm): rules_path = os.path.join(os.path.dirname(self.file_loc), "../..", "rules") rules = capa.main.get_rules(rules_path) rules = capa.rules.RuleSet(rules) - capabilities = capa.main.find_capabilities(rules, 
capa.features.extractors.ida.IdaFeatureExtractor(), True) + + meta = capa.ida.helpers.collect_metadata() + + capabilities, counts = capa.main.find_capabilities(rules, capa.features.extractors.ida.IdaFeatureExtractor(), True) + meta.update(counts) # support binary files specifically for x86/AMD64 shellcode # warn user binary file is loaded but still allow capa to process it @@ -364,7 +368,6 @@ class CapaExplorerForm(idaapi.PluginForm): logger.info("analysis completed.") - meta = capa.ida.helpers.collect_metadata() doc = capa.render.convert_capabilities_to_result_document(meta, rules, capabilities) self.model_data.render_capa_doc(doc) diff --git a/capa/main.py b/capa/main.py index 52516a8f..9cbb1589 100644 --- a/capa/main.py +++ b/capa/main.py @@ -68,7 +68,7 @@ def find_function_capabilities(ruleset, extractor, f): function_features[capa.features.MatchedRule(rule_name)].add(va) _, function_matches = capa.engine.match(ruleset.function_rules, function_features, oint(f)) - return function_matches, bb_matches + return function_matches, bb_matches, len(function_features) def find_file_capabilities(ruleset, extractor, function_features): @@ -84,20 +84,30 @@ def find_file_capabilities(ruleset, extractor, function_features): if feature not in file_features: file_features[feature] = set() - logger.info("analyzed file and extracted %d features", len(file_features)) + logger.debug("analyzed file and extracted %d features", len(file_features)) file_features.update(function_features) _, matches = capa.engine.match(ruleset.file_rules, file_features, 0x0) - return matches + return matches, len(file_features) def find_capabilities(ruleset, extractor, disable_progress=None): all_function_matches = collections.defaultdict(list) all_bb_matches = collections.defaultdict(list) + meta = { + "counts": { + "file": 0, + "functions": {}, + } + } + for f in tqdm.tqdm(extractor.get_functions(), disable=disable_progress, unit=" functions"): - function_matches, bb_matches = 
find_function_capabilities(ruleset, extractor, f) + function_matches, bb_matches, feature_count = find_function_capabilities(ruleset, extractor, f) + meta["counts"]["functions"][f.__int__()] = feature_count + logger.debug("analyzed function 0x%x and extracted %d features", f.__int__(), feature_count) + for rule_name, res in function_matches.items(): all_function_matches[rule_name].extend(res) for rule_name, res in bb_matches.items(): @@ -110,14 +120,15 @@ def find_capabilities(ruleset, extractor, disable_progress=None): for rule_name, results in all_function_matches.items() } - all_file_matches = find_file_capabilities(ruleset, extractor, function_features) + all_file_matches, feature_count = find_file_capabilities(ruleset, extractor, function_features) + meta["counts"]["file"] = feature_count matches = {} matches.update(all_bb_matches) matches.update(all_function_matches) matches.update(all_file_matches) - return matches + return matches, meta def has_rule_with_namespace(rules, capabilities, rule_cat): @@ -485,7 +496,8 @@ def main(argv=None): meta = collect_metadata(argv, args.sample, format, extractor) - capabilities = find_capabilities(rules, extractor) + capabilities, counts = find_capabilities(rules, extractor) + meta.update(counts) if has_file_limitation(rules, capabilities): # bail if capa encountered file limitation e.g. 
a packed binary @@ -542,12 +554,14 @@ def ida_main(): rules = get_rules(rules_path) rules = capa.rules.RuleSet(rules) - capabilities = find_capabilities(rules, capa.features.extractors.ida.IdaFeatureExtractor()) + meta = capa.ida.helpers.collect_metadata() + + capabilities, counts = find_capabilities(rules, capa.features.extractors.ida.IdaFeatureExtractor()) + meta.update(counts) if has_file_limitation(rules, capabilities, is_standalone=False): capa.ida.helpers.inform_user_ida_ui("capa encountered warnings during analysis") - meta = capa.ida.helpers.collect_metadata() print(capa.render.render_default(meta, rules, capabilities)) diff --git a/scripts/lint.py b/scripts/lint.py index 69bf3bf4..7638e2c7 100644 --- a/scripts/lint.py +++ b/scripts/lint.py @@ -180,7 +180,7 @@ class DoesntMatchExample(Lint): try: extractor = capa.main.get_extractor(path, "auto") - capabilities = capa.main.find_capabilities(ctx["rules"], extractor, disable_progress=True) + capabilities, meta = capa.main.find_capabilities(ctx["rules"], extractor, disable_progress=True) except Exception as e: logger.error("failed to extract capabilities: %s %s %s", rule.name, path, e) return True diff --git a/scripts/testbed/run_rule_on_testbed.py b/scripts/testbed/run_rule_on_testbed.py index 2e6b9ce8..aa78a830 100644 --- a/scripts/testbed/run_rule_on_testbed.py +++ b/scripts/testbed/run_rule_on_testbed.py @@ -93,7 +93,8 @@ def get_capabilities(path, rules): logger.debug("matching rules in %s", path) with open(path, "rb") as f: extractor = capa.features.freeze.load(f.read()) - return capa.main.find_capabilities(rules, extractor, disable_progress=True) + capabilities, meta = capa.main.find_capabilities(rules, extractor, disable_progress=True) + return capabilities def get_function_hits(capabilities, rule_name): diff --git a/tests/test_freeze.py b/tests/test_freeze.py index f81114ca..ef9fe1bd 100644 --- a/tests/test_freeze.py +++ b/tests/test_freeze.py @@ -59,7 +59,7 @@ def test_null_feature_extractor(): ), ] ) 
- capabilities = capa.main.find_capabilities(rules, EXTRACTOR) + capabilities, meta = capa.main.find_capabilities(rules, EXTRACTOR) assert "xor loop" in capabilities diff --git a/tests/test_main.py b/tests/test_main.py index 3010b494..c35b4b4b 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -147,7 +147,7 @@ def test_match_across_scopes_file_function(sample_9324d1a8ae37a36ae560c37448c970 extractor = capa.features.extractors.viv.VivisectFeatureExtractor( sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path, ) - capabilities = capa.main.find_capabilities(rules, extractor) + capabilities, meta = capa.main.find_capabilities(rules, extractor) assert "install service" in capabilities assert ".text section" in capabilities assert ".text section and install service" in capabilities @@ -212,7 +212,7 @@ def test_match_across_scopes(sample_9324d1a8ae37a36ae560c37448c9705a): extractor = capa.features.extractors.viv.VivisectFeatureExtractor( sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path ) - capabilities = capa.main.find_capabilities(rules, extractor) + capabilities, meta = capa.main.find_capabilities(rules, extractor) assert "tight loop" in capabilities assert "kill thread loop" in capabilities assert "kill thread program" in capabilities @@ -241,7 +241,7 @@ def test_subscope_bb_rules(sample_9324d1a8ae37a36ae560c37448c9705a): extractor = capa.features.extractors.viv.VivisectFeatureExtractor( sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path, ) - capabilities = capa.main.find_capabilities(rules, extractor) + capabilities, meta = capa.main.find_capabilities(rules, extractor) assert "test rule" in capabilities @@ -267,7 +267,7 @@ def test_byte_matching(sample_9324d1a8ae37a36ae560c37448c9705a): extractor = capa.features.extractors.viv.VivisectFeatureExtractor( sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path, ) - 
capabilities = capa.main.find_capabilities(rules, extractor) + capabilities, meta = capa.main.find_capabilities(rules, extractor) assert "byte match test" in capabilities @@ -294,5 +294,5 @@ def test_count_bb(sample_9324d1a8ae37a36ae560c37448c9705a): extractor = capa.features.extractors.viv.VivisectFeatureExtractor( sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path, ) - capabilities = capa.main.find_capabilities(rules, extractor) + capabilities, meta = capa.main.find_capabilities(rules, extractor) assert "count bb" in capabilities From 3b7c8cd1e7e47ca5180d79641e16d73bde2e5193 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Fri, 3 Jul 2020 10:12:21 -0600 Subject: [PATCH 13/32] pep8 --- capa/ida/ida_capa_explorer.py | 4 +++- capa/main.py | 7 +------ 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/capa/ida/ida_capa_explorer.py b/capa/ida/ida_capa_explorer.py index 801985bf..20757fc3 100644 --- a/capa/ida/ida_capa_explorer.py +++ b/capa/ida/ida_capa_explorer.py @@ -342,7 +342,9 @@ class CapaExplorerForm(idaapi.PluginForm): meta = capa.ida.helpers.collect_metadata() - capabilities, counts = capa.main.find_capabilities(rules, capa.features.extractors.ida.IdaFeatureExtractor(), True) + capabilities, counts = capa.main.find_capabilities( + rules, capa.features.extractors.ida.IdaFeatureExtractor(), True + ) meta.update(counts) # support binary files specifically for x86/AMD64 shellcode diff --git a/capa/main.py b/capa/main.py index 9cbb1589..b51e654d 100644 --- a/capa/main.py +++ b/capa/main.py @@ -96,12 +96,7 @@ def find_capabilities(ruleset, extractor, disable_progress=None): all_function_matches = collections.defaultdict(list) all_bb_matches = collections.defaultdict(list) - meta = { - "counts": { - "file": 0, - "functions": {}, - } - } + meta = {"counts": {"file": 0, "functions": {},}} for f in tqdm.tqdm(extractor.get_functions(), disable=disable_progress, unit=" functions"): function_matches, bb_matches, 
feature_count = find_function_capabilities(ruleset, extractor, f) From ce7fb39aa841a48e36c3570e745fdb70c2bd9c60 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Fri, 3 Jul 2020 10:33:14 -0600 Subject: [PATCH 14/32] render: show feature counts closes #96 --- capa/render/verbose.py | 2 ++ capa/render/vverbose.py | 23 +++++++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/capa/render/verbose.py b/capa/render/verbose.py index c2576155..7c9d8fa6 100644 --- a/capa/render/verbose.py +++ b/capa/render/verbose.py @@ -33,6 +33,8 @@ def render_verbose(doc): rows.append(("format", doc["meta"]["analysis"]["format"])) rows.append(("extractor", doc["meta"]["analysis"]["extractor"])) rows.append(("base address", hex(doc["meta"]["analysis"]["base_address"]))) + rows.append(("function count", len(doc["meta"]["counts"]["functions"]))) + rows.append(("total feature count", doc["meta"]["counts"]["file"] + sum(doc["meta"]["counts"]["functions"].values()))) ostream.writeln(tabulate.tabulate(rows, tablefmt="plain")) ostream.write("\n") diff --git a/capa/render/vverbose.py b/capa/render/vverbose.py index 342a9dd4..46e7c6c7 100644 --- a/capa/render/vverbose.py +++ b/capa/render/vverbose.py @@ -1,3 +1,5 @@ +import collections + import tabulate import capa.rules @@ -151,9 +153,30 @@ def render_vverbose(doc): rows.append(("format", doc["meta"]["analysis"]["format"])) rows.append(("extractor", doc["meta"]["analysis"]["extractor"])) rows.append(("base address", hex(doc["meta"]["analysis"]["base_address"]))) + rows.append(("function count", len(doc["meta"]["counts"]["functions"]))) + rows.append( + ("total feature count", doc["meta"]["counts"]["file"] + sum(doc["meta"]["counts"]["functions"].values()))) ostream.writeln(tabulate.tabulate(rows, tablefmt="plain")) ostream.write("\n") + matches_by_function = collections.defaultdict(set) + for rule in rutils.capability_rules(doc): + for va in rule["matches"].keys(): + matches_by_function[va].add(rule["meta"]["name"]) + + 
ostream.writeln("## functions") + for va, feature_count in sorted(doc["meta"]["counts"]["functions"].items()): + va = int(va) + ostream.write("function at 0x%x with %d features: " % (va, feature_count)) + if not matches_by_function.get(va, {}): + ostream.writeln("no matches") + else: + ostream.writeln("") + for rule_name in matches_by_function[va]: + ostream.writeln(" - " + rule_name) + + ostream.write("\n") + ostream.writeln("## rules") for rule in rutils.capability_rules(doc): count = len(rule["matches"]) if count == 1: From 744bbf9f1850fc6722109c1b5e217f6a4042fd6a Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Fri, 3 Jul 2020 10:33:24 -0600 Subject: [PATCH 15/32] pep8 --- capa/render/verbose.py | 4 +++- capa/render/vverbose.py | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/capa/render/verbose.py b/capa/render/verbose.py index 7c9d8fa6..830d8e3a 100644 --- a/capa/render/verbose.py +++ b/capa/render/verbose.py @@ -34,7 +34,9 @@ def render_verbose(doc): rows.append(("extractor", doc["meta"]["analysis"]["extractor"])) rows.append(("base address", hex(doc["meta"]["analysis"]["base_address"]))) rows.append(("function count", len(doc["meta"]["counts"]["functions"]))) - rows.append(("total feature count", doc["meta"]["counts"]["file"] + sum(doc["meta"]["counts"]["functions"].values()))) + rows.append( + ("total feature count", doc["meta"]["counts"]["file"] + sum(doc["meta"]["counts"]["functions"].values())) + ) ostream.writeln(tabulate.tabulate(rows, tablefmt="plain")) ostream.write("\n") diff --git a/capa/render/vverbose.py b/capa/render/vverbose.py index 46e7c6c7..7ca73fa3 100644 --- a/capa/render/vverbose.py +++ b/capa/render/vverbose.py @@ -155,7 +155,8 @@ def render_vverbose(doc): rows.append(("base address", hex(doc["meta"]["analysis"]["base_address"]))) rows.append(("function count", len(doc["meta"]["counts"]["functions"]))) rows.append( - ("total feature count", doc["meta"]["counts"]["file"] + 
sum(doc["meta"]["counts"]["functions"].values()))) + ("total feature count", doc["meta"]["counts"]["file"] + sum(doc["meta"]["counts"]["functions"].values())) + ) ostream.writeln(tabulate.tabulate(rows, tablefmt="plain")) ostream.write("\n") From 788f11a865a4042944ff91818b13d755612b6f6f Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Fri, 3 Jul 2020 10:41:38 -0600 Subject: [PATCH 16/32] render: refactor sections and add doc --- capa/render/verbose.py | 41 +++++++++++++++++++--- capa/render/vverbose.py | 76 +++++++++++++++++++++++++++++------------ 2 files changed, 91 insertions(+), 26 deletions(-) diff --git a/capa/render/verbose.py b/capa/render/verbose.py index 830d8e3a..029995b7 100644 --- a/capa/render/verbose.py +++ b/capa/render/verbose.py @@ -19,10 +19,22 @@ import tabulate import capa.rules import capa.render.utils as rutils +def render_meta(ostream, doc): + """ + like: -def render_verbose(doc): - ostream = rutils.StringIO() - + md5 84882c9d43e23d63b82004fae74ebb61 + sha1 c6fb3b50d946bec6f391aefa4e54478cf8607211 + sha256 5eced7367ed63354b4ed5c556e2363514293f614c2c2eb187273381b2ef5f0f9 + path /tmp/suspicious.dll_ + timestamp 2020-07-03T10:17:05.796933 + capa version 0.0.0 + format auto + extractor VivisectFeatureExtractor + base address 0x10000000 + function count 42 + total feature count 1918 + """ rows = [] rows.append(("md5", doc["meta"]["sample"]["md5"])) rows.append(("sha1", doc["meta"]["sample"]["sha1"])) @@ -38,8 +50,19 @@ def render_verbose(doc): ("total feature count", doc["meta"]["counts"]["file"] + sum(doc["meta"]["counts"]["functions"].values())) ) ostream.writeln(tabulate.tabulate(rows, tablefmt="plain")) - ostream.write("\n") + +def render_rules(ostream, doc): + """ + like: + + receive data (2 matches) + namespace communication + description all known techniques for receiving data from a potential C2 server + scope function + matches 0x10003A13 + 0x10003797 + """ for rule in rutils.capability_rules(doc): count = len(rule["matches"]) if 
count == 1: @@ -66,4 +89,14 @@ def render_verbose(doc): ostream.writeln(tabulate.tabulate(rows, tablefmt="plain")) ostream.write("\n") + +def render_verbose(doc): + ostream = rutils.StringIO() + + render_meta(ostream, doc) + ostream.write("\n") + + render_rules(ostream, doc) + ostream.write("\n") + return ostream.getvalue() diff --git a/capa/render/vverbose.py b/capa/render/vverbose.py index 7ca73fa3..510c832c 100644 --- a/capa/render/vverbose.py +++ b/capa/render/vverbose.py @@ -4,6 +4,7 @@ import tabulate import capa.rules import capa.render.utils as rutils +import capa.render.verbose def render_locations(ostream, match): @@ -140,26 +141,28 @@ def render_match(ostream, match, indent=0, mode=MODE_SUCCESS): render_match(ostream, child, indent=indent + 1, mode=child_mode) -def render_vverbose(doc): - ostream = rutils.StringIO() - - rows = [] - rows.append(("md5", doc["meta"]["sample"]["md5"])) - rows.append(("sha1", doc["meta"]["sample"]["sha1"])) - rows.append(("sha256", doc["meta"]["sample"]["sha256"])) - rows.append(("path", doc["meta"]["sample"]["path"])) - rows.append(("timestamp", doc["meta"]["timestamp"])) - rows.append(("capa version", doc["meta"]["version"])) - rows.append(("format", doc["meta"]["analysis"]["format"])) - rows.append(("extractor", doc["meta"]["analysis"]["extractor"])) - rows.append(("base address", hex(doc["meta"]["analysis"]["base_address"]))) - rows.append(("function count", len(doc["meta"]["counts"]["functions"]))) - rows.append( - ("total feature count", doc["meta"]["counts"]["file"] + sum(doc["meta"]["counts"]["functions"].values())) - ) - ostream.writeln(tabulate.tabulate(rows, tablefmt="plain")) - ostream.write("\n") +def render_functions(ostream, doc): + """ + like: + ## functions + function at 0x10001000 with 66 features: no matches + function at 0x100012b0 with 73 features: no matches + function at 0x1000321a with 33 features: + - get hostname + - initialize Winsock library + function at 0x10003286 with 63 features: + - create 
thread + - terminate thread + function at 0x10003415 with 116 features: + - write file + - send data + - link function at runtime + - create HTTP request + - get common file path + - send HTTP request + - connect to HTTP server + """ matches_by_function = collections.defaultdict(set) for rule in rutils.capability_rules(doc): for va in rule["matches"].keys(): @@ -168,7 +171,7 @@ def render_vverbose(doc): ostream.writeln("## functions") for va, feature_count in sorted(doc["meta"]["counts"]["functions"].items()): va = int(va) - ostream.write("function at 0x%x with %d features: " % (va, feature_count)) + ostream.write("function at 0x%X with %d features: " % (va, feature_count)) if not matches_by_function.get(va, {}): ostream.writeln("no matches") else: @@ -176,7 +179,24 @@ def render_vverbose(doc): for rule_name in matches_by_function[va]: ostream.writeln(" - " + rule_name) - ostream.write("\n") + +def render_rules(ostream, doc): + """ + like: + + ## rules + check for OutputDebugString error + namespace anti-analysis/anti-debugging/debugger-detection + author michael.hunhoff@fireeye.com + scope function + mbc Anti-Behavioral Analysis::Detect Debugger::OutputDebugString + examples Practical Malware Analysis Lab 16-02.exe_:0x401020 + function @ 0x10004706 + and: + api: kernel32.SetLastError @ 0x100047C2 + api: kernel32.GetLastError @ 0x10004A87 + api: kernel32.OutputDebugString @ 0x10004767, 0x10004787, 0x10004816, 0x10004895 + """ ostream.writeln("## rules") for rule in rutils.capability_rules(doc): count = len(rule["matches"]) @@ -216,7 +236,19 @@ def render_vverbose(doc): ostream.write(" @ ") ostream.writeln(rutils.hex(location)) render_match(ostream, match, indent=1) - ostream.write("\n") + +def render_vverbose(doc): + ostream = rutils.StringIO() + + capa.render.verbose.render_meta(ostream, doc) + ostream.write("\n") + + render_functions(ostream, doc) + ostream.write("\n") + + render_rules(ostream, doc) + ostream.write("\n") + return ostream.getvalue() From 
03dcc92cb3021d2642e291b3dcbff4c62e3bb0b3 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Fri, 3 Jul 2020 10:41:49 -0600 Subject: [PATCH 17/32] pep8 --- capa/render/verbose.py | 1 + 1 file changed, 1 insertion(+) diff --git a/capa/render/verbose.py b/capa/render/verbose.py index 029995b7..e753545b 100644 --- a/capa/render/verbose.py +++ b/capa/render/verbose.py @@ -19,6 +19,7 @@ import tabulate import capa.rules import capa.render.utils as rutils + def render_meta(ostream, doc): """ like: From f7c460777ff8bd59f218b0e11579c7efbf1fd6b5 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Fri, 3 Jul 2020 20:51:41 -0600 Subject: [PATCH 18/32] render: use list literal --- capa/render/default.py | 11 ++++++----- capa/render/verbose.py | 27 +++++++++++++-------------- setup.py | 16 +++++++++------- 3 files changed, 28 insertions(+), 26 deletions(-) diff --git a/capa/render/default.py b/capa/render/default.py index d7971124..bce458d2 100644 --- a/capa/render/default.py +++ b/capa/render/default.py @@ -17,11 +17,12 @@ def width(s, character_count): def render_meta(doc, ostream): - rows = [] - rows.append((width("md5", 22), width(doc["meta"]["sample"]["md5"], 82))) - rows.append(("path", doc["meta"]["sample"]["path"])) - rows.append(("timestamp", doc["meta"]["timestamp"])) - rows.append(("capa version", doc["meta"]["version"])) + rows = [ + (width("md5", 22), width(doc["meta"]["sample"]["md5"], 82)), + ("path", doc["meta"]["sample"]["path"]), + ("timestamp", doc["meta"]["timestamp"]), + ("capa version", doc["meta"]["version"]), + ] ostream.write(tabulate.tabulate(rows, tablefmt="psql")) ostream.write("\n") diff --git a/capa/render/verbose.py b/capa/render/verbose.py index e753545b..65dac839 100644 --- a/capa/render/verbose.py +++ b/capa/render/verbose.py @@ -36,20 +36,19 @@ def render_meta(ostream, doc): function count 42 total feature count 1918 """ - rows = [] - rows.append(("md5", doc["meta"]["sample"]["md5"])) - rows.append(("sha1", 
doc["meta"]["sample"]["sha1"])) - rows.append(("sha256", doc["meta"]["sample"]["sha256"])) - rows.append(("path", doc["meta"]["sample"]["path"])) - rows.append(("timestamp", doc["meta"]["timestamp"])) - rows.append(("capa version", doc["meta"]["version"])) - rows.append(("format", doc["meta"]["analysis"]["format"])) - rows.append(("extractor", doc["meta"]["analysis"]["extractor"])) - rows.append(("base address", hex(doc["meta"]["analysis"]["base_address"]))) - rows.append(("function count", len(doc["meta"]["counts"]["functions"]))) - rows.append( - ("total feature count", doc["meta"]["counts"]["file"] + sum(doc["meta"]["counts"]["functions"].values())) - ) + rows = [ + ("md5", doc["meta"]["sample"]["md5"]), + ("sha1", doc["meta"]["sample"]["sha1"]), + ("sha256", doc["meta"]["sample"]["sha256"]), + ("path", doc["meta"]["sample"]["path"]), + ("timestamp", doc["meta"]["timestamp"]), + ("capa version", doc["meta"]["version"]), + ("format", doc["meta"]["analysis"]["format"]), + ("extractor", doc["meta"]["analysis"]["extractor"]), + ("base address", hex(doc["meta"]["analysis"]["base_address"])), + ("function count", len(doc["meta"]["counts"]["functions"])), + ("total feature count", doc["meta"]["counts"]["file"] + sum(doc["meta"]["counts"]["functions"].values())), + ] ostream.writeln(tabulate.tabulate(rows, tablefmt="plain")) diff --git a/setup.py b/setup.py index b23bc084..a01a05ca 100644 --- a/setup.py +++ b/setup.py @@ -41,13 +41,15 @@ setuptools.setup( include_package_data=True, install_requires=requirements, extras_require={ - "dev": ["pytest", - "pytest-sugar", - "pytest-instafail", - "pytest-cov", - "pycodestyle", - "black ; python_version>'3.0'", - "isort"] + "dev": [ + "pytest", + "pytest-sugar", + "pytest-instafail", + "pytest-cov", + "pycodestyle", + "black ; python_version>'3.0'", + "isort", + ] }, zip_safe=False, keywords="capa", From c8cb0cd8f78a444bc9286a85af7a17945cf7601e Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Fri, 3 Jul 2020 20:52:53 
-0600 Subject: [PATCH 19/32] render: default: remove extra fields from meta --- capa/render/default.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/capa/render/default.py b/capa/render/default.py index bce458d2..36cfb43e 100644 --- a/capa/render/default.py +++ b/capa/render/default.py @@ -20,8 +20,6 @@ def render_meta(doc, ostream): rows = [ (width("md5", 22), width(doc["meta"]["sample"]["md5"], 82)), ("path", doc["meta"]["sample"]["path"]), - ("timestamp", doc["meta"]["timestamp"]), - ("capa version", doc["meta"]["version"]), ] ostream.write(tabulate.tabulate(rows, tablefmt="psql")) From caa0b3850b09db6135d0c9cf408ab6a45902c2e1 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Fri, 3 Jul 2020 20:55:31 -0600 Subject: [PATCH 20/32] render: default: tweak column width for common case --- capa/render/default.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/capa/render/default.py b/capa/render/default.py index 36cfb43e..6bdef70a 100644 --- a/capa/render/default.py +++ b/capa/render/default.py @@ -47,7 +47,7 @@ def render_capabilities(doc, ostream): capability = "%s (%d matches)" % (rutils.bold(rule["meta"]["name"]), count) rows.append((capability, rule["meta"]["namespace"])) - ostream.write(tabulate.tabulate(rows, headers=[width("CAPABILITY", 40), width("NAMESPACE", 40)], tablefmt="psql")) + ostream.write(tabulate.tabulate(rows, headers=[width("CAPABILITY", 50), width("NAMESPACE", 50)], tablefmt="psql")) ostream.write("\n") From b716fb698030cdcaf05fd2c985cae969fc7acee2 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Fri, 3 Jul 2020 21:02:55 -0600 Subject: [PATCH 21/32] meta: store feature counts in doc.meta.analysis.feature_counts --- capa/ida/ida_capa_explorer.py | 2 +- capa/main.py | 10 +++++----- capa/render/verbose.py | 4 ++-- capa/render/vverbose.py | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/capa/ida/ida_capa_explorer.py b/capa/ida/ida_capa_explorer.py index 20757fc3..3230668c 100644 --- 
a/capa/ida/ida_capa_explorer.py +++ b/capa/ida/ida_capa_explorer.py @@ -345,7 +345,7 @@ class CapaExplorerForm(idaapi.PluginForm): capabilities, counts = capa.main.find_capabilities( rules, capa.features.extractors.ida.IdaFeatureExtractor(), True ) - meta.update(counts) + meta["analysis"].update(counts) # support binary files specifically for x86/AMD64 shellcode # warn user binary file is loaded but still allow capa to process it diff --git a/capa/main.py b/capa/main.py index 23fe4b4e..c883786a 100644 --- a/capa/main.py +++ b/capa/main.py @@ -96,11 +96,11 @@ def find_capabilities(ruleset, extractor, disable_progress=None): all_function_matches = collections.defaultdict(list) all_bb_matches = collections.defaultdict(list) - meta = {"counts": {"file": 0, "functions": {},}} + meta = {"feature_counts": {"file": 0, "functions": {},}} for f in tqdm.tqdm(extractor.get_functions(), disable=disable_progress, unit=" functions"): function_matches, bb_matches, feature_count = find_function_capabilities(ruleset, extractor, f) - meta["counts"]["functions"][f.__int__()] = feature_count + meta["feature_counts"]["functions"][f.__int__()] = feature_count logger.debug("analyzed function 0x%x and extracted %d features", f.__int__(), feature_count) for rule_name, res in function_matches.items(): @@ -116,7 +116,7 @@ def find_capabilities(ruleset, extractor, disable_progress=None): } all_file_matches, feature_count = find_file_capabilities(ruleset, extractor, function_features) - meta["counts"]["file"] = feature_count + meta["feature_counts"]["file"] = feature_count matches = {} matches.update(all_bb_matches) @@ -492,7 +492,7 @@ def main(argv=None): meta = collect_metadata(argv, args.sample, format, extractor) capabilities, counts = find_capabilities(rules, extractor) - meta.update(counts) + meta["analysis"].update(counts) if has_file_limitation(rules, capabilities): # bail if capa encountered file limitation e.g. 
a packed binary @@ -552,7 +552,7 @@ def ida_main(): meta = capa.ida.helpers.collect_metadata() capabilities, counts = find_capabilities(rules, capa.features.extractors.ida.IdaFeatureExtractor()) - meta.update(counts) + meta["analysis"].update(counts) if has_file_limitation(rules, capabilities, is_standalone=False): capa.ida.helpers.inform_user_ida_ui("capa encountered warnings during analysis") diff --git a/capa/render/verbose.py b/capa/render/verbose.py index 65dac839..e680129b 100644 --- a/capa/render/verbose.py +++ b/capa/render/verbose.py @@ -46,8 +46,8 @@ def render_meta(ostream, doc): ("format", doc["meta"]["analysis"]["format"]), ("extractor", doc["meta"]["analysis"]["extractor"]), ("base address", hex(doc["meta"]["analysis"]["base_address"])), - ("function count", len(doc["meta"]["counts"]["functions"])), - ("total feature count", doc["meta"]["counts"]["file"] + sum(doc["meta"]["counts"]["functions"].values())), + ("function count", len(doc["meta"]["analysis"]["feature_counts"]["functions"])), + ("total feature count", doc["meta"]["analysis"]["feature_counts"]["file"] + sum(doc["meta"]["analysis"]["feature_counts"]["functions"].values())), ] ostream.writeln(tabulate.tabulate(rows, tablefmt="plain")) diff --git a/capa/render/vverbose.py b/capa/render/vverbose.py index 510c832c..fd7a6661 100644 --- a/capa/render/vverbose.py +++ b/capa/render/vverbose.py @@ -169,7 +169,7 @@ def render_functions(ostream, doc): matches_by_function[va].add(rule["meta"]["name"]) ostream.writeln("## functions") - for va, feature_count in sorted(doc["meta"]["counts"]["functions"].items()): + for va, feature_count in sorted(doc["meta"]["analysis"]["feature_counts"]["functions"].items()): va = int(va) ostream.write("function at 0x%X with %d features: " % (va, feature_count)) if not matches_by_function.get(va, {}): From a5c3080829b031e4f400c658c3a889a4533a325e Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Fri, 3 Jul 2020 21:03:09 -0600 Subject: [PATCH 22/32] pep8 --- 
capa/render/verbose.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/capa/render/verbose.py b/capa/render/verbose.py index e680129b..4b096206 100644 --- a/capa/render/verbose.py +++ b/capa/render/verbose.py @@ -47,7 +47,11 @@ def render_meta(ostream, doc): ("extractor", doc["meta"]["analysis"]["extractor"]), ("base address", hex(doc["meta"]["analysis"]["base_address"])), ("function count", len(doc["meta"]["analysis"]["feature_counts"]["functions"])), - ("total feature count", doc["meta"]["analysis"]["feature_counts"]["file"] + sum(doc["meta"]["analysis"]["feature_counts"]["functions"].values())), + ( + "total feature count", + doc["meta"]["analysis"]["feature_counts"]["file"] + + sum(doc["meta"]["analysis"]["feature_counts"]["functions"].values()), + ), ] ostream.writeln(tabulate.tabulate(rows, tablefmt="plain")) From 6a9842f44fb79fb91605f1fa3d360f34c3ce13c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ana=20Mar=C3=ADa=20Mart=C3=ADnez=20G=C3=B3mez?= Date: Fri, 3 Jul 2020 10:20:11 +0200 Subject: [PATCH 23/32] Fix misplaced type annotation A misplaced type annotation in `capa/main.py` causes that black fails to solve offenses in this file. It may also make capa fail in newer Python versions. Replace type by `schema` as we don't use type checker tools. --- capa/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/capa/main.py b/capa/main.py index ca90b24c..284c3d03 100644 --- a/capa/main.py +++ b/capa/main.py @@ -104,7 +104,7 @@ def find_capabilities(ruleset, extractor, disable_progress=None): all_bb_matches[rule_name].extend(res) # mapping from matched rule feature to set of addresses at which it matched. 
- # type: Dict[MatchedRule, Set[int]] + # schema: Dic[MatchedRule: Set[int] function_features = { capa.features.MatchedRule(rule_name): set(map(lambda p: p[0], results)) for rule_name, results in all_function_matches.items() From 50d82f98ad34e11681a098e69dabcf9ea71b077d Mon Sep 17 00:00:00 2001 From: Moritz Raabe Date: Mon, 6 Jul 2020 12:30:31 +0200 Subject: [PATCH 24/32] after #83 all basic block features are also function scope features --- capa/rules.py | 18 ++++-------------- rules | 2 +- 2 files changed, 5 insertions(+), 15 deletions(-) diff --git a/capa/rules.py b/capa/rules.py index bff1f19e..f81cfac3 100644 --- a/capa/rules.py +++ b/capa/rules.py @@ -58,24 +58,11 @@ SUPPORTED_FEATURES = { capa.features.String, }, FUNCTION_SCOPE: { - capa.features.MatchedRule, - capa.features.insn.API, - capa.features.insn.Number, - capa.features.String, - capa.features.Bytes, - capa.features.insn.Offset, - capa.features.insn.Mnemonic, + # plus basic block scope features, see below capa.features.basicblock.BasicBlock, capa.features.Characteristic("switch"), - capa.features.Characteristic("nzxor"), - capa.features.Characteristic("peb access"), - capa.features.Characteristic("fs access"), - capa.features.Characteristic("gs access"), - capa.features.Characteristic("cross section flow"), - capa.features.Characteristic("stack string"), capa.features.Characteristic("calls from"), capa.features.Characteristic("calls to"), - capa.features.Characteristic("indirect call"), capa.features.Characteristic("loop"), capa.features.Characteristic("recursive call"), }, @@ -98,6 +85,9 @@ SUPPORTED_FEATURES = { }, } +# all basic block scope features are also function scope features +SUPPORTED_FEATURES[FUNCTION_SCOPE].update(SUPPORTED_FEATURES[BASIC_BLOCK_SCOPE]) + class InvalidRule(ValueError): def __init__(self, msg): diff --git a/rules b/rules index 54885300..a8621978 160000 --- a/rules +++ b/rules @@ -1 +1 @@ -Subproject commit 548853005591996b11d6b8d1140c9e353254e9f2 +Subproject commit 
a8621978cf510a53965b919d7575a1e57d8284ce From 9e0d2ffd7bcd5847f0e705ee439ff4b14b72c64d Mon Sep 17 00:00:00 2001 From: Moritz Raabe Date: Mon, 6 Jul 2020 13:00:28 +0200 Subject: [PATCH 25/32] account for Windows paths --- scripts/lint.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/scripts/lint.py b/scripts/lint.py index 69bf3bf4..c0cc53df 100644 --- a/scripts/lint.py +++ b/scripts/lint.py @@ -89,7 +89,7 @@ class NamespaceDoesntMatchRulePath(Lint): if "lib" in rule.meta: return False - return rule.meta["namespace"] not in posixpath.normpath(rule.meta["capa/path"]) + return rule.meta["namespace"] not in get_normpath(rule.meta["capa/path"]) class MissingScope(Lint): @@ -216,7 +216,7 @@ class LibRuleNotInLibDirectory(Lint): if "lib" not in rule.meta: return False - return "/lib/" not in posixpath.normpath(rule.meta["capa/path"]) + return "/lib/" not in get_normpath(rule.meta["capa/path"]) class LibRuleHasNamespace(Lint): @@ -314,6 +314,10 @@ FEATURE_LINTS = ( ) +def get_normpath(path): + return posixpath.normpath(path).replace(os.sep, "/") + + def lint_features(ctx, rule): features = get_features(ctx, rule) return run_feature_lints(FEATURE_LINTS, ctx, features) From 845938d4e6a7640354b34d1730346ebc1b0d37e1 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Mon, 6 Jul 2020 13:48:30 -0600 Subject: [PATCH 26/32] scripts: show-features: add doc --- scripts/show-features.py | 56 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 55 insertions(+), 1 deletion(-) diff --git a/scripts/show-features.py b/scripts/show-features.py index e8ea988b..a6486cc8 100644 --- a/scripts/show-features.py +++ b/scripts/show-features.py @@ -1,6 +1,60 @@ #!/usr/bin/env python2 """ -show the features extracted by capa. +show-features + +Show the features that capa extracts from the given sample, +to assist with the development of rules. 
+ +If you have a function with a capability that you'd like to detect, +you can run this tool and grep for the function/basic block/instruction addresses +to see what capa picks up. +This way, you can verify that capa successfully notices the features you'd reference. + +Example:: + + $ python scripts/show-features.py /tmp/suspicious.dll_ + ... + file: 0x10004e4d: export(__entry) + file: 0x10004706: export(Install) + file: 0x10004c2b: export(uninstallA) + file: 0x10005034: import(kernel32.GetStartupInfoA) + file: 0x10005034: import(GetStartupInfoA) + file: 0x10005048: import(kernel32.SetLastError) + file: 0x00004e10: string(Y29ubmVjdA==) + file: 0x00004e28: string(practicalmalwareanalysis.com) + file: 0x00004e68: string(serve.html) + file: 0x00004eb8: string(dW5zdXBwb3J0) + file: 0x00004ec8: string(c2xlZXA=) + func: 0x100012c2: characteristic(calls to) + func: 0x10001000: characteristic(loop) + bb : 0x10001000: basic block + insn: 0x10001000: mnemonic(push) + insn: 0x10001001: mnemonic(push) + insn: 0x10001002: mnemonic(push) + insn: 0x10001003: mnemonic(push) + insn: 0x10001004: mnemonic(push) + insn: 0x10001005: mnemonic(push) + insn: 0x10001006: mnemonic(xor) + insn: 0x10001008: number(0x1) + insn: 0x10001008: mnemonic(mov) + bb : 0x1000100a: basic block + bb : 0x1000100a: characteristic(tight loop) + insn: 0x1000100a: mnemonic(movzx) + insn: 0x1000100d: mnemonic(mov) + insn: 0x1000100f: offset(0x1000A7C8) + insn: 0x1000100f: mnemonic(mov) + insn: 0x10001015: offset(0x100075C8) + insn: 0x10001015: mnemonic(mov) + insn: 0x1000101b: mnemonic(mov) + insn: 0x1000101d: number(0x80) + insn: 0x1000101d: mnemonic(and) + insn: 0x10001020: mnemonic(neg) + insn: 0x10001022: mnemonic(sbb) + insn: 0x10001024: number(0x1B) + insn: 0x10001024: mnemonic(and) + insn: 0x10001027: number(0x1) + insn: 0x10001027: mnemonic(shl) + ... 
""" import sys import logging From 484179c0421d988e8ec67a2725bf80dc5419020f Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Mon, 6 Jul 2020 14:06:06 -0600 Subject: [PATCH 27/32] render: move capabilities by function to its own standalone script --- capa/render/vverbose.py | 43 ---- scripts/show-capabilities-by-function.py | 238 +++++++++++++++++++++++ 2 files changed, 238 insertions(+), 43 deletions(-) create mode 100644 scripts/show-capabilities-by-function.py diff --git a/capa/render/vverbose.py b/capa/render/vverbose.py index fd7a6661..df204745 100644 --- a/capa/render/vverbose.py +++ b/capa/render/vverbose.py @@ -141,45 +141,6 @@ def render_match(ostream, match, indent=0, mode=MODE_SUCCESS): render_match(ostream, child, indent=indent + 1, mode=child_mode) -def render_functions(ostream, doc): - """ - like: - - ## functions - function at 0x10001000 with 66 features: no matches - function at 0x100012b0 with 73 features: no matches - function at 0x1000321a with 33 features: - - get hostname - - initialize Winsock library - function at 0x10003286 with 63 features: - - create thread - - terminate thread - function at 0x10003415 with 116 features: - - write file - - send data - - link function at runtime - - create HTTP request - - get common file path - - send HTTP request - - connect to HTTP server - """ - matches_by_function = collections.defaultdict(set) - for rule in rutils.capability_rules(doc): - for va in rule["matches"].keys(): - matches_by_function[va].add(rule["meta"]["name"]) - - ostream.writeln("## functions") - for va, feature_count in sorted(doc["meta"]["analysis"]["feature_counts"]["functions"].items()): - va = int(va) - ostream.write("function at 0x%X with %d features: " % (va, feature_count)) - if not matches_by_function.get(va, {}): - ostream.writeln("no matches") - else: - ostream.writeln("") - for rule_name in matches_by_function[va]: - ostream.writeln(" - " + rule_name) - - def render_rules(ostream, doc): """ like: @@ -197,7 +158,6 @@ def 
render_rules(ostream, doc): api: kernel32.GetLastError @ 0x10004A87 api: kernel32.OutputDebugString @ 0x10004767, 0x10004787, 0x10004816, 0x10004895 """ - ostream.writeln("## rules") for rule in rutils.capability_rules(doc): count = len(rule["matches"]) if count == 1: @@ -245,9 +205,6 @@ def render_vverbose(doc): capa.render.verbose.render_meta(ostream, doc) ostream.write("\n") - render_functions(ostream, doc) - ostream.write("\n") - render_rules(ostream, doc) ostream.write("\n") diff --git a/scripts/show-capabilities-by-function.py b/scripts/show-capabilities-by-function.py new file mode 100644 index 00000000..bb73d389 --- /dev/null +++ b/scripts/show-capabilities-by-function.py @@ -0,0 +1,238 @@ +#!/usr/bin/env python2 +""" +show-capabilities-by-function + +Invoke capa to extract the capabilities of the given sample +and emit the results grouped by function. + +This is useful to identify "complex functions" - that is, +functions that implement a lot of different types of logic. + +Example:: + + $ python scripts/show-capabilities-by-function.py /tmp/suspicious.dll_ + function at 0x1000321A with 33 features: + - get hostname + - initialize Winsock library + function at 0x10003286 with 63 features: + - create thread + - terminate thread + function at 0x10003415 with 116 features: + - write file + - send data + - link function at runtime + - create HTTP request + - get common file path + - send HTTP request + - connect to HTTP server + function at 0x10003797 with 81 features: + - get socket status + - send data + - receive data + - create TCP socket + - send data on socket + - receive data on socket + - act as TCP client + - resolve DNS + - create UDP socket + - initialize Winsock library + - set socket configuration + - connect TCP socket + ... 
+""" +import os +import sys +import logging +import collections + +import colorama +import argparse + +import capa.main +import capa.rules +import capa.engine +import capa.render +import capa.render.utils as rutils +import capa.features +import capa.features.freeze +import capa.features.extractors.viv + + +logger = logging.getLogger("capa.show-capabilities-by-function") + + +def render_matches_by_function(doc): + """ + like: + + function at 0x1000321a with 33 features: + - get hostname + - initialize Winsock library + function at 0x10003286 with 63 features: + - create thread + - terminate thread + function at 0x10003415 with 116 features: + - write file + - send data + - link function at runtime + - create HTTP request + - get common file path + - send HTTP request + - connect to HTTP server + """ + ostream = rutils.StringIO() + + matches_by_function = collections.defaultdict(set) + for rule in rutils.capability_rules(doc): + for va in rule["matches"].keys(): + matches_by_function[va].add(rule["meta"]["name"]) + + for va, feature_count in sorted(doc["meta"]["analysis"]["feature_counts"]["functions"].items()): + va = int(va) + if not matches_by_function.get(va, {}): + continue + ostream.writeln("function at 0x%X with %d features: " % (va, feature_count)) + for rule_name in matches_by_function[va]: + ostream.writeln(" - " + rule_name) + + ostream.write("\n") + return ostream.getvalue() + + +def main(argv=None): + if argv is None: + argv = sys.argv[1:] + + formats = [ + ("auto", "(default) detect file type automatically"), + ("pe", "Windows PE file"), + ("sc32", "32-bit shellcode"), + ("sc64", "64-bit shellcode"), + ("freeze", "features previously frozen by capa"), + ] + format_help = ", ".join(["%s: %s" % (f[0], f[1]) for f in formats]) + + parser = argparse.ArgumentParser(description="detect capabilities in programs.") + parser.add_argument("sample", type=str, help="Path to sample to analyze") + parser.add_argument( + "-r", + "--rules", + type=str, + 
default="(embedded rules)", + help="Path to rule file or directory, use embedded rules by default", + ) + parser.add_argument("-t", "--tag", type=str, help="Filter on rule meta field values") + parser.add_argument("-d", "--debug", action="store_true", help="Enable debugging output on STDERR") + parser.add_argument("-q", "--quiet", action="store_true", help="Disable all output but errors") + parser.add_argument( + "-f", "--format", choices=[f[0] for f in formats], default="auto", + help="Select sample format, %s" % format_help + ) + args = parser.parse_args(args=argv) + + if args.quiet: + logging.basicConfig(level=logging.ERROR) + logging.getLogger().setLevel(logging.ERROR) + elif args.debug: + logging.basicConfig(level=logging.DEBUG) + logging.getLogger().setLevel(logging.DEBUG) + else: + logging.basicConfig(level=logging.INFO) + logging.getLogger().setLevel(logging.INFO) + + # disable vivisect-related logging, it's verbose and not relevant for capa users + capa.main.set_vivisect_log_level(logging.CRITICAL) + + # py2 doesn't know about cp65001, which is a variant of utf-8 on windows + # tqdm bails when trying to render the progress bar in this setup. + # because cp65001 is utf-8, we just map that codepage to the utf-8 codec. 
+ # see #380 and: https://stackoverflow.com/a/3259271/87207 + import codecs + + codecs.register(lambda name: codecs.lookup("utf-8") if name == "cp65001" else None) + + if args.rules == "(embedded rules)": + logger.info("-" * 80) + logger.info(" Using default embedded rules.") + logger.info(" To provide your own rules, use the form `capa.exe ./path/to/rules/ /path/to/mal.exe`.") + logger.info(" You can see the current default rule set here:") + logger.info(" https://github.com/fireeye/capa-rules") + logger.info("-" * 80) + + logger.debug("detected running from source") + args.rules = os.path.join(os.path.dirname(__file__), "..", "rules") + logger.debug("default rule path (source method): %s", args.rules) + else: + logger.info("using rules path: %s", args.rules) + + try: + rules = capa.main.get_rules(args.rules) + rules = capa.rules.RuleSet(rules) + logger.info("successfully loaded %s rules", len(rules)) + if args.tag: + rules = rules.filter_rules_by_meta(args.tag) + logger.info("selected %s rules", len(rules)) + except (IOError, capa.rules.InvalidRule, capa.rules.InvalidRuleSet) as e: + logger.error("%s", str(e)) + return -1 + + with open(args.sample, "rb") as f: + taste = f.read(8) + + if (args.format == "freeze") or (args.format == "auto" and capa.features.freeze.is_freeze(taste)): + format = "freeze" + with open(args.sample, "rb") as f: + extractor = capa.features.freeze.load(f.read()) + else: + format = args.format + try: + extractor = capa.main.get_extractor(args.sample, args.format) + except capa.main.UnsupportedFormatError: + logger.error("-" * 80) + logger.error(" Input file does not appear to be a PE file.") + logger.error(" ") + logger.error( + " capa currently only supports analyzing PE files (or shellcode, when using --format sc32|sc64)." 
+ ) + logger.error( + " If you don't know the input file type, you can try using the `file` utility to guess it.") + logger.error("-" * 80) + return -1 + except capa.main.UnsupportedRuntimeError: + logger.error("-" * 80) + logger.error(" Unsupported runtime or Python interpreter.") + logger.error(" ") + logger.error(" capa supports running under Python 2.7 using Vivisect for binary analysis.") + logger.error(" It can also run within IDA Pro, using either Python 2.7 or 3.5+.") + logger.error(" ") + logger.error( + " If you're seeing this message on the command line, please ensure you're running Python 2.7.") + logger.error("-" * 80) + return -1 + + meta = capa.main.collect_metadata(argv, args.sample, format, extractor) + capabilities, counts = capa.main.find_capabilities(rules, extractor) + meta["analysis"].update(counts) + + if capa.main.has_file_limitation(rules, capabilities): + # bail if capa encountered file limitation e.g. a packed binary + # do show the output in verbose mode, though. + if not (args.verbose or args.vverbose or args.json): + return -1 + + # colorama will detect: + # - when on Windows console, and fixup coloring, and + # - when not an interactive session, and disable coloring + # renderers should use coloring and assume it will be stripped out if necessary. 
+ colorama.init() + doc = capa.render.convert_capabilities_to_result_document(meta, rules, capabilities) + print(render_matches_by_function(doc)) + colorama.deinit() + + logger.info("done.") + + return 0 + + +if __name__ == "__main__": + sys.exit(main()) \ No newline at end of file From ec4fa4c199a79769fd0a043e185d08bc83f48958 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Mon, 6 Jul 2020 14:06:37 -0600 Subject: [PATCH 28/32] main: don't log expected non-rule files --- capa/main.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/capa/main.py b/capa/main.py index c883786a..503f5f09 100644 --- a/capa/main.py +++ b/capa/main.py @@ -307,7 +307,10 @@ def get_rules(rule_path): for root, dirs, files in os.walk(rule_path): for file in files: if not file.endswith(".yml"): - logger.warning("skipping non-.yml file: %s", file) + if not (file.endswith(".md") or file.endswith(".git")): + # expect to see readme.md, format.md, and maybe a .git directory + # other things maybe are rules, but are mis-named. 
+ logger.warning("skipping non-.yml file: %s", file) continue rule_path = os.path.join(root, file) From ecba9986fca37295934f8dd590dce1e807c08009 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Mon, 6 Jul 2020 14:07:02 -0600 Subject: [PATCH 29/32] pep8 --- scripts/show-capabilities-by-function.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/scripts/show-capabilities-by-function.py b/scripts/show-capabilities-by-function.py index bb73d389..80b09b7b 100644 --- a/scripts/show-capabilities-by-function.py +++ b/scripts/show-capabilities-by-function.py @@ -45,19 +45,18 @@ import sys import logging import collections -import colorama import argparse +import colorama import capa.main import capa.rules import capa.engine import capa.render -import capa.render.utils as rutils import capa.features +import capa.render.utils as rutils import capa.features.freeze import capa.features.extractors.viv - logger = logging.getLogger("capa.show-capabilities-by-function") @@ -125,8 +124,11 @@ def main(argv=None): parser.add_argument("-d", "--debug", action="store_true", help="Enable debugging output on STDERR") parser.add_argument("-q", "--quiet", action="store_true", help="Disable all output but errors") parser.add_argument( - "-f", "--format", choices=[f[0] for f in formats], default="auto", - help="Select sample format, %s" % format_help + "-f", + "--format", + choices=[f[0] for f in formats], + default="auto", + help="Select sample format, %s" % format_help, ) args = parser.parse_args(args=argv) @@ -195,7 +197,8 @@ def main(argv=None): " capa currently only supports analyzing PE files (or shellcode, when using --format sc32|sc64)." ) logger.error( - " If you don't know the input file type, you can try using the `file` utility to guess it.") + " If you don't know the input file type, you can try using the `file` utility to guess it." 
+ ) logger.error("-" * 80) return -1 except capa.main.UnsupportedRuntimeError: @@ -206,7 +209,8 @@ def main(argv=None): logger.error(" It can also run within IDA Pro, using either Python 2.7 or 3.5+.") logger.error(" ") logger.error( - " If you're seeing this message on the command line, please ensure you're running Python 2.7.") + " If you're seeing this message on the command line, please ensure you're running Python 2.7." + ) logger.error("-" * 80) return -1 @@ -235,4 +239,4 @@ def main(argv=None): if __name__ == "__main__": - sys.exit(main()) \ No newline at end of file + sys.exit(main()) From 86bf376b19d99c631ed8856742a89b15f688f1b7 Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Mon, 6 Jul 2020 14:15:40 -0600 Subject: [PATCH 30/32] readme: add links to further information --- README.md | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 91264a68..fb97714a 100644 --- a/README.md +++ b/README.md @@ -61,9 +61,7 @@ Alternatively, you can fetch a nightly build of a standalone binary from one of - Linux: TODO - OSX: TODO -## installation - -See [doc/installation.md](doc/installation.md) for information on how to setup the project, including how to use it as a Python library. +To use capa as a library or integrate with another tool, see [doc/installation.md](doc/installation.md) for further setup instructions. For more information about how to use capa, including running it as an IDA script/plugin see [doc/usage.md](doc/usage.md). @@ -142,3 +140,9 @@ Here's an example rule used by capa: The [github.com/fireeye/capa-rules](https://github.com/fireeye/capa-rules) repository contains hundreds of standard library rules that are distributed with capa. Please learn to write rules and contribute new entries as you find interesting techniques in malware. 
+ +# further information + - [doc/usage.md](doc/usage.md) + - [doc/installation.md](doc/installation.md) + - [github.com/fireeye/capa-rules](https://github.com/fireeye/capa-rules) + - [doc/rule format.md](https://github.com/fireeye/capa-rules/blob/master/doc/format.md) From ea39379ec83de15ec55dc22db9e332562f9e31b7 Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Mon, 6 Jul 2020 14:21:57 -0600 Subject: [PATCH 31/32] installation: list development setup --- doc/installation.md | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/doc/installation.md b/doc/installation.md index be199336..6fd958bb 100644 --- a/doc/installation.md +++ b/doc/installation.md @@ -44,15 +44,17 @@ Finally, use `pip` to install the source code in "editable" mode. This means tha You'll find that the `capa.exe` (Windows) or `capa` (Linux) executables in your path now invoke the capa binary from this directory. -If you want to install the development dependencies, which you need to run the code formatters, syntax checker, rule linter and tests (and for the [hooks](#4-setup-hooks-optional)) run: +We use the following tools to ensure consistent code style and formatting: + - [black](https://github.com/psf/black) code formatter, with `-l 120` + - [isort](https://pypi.org/project/isort/) code formatter, with `--length-sort --line-width 120` + - [dos2unix](https://linux.die.net/man/1/dos2unix) for UNIX-style LF newlines + - [capafmt](https://github.com/fireeye/capa/blob/master/scripts/capafmt.py) rule formatter + +To install these development dependencies, run: `$ pip install -e ./local/path/to/src[dev]` -If you are using zsh, do not forget to escape the square brackets: - -`$ pip install -e ./local/path/to/src\[dev\]` - -Note that some development dependencies require Python3. +Note that some development dependencies (including the black code formatter) require Python3. ### 4. 
Setup hooks [optional] From 55de5061ea21a106ebff5e2d80ae2446d48d8838 Mon Sep 17 00:00:00 2001 From: Moritz Raabe Date: Mon, 6 Jul 2020 19:24:35 +0200 Subject: [PATCH 32/32] increase stack cookie byte delta --- capa/features/extractors/viv/insn.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/capa/features/extractors/viv/insn.py b/capa/features/extractors/viv/insn.py index 52c0362c..1516b13e 100644 --- a/capa/features/extractors/viv/insn.py +++ b/capa/features/extractors/viv/insn.py @@ -8,6 +8,11 @@ from capa.features.insn import Number, Offset, Mnemonic from capa.features.extractors.viv.indirect_calls import NotFoundError, resolve_indirect_call +# security cookie checks may perform non-zeroing XORs, these are expected within a certain +# byte range within the first and returning basic blocks, this helps to reduce FP features +SECURITY_COOKIE_BYTES_DELTA = 0x40 + + def interface_extract_instruction_XXX(f, bb, insn): """ parse features from the given instruction. @@ -257,11 +262,12 @@ def is_security_cookie(f, bb, insn): # expect security cookie init in first basic block within first bytes (instructions) bb0 = f.basic_blocks[0] - if bb == bb0 and insn.va < bb.va + 30: + + if bb == bb0 and insn.va < (bb.va + SECURITY_COOKIE_BYTES_DELTA): return True # ... or within last bytes (instructions) before a return - elif bb.instructions[-1].isReturn() and insn.va > bb.va + bb.size - 30: + elif bb.instructions[-1].isReturn() and insn.va > (bb.va + bb.size - SECURITY_COOKIE_BYTES_DELTA): return True return False