From b31c76614fe1cb354a5d2a70444eed228a2de2cb Mon Sep 17 00:00:00 2001
From: William Ballenthin <william.ballenthin@fireeye.com>
Date: Fri, 3 Jul 2020 01:21:58 -0600
Subject: [PATCH 01/32] submodule: rules: update

---
 rules | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rules b/rules
index 799b7bbf..ba7843e7 160000
--- a/rules
+++ b/rules
@@ -1 +1 @@
-Subproject commit 799b7bbf4bfe198194370a6cb86853882743ba56
+Subproject commit ba7843e7a8e1754f08225abf48dbb87626f82026

From 2f78c681e42293f8f7e1c0eec291e5e853a38a90 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ana=20Mar=C3=ADa=20Mart=C3=ADnez=20G=C3=B3mez?=
 <anamaria.martinezgom@FireEye.com>
Date: Fri, 3 Jul 2020 11:30:39 +0200
Subject: [PATCH 02/32] Document how to install development dependencies

Introduced in:
https://github.com/fireeye/capa/commit/d1dd997b7bb58aedb059cdcd68b74117543c6088
---
 doc/installation.md | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/doc/installation.md b/doc/installation.md
index 403574d5..d8a689b0 100644
--- a/doc/installation.md
+++ b/doc/installation.md
@@ -47,6 +47,14 @@ Next, use `pip` to install the source code in "editable" mode. This means that P
 
 You'll find that the `capa.exe` (Windows) or `capa` (Linux) executables in your path now invoke the capa binary from this directory.
 
+If you want to install the development dependencies, which you need to run the code formatters, syntax checker, rule linter and tests (and for the [hooks](#4-setup-hooks-optional)), run:
+
+`$ pip install -e ./local/path/to/src[dev]`
+
+If you are using zsh, do not forget to escape the square brackets:
+
+`$ pip install -e ./local/path/to/src\[dev\]`
+
 ### 4. Setup hooks [optional]
 
 If you plan to contribute to capa, you may want to setup the hooks.

From 26d2f120381d6c8033e6237822a413081ced097a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ana=20Mar=C3=ADa=20Mart=C3=ADnez=20G=C3=B3mez?=
 <anamaria.martinezgom@FireEye.com>
Date: Fri, 3 Jul 2020 11:35:52 +0200
Subject: [PATCH 03/32] Add Python3 requirement for black

When installing the development dependencies with Python2, it fails as
black is not available for Python2.
---
 doc/installation.md | 2 ++
 setup.py            | 8 +++++++-
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/doc/installation.md b/doc/installation.md
index d8a689b0..4d165f9d 100644
--- a/doc/installation.md
+++ b/doc/installation.md
@@ -55,6 +55,8 @@ If you are using zsh, do not forget to escape the square brackets:
 
 `$ pip install -e ./local/path/to/src\[dev\]`
 
+Note that some development dependencies require Python3.
+
 ### 4. Setup hooks [optional]
 
 If you plan to contribute to capa, you may want to setup the hooks.
diff --git a/setup.py b/setup.py
index 3522e8e2..b23bc084 100644
--- a/setup.py
+++ b/setup.py
@@ -41,7 +41,13 @@ setuptools.setup(
     include_package_data=True,
     install_requires=requirements,
     extras_require={
-        "dev": ["pytest", "pytest-sugar", "pytest-instafail", "pytest-cov", "pycodestyle", "black", "isort"]
+        "dev": ["pytest",
+                "pytest-sugar",
+                "pytest-instafail",
+                "pytest-cov",
+                "pycodestyle",
+                "black ; python_version>'3.0'",
+                "isort"]
     },
     zip_safe=False,
     keywords="capa",

From ca7cf93d181df4674c5b6b37157f385aa4b6bb6a Mon Sep 17 00:00:00 2001
From: William Ballenthin <william.ballenthin@fireeye.com>
Date: Fri, 3 Jul 2020 09:14:34 -0600
Subject: [PATCH 04/32] submodule: rules: update

---
 rules | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rules b/rules
index ba7843e7..54885300 160000
--- a/rules
+++ b/rules
@@ -1 +1 @@
-Subproject commit ba7843e7a8e1754f08225abf48dbb87626f82026
+Subproject commit 548853005591996b11d6b8d1140c9e353254e9f2

From 93e7206bb2a5105214f1e1e38f2fbd7f23233f20 Mon Sep 17 00:00:00 2001
From: Michael Hunhoff <mike.hunhoff@gmail.com>
Date: Fri, 3 Jul 2020 14:32:41 -0600
Subject: [PATCH 05/32] removing circular import

---
 capa/features/extractors/__init__.py     | 13 -------------
 capa/features/extractors/ida/__init__.py |  8 +++++---
 2 files changed, 5 insertions(+), 16 deletions(-)

diff --git a/capa/features/extractors/__init__.py b/capa/features/extractors/__init__.py
index 85d1bd49..fed476f2 100644
--- a/capa/features/extractors/__init__.py
+++ b/capa/features/extractors/__init__.py
@@ -1,18 +1,5 @@
 import abc
 
-try:
-    import ida
-except (ImportError, SyntaxError):
-    pass
-
-try:
-    import viv
-except (ImportError, SyntaxError):
-    pass
-
-__all__ = ["ida", "viv"]
-
-
 class FeatureExtractor(object):
     """
     FeatureExtractor defines the interface for fetching features from a sample.
diff --git a/capa/features/extractors/ida/__init__.py b/capa/features/extractors/ida/__init__.py
index 5091fb34..c89ac82f 100644
--- a/capa/features/extractors/ida/__init__.py
+++ b/capa/features/extractors/ida/__init__.py
@@ -5,9 +5,9 @@ import idaapi
 
 import capa.features.extractors.ida.file
 import capa.features.extractors.ida.insn
-import capa.features.extractors.ida.helpers
 import capa.features.extractors.ida.function
 import capa.features.extractors.ida.basicblock
+
 from capa.features.extractors import FeatureExtractor
 
 
@@ -51,7 +51,8 @@ class IdaFeatureExtractor(FeatureExtractor):
             yield feature, va
 
     def get_functions(self):
-        for f in capa.features.extractors.ida.helpers.get_functions(ignore_thunks=True, ignore_libs=True):
+        import capa.features.extractors.ida.helpers as ida_helpers
+        for f in ida_helpers.get_functions(ignore_thunks=True, ignore_libs=True):
             yield add_va_int_cast(f)
 
     def extract_function_features(self, f):
@@ -67,7 +68,8 @@ class IdaFeatureExtractor(FeatureExtractor):
             yield feature, va
 
     def get_instructions(self, f, bb):
-        for insn in capa.features.extractors.ida.helpers.get_instructions_in_range(bb.start_ea, bb.end_ea):
+        import capa.features.extractors.ida.helpers as ida_helpers
+        for insn in ida_helpers.get_instructions_in_range(bb.start_ea, bb.end_ea):
             yield add_va_int_cast(insn)
 
     def extract_insn_features(self, f, bb, insn):

From 6dc75c5f29583d2b1e669d11b17d5f51423fe092 Mon Sep 17 00:00:00 2001
From: Michael Hunhoff <mike.hunhoff@gmail.com>
Date: Fri, 3 Jul 2020 14:34:32 -0600
Subject: [PATCH 06/32] adding support for basicblock feature introduced by 78

---
 capa/ida/explorer/model.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/capa/ida/explorer/model.py b/capa/ida/explorer/model.py
index 4d876fee..3325e283 100644
--- a/capa/ida/explorer/model.py
+++ b/capa/ida/explorer/model.py
@@ -530,6 +530,9 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
                 parent, display, source=doc["rules"].get(feature[feature["type"]], {}).get("source", "")
             )
 
+        if feature["type"] == "basicblock":
+            return CapaExplorerBlockItem(parent, location)
+
         if feature["type"] in instruction_view:
             return CapaExplorerInstructionViewItem(parent, display, location)
 

From 5317e1e11e53b0070d59b791ff457eb93fabbb8c Mon Sep 17 00:00:00 2001
From: William Ballenthin <william.ballenthin@fireeye.com>
Date: Fri, 3 Jul 2020 09:32:37 -0600
Subject: [PATCH 07/32] feature extractor: null: add get_base_address()

closes #88
---
 capa/features/extractors/__init__.py | 4 ++++
 tests/test_freeze.py                 | 1 +
 2 files changed, 5 insertions(+)

diff --git a/capa/features/extractors/__init__.py b/capa/features/extractors/__init__.py
index fed476f2..75426041 100644
--- a/capa/features/extractors/__init__.py
+++ b/capa/features/extractors/__init__.py
@@ -180,6 +180,7 @@ class NullFeatureExtractor(FeatureExtractor):
     example::
 
         extractor = NullFeatureExtractor({
+            'base address': 0x401000,
             'file features': [
                 (0x402345, capa.features.Characteristic('embedded pe')),
             ],
@@ -214,6 +215,9 @@ class NullFeatureExtractor(FeatureExtractor):
         super(NullFeatureExtractor, self).__init__()
         self.features = features
 
+    def get_base_address(self):
+        return self.features["base address"]
+
     def extract_file_features(self):
         for p in self.features.get("file features", []):
             va, feature = p
diff --git a/tests/test_freeze.py b/tests/test_freeze.py
index 7b251264..f81114ca 100644
--- a/tests/test_freeze.py
+++ b/tests/test_freeze.py
@@ -10,6 +10,7 @@ from fixtures import *
 
 EXTRACTOR = capa.features.extractors.NullFeatureExtractor(
     {
+        "base address": 0x401000,
         "file features": [(0x402345, capa.features.Characteristic("embedded pe")),],
         "functions": {
             0x401000: {

From 80bdb4a54586ba42dc11c0deeb9ec1a7a3f066ee Mon Sep 17 00:00:00 2001
From: William Ballenthin <william.ballenthin@fireeye.com>
Date: Fri, 3 Jul 2020 09:32:58 -0600
Subject: [PATCH 08/32] pep8

---
 capa/features/extractors/__init__.py     | 1 +
 capa/features/extractors/ida/__init__.py | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/capa/features/extractors/__init__.py b/capa/features/extractors/__init__.py
index 75426041..091c1830 100644
--- a/capa/features/extractors/__init__.py
+++ b/capa/features/extractors/__init__.py
@@ -1,5 +1,6 @@
 import abc
 
+
 class FeatureExtractor(object):
     """
     FeatureExtractor defines the interface for fetching features from a sample.
diff --git a/capa/features/extractors/ida/__init__.py b/capa/features/extractors/ida/__init__.py
index c89ac82f..c2bc79f5 100644
--- a/capa/features/extractors/ida/__init__.py
+++ b/capa/features/extractors/ida/__init__.py
@@ -7,7 +7,6 @@ import capa.features.extractors.ida.file
 import capa.features.extractors.ida.insn
 import capa.features.extractors.ida.function
 import capa.features.extractors.ida.basicblock
-
 from capa.features.extractors import FeatureExtractor
 
 
@@ -52,6 +51,7 @@ class IdaFeatureExtractor(FeatureExtractor):
 
     def get_functions(self):
         import capa.features.extractors.ida.helpers as ida_helpers
+
         for f in ida_helpers.get_functions(ignore_thunks=True, ignore_libs=True):
             yield add_va_int_cast(f)
 
@@ -69,6 +69,7 @@ class IdaFeatureExtractor(FeatureExtractor):
 
     def get_instructions(self, f, bb):
         import capa.features.extractors.ida.helpers as ida_helpers
+
         for insn in ida_helpers.get_instructions_in_range(bb.start_ea, bb.end_ea):
             yield add_va_int_cast(insn)
 

From 0692e940e97e45a779f13d479d39a281b74e3af6 Mon Sep 17 00:00:00 2001
From: Willi Ballenthin <willi.ballenthin@gmail.com>
Date: Sun, 5 Jul 2020 14:29:17 -0600
Subject: [PATCH 09/32] usage: more words

---
 doc/usage.md | 72 +++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 51 insertions(+), 21 deletions(-)

diff --git a/doc/usage.md b/doc/usage.md
index dfd6f06d..7e37ad9e 100644
--- a/doc/usage.md
+++ b/doc/usage.md
@@ -1,35 +1,65 @@
 # capa usage
 
-
-# command line
-After you have downloaded the standalone version of capa or installed it via `pip` (see the [installation](installation.md) documentation) you can run capa directly from your terminal shell.
-
-- `$ capa -h`
-- `$ capa malware.exe`
-
-In this mode capa relies on vivisect which only runs under Python 2.
-
-## only run selected rules
-Use the `-t` option to only run selected rules. This is the preferred method over specifying a rule path which fails if dependent rules reside in other directories.
-
 ```
-$ capa -t communication malware.exe
+usage: capa [-h] [-r RULES] [-t TAG] [--version] [-j] [-v] [-vv] [-d] [-q]
+            [-f {auto,pe,sc32,sc64,freeze}]
+            sample
+
+detect capabilities in programs.
+
+positional arguments:
+  sample                Path to sample to analyze
+
+optional arguments:
+  -h, --help            show this help message and exit
+  -r RULES, --rules RULES
+                        Path to rule file or directory, use embedded rules by
+                        default
+  -t TAG, --tag TAG     Filter on rule meta field values
+  --version             Print the executable version and exit
+  -j, --json            Emit JSON instead of text
+  -v, --verbose         Enable verbose result document (no effect with --json)
+  -vv, --vverbose       Enable very verbose result document (no effect with
+                        --json)
+  -d, --debug           Enable debugging output on STDERR
+  -q, --quiet           Disable all output but errors
+  -f {auto,pe,sc32,sc64,freeze}, --format {auto,pe,sc32,sc64,freeze}
+                        Select sample format, auto: (default) detect file type
+                        automatically, pe: Windows PE file, sc32: 32-bit
+                        shellcode, sc64: 64-bit shellcode, freeze: features
+                        previously frozen by capa
 ```
 
-# IDA Pro
-capa runs from within IDA Pro. Run `capa/main.py` via File - Script file... (ALT + F7).
+## tips and tricks
 
-When running in IDA, capa uses IDA's disassembly and file analysis as its backend. These results may vary from the standalone version that uses vivisect.
+  - [match only rules by given author or namespace](#only-run-selected-rules)
+  - [IDA Pro capa explorer](#capa-explorer)
+  - [IDA Pro rule generator](#rule-generator)
 
-In IDA, capa supports Python 2 and Python 3. If you encounter issues with your specific setup, please open a new [Issue](https://github.com/fireeye/capa/issues). 
+### only run selected rules
+Use the `-t` option to run rules with the given metadata value (see the rule fields `rule.meta.*`).
+For example, `capa -t william.ballenthin@mandiant.com` runs rules that reference Willi's email address (probably as the author), or
+`capa -t communication` runs rules with the namespace `communication`.
 
-## IDA Pro plugins
-capa comes with two IDA Pro plugins located in the `capa/ida` directory.
+### IDA Pro integrations
+You can run capa from within IDA Pro. Run `capa/main.py` via `File - Script file...` (or ALT + F7). 
+When running in IDA, capa uses IDA's disassembly and file analysis as its backend. 
+These results may vary from the standalone version that uses vivisect.
+IDA's analysis is generally a bit faster and more thorough than vivisect's, so you might prefer this mode.
 
-### capa explorer
+When run under IDA, capa supports both Python 2 and Python 3 interpreters.
+If you encounter issues with your specific setup, please open a new [Issue](https://github.com/fireeye/capa/issues).
+
+Additionally, capa comes with two IDA Pro plugins located in the `capa/ida` directory: the explorer and the rule generator.
+
+#### capa explorer
 The capa explorer allows you to interactively display and browse capabilities capa identified in a binary.
+As you select rules or logic, capa will highlight the addresses that support its analysis conclusions.
+We like to use capa to help find the most interesting parts of a program, such as where the C2 mechanism might be.
 
 ![capa explorer](capa_explorer.png)
 
-### rule generator
+#### rule generator
 The rule generator helps you to easily write new rules based on the function you are currently analyzing in your IDA disassembly view.
+It shows the features that capa can extract from the function, and lets you quickly pull these into a rule template.
+You'll still have to provide the logic structures (`and`, `or`, `not`, etc.) but the features will be prepared for you.

From 5cd10bfc75ae07dabee02e117ca8f479db50ae44 Mon Sep 17 00:00:00 2001
From: William Ballenthin <william.ballenthin@fireeye.com>
Date: Fri, 3 Jul 2020 09:38:48 -0600
Subject: [PATCH 10/32] main: load shellcode at 0x690000

closes #94
---
 capa/main.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/capa/main.py b/capa/main.py
index ca90b24c..52516a8f 100644
--- a/capa/main.py
+++ b/capa/main.py
@@ -193,6 +193,9 @@ def is_supported_file_type(sample):
         return False
 
 
+SHELLCODE_BASE = 0x690000
+
+
 def get_shellcode_vw(sample, arch="auto"):
     """
     Return shellcode workspace using explicit arch or via auto detect
@@ -205,13 +208,12 @@ def get_shellcode_vw(sample, arch="auto"):
         # choose arch with most functions, idea by Jay G.
         vw_cands = []
         for arch in ["i386", "amd64"]:
-            vw_cands.append(viv_utils.getShellcodeWorkspace(sample_bytes, arch))
+            vw_cands.append(viv_utils.getShellcodeWorkspace(sample_bytes, arch, base=SHELLCODE_BASE))
         if not vw_cands:
             raise ValueError("could not generate vivisect workspace")
         vw = max(vw_cands, key=lambda vw: len(vw.getFunctions()))
     else:
-        vw = viv_utils.getShellcodeWorkspace(sample_bytes, arch)
-    vw.setMeta("Format", "blob")  # TODO fix in viv_utils
+        vw = viv_utils.getShellcodeWorkspace(sample_bytes, arch, base=SHELLCODE_BASE)
     return vw
 
 

From ff639737b8198b0bf12937643662a0dd0d2c1493 Mon Sep 17 00:00:00 2001
From: William Ballenthin <william.ballenthin@fireeye.com>
Date: Fri, 3 Jul 2020 09:49:41 -0600
Subject: [PATCH 11/32] render: simplify metadata display

closes #91
---
 capa/render/default.py  | 14 ++++++--------
 capa/render/verbose.py  | 22 ++++++++++------------
 capa/render/vverbose.py | 23 ++++++++++-------------
 3 files changed, 26 insertions(+), 33 deletions(-)

diff --git a/capa/render/default.py b/capa/render/default.py
index c46ef0cf..d7971124 100644
--- a/capa/render/default.py
+++ b/capa/render/default.py
@@ -17,13 +17,11 @@ def width(s, character_count):
 
 
 def render_meta(doc, ostream):
-    rows = [(rutils.bold("Capa Report for"), rutils.bold(doc["meta"]["sample"]["md5"]),)]
-
-    for k in ("timestamp", "version"):
-        rows.append((width(k, 22), width(doc["meta"][k], 60)))
-
-    for k in ("path", "md5"):
-        rows.append((k, doc["meta"]["sample"][k]))
+    rows = []
+    rows.append((width("md5", 22), width(doc["meta"]["sample"]["md5"], 82)))
+    rows.append(("path", doc["meta"]["sample"]["path"]))
+    rows.append(("timestamp", doc["meta"]["timestamp"]))
+    rows.append(("capa version", doc["meta"]["version"]))
 
     ostream.write(tabulate.tabulate(rows, tablefmt="psql"))
     ostream.write("\n")
@@ -99,7 +97,7 @@ def render_attack(doc, ostream):
                 raise RuntimeError("unexpected ATT&CK spec format")
         rows.append((rutils.bold(tactic.upper()), "\n".join(inner_rows),))
     ostream.write(
-        tabulate.tabulate(rows, headers=[width("ATT&CK Tactic", 20), width("ATT&CK Technique", 60)], tablefmt="psql")
+        tabulate.tabulate(rows, headers=[width("ATT&CK Tactic", 20), width("ATT&CK Technique", 80)], tablefmt="psql")
     )
     ostream.write("\n")
 
diff --git a/capa/render/verbose.py b/capa/render/verbose.py
index 24e9cd3c..c2576155 100644
--- a/capa/render/verbose.py
+++ b/capa/render/verbose.py
@@ -23,18 +23,16 @@ import capa.render.utils as rutils
 def render_verbose(doc):
     ostream = rutils.StringIO()
 
-    rows = [(rutils.bold("Capa Report for"), rutils.bold(doc["meta"]["sample"]["md5"]),)]
-    for k in ("timestamp", "version"):
-        rows.append((k, doc["meta"][k]))
-
-    for k in ("path", "md5", "sha1", "sha256"):
-        rows.append((k, doc["meta"]["sample"][k]))
-
-    for k in ("format", "extractor"):
-        rows.append((k.replace("_", " "), doc["meta"]["analysis"][k]))
-
-    rows.append(("base address", rutils.hex(doc["meta"]["analysis"]["base_address"])))
-
+    rows = []
+    rows.append(("md5", doc["meta"]["sample"]["md5"]))
+    rows.append(("sha1", doc["meta"]["sample"]["sha1"]))
+    rows.append(("sha256", doc["meta"]["sample"]["sha256"]))
+    rows.append(("path", doc["meta"]["sample"]["path"]))
+    rows.append(("timestamp", doc["meta"]["timestamp"]))
+    rows.append(("capa version", doc["meta"]["version"]))
+    rows.append(("format", doc["meta"]["analysis"]["format"]))
+    rows.append(("extractor", doc["meta"]["analysis"]["extractor"]))
+    rows.append(("base address", hex(doc["meta"]["analysis"]["base_address"])))
     ostream.writeln(tabulate.tabulate(rows, tablefmt="plain"))
     ostream.write("\n")
 
diff --git a/capa/render/vverbose.py b/capa/render/vverbose.py
index 2fe96367..342a9dd4 100644
--- a/capa/render/vverbose.py
+++ b/capa/render/vverbose.py
@@ -141,19 +141,16 @@ def render_match(ostream, match, indent=0, mode=MODE_SUCCESS):
 def render_vverbose(doc):
     ostream = rutils.StringIO()
 
-    rows = [(rutils.bold("Capa Report for"), rutils.bold(doc["meta"]["sample"]["md5"]),)]
-    for k in ("timestamp", "version"):
-        rows.append((k, doc["meta"][k]))
-
-    for k in ("path", "md5", "sha1", "sha256"):
-        rows.append((k, doc["meta"]["sample"][k]))
-
-    for k in ("format", "extractor"):
-        rows.append((k.replace("_", " "), doc["meta"]["analysis"][k]))
-
-    rows.append(("base address", rutils.hex(doc["meta"]["analysis"]["base_address"])))
-
-    ostream.writeln(rutils.bold("Capa Report for " + doc["meta"]["sample"]["md5"]))
+    rows = []
+    rows.append(("md5", doc["meta"]["sample"]["md5"]))
+    rows.append(("sha1", doc["meta"]["sample"]["sha1"]))
+    rows.append(("sha256", doc["meta"]["sample"]["sha256"]))
+    rows.append(("path", doc["meta"]["sample"]["path"]))
+    rows.append(("timestamp", doc["meta"]["timestamp"]))
+    rows.append(("capa version", doc["meta"]["version"]))
+    rows.append(("format", doc["meta"]["analysis"]["format"]))
+    rows.append(("extractor", doc["meta"]["analysis"]["extractor"]))
+    rows.append(("base address", hex(doc["meta"]["analysis"]["base_address"])))
     ostream.writeln(tabulate.tabulate(rows, tablefmt="plain"))
     ostream.write("\n")
 

From 867de57062713aef2fc522fb2937560bcb81e82a Mon Sep 17 00:00:00 2001
From: William Ballenthin <william.ballenthin@fireeye.com>
Date: Fri, 3 Jul 2020 10:12:03 -0600
Subject: [PATCH 12/32] main: find_capabilities: extract feature counts per
 item, too

closes #95
closes #96
---
 capa/ida/ida_capa_explorer.py          |  7 ++++--
 capa/main.py                           | 32 ++++++++++++++++++--------
 scripts/lint.py                        |  2 +-
 scripts/testbed/run_rule_on_testbed.py |  3 ++-
 tests/test_freeze.py                   |  2 +-
 tests/test_main.py                     | 10 ++++----
 6 files changed, 37 insertions(+), 19 deletions(-)

diff --git a/capa/ida/ida_capa_explorer.py b/capa/ida/ida_capa_explorer.py
index 970e8a62..801985bf 100644
--- a/capa/ida/ida_capa_explorer.py
+++ b/capa/ida/ida_capa_explorer.py
@@ -339,7 +339,11 @@ class CapaExplorerForm(idaapi.PluginForm):
         rules_path = os.path.join(os.path.dirname(self.file_loc), "../..", "rules")
         rules = capa.main.get_rules(rules_path)
         rules = capa.rules.RuleSet(rules)
-        capabilities = capa.main.find_capabilities(rules, capa.features.extractors.ida.IdaFeatureExtractor(), True)
+
+        meta = capa.ida.helpers.collect_metadata()
+
+        capabilities, counts = capa.main.find_capabilities(rules, capa.features.extractors.ida.IdaFeatureExtractor(), True)
+        meta.update(counts)
 
         # support binary files specifically for x86/AMD64 shellcode
         # warn user binary file is loaded but still allow capa to process it
@@ -364,7 +368,6 @@ class CapaExplorerForm(idaapi.PluginForm):
 
         logger.info("analysis completed.")
 
-        meta = capa.ida.helpers.collect_metadata()
         doc = capa.render.convert_capabilities_to_result_document(meta, rules, capabilities)
 
         self.model_data.render_capa_doc(doc)
diff --git a/capa/main.py b/capa/main.py
index 52516a8f..9cbb1589 100644
--- a/capa/main.py
+++ b/capa/main.py
@@ -68,7 +68,7 @@ def find_function_capabilities(ruleset, extractor, f):
                 function_features[capa.features.MatchedRule(rule_name)].add(va)
 
     _, function_matches = capa.engine.match(ruleset.function_rules, function_features, oint(f))
-    return function_matches, bb_matches
+    return function_matches, bb_matches, len(function_features)
 
 
 def find_file_capabilities(ruleset, extractor, function_features):
@@ -84,20 +84,30 @@ def find_file_capabilities(ruleset, extractor, function_features):
             if feature not in file_features:
                 file_features[feature] = set()
 
-    logger.info("analyzed file and extracted %d features", len(file_features))
+    logger.debug("analyzed file and extracted %d features", len(file_features))
 
     file_features.update(function_features)
 
     _, matches = capa.engine.match(ruleset.file_rules, file_features, 0x0)
-    return matches
+    return matches, len(file_features)
 
 
 def find_capabilities(ruleset, extractor, disable_progress=None):
     all_function_matches = collections.defaultdict(list)
     all_bb_matches = collections.defaultdict(list)
 
+    meta = {
+        "counts": {
+            "file": 0,
+            "functions": {},
+        }
+    }
+
     for f in tqdm.tqdm(extractor.get_functions(), disable=disable_progress, unit=" functions"):
-        function_matches, bb_matches = find_function_capabilities(ruleset, extractor, f)
+        function_matches, bb_matches, feature_count = find_function_capabilities(ruleset, extractor, f)
+        meta["counts"]["functions"][f.__int__()] = feature_count
+        logger.debug("analyzed function 0x%x and extracted %d features", f.__int__(), feature_count)
+
         for rule_name, res in function_matches.items():
             all_function_matches[rule_name].extend(res)
         for rule_name, res in bb_matches.items():
@@ -110,14 +120,15 @@ def find_capabilities(ruleset, extractor, disable_progress=None):
         for rule_name, results in all_function_matches.items()
     }
 
-    all_file_matches = find_file_capabilities(ruleset, extractor, function_features)
+    all_file_matches, feature_count = find_file_capabilities(ruleset, extractor, function_features)
+    meta["counts"]["file"] = feature_count
 
     matches = {}
     matches.update(all_bb_matches)
     matches.update(all_function_matches)
     matches.update(all_file_matches)
 
-    return matches
+    return matches, meta
 
 
 def has_rule_with_namespace(rules, capabilities, rule_cat):
@@ -485,7 +496,8 @@ def main(argv=None):
 
     meta = collect_metadata(argv, args.sample, format, extractor)
 
-    capabilities = find_capabilities(rules, extractor)
+    capabilities, counts = find_capabilities(rules, extractor)
+    meta.update(counts)
 
     if has_file_limitation(rules, capabilities):
         # bail if capa encountered file limitation e.g. a packed binary
@@ -542,12 +554,14 @@ def ida_main():
     rules = get_rules(rules_path)
     rules = capa.rules.RuleSet(rules)
 
-    capabilities = find_capabilities(rules, capa.features.extractors.ida.IdaFeatureExtractor())
+    meta = capa.ida.helpers.collect_metadata()
+
+    capabilities, counts = find_capabilities(rules, capa.features.extractors.ida.IdaFeatureExtractor())
+    meta.update(counts)
 
     if has_file_limitation(rules, capabilities, is_standalone=False):
         capa.ida.helpers.inform_user_ida_ui("capa encountered warnings during analysis")
 
-    meta = capa.ida.helpers.collect_metadata()
     print(capa.render.render_default(meta, rules, capabilities))
 
 
diff --git a/scripts/lint.py b/scripts/lint.py
index 69bf3bf4..7638e2c7 100644
--- a/scripts/lint.py
+++ b/scripts/lint.py
@@ -180,7 +180,7 @@ class DoesntMatchExample(Lint):
 
             try:
                 extractor = capa.main.get_extractor(path, "auto")
-                capabilities = capa.main.find_capabilities(ctx["rules"], extractor, disable_progress=True)
+                capabilities, meta = capa.main.find_capabilities(ctx["rules"], extractor, disable_progress=True)
             except Exception as e:
                 logger.error("failed to extract capabilities: %s %s %s", rule.name, path, e)
                 return True
diff --git a/scripts/testbed/run_rule_on_testbed.py b/scripts/testbed/run_rule_on_testbed.py
index 2e6b9ce8..aa78a830 100644
--- a/scripts/testbed/run_rule_on_testbed.py
+++ b/scripts/testbed/run_rule_on_testbed.py
@@ -93,7 +93,8 @@ def get_capabilities(path, rules):
     logger.debug("matching rules in %s", path)
     with open(path, "rb") as f:
         extractor = capa.features.freeze.load(f.read())
-    return capa.main.find_capabilities(rules, extractor, disable_progress=True)
+    capabilities, meta = capa.main.find_capabilities(rules, extractor, disable_progress=True)
+    return capabilities
 
 
 def get_function_hits(capabilities, rule_name):
diff --git a/tests/test_freeze.py b/tests/test_freeze.py
index f81114ca..ef9fe1bd 100644
--- a/tests/test_freeze.py
+++ b/tests/test_freeze.py
@@ -59,7 +59,7 @@ def test_null_feature_extractor():
             ),
         ]
     )
-    capabilities = capa.main.find_capabilities(rules, EXTRACTOR)
+    capabilities, meta = capa.main.find_capabilities(rules, EXTRACTOR)
     assert "xor loop" in capabilities
 
 
diff --git a/tests/test_main.py b/tests/test_main.py
index 3010b494..c35b4b4b 100644
--- a/tests/test_main.py
+++ b/tests/test_main.py
@@ -147,7 +147,7 @@ def test_match_across_scopes_file_function(sample_9324d1a8ae37a36ae560c37448c970
     extractor = capa.features.extractors.viv.VivisectFeatureExtractor(
         sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path,
     )
-    capabilities = capa.main.find_capabilities(rules, extractor)
+    capabilities, meta = capa.main.find_capabilities(rules, extractor)
     assert "install service" in capabilities
     assert ".text section" in capabilities
     assert ".text section and install service" in capabilities
@@ -212,7 +212,7 @@ def test_match_across_scopes(sample_9324d1a8ae37a36ae560c37448c9705a):
     extractor = capa.features.extractors.viv.VivisectFeatureExtractor(
         sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path
     )
-    capabilities = capa.main.find_capabilities(rules, extractor)
+    capabilities, meta = capa.main.find_capabilities(rules, extractor)
     assert "tight loop" in capabilities
     assert "kill thread loop" in capabilities
     assert "kill thread program" in capabilities
@@ -241,7 +241,7 @@ def test_subscope_bb_rules(sample_9324d1a8ae37a36ae560c37448c9705a):
     extractor = capa.features.extractors.viv.VivisectFeatureExtractor(
         sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path,
     )
-    capabilities = capa.main.find_capabilities(rules, extractor)
+    capabilities, meta = capa.main.find_capabilities(rules, extractor)
     assert "test rule" in capabilities
 
 
@@ -267,7 +267,7 @@ def test_byte_matching(sample_9324d1a8ae37a36ae560c37448c9705a):
     extractor = capa.features.extractors.viv.VivisectFeatureExtractor(
         sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path,
     )
-    capabilities = capa.main.find_capabilities(rules, extractor)
+    capabilities, meta = capa.main.find_capabilities(rules, extractor)
     assert "byte match test" in capabilities
 
 
@@ -294,5 +294,5 @@ def test_count_bb(sample_9324d1a8ae37a36ae560c37448c9705a):
     extractor = capa.features.extractors.viv.VivisectFeatureExtractor(
         sample_9324d1a8ae37a36ae560c37448c9705a.vw, sample_9324d1a8ae37a36ae560c37448c9705a.path,
     )
-    capabilities = capa.main.find_capabilities(rules, extractor)
+    capabilities, meta = capa.main.find_capabilities(rules, extractor)
     assert "count bb" in capabilities

From 3b7c8cd1e7e47ca5180d79641e16d73bde2e5193 Mon Sep 17 00:00:00 2001
From: William Ballenthin <william.ballenthin@fireeye.com>
Date: Fri, 3 Jul 2020 10:12:21 -0600
Subject: [PATCH 13/32] pep8

---
 capa/ida/ida_capa_explorer.py | 4 +++-
 capa/main.py                  | 7 +------
 2 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/capa/ida/ida_capa_explorer.py b/capa/ida/ida_capa_explorer.py
index 801985bf..20757fc3 100644
--- a/capa/ida/ida_capa_explorer.py
+++ b/capa/ida/ida_capa_explorer.py
@@ -342,7 +342,9 @@ class CapaExplorerForm(idaapi.PluginForm):
 
         meta = capa.ida.helpers.collect_metadata()
 
-        capabilities, counts = capa.main.find_capabilities(rules, capa.features.extractors.ida.IdaFeatureExtractor(), True)
+        capabilities, counts = capa.main.find_capabilities(
+            rules, capa.features.extractors.ida.IdaFeatureExtractor(), True
+        )
         meta.update(counts)
 
         # support binary files specifically for x86/AMD64 shellcode
diff --git a/capa/main.py b/capa/main.py
index 9cbb1589..b51e654d 100644
--- a/capa/main.py
+++ b/capa/main.py
@@ -96,12 +96,7 @@ def find_capabilities(ruleset, extractor, disable_progress=None):
     all_function_matches = collections.defaultdict(list)
     all_bb_matches = collections.defaultdict(list)
 
-    meta = {
-        "counts": {
-            "file": 0,
-            "functions": {},
-        }
-    }
+    meta = {"counts": {"file": 0, "functions": {},}}
 
     for f in tqdm.tqdm(extractor.get_functions(), disable=disable_progress, unit=" functions"):
         function_matches, bb_matches, feature_count = find_function_capabilities(ruleset, extractor, f)

From ce7fb39aa841a48e36c3570e745fdb70c2bd9c60 Mon Sep 17 00:00:00 2001
From: William Ballenthin <william.ballenthin@fireeye.com>
Date: Fri, 3 Jul 2020 10:33:14 -0600
Subject: [PATCH 14/32] render: show feature counts

closes #96
---
 capa/render/verbose.py  |  2 ++
 capa/render/vverbose.py | 23 +++++++++++++++++++++++
 2 files changed, 25 insertions(+)

diff --git a/capa/render/verbose.py b/capa/render/verbose.py
index c2576155..7c9d8fa6 100644
--- a/capa/render/verbose.py
+++ b/capa/render/verbose.py
@@ -33,6 +33,8 @@ def render_verbose(doc):
     rows.append(("format", doc["meta"]["analysis"]["format"]))
     rows.append(("extractor", doc["meta"]["analysis"]["extractor"]))
     rows.append(("base address", hex(doc["meta"]["analysis"]["base_address"])))
+    rows.append(("function count", len(doc["meta"]["counts"]["functions"])))
+    rows.append(("total feature count", doc["meta"]["counts"]["file"] + sum(doc["meta"]["counts"]["functions"].values())))
     ostream.writeln(tabulate.tabulate(rows, tablefmt="plain"))
     ostream.write("\n")
 
diff --git a/capa/render/vverbose.py b/capa/render/vverbose.py
index 342a9dd4..46e7c6c7 100644
--- a/capa/render/vverbose.py
+++ b/capa/render/vverbose.py
@@ -1,3 +1,5 @@
+import collections
+
 import tabulate
 
 import capa.rules
@@ -151,9 +153,30 @@ def render_vverbose(doc):
     rows.append(("format", doc["meta"]["analysis"]["format"]))
     rows.append(("extractor", doc["meta"]["analysis"]["extractor"]))
     rows.append(("base address", hex(doc["meta"]["analysis"]["base_address"])))
+    rows.append(("function count", len(doc["meta"]["counts"]["functions"])))
+    rows.append(
+        ("total feature count", doc["meta"]["counts"]["file"] + sum(doc["meta"]["counts"]["functions"].values())))
     ostream.writeln(tabulate.tabulate(rows, tablefmt="plain"))
     ostream.write("\n")
 
+    matches_by_function = collections.defaultdict(set)
+    for rule in rutils.capability_rules(doc):
+        for va in rule["matches"].keys():
+            matches_by_function[va].add(rule["meta"]["name"])
+
+    ostream.writeln("## functions")
+    for va, feature_count in sorted(doc["meta"]["counts"]["functions"].items()):
+        va = int(va)
+        ostream.write("function at 0x%x with %d features: " % (va, feature_count))
+        if not matches_by_function.get(va, {}):
+            ostream.writeln("no matches")
+        else:
+            ostream.writeln("")
+            for rule_name in matches_by_function[va]:
+                ostream.writeln("  - " + rule_name)
+
+    ostream.write("\n")
+    ostream.writeln("## rules")
     for rule in rutils.capability_rules(doc):
         count = len(rule["matches"])
         if count == 1:

From 744bbf9f1850fc6722109c1b5e217f6a4042fd6a Mon Sep 17 00:00:00 2001
From: William Ballenthin <william.ballenthin@fireeye.com>
Date: Fri, 3 Jul 2020 10:33:24 -0600
Subject: [PATCH 15/32] pep8

---
 capa/render/verbose.py  | 4 +++-
 capa/render/vverbose.py | 3 ++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/capa/render/verbose.py b/capa/render/verbose.py
index 7c9d8fa6..830d8e3a 100644
--- a/capa/render/verbose.py
+++ b/capa/render/verbose.py
@@ -34,7 +34,9 @@ def render_verbose(doc):
     rows.append(("extractor", doc["meta"]["analysis"]["extractor"]))
     rows.append(("base address", hex(doc["meta"]["analysis"]["base_address"])))
     rows.append(("function count", len(doc["meta"]["counts"]["functions"])))
-    rows.append(("total feature count", doc["meta"]["counts"]["file"] + sum(doc["meta"]["counts"]["functions"].values())))
+    rows.append(
+        ("total feature count", doc["meta"]["counts"]["file"] + sum(doc["meta"]["counts"]["functions"].values()))
+    )
     ostream.writeln(tabulate.tabulate(rows, tablefmt="plain"))
     ostream.write("\n")
 
diff --git a/capa/render/vverbose.py b/capa/render/vverbose.py
index 46e7c6c7..7ca73fa3 100644
--- a/capa/render/vverbose.py
+++ b/capa/render/vverbose.py
@@ -155,7 +155,8 @@ def render_vverbose(doc):
     rows.append(("base address", hex(doc["meta"]["analysis"]["base_address"])))
     rows.append(("function count", len(doc["meta"]["counts"]["functions"])))
     rows.append(
-        ("total feature count", doc["meta"]["counts"]["file"] + sum(doc["meta"]["counts"]["functions"].values())))
+        ("total feature count", doc["meta"]["counts"]["file"] + sum(doc["meta"]["counts"]["functions"].values()))
+    )
     ostream.writeln(tabulate.tabulate(rows, tablefmt="plain"))
     ostream.write("\n")
 

From 788f11a865a4042944ff91818b13d755612b6f6f Mon Sep 17 00:00:00 2001
From: William Ballenthin <william.ballenthin@fireeye.com>
Date: Fri, 3 Jul 2020 10:41:38 -0600
Subject: [PATCH 16/32] render: refactor sections and add doc

---
 capa/render/verbose.py  | 41 +++++++++++++++++++---
 capa/render/vverbose.py | 76 +++++++++++++++++++++++++++++------------
 2 files changed, 91 insertions(+), 26 deletions(-)

diff --git a/capa/render/verbose.py b/capa/render/verbose.py
index 830d8e3a..029995b7 100644
--- a/capa/render/verbose.py
+++ b/capa/render/verbose.py
@@ -19,10 +19,22 @@ import tabulate
 import capa.rules
 import capa.render.utils as rutils
 
+def render_meta(ostream, doc):
+    """
+    like:
 
-def render_verbose(doc):
-    ostream = rutils.StringIO()
-
+        md5                  84882c9d43e23d63b82004fae74ebb61
+        sha1                 c6fb3b50d946bec6f391aefa4e54478cf8607211
+        sha256               5eced7367ed63354b4ed5c556e2363514293f614c2c2eb187273381b2ef5f0f9
+        path                 /tmp/suspicious.dll_
+        timestamp            2020-07-03T10:17:05.796933
+        capa version         0.0.0
+        format               auto
+        extractor            VivisectFeatureExtractor
+        base address         0x10000000
+        function count       42
+        total feature count  1918
+    """
     rows = []
     rows.append(("md5", doc["meta"]["sample"]["md5"]))
     rows.append(("sha1", doc["meta"]["sample"]["sha1"]))
@@ -38,8 +50,19 @@ def render_verbose(doc):
         ("total feature count", doc["meta"]["counts"]["file"] + sum(doc["meta"]["counts"]["functions"].values()))
     )
     ostream.writeln(tabulate.tabulate(rows, tablefmt="plain"))
-    ostream.write("\n")
 
+
+def render_rules(ostream, doc):
+    """
+    like:
+
+        receive data (2 matches)
+        namespace    communication
+        description  all known techniques for receiving data from a potential C2 server
+        scope        function
+        matches      0x10003A13
+                     0x10003797
+    """
     for rule in rutils.capability_rules(doc):
         count = len(rule["matches"])
         if count == 1:
@@ -66,4 +89,14 @@ def render_verbose(doc):
         ostream.writeln(tabulate.tabulate(rows, tablefmt="plain"))
         ostream.write("\n")
 
+
+def render_verbose(doc):
+    ostream = rutils.StringIO()
+
+    render_meta(ostream, doc)
+    ostream.write("\n")
+
+    render_rules(ostream, doc)
+    ostream.write("\n")
+
     return ostream.getvalue()
diff --git a/capa/render/vverbose.py b/capa/render/vverbose.py
index 7ca73fa3..510c832c 100644
--- a/capa/render/vverbose.py
+++ b/capa/render/vverbose.py
@@ -4,6 +4,7 @@ import tabulate
 
 import capa.rules
 import capa.render.utils as rutils
+import capa.render.verbose
 
 
 def render_locations(ostream, match):
@@ -140,26 +141,28 @@ def render_match(ostream, match, indent=0, mode=MODE_SUCCESS):
         render_match(ostream, child, indent=indent + 1, mode=child_mode)
 
 
-def render_vverbose(doc):
-    ostream = rutils.StringIO()
-
-    rows = []
-    rows.append(("md5", doc["meta"]["sample"]["md5"]))
-    rows.append(("sha1", doc["meta"]["sample"]["sha1"]))
-    rows.append(("sha256", doc["meta"]["sample"]["sha256"]))
-    rows.append(("path", doc["meta"]["sample"]["path"]))
-    rows.append(("timestamp", doc["meta"]["timestamp"]))
-    rows.append(("capa version", doc["meta"]["version"]))
-    rows.append(("format", doc["meta"]["analysis"]["format"]))
-    rows.append(("extractor", doc["meta"]["analysis"]["extractor"]))
-    rows.append(("base address", hex(doc["meta"]["analysis"]["base_address"])))
-    rows.append(("function count", len(doc["meta"]["counts"]["functions"])))
-    rows.append(
-        ("total feature count", doc["meta"]["counts"]["file"] + sum(doc["meta"]["counts"]["functions"].values()))
-    )
-    ostream.writeln(tabulate.tabulate(rows, tablefmt="plain"))
-    ostream.write("\n")
+def render_functions(ostream, doc):
+    """
+    like:
 
+        ## functions
+        function at 0x10001000 with 66 features: no matches
+        function at 0x100012b0 with 73 features: no matches
+        function at 0x1000321a with 33 features:
+          - get hostname
+          - initialize Winsock library
+        function at 0x10003286 with 63 features:
+          - create thread
+          - terminate thread
+        function at 0x10003415 with 116 features:
+          - write file
+          - send data
+          - link function at runtime
+          - create HTTP request
+          - get common file path
+          - send HTTP request
+          - connect to HTTP server
+    """
     matches_by_function = collections.defaultdict(set)
     for rule in rutils.capability_rules(doc):
         for va in rule["matches"].keys():
@@ -168,7 +171,7 @@ def render_vverbose(doc):
     ostream.writeln("## functions")
     for va, feature_count in sorted(doc["meta"]["counts"]["functions"].items()):
         va = int(va)
-        ostream.write("function at 0x%x with %d features: " % (va, feature_count))
+        ostream.write("function at 0x%X with %d features: " % (va, feature_count))
         if not matches_by_function.get(va, {}):
             ostream.writeln("no matches")
         else:
@@ -176,7 +179,24 @@ def render_vverbose(doc):
             for rule_name in matches_by_function[va]:
                 ostream.writeln("  - " + rule_name)
 
-    ostream.write("\n")
+
+def render_rules(ostream, doc):
+    """
+    like:
+
+        ## rules
+        check for OutputDebugString error
+        namespace  anti-analysis/anti-debugging/debugger-detection
+        author     michael.hunhoff@fireeye.com
+        scope      function
+        mbc        Anti-Behavioral Analysis::Detect Debugger::OutputDebugString
+        examples   Practical Malware Analysis Lab 16-02.exe_:0x401020
+        function @ 0x10004706
+          and:
+            api: kernel32.SetLastError @ 0x100047C2
+            api: kernel32.GetLastError @ 0x10004A87
+            api: kernel32.OutputDebugString @ 0x10004767, 0x10004787, 0x10004816, 0x10004895
+    """
     ostream.writeln("## rules")
     for rule in rutils.capability_rules(doc):
         count = len(rule["matches"])
@@ -216,7 +236,19 @@ def render_vverbose(doc):
                 ostream.write(" @ ")
                 ostream.writeln(rutils.hex(location))
                 render_match(ostream, match, indent=1)
-
         ostream.write("\n")
 
+
+def render_vverbose(doc):
+    ostream = rutils.StringIO()
+
+    capa.render.verbose.render_meta(ostream, doc)
+    ostream.write("\n")
+
+    render_functions(ostream, doc)
+    ostream.write("\n")
+
+    render_rules(ostream, doc)
+    ostream.write("\n")
+
     return ostream.getvalue()

From 03dcc92cb3021d2642e291b3dcbff4c62e3bb0b3 Mon Sep 17 00:00:00 2001
From: William Ballenthin <william.ballenthin@fireeye.com>
Date: Fri, 3 Jul 2020 10:41:49 -0600
Subject: [PATCH 17/32] pep8

---
 capa/render/verbose.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/capa/render/verbose.py b/capa/render/verbose.py
index 029995b7..e753545b 100644
--- a/capa/render/verbose.py
+++ b/capa/render/verbose.py
@@ -19,6 +19,7 @@ import tabulate
 import capa.rules
 import capa.render.utils as rutils
 
+
 def render_meta(ostream, doc):
     """
     like:

From f7c460777ff8bd59f218b0e11579c7efbf1fd6b5 Mon Sep 17 00:00:00 2001
From: William Ballenthin <william.ballenthin@fireeye.com>
Date: Fri, 3 Jul 2020 20:51:41 -0600
Subject: [PATCH 18/32] render: use list literal

---
 capa/render/default.py | 11 ++++++-----
 capa/render/verbose.py | 27 +++++++++++++--------------
 setup.py               | 16 +++++++++-------
 3 files changed, 28 insertions(+), 26 deletions(-)

diff --git a/capa/render/default.py b/capa/render/default.py
index d7971124..bce458d2 100644
--- a/capa/render/default.py
+++ b/capa/render/default.py
@@ -17,11 +17,12 @@ def width(s, character_count):
 
 
 def render_meta(doc, ostream):
-    rows = []
-    rows.append((width("md5", 22), width(doc["meta"]["sample"]["md5"], 82)))
-    rows.append(("path", doc["meta"]["sample"]["path"]))
-    rows.append(("timestamp", doc["meta"]["timestamp"]))
-    rows.append(("capa version", doc["meta"]["version"]))
+    rows = [
+        (width("md5", 22), width(doc["meta"]["sample"]["md5"], 82)),
+        ("path", doc["meta"]["sample"]["path"]),
+        ("timestamp", doc["meta"]["timestamp"]),
+        ("capa version", doc["meta"]["version"]),
+    ]
 
     ostream.write(tabulate.tabulate(rows, tablefmt="psql"))
     ostream.write("\n")
diff --git a/capa/render/verbose.py b/capa/render/verbose.py
index e753545b..65dac839 100644
--- a/capa/render/verbose.py
+++ b/capa/render/verbose.py
@@ -36,20 +36,19 @@ def render_meta(ostream, doc):
         function count       42
         total feature count  1918
     """
-    rows = []
-    rows.append(("md5", doc["meta"]["sample"]["md5"]))
-    rows.append(("sha1", doc["meta"]["sample"]["sha1"]))
-    rows.append(("sha256", doc["meta"]["sample"]["sha256"]))
-    rows.append(("path", doc["meta"]["sample"]["path"]))
-    rows.append(("timestamp", doc["meta"]["timestamp"]))
-    rows.append(("capa version", doc["meta"]["version"]))
-    rows.append(("format", doc["meta"]["analysis"]["format"]))
-    rows.append(("extractor", doc["meta"]["analysis"]["extractor"]))
-    rows.append(("base address", hex(doc["meta"]["analysis"]["base_address"])))
-    rows.append(("function count", len(doc["meta"]["counts"]["functions"])))
-    rows.append(
-        ("total feature count", doc["meta"]["counts"]["file"] + sum(doc["meta"]["counts"]["functions"].values()))
-    )
+    rows = [
+        ("md5", doc["meta"]["sample"]["md5"]),
+        ("sha1", doc["meta"]["sample"]["sha1"]),
+        ("sha256", doc["meta"]["sample"]["sha256"]),
+        ("path", doc["meta"]["sample"]["path"]),
+        ("timestamp", doc["meta"]["timestamp"]),
+        ("capa version", doc["meta"]["version"]),
+        ("format", doc["meta"]["analysis"]["format"]),
+        ("extractor", doc["meta"]["analysis"]["extractor"]),
+        ("base address", hex(doc["meta"]["analysis"]["base_address"])),
+        ("function count", len(doc["meta"]["counts"]["functions"])),
+        ("total feature count", doc["meta"]["counts"]["file"] + sum(doc["meta"]["counts"]["functions"].values())),
+    ]
     ostream.writeln(tabulate.tabulate(rows, tablefmt="plain"))
 
 
diff --git a/setup.py b/setup.py
index b23bc084..a01a05ca 100644
--- a/setup.py
+++ b/setup.py
@@ -41,13 +41,15 @@ setuptools.setup(
     include_package_data=True,
     install_requires=requirements,
     extras_require={
-        "dev": ["pytest",
-                "pytest-sugar",
-                "pytest-instafail",
-                "pytest-cov",
-                "pycodestyle",
-                "black ; python_version>'3.0'",
-                "isort"]
+        "dev": [
+            "pytest",
+            "pytest-sugar",
+            "pytest-instafail",
+            "pytest-cov",
+            "pycodestyle",
+            "black ; python_version>'3.0'",
+            "isort",
+        ]
     },
     zip_safe=False,
     keywords="capa",

From c8cb0cd8f78a444bc9286a85af7a17945cf7601e Mon Sep 17 00:00:00 2001
From: William Ballenthin <william.ballenthin@fireeye.com>
Date: Fri, 3 Jul 2020 20:52:53 -0600
Subject: [PATCH 19/32] render: default: remove extra fields from meta

---
 capa/render/default.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/capa/render/default.py b/capa/render/default.py
index bce458d2..36cfb43e 100644
--- a/capa/render/default.py
+++ b/capa/render/default.py
@@ -20,8 +20,6 @@ def render_meta(doc, ostream):
     rows = [
         (width("md5", 22), width(doc["meta"]["sample"]["md5"], 82)),
         ("path", doc["meta"]["sample"]["path"]),
-        ("timestamp", doc["meta"]["timestamp"]),
-        ("capa version", doc["meta"]["version"]),
     ]
 
     ostream.write(tabulate.tabulate(rows, tablefmt="psql"))

From caa0b3850b09db6135d0c9cf408ab6a45902c2e1 Mon Sep 17 00:00:00 2001
From: William Ballenthin <william.ballenthin@fireeye.com>
Date: Fri, 3 Jul 2020 20:55:31 -0600
Subject: [PATCH 20/32] render: default: tweak column width for common case

---
 capa/render/default.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/capa/render/default.py b/capa/render/default.py
index 36cfb43e..6bdef70a 100644
--- a/capa/render/default.py
+++ b/capa/render/default.py
@@ -47,7 +47,7 @@ def render_capabilities(doc, ostream):
             capability = "%s (%d matches)" % (rutils.bold(rule["meta"]["name"]), count)
         rows.append((capability, rule["meta"]["namespace"]))
 
-    ostream.write(tabulate.tabulate(rows, headers=[width("CAPABILITY", 40), width("NAMESPACE", 40)], tablefmt="psql"))
+    ostream.write(tabulate.tabulate(rows, headers=[width("CAPABILITY", 50), width("NAMESPACE", 50)], tablefmt="psql"))
     ostream.write("\n")
 
 

From b716fb698030cdcaf05fd2c985cae969fc7acee2 Mon Sep 17 00:00:00 2001
From: William Ballenthin <william.ballenthin@fireeye.com>
Date: Fri, 3 Jul 2020 21:02:55 -0600
Subject: [PATCH 21/32] meta: store feature counts in
 doc.meta.analysis.feature_counts

---
 capa/ida/ida_capa_explorer.py |  2 +-
 capa/main.py                  | 10 +++++-----
 capa/render/verbose.py        |  4 ++--
 capa/render/vverbose.py       |  2 +-
 4 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/capa/ida/ida_capa_explorer.py b/capa/ida/ida_capa_explorer.py
index 20757fc3..3230668c 100644
--- a/capa/ida/ida_capa_explorer.py
+++ b/capa/ida/ida_capa_explorer.py
@@ -345,7 +345,7 @@ class CapaExplorerForm(idaapi.PluginForm):
         capabilities, counts = capa.main.find_capabilities(
             rules, capa.features.extractors.ida.IdaFeatureExtractor(), True
         )
-        meta.update(counts)
+        meta["analysis"].update(counts)
 
         # support binary files specifically for x86/AMD64 shellcode
         # warn user binary file is loaded but still allow capa to process it
diff --git a/capa/main.py b/capa/main.py
index 23fe4b4e..c883786a 100644
--- a/capa/main.py
+++ b/capa/main.py
@@ -96,11 +96,11 @@ def find_capabilities(ruleset, extractor, disable_progress=None):
     all_function_matches = collections.defaultdict(list)
     all_bb_matches = collections.defaultdict(list)
 
-    meta = {"counts": {"file": 0, "functions": {},}}
+    meta = {"feature_counts": {"file": 0, "functions": {},}}
 
     for f in tqdm.tqdm(extractor.get_functions(), disable=disable_progress, unit=" functions"):
         function_matches, bb_matches, feature_count = find_function_capabilities(ruleset, extractor, f)
-        meta["counts"]["functions"][f.__int__()] = feature_count
+        meta["feature_counts"]["functions"][f.__int__()] = feature_count
         logger.debug("analyzed function 0x%x and extracted %d features", f.__int__(), feature_count)
 
         for rule_name, res in function_matches.items():
@@ -116,7 +116,7 @@ def find_capabilities(ruleset, extractor, disable_progress=None):
     }
 
     all_file_matches, feature_count = find_file_capabilities(ruleset, extractor, function_features)
-    meta["counts"]["file"] = feature_count
+    meta["feature_counts"]["file"] = feature_count
 
     matches = {}
     matches.update(all_bb_matches)
@@ -492,7 +492,7 @@ def main(argv=None):
     meta = collect_metadata(argv, args.sample, format, extractor)
 
     capabilities, counts = find_capabilities(rules, extractor)
-    meta.update(counts)
+    meta["analysis"].update(counts)
 
     if has_file_limitation(rules, capabilities):
         # bail if capa encountered file limitation e.g. a packed binary
@@ -552,7 +552,7 @@ def ida_main():
     meta = capa.ida.helpers.collect_metadata()
 
     capabilities, counts = find_capabilities(rules, capa.features.extractors.ida.IdaFeatureExtractor())
-    meta.update(counts)
+    meta["analysis"].update(counts)
 
     if has_file_limitation(rules, capabilities, is_standalone=False):
         capa.ida.helpers.inform_user_ida_ui("capa encountered warnings during analysis")
diff --git a/capa/render/verbose.py b/capa/render/verbose.py
index 65dac839..e680129b 100644
--- a/capa/render/verbose.py
+++ b/capa/render/verbose.py
@@ -46,8 +46,8 @@ def render_meta(ostream, doc):
         ("format", doc["meta"]["analysis"]["format"]),
         ("extractor", doc["meta"]["analysis"]["extractor"]),
         ("base address", hex(doc["meta"]["analysis"]["base_address"])),
-        ("function count", len(doc["meta"]["counts"]["functions"])),
-        ("total feature count", doc["meta"]["counts"]["file"] + sum(doc["meta"]["counts"]["functions"].values())),
+        ("function count", len(doc["meta"]["analysis"]["feature_counts"]["functions"])),
+        ("total feature count", doc["meta"]["analysis"]["feature_counts"]["file"] + sum(doc["meta"]["analysis"]["feature_counts"]["functions"].values())),
     ]
     ostream.writeln(tabulate.tabulate(rows, tablefmt="plain"))
 
diff --git a/capa/render/vverbose.py b/capa/render/vverbose.py
index 510c832c..fd7a6661 100644
--- a/capa/render/vverbose.py
+++ b/capa/render/vverbose.py
@@ -169,7 +169,7 @@ def render_functions(ostream, doc):
             matches_by_function[va].add(rule["meta"]["name"])
 
     ostream.writeln("## functions")
-    for va, feature_count in sorted(doc["meta"]["counts"]["functions"].items()):
+    for va, feature_count in sorted(doc["meta"]["analysis"]["feature_counts"]["functions"].items()):
         va = int(va)
         ostream.write("function at 0x%X with %d features: " % (va, feature_count))
         if not matches_by_function.get(va, {}):

From a5c3080829b031e4f400c658c3a889a4533a325e Mon Sep 17 00:00:00 2001
From: William Ballenthin <william.ballenthin@fireeye.com>
Date: Fri, 3 Jul 2020 21:03:09 -0600
Subject: [PATCH 22/32] pep8

---
 capa/render/verbose.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/capa/render/verbose.py b/capa/render/verbose.py
index e680129b..4b096206 100644
--- a/capa/render/verbose.py
+++ b/capa/render/verbose.py
@@ -47,7 +47,11 @@ def render_meta(ostream, doc):
         ("extractor", doc["meta"]["analysis"]["extractor"]),
         ("base address", hex(doc["meta"]["analysis"]["base_address"])),
         ("function count", len(doc["meta"]["analysis"]["feature_counts"]["functions"])),
-        ("total feature count", doc["meta"]["analysis"]["feature_counts"]["file"] + sum(doc["meta"]["analysis"]["feature_counts"]["functions"].values())),
+        (
+            "total feature count",
+            doc["meta"]["analysis"]["feature_counts"]["file"]
+            + sum(doc["meta"]["analysis"]["feature_counts"]["functions"].values()),
+        ),
     ]
     ostream.writeln(tabulate.tabulate(rows, tablefmt="plain"))
 

From 6a9842f44fb79fb91605f1fa3d360f34c3ce13c0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ana=20Mar=C3=ADa=20Mart=C3=ADnez=20G=C3=B3mez?=
 <anamaria.martinezgom@FireEye.com>
Date: Fri, 3 Jul 2020 10:20:11 +0200
Subject: [PATCH 23/32] Fix misplaced type annotation

A misplaced type annotation in `capa/main.py` causes black to fail to
fix offenses in this file. It may also make capa fail in newer Python
versions. Replace `type` with `schema`, as we don't use type checker tools.
---
 capa/main.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/capa/main.py b/capa/main.py
index ca90b24c..284c3d03 100644
--- a/capa/main.py
+++ b/capa/main.py
@@ -104,7 +104,7 @@ def find_capabilities(ruleset, extractor, disable_progress=None):
             all_bb_matches[rule_name].extend(res)
 
     # mapping from matched rule feature to set of addresses at which it matched.
-    # type: Dict[MatchedRule, Set[int]]
+    # schema: Dic[MatchedRule: Set[int]
     function_features = {
         capa.features.MatchedRule(rule_name): set(map(lambda p: p[0], results))
         for rule_name, results in all_function_matches.items()

From 50d82f98ad34e11681a098e69dabcf9ea71b077d Mon Sep 17 00:00:00 2001
From: Moritz Raabe <moritz.raabe@fireeye.com>
Date: Mon, 6 Jul 2020 12:30:31 +0200
Subject: [PATCH 24/32] after #83 all basic block features are also function
 scope features

---
 capa/rules.py | 18 ++++--------------
 rules         |  2 +-
 2 files changed, 5 insertions(+), 15 deletions(-)

diff --git a/capa/rules.py b/capa/rules.py
index bff1f19e..f81cfac3 100644
--- a/capa/rules.py
+++ b/capa/rules.py
@@ -58,24 +58,11 @@ SUPPORTED_FEATURES = {
         capa.features.String,
     },
     FUNCTION_SCOPE: {
-        capa.features.MatchedRule,
-        capa.features.insn.API,
-        capa.features.insn.Number,
-        capa.features.String,
-        capa.features.Bytes,
-        capa.features.insn.Offset,
-        capa.features.insn.Mnemonic,
+        # plus basic block scope features, see below
         capa.features.basicblock.BasicBlock,
         capa.features.Characteristic("switch"),
-        capa.features.Characteristic("nzxor"),
-        capa.features.Characteristic("peb access"),
-        capa.features.Characteristic("fs access"),
-        capa.features.Characteristic("gs access"),
-        capa.features.Characteristic("cross section flow"),
-        capa.features.Characteristic("stack string"),
         capa.features.Characteristic("calls from"),
         capa.features.Characteristic("calls to"),
-        capa.features.Characteristic("indirect call"),
         capa.features.Characteristic("loop"),
         capa.features.Characteristic("recursive call"),
     },
@@ -98,6 +85,9 @@ SUPPORTED_FEATURES = {
     },
 }
 
+# all basic block scope features are also function scope features
+SUPPORTED_FEATURES[FUNCTION_SCOPE].update(SUPPORTED_FEATURES[BASIC_BLOCK_SCOPE])
+
 
 class InvalidRule(ValueError):
     def __init__(self, msg):
diff --git a/rules b/rules
index 54885300..a8621978 160000
--- a/rules
+++ b/rules
@@ -1 +1 @@
-Subproject commit 548853005591996b11d6b8d1140c9e353254e9f2
+Subproject commit a8621978cf510a53965b919d7575a1e57d8284ce

From 9e0d2ffd7bcd5847f0e705ee439ff4b14b72c64d Mon Sep 17 00:00:00 2001
From: Moritz Raabe <moritz.raabe@fireeye.com>
Date: Mon, 6 Jul 2020 13:00:28 +0200
Subject: [PATCH 25/32] account for Windows paths

---
 scripts/lint.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/scripts/lint.py b/scripts/lint.py
index 69bf3bf4..c0cc53df 100644
--- a/scripts/lint.py
+++ b/scripts/lint.py
@@ -89,7 +89,7 @@ class NamespaceDoesntMatchRulePath(Lint):
         if "lib" in rule.meta:
             return False
 
-        return rule.meta["namespace"] not in posixpath.normpath(rule.meta["capa/path"])
+        return rule.meta["namespace"] not in get_normpath(rule.meta["capa/path"])
 
 
 class MissingScope(Lint):
@@ -216,7 +216,7 @@ class LibRuleNotInLibDirectory(Lint):
         if "lib" not in rule.meta:
             return False
 
-        return "/lib/" not in posixpath.normpath(rule.meta["capa/path"])
+        return "/lib/" not in get_normpath(rule.meta["capa/path"])
 
 
 class LibRuleHasNamespace(Lint):
@@ -314,6 +314,10 @@ FEATURE_LINTS = (
 )
 
 
+def get_normpath(path):
+    return posixpath.normpath(path).replace(os.sep, "/")
+
+
 def lint_features(ctx, rule):
     features = get_features(ctx, rule)
     return run_feature_lints(FEATURE_LINTS, ctx, features)

From 845938d4e6a7640354b34d1730346ebc1b0d37e1 Mon Sep 17 00:00:00 2001
From: William Ballenthin <william.ballenthin@fireeye.com>
Date: Mon, 6 Jul 2020 13:48:30 -0600
Subject: [PATCH 26/32] scripts: show-features: add doc

---
 scripts/show-features.py | 56 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 55 insertions(+), 1 deletion(-)

diff --git a/scripts/show-features.py b/scripts/show-features.py
index e8ea988b..a6486cc8 100644
--- a/scripts/show-features.py
+++ b/scripts/show-features.py
@@ -1,6 +1,60 @@
 #!/usr/bin/env python2
 """
-show the features extracted by capa.
+show-features
+
+Show the features that capa extracts from the given sample,
+to assist with the development of rules.
+
+If you have a function with a capability that you'd like to detect,
+you can run this tool and grep for the function/basic block/instruction addresses
+to see what capa picks up.
+This way, you can verify that capa successfully notices the features you'd reference.
+
+Example::
+
+    $ python scripts/show-features.py /tmp/suspicious.dll_
+    ...
+    file: 0x10004e4d: export(__entry)
+    file: 0x10004706: export(Install)
+    file: 0x10004c2b: export(uninstallA)
+    file: 0x10005034: import(kernel32.GetStartupInfoA)
+    file: 0x10005034: import(GetStartupInfoA)
+    file: 0x10005048: import(kernel32.SetLastError)
+    file: 0x00004e10: string(Y29ubmVjdA==)
+    file: 0x00004e28: string(practicalmalwareanalysis.com)
+    file: 0x00004e68: string(serve.html)
+    file: 0x00004eb8: string(dW5zdXBwb3J0)
+    file: 0x00004ec8: string(c2xlZXA=)
+    func: 0x100012c2: characteristic(calls to)
+    func: 0x10001000: characteristic(loop)
+    bb  : 0x10001000: basic block
+    insn: 0x10001000: mnemonic(push)
+    insn: 0x10001001: mnemonic(push)
+    insn: 0x10001002: mnemonic(push)
+    insn: 0x10001003: mnemonic(push)
+    insn: 0x10001004: mnemonic(push)
+    insn: 0x10001005: mnemonic(push)
+    insn: 0x10001006: mnemonic(xor)
+    insn: 0x10001008: number(0x1)
+    insn: 0x10001008: mnemonic(mov)
+    bb  : 0x1000100a: basic block
+    bb  : 0x1000100a: characteristic(tight loop)
+    insn: 0x1000100a: mnemonic(movzx)
+    insn: 0x1000100d: mnemonic(mov)
+    insn: 0x1000100f: offset(0x1000A7C8)
+    insn: 0x1000100f: mnemonic(mov)
+    insn: 0x10001015: offset(0x100075C8)
+    insn: 0x10001015: mnemonic(mov)
+    insn: 0x1000101b: mnemonic(mov)
+    insn: 0x1000101d: number(0x80)
+    insn: 0x1000101d: mnemonic(and)
+    insn: 0x10001020: mnemonic(neg)
+    insn: 0x10001022: mnemonic(sbb)
+    insn: 0x10001024: number(0x1B)
+    insn: 0x10001024: mnemonic(and)
+    insn: 0x10001027: number(0x1)
+    insn: 0x10001027: mnemonic(shl)
+    ...
 """
 import sys
 import logging

From 484179c0421d988e8ec67a2725bf80dc5419020f Mon Sep 17 00:00:00 2001
From: William Ballenthin <william.ballenthin@fireeye.com>
Date: Mon, 6 Jul 2020 14:06:06 -0600
Subject: [PATCH 27/32] render: move capabilities by function to its own
 standalone script

---
 capa/render/vverbose.py                  |  43 ----
 scripts/show-capabilities-by-function.py | 238 +++++++++++++++++++++++
 2 files changed, 238 insertions(+), 43 deletions(-)
 create mode 100644 scripts/show-capabilities-by-function.py

diff --git a/capa/render/vverbose.py b/capa/render/vverbose.py
index fd7a6661..df204745 100644
--- a/capa/render/vverbose.py
+++ b/capa/render/vverbose.py
@@ -141,45 +141,6 @@ def render_match(ostream, match, indent=0, mode=MODE_SUCCESS):
         render_match(ostream, child, indent=indent + 1, mode=child_mode)
 
 
-def render_functions(ostream, doc):
-    """
-    like:
-
-        ## functions
-        function at 0x10001000 with 66 features: no matches
-        function at 0x100012b0 with 73 features: no matches
-        function at 0x1000321a with 33 features:
-          - get hostname
-          - initialize Winsock library
-        function at 0x10003286 with 63 features:
-          - create thread
-          - terminate thread
-        function at 0x10003415 with 116 features:
-          - write file
-          - send data
-          - link function at runtime
-          - create HTTP request
-          - get common file path
-          - send HTTP request
-          - connect to HTTP server
-    """
-    matches_by_function = collections.defaultdict(set)
-    for rule in rutils.capability_rules(doc):
-        for va in rule["matches"].keys():
-            matches_by_function[va].add(rule["meta"]["name"])
-
-    ostream.writeln("## functions")
-    for va, feature_count in sorted(doc["meta"]["analysis"]["feature_counts"]["functions"].items()):
-        va = int(va)
-        ostream.write("function at 0x%X with %d features: " % (va, feature_count))
-        if not matches_by_function.get(va, {}):
-            ostream.writeln("no matches")
-        else:
-            ostream.writeln("")
-            for rule_name in matches_by_function[va]:
-                ostream.writeln("  - " + rule_name)
-
-
 def render_rules(ostream, doc):
     """
     like:
@@ -197,7 +158,6 @@ def render_rules(ostream, doc):
             api: kernel32.GetLastError @ 0x10004A87
             api: kernel32.OutputDebugString @ 0x10004767, 0x10004787, 0x10004816, 0x10004895
     """
-    ostream.writeln("## rules")
     for rule in rutils.capability_rules(doc):
         count = len(rule["matches"])
         if count == 1:
@@ -245,9 +205,6 @@ def render_vverbose(doc):
     capa.render.verbose.render_meta(ostream, doc)
     ostream.write("\n")
 
-    render_functions(ostream, doc)
-    ostream.write("\n")
-
     render_rules(ostream, doc)
     ostream.write("\n")
 
diff --git a/scripts/show-capabilities-by-function.py b/scripts/show-capabilities-by-function.py
new file mode 100644
index 00000000..bb73d389
--- /dev/null
+++ b/scripts/show-capabilities-by-function.py
@@ -0,0 +1,238 @@
+#!/usr/bin/env python2
+"""
+show-capabilities-by-function
+
+Invoke capa to extract the capabilities of the given sample
+and emit the results grouped by function.
+
+This is useful to identify "complex functions" - that is,
+functions that implement a lot of different types of logic.
+
+Example::
+
+    $ python scripts/show-capabilities-by-function.py /tmp/suspicious.dll_
+    function at 0x1000321A with 33 features:
+      - get hostname
+      - initialize Winsock library
+    function at 0x10003286 with 63 features:
+      - create thread
+      - terminate thread
+    function at 0x10003415 with 116 features:
+      - write file
+      - send data
+      - link function at runtime
+      - create HTTP request
+      - get common file path
+      - send HTTP request
+      - connect to HTTP server
+    function at 0x10003797 with 81 features:
+      - get socket status
+      - send data
+      - receive data
+      - create TCP socket
+      - send data on socket
+      - receive data on socket
+      - act as TCP client
+      - resolve DNS
+      - create UDP socket
+      - initialize Winsock library
+      - set socket configuration
+      - connect TCP socket
+    ...
+"""
+import os
+import sys
+import logging
+import collections
+
+import colorama
+import argparse
+
+import capa.main
+import capa.rules
+import capa.engine
+import capa.render
+import capa.render.utils as rutils
+import capa.features
+import capa.features.freeze
+import capa.features.extractors.viv
+
+
+logger = logging.getLogger("capa.show-capabilities-by-function")
+
+
+def render_matches_by_function(doc):
+    """
+        like:
+
+            function at 0x1000321A with 33 features:
+              - get hostname
+              - initialize Winsock library
+            function at 0x10003286 with 63 features:
+              - create thread
+              - terminate thread
+            function at 0x10003415 with 116 features:
+              - write file
+              - send data
+              - link function at runtime
+              - create HTTP request
+              - get common file path
+              - send HTTP request
+              - connect to HTTP server
+    """
+    ostream = rutils.StringIO()
+
+    matches_by_function = collections.defaultdict(set)
+    for rule in rutils.capability_rules(doc):
+        for va in rule["matches"].keys():
+            matches_by_function[va].add(rule["meta"]["name"])
+
+    for va, feature_count in sorted(doc["meta"]["analysis"]["feature_counts"]["functions"].items()):
+        va = int(va)
+        if not matches_by_function.get(va, {}):
+            continue
+        ostream.writeln("function at 0x%X with %d features: " % (va, feature_count))
+        for rule_name in matches_by_function[va]:
+            ostream.writeln("  - " + rule_name)
+
+    ostream.write("\n")
+    return ostream.getvalue()
+
+
+def main(argv=None):
+    if argv is None:
+        argv = sys.argv[1:]
+
+        formats = [
+            ("auto", "(default) detect file type automatically"),
+            ("pe", "Windows PE file"),
+            ("sc32", "32-bit shellcode"),
+            ("sc64", "64-bit shellcode"),
+            ("freeze", "features previously frozen by capa"),
+        ]
+        format_help = ", ".join(["%s: %s" % (f[0], f[1]) for f in formats])
+
+        parser = argparse.ArgumentParser(description="detect capabilities in programs.")
+        parser.add_argument("sample", type=str, help="Path to sample to analyze")
+        parser.add_argument(
+            "-r",
+            "--rules",
+            type=str,
+            default="(embedded rules)",
+            help="Path to rule file or directory, use embedded rules by default",
+        )
+        parser.add_argument("-t", "--tag", type=str, help="Filter on rule meta field values")
+        parser.add_argument("-d", "--debug", action="store_true", help="Enable debugging output on STDERR")
+        parser.add_argument("-q", "--quiet", action="store_true", help="Disable all output but errors")
+        parser.add_argument(
+            "-f", "--format", choices=[f[0] for f in formats], default="auto",
+            help="Select sample format, %s" % format_help
+        )
+        args = parser.parse_args(args=argv)
+
+        if args.quiet:
+            logging.basicConfig(level=logging.ERROR)
+            logging.getLogger().setLevel(logging.ERROR)
+        elif args.debug:
+            logging.basicConfig(level=logging.DEBUG)
+            logging.getLogger().setLevel(logging.DEBUG)
+        else:
+            logging.basicConfig(level=logging.INFO)
+            logging.getLogger().setLevel(logging.INFO)
+
+        # disable vivisect-related logging, it's verbose and not relevant for capa users
+        capa.main.set_vivisect_log_level(logging.CRITICAL)
+
+        # py2 doesn't know about cp65001, which is a variant of utf-8 on windows
+        # tqdm bails when trying to render the progress bar in this setup.
+        # because cp65001 is utf-8, we just map that codepage to the utf-8 codec.
+        # see #380 and: https://stackoverflow.com/a/3259271/87207
+        import codecs
+
+        codecs.register(lambda name: codecs.lookup("utf-8") if name == "cp65001" else None)
+
+        if args.rules == "(embedded rules)":
+            logger.info("-" * 80)
+            logger.info(" Using default embedded rules.")
+            logger.info(" To provide your own rules, use the form `capa.exe  ./path/to/rules/  /path/to/mal.exe`.")
+            logger.info(" You can see the current default rule set here:")
+            logger.info("     https://github.com/fireeye/capa-rules")
+            logger.info("-" * 80)
+
+            logger.debug("detected running from source")
+            args.rules = os.path.join(os.path.dirname(__file__), "..", "rules")
+            logger.debug("default rule path (source method): %s", args.rules)
+        else:
+            logger.info("using rules path: %s", args.rules)
+
+        try:
+            rules = capa.main.get_rules(args.rules)
+            rules = capa.rules.RuleSet(rules)
+            logger.info("successfully loaded %s rules", len(rules))
+            if args.tag:
+                rules = rules.filter_rules_by_meta(args.tag)
+                logger.info("selected %s rules", len(rules))
+        except (IOError, capa.rules.InvalidRule, capa.rules.InvalidRuleSet) as e:
+            logger.error("%s", str(e))
+            return -1
+
+        with open(args.sample, "rb") as f:
+            taste = f.read(8)
+
+        if (args.format == "freeze") or (args.format == "auto" and capa.features.freeze.is_freeze(taste)):
+            format = "freeze"
+            with open(args.sample, "rb") as f:
+                extractor = capa.features.freeze.load(f.read())
+        else:
+            format = args.format
+            try:
+                extractor = capa.main.get_extractor(args.sample, args.format)
+            except capa.main.UnsupportedFormatError:
+                logger.error("-" * 80)
+                logger.error(" Input file does not appear to be a PE file.")
+                logger.error(" ")
+                logger.error(
+                    " capa currently only supports analyzing PE files (or shellcode, when using --format sc32|sc64)."
+                )
+                logger.error(
+                    " If you don't know the input file type, you can try using the `file` utility to guess it.")
+                logger.error("-" * 80)
+                return -1
+            except capa.main.UnsupportedRuntimeError:
+                logger.error("-" * 80)
+                logger.error(" Unsupported runtime or Python interpreter.")
+                logger.error(" ")
+                logger.error(" capa supports running under Python 2.7 using Vivisect for binary analysis.")
+                logger.error(" It can also run within IDA Pro, using either Python 2.7 or 3.5+.")
+                logger.error(" ")
+                logger.error(
+                    " If you're seeing this message on the command line, please ensure you're running Python 2.7.")
+                logger.error("-" * 80)
+                return -1
+
+        meta = capa.main.collect_metadata(argv, args.sample, format, extractor)
+        capabilities, counts = capa.main.find_capabilities(rules, extractor)
+        meta["analysis"].update(counts)
+
+        if capa.main.has_file_limitation(rules, capabilities):
+            # bail if capa encountered file limitation e.g. a packed binary
+            # do show the output in verbose mode, though.
+            if not (args.verbose or args.vverbose or args.json):
+                return -1
+
+        # colorama will detect:
+        #  - when on Windows console, and fixup coloring, and
+        #  - when not an interactive session, and disable coloring
+        # renderers should use coloring and assume it will be stripped out if necessary.
+        colorama.init()
+        doc = capa.render.convert_capabilities_to_result_document(meta, rules, capabilities)
+        print(render_matches_by_function(doc))
+        colorama.deinit()
+
+        logger.info("done.")
+
+        return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
\ No newline at end of file

From ec4fa4c199a79769fd0a043e185d08bc83f48958 Mon Sep 17 00:00:00 2001
From: William Ballenthin <william.ballenthin@fireeye.com>
Date: Mon, 6 Jul 2020 14:06:37 -0600
Subject: [PATCH 28/32] main: don't log expected non-rule files

---
 capa/main.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/capa/main.py b/capa/main.py
index c883786a..503f5f09 100644
--- a/capa/main.py
+++ b/capa/main.py
@@ -307,7 +307,10 @@ def get_rules(rule_path):
         for root, dirs, files in os.walk(rule_path):
             for file in files:
                 if not file.endswith(".yml"):
-                    logger.warning("skipping non-.yml file: %s", file)
+                    if not (file.endswith(".md") or file.endswith(".git")):
+                        # we expect to see readme.md, format.md, and maybe a .git directory here.
+                        # anything else may be a mis-named rule, so warn about it.
+                        logger.warning("skipping non-.yml file: %s", file)
                     continue
 
                 rule_path = os.path.join(root, file)

From ecba9986fca37295934f8dd590dce1e807c08009 Mon Sep 17 00:00:00 2001
From: William Ballenthin <william.ballenthin@fireeye.com>
Date: Mon, 6 Jul 2020 14:07:02 -0600
Subject: [PATCH 29/32] pep8

---
 scripts/show-capabilities-by-function.py | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/scripts/show-capabilities-by-function.py b/scripts/show-capabilities-by-function.py
index bb73d389..80b09b7b 100644
--- a/scripts/show-capabilities-by-function.py
+++ b/scripts/show-capabilities-by-function.py
@@ -45,19 +45,18 @@ import sys
 import logging
 import collections
 
-import colorama
 import argparse
+import colorama
 
 import capa.main
 import capa.rules
 import capa.engine
 import capa.render
-import capa.render.utils as rutils
 import capa.features
+import capa.render.utils as rutils
 import capa.features.freeze
 import capa.features.extractors.viv
 
-
 logger = logging.getLogger("capa.show-capabilities-by-function")
 
 
@@ -125,8 +124,11 @@ def main(argv=None):
         parser.add_argument("-d", "--debug", action="store_true", help="Enable debugging output on STDERR")
         parser.add_argument("-q", "--quiet", action="store_true", help="Disable all output but errors")
         parser.add_argument(
-            "-f", "--format", choices=[f[0] for f in formats], default="auto",
-            help="Select sample format, %s" % format_help
+            "-f",
+            "--format",
+            choices=[f[0] for f in formats],
+            default="auto",
+            help="Select sample format, %s" % format_help,
         )
         args = parser.parse_args(args=argv)
 
@@ -195,7 +197,8 @@ def main(argv=None):
                     " capa currently only supports analyzing PE files (or shellcode, when using --format sc32|sc64)."
                 )
                 logger.error(
-                    " If you don't know the input file type, you can try using the `file` utility to guess it.")
+                    " If you don't know the input file type, you can try using the `file` utility to guess it."
+                )
                 logger.error("-" * 80)
                 return -1
             except capa.main.UnsupportedRuntimeError:
@@ -206,7 +209,8 @@ def main(argv=None):
                 logger.error(" It can also run within IDA Pro, using either Python 2.7 or 3.5+.")
                 logger.error(" ")
                 logger.error(
-                    " If you're seeing this message on the command line, please ensure you're running Python 2.7.")
+                    " If you're seeing this message on the command line, please ensure you're running Python 2.7."
+                )
                 logger.error("-" * 80)
                 return -1
 
@@ -235,4 +239,4 @@ def main(argv=None):
 
 
 if __name__ == "__main__":
-    sys.exit(main())
\ No newline at end of file
+    sys.exit(main())

From 86bf376b19d99c631ed8856742a89b15f688f1b7 Mon Sep 17 00:00:00 2001
From: Willi Ballenthin <willi.ballenthin@gmail.com>
Date: Mon, 6 Jul 2020 14:15:40 -0600
Subject: [PATCH 30/32] readme: add links to further information

---
 README.md | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 91264a68..fb97714a 100644
--- a/README.md
+++ b/README.md
@@ -61,9 +61,7 @@ Alternatively, you can fetch a nightly build of a standalone binary from one of
 - Linux: TODO
 - OSX: TODO
 
-## installation
-
-See [doc/installation.md](doc/installation.md) for information on how to setup the project, including how to use it as a Python library.
+To use capa as a library or integrate with another tool, see [doc/installation.md](doc/installation.md) for further setup instructions.
 
 For more information about how to use capa, including running it as an IDA script/plugin see [doc/usage.md](doc/usage.md).
 
@@ -142,3 +140,9 @@ Here's an example rule used by capa:
 
 The [github.com/fireeye/capa-rules](https://github.com/fireeye/capa-rules) repository contains hundreds of standard library rules that are distributed with capa.
 Please learn to write rules and contribute new entries as you find interesting techniques in malware.
+
+# further information
+  - [doc/usage.md](doc/usage.md)
+  - [doc/installation.md](doc/installation.md)
+  - [github.com/fireeye/capa-rules](https://github.com/fireeye/capa-rules)
+  - [doc/rule format.md](https://github.com/fireeye/capa-rules/blob/master/doc/format.md)

From ea39379ec83de15ec55dc22db9e332562f9e31b7 Mon Sep 17 00:00:00 2001
From: Willi Ballenthin <willi.ballenthin@gmail.com>
Date: Mon, 6 Jul 2020 14:21:57 -0600
Subject: [PATCH 31/32] installation: list development setup

---
 doc/installation.md | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/doc/installation.md b/doc/installation.md
index be199336..6fd958bb 100644
--- a/doc/installation.md
+++ b/doc/installation.md
@@ -44,15 +44,17 @@ Finally, use `pip` to install the source code in "editable" mode. This means tha
 
 You'll find that the `capa.exe` (Windows) or `capa` (Linux) executables in your path now invoke the capa binary from this directory.
 
-If you want to install the development dependencies, which you need to run the code formatters, syntax checker, rule linter and tests (and for the [hooks](#4-setup-hooks-optional)) run:
+We use the following tools to ensure consistent code style and formatting:
+  - [black](https://github.com/psf/black) code formatter, with `-l 120`
+  - [isort](https://pypi.org/project/isort/) code formatter, with `--length-sort --line-width 120`
+  - [dos2unix](https://linux.die.net/man/1/dos2unix) for UNIX-style LF newlines
+  - [capafmt](https://github.com/fireeye/capa/blob/master/scripts/capafmt.py) rule formatter
+
+To install these development dependencies, run:
 
 `$ pip install -e ./local/path/to/src[dev]`
 
-If you are using zsh, do not forget to escape the square brackets:
-
-`$ pip install -e ./local/path/to/src\[dev\]`
-
-Note that some development dependencies require Python3.
+Note that some development dependencies (including the black code formatter) require Python3.
 
 ### 4. Setup hooks [optional]
 

From 55de5061ea21a106ebff5e2d80ae2446d48d8838 Mon Sep 17 00:00:00 2001
From: Moritz Raabe <moritz.raabe@fireeye.com>
Date: Mon, 6 Jul 2020 19:24:35 +0200
Subject: [PATCH 32/32] increase stack cookie byte delta

---
 capa/features/extractors/viv/insn.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/capa/features/extractors/viv/insn.py b/capa/features/extractors/viv/insn.py
index 52c0362c..1516b13e 100644
--- a/capa/features/extractors/viv/insn.py
+++ b/capa/features/extractors/viv/insn.py
@@ -8,6 +8,11 @@ from capa.features.insn import Number, Offset, Mnemonic
 from capa.features.extractors.viv.indirect_calls import NotFoundError, resolve_indirect_call
 
 
+# security cookie checks may perform non-zeroing XORs. these are expected within a certain byte range of the
+# start of the first basic block or the end of a returning basic block. this helps to reduce FP features.
+SECURITY_COOKIE_BYTES_DELTA = 0x40
+
+
 def interface_extract_instruction_XXX(f, bb, insn):
     """
     parse features from the given instruction.
@@ -257,11 +262,12 @@ def is_security_cookie(f, bb, insn):
 
     # expect security cookie init in first basic block within first bytes (instructions)
     bb0 = f.basic_blocks[0]
-    if bb == bb0 and insn.va < bb.va + 30:
+
+    if bb == bb0 and insn.va < (bb.va + SECURITY_COOKIE_BYTES_DELTA):
         return True
 
     # ... or within last bytes (instructions) before a return
-    elif bb.instructions[-1].isReturn() and insn.va > bb.va + bb.size - 30:
+    elif bb.instructions[-1].isReturn() and insn.va > (bb.va + bb.size - SECURITY_COOKIE_BYTES_DELTA):
         return True
 
     return False