Compare commits

...

89 Commits

Author SHA1 Message Date
Willi Ballenthin
2ec979469e init show-object-layout using assemblage 2024-10-22 09:40:09 +00:00
Willi Ballenthin
2db0cc457f detect user code via entry points (main function name) 2024-10-22 09:21:59 +00:00
Willi Ballenthin
3cad8d12af mute unknown lines 2024-10-22 09:21:40 +00:00
Willi Ballenthin
5be96d7ddc consider thunks library functions 2024-10-22 09:21:16 +00:00
mr-tz
a3b6aef67f render from doc 2024-10-21 12:43:47 +00:00
mr-tz
077fa2e7e1 simplify and include thunks 2024-10-21 11:50:25 +00:00
mr-tz
c3b8e7c638 remove Python 3.12 syntax 2024-10-21 11:49:45 +00:00
Willi Ballenthin
4346922b9a library-detection: add json output format 2024-10-21 10:42:30 +00:00
Willi Ballenthin
d652192af1 library-detection: cleanup script 2024-10-21 10:26:19 +00:00
Moritz
d83750c901 Add LookupError exception 2024-10-15 17:10:59 +02:00
mr-tz
8394b81841 init add result structure and render 2024-10-14 16:05:01 +00:00
mr-tz
febda7d0e2 add option to save idb 2024-10-14 06:15:06 +00:00
mr-tz
f9abb5e83f ease/document extra dependency 2024-10-14 05:53:03 +00:00
Willi Ballenthin
f69602d085 library detection: rough integration of algorithms 2024-10-11 15:58:37 +00:00
Willi Ballenthin
ad187fc3bd library detection: merge flirt and string branches 2024-10-11 13:43:10 +00:00
mr-tz
637926e0b6 initial commit of out-of-the box flirt-based library id 2024-10-11 12:36:42 +00:00
Willi Ballenthin
03ce40e781 initial attempt at library identification via known strings 2024-10-10 12:35:48 +00:00
Moritz
1f7f24c467 Merge pull request #2454 from mandiant/fix/ida9idalib
Fix IDA 9.0 / idalib
2024-10-09 18:04:23 +02:00
mr-tz
f2c329b768 rename ida to idapro module for IDA 9.0 2024-10-09 12:20:38 +00:00
mr-tz
22368fbe6f rename bin_search function 2024-10-09 12:13:11 +00:00
Moritz
6a12ab8598 Merge pull request #2450 from mandiant/dependabot/pip/rich-13.9.2
build(deps): bump rich from 13.8.0 to 13.9.2
2024-10-08 10:57:04 +02:00
dependabot[bot]
a4fdb0a3ef build(deps): bump rich from 13.8.0 to 13.9.2
Bumps [rich](https://github.com/Textualize/rich) from 13.8.0 to 13.9.2.
- [Release notes](https://github.com/Textualize/rich/releases)
- [Changelog](https://github.com/Textualize/rich/blob/master/CHANGELOG.md)
- [Commits](https://github.com/Textualize/rich/compare/v13.8.0...v13.9.2)

---
updated-dependencies:
- dependency-name: rich
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-10-07 14:07:10 +00:00
Moritz
c7bb8b8e67 Update Node checkout Actions (#2446)
* Update setup Node Actions
2024-10-07 11:46:37 +02:00
Tamir K.
41c5194693 Fix/corrupted file architecture key error (#2444)
* Add try except clause
2024-10-06 08:46:16 +02:00
Moritz
8c8b67a6ea Merge pull request #2438 from mandiant/mr-tz-patch-2
Update build.yml
2024-10-04 14:22:45 +02:00
Moritz
f0cc0fb2b8 Update build.yml 2024-10-04 14:02:53 +02:00
Moritz
fc8089c248 Merge pull request #2426 from mandiant/release/v740
Release v7.4.0
2024-10-04 13:51:37 +02:00
mr-tz
d795db9017 include capa explorer web entry 2024-10-04 09:22:11 +00:00
mr-tz
544e3eee5b bump version to 7.4.0
tmp2

tmp2
2024-10-04 09:22:08 +00:00
mr-tz
dfc304d9f6 add Python 3.8 and 3.9 deprecation warning
tmp
2024-10-04 09:19:56 +00:00
Capa Bot
54688517c4 Sync capa rules submodule 2024-10-04 09:18:47 +00:00
Moritz
21fc77ea28 Merge pull request #2431 from s-ff/add-provide-feedback-button
capa Explorer Web: add provide feedback button
2024-10-03 12:28:17 +02:00
Capa Bot
2976974009 Sync capa rules submodule 2024-10-03 09:39:09 +00:00
Moritz
030954d556 Merge pull request #2433 from mandiant/fix/vmray-string-call-args
fix backslash handling in string call arguments
2024-10-03 11:28:34 +02:00
Capa Bot
389a5eb84f Sync capa-testfiles submodule 2024-10-02 16:56:11 +00:00
mr-tz
6d3b96f0b0 fix backslash handling in string call arguments 2024-10-02 16:54:38 +00:00
Soufiane Fariss
2a13bf6c0b capa Explorer Web: fix lint 2024-10-02 16:10:23 +02:00
Fariss
e9f4f5bc31 capa Explorer Web: remove unneeded attribute 2024-10-02 16:05:38 +02:00
Soufiane Fariss
e7400be99a capa Explorer Web: add provide feedback button 2024-10-02 15:54:07 +02:00
Moritz
591a1e8fbb Merge pull request #2430 from s-ff/web-fix-import-features
capa Explorer Web: fix import features
2024-10-02 15:29:35 +02:00
Soufiane Fariss
2f5a227fb0 capa Explorer Web: fix import features 2024-10-02 14:49:58 +02:00
Moritz
931ff62421 Merge pull request #2423 from mandiant/dependabot/pip/types-protobuf-5.28.0.20240924
build(deps): bump types-protobuf from 5.27.0.20240920 to 5.28.0.20240924
2024-10-02 11:21:12 +02:00
dependabot[bot]
3037307ee8 build(deps): bump pydantic from 2.9.1 to 2.9.2 (#2389)
* build(deps): bump pydantic from 2.9.1 to 2.9.2

Bumps [pydantic](https://github.com/pydantic/pydantic) from 2.9.1 to 2.9.2.
- [Release notes](https://github.com/pydantic/pydantic/releases)
- [Changelog](https://github.com/pydantic/pydantic/blob/main/HISTORY.md)
- [Commits](https://github.com/pydantic/pydantic/compare/v2.9.1...v2.9.2)

---
updated-dependencies:
- dependency-name: pydantic
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>

* Update requirements.txt

* remove pinned sub-dependency

Co-authored-by: Willi Ballenthin <wballenthin@google.com>

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Moritz <mr-tz@users.noreply.github.com>
Co-authored-by: Willi Ballenthin <wballenthin@google.com>
2024-10-02 11:20:54 +02:00
Capa Bot
d6c1725d7e Sync capa rules submodule 2024-10-02 08:41:23 +00:00
Fariss
16eae70c17 capa Explorer Web: improve url navigation (#2425)
* explorer web: improve url navigation

This commit enhances the navigation guard for the /analysis route to
provide a better user experience when loading data from a URL:

Previously: users browsing to /analysis were always redirected to
the homepage (/).

With this commit:
- If a user accesses /analysis without an rdoc parameter, they are still
  redirected to the homepage.
- If a user accesses /analysis with an rdoc parameter, the following
  occurs:
  The user is redirected to the homepage (/) and the rdoc parameter is
  preserved in the URL; capa Explorer Web then loads the rdoc from the URL.

---------

Co-authored-by: Moritz <mr-tz@users.noreply.github.com>
2024-10-01 19:25:20 +02:00
dependabot[bot]
9e7e6be374 build(deps): bump types-protobuf from 5.27.0.20240920 to 5.28.0.20240924
Bumps [types-protobuf](https://github.com/python/typeshed) from 5.27.0.20240920 to 5.28.0.20240924.
- [Commits](https://github.com/python/typeshed/commits)

---
updated-dependencies:
- dependency-name: types-protobuf
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-10-01 11:44:19 +00:00
Moritz
3e8bed1db2 Merge pull request #2421 from mandiant/ci/dependabot-ignore-patch
Update dependabot.yml to ignore patch versions
2024-10-01 13:40:34 +02:00
Moritz
e4ac02a968 Update dependabot.yml 2024-10-01 13:32:31 +02:00
dependabot[bot]
eff358980a build(deps): bump pefile from 2023.2.7 to 2024.8.26 (#2413) 2024-09-30 20:24:09 +00:00
Capa Bot
108bd7f224 Sync capa-testfiles submodule 2024-09-30 12:08:25 +00:00
Willi Ballenthin
ab43c8c0c2 loader: fix unhandled name error (#2411) 2024-09-30 14:06:14 +02:00
Capa Bot
585dff8b48 Sync capa rules submodule 2024-09-30 12:06:04 +00:00
Capa Bot
cb09041387 Sync capa rules submodule 2024-09-30 12:05:43 +00:00
Capa Bot
80899f3f70 Sync capa-testfiles submodule 2024-09-27 09:53:30 +00:00
Moritz
00d2bb06fd Merge pull request #2409 from mandiant/fix/2408
dynamic: emit complete features for A/W APIs
2024-09-27 11:26:39 +02:00
Moritz
ff1043e976 Merge branch 'master' into fix/2408 2024-09-27 09:35:24 +02:00
Fariss
51a4eb46b8 replace tqdm, termcolor, tabulate with rich (#2374)
* logging: use rich handler for logging

* tqdm: remove unneeded redirecting_print_to_tqdm function

* tqdm: introduce `CapaProgressBar` rich `Progress` bar

* tqdm: replace tqdm with rich Progress bar

* tqdm: remove tqdm dependency

* termcolor: replace termcolor and update `scripts/`

* tests: update `test_render.py` to use rich.console.Console

* termcolor: remove termcolor dependency

* capa.render.utils: add `write` & `writeln` methods to subclass `Console`

* update markup util functions to use fmt strings

* tests: update `test_render.py` to use `capa.render.utils.Console`

* replace kwarg `end=""` with `write` and `writeln` methods

* tabulate: replace tabulate with `rich.table`

* tabulate: remove `tabulate` and its dependency `wcwidth`

* logging: handle logging in `capa.main`

* logging: set up logging in `capa.main`

this commit sets up logging in `capa.main` and uses a shared
`log_console` in `capa.helpers` for logging purposes

* changelog: replace packages with rich

* remove entry from pyinstaller and unneeded progress.update call

* update requirements.txt

* scripts: use `capa.helpers.log_console` in `CapaProgressBar`

* logging: configure root logger to use `RichHandler`

* remove unused import `inspect`
2024-09-27 09:34:21 +02:00
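The `CapaProgressBar` mentioned above is exposed as `capa.helpers.CapaProgressBar` and builds on rich's `Progress`. A hypothetical sketch of such a subclass with custom `unit` and `postfix` task fields; the actual implementation may differ:

```python
from rich.progress import Progress, BarColumn, TextColumn, TimeRemainingColumn


class CapaProgressBar(Progress):
    # hypothetical sketch: add columns for a unit label and a free-form
    # postfix (e.g. "skipped N library functions")
    @classmethod
    def get_default_columns(cls):
        return (
            TextColumn("[progress.description]{task.description}"),
            BarColumn(),
            TextColumn("{task.completed}/{task.total} {task.fields[unit]}"),
            TextColumn("{task.fields[postfix]}"),
            TimeRemainingColumn(),
        )


# usage mirroring the capabilities code in this diff
with CapaProgressBar(transient=True) as pbar:
    task = pbar.add_task("matching", total=100, unit="functions", postfix="")
    for _ in range(100):
        pbar.advance(task)
```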
dependabot[bot]
558bf0fbf2 build(deps): bump protobuf from 5.27.3 to 5.28.2 (#2390)
Bumps [protobuf](https://github.com/protocolbuffers/protobuf) from 5.27.3 to 5.28.2.
- [Release notes](https://github.com/protocolbuffers/protobuf/releases)
- [Changelog](https://github.com/protocolbuffers/protobuf/blob/main/protobuf_release.bzl)
- [Commits](https://github.com/protocolbuffers/protobuf/compare/v5.27.3...v5.28.2)

---
updated-dependencies:
- dependency-name: protobuf
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-09-27 09:32:58 +02:00
dependabot[bot]
76aff57467 build(deps): bump setuptools from 70.0.0 to 75.1.0 (#2392)
Bumps [setuptools](https://github.com/pypa/setuptools) from 70.0.0 to 75.1.0.
- [Release notes](https://github.com/pypa/setuptools/releases)
- [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst)
- [Commits](https://github.com/pypa/setuptools/compare/v70.0.0...v75.1.0)

---
updated-dependencies:
- dependency-name: setuptools
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-09-27 09:32:18 +02:00
dependabot[bot]
f82fc1902c build(deps): bump types-protobuf from 5.27.0.20240907 to 5.27.0.20240920 (#2393)
Bumps [types-protobuf](https://github.com/python/typeshed) from 5.27.0.20240907 to 5.27.0.20240920.
- [Commits](https://github.com/python/typeshed/commits)

---
updated-dependencies:
- dependency-name: types-protobuf
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-09-27 09:32:08 +02:00
Capa Bot
e9e8fe42ed Sync capa rules submodule 2024-09-27 07:31:51 +00:00
Mike Hunhoff
80e007787c dynamic: update CHANGELOG 2024-09-26 14:43:20 -06:00
Mike Hunhoff
bfcc705117 dynamic: vmray: remove redundant test 2024-09-26 14:42:08 -06:00
Mike Hunhoff
834150ad1d dynamic: drakvuf: fix A/W API detection 2024-09-26 14:36:16 -06:00
Mike Hunhoff
31ec208a9b dynamic: cape: fix A/W API detection 2024-09-26 14:27:45 -06:00
Mike Hunhoff
a5d9459c42 dynamic: vmray: fix A/W API detection 2024-09-26 14:15:21 -06:00
Moritz
06271a88d4 Fix VMRay missing process data (#2396)
* get all processes, see #2394

* add tests for process recording

* rename symbols for clarification

* handle single and list entries

* update changelog

* dynamic: vmray: use monitor IDs to track processes and threads

* dynamic: vmray: code refactor

* dynamic: vmray: add sanity checks when processing monitor processes

* dynamic: vmray: remove unnecessary keys() access

* dynamic: vmray: clarify comments

* Update CHANGELOG.md

Co-authored-by: Willi Ballenthin <wballenthin@google.com>

* dynamic: vmray: update CHANGELOG

---------

Co-authored-by: Mike Hunhoff <mike.hunhoff@gmail.com>
Co-authored-by: Willi Ballenthin <wballenthin@google.com>
2024-09-26 13:57:30 -06:00
Capa Bot
c48bccf623 Sync capa rules submodule 2024-09-26 17:38:34 +00:00
Capa Bot
9975f769f9 Sync capa-testfiles submodule 2024-09-26 17:34:51 +00:00
Capa Bot
c5d8f99d6f Sync capa rules submodule 2024-09-26 12:25:36 +00:00
Willi Ballenthin
bcd57a9af1 detect and use third-party analysis backends when possible (#2380)
* introduce script to detect 3P backends

ref #2376

* add idalib backend

* binary ninja: search for API using XDG desktop entry

ref #2376

* binja: search more XDG locations for desktop entry

* binary ninja: optimize embedded PE scanning

closes #2397

* add script for comparing the performance of analysis backends
2024-09-26 13:21:55 +02:00
Capa Bot
12337be2b7 Sync capa-testfiles submodule 2024-09-25 09:17:50 +00:00
Moritz
25c4902c21 Merge pull request #2400 from mandiant/web/filesize
bump upload size limit to 100MB from 10MB
2024-09-24 14:14:42 +02:00
mr-tz
f024e1d54c bump upload size limit to 100MB from 10MB 2024-09-24 12:09:38 +00:00
Moritz
bab7ed9188 Merge pull request #2395 from mandiant/dependabot/npm_and_yarn/web/explorer/rollup-4.22.4
build(deps): bump rollup from 4.21.3 to 4.22.4 in /web/explorer
2024-09-24 13:49:10 +02:00
Capa Bot
6eda8c9713 Sync capa-testfiles submodule 2024-09-24 11:29:53 +00:00
Capa Bot
22e88c860f Sync capa-testfiles submodule 2024-09-24 11:25:28 +00:00
Capa Bot
7884248022 Sync capa rules submodule 2024-09-24 11:25:18 +00:00
dependabot[bot]
4891fd750f build(deps): bump rollup from 4.21.3 to 4.22.4 in /web/explorer
Bumps [rollup](https://github.com/rollup/rollup) from 4.21.3 to 4.22.4.
- [Release notes](https://github.com/rollup/rollup/releases)
- [Changelog](https://github.com/rollup/rollup/blob/master/CHANGELOG.md)
- [Commits](https://github.com/rollup/rollup/compare/v4.21.3...v4.22.4)

---
updated-dependencies:
- dependency-name: rollup
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-09-24 04:55:36 +00:00
Willi Ballenthin
783e14b949 pyinstaller: use Python 3.12 for standalone build (#2385)
* pyinstaller: use Python 3.12 for standalone build

closes #2383

* changelog

* ci: build: fix test filename
2024-09-23 22:33:23 +02:00
Willi Ballenthin
74777ad23e changelog 2024-09-23 20:21:50 +00:00
Willi Ballenthin
01b35e7582 pyproject.toml: bump min python version to 3.8.1
fixed #2387
2024-09-23 20:21:50 +00:00
Capa Bot
e29288cc8d Sync capa rules submodule 2024-09-22 12:09:30 +00:00
Moritz
c4c35ca6e9 Merge pull request #2379 from mandiant/weg/update-homepage
update release v7.3.0 info and formatting
2024-09-20 14:46:42 +02:00
Moritz
3b1e0284c0 Merge pull request #2378 from mandiant/doc/update-homepage
add update homepage entry
2024-09-20 14:46:27 +02:00
Moritz
7b61d28dd2 Merge pull request #2375 from mandiant/dependabot/npm_and_yarn/web/explorer/vite-5.4.6
build(deps-dev): bump vite from 5.3.2 to 5.4.6 in /web/explorer
2024-09-20 12:02:31 +02:00
mr-tz
e3267df5b1 update release v7.3.0 info and formatting 2024-09-20 09:57:01 +00:00
Moritz
9076e5475d add update homepage entry 2024-09-20 11:14:16 +02:00
dependabot[bot]
84d2a18b52 build(deps-dev): bump vite from 5.3.2 to 5.4.6 in /web/explorer
Bumps [vite](https://github.com/vitejs/vite/tree/HEAD/packages/vite) from 5.3.2 to 5.4.6.
- [Release notes](https://github.com/vitejs/vite/releases)
- [Changelog](https://github.com/vitejs/vite/blob/v5.4.6/packages/vite/CHANGELOG.md)
- [Commits](https://github.com/vitejs/vite/commits/v5.4.6/packages/vite)

---
updated-dependencies:
- dependency-name: vite
  dependency-type: direct:development
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-09-17 19:16:36 +00:00
85 changed files with 7174 additions and 5144 deletions

.github/dependabot.yml

@@ -4,3 +4,6 @@ updates:
directory: "/"
schedule:
interval: "weekly"
ignore:
- dependency-name: "*"
update-types: ["version-update:semver-patch"]

.github/mypy/mypy.ini

@@ -1,8 +1,5 @@
[mypy]
[mypy-tqdm.*]
ignore_missing_imports = True
[mypy-ruamel.*]
ignore_missing_imports = True

.github/pyinstaller/pyinstaller.spec

@@ -2,7 +2,6 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
import sys
import wcwidth
import capa.rules.cache
from pathlib import Path
@@ -29,13 +28,6 @@ a = Analysis(
("../../rules", "rules"),
("../../sigs", "sigs"),
("../../cache", "cache"),
# capa.render.default uses tabulate that depends on wcwidth.
# it seems wcwidth uses a json file `version.json`
# and this doesn't get picked up by pyinstaller automatically.
# so we manually embed the wcwidth resources here.
#
# ref: https://stackoverflow.com/a/62278462/87207
(Path(wcwidth.__file__).parent, "wcwidth"),
],
# when invoking pyinstaller from the project root,
# this gets run from the project root.
@@ -48,11 +40,6 @@ a = Analysis(
"tkinter",
"_tkinter",
"Tkinter",
# tqdm provides renderers for ipython,
# however, this drags in a lot of dependencies.
# since we don't spawn a notebook, we can safely remove these.
"IPython",
"ipywidgets",
# these are pulled in by networkx
# but we don't need to compute the strongly connected components.
"numpy",
@@ -70,7 +57,10 @@ a = Analysis(
"qt5",
"pyqtwebengine",
"pyasn1",
# don't pull in Binary Ninja/IDA bindings that should
# only be installed locally.
"binaryninja",
"ida",
],
)

.github/workflows/build.yml

@@ -30,8 +30,8 @@ jobs:
python_version: 3.8
- os: ubuntu-20.04
artifact_name: capa
asset_name: linux-py311
python_version: 3.11
asset_name: linux-py312
python_version: 3.12
- os: windows-2019
artifact_name: capa.exe
asset_name: windows
@@ -88,7 +88,7 @@ jobs:
asset_name: linux
- os: ubuntu-22.04
artifact_name: capa
asset_name: linux-py311
asset_name: linux-py312
- os: windows-2022
artifact_name: capa.exe
asset_name: windows
@@ -114,7 +114,7 @@ jobs:
include:
- asset_name: linux
artifact_name: capa
- asset_name: linux-py311
- asset_name: linux-py312
artifact_name: capa
- asset_name: windows
artifact_name: capa.exe


@@ -43,7 +43,7 @@ jobs:
fetch-depth: 1
show-progress: true
- name: Set up Node
uses: actions/setup-node@v4
uses: actions/setup-node@0a44ba7841725637a19e28fa30b79a866c81b0a6 # v4.0.4
with:
node-version: 20
cache: 'npm'


@@ -19,7 +19,7 @@ jobs:
show-progress: true
- name: Set up Node
uses: actions/setup-node@v3
uses: actions/setup-node@0a44ba7841725637a19e28fa30b79a866c81b0a6 # v4.0.4
with:
node-version: 20
cache: 'npm'

.gitignore

@@ -127,3 +127,4 @@ Pipfile.lock
.github/binja/download_headless.py
.github/binja/BinaryNinja-headless.zip
justfile
data/

.pre-commit-config.yaml

@@ -108,6 +108,7 @@ repos:
- "--check-untyped-defs"
- "--ignore-missing-imports"
- "--config-file=.github/mypy/mypy.ini"
- "--enable-incomplete-feature=NewGenericSyntax"
- "capa/"
- "scripts/"
- "tests/"

File diff suppressed because it is too large.


capa/analysis/flirt.py (new file)

@@ -0,0 +1,38 @@
# Copyright (C) 2024 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
from pydantic import BaseModel
import capa.features.extractors.ida.idalib as idalib
if not idalib.has_idalib():
raise RuntimeError("cannot find IDA idalib module.")
if not idalib.load_idalib():
raise RuntimeError("failed to load IDA idalib module.")
import idaapi
import idautils
class FunctionId(BaseModel):
va: int
is_library: bool
name: str
def get_flirt_matches(lib_only=True):
for fva in idautils.Functions():
f = idaapi.get_func(fva)
is_lib = bool(f.flags & idaapi.FUNC_LIB)
fname = idaapi.get_func_name(fva)
if lib_only and not is_lib:
continue
yield FunctionId(va=fva, is_library=is_lib, name=fname)

capa/analysis/libraries.py (new file)

@@ -0,0 +1,240 @@
# Copyright (C) 2024 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import io
import sys
import logging
import argparse
import tempfile
import contextlib
from enum import Enum
from typing import List, Optional
from pathlib import Path
import rich
from pydantic import BaseModel
from rich.text import Text
from rich.console import Console
import capa.main
import capa.helpers
import capa.analysis.flirt
import capa.analysis.strings
import capa.features.extractors.ida.idalib as idalib
if not idalib.has_idalib():
raise RuntimeError("cannot find IDA idalib module.")
if not idalib.load_idalib():
raise RuntimeError("failed to load IDA idalib module.")
import idaapi
import idapro
import ida_auto
import idautils
logger = logging.getLogger(__name__)
class Classification(str, Enum):
USER = "user"
LIBRARY = "library"
UNKNOWN = "unknown"
class Method(str, Enum):
FLIRT = "flirt"
STRINGS = "strings"
THUNK = "thunk"
ENTRYPOINT = "entrypoint"
class FunctionClassification(BaseModel):
va: int
classification: Classification
# name per the disassembler/analysis tool
# may be combined with the recovered/suspected name TODO below
name: str
# if is library, this must be provided
method: Optional[Method]
# TODO if is library, recovered/suspected name?
# if is library, these can optionally be provided.
library_name: Optional[str] = None
library_version: Optional[str] = None
class FunctionIdResults(BaseModel):
function_classifications: List[FunctionClassification]
@contextlib.contextmanager
def ida_session(input_path: Path, use_temp_dir=True):
if use_temp_dir:
t = Path(tempfile.mkdtemp(prefix="ida-")) / input_path.name
else:
t = input_path
logger.debug("using %s", str(t))
# stderr=True is used here to redirect the spinner banner to stderr,
# so that users can redirect capa's output.
console = Console(stderr=True, quiet=False)
try:
if use_temp_dir:
t.write_bytes(input_path.read_bytes())
# idalib writes to stdout (ugh), so we have to capture that
# so as not to screw up structured output.
with capa.helpers.stdout_redirector(io.BytesIO()):
idapro.enable_console_messages(False)
with capa.main.timing("analyze program"):
with console.status("analyzing program...", spinner="dots"):
if idapro.open_database(str(t.absolute()), run_auto_analysis=True):
raise RuntimeError("failed to analyze input file")
logger.debug("idalib: waiting for analysis...")
ida_auto.auto_wait()
logger.debug("idalib: opened database.")
yield
finally:
idapro.close_database()
if use_temp_dir:
t.unlink()
def is_thunk_function(fva):
f = idaapi.get_func(fva)
return bool(f.flags & idaapi.FUNC_THUNK)
def main(argv=None):
if argv is None:
argv = sys.argv[1:]
parser = argparse.ArgumentParser(description="Identify library functions using various strategies.")
capa.main.install_common_args(parser, wanted={"input_file"})
parser.add_argument("--store-idb", action="store_true", default=False, help="store IDA database file")
parser.add_argument("--min-string-length", type=int, default=8, help="minimum string length")
parser.add_argument("-j", "--json", action="store_true", help="emit JSON instead of text")
args = parser.parse_args(args=argv)
try:
capa.main.handle_common_args(args)
except capa.main.ShouldExitError as e:
return e.status_code
dbs = capa.analysis.strings.get_default_databases()
capa.analysis.strings.prune_databases(dbs, n=args.min_string_length)
function_classifications: List[FunctionClassification] = []
with ida_session(args.input_file, use_temp_dir=not args.store_idb):
with capa.main.timing("FLIRT-based library identification"):
# TODO: add more signature files
# TODO: apply more signatures
for flirt_match in capa.analysis.flirt.get_flirt_matches():
function_classifications.append(
FunctionClassification(
va=flirt_match.va,
name=flirt_match.name,
classification=Classification.LIBRARY,
method=Method.FLIRT,
# note: we cannot currently include which signature matched per function via the IDA API
)
)
# thunks
for fva in idautils.Functions():
if is_thunk_function(fva):
function_classifications.append(
FunctionClassification(
va=fva,
name=idaapi.get_func_name(fva),
classification=Classification.LIBRARY,
method=Method.THUNK,
)
)
with capa.main.timing("string-based library identification"):
for string_match in capa.analysis.strings.get_string_matches(dbs):
function_classifications.append(
FunctionClassification(
va=string_match.va,
name=idaapi.get_func_name(string_match.va),
classification=Classification.LIBRARY,
method=Method.STRINGS,
library_name=string_match.metadata.library_name,
library_version=string_match.metadata.library_version,
)
)
for va in idautils.Functions():
name = idaapi.get_func_name(va)
if name not in {"WinMain", }:
continue
function_classifications.append(
FunctionClassification(
va=va,
name=name,
classification=Classification.USER,
method=Method.ENTRYPOINT,
)
)
doc = FunctionIdResults(function_classifications=[])
classifications_by_va = capa.analysis.strings.create_index(function_classifications, "va")
for va in idautils.Functions():
if classifications := classifications_by_va.get(va):
doc.function_classifications.extend(classifications)
else:
doc.function_classifications.append(
FunctionClassification(
va=va,
name=idaapi.get_func_name(va),
classification=Classification.UNKNOWN,
method=None,
)
)
if args.json:
print(doc.model_dump_json()) # noqa: T201 print found
else:
table = rich.table.Table()
table.add_column("FVA")
table.add_column("CLASSIFICATION")
table.add_column("METHOD")
table.add_column("FNAME")
table.add_column("EXTRA INFO")
classifications_by_va = capa.analysis.strings.create_index(doc.function_classifications, "va", sorted_=True)
for va, classifications in classifications_by_va.items():
name = ", ".join({c.name for c in classifications})
if "sub_" in name:
name = Text(name, style="grey53")
classification = {c.classification for c in classifications}
method = {c.method for c in classifications if c.method}
extra = {f"{c.library_name}@{c.library_version}" for c in classifications if c.library_name}
table.add_row(
hex(va),
", ".join(classification) if classification != {"unknown"} else Text("unknown", style="grey53"),
", ".join(method),
name,
", ".join(extra),
)
rich.print(table)
if __name__ == "__main__":
sys.exit(main())

capa/analysis/requirements.txt (new file)

@@ -0,0 +1,2 @@
# temporary extra file to track dependencies of the analysis directory
nltk==3.9.1

capa/analysis/strings.py (new file)

@@ -0,0 +1,269 @@
# Copyright (C) 2024 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
"""
further requirements:
- nltk
"""
import gzip
import logging
import collections
from typing import Any, Dict, Mapping
from pathlib import Path
from dataclasses import dataclass
import msgspec
import capa.features.extractors.strings
logger = logging.getLogger(__name__)
class LibraryString(msgspec.Struct):
string: str
library_name: str
library_version: str
file_path: str | None = None
function_name: str | None = None
line_number: int | None = None
@dataclass
class LibraryStringDatabase:
metadata_by_string: Dict[str, LibraryString]
def __len__(self) -> int:
return len(self.metadata_by_string)
@classmethod
def from_file(cls, path: Path) -> "LibraryStringDatabase":
metadata_by_string: Dict[str, LibraryString] = {}
decoder = msgspec.json.Decoder(type=LibraryString)
for line in gzip.decompress(path.read_bytes()).split(b"\n"):
if not line:
continue
s = decoder.decode(line)
metadata_by_string[s.string] = s
return cls(metadata_by_string=metadata_by_string)
DEFAULT_FILENAMES = (
"brotli.jsonl.gz",
"bzip2.jsonl.gz",
"cryptopp.jsonl.gz",
"curl.jsonl.gz",
"detours.jsonl.gz",
"jemalloc.jsonl.gz",
"jsoncpp.jsonl.gz",
"kcp.jsonl.gz",
"liblzma.jsonl.gz",
"libsodium.jsonl.gz",
"libpcap.jsonl.gz",
"mbedtls.jsonl.gz",
"openssl.jsonl.gz",
"sqlite3.jsonl.gz",
"tomcrypt.jsonl.gz",
"wolfssl.jsonl.gz",
"zlib.jsonl.gz",
)
DEFAULT_PATHS = tuple(Path(__file__).parent / "data" / "oss" / filename for filename in DEFAULT_FILENAMES) + (
Path(__file__).parent / "data" / "crt" / "msvc_v143.jsonl.gz",
)
def get_default_databases() -> list[LibraryStringDatabase]:
return [LibraryStringDatabase.from_file(path) for path in DEFAULT_PATHS]
@dataclass
class WindowsApiStringDatabase:
dll_names: set[str]
api_names: set[str]
def __len__(self) -> int:
return len(self.dll_names) + len(self.api_names)
@classmethod
def from_dir(cls, path: Path) -> "WindowsApiStringDatabase":
dll_names: set[str] = set()
api_names: set[str] = set()
for line in gzip.decompress((path / "dlls.txt.gz").read_bytes()).decode("utf-8").splitlines():
if not line:
continue
dll_names.add(line)
for line in gzip.decompress((path / "apis.txt.gz").read_bytes()).decode("utf-8").splitlines():
if not line:
continue
api_names.add(line)
return cls(dll_names=dll_names, api_names=api_names)
@classmethod
def from_defaults(cls) -> "WindowsApiStringDatabase":
return cls.from_dir(Path(__file__).parent / "data" / "winapi")
def extract_strings(buf, n=4):
yield from capa.features.extractors.strings.extract_ascii_strings(buf, n=n)
yield from capa.features.extractors.strings.extract_unicode_strings(buf, n=n)
def prune_databases(dbs: list[LibraryStringDatabase], n=8):
"""remove less trustyworthy database entries.
such as:
- those found in multiple databases
- those that are English words
- those that are too short
- Windows API and DLL names
"""
# TODO: consider applying these filters directly to the persisted databases, not at load time.
winapi = WindowsApiStringDatabase.from_defaults()
try:
from nltk.corpus import words as nltk_words
nltk_words.words()
except (ImportError, LookupError):
# one-time download of dataset.
# this probably doesn't work well for embedded use.
import nltk
nltk.download("words")
from nltk.corpus import words as nltk_words
words = set(nltk_words.words())
counter: collections.Counter[str] = collections.Counter()
to_remove = set()
for db in dbs:
for string in db.metadata_by_string.keys():
counter[string] += 1
if string in words:
to_remove.add(string)
continue
if len(string) < n:
to_remove.add(string)
continue
if string in winapi.api_names:
to_remove.add(string)
continue
if string in winapi.dll_names:
to_remove.add(string)
continue
for string, count in counter.most_common():
if count <= 1:
break
# remove strings that are seen in more than one database
to_remove.add(string)
for db in dbs:
for string in to_remove:
if string in db.metadata_by_string:
del db.metadata_by_string[string]
def get_function_strings():
import idaapi
import idautils
import capa.features.extractors.ida.helpers as ida_helpers
strings_by_function = collections.defaultdict(set)
for ea in idautils.Functions():
f = idaapi.get_func(ea)
# ignore library functions and thunk functions as identified by IDA
if f.flags & idaapi.FUNC_THUNK:
continue
if f.flags & idaapi.FUNC_LIB:
continue
for bb in ida_helpers.get_function_blocks(f):
for insn in ida_helpers.get_instructions_in_range(bb.start_ea, bb.end_ea):
ref = capa.features.extractors.ida.helpers.find_data_reference_from_insn(insn)
if ref == insn.ea:
continue
string = capa.features.extractors.ida.helpers.find_string_at(ref)
if not string:
continue
strings_by_function[ea].add(string)
return strings_by_function
@dataclass
class LibraryStringClassification:
va: int
string: str
library_name: str
metadata: LibraryString
def create_index(s: list, k: str, sorted_: bool = False) -> Mapping[Any, list]:
"""create an index of the elements in `s` using the key `k`, optionally sorted by `k`"""
if sorted_:
s = sorted(s, key=lambda x: getattr(x, k))
s_by_k = collections.defaultdict(list)
for v in s:
p = getattr(v, k)
s_by_k[p].append(v)
return s_by_k
def get_string_matches(dbs: list[LibraryStringDatabase]) -> list[LibraryStringClassification]:
matches: list[LibraryStringClassification] = []
for function, strings in sorted(get_function_strings().items()):
for string in strings:
for db in dbs:
if metadata := db.metadata_by_string.get(string):
matches.append(
LibraryStringClassification(
va=function,
string=string,
library_name=metadata.library_name,
metadata=metadata,
)
)
# if there are less than N strings per library, ignore that library
matches_by_library = create_index(matches, "library_name")
for library_name, library_matches in matches_by_library.items():
if len(library_matches) > 5:
continue
logger.info("pruning library %s: only %d matched string", library_name, len(library_matches))
matches = [m for m in matches if m.library_name != library_name]
# if there are conflicts within a single function, don't label it
matches_by_function = create_index(matches, "va")
for va, function_matches in matches_by_function.items():
library_names = {m.library_name for m in function_matches}
if len(library_names) == 1:
continue
logger.info("conflicting matches: 0x%x: %s", va, sorted(library_names))
# this is potentially slow (O(n**2)) but hopefully fast enough in practice.
matches = [m for m in matches if m.va != va]
return matches


@@ -0,0 +1,130 @@
# Copyright (C) 2024 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import sys
import logging
import collections
from pathlib import Path
import rich
from rich.text import Text
import capa.analysis.strings
import capa.features.extractors.strings
import capa.features.extractors.ida.helpers as ida_helpers
logger = logging.getLogger(__name__)
def open_ida(input_path: Path):
import tempfile
import idapro
t = Path(tempfile.mkdtemp(prefix="ida-")) / input_path.name
t.write_bytes(input_path.read_bytes())
# resource leak: we should delete this upon exit
idapro.enable_console_messages(False)
idapro.open_database(str(t.absolute()), run_auto_analysis=True)
import ida_auto
ida_auto.auto_wait()
def main():
logging.basicConfig(level=logging.DEBUG)
# use n=8 to ignore common words
N = 8
input_path = Path(sys.argv[1])
dbs = capa.analysis.strings.get_default_databases()
capa.analysis.strings.prune_databases(dbs, n=N)
strings_by_library = collections.defaultdict(set)
for string in capa.analysis.strings.extract_strings(input_path.read_bytes(), n=N):
for db in dbs:
if metadata := db.metadata_by_string.get(string.s):
strings_by_library[metadata.library_name].add(string.s)
console = rich.get_console()
console.print("found libraries:", style="bold")
for library, strings in sorted(strings_by_library.items(), key=lambda p: len(p[1]), reverse=True):
console.print(f" - [b]{library}[/] ({len(strings)} strings)")
for string in sorted(strings)[:10]:
console.print(f" - {string}", markup=False, style="grey37")
if len(strings) > 10:
console.print(" ...", style="grey37")
if not strings_by_library:
console.print(" (none)", style="grey37")
# since we're not going to find any strings
# return early and don't do IDA analysis
return
open_ida(input_path)
import idaapi
import idautils
import ida_funcs
strings_by_function = collections.defaultdict(set)
for ea in idautils.Functions():
f = idaapi.get_func(ea)
# ignore library functions and thunk functions as identified by IDA
if f.flags & idaapi.FUNC_THUNK:
continue
if f.flags & idaapi.FUNC_LIB:
continue
for bb in ida_helpers.get_function_blocks(f):
for insn in ida_helpers.get_instructions_in_range(bb.start_ea, bb.end_ea):
ref = capa.features.extractors.ida.helpers.find_data_reference_from_insn(insn)
if ref == insn.ea:
continue
string = capa.features.extractors.ida.helpers.find_string_at(ref)
if not string:
continue
for db in dbs:
if metadata := db.metadata_by_string.get(string):
strings_by_function[ea].add(string)
# ensure there are at least XXX functions renamed, or ignore those entries
console.print("functions:", style="bold")
for function, strings in sorted(strings_by_function.items()):
if strings:
name = ida_funcs.get_func_name(function)
console.print(f" [b]{name}[/]@{function:08x}:")
for string in strings:
for db in dbs:
if metadata := db.metadata_by_string.get(string):
location = Text(
f"{metadata.library_name}@{metadata.library_version}::{metadata.function_name}",
style="grey37",
)
console.print(" - ", location, ": ", string.rstrip())
console.print()
console.print(
f"found {len(strings_by_function)} library functions across {len(list(idautils.Functions()))} functions"
)
if __name__ == "__main__":
main()

Binary file not shown.


@@ -0,0 +1,3 @@
*.csv
*.jsonl
*.jsonl.gz

Binary files not shown.


@@ -0,0 +1,99 @@
# Strings from Open Source libraries
This directory contains databases of strings extracted from open source software.
capa uses these databases to ignore functions that are likely library code.
There is one file for each database. Each database is a gzip-compressed JSONL (one JSON document per line) file.
The JSON document looks like this:
string: "1.0.8, 13-Jul-2019"
library_name: "bzip2"
library_version: "1.0.8#3"
file_path: "CMakeFiles/bz2.dir/bzlib.c.obj"
function_name: "BZ2_bzlibVersion"
line_number: null
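For reference, a minimal sketch of loading one of these databases, mirroring the `LibraryStringDatabase.from_file` loader that this change adds in `capa/analysis/strings.py` (the `zlib.jsonl.gz` path is illustrative):

```python
import gzip
from pathlib import Path

import msgspec


class LibraryString(msgspec.Struct):
    string: str
    library_name: str
    library_version: str
    file_path: str | None = None
    function_name: str | None = None
    line_number: int | None = None


# one JSON document per line; index the metadata by string for fast lookup
decoder = msgspec.json.Decoder(type=LibraryString)
metadata_by_string: dict[str, LibraryString] = {}
for line in gzip.decompress(Path("zlib.jsonl.gz").read_bytes()).split(b"\n"):
    if not line:
        continue
    s = decoder.decode(line)
    metadata_by_string[s.string] = s
```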
The following databases were extracted via the vcpkg & jh technique:
- brotli 1.0.9#5
- bzip2 1.0.8#3
- cryptopp 8.7.0
- curl 7.86.0#1
- detours 4.0.1#7
- jemalloc 5.3.0#1
- jsoncpp 1.9.5
- kcp 1.7
- liblzma 5.2.5#6
- libsodium 1.0.18#8
- libpcap 1.10.1#3
- mbedtls 2.28.1
- openssl 3.0.7#1
- sqlite3 3.40.0#1
- tomcrypt 1.18.2#2
- wolfssl 5.5.0
- zlib 1.2.13
This code was originally developed in FLOSS and imported into capa.
## The vcpkg & jh technique
Major steps:
1. build static libraries via vcpkg
2. extract features via jh
3. convert to JSONL format with `jh_to_qs.py`
4. compress with gzip
### Build static libraries via vcpkg
[vcpkg](https://vcpkg.io/en/) is a free C/C++ package manager for acquiring and managing libraries.
We use it to easily build common open source libraries, like zlib.
Use the triplet `x64-windows-static` to build static archives (.lib files that are AR archives containing COFF object files):
```console
PS > C:\vcpkg\vcpkg.exe install --triplet x64-windows-static zlib
```
### Extract features via jh
[jh](https://github.com/williballenthin/lancelot/blob/master/bin/src/bin/jh.rs)
is a lancelot-based utility that parses AR archives containing COFF object files,
reconstructs their control flow, finds functions, and extracts features.
jh extracts numbers, API calls, and strings; we are only interested in the string features.
For each feature, jh emits a CSV line with the fields
- target triplet
- compiler
- library
- version
- build profile
- path
- function
- feature type
- feature value
For example:
```csv
x64-windows-static,msvc143,bzip2,1.0.8#3,release,CMakeFiles/bz2.dir/bzlib.c.obj,BZ2_bzBuffToBuffCompress,number,0x00000100
```
For example, to invoke jh:
```console
$ ~/lancelot/target/release/jh x64-windows-static msvc143 zlib 1.2.13 release /mnt/c/vcpkg/installed/x64-windows-static/lib/zlib.lib > ~/flare-floss/floss/qs/db/data/oss/zlib.csv
```
### Convert to OSS database format
We use the script `jh_to_qs.py` to convert these CSV lines into a JSONL file prepared for FLOSS:
```console
$ python3 jh_to_qs.py zlib.csv > zlib.jsonl
```
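`jh_to_qs.py` itself is not included in this diff; the following is a hypothetical sketch of the conversion it performs, inferred from the jh CSV columns and the JSON schema documented above:

```python
import csv
import json
import sys

# hypothetical column names matching the jh CSV fields listed above
FIELDS = [
    "triplet", "compiler", "library", "version",
    "profile", "path", "function", "type", "value",
]

with open(sys.argv[1], newline="", encoding="utf-8") as f:
    for row in csv.DictReader(f, fieldnames=FIELDS):
        # jh also emits number and API call features; keep only strings
        if row["type"] != "string":
            continue
        print(json.dumps({
            "string": row["value"],
            "library_name": row["library"],
            "library_version": row["version"],
            "file_path": row["path"],
            "function_name": row["function"],
            "line_number": None,
        }))
```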
These files are then gzip'd:
```console
$ gzip -c zlib.jsonl > zlib.jsonl.gz
```

Binary files not shown.

capa/capabilities/dynamic.py

@@ -6,20 +6,16 @@
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import sys
import logging
import itertools
import collections
from typing import Any, Tuple
import tqdm
from typing import Any, List, Tuple
import capa.perf
import capa.features.freeze as frz
import capa.render.result_document as rdoc
from capa.rules import Scope, RuleSet
from capa.engine import FeatureSet, MatchResults
from capa.helpers import redirecting_print_to_tqdm
from capa.capabilities.common import find_file_capabilities
from capa.features.extractors.base_extractor import CallHandle, ThreadHandle, ProcessHandle, DynamicFeatureExtractor
@@ -139,38 +135,30 @@ def find_dynamic_capabilities(
feature_counts = rdoc.DynamicFeatureCounts(file=0, processes=())
assert isinstance(extractor, DynamicFeatureExtractor)
with redirecting_print_to_tqdm(disable_progress):
with tqdm.contrib.logging.logging_redirect_tqdm():
pbar = tqdm.tqdm
if disable_progress:
# do not use tqdm to avoid unnecessary side effects when caller intends
# to disable progress completely
def pbar(s, *args, **kwargs):
return s
processes: List[ProcessHandle] = list(extractor.get_processes())
n_processes: int = len(processes)
elif not sys.stderr.isatty():
# don't display progress bar when stderr is redirected to a file
def pbar(s, *args, **kwargs):
return s
with capa.helpers.CapaProgressBar(
console=capa.helpers.log_console, transient=True, disable=disable_progress
) as pbar:
task = pbar.add_task("matching", total=n_processes, unit="processes")
for p in processes:
process_matches, thread_matches, call_matches, feature_count = find_process_capabilities(
ruleset, extractor, p
)
feature_counts.processes += (
rdoc.ProcessFeatureCount(address=frz.Address.from_capa(p.address), count=feature_count),
)
logger.debug("analyzed %s and extracted %d features", p.address, feature_count)
processes = list(extractor.get_processes())
for rule_name, res in process_matches.items():
all_process_matches[rule_name].extend(res)
for rule_name, res in thread_matches.items():
all_thread_matches[rule_name].extend(res)
for rule_name, res in call_matches.items():
all_call_matches[rule_name].extend(res)
pb = pbar(processes, desc="matching", unit=" processes", leave=False)
for p in pb:
process_matches, thread_matches, call_matches, feature_count = find_process_capabilities(
ruleset, extractor, p
)
feature_counts.processes += (
rdoc.ProcessFeatureCount(address=frz.Address.from_capa(p.address), count=feature_count),
)
logger.debug("analyzed %s and extracted %d features", p.address, feature_count)
for rule_name, res in process_matches.items():
all_process_matches[rule_name].extend(res)
for rule_name, res in thread_matches.items():
all_thread_matches[rule_name].extend(res)
for rule_name, res in call_matches.items():
all_call_matches[rule_name].extend(res)
pbar.advance(task)
# collection of features that captures the rule matches within process and thread scopes.
# mapping from feature (matched rule) to set of addresses at which it matched.

capa/capabilities/static.py

@@ -6,21 +6,18 @@
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import sys
import time
import logging
import itertools
import collections
from typing import Any, Tuple
import tqdm.contrib.logging
from typing import Any, List, Tuple
import capa.perf
import capa.helpers
import capa.features.freeze as frz
import capa.render.result_document as rdoc
from capa.rules import Scope, RuleSet
from capa.engine import FeatureSet, MatchResults
from capa.helpers import redirecting_print_to_tqdm
from capa.capabilities.common import find_file_capabilities
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle, StaticFeatureExtractor
@@ -143,75 +140,58 @@ def find_static_capabilities(
library_functions: Tuple[rdoc.LibraryFunction, ...] = ()
assert isinstance(extractor, StaticFeatureExtractor)
with redirecting_print_to_tqdm(disable_progress):
with tqdm.contrib.logging.logging_redirect_tqdm():
pbar = tqdm.tqdm
if capa.helpers.is_runtime_ghidra():
# Ghidrathon interpreter cannot properly handle
# the TMonitor thread that is created via a monitor_interval
# > 0
pbar.monitor_interval = 0
if disable_progress:
# do not use tqdm to avoid unnecessary side effects when caller intends
# to disable progress completely
def pbar(s, *args, **kwargs):
return s
functions: List[FunctionHandle] = list(extractor.get_functions())
n_funcs: int = len(functions)
n_libs: int = 0
percentage: float = 0
elif not sys.stderr.isatty():
# don't display progress bar when stderr is redirected to a file
def pbar(s, *args, **kwargs):
return s
functions = list(extractor.get_functions())
n_funcs = len(functions)
pb = pbar(functions, desc="matching", unit=" functions", postfix="skipped 0 library functions", leave=False)
for f in pb:
t0 = time.time()
if extractor.is_library_function(f.address):
function_name = extractor.get_function_name(f.address)
logger.debug("skipping library function 0x%x (%s)", f.address, function_name)
library_functions += (
rdoc.LibraryFunction(address=frz.Address.from_capa(f.address), name=function_name),
)
n_libs = len(library_functions)
percentage = round(100 * (n_libs / n_funcs))
if isinstance(pb, tqdm.tqdm):
pb.set_postfix_str(f"skipped {n_libs} library functions ({percentage}%)")
continue
function_matches, bb_matches, insn_matches, feature_count = find_code_capabilities(
ruleset, extractor, f
with capa.helpers.CapaProgressBar(
console=capa.helpers.log_console, transient=True, disable=disable_progress
) as pbar:
task = pbar.add_task(
"matching", total=n_funcs, unit="functions", postfix=f"skipped {n_libs} library functions, {percentage}%"
)
for f in functions:
t0 = time.time()
if extractor.is_library_function(f.address):
function_name = extractor.get_function_name(f.address)
logger.debug("skipping library function 0x%x (%s)", f.address, function_name)
library_functions += (
rdoc.LibraryFunction(address=frz.Address.from_capa(f.address), name=function_name),
)
feature_counts.functions += (
rdoc.FunctionFeatureCount(address=frz.Address.from_capa(f.address), count=feature_count),
)
t1 = time.time()
n_libs = len(library_functions)
percentage = round(100 * (n_libs / n_funcs))
pbar.update(task, postfix=f"skipped {n_libs} library functions, {percentage}%")
pbar.advance(task)
continue
match_count = 0
for name, matches_ in itertools.chain(
function_matches.items(), bb_matches.items(), insn_matches.items()
):
# in practice, most matches are derived rules,
# like "check OS version/5bf4c7f39fd4492cbed0f6dc7d596d49"
# but when we log to the human, they really care about "real" rules.
if not ruleset.rules[name].is_subscope_rule():
match_count += len(matches_)
function_matches, bb_matches, insn_matches, feature_count = find_code_capabilities(ruleset, extractor, f)
feature_counts.functions += (
rdoc.FunctionFeatureCount(address=frz.Address.from_capa(f.address), count=feature_count),
)
t1 = time.time()
logger.debug(
"analyzed function 0x%x and extracted %d features, %d matches in %0.02fs",
f.address,
feature_count,
match_count,
t1 - t0,
)
match_count = 0
for name, matches_ in itertools.chain(function_matches.items(), bb_matches.items(), insn_matches.items()):
if not ruleset.rules[name].is_subscope_rule():
match_count += len(matches_)
for rule_name, res in function_matches.items():
all_function_matches[rule_name].extend(res)
for rule_name, res in bb_matches.items():
all_bb_matches[rule_name].extend(res)
for rule_name, res in insn_matches.items():
all_insn_matches[rule_name].extend(res)
logger.debug(
"analyzed function 0x%x and extracted %d features, %d matches in %0.02fs",
f.address,
feature_count,
match_count,
t1 - t0,
)
for rule_name, res in function_matches.items():
all_function_matches[rule_name].extend(res)
for rule_name, res in bb_matches.items():
all_bb_matches[rule_name].extend(res)
for rule_name, res in insn_matches.items():
all_insn_matches[rule_name].extend(res)
pbar.advance(task)
# collection of features that captures the rule matches within function, BB, and instruction scopes.
# mapping from feature (matched rule) to set of addresses at which it matched.

capa/features/extractors/binja/file.py

@@ -5,8 +5,6 @@
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import struct
from typing import Tuple, Iterator
from binaryninja import Segment, BinaryView, SymbolType, SymbolBinding
@@ -20,56 +18,24 @@ from capa.features.address import NO_ADDRESS, Address, FileOffsetAddress, Absolu
from capa.features.extractors.binja.helpers import read_c_string, unmangle_c_name
def check_segment_for_pe(bv: BinaryView, seg: Segment) -> Iterator[Tuple[int, int]]:
"""check segment for embedded PE
adapted for binja from:
https://github.com/vivisect/vivisect/blob/7be4037b1cecc4551b397f840405a1fc606f9b53/PE/carve.py#L19
"""
mz_xor = [
(
capa.features.extractors.helpers.xor_static(b"MZ", i),
capa.features.extractors.helpers.xor_static(b"PE", i),
i,
)
for i in range(256)
]
todo = []
# If this is the first segment of the binary, skip the first bytes. Otherwise, there will always be a matched
# PE at the start of the binaryview.
start = seg.start
if bv.view_type == "PE" and start == bv.start:
def check_segment_for_pe(bv: BinaryView, seg: Segment) -> Iterator[Tuple[Feature, Address]]:
"""check segment for embedded PE"""
start = 0
if bv.view_type == "PE" and seg.start == bv.start:
# If this is the first segment of the binary, skip the first bytes.
# Otherwise, there will always be a matched PE at the start of the binaryview.
start += 1
for mzx, pex, i in mz_xor:
for off, _ in bv.find_all_data(start, seg.end, mzx):
todo.append((off, mzx, pex, i))
buf = bv.read(seg.start, seg.length)
while len(todo):
off, mzx, pex, i = todo.pop()
# The MZ header has one field we will check e_lfanew is at 0x3c
e_lfanew = off + 0x3C
if seg.end < (e_lfanew + 4):
continue
newoff = struct.unpack("<I", capa.features.extractors.helpers.xor_static(bv.read(e_lfanew, 4), i))[0]
peoff = off + newoff
if seg.end < (peoff + 2):
continue
if bv.read(peoff, 2) == pex:
yield off, i
for offset, _ in capa.features.extractors.helpers.carve_pe(buf, start):
yield Characteristic("embedded pe"), FileOffsetAddress(seg.start + offset)
def extract_file_embedded_pe(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
"""extract embedded PE features"""
for seg in bv.segments:
for ea, _ in check_segment_for_pe(bv, seg):
yield Characteristic("embedded pe"), FileOffsetAddress(ea)
yield from check_segment_for_pe(bv, seg)
def extract_file_export_names(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:

capa/features/extractors/binja/find_binja_api.py

@@ -5,31 +5,175 @@
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import os
import sys
import logging
import subprocess
import importlib.util
from typing import Optional
from pathlib import Path
logger = logging.getLogger(__name__)
# When the script gets executed as a standalone executable (via PyInstaller), `import binaryninja` does not work because
# we have excluded the binaryninja module in `pyinstaller.spec`. The trick here is to call the system Python and try
# to find out the path of the binaryninja module that has been installed.
# Note, including the binaryninja module in the `pyinstaller.spec` would not work, since the binaryninja module tries to
# find the binaryninja core e.g., `libbinaryninjacore.dylib`, using a relative path. And this does not work when the
# binaryninja module is extracted by the PyInstaller.
code = r"""
CODE = r"""
from pathlib import Path
from importlib import util
spec = util.find_spec('binaryninja')
if spec is not None:
if len(spec.submodule_search_locations) > 0:
path = Path(spec.submodule_search_locations[0])
# encode the path with utf8 then convert to hex, make sure it can be read and restored properly
print(str(path.parent).encode('utf8').hex())
path = Path(spec.submodule_search_locations[0])
# encode the path with utf8 then convert to hex, make sure it can be read and restored properly
print(str(path.parent).encode('utf8').hex())
"""
def find_binja_path() -> Path:
raw_output = subprocess.check_output(["python", "-c", code]).decode("ascii").strip()
return Path(bytes.fromhex(raw_output).decode("utf8"))
def find_binaryninja_path_via_subprocess() -> Optional[Path]:
raw_output = subprocess.check_output(["python", "-c", CODE]).decode("ascii").strip()
output = bytes.fromhex(raw_output).decode("utf8")
if not output.strip():
return None
return Path(output)
def get_desktop_entry(name: str) -> Optional[Path]:
"""
Find the path for the given XDG Desktop Entry name.
Like:
>> get_desktop_entry("com.vector35.binaryninja.desktop")
Path("~/.local/share/applications/com.vector35.binaryninja.desktop")
"""
assert sys.platform in ("linux", "linux2")
assert name.endswith(".desktop")
data_dirs = os.environ.get("XDG_DATA_DIRS", "/usr/share") + f":{Path.home()}/.local/share"
for data_dir in data_dirs.split(":"):
applications = Path(data_dir) / "applications"
for application in applications.glob("*.desktop"):
if application.name == name:
return application
return None
def get_binaryninja_path(desktop_entry: Path) -> Optional[Path]:
# from: Exec=/home/wballenthin/software/binaryninja/binaryninja %u
# to: /home/wballenthin/software/binaryninja/
for line in desktop_entry.read_text(encoding="utf-8").splitlines():
if not line.startswith("Exec="):
continue
if not line.endswith("binaryninja %u"):
continue
binaryninja_path = Path(line[len("Exec=") : -len("binaryninja %u")])
if not binaryninja_path.exists():
return None
return binaryninja_path
return None
def validate_binaryninja_path(binaryninja_path: Path) -> bool:
if not binaryninja_path:
return False
module_path = binaryninja_path / "python"
if not module_path.is_dir():
return False
if not (module_path / "binaryninja" / "__init__.py").is_file():
return False
return True
def find_binaryninja() -> Optional[Path]:
binaryninja_path = find_binaryninja_path_via_subprocess()
if not binaryninja_path or not validate_binaryninja_path(binaryninja_path):
if sys.platform == "linux" or sys.platform == "linux2":
# ok
logger.debug("detected OS: linux")
elif sys.platform == "darwin":
logger.warning("unsupported platform to find Binary Ninja: %s", sys.platform)
return None
elif sys.platform == "win32":
logger.warning("unsupported platform to find Binary Ninja: %s", sys.platform)
return None
else:
logger.warning("unsupported platform to find Binary Ninja: %s", sys.platform)
return None
desktop_entry = get_desktop_entry("com.vector35.binaryninja.desktop")
if not desktop_entry:
logger.debug("failed to find Binary Ninja application")
return None
logger.debug("found Binary Ninja application: %s", desktop_entry)
binaryninja_path = get_binaryninja_path(desktop_entry)
if not binaryninja_path:
logger.debug("failed to determine Binary Ninja installation path")
return None
if not validate_binaryninja_path(binaryninja_path):
logger.debug("failed to validate Binary Ninja installation")
return None
logger.debug("found Binary Ninja installation: %s", binaryninja_path)
return binaryninja_path / "python"
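For reference, the directory layout that validate_binaryninja_path checks and find_binaryninja ultimately returns (the prefix is hypothetical):

    # expected layout (hypothetical prefix):
    #   ~/software/binaryninja/                           <- binaryninja_path, from the .desktop entry
    #   ~/software/binaryninja/python/                    <- returned, later appended to sys.path
    #   ~/software/binaryninja/python/binaryninja/__init__.py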
def is_binaryninja_installed() -> bool:
"""Is the binaryninja module ready to import?"""
try:
return importlib.util.find_spec("binaryninja") is not None
except ModuleNotFoundError:
return False
def has_binaryninja() -> bool:
if is_binaryninja_installed():
logger.debug("found installed Binary Ninja API")
return True
logger.debug("Binary Ninja API not installed, searching...")
binaryninja_path = find_binaryninja()
if not binaryninja_path:
logger.debug("failed to find Binary Ninja installation")
return False
logger.debug("found Binary Ninja API: %s", binaryninja_path)
return True
def load_binaryninja() -> bool:
try:
import binaryninja
return True
except ImportError:
binaryninja_path = find_binaryninja()
if not binaryninja_path:
return False
sys.path.append(binaryninja_path.absolute().as_posix())
try:
import binaryninja # noqa: F401 unused import
return True
except ImportError:
return False
if __name__ == "__main__":
print(find_binja_path())
print(find_binaryninja_path_via_subprocess())

View File

@@ -9,6 +9,7 @@
import logging
from typing import Tuple, Iterator
import capa.features.extractors.helpers
from capa.helpers import assert_never
from capa.features.insn import API, Number
from capa.features.common import String, Feature
@@ -50,7 +51,8 @@ def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -
else:
assert_never(value)
yield API(call.api), ch.address
for name in capa.features.extractors.helpers.generate_symbols("", call.api):
yield API(name), ch.address
def extract_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[Tuple[Feature, Address]]:
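For context on the new loop above: `generate_symbols` expands an API name into the variants that rules may reference. A rough sketch of the expected behavior, assuming the helper's A/W-suffix handling (exact output set is the helper's, not specified here):

    import capa.features.extractors.helpers

    for name in capa.features.extractors.helpers.generate_symbols("", "CreateFileA"):
        print(name)
    # expected to include the raw name and the suffix-stripped name,
    # e.g. "CreateFileA" and "CreateFile"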

View File

@@ -9,6 +9,7 @@
import logging
from typing import Tuple, Iterator
import capa.features.extractors.helpers
from capa.features.insn import API, Number
from capa.features.common import String, Feature
from capa.features.address import Address
@@ -44,7 +45,8 @@ def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -
# but yielding the entire string would be helpful for an analyst looking at the verbose output
yield String(arg_value), ch.address
yield API(call.name), ch.address
for name in capa.features.extractors.helpers.generate_symbols("", call.name):
yield API(name), ch.address
def extract_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[Tuple[Feature, Address]]:

View File

@@ -8,7 +8,6 @@
from typing import List, Tuple, Iterator
import idaapi
import ida_nalt
import capa.ida.helpers
import capa.features.extractors.elf
@@ -32,7 +31,9 @@ class IdaFeatureExtractor(StaticFeatureExtractor):
def __init__(self):
super().__init__(
hashes=SampleHashes(
md5=ida_nalt.retrieve_input_file_md5(), sha1="(unknown)", sha256=ida_nalt.retrieve_input_file_sha256()
md5=capa.ida.helpers.retrieve_input_file_md5(),
sha1="(unknown)",
sha256=capa.ida.helpers.retrieve_input_file_sha256(),
)
)
self.global_features: List[Tuple[Feature, Address]] = []

View File

@@ -41,7 +41,7 @@ if hasattr(ida_bytes, "parse_binpat_str"):
return
while True:
ea, _ = ida_bytes.bin_search3(start, end, patterns, ida_bytes.BIN_SEARCH_FORWARD)
ea, _ = ida_bytes.bin_search(start, end, patterns, ida_bytes.BIN_SEARCH_FORWARD)
if ea == idaapi.BADADDR:
break
start = ea + 1

View File

@@ -0,0 +1,117 @@
# Copyright (C) 2024 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import os
import sys
import json
import logging
import importlib.util
from typing import Optional
from pathlib import Path
logger = logging.getLogger(__name__)
def is_idalib_installed() -> bool:
try:
return importlib.util.find_spec("idapro") is not None
except ModuleNotFoundError:
return False
def get_idalib_user_config_path() -> Optional[Path]:
"""Get the path to the user's config file based on platform following IDA's user directories."""
# derived from `py-activate-idalib.py` from IDA v9.0 Beta 4
if sys.platform == "win32":
# On Windows, use the %APPDATA%\Hex-Rays\IDA Pro directory
config_dir = Path(os.getenv("APPDATA")) / "Hex-Rays" / "IDA Pro"
else:
# On macOS and Linux, use ~/.idapro
config_dir = Path.home() / ".idapro"
# Return the full path to the config file (now in JSON format)
user_config_path = config_dir / "ida-config.json"
if not user_config_path.exists():
return None
return user_config_path
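find_idalib below only needs the installation directory from that file; a plausible ida-config.json (the install path is hypothetical):

    # {
    #     "Paths": {
    #         "ida-install-dir": "/opt/ida-9.0"
    #     }
    # }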
def find_idalib() -> Optional[Path]:
config_path = get_idalib_user_config_path()
if not config_path:
logger.error("IDA Pro user configuration does not exist, please make sure you've installed idalib properly.")
return None
config = json.loads(config_path.read_text(encoding="utf-8"))
try:
ida_install_dir = Path(config["Paths"]["ida-install-dir"])
except KeyError:
logger.error(
"IDA Pro user configuration does not contain location of IDA Pro installation, please make sure you've installed idalib properly."
)
return None
if not ida_install_dir.exists():
return None
libname = {
"win32": "idalib.dll",
"linux": "libidalib.so",
"linux2": "libidalib.so",
"darwin": "libidalib.dylib",
}[sys.platform]
if not (ida_install_dir / "ida.hlp").is_file():
return None
if not (ida_install_dir / libname).is_file():
return None
idalib_path = ida_install_dir / "idalib" / "python"
if not idalib_path.exists():
return None
if not (idalib_path / "idapro" / "__init__.py").is_file():
return None
return idalib_path
def has_idalib() -> bool:
if is_idalib_installed():
logger.debug("found installed IDA idalib API")
return True
logger.debug("IDA idalib API not installed, searching...")
idalib_path = find_idalib()
if not idalib_path:
logger.debug("failed to find IDA idalib installation")
return False
logger.debug("found IDA idalib API: %s", idalib_path)
return True
def load_idalib() -> bool:
try:
import idapro
return True
except ImportError:
idalib_path = find_idalib()
if not idalib_path:
return False
sys.path.append(idalib_path.absolute().as_posix())
try:
import idapro # noqa: F401 unused import
return True
except ImportError:
return False

View File

@@ -130,7 +130,13 @@ def extract_file_arch(pe, **kwargs):
elif pe.FILE_HEADER.Machine == pefile.MACHINE_TYPE["IMAGE_FILE_MACHINE_AMD64"]:
yield Arch(ARCH_AMD64), NO_ADDRESS
else:
logger.warning("unsupported architecture: %s", pefile.MACHINE_TYPE[pe.FILE_HEADER.Machine])
try:
logger.warning(
"unsupported architecture: %s",
pefile.MACHINE_TYPE[pe.FILE_HEADER.Machine],
)
except KeyError:
logger.warning("unknown architecture: %s", pe.FILE_HEADER.Machine)
def extract_file_features(pe, buf):
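A minimal illustration of the failure mode the new try/except guards against: pefile.MACHINE_TYPE is a plain dict keyed by the known machine constants, so an unrecognized FILE_HEADER.Machine value raises KeyError (the value below is hypothetical):

    import pefile

    machine = 0xFFFF  # hypothetical unrecognized value from a corrupted header
    try:
        print(pefile.MACHINE_TYPE[machine])
    except KeyError:
        print(f"unknown architecture: {machine:#x}")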

View File

@@ -10,6 +10,7 @@ from typing import Dict, List, Tuple, Optional
from pathlib import Path
from zipfile import ZipFile
from collections import defaultdict
from dataclasses import dataclass
from capa.exceptions import UnsupportedFormatError
from capa.features.extractors.vmray.models import File, Flog, SummaryV2, StaticData, FunctionCall, xml_to_dict
@@ -21,6 +22,21 @@ DEFAULT_ARCHIVE_PASSWORD = b"infected"
SUPPORTED_FLOG_VERSIONS = ("2",)
@dataclass
class VMRayMonitorThread:
tid: int # thread ID assigned by OS
monitor_id: int # unique ID assigned to thread by VMRay
process_monitor_id: int # unique ID assigned to containing process by VMRay
@dataclass
class VMRayMonitorProcess:
pid: int # process ID assigned by OS
ppid: int # parent process ID assigned by OS
monitor_id: int # unique ID assigned to process by VMRay
image_name: str
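For orientation, a hypothetical process/thread pair as these dataclasses model it (all IDs invented):

    # OS process 1234 (parent 1000) observed by VMRay as monitor process 1,
    # with one thread, OS tid 5678, observed as monitor thread 10:
    process = VMRayMonitorProcess(pid=1234, ppid=1000, monitor_id=1, image_name="sample.exe")
    thread = VMRayMonitorThread(tid=5678, monitor_id=10, process_monitor_id=1)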
class VMRayAnalysis:
def __init__(self, zipfile_path: Path):
self.zipfile = ZipFile(zipfile_path, "r")
@@ -45,9 +61,15 @@ class VMRayAnalysis:
self.exports: Dict[int, str] = {}
self.imports: Dict[int, Tuple[str, str]] = {}
self.sections: Dict[int, str] = {}
self.process_ids: Dict[int, int] = {}
self.process_threads: Dict[int, List[int]] = defaultdict(list)
self.process_calls: Dict[int, Dict[int, List[FunctionCall]]] = defaultdict(lambda: defaultdict(list))
self.monitor_processes: Dict[int, VMRayMonitorProcess] = {}
self.monitor_threads: Dict[int, VMRayMonitorThread] = {}
# map monitor thread IDs to their associated monitor process ID
self.monitor_threads_by_monitor_process: Dict[int, List[int]] = defaultdict(list)
# map function calls to their associated monitor thread ID mapped to its associated monitor process ID
self.monitor_process_calls: Dict[int, Dict[int, List[FunctionCall]]] = defaultdict(lambda: defaultdict(list))
self.base_address: int
self.sample_file_name: Optional[str] = None
@@ -79,13 +101,14 @@ class VMRayAnalysis:
self.sample_file_buf: bytes = self.zipfile.read(sample_file_path, pwd=DEFAULT_ARCHIVE_PASSWORD)
# do not change order, it matters
self._compute_base_address()
self._compute_imports()
self._compute_exports()
self._compute_sections()
self._compute_process_ids()
self._compute_process_threads()
self._compute_process_calls()
self._compute_monitor_processes()
self._compute_monitor_threads()
self._compute_monitor_process_calls()
def _find_sample_file(self):
for file_name, file_analysis in self.sv2.files.items():
@@ -128,34 +151,48 @@ class VMRayAnalysis:
for elffile_section in self.sample_file_static_data.elf.sections:
self.sections[elffile_section.header.sh_addr] = elffile_section.header.sh_name
def _compute_process_ids(self):
def _compute_monitor_processes(self):
for process in self.sv2.processes.values():
# we expect VMRay's monitor IDs to be unique, but OS PIDs may be reused
assert process.monitor_id not in self.process_ids.keys()
self.process_ids[process.monitor_id] = process.os_pid
# we expect monitor IDs to be unique
assert process.monitor_id not in self.monitor_processes
def _compute_process_threads(self):
# logs/flog.xml appears to be the only file that contains thread-related data
# so we use it here to map processes to threads
ppid: int = (
self.sv2.processes[process.ref_parent_process.path[1]].os_pid if process.ref_parent_process else 0
)
self.monitor_processes[process.monitor_id] = VMRayMonitorProcess(
process.os_pid, ppid, process.monitor_id, process.image_name
)
# not all processes are recorded in SummaryV2.json, get missing data from flog.xml, see #2394
for monitor_process in self.flog.analysis.monitor_processes:
vmray_monitor_process: VMRayMonitorProcess = VMRayMonitorProcess(
monitor_process.os_pid,
monitor_process.os_parent_pid,
monitor_process.process_id,
monitor_process.image_name,
)
if monitor_process.process_id not in self.monitor_processes:
self.monitor_processes[monitor_process.process_id] = vmray_monitor_process
else:
# we expect monitor processes recorded in both SummaryV2.json and flog.xml to be equal
assert self.monitor_processes[monitor_process.process_id] == vmray_monitor_process
def _compute_monitor_threads(self):
for monitor_thread in self.flog.analysis.monitor_threads:
# we expect monitor IDs to be unique
assert monitor_thread.thread_id not in self.monitor_threads
self.monitor_threads[monitor_thread.thread_id] = VMRayMonitorThread(
monitor_thread.os_tid, monitor_thread.thread_id, monitor_thread.process_id
)
# we expect each monitor thread ID to be unique for its associated monitor process ID e.g. monitor
# thread ID 10 should not be captured twice for monitor process ID 1
assert monitor_thread.thread_id not in self.monitor_threads_by_monitor_process[monitor_thread.process_id]
self.monitor_threads_by_monitor_process[monitor_thread.process_id].append(monitor_thread.thread_id)
def _compute_monitor_process_calls(self):
for function_call in self.flog.analysis.function_calls:
pid: int = self.get_process_os_pid(function_call.process_id) # flog.xml uses process monitor ID, not OS PID
tid: int = function_call.thread_id
assert isinstance(pid, int)
assert isinstance(tid, int)
if tid not in self.process_threads[pid]:
self.process_threads[pid].append(tid)
def _compute_process_calls(self):
for function_call in self.flog.analysis.function_calls:
pid: int = self.get_process_os_pid(function_call.process_id) # flog.xml uses process monitor ID, not OS PID
tid: int = function_call.thread_id
assert isinstance(pid, int)
assert isinstance(tid, int)
self.process_calls[pid][tid].append(function_call)
def get_process_os_pid(self, monitor_id: int) -> int:
return self.process_ids[monitor_id]
self.monitor_process_calls[function_call.process_id][function_call.thread_id].append(function_call)

View File

@@ -8,6 +8,7 @@
import logging
from typing import Tuple, Iterator
import capa.features.extractors.helpers
from capa.features.insn import API, Number
from capa.features.common import String, Feature
from capa.features.address import Address
@@ -26,7 +27,11 @@ def get_call_param_features(param: Param, ch: CallHandle) -> Iterator[Tuple[Feat
if param.deref.type_ in PARAM_TYPE_INT:
yield Number(hexint(param.deref.value)), ch.address
elif param.deref.type_ in PARAM_TYPE_STR:
yield String(param.deref.value), ch.address
# TODO(mr-tz): remove FPS like " \\x01\\x02\\x03\\x04\\x05\\x06\\x07\\x08\\x09\\x0a\\x0b\\x0c\\x0d\\x0e\\x0f\\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a\\x1b\\x1c\\x1d\\x1e\..."
# https://github.com/mandiant/capa/issues/2432
# parsing the data up to here results in double-escaped backslashes, remove those here
yield String(param.deref.value.replace("\\\\", "\\")), ch.address
else:
logger.debug("skipping deref param type %s", param.deref.type_)
elif param.value is not None:
@@ -41,7 +46,8 @@ def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -
for param in call.params_in.params:
yield from get_call_param_features(param, ch)
yield API(call.name), ch.address
for name in capa.features.extractors.helpers.generate_symbols("", call.name):
yield API(name), ch.address
def extract_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[Tuple[Feature, Address]]:

View File

@@ -15,9 +15,16 @@ import capa.features.extractors.vmray.call
import capa.features.extractors.vmray.file
import capa.features.extractors.vmray.global_
from capa.features.common import Feature, Characteristic
from capa.features.address import NO_ADDRESS, Address, ThreadAddress, DynamicCallAddress, AbsoluteVirtualAddress
from capa.features.extractors.vmray import VMRayAnalysis
from capa.features.extractors.vmray.models import PARAM_TYPE_STR, Process, ParamList, FunctionCall
from capa.features.address import (
NO_ADDRESS,
Address,
ThreadAddress,
ProcessAddress,
DynamicCallAddress,
AbsoluteVirtualAddress,
)
from capa.features.extractors.vmray import VMRayAnalysis, VMRayMonitorThread, VMRayMonitorProcess
from capa.features.extractors.vmray.models import PARAM_TYPE_STR, ParamList, FunctionCall
from capa.features.extractors.base_extractor import (
CallHandle,
SampleHashes,
@@ -69,20 +76,24 @@ class VMRayExtractor(DynamicFeatureExtractor):
yield from self.global_features
def get_processes(self) -> Iterator[ProcessHandle]:
yield from capa.features.extractors.vmray.file.get_processes(self.analysis)
for monitor_process in self.analysis.monitor_processes.values():
address: ProcessAddress = ProcessAddress(pid=monitor_process.pid, ppid=monitor_process.ppid)
yield ProcessHandle(address, inner=monitor_process)
def extract_process_features(self, ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]:
# we have not identified process-specific features for VMRay yet
yield from []
def get_process_name(self, ph) -> str:
process: Process = ph.inner
return process.image_name
monitor_process: VMRayMonitorProcess = ph.inner
return monitor_process.image_name
def get_threads(self, ph: ProcessHandle) -> Iterator[ThreadHandle]:
for thread in self.analysis.process_threads[ph.address.pid]:
address: ThreadAddress = ThreadAddress(process=ph.address, tid=thread)
yield ThreadHandle(address=address, inner={})
for monitor_thread_id in self.analysis.monitor_threads_by_monitor_process[ph.inner.monitor_id]:
monitor_thread: VMRayMonitorThread = self.analysis.monitor_threads[monitor_thread_id]
address: ThreadAddress = ThreadAddress(process=ph.address, tid=monitor_thread.tid)
yield ThreadHandle(address=address, inner=monitor_thread)
def extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[Tuple[Feature, Address]]:
if False:
@@ -92,7 +103,7 @@ class VMRayExtractor(DynamicFeatureExtractor):
return
def get_calls(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[CallHandle]:
for function_call in self.analysis.process_calls[ph.address.pid][th.address.tid]:
for function_call in self.analysis.monitor_process_calls[ph.inner.monitor_id][th.inner.monitor_id]:
addr = DynamicCallAddress(thread=th.address, id=function_call.fncall_id)
yield CallHandle(address=addr, inner=function_call)

View File

@@ -6,37 +6,18 @@
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import logging
from typing import Dict, Tuple, Iterator
from typing import Tuple, Iterator
import capa.features.extractors.common
from capa.features.file import Export, Import, Section
from capa.features.common import String, Feature
from capa.features.address import NO_ADDRESS, Address, ProcessAddress, AbsoluteVirtualAddress
from capa.features.address import NO_ADDRESS, Address, AbsoluteVirtualAddress
from capa.features.extractors.vmray import VMRayAnalysis
from capa.features.extractors.helpers import generate_symbols
from capa.features.extractors.vmray.models import Process
from capa.features.extractors.base_extractor import ProcessHandle
logger = logging.getLogger(__name__)
def get_processes(analysis: VMRayAnalysis) -> Iterator[ProcessHandle]:
processes: Dict[str, Process] = analysis.sv2.processes
for process in processes.values():
# we map VMRay's monitor ID to the OS PID to make it easier for users
# to follow the processes in capa's output
pid: int = analysis.get_process_os_pid(process.monitor_id)
ppid: int = (
analysis.get_process_os_pid(processes[process.ref_parent_process.path[1]].monitor_id)
if process.ref_parent_process
else 0
)
addr: ProcessAddress = ProcessAddress(pid=pid, ppid=ppid)
yield ProcessHandle(address=addr, inner=process)
def extract_export_names(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]:
for addr, name in analysis.exports.items():
yield Export(name), AbsoluteVirtualAddress(addr)

View File

@@ -87,7 +87,7 @@ class Param(BaseModel):
deref: Optional[ParamDeref] = None
def validate_param_list(value: Union[List[Param], Param]) -> List[Param]:
def validate_ensure_is_list(value: Union[List[Param], Param]) -> List[Param]:
if isinstance(value, list):
return value
else:
@@ -97,7 +97,7 @@ def validate_param_list(value: Union[List[Param], Param]) -> List[Param]:
# params may be stored as a list of Param or a single Param so we convert
# the input value to Python list type before the inner validation (List[Param])
# is called
ParamList = Annotated[List[Param], BeforeValidator(validate_param_list)]
ParamList = Annotated[List[Param], BeforeValidator(validate_ensure_is_list)]
class Params(BaseModel):
@@ -137,12 +137,46 @@ class FunctionReturn(BaseModel):
from_addr: HexInt = Field(alias="from")
class MonitorProcess(BaseModel):
ts: HexInt
process_id: int
image_name: str
filename: str
# page_root: HexInt
os_pid: HexInt
# os_integrity_level: HexInt
# os_privileges: HexInt
monitor_reason: str
parent_id: int
os_parent_pid: HexInt
# cmd_line: str
# cur_dir: str
# os_username: str
# bitness: int
# os_groups: str
class MonitorThread(BaseModel):
ts: HexInt
thread_id: int
process_id: int
os_tid: HexInt
# handle the case where there's only a single entry, but the model expects a list
MonitorProcessList = Annotated[List[MonitorProcess], BeforeValidator(validate_ensure_is_list)]
MonitorThreadList = Annotated[List[MonitorThread], BeforeValidator(validate_ensure_is_list)]
FunctionCallList = Annotated[List[FunctionCall], BeforeValidator(validate_ensure_is_list)]
class Analysis(BaseModel):
log_version: str # tested 2
analyzer_version: str # tested 2024.2.1
# analysis_date: str
function_calls: List[FunctionCall] = Field(alias="fncall", default=[])
monitor_processes: MonitorProcessList = Field(alias="monitor_process", default=[])
monitor_threads: MonitorThreadList = Field(alias="monitor_thread", default=[])
function_calls: FunctionCallList = Field(alias="fncall", default=[])
# function_returns: List[FunctionReturn] = Field(alias="fnret", default=[])
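The single-element-or-list normalization that `validate_ensure_is_list` provides is a common pattern for xmltodict-style input; a self-contained sketch of the same idea with a toy model (names hypothetical):

    from typing import List

    from pydantic import BaseModel, BeforeValidator
    from typing_extensions import Annotated

    def ensure_is_list(value):
        # XML-derived data stores one child as an object and many as an array;
        # normalize both shapes to a list before the List[...] validation runs
        return value if isinstance(value, list) else [value]

    class Item(BaseModel):
        x: int

    ItemList = Annotated[List[Item], BeforeValidator(ensure_is_list)]

    class Holder(BaseModel):
        items: ItemList = []

    assert Holder(items={"x": 1}) == Holder(items=[{"x": 1}])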

View File

@@ -372,6 +372,10 @@ if __name__ == "__main__":
from capa.exceptions import UnsupportedRuntimeError
raise UnsupportedRuntimeError("This version of capa can only be used with Python 3.8+")
elif sys.version_info < (3, 10):
from warnings import warn
warn("This is the last capa version supporting Python 3.8 and 3.9.", DeprecationWarning, stacklevel=2)
exit_code = main()
if exit_code != 0:
popup("capa explorer encountered errors during analysis. Please check the console output for more information.") # type: ignore [name-defined] # noqa: F821

View File

@@ -164,4 +164,8 @@ if __name__ == "__main__":
from capa.exceptions import UnsupportedRuntimeError
raise UnsupportedRuntimeError("This version of capa can only be used with Python 3.8+")
elif sys.version_info < (3, 10):
from warnings import warn
warn("This is the last capa version supporting Python 3.8 and 3.9.", DeprecationWarning, stacklevel=2)
sys.exit(main())

View File

@@ -5,11 +5,13 @@
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import io
import os
import sys
import gzip
import inspect
import ctypes
import logging
import tempfile
import contextlib
import importlib.util
from typing import Dict, List, Union, BinaryIO, Iterator, NoReturn
@@ -17,8 +19,21 @@ from pathlib import Path
from zipfile import ZipFile
from datetime import datetime
import tqdm
import msgspec.json
from rich.console import Console
from rich.progress import (
Task,
Text,
Progress,
BarColumn,
TextColumn,
SpinnerColumn,
ProgressColumn,
TimeElapsedColumn,
MofNCompleteColumn,
TaskProgressColumn,
TimeRemainingColumn,
)
from capa.exceptions import UnsupportedFormatError
from capa.features.common import (
@@ -48,6 +63,10 @@ EXTENSIONS_FREEZE = "frz"
logger = logging.getLogger("capa")
# shared console used to redirect logging to stderr
log_console: Console = Console(stderr=True)
def hex(n: int) -> str:
"""render the given number using upper case hex, like: 0x123ABC"""
if n < 0:
@@ -81,6 +100,59 @@ def assert_never(value) -> NoReturn:
assert False, f"Unhandled value: {value} ({type(value).__name__})" # noqa: B011
@contextlib.contextmanager
def stdout_redirector(stream):
"""
Redirect stdout at the C runtime level,
which lets us handle native libraries that spam stdout.
*But*, this only works on Linux! On other platforms a warning is logged and output still goes to stdout.
So, try to upstream a fix when possible.
Via: https://eli.thegreenplace.net/2015/redirecting-all-kinds-of-stdout-in-python/
"""
if sys.platform not in ("linux", "linux2"):
logger.warning("Unable to capture STDOUT on non-Linux (begin)")
yield
logger.warning("Unable to capture STDOUT on non-Linux (end)")
return
# libc is only on Linux
LIBC = ctypes.CDLL(None)
C_STDOUT = ctypes.c_void_p.in_dll(LIBC, "stdout")
# The original fd stdout points to. Usually 1 on POSIX systems.
original_stdout_fd = sys.stdout.fileno()
def _redirect_stdout(to_fd):
"""Redirect stdout to the given file descriptor."""
# Flush the C-level buffer stdout
LIBC.fflush(C_STDOUT)
# Flush and close sys.stdout - also closes the file descriptor (fd)
sys.stdout.close()
# Make original_stdout_fd point to the same file as to_fd
os.dup2(to_fd, original_stdout_fd)
# Create a new sys.stdout that points to the redirected fd
sys.stdout = io.TextIOWrapper(os.fdopen(original_stdout_fd, "wb"))
# Save a copy of the original stdout fd in saved_stdout_fd
saved_stdout_fd = os.dup(original_stdout_fd)
try:
# Create a temporary file and redirect stdout to it
tfile = tempfile.TemporaryFile(mode="w+b")
_redirect_stdout(tfile.fileno())
# Yield to caller, then redirect stdout back to the saved fd
yield
_redirect_stdout(saved_stdout_fd)
# Copy contents of temporary file to the given stream
tfile.flush()
tfile.seek(0, io.SEEK_SET)
stream.write(tfile.read())
finally:
tfile.close()
os.close(saved_stdout_fd)
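A usage sketch, capturing native stdout into a buffer the same way the idalib loader does later in this change:

    import io

    buf = io.BytesIO()
    with stdout_redirector(buf):
        print("this is captured into buf on Linux, not written to the console")
    print(buf.getvalue())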
def load_json_from_path(json_path: Path):
with gzip.open(json_path, "r") as compressed_report:
try:
@@ -191,39 +263,6 @@ def get_format(sample: Path) -> str:
return FORMAT_UNKNOWN
@contextlib.contextmanager
def redirecting_print_to_tqdm(disable_progress):
"""
tqdm (progress bar) expects to have fairly tight control over console output.
so calls to `print()` will break the progress bar and make things look bad.
so, this context manager temporarily replaces the `print` implementation
with one that is compatible with tqdm.
via: https://stackoverflow.com/a/42424890/87207
"""
old_print = print # noqa: T202 [reserved word print used]
def new_print(*args, **kwargs):
# If tqdm.tqdm.write raises error, use builtin print
if disable_progress:
old_print(*args, **kwargs)
else:
try:
tqdm.tqdm.write(*args, **kwargs)
except Exception:
old_print(*args, **kwargs)
try:
# Globally replace print with new_print.
# Verified this works manually on Python 3.11:
# >>> import inspect
# >>> inspect.builtins
# <module 'builtins' (built-in)>
inspect.builtins.print = new_print # type: ignore
yield
finally:
inspect.builtins.print = old_print # type: ignore
def log_unsupported_format_error():
logger.error("-" * 80)
logger.error(" Input file does not appear to be a supported file.")
@@ -377,3 +416,47 @@ def is_cache_newer_than_rule_code(cache_dir: Path) -> bool:
return False
return True
class RateColumn(ProgressColumn):
"""Renders speed column in progress bar."""
def render(self, task: "Task") -> Text:
speed = f"{task.speed:>.1f}" if task.speed else "00.0"
unit = task.fields.get("unit", "it")
return Text.from_markup(f"[progress.data.speed]{speed} {unit}/s")
class PostfixColumn(ProgressColumn):
"""Renders a postfix column in progress bar."""
def render(self, task: "Task") -> Text:
return Text(task.fields.get("postfix", ""))
class MofNCompleteColumnWithUnit(MofNCompleteColumn):
"""Renders completed/total count column with a unit."""
def render(self, task: "Task") -> Text:
ret = super().render(task)
unit = task.fields.get("unit")
return ret.append(f" {unit}") if unit else ret
class CapaProgressBar(Progress):
@classmethod
def get_default_columns(cls):
return (
SpinnerColumn(),
TextColumn("[progress.description]{task.description}"),
TaskProgressColumn(),
BarColumn(),
MofNCompleteColumnWithUnit(),
"",
TimeElapsedColumn(),
"<",
TimeRemainingColumn(),
"",
RateColumn(),
PostfixColumn(),
)
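A usage sketch for the progress bar with these columns; the task fields `unit` and `postfix` feed RateColumn, MofNCompleteColumnWithUnit, and PostfixColumn (the workload below is hypothetical):

    with CapaProgressBar(console=log_console, transient=True) as pbar:
        task = pbar.add_task("matching", total=100, unit="functions", postfix="")
        for _ in range(100):
            pbar.update(task, advance=1)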

View File

@@ -14,6 +14,7 @@ from pathlib import Path
import idc
import idaapi
import ida_ida
import ida_nalt
import idautils
import ida_bytes
import ida_loader
@@ -64,6 +65,12 @@ if version < 9.0:
info: idaapi.idainfo = idaapi.get_inf_structure()
return info.is_64bit()
def retrieve_input_file_md5() -> str:
return ida_nalt.retrieve_input_file_md5()
def retrieve_input_file_sha256() -> str:
return ida_nalt.retrieve_input_file_sha256()
else:
def get_filetype() -> "ida_ida.filetype_t":
@@ -78,6 +85,12 @@ else:
def is_64bit() -> bool:
return idaapi.inf_is_64bit()
def retrieve_input_file_md5() -> str:
return ida_nalt.retrieve_input_file_md5().hex()
def retrieve_input_file_sha256() -> str:
return ida_nalt.retrieve_input_file_sha256().hex()
def inform_user_ida_ui(message):
# this isn't a logger, this is IDA's logging facility

View File

@@ -5,8 +5,8 @@
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import io
import os
import sys
import logging
import datetime
import contextlib
@@ -69,6 +69,7 @@ BACKEND_DRAKVUF = "drakvuf"
BACKEND_VMRAY = "vmray"
BACKEND_FREEZE = "freeze"
BACKEND_BINEXPORT2 = "binexport2"
BACKEND_IDA = "ida"
class CorruptFile(ValueError):
@@ -170,6 +171,7 @@ def get_workspace(path: Path, input_format: str, sigpaths: List[Path]):
# to do a subclass check via isinstance.
if type(e) is Exception and "Couldn't convert rva" in e.args[0]:
raise CorruptFile(e.args[0]) from e
raise
viv_utils.flirt.register_flirt_signature_analyzers(vw, [str(s) for s in sigpaths])
@@ -237,24 +239,15 @@ def get_extractor(
return capa.features.extractors.dnfile.extractor.DnfileFeatureExtractor(input_path)
elif backend == BACKEND_BINJA:
import capa.helpers
from capa.features.extractors.binja.find_binja_api import find_binja_path
import capa.features.extractors.binja.find_binja_api as finder
# When we are running as a standalone executable, we cannot directly import binaryninja
# We need to fist find the binja API installation path and add it into sys.path
if capa.helpers.is_running_standalone():
bn_api = find_binja_path()
if bn_api.exists():
sys.path.append(str(bn_api))
if not finder.has_binaryninja():
raise RuntimeError("cannot find Binary Ninja API module.")
try:
import binaryninja
from binaryninja import BinaryView
except ImportError:
raise RuntimeError(
"Cannot import binaryninja module. Please install the Binary Ninja Python API first: "
+ "https://docs.binary.ninja/dev/batch.html#install-the-api)."
)
if not finder.load_binaryninja():
raise RuntimeError("failed to load Binary Ninja API module.")
import binaryninja
import capa.features.extractors.binja.extractor
@@ -269,7 +262,7 @@ def get_extractor(
raise UnsupportedOSError()
with console.status("analyzing program...", spinner="dots"):
bv: BinaryView = binaryninja.load(str(input_path))
bv: binaryninja.BinaryView = binaryninja.load(str(input_path))
if bv is None:
raise RuntimeError(f"Binary Ninja cannot open file {input_path}")
@@ -321,6 +314,34 @@ def get_extractor(
return capa.features.extractors.binexport2.extractor.BinExport2FeatureExtractor(be2, buf)
elif backend == BACKEND_IDA:
import capa.features.extractors.ida.idalib as idalib
if not idalib.has_idalib():
raise RuntimeError("cannot find IDA idalib module.")
if not idalib.load_idalib():
raise RuntimeError("failed to load IDA idalib module.")
import idapro
import ida_auto
import capa.features.extractors.ida.extractor
logger.debug("idalib: opening database...")
# idalib writes to stdout (ugh), so we have to capture that
# so as not to screw up structured output.
with capa.helpers.stdout_redirector(io.BytesIO()):
with console.status("analyzing program...", spinner="dots"):
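# note: as used here, idapro.open_database returns a truthy status code on failure, hence raising when it is truthy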
if idapro.open_database(str(input_path), run_auto_analysis=True):
raise RuntimeError("failed to analyze input file")
logger.debug("idalib: waiting for analysis...")
ida_auto.auto_wait()
logger.debug("idalib: opened database.")
return capa.features.extractors.ida.extractor.IdaFeatureExtractor()
else:
raise ValueError("unexpected backend: " + backend)

View File

@@ -22,6 +22,7 @@ from pathlib import Path
import colorama
from pefile import PEFormatError
from rich.logging import RichHandler
from elftools.common.exceptions import ELFError
import capa.perf
@@ -43,6 +44,7 @@ import capa.features.extractors.common
from capa.rules import RuleSet
from capa.engine import MatchResults
from capa.loader import (
BACKEND_IDA,
BACKEND_VIV,
BACKEND_CAPE,
BACKEND_BINJA,
@@ -283,6 +285,7 @@ def install_common_args(parser, wanted=None):
backends = [
(BACKEND_AUTO, "(default) detect appropriate backend automatically"),
(BACKEND_VIV, "vivisect"),
(BACKEND_IDA, "IDA via idalib"),
(BACKEND_PEFILE, "pefile (file features only)"),
(BACKEND_BINJA, "Binary Ninja"),
(BACKEND_DOTNET, ".NET"),
@@ -403,15 +406,23 @@ def handle_common_args(args):
ShouldExitError: if the program is invoked incorrectly and should exit.
"""
if args.quiet:
logging.basicConfig(level=logging.WARNING)
logging.getLogger().setLevel(logging.WARNING)
elif args.debug:
logging.basicConfig(level=logging.DEBUG)
logging.getLogger().setLevel(logging.DEBUG)
else:
logging.basicConfig(level=logging.INFO)
logging.getLogger().setLevel(logging.INFO)
# use [/] after the logger name to reset any styling,
# and prevent the color from carrying over to the message
logformat = "[dim]%(name)s[/]: %(message)s"
# set markup=True to allow the use of Rich's markup syntax in log messages
rich_handler = RichHandler(markup=True, show_time=False, show_path=True, console=capa.helpers.log_console)
rich_handler.setFormatter(logging.Formatter(logformat))
# use RichHandler for root logger
logging.getLogger().addHandler(rich_handler)
# disable vivisect-related logging, it's verbose and not relevant for capa users
set_vivisect_log_level(logging.CRITICAL)
@@ -892,6 +903,10 @@ def apply_extractor_filters(extractor: FeatureExtractor, extractor_filters: Filt
def main(argv: Optional[List[str]] = None):
if sys.version_info < (3, 8):
raise UnsupportedRuntimeError("This version of capa can only be used with Python 3.8+")
elif sys.version_info < (3, 10):
from warnings import warn
warn("This is the last capa version supporting Python 3.8 and 3.9.", DeprecationWarning, stacklevel=2)
if argv is None:
argv = sys.argv[1:]

View File

@@ -9,28 +9,29 @@
import io
from typing import Dict, List, Tuple, Union, Iterator, Optional
import termcolor
import rich.console
from rich.progress import Text
import capa.render.result_document as rd
def bold(s: str) -> str:
def bold(s: str) -> Text:
"""draw attention to the given string"""
return termcolor.colored(s, "cyan")
return Text.from_markup(f"[cyan]{s}")
def bold2(s: str) -> str:
def bold2(s: str) -> Text:
"""draw attention to the given string, within a `bold` section"""
return termcolor.colored(s, "green")
return Text.from_markup(f"[green]{s}")
def mute(s: str) -> str:
def mute(s: str) -> Text:
"""draw attention away from the given string"""
return termcolor.colored(s, "dark_grey")
return Text.from_markup(f"[dim]{s}")
def warn(s: str) -> str:
return termcolor.colored(s, "yellow")
def warn(s: str) -> Text:
return Text.from_markup(f"[yellow]{s}")
def format_parts_id(data: Union[rd.AttackSpec, rd.MBCSpec]):
@@ -85,3 +86,17 @@ class StringIO(io.StringIO):
def writeln(self, s):
self.write(s)
self.write("\n")
class Console(rich.console.Console):
def writeln(self, *args, **kwargs) -> None:
"""
prints the text with a new line at the end.
"""
return self.print(*args, **kwargs)
def write(self, *args, **kwargs) -> None:
"""
prints the text without a new line at the end.
"""
return self.print(*args, **kwargs, end="")
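A usage sketch showing how this Console pairs with rich's capture to build the final string, mirroring render_verbose later in this change:

    console = Console(highlight=False)
    with console.capture() as capture:
        console.write("md5")
        console.writeln(": 1234")
    rendered = capture.get()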

View File

@@ -25,7 +25,8 @@ See the License for the specific language governing permissions and limitations
from typing import cast
import tabulate
from rich.text import Text
from rich.table import Table
import capa.rules
import capa.helpers
@@ -34,6 +35,7 @@ import capa.features.freeze as frz
import capa.render.result_document as rd
from capa.rules import RuleSet
from capa.engine import MatchResults
from capa.render.utils import Console
def format_address(address: frz.Address) -> str:
@@ -140,7 +142,7 @@ def render_call(layout: rd.DynamicLayout, addr: frz.Address) -> str:
)
def render_static_meta(ostream, meta: rd.StaticMetadata):
def render_static_meta(console: Console, meta: rd.StaticMetadata):
"""
like:
@@ -161,12 +163,16 @@ def render_static_meta(ostream, meta: rd.StaticMetadata):
total feature count 1918
"""
grid = Table.grid(padding=(0, 2))
grid.add_column(style="dim")
grid.add_column()
rows = [
("md5", meta.sample.md5),
("sha1", meta.sample.sha1),
("sha256", meta.sample.sha256),
("path", meta.sample.path),
("timestamp", meta.timestamp),
("timestamp", str(meta.timestamp)),
("capa version", meta.version),
("os", meta.analysis.os),
("format", meta.analysis.format),
@@ -175,18 +181,21 @@ def render_static_meta(ostream, meta: rd.StaticMetadata):
("extractor", meta.analysis.extractor),
("base address", format_address(meta.analysis.base_address)),
("rules", "\n".join(meta.analysis.rules)),
("function count", len(meta.analysis.feature_counts.functions)),
("library function count", len(meta.analysis.library_functions)),
("function count", str(len(meta.analysis.feature_counts.functions))),
("library function count", str(len(meta.analysis.library_functions))),
(
"total feature count",
meta.analysis.feature_counts.file + sum(f.count for f in meta.analysis.feature_counts.functions),
str(meta.analysis.feature_counts.file + sum(f.count for f in meta.analysis.feature_counts.functions)),
),
]
ostream.writeln(tabulate.tabulate(rows, tablefmt="plain"))
for row in rows:
grid.add_row(*row)
console.print(grid)
def render_dynamic_meta(ostream, meta: rd.DynamicMetadata):
def render_dynamic_meta(console: Console, meta: rd.DynamicMetadata):
"""
like:
@@ -205,12 +214,16 @@ def render_dynamic_meta(ostream, meta: rd.DynamicMetadata):
total feature count 1918
"""
table = Table.grid(padding=(0, 2))
table.add_column(style="dim")
table.add_column()
rows = [
("md5", meta.sample.md5),
("sha1", meta.sample.sha1),
("sha256", meta.sample.sha256),
("path", meta.sample.path),
("timestamp", meta.timestamp),
("timestamp", str(meta.timestamp)),
("capa version", meta.version),
("os", meta.analysis.os),
("format", meta.analysis.format),
@@ -218,26 +231,29 @@ def render_dynamic_meta(ostream, meta: rd.DynamicMetadata):
("analysis", meta.flavor.value),
("extractor", meta.analysis.extractor),
("rules", "\n".join(meta.analysis.rules)),
("process count", len(meta.analysis.feature_counts.processes)),
("process count", str(len(meta.analysis.feature_counts.processes))),
(
"total feature count",
meta.analysis.feature_counts.file + sum(p.count for p in meta.analysis.feature_counts.processes),
str(meta.analysis.feature_counts.file + sum(p.count for p in meta.analysis.feature_counts.processes)),
),
]
ostream.writeln(tabulate.tabulate(rows, tablefmt="plain"))
for row in rows:
table.add_row(*row)
console.print(table)
def render_meta(osstream, doc: rd.ResultDocument):
def render_meta(console: Console, doc: rd.ResultDocument):
if doc.meta.flavor == rd.Flavor.STATIC:
render_static_meta(osstream, cast(rd.StaticMetadata, doc.meta))
render_static_meta(console, cast(rd.StaticMetadata, doc.meta))
elif doc.meta.flavor == rd.Flavor.DYNAMIC:
render_dynamic_meta(osstream, cast(rd.DynamicMetadata, doc.meta))
render_dynamic_meta(console, cast(rd.DynamicMetadata, doc.meta))
else:
raise ValueError("invalid meta analysis")
def render_rules(ostream, doc: rd.ResultDocument):
def render_rules(console: Console, doc: rd.ResultDocument):
"""
like:
@@ -254,11 +270,15 @@ def render_rules(ostream, doc: rd.ResultDocument):
if count == 1:
capability = rutils.bold(rule.meta.name)
else:
capability = f"{rutils.bold(rule.meta.name)} ({count} matches)"
capability = Text.assemble(rutils.bold(rule.meta.name), f" ({count} matches)")
ostream.writeln(capability)
console.print(capability)
had_match = True
table = Table.grid(padding=(0, 2))
table.add_column(style="dim")
table.add_column()
rows = []
ns = rule.meta.namespace
@@ -310,23 +330,26 @@ def render_rules(ostream, doc: rd.ResultDocument):
rows.append(("matches", "\n".join(lines)))
ostream.writeln(tabulate.tabulate(rows, tablefmt="plain"))
ostream.write("\n")
for row in rows:
table.add_row(*row)
console.print(table)
console.print()
if not had_match:
ostream.writeln(rutils.bold("no capabilities found"))
console.print(rutils.bold("no capabilities found"))
def render_verbose(doc: rd.ResultDocument):
ostream = rutils.StringIO()
console = Console(highlight=False)
render_meta(ostream, doc)
ostream.write("\n")
with console.capture() as capture:
render_meta(console, doc)
console.print()
render_rules(console, doc)
console.print()
render_rules(ostream, doc)
ostream.write("\n")
return ostream.getvalue()
return capture.get()
def render(meta, rules: RuleSet, capabilities: MatchResults) -> str:

View File

@@ -9,7 +9,8 @@ import logging
import textwrap
from typing import Dict, Iterable, Optional
import tabulate
from rich.text import Text
from rich.table import Table
import capa.rules
import capa.helpers
@@ -22,6 +23,7 @@ import capa.render.result_document as rd
import capa.features.freeze.features as frzf
from capa.rules import RuleSet
from capa.engine import MatchResults
from capa.render.utils import Console
logger = logging.getLogger(__name__)
@@ -45,7 +47,7 @@ def hanging_indent(s: str, indent: int) -> str:
return textwrap.indent(s, prefix=prefix)[len(prefix) :]
def render_locations(ostream, layout: rd.Layout, locations: Iterable[frz.Address], indent: int):
def render_locations(console: Console, layout: rd.Layout, locations: Iterable[frz.Address], indent: int):
import capa.render.verbose as v
# it's possible to have an empty locations array here,
@@ -56,7 +58,7 @@ def render_locations(ostream, layout: rd.Layout, locations: Iterable[frz.Address
if len(locations) == 0:
return
ostream.write(" @ ")
console.write(" @ ")
location0 = locations[0]
if len(locations) == 1:
@@ -64,58 +66,58 @@ def render_locations(ostream, layout: rd.Layout, locations: Iterable[frz.Address
if location.type == frz.AddressType.CALL:
assert isinstance(layout, rd.DynamicLayout)
ostream.write(hanging_indent(v.render_call(layout, location), indent + 1))
console.write(hanging_indent(v.render_call(layout, location), indent + 1))
else:
ostream.write(v.format_address(locations[0]))
console.write(v.format_address(locations[0]))
elif location0.type == frz.AddressType.CALL and len(locations) > 1:
location = locations[0]
assert isinstance(layout, rd.DynamicLayout)
s = f"{v.render_call(layout, location)}\nand {(len(locations) - 1)} more..."
ostream.write(hanging_indent(s, indent + 1))
console.write(hanging_indent(s, indent + 1))
elif len(locations) > 4:
# don't display too many locations, because it becomes very noisy.
# probably only the first handful of locations will be useful for inspection.
ostream.write(", ".join(map(v.format_address, locations[0:4])))
ostream.write(f", and {(len(locations) - 4)} more...")
console.write(", ".join(map(v.format_address, locations[0:4])))
console.write(f", and {(len(locations) - 4)} more...")
elif len(locations) > 1:
ostream.write(", ".join(map(v.format_address, locations)))
console.write(", ".join(map(v.format_address, locations)))
else:
raise RuntimeError("unreachable")
def render_statement(ostream, layout: rd.Layout, match: rd.Match, statement: rd.Statement, indent: int):
ostream.write(" " * indent)
def render_statement(console: Console, layout: rd.Layout, match: rd.Match, statement: rd.Statement, indent: int):
console.write(" " * indent)
if isinstance(statement, rd.SubscopeStatement):
# emit `basic block:`
# rather than `subscope:`
ostream.write(statement.scope)
console.write(statement.scope)
ostream.write(":")
console.write(":")
if statement.description:
ostream.write(f" = {statement.description}")
ostream.writeln("")
console.write(f" = {statement.description}")
console.writeln()
elif isinstance(statement, (rd.CompoundStatement)):
# emit `and:` `or:` `optional:` `not:`
ostream.write(statement.type)
console.write(statement.type)
ostream.write(":")
console.write(":")
if statement.description:
ostream.write(f" = {statement.description}")
ostream.writeln("")
console.write(f" = {statement.description}")
console.writeln()
elif isinstance(statement, rd.SomeStatement):
ostream.write(f"{statement.count} or more:")
console.write(f"{statement.count} or more:")
if statement.description:
ostream.write(f" = {statement.description}")
ostream.writeln("")
console.write(f" = {statement.description}")
console.writeln()
elif isinstance(statement, rd.RangeStatement):
# `range` is a weird node, its almost a hybrid of statement+feature.
@@ -133,25 +135,25 @@ def render_statement(ostream, layout: rd.Layout, match: rd.Match, statement: rd.
value = rutils.bold2(value)
if child.description:
ostream.write(f"count({child.type}({value} = {child.description})): ")
console.write(f"count({child.type}({value} = {child.description})): ")
else:
ostream.write(f"count({child.type}({value})): ")
console.write(f"count({child.type}({value})): ")
else:
ostream.write(f"count({child.type}): ")
console.write(f"count({child.type}): ")
if statement.max == statement.min:
ostream.write(f"{statement.min}")
console.write(f"{statement.min}")
elif statement.min == 0:
ostream.write(f"{statement.max} or fewer")
console.write(f"{statement.max} or fewer")
elif statement.max == (1 << 64 - 1):
ostream.write(f"{statement.min} or more")
console.write(f"{statement.min} or more")
else:
ostream.write(f"between {statement.min} and {statement.max}")
console.write(f"between {statement.min} and {statement.max}")
if statement.description:
ostream.write(f" = {statement.description}")
render_locations(ostream, layout, match.locations, indent)
ostream.writeln("")
console.write(f" = {statement.description}")
render_locations(console, layout, match.locations, indent)
console.writeln()
else:
raise RuntimeError("unexpected match statement type: " + str(statement))
@@ -162,9 +164,9 @@ def render_string_value(s: str) -> str:
def render_feature(
ostream, layout: rd.Layout, rule: rd.RuleMatches, match: rd.Match, feature: frzf.Feature, indent: int
console: Console, layout: rd.Layout, rule: rd.RuleMatches, match: rd.Match, feature: frzf.Feature, indent: int
):
ostream.write(" " * indent)
console.write(" " * indent)
key = feature.type
value: Optional[str]
@@ -205,14 +207,14 @@ def render_feature(
elif isinstance(feature, frzf.OperandOffsetFeature):
key = f"operand[{feature.index}].offset"
ostream.write(f"{key}: ")
console.write(f"{key}: ")
if value:
ostream.write(rutils.bold2(value))
console.write(rutils.bold2(value))
if feature.description:
ostream.write(capa.rules.DESCRIPTION_SEPARATOR)
ostream.write(feature.description)
console.write(capa.rules.DESCRIPTION_SEPARATOR)
console.write(feature.description)
if isinstance(feature, (frzf.OSFeature, frzf.ArchFeature, frzf.FormatFeature)):
# don't show the location of these global features
@@ -224,35 +226,32 @@ def render_feature(
elif isinstance(feature, (frzf.OSFeature, frzf.ArchFeature, frzf.FormatFeature)):
pass
else:
render_locations(ostream, layout, match.locations, indent)
ostream.write("\n")
render_locations(console, layout, match.locations, indent)
console.writeln()
else:
# like:
# regex: /blah/ = SOME_CONSTANT
# - "foo blah baz" @ 0x401000
# - "aaa blah bbb" @ 0x402000, 0x403400
ostream.write(key)
ostream.write(": ")
ostream.write(value)
ostream.write("\n")
console.writeln(f"{key}: {value}")
for capture, locations in sorted(match.captures.items()):
ostream.write(" " * (indent + 1))
ostream.write("- ")
ostream.write(rutils.bold2(render_string_value(capture)))
console.write(" " * (indent + 1))
console.write("- ")
console.write(rutils.bold2(render_string_value(capture)))
if isinstance(layout, rd.DynamicLayout) and rule.meta.scopes.dynamic == capa.rules.Scope.CALL:
# like above, don't re-render calls when in call scope.
pass
else:
render_locations(ostream, layout, locations, indent=indent)
ostream.write("\n")
render_locations(console, layout, locations, indent=indent)
console.writeln()
def render_node(ostream, layout: rd.Layout, rule: rd.RuleMatches, match: rd.Match, node: rd.Node, indent: int):
def render_node(console: Console, layout: rd.Layout, rule: rd.RuleMatches, match: rd.Match, node: rd.Node, indent: int):
if isinstance(node, rd.StatementNode):
render_statement(ostream, layout, match, node.statement, indent=indent)
render_statement(console, layout, match, node.statement, indent=indent)
elif isinstance(node, rd.FeatureNode):
render_feature(ostream, layout, rule, match, node.feature, indent=indent)
render_feature(console, layout, rule, match, node.feature, indent=indent)
else:
raise RuntimeError("unexpected node type: " + str(node))
@@ -265,7 +264,9 @@ MODE_SUCCESS = "success"
MODE_FAILURE = "failure"
def render_match(ostream, layout: rd.Layout, rule: rd.RuleMatches, match: rd.Match, indent=0, mode=MODE_SUCCESS):
def render_match(
console: Console, layout: rd.Layout, rule: rd.RuleMatches, match: rd.Match, indent=0, mode=MODE_SUCCESS
):
child_mode = mode
if mode == MODE_SUCCESS:
# display only nodes that evaluated successfully.
@@ -297,13 +298,13 @@ def render_match(ostream, layout: rd.Layout, rule: rd.RuleMatches, match: rd.Mat
else:
raise RuntimeError("unexpected mode: " + mode)
render_node(ostream, layout, rule, match, match.node, indent=indent)
render_node(console, layout, rule, match, match.node, indent=indent)
for child in match.children:
render_match(ostream, layout, rule, child, indent=indent + 1, mode=child_mode)
render_match(console, layout, rule, child, indent=indent + 1, mode=child_mode)
def render_rules(ostream, doc: rd.ResultDocument):
def render_rules(console: Console, doc: rd.ResultDocument):
"""
like:
@@ -350,13 +351,13 @@ def render_rules(ostream, doc: rd.ResultDocument):
if count == 1:
if rule.meta.lib:
lib_info = " (library rule)"
capability = f"{rutils.bold(rule.meta.name)}{lib_info}"
capability = Text.assemble(rutils.bold(rule.meta.name), f"{lib_info}")
else:
if rule.meta.lib:
lib_info = ", only showing first match of library rule"
capability = f"{rutils.bold(rule.meta.name)} ({count} matches{lib_info})"
capability = Text.assemble(rutils.bold(rule.meta.name), f" ({count} matches{lib_info})")
ostream.writeln(capability)
console.writeln(capability)
had_match = True
rows = []
@@ -402,7 +403,14 @@ def render_rules(ostream, doc: rd.ResultDocument):
if rule.meta.description:
rows.append(("description", rule.meta.description))
ostream.writeln(tabulate.tabulate(rows, tablefmt="plain"))
grid = Table.grid(padding=(0, 2))
grid.add_column(style="dim")
grid.add_column()
for row in rows:
grid.add_row(*row)
console.writeln(grid)
if capa.rules.Scope.FILE in rule.meta.scopes:
matches = doc.rules[rule.meta.name].matches
@@ -413,61 +421,58 @@ def render_rules(ostream, doc: rd.ResultDocument):
# so, lets be explicit about our assumptions and raise an exception if they fail.
raise RuntimeError(f"unexpected file scope match count: {len(matches)}")
_, first_match = matches[0]
render_match(ostream, doc.meta.analysis.layout, rule, first_match, indent=0)
render_match(console, doc.meta.analysis.layout, rule, first_match, indent=0)
else:
for location, match in sorted(doc.rules[rule.meta.name].matches):
if doc.meta.flavor == rd.Flavor.STATIC:
assert rule.meta.scopes.static is not None
ostream.write(rule.meta.scopes.static.value)
ostream.write(" @ ")
ostream.write(capa.render.verbose.format_address(location))
console.write(rule.meta.scopes.static.value + " @ ")
console.write(capa.render.verbose.format_address(location))
if rule.meta.scopes.static == capa.rules.Scope.BASIC_BLOCK:
func = frz.Address.from_capa(functions_by_bb[location.to_capa()])
ostream.write(f" in function {capa.render.verbose.format_address(func)}")
console.write(f" in function {capa.render.verbose.format_address(func)}")
elif doc.meta.flavor == rd.Flavor.DYNAMIC:
assert rule.meta.scopes.dynamic is not None
assert isinstance(doc.meta.analysis.layout, rd.DynamicLayout)
ostream.write(rule.meta.scopes.dynamic.value)
ostream.write(" @ ")
console.write(rule.meta.scopes.dynamic.value + " @ ")
if rule.meta.scopes.dynamic == capa.rules.Scope.PROCESS:
ostream.write(v.render_process(doc.meta.analysis.layout, location))
console.write(v.render_process(doc.meta.analysis.layout, location))
elif rule.meta.scopes.dynamic == capa.rules.Scope.THREAD:
ostream.write(v.render_thread(doc.meta.analysis.layout, location))
console.write(v.render_thread(doc.meta.analysis.layout, location))
elif rule.meta.scopes.dynamic == capa.rules.Scope.CALL:
ostream.write(hanging_indent(v.render_call(doc.meta.analysis.layout, location), indent=1))
console.write(hanging_indent(v.render_call(doc.meta.analysis.layout, location), indent=1))
else:
capa.helpers.assert_never(rule.meta.scopes.dynamic)
else:
capa.helpers.assert_never(doc.meta.flavor)
ostream.write("\n")
render_match(ostream, doc.meta.analysis.layout, rule, match, indent=1)
console.writeln()
render_match(console, doc.meta.analysis.layout, rule, match, indent=1)
if rule.meta.lib:
# only show first match
break
ostream.write("\n")
console.writeln()
if not had_match:
ostream.writeln(rutils.bold("no capabilities found"))
console.writeln(rutils.bold("no capabilities found"))
def render_vverbose(doc: rd.ResultDocument):
ostream = rutils.StringIO()
console = Console(highlight=False)
capa.render.verbose.render_meta(ostream, doc)
ostream.write("\n")
with console.capture() as capture:
capa.render.verbose.render_meta(console, doc)
console.writeln()
render_rules(console, doc)
console.writeln()
render_rules(ostream, doc)
ostream.write("\n")
return ostream.getvalue()
return capture.get()
def render(meta, rules: RuleSet, capabilities: MatchResults) -> str:

View File

@@ -5,7 +5,7 @@
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
__version__ = "7.3.0"
__version__ = "7.4.0"
def get_major_version():

View File

@@ -26,7 +26,9 @@
### Bug Fixes
### capa explorer IDA Pro plugin
### capa Explorer Web
### capa Explorer IDA Pro plugin
### Development
@@ -42,5 +44,6 @@
- [ ] [publish to PyPI](https://pypi.org/project/flare-capa)
- [ ] [create tag in capa rules](https://github.com/mandiant/capa-rules/tags)
- [ ] [create release in capa rules](https://github.com/mandiant/capa-rules/releases)
- [ ] Update [homepage](https://github.com/mandiant/capa/blob/master/web/public/index.html)
- [ ] [Spread the word](https://twitter.com)
- [ ] Update internal service

View File

@@ -20,7 +20,7 @@ authors = [
description = "The FLARE team's open-source tool to identify capabilities in executable files."
readme = {file = "README.md", content-type = "text/markdown"}
license = {file = "LICENSE.txt"}
requires-python = ">=3.8"
requires-python = ">=3.8.1"
keywords = ["malware analysis", "reverse engineering", "capability detection", "software behaviors", "capa", "FLARE"]
classifiers = [
"Development Status :: 5 - Production/Stable",
@@ -65,12 +65,8 @@ dependencies = [
# or minor otherwise).
# As specific constraints are identified, please provide
# comments and context.
"tqdm>=4",
"pyyaml>=6",
"tabulate>=0.9",
"colorama>=0.4",
"termcolor>=2",
"wcwidth>=0.2",
"ida-settings>=2",
"ruamel.yaml>=0.18",
"pefile>=2023.2.7",
@@ -81,6 +77,8 @@ dependencies = [
"protobuf>=5",
"msgspec>=0.18.6",
"xmltodict>=0.13.0",
# for library detection (in development)
"nltk>=3",
# ---------------------------------------
# Dependencies that we develop
@@ -146,11 +144,9 @@ dev = [
"types-backports==0.1.3",
"types-colorama==0.4.15.11",
"types-PyYAML==6.0.8",
"types-tabulate==0.9.0.20240106",
"types-termcolor==1.1.4",
"types-psutil==6.0.0.20240901",
"types_requests==2.32.0.20240712",
"types-protobuf==5.27.0.20240907",
"types-protobuf==5.28.0.20240924",
"deptry==0.20.0"
]
build = [
@@ -159,7 +155,7 @@ build = [
# These dependencies are not used in production environments
# and should not conflict with other libraries/tooling.
"pyinstaller==6.10.0",
"setuptools==70.0.0",
"setuptools==75.1.0",
"build==1.2.2"
]
scripts = [
@@ -183,7 +179,9 @@ known_first_party = [
"binaryninja",
"flirt",
"ghidra",
"idapro",
"ida_ida",
"ida_auto",
"ida_bytes",
"ida_entry",
"ida_funcs",
@@ -234,10 +232,7 @@ DEP002 = [
"types-protobuf",
"types-psutil",
"types-PyYAML",
"types-tabulate",
"types-termcolor",
"types_requests",
"wcwidth"
]
# dependencies imported but missing from definitions

View File

@@ -20,29 +20,28 @@ markdown-it-py==3.0.0
mdurl==0.1.2
msgpack==1.0.8
networkx==3.1
pefile==2023.2.7
pefile==2024.8.26
pip==24.2
protobuf==5.27.3
protobuf==5.28.2
pyasn1==0.5.1
pyasn1-modules==0.3.0
pycparser==2.22
pydantic==2.9.1
pydantic-core==2.23.3
pydantic==2.9.2
# pydantic pins pydantic-core,
# but dependabot updates these separately (which is broken) and is annoying,
# so we rely on pydantic to pull in the right version of pydantic-core.
# pydantic-core==2.23.4
xmltodict==0.13.0
pyelftools==0.31
pygments==2.18.0
python-flirt==0.8.10
pyyaml==6.0.2
rich==13.8.0
rich==13.9.2
ruamel-yaml==0.18.6
ruamel-yaml-clib==0.2.8
setuptools==70.0.0
setuptools==75.1.0
six==1.16.0
sortedcontainers==2.4.0
tabulate==0.9.0
termcolor==2.4.0
tqdm==4.66.5
viv-utils==0.7.11
vivisect==1.2.1
wcwidth==0.2.13
msgspec==0.18.6

2
rules

Submodule rules updated: dec3ded6f6...64b174e502

scripts/compare-backends.py (new file)

@@ -0,0 +1,316 @@
# Copyright (C) 2024 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import sys
import json
import time
import logging
import argparse
import contextlib
import statistics
import subprocess
import multiprocessing
from typing import Set, Dict, List, Optional
from pathlib import Path
from collections import Counter
from dataclasses import dataclass
from multiprocessing import Pool
import rich
import rich.box
import rich.table
import capa.main
logger = logging.getLogger("capa.compare-backends")
BACKENDS = ("vivisect", "ida", "binja")
@dataclass
class CapaInvocation:
path: Path
backend: str
duration: float
returncode: int
stdout: Optional[str]
stderr: Optional[str]
err: Optional[str]
def invoke_capa(file: Path, backend: str) -> CapaInvocation:
stdout = None
stderr = None
err = None
returncode: int
try:
logger.debug("run capa: %s: %s", backend, file.name)
t1 = time.time()
child = subprocess.run(
["python", "-m", "capa.main", "--json", "--backend=" + backend, str(file)],
capture_output=True,
check=True,
text=True,
encoding="utf-8",
)
returncode = child.returncode
stdout = child.stdout
stderr = child.stderr
except subprocess.CalledProcessError as e:
returncode = e.returncode
stdout = e.stdout
stderr = e.stderr
logger.debug("%s:%s: error", backend, file.name)
err = str(e)
finally:
t2 = time.time()
return CapaInvocation(
path=file,
backend=backend,
duration=t2 - t1,
returncode=returncode,
stdout=stdout,
stderr=stderr,
err=err,
)
def wrapper_invoke_capa(args):
file, backend = args
return invoke_capa(file, backend)
def collect(args):
results_path = args.results_path
if not results_path.is_file():
default_doc = {backend: {} for backend in BACKENDS} # type: ignore
results_path.write_text(json.dumps(default_doc), encoding="utf-8")
    testfiles = args.samples  # directory of candidate samples; defaults to tests/data
for file in sorted(p for p in testfiles.glob("*")):
# remove leftover analysis files
# because IDA doesn't cleanup after itself, currently.
if file.suffix in (".til", ".id0", ".id1", ".id2", ".nam", ".viv"):
logger.debug("removing: %s", file)
with contextlib.suppress(IOError):
file.unlink()
doc = json.loads(results_path.read_text(encoding="utf-8"))
plan = []
for file in sorted(p for p in testfiles.glob("*")):
if not file.is_file():
continue
if file.is_dir():
continue
if file.name.startswith("."):
continue
if file.suffix not in (".exe_", ".dll_", ".elf_", ""):
continue
logger.debug("%s", file.name)
key = str(file)
for backend in BACKENDS:
if (backend, file.name) in {
("binja", "0953cc3b77ed2974b09e3a00708f88de931d681e2d0cb64afbaf714610beabe6.exe_")
}:
# this file takes 38GB+ and 20hrs+
# https://github.com/Vector35/binaryninja-api/issues/5951
continue
if key in doc[backend]:
if not args.retry_failures:
continue
if not doc[backend][key]["err"]:
# didn't previously fail, don't repeat work
continue
else:
# want to retry this previous failure
pass
plan.append((file, backend))
pool_size = multiprocessing.cpu_count() // 2
logger.info("work pool size: %d", pool_size)
with Pool(processes=pool_size) as pool:
for i, result in enumerate(pool.imap_unordered(wrapper_invoke_capa, plan)):
doc[result.backend][str(result.path)] = {
"path": str(result.path),
"returncode": result.returncode,
"stdout": result.stdout,
"stderr": result.stderr,
"err": result.err,
"duration": result.duration,
}
if i % 8 == 0:
logger.info("syncing output database")
results_path.write_text(json.dumps(doc))
logger.info(
"%.1f\t%s %s %s",
result.duration,
"(err)" if result.err else " ",
result.backend.ljust(8),
result.path.name,
)
results_path.write_text(json.dumps(doc))
return
def report(args):
doc = json.loads(args.results_path.read_text(encoding="utf-8"))
samples = set()
for backend in BACKENDS:
samples.update(doc[backend].keys())
failures_by_backend: Dict[str, Set[str]] = {backend: set() for backend in BACKENDS}
durations_by_backend: Dict[str, List[float]] = {backend: [] for backend in BACKENDS}
console = rich.get_console()
for key in sorted(samples):
sample = Path(key).name
console.print(sample, style="bold")
seen_rules: Counter[str] = Counter()
rules_by_backend: Dict[str, Set[str]] = {backend: set() for backend in BACKENDS}
for backend in BACKENDS:
if key not in doc[backend]:
continue
entry = doc[backend][key]
duration = entry["duration"]
if not entry["err"]:
matches = json.loads(entry["stdout"])["rules"].keys()
seen_rules.update(matches)
rules_by_backend[backend].update(matches)
durations_by_backend[backend].append(duration)
console.print(f" {backend: >8}: {duration: >6.1f}s {len(matches): >3d} matches")
else:
failures_by_backend[backend].add(sample)
console.print(f" {backend: >8}: {duration: >6.1f}s (error)")
if not seen_rules:
console.print()
continue
t = rich.table.Table(box=rich.box.SIMPLE, header_style="default")
t.add_column("viv")
t.add_column("ida")
t.add_column("bn")
t.add_column("rule")
for rule, _ in seen_rules.most_common():
t.add_row(
"x" if rule in rules_by_backend["vivisect"] else " ",
"x" if rule in rules_by_backend["ida"] else " ",
"x" if rule in rules_by_backend["binja"] else " ",
rule,
)
console.print(t)
for backend in BACKENDS:
console.print(f"failures for {backend}:", style="bold")
for failure in sorted(failures_by_backend[backend]):
console.print(f" - {failure}")
if not failures_by_backend[backend]:
console.print(" (none)", style="green")
console.print()
console.print("durations:", style="bold")
console.print(" (10-quantiles, in seconds)", style="grey37")
for backend in BACKENDS:
q = statistics.quantiles(durations_by_backend[backend], n=10)
console.print(f" {backend: <8}: ", end="")
for i in range(9):
if i in (4, 8):
style = "bold"
else:
style = "default"
console.print(f"{q[i]: >6.1f}", style=style, end=" ")
console.print()
console.print(" ^-- 10% of samples took less than this ^", style="grey37")
console.print(" 10% of samples took more than this -----------------+", style="grey37")
console.print()
for backend in BACKENDS:
total = sum(durations_by_backend[backend])
successes = len(durations_by_backend[backend])
avg = statistics.mean(durations_by_backend[backend])
console.print(
f" {backend: <8}: {total: >7.0f} seconds across {successes: >4d} successful runs, {avg: >4.1f} average"
)
console.print()
console.print("slowest samples:", style="bold")
for backend in BACKENDS:
console.print(backend)
for duration, path in sorted(
((d["duration"], Path(d["path"]).name) for d in doc[backend].values()), reverse=True
)[:5]:
console.print(f" - {duration: >6.1f} {path}")
return
def main(argv=None):
if argv is None:
argv = sys.argv[1:]
default_samples_path = Path(__file__).resolve().parent.parent / "tests" / "data"
parser = argparse.ArgumentParser(description="Compare analysis backends.")
capa.main.install_common_args(
parser,
wanted=set(),
)
subparsers = parser.add_subparsers()
collect_parser = subparsers.add_parser("collect")
collect_parser.add_argument("results_path", type=Path, help="Path to output JSON file")
collect_parser.add_argument("--samples", type=Path, default=default_samples_path, help="Path to samples")
collect_parser.add_argument("--retry-failures", action="store_true", help="Retry previous failures")
collect_parser.set_defaults(func=collect)
report_parser = subparsers.add_parser("report")
report_parser.add_argument("results_path", type=Path, help="Path to JSON file")
report_parser.set_defaults(func=report)
args = parser.parse_args(args=argv)
try:
capa.main.handle_common_args(args)
except capa.main.ShouldExitError as e:
return e.status_code
args.func(args)
if __name__ == "__main__":
sys.exit(main())
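
Note: the results file written by collect() above maps backend → sample path → an invocation record (returncode, stdout, stderr, err, duration). A minimal sketch of consuming that file outside of report(); the "results.json" filename is illustrative:

import json
from pathlib import Path

doc = json.loads(Path("results.json").read_text(encoding="utf-8"))
for backend, entries in doc.items():
    # an entry with a non-empty "err" records a failed capa invocation
    failures = [e["path"] for e in entries.values() if e["err"]]
    print(f"{backend}: {len(entries)} runs, {len(failures)} failures")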

scripts/detect-backends.py (new file)

@@ -0,0 +1,106 @@
# Copyright (C) 2024 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import sys
import logging
import argparse
import importlib.util
import rich
import rich.table
import capa.main
from capa.features.extractors.ida.idalib import find_idalib, load_idalib, is_idalib_installed
from capa.features.extractors.binja.find_binja_api import find_binaryninja, load_binaryninja, is_binaryninja_installed
logger = logging.getLogger(__name__)
def is_vivisect_installed() -> bool:
try:
return importlib.util.find_spec("vivisect") is not None
except ModuleNotFoundError:
return False
def load_vivisect() -> bool:
try:
import vivisect # noqa: F401 unused import
return True
except ImportError:
return False
def main(argv=None):
if argv is None:
argv = sys.argv[1:]
parser = argparse.ArgumentParser(description="Detect analysis backends.")
capa.main.install_common_args(parser, wanted=set())
args = parser.parse_args(args=argv)
try:
capa.main.handle_common_args(args)
except capa.main.ShouldExitError as e:
return e.status_code
if args.debug:
logging.getLogger("capa").setLevel(logging.DEBUG)
logging.getLogger("viv_utils").setLevel(logging.DEBUG)
else:
logging.getLogger("capa").setLevel(logging.ERROR)
logging.getLogger("viv_utils").setLevel(logging.ERROR)
table = rich.table.Table()
table.add_column("backend")
table.add_column("already installed?")
table.add_column("found?")
table.add_column("loads?")
if True:
row = ["vivisect"]
if is_vivisect_installed():
row.append("True")
row.append("-")
else:
row.append("False")
row.append("False")
row.append(str(load_vivisect()))
table.add_row(*row)
if True:
row = ["Binary Ninja"]
if is_binaryninja_installed():
row.append("True")
row.append("-")
else:
row.append("False")
row.append(str(find_binaryninja() is not None))
row.append(str(load_binaryninja()))
table.add_row(*row)
if True:
row = ["IDA idalib"]
if is_idalib_installed():
row.append("True")
row.append("-")
else:
row.append("False")
row.append(str(find_idalib() is not None))
row.append(str(load_idalib()))
table.add_row(*row)
rich.print(table)
if __name__ == "__main__":
sys.exit(main())
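
Note: the split between the is_*_installed() and load_*() helpers above matters because find_spec() only asks the import machinery whether a module is resolvable, while importing actually executes module code and can still fail (e.g. on a missing license). A minimal sketch of the same pattern for an arbitrary module name (the argument is a placeholder):

import importlib
import importlib.util

def is_installed(name: str) -> bool:
    # resolvable by the import system; no module code runs
    return importlib.util.find_spec(name) is not None

def loads(name: str) -> bool:
    # actually import the module; this can fail even when a spec exists
    try:
        importlib.import_module(name)
        return True
    except ImportError:
        return False

print(is_installed("sqlite3"), loads("sqlite3"))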


@@ -31,11 +31,9 @@ from typing import Set, Dict, List
from pathlib import Path
from dataclasses import field, dataclass
-import tqdm
 import pydantic
-import termcolor
 import ruamel.yaml
-import tqdm.contrib.logging
+from rich import print
import capa.main
import capa.rules
@@ -51,18 +49,6 @@ from capa.render.result_document import RuleMetadata
logger = logging.getLogger("lint")
-def red(s):
-    return termcolor.colored(s, "red")
-def orange(s):
-    return termcolor.colored(s, "yellow")
-def green(s):
-    return termcolor.colored(s, "green")
@dataclass
class Context:
"""
@@ -80,8 +66,8 @@ class Context:
class Lint:
WARN = orange("WARN")
FAIL = red("FAIL")
WARN = "[yellow]WARN[/yellow]"
FAIL = "[red]FAIL[/red]"
name = "lint"
level = FAIL
@@ -896,7 +882,7 @@ def lint_rule(ctx: Context, rule: Rule):
if (not lints_failed) and (not lints_warned) and has_examples:
print("")
print(f'{" (nursery) " if is_nursery_rule(rule) else ""} {rule.name}')
print(f" {Lint.WARN}: {green('no lint failures')}: Graduate the rule")
print(f" {Lint.WARN}: '[green]no lint failures[/green]': Graduate the rule")
print("")
else:
lints_failed = len(tuple(filter(lambda v: v.level == Lint.FAIL, violations)))
@@ -921,12 +907,15 @@ def lint(ctx: Context):
ret = {}
source_rules = [rule for rule in ctx.rules.rules.values() if not rule.is_subscope_rule()]
-    with tqdm.contrib.logging.tqdm_logging_redirect(source_rules, unit="rule", leave=False) as pbar:
-        with capa.helpers.redirecting_print_to_tqdm(False):
-            for rule in pbar:
-                name = rule.name
-                pbar.set_description(width(f"linting rule: {name}", 48))
-                ret[name] = lint_rule(ctx, rule)
+    n_rules: int = len(source_rules)
+    with capa.helpers.CapaProgressBar(transient=True, console=capa.helpers.log_console) as pbar:
+        task = pbar.add_task(description="linting", total=n_rules, unit="rule")
+        for rule in source_rules:
+            name = rule.name
+            pbar.update(task, description=width(f"linting rule: {name}", 48))
+            ret[name] = lint_rule(ctx, rule)
+            pbar.advance(task)
return ret
@@ -1020,18 +1009,18 @@ def main(argv=None):
logger.debug("lints ran for ~ %02d:%02dm", min, sec)
if warned_rules:
print(orange("rules with WARN:"))
print("[yellow]rules with WARN:[/yellow]")
for warned_rule in sorted(warned_rules):
print(" - " + warned_rule)
print()
if failed_rules:
print(red("rules with FAIL:"))
print("[red]rules with FAIL:[/red]")
for failed_rule in sorted(failed_rules):
print(" - " + failed_rule)
return 1
else:
-        logger.info(green("no lints failed, nice!"))
+        logger.info("[green]no lints failed, nice![/green]")
return 0
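
Note: the edits above swap termcolor helpers for rich console markup, where the style tags travel inside the string itself and rich's print renders them. A tiny illustration (the messages are examples, not actual lint output):

from rich import print

print("[yellow]WARN[/yellow]: rule is missing examples")
print("[red]FAIL[/red]: rule does not match its example")
print("[green]no lint failures[/green]")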


@@ -42,9 +42,10 @@ import logging
import argparse
import subprocess
-import tqdm
 import humanize
-import tabulate
+from rich import box
+from rich.table import Table
+from rich.console import Console
import capa.main
import capa.perf
@@ -92,51 +93,61 @@ def main(argv=None):
except capa.main.ShouldExitError as e:
return e.status_code
-    with tqdm.tqdm(total=args.number * args.repeat, leave=False) as pbar:
+    with capa.helpers.CapaProgressBar(console=capa.helpers.log_console) as progress:
+        total_iterations = args.number * args.repeat
+        task = progress.add_task("profiling", total=total_iterations)
def do_iteration():
capa.perf.reset()
capa.capabilities.common.find_capabilities(rules, extractor, disable_progress=True)
-            pbar.update(1)
+            progress.advance(task)
samples = timeit.repeat(do_iteration, number=args.number, repeat=args.repeat)
logger.debug("perf: find capabilities: min: %0.2fs", (min(samples) / float(args.number)))
logger.debug("perf: find capabilities: avg: %0.2fs", (sum(samples) / float(args.repeat) / float(args.number)))
logger.debug(
"perf: find capabilities: avg: %0.2fs",
(sum(samples) / float(args.repeat) / float(args.number)),
)
logger.debug("perf: find capabilities: max: %0.2fs", (max(samples) / float(args.number)))
for counter, count in capa.perf.counters.most_common():
logger.debug("perf: counter: %s: %s", counter, count)
-    print(
-        tabulate.tabulate(
-            [(counter, humanize.intcomma(count)) for counter, count in capa.perf.counters.most_common()],
-            headers=["feature class", "evaluation count"],
-            tablefmt="github",
-        )
-    )
-    print()
+    console = Console()
-    print(
-        tabulate.tabulate(
-            [
-                (
-                    args.label,
-                    "{:,}".format(capa.perf.counters["evaluate.feature"]),
-                    # python documentation indicates that min(samples) should be preferred,
-                    # so lets put that first.
-                    #
-                    # https://docs.python.org/3/library/timeit.html#timeit.Timer.repeat
-                    f"{(min(samples) / float(args.number)):.2f}s",
-                    f"{(sum(samples) / float(args.repeat) / float(args.number)):.2f}s",
-                    f"{(max(samples) / float(args.number)):.2f}s",
-                )
-            ],
-            headers=["label", "count(evaluations)", "min(time)", "avg(time)", "max(time)"],
-            tablefmt="github",
-        )
-    )
+    table1 = Table(box=box.MARKDOWN)
+    table1.add_column("feature class")
+    table1.add_column("evaluation count")
+    for counter, count in capa.perf.counters.most_common():
+        table1.add_row(counter, humanize.intcomma(count))
+    console.print(table1)
+    console.print()
+    table2 = Table(box=box.MARKDOWN)
+    table2.add_column("label")
+    table2.add_column("count(evaluations)", style="magenta")
+    table2.add_column("min(time)", style="green")
+    table2.add_column("avg(time)", style="yellow")
+    table2.add_column("max(time)", style="red")
+    table2.add_row(
+        args.label,
+        # python documentation indicates that min(samples) should be preferred,
+        # so lets put that first.
+        #
+        # https://docs.python.org/3/library/timeit.html#timeit.Timer.repeat
+        "{:,}".format(capa.perf.counters["evaluate.feature"]),
+        f"{(min(samples) / float(args.number)):.2f}s",
+        f"{(sum(samples) / float(args.repeat) / float(args.number)):.2f}s",
+        f"{(max(samples) / float(args.number)):.2f}s",
+    )
+    console.print(table2)
return 0
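
Note: per the inline comment above, the timeit documentation recommends the minimum of the repeats as the most representative statistic, since larger values usually measure interference from the rest of the system rather than the code under test. A standalone sketch of the same computation, with a placeholder workload:

import timeit

number, repeat = 10, 5
samples = timeit.repeat(lambda: sum(range(100_000)), number=number, repeat=repeat)
print(f"min: {min(samples) / number:.4f}s per iteration")  # preferred per the timeit docs
print(f"avg: {sum(samples) / repeat / number:.4f}s per iteration")
print(f"max: {max(samples) / number:.4f}s per iteration")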


@@ -0,0 +1,184 @@
import sys
import sqlite3
import argparse
from typing import Iterator
from pathlib import Path
from dataclasses import dataclass
import pefile
import capa.main
@dataclass
class AssemblageRow:
# from table: binaries
binary_id: int
file_name: str
platform: str
build_mode: str
toolset_version: str
github_url: str
optimization: str
repo_last_update: int
size: int
path: str
license: str
binary_hash: str
repo_commit_hash: str
# from table: functions
function_id: int
function_name: str
function_hash: str
top_comments: str
source_codes: str
prototype: str
_source_file: str
# from table: rvas
rva_id: int
start_rva: int
end_rva: int
@property
def source_file(self):
# cleanup some extra metadata provided by assemblage
return self._source_file.partition(" (MD5: ")[0].partition(" (0x3: ")[0]
class Assemblage:
conn: sqlite3.Connection
samples: Path
def __init__(self, db: Path, samples: Path):
super().__init__()
self.db = db
self.samples = samples
self.conn = sqlite3.connect(self.db)
with self.conn:
self.conn.executescript("""
PRAGMA journal_mode = WAL;
PRAGMA synchronous = NORMAL;
PRAGMA busy_timeout = 5000;
PRAGMA cache_size = -20000; -- 20MB
PRAGMA foreign_keys = true;
PRAGMA temp_store = memory;
BEGIN IMMEDIATE TRANSACTION;
CREATE INDEX IF NOT EXISTS idx__functions__binary_id ON functions (binary_id);
CREATE INDEX IF NOT EXISTS idx__rvas__function_id ON rvas (function_id);
CREATE VIEW IF NOT EXISTS assemblage AS
SELECT
binaries.id AS binary_id,
binaries.file_name AS file_name,
binaries.platform AS platform,
binaries.build_mode AS build_mode,
binaries.toolset_version AS toolset_version,
binaries.github_url AS github_url,
binaries.optimization AS optimization,
binaries.repo_last_update AS repo_last_update,
binaries.size AS size,
binaries.path AS path,
binaries.license AS license,
binaries.hash AS hash,
binaries.repo_commit_hash AS repo_commit_hash,
functions.id AS function_id,
functions.name AS function_name,
functions.hash AS function_hash,
functions.top_comments AS top_comments,
functions.source_codes AS source_codes,
functions.prototype AS prototype,
functions.source_file AS source_file,
rvas.id AS rva_id,
rvas.start AS start_rva,
rvas.end AS end_rva
FROM binaries
JOIN functions ON binaries.id = functions.binary_id
JOIN rvas ON functions.id = rvas.function_id;
""")
def get_row_by_binary_id(self, binary_id: int) -> AssemblageRow:
with self.conn:
cur = self.conn.execute("SELECT * FROM assemblage WHERE binary_id = ? LIMIT 1;", (binary_id, ))
return AssemblageRow(*cur.fetchone())
    def get_rows_by_binary_id(self, binary_id: int) -> Iterator[AssemblageRow]:
with self.conn:
cur = self.conn.execute("SELECT * FROM assemblage WHERE binary_id = ?;", (binary_id, ))
row = cur.fetchone()
while row:
yield AssemblageRow(*row)
row = cur.fetchone()
def get_path_by_binary_id(self, binary_id: int) -> Path:
with self.conn:
cur = self.conn.execute("""SELECT path FROM assemblage WHERE binary_id = ? LIMIT 1""", (binary_id, ))
return self.samples / cur.fetchone()[0]
def get_pe_by_binary_id(self, binary_id: int) -> pefile.PE:
path = self.get_path_by_binary_id(binary_id)
return pefile.PE(data=path.read_bytes(), fast_load=True)
def main(argv=None):
if argv is None:
argv = sys.argv[1:]
parser = argparse.ArgumentParser(description="Inspect object boundaries in compiled programs")
    capa.main.install_common_args(parser, wanted=set())
parser.add_argument("assemblage_database", type=Path, help="path to Assemblage database")
parser.add_argument("assemblage_directory", type=Path, help="path to Assemblage samples directory")
parser.add_argument("binary_id", type=int, help="primary key of binary to inspect")
args = parser.parse_args(args=argv)
try:
capa.main.handle_common_args(args)
except capa.main.ShouldExitError as e:
return e.status_code
if not args.assemblage_database.is_file():
raise ValueError("database doesn't exist")
db = Assemblage(args.assemblage_database, args.assemblage_directory)
# print(db.get_row_by_binary_id(args.binary_id))
# print(db.get_pe_by_binary_id(args.binary_id))
@dataclass
class Function:
file: str
name: str
start_rva: int
end_rva: int
functions = [
Function(
file=m.source_file,
name=m.function_name,
start_rva=m.start_rva,
end_rva=m.end_rva,
)
for m in db.get_rows_by_binary_id(args.binary_id)
]
import rich
import rich.table
print(db.get_path_by_binary_id(args.binary_id))
t = rich.table.Table()
t.add_column("rva")
t.add_column("filename")
t.add_column("name")
for function in sorted(functions, key=lambda f: f.start_rva):
t.add_row(hex(function.start_rva), function.file, function.name)
rich.print(t)
# db.conn.close()
if __name__ == "__main__":
sys.exit(main())
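
Note: a minimal usage sketch for the Assemblage wrapper above; the database path, samples directory, and binary id are placeholders, not values from the script:

from pathlib import Path

# assumes the Assemblage class defined above is importable in scope
db = Assemblage(Path("assemblage.sqlite"), Path("samples"))
print(db.get_path_by_binary_id(1))
for fn in db.get_rows_by_binary_id(1):
    # one row per (function, rva range) pair, joined across the three tables
    print(hex(fn.start_rva), fn.source_file, fn.function_name)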


@@ -12,11 +12,12 @@ import sys
import typing
import logging
import argparse
-from typing import Set, Tuple
+from typing import Set, List, Tuple
 from collections import Counter
-import tabulate
-from termcolor import colored
+from rich import print
+from rich.text import Text
+from rich.table import Table
import capa.main
import capa.rules
@@ -77,23 +78,30 @@ def get_file_features(
return feature_map
-def get_colored(s: str):
+def get_colored(s: str) -> Text:
     if "(" in s and ")" in s:
         s_split = s.split("(", 1)
-        s_color = colored(s_split[1][:-1], "cyan")
-        return f"{s_split[0]}({s_color})"
+        return Text.assemble(s_split[0], "(", (s_split[1][:-1], "cyan"), ")")
     else:
-        return colored(s, "cyan")
+        return Text(s, style="cyan")
 def print_unused_features(feature_map: typing.Counter[Feature], rules_feature_set: Set[Feature]):
-    unused_features = []
+    unused_features: List[Tuple[str, Text]] = []
     for feature, count in reversed(feature_map.most_common()):
         if feature in rules_feature_set:
             continue
         unused_features.append((str(count), get_colored(str(feature))))
+    table = Table(title="Unused Features", box=None)
+    table.add_column("Count", style="dim")
+    table.add_column("Feature")
+    for count_str, feature_text in unused_features:
+        table.add_row(count_str, feature_text)
     print("\n")
-    print(tabulate.tabulate(unused_features, headers=["Count", "Feature"], tablefmt="plain"))
+    print(table)
     print("\n")


@@ -431,6 +431,14 @@ def get_data_path_by_name(name) -> Path:
/ "vmray"
/ "93b2d1840566f45fab674ebc79a9d19c88993bcb645e0357f3cb584d16e7c795_min_archive.zip"
)
elif name.startswith("2f8a79-vmray"):
return (
CD
/ "data"
/ "dynamic"
/ "vmray"
/ "2f8a79b12a7a989ac7e5f6ec65050036588a92e65aeb6841e08dc228ff0e21b4_min_archive.zip"
)
elif name.startswith("ea2876"):
return CD / "data" / "ea2876e9175410b6f6719f80ee44b9553960758c7d0f7bed73c0fe9a78d8e669.dll_"
elif name.startswith("1038a2"):


@@ -37,6 +37,8 @@ DYNAMIC_CAPE_FEATURE_PRESENCE_TESTS = sorted(
),
("0000a657", "process=(1180:3052)", capa.features.common.String("nope"), False),
# thread/api calls
("0000a657", "process=(2900:2852),thread=2904", capa.features.insn.API("RegQueryValueExA"), True),
("0000a657", "process=(2900:2852),thread=2904", capa.features.insn.API("RegQueryValueEx"), True),
("0000a657", "process=(2852:3052),thread=2804", capa.features.insn.API("NtQueryValueKey"), True),
("0000a657", "process=(2852:3052),thread=2804", capa.features.insn.API("GetActiveWindow"), False),
# thread/number call argument


@@ -22,6 +22,8 @@ DYNAMIC_DRAKVUF_FEATURE_PRESENCE_TESTS = sorted(
("93b2d1-drakvuf", "process=(3564:4852),thread=6592", capa.features.insn.API("LdrLoadDll"), True),
("93b2d1-drakvuf", "process=(3564:4852),thread=6592", capa.features.insn.API("DoesNotExist"), False),
# call/api
("93b2d1-drakvuf", "process=(3564:4852),thread=4716,call=17", capa.features.insn.API("CreateWindowExW"), True),
("93b2d1-drakvuf", "process=(3564:4852),thread=4716,call=17", capa.features.insn.API("CreateWindowEx"), True),
("93b2d1-drakvuf", "process=(3564:4852),thread=6592,call=1", capa.features.insn.API("LdrLoadDll"), True),
("93b2d1-drakvuf", "process=(3564:4852),thread=6592,call=1", capa.features.insn.API("DoesNotExist"), False),
# call/string argument


@@ -10,7 +10,6 @@ import textwrap
from unittest.mock import Mock
import fixtures
-import rich.console
import capa.rules
import capa.render.utils
@@ -24,6 +23,7 @@ import capa.features.basicblock
import capa.render.result_document
import capa.render.result_document as rd
import capa.features.freeze.features
+from capa.render.utils import Console
def test_render_number():
@@ -154,7 +154,7 @@ def test_render_meta_maec():
# capture the output of render_maec
f = io.StringIO()
-    console = rich.console.Console(file=f)
+    console = Console(file=f)
capa.render.default.render_maec(mock_rd, console)
output = f.getvalue()
@@ -198,7 +198,7 @@ def test_render_meta_maec():
],
)
def test_render_vverbose_feature(feature, expected):
-    ostream = capa.render.utils.StringIO()
+    console = Console(highlight=False)
addr = capa.features.freeze.Address.from_capa(capa.features.address.AbsoluteVirtualAddress(0x401000))
feature = capa.features.freeze.features.feature_from_capa(feature)
@@ -240,6 +240,8 @@ def test_render_vverbose_feature(feature, expected):
matches=(),
)
-    capa.render.vverbose.render_feature(ostream, layout, rm, matches, feature, indent=0)
-    assert ostream.getvalue().strip() == expected
+    with console.capture() as capture:
+        capa.render.vverbose.render_feature(console, layout, rm, matches, feature, indent=0)
+    output = capture.get().strip()
+    assert output == expected


@@ -19,22 +19,51 @@ DYNAMIC_VMRAY_FEATURE_PRESENCE_TESTS = sorted(
("93b2d1-vmray", "file", capa.features.common.String("\\Program Files\\WindowsApps\\does_not_exist"), False),
# file/imports
("93b2d1-vmray", "file", capa.features.file.Import("GetAddrInfoW"), True),
("93b2d1-vmray", "file", capa.features.file.Import("GetAddrInfo"), True),
# thread/api calls
("93b2d1-vmray", "process=(2176:0),thread=7", capa.features.insn.API("GetAddrInfoW"), True),
("93b2d1-vmray", "process=(2176:0),thread=7", capa.features.insn.API("DoesNotExist"), False),
("93b2d1-vmray", "process=(2176:0),thread=2180", capa.features.insn.API("LoadLibraryExA"), True),
("93b2d1-vmray", "process=(2176:0),thread=2180", capa.features.insn.API("LoadLibraryEx"), True),
("93b2d1-vmray", "process=(2176:0),thread=2420", capa.features.insn.API("GetAddrInfoW"), True),
("93b2d1-vmray", "process=(2176:0),thread=2420", capa.features.insn.API("GetAddrInfo"), True),
("93b2d1-vmray", "process=(2176:0),thread=2420", capa.features.insn.API("DoesNotExist"), False),
# call/api
("93b2d1-vmray", "process=(2176:0),thread=7,call=2361", capa.features.insn.API("GetAddrInfoW"), True),
("93b2d1-vmray", "process=(2176:0),thread=2420,call=2361", capa.features.insn.API("GetAddrInfoW"), True),
# call/string argument
(
"93b2d1-vmray",
"process=(2176:0),thread=7,call=10323",
"process=(2176:0),thread=2420,call=10323",
capa.features.common.String("raw.githubusercontent.com"),
True,
),
# backslashes in paths; see #2428
(
"93b2d1-vmray",
"process=(2176:0),thread=2180,call=267",
capa.features.common.String("C:\\Users\\WhuOXYsD\\Desktop\\filename.exe"),
True,
),
(
"93b2d1-vmray",
"process=(2176:0),thread=2180,call=267",
capa.features.common.String("C:\\\\Users\\\\WhuOXYsD\\\\Desktop\\\\filename.exe"),
False,
),
(
"93b2d1-vmray",
"process=(2176:0),thread=2204,call=2395",
capa.features.common.String("Software\\Microsoft\\Windows\\CurrentVersion\\Policies\\System"),
True,
),
(
"93b2d1-vmray",
"process=(2176:0),thread=2204,call=2395",
capa.features.common.String("Software\\\\Microsoft\\\\Windows\\\\CurrentVersion\\\\Policies\\\\System"),
False,
),
# call/number argument
# VirtualAlloc(4096, 4)
("93b2d1-vmray", "process=(2176:0),thread=7,call=2358", capa.features.insn.Number(4096), True),
("93b2d1-vmray", "process=(2176:0),thread=7,call=2358", capa.features.insn.Number(4), True),
("93b2d1-vmray", "process=(2176:0),thread=2420,call=2358", capa.features.insn.Number(4096), True),
("93b2d1-vmray", "process=(2176:0),thread=2420,call=2358", capa.features.insn.Number(4), True),
],
# order tests by (file, item)
# so that our LRU cache is most effective.
@@ -46,24 +75,24 @@ DYNAMIC_VMRAY_FEATURE_COUNT_TESTS = sorted(
# file/imports
("93b2d1-vmray", "file", capa.features.file.Import("GetAddrInfoW"), 1),
# thread/api calls
("93b2d1-vmray", "process=(2176:0),thread=7", capa.features.insn.API("free"), 1),
("93b2d1-vmray", "process=(2176:0),thread=7", capa.features.insn.API("GetAddrInfoW"), 5),
("93b2d1-vmray", "process=(2176:0),thread=2420", capa.features.insn.API("free"), 1),
("93b2d1-vmray", "process=(2176:0),thread=2420", capa.features.insn.API("GetAddrInfoW"), 5),
# call/api
("93b2d1-vmray", "process=(2176:0),thread=7,call=2345", capa.features.insn.API("free"), 1),
("93b2d1-vmray", "process=(2176:0),thread=7,call=2345", capa.features.insn.API("GetAddrInfoW"), 0),
("93b2d1-vmray", "process=(2176:0),thread=7,call=2361", capa.features.insn.API("GetAddrInfoW"), 1),
("93b2d1-vmray", "process=(2176:0),thread=2420,call=2345", capa.features.insn.API("free"), 1),
("93b2d1-vmray", "process=(2176:0),thread=2420,call=2345", capa.features.insn.API("GetAddrInfoW"), 0),
("93b2d1-vmray", "process=(2176:0),thread=2420,call=2361", capa.features.insn.API("GetAddrInfoW"), 1),
# call/string argument
(
"93b2d1-vmray",
"process=(2176:0),thread=7,call=10323",
"process=(2176:0),thread=2420,call=10323",
capa.features.common.String("raw.githubusercontent.com"),
1,
),
("93b2d1-vmray", "process=(2176:0),thread=7,call=10323", capa.features.common.String("non_existant"), 0),
("93b2d1-vmray", "process=(2176:0),thread=2420,call=10323", capa.features.common.String("non_existant"), 0),
# call/number argument
("93b2d1-vmray", "process=(2176:0),thread=7,call=10315", capa.features.insn.Number(4096), 1),
("93b2d1-vmray", "process=(2176:0),thread=7,call=10315", capa.features.insn.Number(4), 1),
("93b2d1-vmray", "process=(2176:0),thread=7,call=10315", capa.features.insn.Number(404), 0),
("93b2d1-vmray", "process=(2176:0),thread=2420,call=10315", capa.features.insn.Number(4096), 1),
("93b2d1-vmray", "process=(2176:0),thread=2420,call=10315", capa.features.insn.Number(4), 1),
("93b2d1-vmray", "process=(2176:0),thread=2420,call=10315", capa.features.insn.Number(404), 0),
],
# order tests by (file, item)
# so that our LRU cache is most effective.
@@ -87,3 +116,10 @@ def test_vmray_features(sample, scope, feature, expected):
)
def test_vmray_feature_counts(sample, scope, feature, expected):
fixtures.do_test_feature_count(fixtures.get_vmray_extractor, sample, scope, feature, expected)
def test_vmray_processes():
# see #2394
path = fixtures.get_data_path_by_name("2f8a79-vmray")
vmre = fixtures.get_vmray_extractor(path)
assert len(vmre.analysis.monitor_processes) == 9
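
Note: the backslash cases above (see #2428) come down to Python string escaping: in the test source, "C:\\Users\\..." denotes a runtime value with single backslashes, while "C:\\\\Users\\\\..." denotes doubled backslashes, and the extractor must yield the former. A quick sketch of the distinction:

path = "C:\\Users\\WhuOXYsD\\Desktop\\filename.exe"  # value: C:\Users\WhuOXYsD\Desktop\filename.exe
doubled = "C:\\\\Users\\\\WhuOXYsD\\\\Desktop\\\\filename.exe"  # value with doubled backslashes
assert path != doubled
assert doubled == path.replace("\\", "\\\\")
print(path)
print(doubled)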

(file diff suppressed because it is too large)


@@ -33,7 +33,7 @@
"eslint-plugin-vue": "^9.23.0",
"jsdom": "^24.1.0",
"prettier": "^3.2.5",
"vite": "^5.3.1",
"vite": "^5.4.6",
"vite-plugin-singlefile": "^2.0.2",
"vitest": "^1.6.0"
}


@@ -1,6 +1,7 @@
<script setup>
import Menubar from "primevue/menubar";
import { RouterLink } from "vue-router";
import Button from "primevue/button";
const isBundle = import.meta.env.MODE === "bundle";
</script>
@@ -14,6 +15,9 @@ const isBundle = import.meta.env.MODE === "bundle";
</template>
<template #end>
<div class="flex align-items-center gap-3">
<a href="https://github.com/mandiant/capa/issues/new/choose" target="_blank" rel="noopener noreferrer">
<Button severity="contrast" size="small" outlined label="Provide feedback" />
</a>
<a
v-if="!isBundle"
v-ripple


@@ -5,7 +5,7 @@
mode="basic"
name="model[]"
accept=".json,.gz"
:max-file-size="10000000"
:max-file-size="100000000"
:auto="true"
:custom-upload="true"
choose-label="Upload from local"


@@ -18,12 +18,20 @@ const router = createRouter({
name: "analysis",
component: AnalysisView,
beforeEnter: (to, from, next) => {
-          if (rdocStore.data.value === null) {
-            // No rdoc loaded, redirect to home page
-            next({ name: "home" });
-          } else {
-            // rdoc is loaded, proceed to analysis page
+          // check if rdoc is loaded
+          if (rdocStore.data.value !== null) {
+            // rdocStore.data already contains the rdoc json - continue
             next();
+          } else {
+            // rdoc is not loaded, check if the rdoc query param is set in the URL
+            const rdocUrl = to.query.rdoc;
+            if (rdocUrl) {
+              // query param is set - try to load the rdoc from the homepage
+              next({ name: "home", query: { rdoc: rdocUrl } });
+            } else {
+              // no query param is set - go back home
+              next({ name: "home" });
+            }
           }
}
},


@@ -492,6 +492,8 @@ function getFeatureName(feature) {
return `operand[${feature.index}].offset: 0x${feature.operand_offset.toString(16).toUpperCase()}`;
case "class":
return `${feature.class_}`;
case "import":
return `${feature.import_}`;
default:
return `${feature[feature.type]}`;
}


@@ -88,7 +88,7 @@
box-shadow: 0 0.5rem 1rem rgba(0,0,0,0.05),inset 0 -1px 0 rgba(0,0,0,0.15);"
>
<a href="/" class="d-flex align-items-center mb-3 mb-md-0 me-md-auto">
<img src="./img/logo.png" height=48 />
<img src="./img/logo.png" alt="capa logo" height=48 />
</a>
<ul class="nav nav-pills">
@@ -118,7 +118,7 @@
references.
</p>
<div class="d-grid gap-2 d-md-flex justify-content-md-start mb-4 mb-lg-3">
<a href="#download" type="button" class="btn btn-primary bs-primary btn-lg px-4 me-md-2 fw-bold">Download</button>
<a href="#download" type="button" class="btn btn-primary bs-primary btn-lg px-4 me-md-2 fw-bold">Download</a>
<a href="./rules/" type="button" class="btn btn-outline-secondary btn-lg px-4">Browse Rules</a>
</div>
</div>
@@ -194,7 +194,7 @@
<div class="row flex-lg-row-reverse align-items-center g-5">
<h1>What's New</h1>
<h3 class="mt-3">Rule Updates</h3>
<h2 class="mt-3">Rule Updates</h2>
<ul class="mt-2 ps-5">
<!-- TODO(williballenthin): add date -->
@@ -213,10 +213,22 @@
</li>
</ul>
<h3 class="mt-3">Tool Updates</h3>
<h2 class="mt-3">Tool Updates</h2>
<h5 class="mt-2">v7.2.0</h5>
<!-- TODO(williballenthin): add date -->
<h3 class="mt-2">v7.3.0 (<em>2024-09-20</em>)</h3>
<div class="mt-0">
The <a href="https://github.com/mandiant/capa/releases/tag/v7.3.0">capa v7.3.0</a> release comes with the following three major enhancements:
              <p><strong>1. Support for VMRay sandbox analysis archives</strong>: Unlock powerful malware analysis with capa&#39;s new <a href="https://www.vmray.com/">VMRay sandbox</a> integration!
              Simply provide a VMRay analysis archive, and capa will automatically extract and match capabilities to streamline your workflow. VMRay is the second dynamic analysis backend that capa supports, after
              <a href="https://www.mandiant.com/resources/blog/dynamic-capa-executable-behavior-cape-sandbox">CAPE</a>.</p>
              <p><strong>2. Support for BinExport files generated by Ghidra</strong>: <a href="https://github.com/google/binexport">BinExport</a> files store disassembled data in a Protocol Buffer format.
              capa now supports the analysis of BinExport files generated by Ghidra. Using Ghidra and the BinExport file format, users can now analyze ARM (AARCH64) ELF files targeting Android.</p>
<p><strong>3. Introducing the capa rules website</strong>: You can now browse capa&#39;s default rule set at <a href="https://mandiant.github.io/capa/rules">https://mandiant.github.io/capa/rules</a>.
In modern terminals the CLI capa tool hyperlinks to resources on the web, including entries on the capa rules website.
Furthermore, <a href="https://mandiant.github.io/capa">https://mandiant.github.io/capa</a> provides a landing page for the capa tool project.</p>
</div>
<h3 class="mt-2">v7.2.0 (<em>2024-08-20</em>)</h3>
<p class="mt-0">
<a href="https://github.com/mandiant/capa/releases/tag/v7.2.0">capa v7.2.0</a>
introduces a first version of capa Explorer Web: a web-based user interface to inspect capa results using your browser.
@@ -254,9 +266,9 @@
<div class="col">
<div class="row row-cols-1 row-cols-sm-2 g-4">
<div class="col d-flex flex-column gap-2">
<h4 class="fw-semibold mb-0 text-body-emphasis">
<h3 class="fw-semibold mb-0 text-body-emphasis">
IDA Pro
</h4>
</h3>
<p class="text-body-secondary">
<!-- TODO(williballenthin): add link to find out more -->
Use the capa Explorer IDA Plugin to guide your reverse engineering, zeroing in on the interesting functions by behavior.
@@ -264,9 +276,9 @@
</div>
<div class="col d-flex flex-column gap-2">
<h4 class="fw-semibold mb-0 text-body-emphasis">
<h3 class="fw-semibold mb-0 text-body-emphasis">
Ghidra
</h4>
</h3>
<p class="text-body-secondary">
<!-- TODO(williballenthin): add link to find out more -->
Invoke Ghidra in headless mode to collect features for capa, or use the capa Explorer Ghidra plugin to understand key functions.
@@ -274,9 +286,9 @@
</div>
<div class="col d-flex flex-column gap-2">
-              <h4 class="fw-semibold mb-0 text-body-emphasis">
+              <h3 class="fw-semibold mb-0 text-body-emphasis">
                 Binary Ninja
-              </h4>
+              </h3>
<p class="text-body-secondary">
<!-- TODO(williballenthin): add link to find out more -->
Use Binary Ninja as the disassembler backend, relying on its state-of-the-art code analysis to recover capabilities.
@@ -284,9 +296,9 @@
</div>
<div class="col d-flex flex-column gap-2">
-              <h4 class="fw-semibold mb-0 text-body-emphasis">
+              <h3 class="fw-semibold mb-0 text-body-emphasis">
                 CAPE
-              </h4>
+              </h3>
<p class="text-body-secondary">
<!-- TODO(williballenthin): add link to find out more -->
Analyze the API trace captured by CAPE as it detonates malware, summarizing the behaviors seen across thousands of function calls.
@@ -356,10 +368,10 @@
<div class="bg-dark text-secondary px-4 pt-5 text-center">
<div class="py-5">
<img src="./img/icon.png" />
<h3 class="display-5 fw-bold text-white">
<img src="./img/icon.png" alt="capa icon"/>
<h2 class="display-5 fw-bold text-white">
capa
</h3>
</h2>
<div class="col-lg-6 mx-auto">
<p class="fs-5 my-4">
@@ -379,7 +391,7 @@
</div>
</div>
</div>
</main>
<script>
window.addEventListener('DOMContentLoaded', (event) => {