vmray: merge upstream

2025-12-12 07:40:38 -08:00 · 2024-07-12 09:27:49 -06:00
parent 9be35f9a8d 76913af20b
commit 194017bce3
10 changed files with 54 additions and 25 deletions
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -32,7 +32,7 @@ jobs:
            artifact_name: capa.exe
            asset_name: windows
            python_version: 3.8
-          - os: macos-11
+          - os: macos-12
            # use older macOS for assumed better portability
            artifact_name: capa
            asset_name: macos
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -76,7 +76,7 @@ jobs:
    strategy:
      fail-fast: false
      matrix:
-        os: [ubuntu-20.04, windows-2019, macos-11]
+        os: [ubuntu-20.04, windows-2019, macos-12]
        # across all operating systems
        python-version: ["3.8", "3.11"]
        include:
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -17,6 +17,7 @@
 ### capa explorer IDA Pro plugin

 ### Development
+- CI: use macos-12 since macos-11 is deprecated and will be removed on June 28th, 2024 #2173 @mr-tz

 ### Raw diffs
 - [capa v7.1.0...master](https://github.com/mandiant/capa/compare/v7.1.0...master)
--- a/capa/features/extractors/binja/extractor.py
+++ b/capa/features/extractors/binja/extractor.py
@@ -28,7 +28,7 @@ from capa.features.extractors.base_extractor import (

 class BinjaFeatureExtractor(StaticFeatureExtractor):
    def __init__(self, bv: binja.BinaryView):
-        super().__init__(hashes=SampleHashes.from_bytes(bv.file.raw.read(0, len(bv.file.raw))))
+        super().__init__(hashes=SampleHashes.from_bytes(bv.file.raw.read(0, bv.file.raw.length)))
        self.bv = bv
        self.global_features: List[Tuple[Feature, Address]] = []
        self.global_features.extend(capa.features.extractors.binja.file.extract_file_format(self.bv))
--- a/capa/features/extractors/cape/global_.py
+++ b/capa/features/extractors/cape/global_.py
@@ -48,7 +48,7 @@ def extract_format(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
    else:
        logger.warning("unknown file format, file command output: %s", report.target.file.type)
        raise ValueError(
-            "unrecognized file format from the CAPE report; output of file command: {report.target.file.type}"
+            f"unrecognized file format from the CAPE report; output of file command: {report.target.file.type}"
        )


@@ -73,7 +73,7 @@ def extract_os(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
        else:
            # if the operating system information is missing from the cape report, it's likely a bug
            logger.warning("unrecognized OS: %s", file_output)
-            raise ValueError("unrecognized OS from the CAPE report; output of file command: {file_output}")
+            raise ValueError(f"unrecognized OS from the CAPE report; output of file command: {file_output}")
    else:
        # the sample is shellcode
        logger.debug("unsupported file format, file command output: %s", file_output)
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -124,10 +124,10 @@ dev = [
    "pytest-sugar==1.0.0",
    "pytest-instafail==0.5.0",
    "pytest-cov==5.0.0",
-    "flake8==7.0.0",
+    "flake8==7.1.0",
    "flake8-bugbear==24.4.26",
    "flake8-encodings==0.5.1",
-    "flake8-comprehensions==3.14.0",
+    "flake8-comprehensions==3.15.0",
    "flake8-logging-format==0.9.0",
    "flake8-no-implicit-concat==0.3.5",
    "flake8-print==5.0.0",
@@ -135,7 +135,7 @@ dev = [
    "flake8-simplify==0.21.0",
    "flake8-use-pathlib==0.3.0",
    "flake8-copyright==0.2.4",
-    "ruff==0.4.8",
+    "ruff==0.5.0",
    "black==24.4.2",
    "isort==5.13.2",
    "mypy==1.10.0",
@@ -163,10 +163,10 @@ build = [
 ]
 scripts = [
    "jschema_to_python==1.2.3",
-    "psutil==5.9.2",
+    "psutil==6.0.0",
    "stix2==3.0.1",
    "sarif_om==1.0.4",
-    "requests==2.31.0",
+    "requests==2.32.3",
 ]

 [tool.deptry]
--- a/scripts/import-to-bn.py
+++ b/scripts/import-to-bn.py
@@ -69,7 +69,8 @@ def load_analysis(bv):
        return 0
    binaryninja.log_info(f"Using capa file {path}")

-    doc = json.loads(path.read_bytes().decode("utf-8"))
+    with Path(path).open("r", encoding="utf-8") as file:
+        doc = json.load(file)

    if "meta" not in doc or "rules" not in doc:
        binaryninja.log_error("doesn't appear to be a capa report")
@@ -83,20 +84,35 @@ def load_analysis(bv):
        binaryninja.log_error("sample mismatch")
        return -2

+    # Retrieve base address
+    capa_base_address = 0
+    if "analysis" in doc["meta"] and "base_address" in doc["meta"]["analysis"]:
+        if doc["meta"]["analysis"]["base_address"]["type"] == "absolute":
+            capa_base_address = int(doc["meta"]["analysis"]["base_address"]["value"])
+
    rows = []
    for rule in doc["rules"].values():
        if rule["meta"].get("lib"):
            continue
        if rule["meta"].get("capa/subscope"):
            continue
-        if rule["meta"]["scope"] != "function":
+        if rule["meta"]["scopes"].get("static") != "function":
            continue

        name = rule["meta"]["name"]
        ns = rule["meta"].get("namespace", "")
-        for va in rule["matches"].keys():
-            va = int(va)
-            rows.append((ns, name, va))
+        for matches in rule["matches"]:
+            for match in matches:
+                if "type" not in match.keys():
+                    continue
+                if "value" not in match.keys():
+                    continue
+                va = match["value"]
+                # Subtract the capa base address from va
+                va = int(va) - capa_base_address
+                # Add binja base address
+                va = va + bv.start
+                rows.append((ns, name, va))

    # order by (namespace, name) so that like things show up together
    rows = sorted(rows)
--- a/scripts/show-features.py
+++ b/scripts/show-features.py
@@ -171,8 +171,8 @@ def print_dynamic_analysis(extractor: DynamicFeatureExtractor, args):
    process_handles = tuple(extractor.get_processes())

    if args.process:
-        process_handles = tuple(filter(lambda ph: ph.inner["name"] == args.process, process_handles))
-        if args.process not in [ph.inner["name"] for ph in args.process]:
+        process_handles = tuple(filter(lambda ph: extractor.get_process_name(ph) == args.process, process_handles))
+        if args.process not in [extractor.get_process_name(ph) for ph in process_handles]:
            print(f"{args.process} not a process")
            return -1

--- a/tests/data
+++ b/tests/data
--- a/tests/test_scripts.py
+++ b/tests/test_scripts.py
@@ -23,10 +23,21 @@ def get_script_path(s: str):
    return str(CD / ".." / "scripts" / s)


-def get_file_path():
+def get_binary_file_path():
    return str(CD / "data" / "9324d1a8ae37a36ae560c37448c9705a.exe_")


+def get_report_file_path():
+    return str(
+        CD
+        / "data"
+        / "dynamic"
+        / "cape"
+        / "v2.4"
+        / "fb7ade52dc5a1d6128b9c217114a46d0089147610f99f5122face29e429a1e74.json.gz"
+    )
+
+
 def get_rules_path():
    return str(CD / ".." / "rules")

@@ -48,12 +59,13 @@ def get_rule_path():
        pytest.param("lint.py", ["-t", "create directory", get_rules_path()]),
        # `create directory` rule has native and .NET example PEs
        pytest.param("lint.py", ["--thorough", "-t", "create directory", get_rules_path()]),
-        pytest.param("match-function-id.py", [get_file_path()]),
-        pytest.param("show-capabilities-by-function.py", [get_file_path()]),
-        pytest.param("show-features.py", [get_file_path()]),
-        pytest.param("show-features.py", ["-F", "0x407970", get_file_path()]),
-        pytest.param("show-unused-features.py", [get_file_path()]),
-        pytest.param("capa_as_library.py", [get_file_path()]),
+        pytest.param("match-function-id.py", [get_binary_file_path()]),
+        pytest.param("show-capabilities-by-function.py", [get_binary_file_path()]),
+        pytest.param("show-features.py", [get_binary_file_path()]),
+        pytest.param("show-features.py", ["-F", "0x407970", get_binary_file_path()]),
+        pytest.param("show-features.py", ["-P", "MicrosoftEdgeUpdate.exe", get_report_file_path()]),
+        pytest.param("show-unused-features.py", [get_binary_file_path()]),
+        pytest.param("capa_as_library.py", [get_binary_file_path()]),
    ],
 )
 def test_scripts(script, args):