Merge branch 'master' into backend-ghidra

2025-12-12 15:49:46 -08:00 · 2023-08-17 16:06:17 +00:00
parent b3cf1129e3 7e78133925
commit a2a2949675
16 changed files with 71 additions and 26 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,18 +9,23 @@

 ### Breaking Changes

-### New Rules (5)
+### New Rules (6)

 - executable/pe/export/forwarded-export ronnie.salomonsen@mandiant.com
 - host-interaction/bootloader/get-uefi-variable jakub.jozwiak@mandiant.com
 - host-interaction/bootloader/set-uefi-variable jakub.jozwiak@mandiant.com
 - nursery/enumerate-device-drivers-on-linux @mr-tz
+- anti-analysis/anti-vm/vm-detection/check-for-foreground-window-switch ervin.ocampo@mandiant.com
 -

 ### Bug Fixes

 - Fix binja backend stack string detection. #1473 @xusheng6
 - linter: skip native API check for NtProtectVirtualMemory #1675 @williballenthin 
+- OS: detect Android ELF files #1705 @williballenthin
+- ELF: fix parsing of symtab #1704 @williballenthin
+- result document: don't use deprecated pydantic functions #1718 @williballenthin
+- pytest: don't mark IDA tests as pytest tests #1719 @williballenthin

 ### capa explorer IDA Pro plugin
 - fix unhandled exception when resolving rule path #1693 @mike-hunhoff
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@

 [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/flare-capa)](https://pypi.org/project/flare-capa)
 [![Last release](https://img.shields.io/github/v/release/mandiant/capa)](https://github.com/mandiant/capa/releases)
-[![Number of rules](https://img.shields.io/badge/rules-828-blue.svg)](https://github.com/mandiant/capa-rules)
+[![Number of rules](https://img.shields.io/badge/rules-829-blue.svg)](https://github.com/mandiant/capa-rules)
 [![CI status](https://github.com/mandiant/capa/workflows/CI/badge.svg)](https://github.com/mandiant/capa/actions?query=workflow%3ACI+event%3Apush+branch%3Amaster)
 [![Downloads](https://img.shields.io/github/downloads/mandiant/capa/total)](https://github.com/mandiant/capa/releases)
 [![License](https://img.shields.io/badge/license-Apache--2.0-green.svg)](LICENSE.txt)
--- a/capa/features/extractors/elf.py
+++ b/capa/features/extractors/elf.py
@@ -13,6 +13,8 @@ from enum import Enum
 from typing import Set, Dict, List, Tuple, BinaryIO, Iterator, Optional
 from dataclasses import dataclass

+import Elf  # from vivisect
+
 logger = logging.getLogger(__name__)


@@ -54,6 +56,7 @@ class OS(str, Enum):
    CLOUD = "cloud"
    SYLLABLE = "syllable"
    NACL = "nacl"
+    ANDROID = "android"


 # via readelf: https://github.com/bminor/binutils-gdb/blob/c0e94211e1ac05049a4ce7c192c9d14d1764eb3e/binutils/readelf.c#L19635-L19658
@@ -709,17 +712,17 @@ class SymTab:
        yield from self.symbols

    @classmethod
-    def from_Elf(cls, ElfBinary) -> Optional["SymTab"]:
-        endian = "<" if ElfBinary.getEndian() == 0 else ">"
-        bitness = ElfBinary.bits
+    def from_viv(cls, elf: Elf.Elf) -> Optional["SymTab"]:
+        endian = "<" if elf.getEndian() == 0 else ">"
+        bitness = elf.bits

        SHT_SYMTAB = 0x2
-        for section in ElfBinary.sections:
-            if section.sh_info & SHT_SYMTAB:
-                strtab_section = ElfBinary.sections[section.sh_link]
-                sh_symtab = Shdr.from_viv(section, ElfBinary.readAtOffset(section.sh_offset, section.sh_size))
+        for section in elf.sections:
+            if section.sh_type == SHT_SYMTAB:
+                strtab_section = elf.sections[section.sh_link]
+                sh_symtab = Shdr.from_viv(section, elf.readAtOffset(section.sh_offset, section.sh_size))
                sh_strtab = Shdr.from_viv(
-                    strtab_section, ElfBinary.readAtOffset(strtab_section.sh_offset, strtab_section.sh_size)
+                    strtab_section, elf.readAtOffset(strtab_section.sh_offset, strtab_section.sh_size)
                )

        try:
@@ -764,6 +767,11 @@ def guess_os_from_ph_notes(elf: ELF) -> Optional[OS]:
        elif note.name == "FreeBSD":
            logger.debug("note owner: %s", "FREEBSD")
            return OS.FREEBSD
+        elif note.name == "Android":
+            logger.debug("note owner: %s", "Android")
+            # see the following for parsing the structure:
+            # https://android.googlesource.com/platform/ndk/+/master/parse_elfnote.py
+            return OS.ANDROID
        elif note.name == "GNU":
            abi_tag = note.abi_tag
            if abi_tag:
@@ -855,6 +863,8 @@ def guess_os_from_needed_dependencies(elf: ELF) -> Optional[OS]:
            return OS.HURD
        if needed.startswith("libhurduser.so"):
            return OS.HURD
+        if needed.startswith("libandroid.so"):
+            return OS.ANDROID

    return None

--- a/capa/features/extractors/viv/function.py
+++ b/capa/features/extractors/viv/function.py
@@ -38,7 +38,7 @@ def extract_function_symtab_names(fh: FunctionHandle) -> Iterator[Tuple[Feature,
        # this is in order to eliminate the computational overhead of refetching symtab each time.
        if "symtab" not in fh.ctx["cache"]:
            try:
-                fh.ctx["cache"]["symtab"] = SymTab.from_Elf(fh.inner.vw.parsedbin)
+                fh.ctx["cache"]["symtab"] = SymTab.from_viv(fh.inner.vw.parsedbin)
            except Exception:
                fh.ctx["cache"]["symtab"] = None

--- a/capa/features/extractors/viv/insn.py
+++ b/capa/features/extractors/viv/insn.py
@@ -115,7 +115,7 @@ def extract_insn_api_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterato
                # the symbol table gets stored as a function's attribute in order to avoid running
                # this code everytime the call is made, thus preventing the computational overhead.
                try:
-                    fh.ctx["cache"]["symtab"] = SymTab.from_Elf(f.vw.parsedbin)
+                    fh.ctx["cache"]["symtab"] = SymTab.from_viv(f.vw.parsedbin)
                except Exception:
                    fh.ctx["cache"]["symtab"] = None

--- a/capa/features/freeze/init.py
+++ b/capa/features/freeze/init.py
@@ -320,7 +320,7 @@ def loads(s: str) -> capa.features.extractors.base_extractor.FeatureExtractor:
    """deserialize a set of features (as a NullFeatureExtractor) from a string."""
    import capa.features.extractors.null as null

-    freeze = Freeze.parse_raw(s)
+    freeze = Freeze.model_validate_json(s)
    if freeze.version != 2:
        raise ValueError(f"unsupported freeze format version: {freeze.version}")

--- a/capa/ida/helpers.py
+++ b/capa/ida/helpers.py
@@ -5,7 +5,6 @@
 # Unless required by applicable law or agreed to in writing, software distributed under the License
 #  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
-import json
 import logging
 import datetime
 import contextlib
@@ -223,7 +222,7 @@ def load_and_verify_cached_results() -> Optional[rdoc.ResultDocument]:
    logger.debug("loading cached capa results from netnode '%s'", CAPA_NETNODE)

    n = netnode.Netnode(CAPA_NETNODE)
-    doc = rdoc.ResultDocument.parse_obj(json.loads(n[NETNODE_RESULTS]))
+    doc = rdoc.ResultDocument.model_validate_json(n[NETNODE_RESULTS])

    for rule in rutils.capability_rules(doc):
        for location_, _ in rule.matches:
--- a/capa/main.py
+++ b/capa/main.py
@@ -1228,7 +1228,7 @@ def main(argv: Optional[List[str]] = None):

    if format_ == FORMAT_RESULT:
        # result document directly parses into meta, capabilities
-        result_doc = capa.render.result_document.ResultDocument.parse_file(args.sample)
+        result_doc = capa.render.result_document.ResultDocument.from_file(Path(args.sample))
        meta, capabilities = result_doc.to_capa()

    else:
--- a/capa/render/result_document.py
+++ b/capa/render/result_document.py
@@ -8,6 +8,7 @@
 import datetime
 import collections
 from typing import Dict, List, Tuple, Union, Literal, Optional
+from pathlib import Path

 from pydantic import Field, BaseModel, ConfigDict

@@ -596,3 +597,7 @@ class ResultDocument(FrozenModel):
                capabilities[rule_name].append((addr.to_capa(), result))

        return self.meta, capabilities
+
+    @classmethod
+    def from_file(cls, path: Path) -> "ResultDocument":  # stands in for pydantic's deprecated parse_file()
+        return cls.model_validate_json(path.read_text(encoding="utf-8"))
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -32,7 +32,7 @@ classifiers = [
    "Topic :: Security",
 ]
 dependencies = [
-    "tqdm==4.65.0",
+    "tqdm==4.66.1",
    "pyyaml==6.0.1",
    "tabulate==0.9.0",
    "colorama==0.4.6",
@@ -77,10 +77,10 @@ dev = [
    "flake8-simplify==0.20.0",
    "flake8-use-pathlib==0.3.0",
    "flake8-copyright==0.2.4",
-    "ruff==0.0.282",
+    "ruff==0.0.284",
    "black==23.7.0",
    "isort==5.11.4",
-    "mypy==1.4.1",
+    "mypy==1.5.0",
    "psutil==5.9.2",
    "stix2==3.0.1",
    "requests==2.31.0",
--- a/scripts/import-to-ida.py
+++ b/scripts/import-to-ida.py
@@ -30,6 +30,7 @@ See the License for the specific language governing permissions and limitations
 """
 import logging
 import binascii
+from pathlib import Path

 import ida_nalt
 import ida_funcs
@@ -68,7 +69,7 @@ def main():
    if not path:
        return 0

-    result_doc = capa.render.result_document.ResultDocument.parse_file(path)
+    result_doc = capa.render.result_document.ResultDocument.from_file(Path(path))
    meta, capabilities = result_doc.to_capa()

    # in IDA 7.4, the MD5 hash may be truncated, for example:
--- a/scripts/proto-from-results.py
+++ b/scripts/proto-from-results.py
@@ -31,6 +31,7 @@ Example:
 import sys
 import logging
 import argparse
+from pathlib import Path

 import capa.render.proto
 import capa.render.result_document
@@ -64,7 +65,7 @@ def main(argv=None):
        logging.basicConfig(level=logging.INFO)
        logging.getLogger().setLevel(logging.INFO)

-    rd = capa.render.result_document.ResultDocument.parse_file(args.json)
+    rd = capa.render.result_document.ResultDocument.from_file(Path(args.json))
    pb = capa.render.proto.doc_to_pb2(rd)

    sys.stdout.buffer.write(pb.SerializeToString(deterministic=True))
--- a/tests/fixtures.py
+++ b/tests/fixtures.py
@@ -308,6 +308,8 @@ def get_data_path_by_name(name) -> Path:
        return CD / "data" / "2bf18d0403677378adad9001b1243211.elf_"
    elif name.startswith("ea2876"):
        return CD / "data" / "ea2876e9175410b6f6719f80ee44b9553960758c7d0f7bed73c0fe9a78d8e669.dll_"
+    elif name.startswith("1038a2"):
+        return CD / "data" / "1038a23daad86042c66bfe6c9d052d27048de9653bde5750dc0f240c792d9ac8.elf_"
    else:
        raise ValueError(f"unexpected sample fixture: {name}")

@@ -1180,8 +1182,8 @@ def _039a6_dotnetfile_extractor():
    return get_dnfile_extractor(get_data_path_by_name("_039a6"))


-def get_result_doc(path):
-    return capa.render.result_document.ResultDocument.parse_file(path)
+def get_result_doc(path: Path):
+    return capa.render.result_document.ResultDocument.from_file(path)


@pytest.fixture
--- a/tests/test_ida_features.py
+++ b/tests/test_ida_features.py
@@ -92,6 +92,15 @@ def get_ida_extractor(_path):
    return capa.features.extractors.ida.extractor.IdaFeatureExtractor()


+def nocollect(f):
+    """set `f.__test__ = False` so that pytest does not collect `f` as a test, then return `f`."""
+    f.__test__ = False
+    return f
+
+
+# although these look like pytest tests, they're not: they must be run within IDA Pro
+# (by the runner below, not by pytest) and they use `yield`, which is deprecated in pytest.
+@nocollect
@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
 def test_ida_features():
    # we're guaranteed to be in a function here, so there's a stack frame
@@ -118,6 +127,7 @@ def test_ida_features():
            yield this_name, id, "pass", None


+@nocollect
@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
 def test_ida_feature_counts():
    # we're guaranteed to be in a function here, so there's a stack frame
--- a/tests/test_os_detection.py
+++ b/tests/test_os_detection.py
@@ -80,6 +80,18 @@ def test_elf_symbol_table():
        assert capa.features.extractors.elf.detect_elf_os(f) == "linux"


+def test_elf_android_notes():
+    # DEBUG:capa.features.extractors.elf:guess: osabi: None
+    # DEBUG:capa.features.extractors.elf:guess: ph notes: OS.ANDROID
+    # DEBUG:capa.features.extractors.elf:guess: sh notes: None
+    # DEBUG:capa.features.extractors.elf:guess: linker: None
+    # DEBUG:capa.features.extractors.elf:guess: ABI versions needed: None
+    # DEBUG:capa.features.extractors.elf:guess: needed dependencies: OS.ANDROID
+    path = get_data_path_by_name("1038a2")
+    with Path(path).open("rb") as f:
+        assert capa.features.extractors.elf.detect_elf_os(f) == "android"
+
+
 def test_elf_parse_capa_pyinstaller_header():
    # error after misidentified large pydata section with address 0; fixed in #1454
    # compressed ELF header of capa-v5.1.0-linux
--- a/tests/test_result_document.py
+++ b/tests/test_result_document.py
@@ -237,7 +237,7 @@ def assert_round_trip(rd: rdoc.ResultDocument):
    one = rd

    doc = one.model_dump_json(exclude_none=True)
-    two = rdoc.ResultDocument.parse_raw(doc)
+    two = rdoc.ResultDocument.model_validate_json(doc)

    # show the round trip works
    # first by comparing the objects directly,
@@ -272,13 +272,13 @@ def test_round_trip(request, rd_file):

 def test_json_to_rdoc():
    path = fixtures.get_data_path_by_name("pma01-01-rd")
-    assert isinstance(rdoc.ResultDocument.parse_file(path), rdoc.ResultDocument)
+    assert isinstance(rdoc.ResultDocument.from_file(path), rdoc.ResultDocument)


 def test_rdoc_to_capa():
    path = fixtures.get_data_path_by_name("pma01-01-rd")

-    rd = rdoc.ResultDocument.parse_file(path)
+    rd = rdoc.ResultDocument.from_file(path)

    meta, capabilites = rd.to_capa()
    assert isinstance(meta, rdoc.Metadata)