Merge branch 'master' into backend-ghidra

This commit is contained in:
colton-gabertan
2023-08-17 16:06:17 +00:00
16 changed files with 71 additions and 26 deletions

View File

@@ -9,18 +9,23 @@
### Breaking Changes
### New Rules (5)
### New Rules (6)
- executable/pe/export/forwarded-export ronnie.salomonsen@mandiant.com
- host-interaction/bootloader/get-uefi-variable jakub.jozwiak@mandiant.com
- host-interaction/bootloader/set-uefi-variable jakub.jozwiak@mandiant.com
- nursery/enumerate-device-drivers-on-linux @mr-tz
- anti-analysis/anti-vm/vm-detection/check-for-foreground-window-switch ervin.ocampo@mandiant.com
-
### Bug Fixes
- Fix binja backend stack string detection. #1473 @xusheng6
- linter: skip native API check for NtProtectVirtualMemory #1675 @williballenthin
- OS: detect Android ELF files #1705 @williballenthin
- ELF: fix parsing of symtab #1704 @williballenthin
- result document: don't use deprecated pydantic functions #1718 @williballenthin
- pytest: don't mark IDA tests as pytest tests #1719 @williballenthin
### capa explorer IDA Pro plugin
- fix unhandled exception when resolving rule path #1693 @mike-hunhoff

View File

@@ -2,7 +2,7 @@
[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/flare-capa)](https://pypi.org/project/flare-capa)
[![Last release](https://img.shields.io/github/v/release/mandiant/capa)](https://github.com/mandiant/capa/releases)
[![Number of rules](https://img.shields.io/badge/rules-828-blue.svg)](https://github.com/mandiant/capa-rules)
[![Number of rules](https://img.shields.io/badge/rules-829-blue.svg)](https://github.com/mandiant/capa-rules)
[![CI status](https://github.com/mandiant/capa/workflows/CI/badge.svg)](https://github.com/mandiant/capa/actions?query=workflow%3ACI+event%3Apush+branch%3Amaster)
[![Downloads](https://img.shields.io/github/downloads/mandiant/capa/total)](https://github.com/mandiant/capa/releases)
[![License](https://img.shields.io/badge/license-Apache--2.0-green.svg)](LICENSE.txt)

View File

@@ -13,6 +13,8 @@ from enum import Enum
from typing import Set, Dict, List, Tuple, BinaryIO, Iterator, Optional
from dataclasses import dataclass
import Elf # from vivisect
logger = logging.getLogger(__name__)
@@ -54,6 +56,7 @@ class OS(str, Enum):
CLOUD = "cloud"
SYLLABLE = "syllable"
NACL = "nacl"
ANDROID = "android"
# via readelf: https://github.com/bminor/binutils-gdb/blob/c0e94211e1ac05049a4ce7c192c9d14d1764eb3e/binutils/readelf.c#L19635-L19658
@@ -709,17 +712,17 @@ class SymTab:
yield from self.symbols
@classmethod
def from_Elf(cls, ElfBinary) -> Optional["SymTab"]:
endian = "<" if ElfBinary.getEndian() == 0 else ">"
bitness = ElfBinary.bits
def from_viv(cls, elf: Elf.Elf) -> Optional["SymTab"]:
endian = "<" if elf.getEndian() == 0 else ">"
bitness = elf.bits
SHT_SYMTAB = 0x2
for section in ElfBinary.sections:
if section.sh_info & SHT_SYMTAB:
strtab_section = ElfBinary.sections[section.sh_link]
sh_symtab = Shdr.from_viv(section, ElfBinary.readAtOffset(section.sh_offset, section.sh_size))
for section in elf.sections:
if section.sh_type == SHT_SYMTAB:
strtab_section = elf.sections[section.sh_link]
sh_symtab = Shdr.from_viv(section, elf.readAtOffset(section.sh_offset, section.sh_size))
sh_strtab = Shdr.from_viv(
strtab_section, ElfBinary.readAtOffset(strtab_section.sh_offset, strtab_section.sh_size)
strtab_section, elf.readAtOffset(strtab_section.sh_offset, strtab_section.sh_size)
)
try:
@@ -764,6 +767,11 @@ def guess_os_from_ph_notes(elf: ELF) -> Optional[OS]:
elif note.name == "FreeBSD":
logger.debug("note owner: %s", "FREEBSD")
return OS.FREEBSD
elif note.name == "Android":
logger.debug("note owner: %s", "Android")
# see the following for parsing the structure:
# https://android.googlesource.com/platform/ndk/+/master/parse_elfnote.py
return OS.ANDROID
elif note.name == "GNU":
abi_tag = note.abi_tag
if abi_tag:
@@ -855,6 +863,8 @@ def guess_os_from_needed_dependencies(elf: ELF) -> Optional[OS]:
return OS.HURD
if needed.startswith("libhurduser.so"):
return OS.HURD
if needed.startswith("libandroid.so"):
return OS.ANDROID
return None

View File

@@ -38,7 +38,7 @@ def extract_function_symtab_names(fh: FunctionHandle) -> Iterator[Tuple[Feature,
# this is in order to eliminate the computational overhead of refetching symtab each time.
if "symtab" not in fh.ctx["cache"]:
try:
fh.ctx["cache"]["symtab"] = SymTab.from_Elf(fh.inner.vw.parsedbin)
fh.ctx["cache"]["symtab"] = SymTab.from_viv(fh.inner.vw.parsedbin)
except Exception:
fh.ctx["cache"]["symtab"] = None

View File

@@ -115,7 +115,7 @@ def extract_insn_api_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterato
# the symbol table gets stored as a function's attribute in order to avoid running
# this code every time the call is made, thus avoiding the repeated computational overhead.
try:
fh.ctx["cache"]["symtab"] = SymTab.from_Elf(f.vw.parsedbin)
fh.ctx["cache"]["symtab"] = SymTab.from_viv(f.vw.parsedbin)
except Exception:
fh.ctx["cache"]["symtab"] = None

View File

@@ -320,7 +320,7 @@ def loads(s: str) -> capa.features.extractors.base_extractor.FeatureExtractor:
"""deserialize a set of features (as a NullFeatureExtractor) from a string."""
import capa.features.extractors.null as null
freeze = Freeze.parse_raw(s)
freeze = Freeze.model_validate_json(s)
if freeze.version != 2:
raise ValueError(f"unsupported freeze format version: {freeze.version}")

View File

@@ -5,7 +5,6 @@
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import json
import logging
import datetime
import contextlib
@@ -223,7 +222,7 @@ def load_and_verify_cached_results() -> Optional[rdoc.ResultDocument]:
logger.debug("loading cached capa results from netnode '%s'", CAPA_NETNODE)
n = netnode.Netnode(CAPA_NETNODE)
doc = rdoc.ResultDocument.parse_obj(json.loads(n[NETNODE_RESULTS]))
doc = rdoc.ResultDocument.model_validate_json(n[NETNODE_RESULTS])
for rule in rutils.capability_rules(doc):
for location_, _ in rule.matches:

View File

@@ -1228,7 +1228,7 @@ def main(argv: Optional[List[str]] = None):
if format_ == FORMAT_RESULT:
# result document directly parses into meta, capabilities
result_doc = capa.render.result_document.ResultDocument.parse_file(args.sample)
result_doc = capa.render.result_document.ResultDocument.from_file(Path(args.sample))
meta, capabilities = result_doc.to_capa()
else:

View File

@@ -8,6 +8,7 @@
import datetime
import collections
from typing import Dict, List, Tuple, Union, Literal, Optional
from pathlib import Path
from pydantic import Field, BaseModel, ConfigDict
@@ -596,3 +597,7 @@ class ResultDocument(FrozenModel):
capabilities[rule_name].append((addr.to_capa(), result))
return self.meta, capabilities
@classmethod
def from_file(cls, path: Path) -> "ResultDocument":
    """Load a result document from the JSON file at `path`.

    The file is read as UTF-8 text and the text is passed to
    `cls.model_validate_json`, whose result is returned.
    """
    serialized = path.read_text(encoding="utf-8")
    return cls.model_validate_json(serialized)

View File

@@ -32,7 +32,7 @@ classifiers = [
"Topic :: Security",
]
dependencies = [
"tqdm==4.65.0",
"tqdm==4.66.1",
"pyyaml==6.0.1",
"tabulate==0.9.0",
"colorama==0.4.6",
@@ -77,10 +77,10 @@ dev = [
"flake8-simplify==0.20.0",
"flake8-use-pathlib==0.3.0",
"flake8-copyright==0.2.4",
"ruff==0.0.282",
"ruff==0.0.284",
"black==23.7.0",
"isort==5.11.4",
"mypy==1.4.1",
"mypy==1.5.0",
"psutil==5.9.2",
"stix2==3.0.1",
"requests==2.31.0",

View File

@@ -30,6 +30,7 @@ See the License for the specific language governing permissions and limitations
"""
import logging
import binascii
from pathlib import Path
import ida_nalt
import ida_funcs
@@ -68,7 +69,7 @@ def main():
if not path:
return 0
result_doc = capa.render.result_document.ResultDocument.parse_file(path)
result_doc = capa.render.result_document.ResultDocument.from_file(Path(path))
meta, capabilities = result_doc.to_capa()
# in IDA 7.4, the MD5 hash may be truncated, for example:

View File

@@ -31,6 +31,7 @@ Example:
import sys
import logging
import argparse
from pathlib import Path
import capa.render.proto
import capa.render.result_document
@@ -64,7 +65,7 @@ def main(argv=None):
logging.basicConfig(level=logging.INFO)
logging.getLogger().setLevel(logging.INFO)
rd = capa.render.result_document.ResultDocument.parse_file(args.json)
rd = capa.render.result_document.ResultDocument.from_file(Path(args.json))
pb = capa.render.proto.doc_to_pb2(rd)
sys.stdout.buffer.write(pb.SerializeToString(deterministic=True))

View File

@@ -308,6 +308,8 @@ def get_data_path_by_name(name) -> Path:
return CD / "data" / "2bf18d0403677378adad9001b1243211.elf_"
elif name.startswith("ea2876"):
return CD / "data" / "ea2876e9175410b6f6719f80ee44b9553960758c7d0f7bed73c0fe9a78d8e669.dll_"
elif name.startswith("1038a2"):
return CD / "data" / "1038a23daad86042c66bfe6c9d052d27048de9653bde5750dc0f240c792d9ac8.elf_"
else:
raise ValueError(f"unexpected sample fixture: {name}")
@@ -1180,8 +1182,8 @@ def _039a6_dotnetfile_extractor():
return get_dnfile_extractor(get_data_path_by_name("_039a6"))
def get_result_doc(path):
return capa.render.result_document.ResultDocument.parse_file(path)
def get_result_doc(path: Path):
    """Parse the result document stored at `path` and return it."""
    result_document_cls = capa.render.result_document.ResultDocument
    return result_document_cls.from_file(path)
@pytest.fixture

View File

@@ -92,6 +92,15 @@ def get_ida_extractor(_path):
return capa.features.extractors.ida.extractor.IdaFeatureExtractor()
def nocollect(f):
    """Flag the decorated function so that pytest does not collect it as a test.

    Sets `__test__` to False on `f` and returns the same object.
    """
    setattr(f, "__test__", False)
    return f
# although these look like pytest tests, they're not, because they don't run within pytest
# (the runner is below) and they use `yield`, which is deprecated.
@nocollect
@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
def test_ida_features():
# we're guaranteed to be in a function here, so there's a stack frame
@@ -118,6 +127,7 @@ def test_ida_features():
yield this_name, id, "pass", None
@nocollect
@pytest.mark.skip(reason="IDA Pro tests must be run within IDA")
def test_ida_feature_counts():
# we're guaranteed to be in a function here, so there's a stack frame

View File

@@ -80,6 +80,18 @@ def test_elf_symbol_table():
assert capa.features.extractors.elf.detect_elf_os(f) == "linux"
def test_elf_android_notes():
    """The "1038a2" sample fixture should be detected as OS "android"."""
    # debug output recorded for this sample:
    # DEBUG:capa.features.extractors.elf:guess: osabi: None
    # DEBUG:capa.features.extractors.elf:guess: ph notes: OS.ANDROID
    # DEBUG:capa.features.extractors.elf:guess: sh notes: None
    # DEBUG:capa.features.extractors.elf:guess: linker: None
    # DEBUG:capa.features.extractors.elf:guess: ABI versions needed: None
    # DEBUG:capa.features.extractors.elf:guess: needed dependencies: OS.ANDROID
    sample = Path(get_data_path_by_name("1038a2"))
    with sample.open("rb") as elf_file:
        detected_os = capa.features.extractors.elf.detect_elf_os(elf_file)
        assert detected_os == "android"
def test_elf_parse_capa_pyinstaller_header():
# error after misidentified large pydata section with address 0; fixed in #1454
# compressed ELF header of capa-v5.1.0-linux

View File

@@ -237,7 +237,7 @@ def assert_round_trip(rd: rdoc.ResultDocument):
one = rd
doc = one.model_dump_json(exclude_none=True)
two = rdoc.ResultDocument.parse_raw(doc)
two = rdoc.ResultDocument.model_validate_json(doc)
# show the round trip works
# first by comparing the objects directly,
@@ -272,13 +272,13 @@ def test_round_trip(request, rd_file):
def test_json_to_rdoc():
path = fixtures.get_data_path_by_name("pma01-01-rd")
assert isinstance(rdoc.ResultDocument.parse_file(path), rdoc.ResultDocument)
assert isinstance(rdoc.ResultDocument.from_file(path), rdoc.ResultDocument)
def test_rdoc_to_capa():
path = fixtures.get_data_path_by_name("pma01-01-rd")
rd = rdoc.ResultDocument.parse_file(path)
rd = rdoc.ResultDocument.from_file(path)
meta, capabilites = rd.to_capa()
assert isinstance(meta, rdoc.Metadata)