Compare commits

...

3 Commits

Author SHA1 Message Date
Willi Ballenthin
826377530d add Lancelot backend 2025-02-26 09:28:55 +00:00
Willi Ballenthin
9c90f0e554 binexport2: extract dll name from linked library 2025-02-26 09:28:55 +00:00
Willi Ballenthin
7431c67bbe binexport2: parse BinExport2 from raw bytes 2025-02-26 09:28:55 +00:00
7 changed files with 82 additions and 4 deletions

View File

@@ -40,12 +40,16 @@ from capa.features.extractors.binexport2.binexport2_pb2 import BinExport2
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
def get_binexport2(sample: Path) -> BinExport2: def get_binexport2_from_bytes(buf: bytes) -> BinExport2:
be2: BinExport2 = BinExport2() be2: BinExport2 = BinExport2()
be2.ParseFromString(sample.read_bytes()) be2.ParseFromString(buf)
return be2 return be2
def get_binexport2(sample: Path) -> BinExport2:
    """Load a BinExport2 message from the file at `sample`.

    The file is read fully into memory and the resulting bytes are handed to
    `get_binexport2_from_bytes`, which performs the actual parsing.
    """
    raw: bytes = sample.read_bytes()
    return get_binexport2_from_bytes(raw)
def compute_common_prefix_length(m: str, n: str) -> int: def compute_common_prefix_length(m: str, n: str) -> int:
# ensure #m < #n # ensure #m < #n
if len(n) < len(m): if len(n) < len(m):

View File

@@ -64,12 +64,17 @@ def extract_insn_api_features(fh: FunctionHandle, _bbh: BBHandle, ih: InsnHandle
): ):
continue continue
dll = ""
if vertex.HasField("library_index"):
library = be2.library[vertex.library_index]
dll = library.name
if not vertex.HasField("mangled_name"): if not vertex.HasField("mangled_name"):
logger.debug("vertex %d does not have mangled_name", vertex_idx) logger.debug("vertex %d does not have mangled_name", vertex_idx)
continue continue
api_name: str = vertex.mangled_name api_name: str = vertex.mangled_name
for name in capa.features.extractors.helpers.generate_symbols("", api_name): for name in capa.features.extractors.helpers.generate_symbols(dll, api_name):
yield API(name), ih.address yield API(name), ih.address

View File

@@ -79,6 +79,7 @@ BACKEND_VMRAY = "vmray"
BACKEND_FREEZE = "freeze" BACKEND_FREEZE = "freeze"
BACKEND_BINEXPORT2 = "binexport2" BACKEND_BINEXPORT2 = "binexport2"
BACKEND_IDA = "ida" BACKEND_IDA = "ida"
BACKEND_LANCELOT = "lancelot"
class CorruptFile(ValueError): class CorruptFile(ValueError):
@@ -351,6 +352,18 @@ def get_extractor(
return capa.features.extractors.ida.extractor.IdaFeatureExtractor() return capa.features.extractors.ida.extractor.IdaFeatureExtractor()
elif backend == BACKEND_LANCELOT:
import lancelot
import capa.features.extractors.binexport2
import capa.features.extractors.binexport2.extractor
buf = input_path.read_bytes()
be2_buf: bytes = lancelot.binexport2_from_bytes(buf)
be2 = capa.features.extractors.binexport2.get_binexport2_from_bytes(be2_buf)
return capa.features.extractors.binexport2.extractor.BinExport2FeatureExtractor(be2, buf)
else: else:
raise ValueError("unexpected backend: " + backend) raise ValueError("unexpected backend: " + backend)

View File

@@ -57,6 +57,7 @@ from capa.loader import (
BACKEND_FREEZE, BACKEND_FREEZE,
BACKEND_PEFILE, BACKEND_PEFILE,
BACKEND_DRAKVUF, BACKEND_DRAKVUF,
BACKEND_LANCELOT,
BACKEND_BINEXPORT2, BACKEND_BINEXPORT2,
) )
from capa.helpers import ( from capa.helpers import (
@@ -298,6 +299,7 @@ def install_common_args(parser, wanted=None):
(BACKEND_BINJA, "Binary Ninja"), (BACKEND_BINJA, "Binary Ninja"),
(BACKEND_DOTNET, ".NET"), (BACKEND_DOTNET, ".NET"),
(BACKEND_BINEXPORT2, "BinExport2"), (BACKEND_BINEXPORT2, "BinExport2"),
(BACKEND_LANCELOT, "Lancelot"),
(BACKEND_FREEZE, "capa freeze"), (BACKEND_FREEZE, "capa freeze"),
(BACKEND_CAPE, "CAPE"), (BACKEND_CAPE, "CAPE"),
(BACKEND_DRAKVUF, "DRAKVUF"), (BACKEND_DRAKVUF, "DRAKVUF"),

View File

@@ -36,7 +36,7 @@ import capa.main
logger = logging.getLogger("capa.compare-backends") logger = logging.getLogger("capa.compare-backends")
BACKENDS = ("vivisect", "ida", "binja") BACKENDS = ("vivisect", "ida", "binja", "lancelot")
@dataclass @dataclass
@@ -113,6 +113,9 @@ def collect(args):
file.unlink() file.unlink()
doc = json.loads(results_path.read_text(encoding="utf-8")) doc = json.loads(results_path.read_text(encoding="utf-8"))
for backend in BACKENDS:
if backend not in doc:
doc[backend] = {}
plan = [] plan = []
for file in sorted(p for p in testfiles.glob("*")): for file in sorted(p for p in testfiles.glob("*")):
@@ -228,6 +231,7 @@ def report(args):
t.add_column("viv") t.add_column("viv")
t.add_column("ida") t.add_column("ida")
t.add_column("bn") t.add_column("bn")
t.add_column("lan")
t.add_column("rule") t.add_column("rule")
for rule, _ in seen_rules.most_common(): for rule, _ in seen_rules.most_common():
@@ -235,6 +239,7 @@ def report(args):
"x" if rule in rules_by_backend["vivisect"] else " ", "x" if rule in rules_by_backend["vivisect"] else " ",
"x" if rule in rules_by_backend["ida"] else " ", "x" if rule in rules_by_backend["ida"] else " ",
"x" if rule in rules_by_backend["binja"] else " ", "x" if rule in rules_by_backend["binja"] else " ",
"x" if rule in rules_by_backend["lancelot"] else " ",
rule, rule,
) )

View File

@@ -134,6 +134,23 @@ def fixup_viv(path: Path, extractor):
extractor.vw.makeFunction(0x404970) extractor.vw.makeFunction(0x404970)
@lru_cache
def get_lancelot_extractor(path: Path):
    """Build a BinExport2 feature extractor for `path`, using Lancelot as the analysis backend.

    The sample is read from disk, Lancelot converts its bytes into a serialized
    BinExport2 buffer, and that buffer is parsed and wrapped in a
    `BinExport2FeatureExtractor` together with the original sample bytes.
    Results are memoized by `lru_cache`, keyed on the `path` argument.
    """
    # imported lazily so that this module loads even when lancelot is unavailable
    # NOTE(review): presumably the reason for the function-scope imports; confirm.
    import lancelot

    import capa.features.extractors.binexport2
    import capa.features.extractors.binexport2.extractor

    be2_module = capa.features.extractors.binexport2

    sample_bytes = path.read_bytes()
    serialized: bytes = lancelot.binexport2_from_bytes(sample_bytes)

    extractor = be2_module.extractor.BinExport2FeatureExtractor(
        be2_module.get_binexport2_from_bytes(serialized),
        sample_bytes,
    )

    # attach the sample location (POSIX-style string) to the extractor instance
    setattr(extractor, "path", path.as_posix())
    return extractor
@lru_cache(maxsize=1) @lru_cache(maxsize=1)
def get_pefile_extractor(path: Path): def get_pefile_extractor(path: Path):
import capa.features.extractors.pefile import capa.features.extractors.pefile

View File

@@ -0,0 +1,32 @@
# Copyright (C) 2024 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import pytest
import fixtures
@fixtures.parametrize(
    "sample,scope,feature,expected",
    fixtures.FEATURE_PRESENCE_TESTS + fixtures.FEATURE_SYMTAB_FUNC_TESTS,
    indirect=["sample", "scope"],
)
def test_lancelot_features(sample, scope, feature, expected):
    """Check feature presence/absence as reported by the Lancelot-backed extractor.

    Samples whose file name contains ".elf" are marked as expected failures
    before any extraction is attempted.
    """
    targets_elf = ".elf" in sample.name
    if targets_elf:
        pytest.xfail("lancelot doesn't handle ELF files")

    fixtures.do_test_feature_presence(
        fixtures.get_lancelot_extractor,
        sample,
        scope,
        feature,
        expected,
    )
@fixtures.parametrize(
    "sample,scope,feature,expected",
    fixtures.FEATURE_COUNT_TESTS,
    indirect=["sample", "scope"],
)
def test_lancelot_feature_counts(sample, scope, feature, expected):
    """Check feature occurrence counts as reported by the Lancelot-backed extractor.

    Samples whose file name contains ".elf" are marked as expected failures
    before any extraction is attempted.
    """
    targets_elf = ".elf" in sample.name
    if targets_elf:
        pytest.xfail("lancelot doesn't handle ELF files")

    fixtures.do_test_feature_count(
        fixtures.get_lancelot_extractor,
        sample,
        scope,
        feature,
        expected,
    )