initial support for os override

This commit is contained in:
Zander Work
2023-03-07 00:11:33 -05:00
parent 9f3428e1c3
commit 8f6a660f3d
5 changed files with 39 additions and 13 deletions

View File

@@ -421,6 +421,8 @@ OS_MACOS = "macos"
OS_ANY = "any"
VALID_OS = {os.value for os in capa.features.extractors.elf.OS}
VALID_OS.update({OS_WINDOWS, OS_LINUX, OS_MACOS, OS_ANY})
# internal only, not to be used in rules
OS_AUTO = "auto"
class OS(Feature):

View File

@@ -10,7 +10,7 @@ import capa.features
import capa.features.extractors.elf
import capa.features.extractors.pefile
import capa.features.extractors.strings
from capa.features.common import OS, FORMAT_PE, FORMAT_ELF, OS_WINDOWS, FORMAT_FREEZE, Arch, Format, String, Feature
from capa.features.common import OS, FORMAT_PE, FORMAT_ELF, OS_AUTO, OS_WINDOWS, FORMAT_FREEZE, Arch, Format, String, Feature
from capa.features.freeze import is_freeze
from capa.features.address import NO_ADDRESS, Address, FileOffsetAddress
@@ -73,7 +73,10 @@ def extract_arch(buf) -> Iterator[Tuple[Feature, Address]]:
return
def extract_os(buf) -> Iterator[Tuple[Feature, Address]]:
def extract_os(buf, os = OS_AUTO) -> Iterator[Tuple[Feature, Address]]:
if os != OS_AUTO:
yield OS(os), NO_ADDRESS
if buf.startswith(b"MZ"):
yield OS(OS_WINDOWS), NO_ADDRESS
elif buf.startswith(b"\x7fELF"):
@@ -92,8 +95,6 @@ def extract_os(buf) -> Iterator[Tuple[Feature, Address]]:
# 2. handling a new file format (e.g. macho)
#
# for (1) we can't do much - its shellcode and all bets are off.
# we could maybe accept a further CLI argument to specify the OS,
# but i think this would be rarely used.
# rules that rely on OS conditions will fail to match on shellcode.
#
# for (2), this logic will need to be updated as the format is implemented.

View File

@@ -25,17 +25,18 @@ logger = logging.getLogger(__name__)
class VivisectFeatureExtractor(FeatureExtractor):
def __init__(self, vw, path):
def __init__(self, vw, path, os):
super().__init__()
self.vw = vw
self.path = path
self.os = os
with open(self.path, "rb") as f:
self.buf = f.read()
# pre-compute these because we'll yield them at *every* scope.
self.global_features: List[Tuple[Feature, Address]] = []
self.global_features.extend(capa.features.extractors.viv.file.extract_file_format(self.buf))
self.global_features.extend(capa.features.extractors.common.extract_os(self.buf))
self.global_features.extend(capa.features.extractors.common.extract_os(self.buf, self.os))
self.global_features.extend(capa.features.extractors.viv.global_.extract_arch(self.vw))
def get_base_address(self):

View File

@@ -385,14 +385,14 @@ def main(argv=None):
argv = sys.argv[1:]
parser = argparse.ArgumentParser(description="save capa features to a file")
capa.main.install_common_args(parser, {"sample", "format", "backend", "signatures"})
capa.main.install_common_args(parser, {"sample", "format", "backend", "os", "signatures"})
parser.add_argument("output", type=str, help="Path to output file")
args = parser.parse_args(args=argv)
capa.main.handle_common_args(args)
sigpaths = capa.main.get_signatures(args.signatures)
extractor = capa.main.get_extractor(args.sample, args.format, args.backend, sigpaths, False)
extractor = capa.main.get_extractor(args.sample, args.format, args.os, args.backend, sigpaths, False)
with open(args.output, "wb") as f:
f.write(dump(extractor))

View File

@@ -65,6 +65,10 @@ from capa.features.common import (
FORMAT_SC64,
FORMAT_DOTNET,
FORMAT_FREEZE,
OS_AUTO,
OS_LINUX,
OS_MACOS,
OS_WINDOWS
)
from capa.features.address import NO_ADDRESS, Address
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle, FeatureExtractor
@@ -490,7 +494,7 @@ def get_workspace(path, format_, sigpaths):
# TODO get_extractors -> List[FeatureExtractor]?
def get_extractor(
path: str, format_: str, backend: str, sigpaths: List[str], should_save_workspace=False, disable_progress=False
path: str, format_: str, os: str, backend: str, sigpaths: List[str], should_save_workspace=False, disable_progress=False
) -> FeatureExtractor:
"""
raises:
@@ -505,7 +509,7 @@ def get_extractor(
if not is_supported_arch(path):
raise UnsupportedArchError()
if not is_supported_os(path):
if os == OS_AUTO and not is_supported_os(path):
raise UnsupportedOSError()
if format_ == FORMAT_DOTNET:
@@ -530,7 +534,7 @@ def get_extractor(
else:
logger.debug("CAPA_SAVE_WORKSPACE unset, not saving workspace")
return capa.features.extractors.viv.extractor.VivisectFeatureExtractor(vw, path)
return capa.features.extractors.viv.extractor.VivisectFeatureExtractor(vw, path, os)
def get_file_extractors(sample: str, format_: str) -> List[FeatureExtractor]:
@@ -790,6 +794,7 @@ def install_common_args(parser, wanted=None):
wanted (Set[str]): collection of arguments to opt-into, including:
- "sample": required positional argument to input file.
- "format": flag to override file format.
- "os": flag to override file operating system.
- "backend": flag to override analysis backend.
- "rules": flag to override path to capa rules.
- "tag": flag to override/specify which rules to match.
@@ -823,6 +828,7 @@ def install_common_args(parser, wanted=None):
#
# - sample
# - format
# - os
# - rules
# - tag
#
@@ -862,6 +868,22 @@ def install_common_args(parser, wanted=None):
choices=(BACKEND_VIV,),
default=BACKEND_VIV,
)
if "os" in wanted:
oses = [
(OS_AUTO, "detect OS automatically - default"),
(OS_LINUX,),
(OS_MACOS,),
(OS_WINDOWS,),
]
os_help = ", ".join(["%s (%s)" % (o[0], o[1]) if len(o) == 2 else o[0] for o in oses])
parser.add_argument(
"-o",
"--os",
choices=[o[0] for o in oses],
default=OS_AUTO,
help="select sample OS: %s" % os_help,
)
if "rules" in wanted:
parser.add_argument(
@@ -1026,7 +1048,7 @@ def main(argv=None):
parser = argparse.ArgumentParser(
description=desc, epilog=epilog, formatter_class=argparse.RawDescriptionHelpFormatter
)
install_common_args(parser, {"sample", "format", "backend", "signatures", "rules", "tag"})
install_common_args(parser, {"sample", "format", "backend", "os", "signatures", "rules", "tag"})
parser.add_argument("-j", "--json", action="store_true", help="emit JSON instead of text")
args = parser.parse_args(args=argv)
ret = handle_common_args(args)
@@ -1142,7 +1164,7 @@ def main(argv=None):
try:
extractor = get_extractor(
args.sample, format_, args.backend, sig_paths, should_save_workspace, disable_progress=args.quiet
args.sample, format_, args.os, args.backend, sig_paths, should_save_workspace, disable_progress=args.quiet
)
except UnsupportedFormatError:
log_unsupported_format_error()