mirror of
https://github.com/mandiant/capa.git
synced 2025-12-12 15:49:46 -08:00
add ElfFeatureExtractor
This commit is contained in:
3
.github/mypy/mypy.ini
vendored
3
.github/mypy/mypy.ini
vendored
@@ -71,3 +71,6 @@ ignore_missing_imports = True
|
||||
|
||||
[mypy-devtools.*]
|
||||
ignore_missing_imports = True
|
||||
|
||||
[mypy-elftools.*]
|
||||
ignore_missing_imports = True
|
||||
|
||||
@@ -10,6 +10,7 @@
|
||||
- rule format: add feature `os: ` for operating system, like `os: windows` #723 @williballenthin
|
||||
- rule format: add feature `substring: ` for verbatim strings with leading/trailing wildcards #737 @williballenthin
|
||||
- scripts: add `profile-memory.py` for profiling memory usage #736 @williballenthin
|
||||
- main: add lightweight ELF file feature extractor to detect file limitations #770 @mr-tz
|
||||
|
||||
### Breaking Changes
|
||||
|
||||
|
||||
@@ -344,7 +344,6 @@ VALID_ARCH = (ARCH_I386, ARCH_AMD64)
|
||||
|
||||
class Arch(Feature):
|
||||
def __init__(self, value: str, description=None):
|
||||
assert value in VALID_ARCH
|
||||
super(Arch, self).__init__(value, description=description)
|
||||
self.name = "arch"
|
||||
|
||||
@@ -358,7 +357,6 @@ VALID_OS.update({OS_WINDOWS, OS_LINUX, OS_MACOS})
|
||||
|
||||
class OS(Feature):
|
||||
def __init__(self, value: str, description=None):
|
||||
assert value in (VALID_OS)
|
||||
super(OS, self).__init__(value, description=description)
|
||||
self.name = "os"
|
||||
|
||||
@@ -370,7 +368,6 @@ VALID_FORMAT = (FORMAT_PE, FORMAT_ELF)
|
||||
|
||||
class Format(Feature):
|
||||
def __init__(self, value: str, description=None):
|
||||
assert value in (VALID_FORMAT)
|
||||
super(Format, self).__init__(value, description=description)
|
||||
self.name = "format"
|
||||
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
import abc
|
||||
from typing import Tuple, Iterator, SupportsInt
|
||||
|
||||
from capa.features.basicblock import Feature
|
||||
from capa.features.common import Feature
|
||||
|
||||
# feature extractors may reference functions, BBs, insns by opaque handle values.
|
||||
# the only requirement of these handles are that they support `__int__`,
|
||||
|
||||
@@ -5,13 +5,25 @@ import contextlib
|
||||
|
||||
import pefile
|
||||
|
||||
import capa.features
|
||||
import capa.features.extractors.elf
|
||||
import capa.features.extractors.pefile
|
||||
from capa.features.common import OS, FORMAT_PE, FORMAT_ELF, OS_WINDOWS, Arch, Format
|
||||
from capa.features.common import OS, FORMAT_PE, FORMAT_ELF, OS_WINDOWS, Arch, Format, String
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def extract_file_strings(buf, **kwargs):
    """
    extract ASCII and UTF-16 LE strings from file

    args:
      buf: the raw file bytes to scan
      **kwargs: ignored; accepted so callers may pass additional keyword arguments

    yields:
      Tuple[String, offset]: a string feature and the offset reported by the string extractor.
        all ASCII strings are yielded first, then all UTF-16 LE strings.
    """
    string_extractors = (
        capa.features.extractors.strings.extract_ascii_strings,
        capa.features.extractors.strings.extract_unicode_strings,
    )

    for extract_strings in string_extractors:
        for found in extract_strings(buf):
            yield String(found.s), found.offset
|
||||
|
||||
|
||||
def extract_format(buf):
|
||||
if buf.startswith(b"MZ"):
|
||||
yield Format(FORMAT_PE), 0x0
|
||||
@@ -34,7 +46,7 @@ def extract_arch(buf):
|
||||
with contextlib.closing(io.BytesIO(buf)) as f:
|
||||
arch = capa.features.extractors.elf.detect_elf_arch(f)
|
||||
|
||||
if arch == "unknown":
|
||||
if arch not in capa.features.common.VALID_ARCH:
|
||||
logger.debug("unsupported arch: %s", arch)
|
||||
return
|
||||
|
||||
@@ -62,7 +74,12 @@ def extract_os(buf):
|
||||
with contextlib.closing(io.BytesIO(buf)) as f:
|
||||
os = capa.features.extractors.elf.detect_elf_os(f)
|
||||
|
||||
if os not in capa.features.common.VALID_OS:
|
||||
logger.debug("unsupported os: %s", os)
|
||||
return
|
||||
|
||||
yield OS(os), 0x0
|
||||
|
||||
else:
|
||||
# we likely end up here:
|
||||
# 1. handling shellcode, or
|
||||
|
||||
@@ -1,3 +1,10 @@
|
||||
# Copyright (C) 2020 FireEye, Inc. All Rights Reserved.
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at: [package root]/LICENSE.txt
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
import struct
|
||||
import logging
|
||||
from enum import Enum
|
||||
|
||||
153
capa/features/extractors/elffile.py
Normal file
153
capa/features/extractors/elffile.py
Normal file
@@ -0,0 +1,153 @@
|
||||
# Copyright (C) 2020 FireEye, Inc. All Rights Reserved.
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at: [package root]/LICENSE.txt
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
import logging
|
||||
|
||||
from elftools.elf.elffile import ELFFile, SymbolTableSection
|
||||
|
||||
import capa.features.extractors.common
|
||||
from capa.features.file import Import, Section
|
||||
from capa.features.common import OS, FORMAT_ELF, Arch, Format
|
||||
from capa.features.extractors.elf import Arch as ElfArch
|
||||
from capa.features.extractors.base_extractor import FeatureExtractor
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def extract_file_import_names(elf, **kwargs):
    """
    extract an Import feature for each named function symbol found in the ELF symbol tables

    args:
      elf (elftools.elf.elffile.ELFFile): the parsed ELFFile
      **kwargs: ignored; accepted so every file handler can be called with the same keyword arguments

    yields:
      Tuple[Import, int]: the import feature and its location, which is always 0x0 for now
        (see the TODOs below).
    """
    # see https://github.com/eliben/pyelftools/blob/0664de05ed2db3d39041e2d51d19622a8ef4fb0f/scripts/readelf.py#L372
    # NOTE(review): every named STT_FUNC symbol is reported; there is no check on whether
    # the symbol is undefined in this file - confirm this is intended for "imports".
    for section in elf.iter_sections():
        if not isinstance(section, SymbolTableSection):
            continue

        if section["sh_entsize"] == 0:
            # an entry size of zero means the table cannot be walked, so skip it
            logger.debug("Symbol table '%s' has a sh_entsize of zero!", section.name)
            continue

        logger.debug("Symbol table '%s' contains %s entries:", section.name, section.num_symbols())

        for symbol in section.iter_symbols():
            if symbol.name and symbol.entry.st_info.type == "STT_FUNC":
                # TODO symbol address
                # TODO symbol version info?
                yield Import(symbol.name), 0x0
|
||||
|
||||
|
||||
def extract_file_section_names(elf, **kwargs):
    """
    extract a Section feature for each section of the ELF file

    named sections are reported under their own name.
    unnamed sections are reported as "NULL" when `is_null()` is true and are skipped otherwise.

    args:
      elf (elftools.elf.elffile.ELFFile): the parsed ELFFile
      **kwargs: ignored; accepted so every file handler can be called with the same keyword arguments

    yields:
      Tuple[Section, int]: the section feature and the section header's `sh_addr` value
    """
    for sect in elf.iter_sections():
        name = sect.name
        if not name:
            if not sect.is_null():
                continue
            name = "NULL"

        yield Section(name), sect.header.sh_addr
|
||||
|
||||
|
||||
def extract_file_strings(buf, **kwargs):
    """
    extract string features from the raw file bytes via the shared implementation in
    capa.features.extractors.common

    args:
      buf: the raw sample bytes
      **kwargs: ignored; accepted so every file handler can be called with the same keyword arguments

    yields:
      whatever capa.features.extractors.common.extract_file_strings yields for `buf`
    """
    shared_extract_file_strings = capa.features.extractors.common.extract_file_strings
    yield from shared_extract_file_strings(buf)
|
||||
|
||||
|
||||
def extract_file_os(elf, buf, **kwargs):
    """
    extract exactly one OS feature for the sample

    args:
      elf: unused here; accepted so every file handler can be called with the same keyword arguments
      buf: the raw sample bytes
      **kwargs: ignored

    yields:
      the first item produced by capa.features.extractors.common.extract_os for `buf`,
      or `(OS("unknown"), 0x0)` when that generator produces nothing.
    """
    # our current approach does not always get an OS value, e.g. for packed samples
    # for file limitation purposes, we're more lax here
    os_features = capa.features.extractors.common.extract_os(buf)
    try:
        detected = next(os_features)
    except StopIteration:
        detected = OS("unknown"), 0x0

    yield detected
|
||||
|
||||
|
||||
def extract_file_format(**kwargs):
    """
    report the file format, which is always ELF for this extractor

    args:
      **kwargs: ignored; accepted so every file handler can be called with the same keyword arguments

    yields:
      Tuple[Format, int]: the ELF format feature at location 0x0
    """
    elf_format = Format(FORMAT_ELF)
    yield elf_format, 0x0
|
||||
|
||||
|
||||
def extract_file_arch(elf, **kwargs):
    """
    extract the Arch feature from the machine type reported by the parsed ELF file

    args:
      elf (elftools.elf.elffile.ELFFile): the parsed ELFFile
      **kwargs: ignored; accepted so every file handler can be called with the same keyword arguments

    yields:
      Tuple[Arch, int]: the arch feature at location 0x0 when the machine is "x86" or "x64".
        any other machine logs a warning and yields nothing.
    """
    # TODO merge with capa.features.extractors.elf.detect_elf_arch()
    machine = elf.get_machine_arch()

    if machine not in ("x86", "x64"):
        logger.warning("unsupported architecture: %s", machine)
        return

    elf_arch = ElfArch.I386 if machine == "x86" else ElfArch.AMD64
    yield Arch(elf_arch), 0x0
|
||||
|
||||
|
||||
def extract_file_features(elf, buf):
    """
    run every file-scope handler over the given sample

    each handler in FILE_HANDLERS is called, in order, with `elf` and `buf` as keyword arguments.

    args:
      elf (elftools.elf.elffile.ELFFile): the parsed ELFFile
      buf: the raw sample bytes

    yields:
      Tuple[Feature, VA]: a feature and its location.
    """
    yield from (
        (feature, va)
        for handler in FILE_HANDLERS
        for feature, va in handler(elf=elf, buf=buf)
    )
|
||||
|
||||
|
||||
# file-scope handlers, called in this order by `extract_file_features`.
# each one is invoked with the keyword arguments `elf` and `buf`.
FILE_HANDLERS = (
    # TODO extract file export names
    # extract_file_export_names,
    extract_file_import_names,
    extract_file_section_names,
    extract_file_strings,
    # elffile doesn't have library matching
    # extract_file_function_names,
    extract_file_os,
    extract_file_format,
    extract_file_arch,
)
|
||||
|
||||
|
||||
class ElfFeatureExtractor(FeatureExtractor):
    """
    feature extractor for ELF files that provides file-scope features only.

    the sample is read into memory once when the extractor is constructed,
    so no file handle stays open for the lifetime of the extractor.
    every function-, basic block-, and instruction-scope method raises NotImplementedError.
    """

    # shared message for all scopes that this extractor does not support
    _FILE_SCOPE_ONLY = "ElfFeatureExtractor can only be used to extract file features"

    def __init__(self, path: str):
        """
        args:
          path: file system path of the ELF sample

        errors raised by `open()` (unreadable path) and by `ELFFile` (data it cannot parse)
        propagate to the caller.
        """
        # local import: `io` is not among this module's top-level imports
        import io

        super(ElfFeatureExtractor, self).__init__()
        self.path = path

        # read the raw bytes once and close the file right away,
        # then parse the ELF structures from an in-memory stream over those same bytes.
        with open(self.path, "rb") as f:
            self._buf = f.read()

        self.elf = ELFFile(io.BytesIO(self._buf))

    def get_base_address(self):
        """
        returns:
          the virtual address of the first segment with type LOAD,
          or None when the file has no such segment.
        """
        for segment in self.elf.iter_segments():
            if segment.header.p_type == "PT_LOAD":
                return segment.header.p_vaddr

        return None

    def extract_file_features(self):
        """
        yields:
          Tuple[Feature, VA]: each file-scope feature and its location,
            as produced by the module-level `extract_file_features`.
        """
        for feature, va in extract_file_features(self.elf, self._buf):
            yield feature, va

    def get_functions(self):
        raise NotImplementedError(self._FILE_SCOPE_ONLY)

    def extract_function_features(self, f):
        raise NotImplementedError(self._FILE_SCOPE_ONLY)

    def get_basic_blocks(self, f):
        raise NotImplementedError(self._FILE_SCOPE_ONLY)

    def extract_basic_block_features(self, f, bb):
        raise NotImplementedError(self._FILE_SCOPE_ONLY)

    def get_instructions(self, f, bb):
        raise NotImplementedError(self._FILE_SCOPE_ONLY)

    def extract_insn_features(self, f, bb, insn):
        raise NotImplementedError(self._FILE_SCOPE_ONLY)

    def is_library_function(self, va):
        raise NotImplementedError(self._FILE_SCOPE_ONLY)

    def get_function_name(self, va):
        raise NotImplementedError(self._FILE_SCOPE_ONLY)
|
||||
@@ -5,16 +5,18 @@
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
import logging
|
||||
|
||||
import pefile
|
||||
|
||||
import capa.features.common
|
||||
import capa.features.extractors
|
||||
import capa.features.extractors.common
|
||||
import capa.features.extractors.helpers
|
||||
import capa.features.extractors.strings
|
||||
from capa.features.file import Export, Import, Section
|
||||
from capa.features.common import OS, ARCH_I386, FORMAT_PE, ARCH_AMD64, OS_WINDOWS, Arch, Format, String, Characteristic
|
||||
from capa.features.common import OS, ARCH_I386, FORMAT_PE, ARCH_AMD64, OS_WINDOWS, Arch, Format, Characteristic
|
||||
from capa.features.extractors.base_extractor import FeatureExtractor
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -85,14 +87,7 @@ def extract_file_section_names(pe, **kwargs):
|
||||
|
||||
|
||||
def extract_file_strings(buf, **kwargs):
|
||||
"""
|
||||
extract ASCII and UTF-16 LE strings from file
|
||||
"""
|
||||
for s in capa.features.extractors.strings.extract_ascii_strings(buf):
|
||||
yield String(s.s), s.offset
|
||||
|
||||
for s in capa.features.extractors.strings.extract_unicode_strings(buf):
|
||||
yield String(s.s), s.offset
|
||||
yield from capa.features.extractors.common.extract_file_strings(buf)
|
||||
|
||||
|
||||
def extract_file_function_names(**kwargs):
|
||||
|
||||
@@ -68,14 +68,7 @@ def extract_file_section_names(vw, **kwargs):
|
||||
|
||||
|
||||
def extract_file_strings(buf, **kwargs):
|
||||
"""
|
||||
extract ASCII and UTF-16 LE strings from file
|
||||
"""
|
||||
for s in capa.features.extractors.strings.extract_ascii_strings(buf):
|
||||
yield String(s.s), s.offset
|
||||
|
||||
for s in capa.features.extractors.strings.extract_unicode_strings(buf):
|
||||
yield String(s.s), s.offset
|
||||
yield from capa.features.extractors.common.extract_file_strings(buf)
|
||||
|
||||
|
||||
def extract_file_function_names(vw, **kwargs):
|
||||
|
||||
16
capa/main.py
16
capa/main.py
@@ -39,6 +39,7 @@ import capa.render.vverbose
|
||||
import capa.features.extractors
|
||||
import capa.features.extractors.common
|
||||
import capa.features.extractors.pefile
|
||||
import capa.features.extractors.elffile
|
||||
from capa.rules import Rule, RuleSet
|
||||
from capa.engine import FeatureSet, MatchResults
|
||||
from capa.helpers import get_file_taste
|
||||
@@ -942,8 +943,8 @@ def main(argv=None):
|
||||
return -1
|
||||
|
||||
if args.format == "pe" or (args.format == "auto" and taste.startswith(b"MZ")):
|
||||
# this pefile file feature extractor is pretty light weight: it doesn't do any code analysis.
|
||||
# so we can fairly quickly determine if the given PE file has "pure" file-scope rules
|
||||
# these pefile and elffile file feature extractors are pretty light weight: they don't do any code analysis.
|
||||
# so we can fairly quickly determine if the given file has "pure" file-scope rules
|
||||
# that indicate a limitation (like "file is packed based on section names")
|
||||
# and avoid doing a full code analysis on difficult/impossible binaries.
|
||||
try:
|
||||
@@ -953,6 +954,17 @@ def main(argv=None):
|
||||
except PEFormatError as e:
|
||||
logger.error("Input file '%s' is not a valid PE file: %s", args.sample, str(e))
|
||||
return -1
|
||||
|
||||
elif args.format == "elf" or (args.format == "auto" and taste.startswith(b"\x7fELF")):
|
||||
try:
|
||||
from elftools.common.exceptions import ELFError
|
||||
|
||||
file_extractor = capa.features.extractors.elffile.ElfFeatureExtractor(args.sample)
|
||||
except ELFError as e:
|
||||
logger.error("Input file '%s' is not a valid ELF file: %s", args.sample, str(e))
|
||||
return -1
|
||||
|
||||
if file_extractor:
|
||||
pure_file_capabilities, _ = find_file_capabilities(rules, file_extractor, {})
|
||||
|
||||
# file limitations that rely on non-file scope won't be detected here.
|
||||
|
||||
Reference in New Issue
Block a user