mirror of
https://github.com/mandiant/capa.git
synced 2025-12-12 15:49:46 -08:00
features: move OS and Format to their own features, not characteristics
This commit is contained in:
@@ -8,8 +8,8 @@
|
||||
- explorer: enforce max column width Features and Editor panes #691 @mike-hunhoff
|
||||
- explorer: add option to limit features to currently selected disassembly address #692 @mike-hunhoff
|
||||
- all: add support for ELF files #700 @Adir-Shemesh @TcM1911
|
||||
- rule format: add characteristic for file format, like `format/pe` @williballenthin
|
||||
- rule format: add characteristic for operating system, like `os/windows` @701 @williballenthin
|
||||
- rule format: add feature `format: ` for file format, like `format: pe` @williballenthin
|
||||
- rule format: add feature `os: ` for operating system, like `os: windows` #701 @williballenthin
|
||||
|
||||
### Breaking Changes
|
||||
|
||||
|
||||
@@ -14,6 +14,7 @@ from typing import Set, Dict, Union
|
||||
|
||||
import capa.engine
|
||||
import capa.features
|
||||
import capa.features.extractors.elf
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
MAX_BYTES_FEATURE_SIZE = 0x100
|
||||
@@ -22,16 +23,6 @@ MAX_BYTES_FEATURE_SIZE = 0x100
|
||||
THUNK_CHAIN_DEPTH_DELTA = 5
|
||||
|
||||
|
||||
OS_WINDOWS = "os/windows"
|
||||
OS_LINUX = "os/linux"
|
||||
OS_MACOS = "os/macos"
|
||||
VALID_OS = (OS_WINDOWS, OS_LINUX, OS_MACOS)
|
||||
|
||||
FORMAT_PE = "format/pe"
|
||||
FORMAT_ELF = "format/elf"
|
||||
VALID_FORMAT = (FORMAT_PE, FORMAT_ELF)
|
||||
|
||||
|
||||
def bytes_to_str(b: bytes) -> str:
    """
    render the given bytes as a hex string, two hex digits per input byte.
    """
    # the "hex" codec yields the hex digits as bytes; decode them to get a str.
    hex_digits = codecs.encode(b, "hex")
    return hex_digits.decode("utf-8")
|
||||
|
||||
@@ -139,14 +130,6 @@ class Characteristic(Feature):
|
||||
super(Characteristic, self).__init__(value, description=description)
|
||||
|
||||
|
||||
CHARACTERISTIC_WINDOWS = Characteristic(OS_WINDOWS)
|
||||
CHARACTERISTIC_LINUX = Characteristic(OS_LINUX)
|
||||
CHARACTERISTIC_MACOS = Characteristic(OS_MACOS)
|
||||
|
||||
CHARACTERISTIC_PE = Characteristic(FORMAT_PE)
|
||||
CHARACTERISTIC_ELF = Characteristic(FORMAT_ELF)
|
||||
|
||||
|
||||
class String(Feature):
|
||||
def __init__(self, value: str, description=None):
|
||||
super(String, self).__init__(value, description=description)
|
||||
@@ -280,15 +263,51 @@ class Bytes(Feature):
|
||||
BITNESS_X32 = "x32"
|
||||
BITNESS_X64 = "x64"
|
||||
VALID_BITNESS = (BITNESS_X32, BITNESS_X64)
|
||||
|
||||
|
||||
ARCH_I386 = "i386"
ARCH_AMD64 = "amd64"
VALID_ARCH = (ARCH_I386, ARCH_AMD64)


class Arch(Feature):
    """
    feature whose value is an architecture name, one of `VALID_ARCH`.
    """

    def __init__(self, value: str, description=None):
        # reject anything that is not one of the architecture names declared above.
        assert value in VALID_ARCH
        super().__init__(value, description=description)
        # assigned after the base initializer runs, so this is the name the instance ends up with.
        self.name = "arch"
|
||||
|
||||
|
||||
OS_WINDOWS = "windows"
OS_LINUX = "linux"
OS_MACOS = "macos"
# the values of the `capa.features.extractors.elf.OS` enum, plus the three names declared above.
VALID_OS = {member.value for member in capa.features.extractors.elf.OS} | {OS_WINDOWS, OS_LINUX, OS_MACOS}


class OS(Feature):
    """
    feature for the operating system, like `os: windows`.

    the value must be a member of `VALID_OS`.
    """

    def __init__(self, value: str, description=None):
        # reject operating system names that are not in VALID_OS.
        assert value in VALID_OS
        super().__init__(value, description=description)
        # assigned after the base initializer runs, so this is the name the instance ends up with.
        self.name = "os"
|
||||
|
||||
|
||||
FORMAT_PE = "pe"
FORMAT_ELF = "elf"
VALID_FORMAT = (FORMAT_PE, FORMAT_ELF)


class Format(Feature):
    """
    feature for the file format, like `format: pe`.

    the value must be one of `VALID_FORMAT`.
    """

    def __init__(self, value: str, description=None):
        # reject file format names that are not declared in VALID_FORMAT.
        assert value in VALID_FORMAT
        super().__init__(value, description=description)
        # assigned after the base initializer runs, so this is the name the instance ends up with.
        self.name = "format"
|
||||
|
||||
|
||||
def is_global_feature(feature):
|
||||
"""
|
||||
is this a feature that is extracted at every scope?
|
||||
today, these are OS and file format features.
|
||||
today, these are OS and arch features.
|
||||
"""
|
||||
if (
|
||||
isinstance(feature, Characteristic)
|
||||
and isinstance(feature.value, str)
|
||||
and (feature.value.startswith("os/") or feature.value.startswith("format/"))
|
||||
):
|
||||
return True
|
||||
return False
|
||||
return isinstance(feature, (OS, Arch))
|
||||
|
||||
@@ -4,27 +4,27 @@ import binascii
|
||||
import contextlib
|
||||
|
||||
import capa.features.extractors.elf
|
||||
from capa.features.common import CHARACTERISTIC_PE, CHARACTERISTIC_ELF, CHARACTERISTIC_WINDOWS, Characteristic
|
||||
from capa.features.common import OS, FORMAT_PE, FORMAT_ELF, OS_WINDOWS, Format
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def extract_format(buf):
|
||||
if buf.startswith(b"MZ"):
|
||||
yield CHARACTERISTIC_PE, 0x0
|
||||
yield Format(FORMAT_PE), 0x0
|
||||
elif buf.startswith(b"\x7fELF"):
|
||||
yield CHARACTERISTIC_ELF, 0x0
|
||||
yield Format(FORMAT_ELF), 0x0
|
||||
else:
|
||||
raise NotImplementedError("file format: %s", binascii.hexlify(buf[:4]).decode("ascii"))
|
||||
|
||||
|
||||
def extract_os(buf):
|
||||
if buf.startswith(b"MZ"):
|
||||
yield CHARACTERISTIC_WINDOWS, 0x0
|
||||
yield OS(OS_WINDOWS), 0x0
|
||||
elif buf.startswith(b"\x7fELF"):
|
||||
with contextlib.closing(io.BytesIO(buf)) as f:
|
||||
os = capa.features.extractors.elf.detect_elf_os(f)
|
||||
|
||||
yield Characteristic("os/%s" % (os.lower())), 0x0
|
||||
yield OS(os), 0x0
|
||||
else:
|
||||
raise NotImplementedError("file format: %s", binascii.hexlify(buf[:4]).decode("ascii"))
|
||||
|
||||
@@ -19,27 +19,25 @@ class CorruptElfFile(ValueError):
|
||||
|
||||
|
||||
class OS(str, Enum):
|
||||
HPUX = "HPUX"
|
||||
NETBSD = "NETBSD"
|
||||
LINUX = "LINUX"
|
||||
HURD = "HURD"
|
||||
_86OPEN = "86OPEN"
|
||||
SOLARIS = "SOLARIS"
|
||||
AIX = "AIX"
|
||||
IRIX = "IRIX"
|
||||
FREEBSD = "FREEBSD"
|
||||
TRU64 = "TRU64"
|
||||
MODESTO = "MODESTO"
|
||||
OPENBSD = "OPENBSD"
|
||||
OPENVMS = "OPENVMS"
|
||||
NSK = "NSK"
|
||||
AROS = "AROS"
|
||||
FENIXOS = "FENIXOS"
|
||||
CLOUD = "CLOUD"
|
||||
SORTFIX = "SORTFIX"
|
||||
ARM_AEABI = "ARM_AEABI"
|
||||
SYLLABLE = "SYLLABLE"
|
||||
NACL = "NACL"
|
||||
HPUX = "hpux"
|
||||
NETBSD = "netbsd"
|
||||
LINUX = "linux"
|
||||
HURD = "hurd"
|
||||
_86OPEN = "86open"
|
||||
SOLARIS = "solaris"
|
||||
AIX = "aix"
|
||||
IRIX = "irix"
|
||||
FREEBSD = "freebsd"
|
||||
TRU64 = "tru64"
|
||||
MODESTO = "modesto"
|
||||
OPENBSD = "openbsd"
|
||||
OPENVMS = "openvms"
|
||||
NSK = "nsk"
|
||||
AROS = "aros"
|
||||
FENIXOS = "fenixos"
|
||||
CLOUD = "cloud"
|
||||
SYLLABLE = "syllable"
|
||||
NACL = "nacl"
|
||||
|
||||
|
||||
def detect_elf_os(f: BinaryIO) -> str:
|
||||
|
||||
@@ -5,8 +5,6 @@
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
import logging
|
||||
import functools
|
||||
import contextlib
|
||||
|
||||
import idaapi
|
||||
@@ -18,7 +16,7 @@ import capa.features.extractors.ida.file
|
||||
import capa.features.extractors.ida.insn
|
||||
import capa.features.extractors.ida.function
|
||||
import capa.features.extractors.ida.basicblock
|
||||
from capa.features.common import CHARACTERISTIC_PE, CHARACTERISTIC_ELF, Characteristic
|
||||
from capa.features.common import OS, FORMAT_PE, FORMAT_ELF, OS_WINDOWS, Format
|
||||
from capa.features.extractors.base_extractor import FeatureExtractor
|
||||
|
||||
|
||||
@@ -26,20 +24,29 @@ def extract_format():
|
||||
format_name = ida_loader.get_file_type_name()
|
||||
|
||||
if "PE" in format_name:
|
||||
yield CHARACTERISTIC_PE, 0x0
|
||||
yield Format(FORMAT_PE), 0x0
|
||||
elif "ELF64" in format_name:
|
||||
yield CHARACTERISTIC_ELF, 0x0
|
||||
yield Format(FORMAT_ELF), 0x0
|
||||
elif "ELF32" in format_name:
|
||||
yield CHARACTERISTIC_ELF, 0x0
|
||||
yield Format(FORMAT_ELF), 0x0
|
||||
else:
|
||||
raise NotImplementedError("file format: %s", format_name)
|
||||
|
||||
|
||||
def extract_os():
|
||||
with contextlib.closing(capa.ida.helpers.IDAIO()) as f:
|
||||
os = capa.features.extractors.elf.detect_elf_os(f)
|
||||
format_name = ida_loader.get_file_type_name()
|
||||
|
||||
yield Characteristic("os/%s" % (os.lower())), 0x0
|
||||
if "PE" in format_name:
|
||||
yield OS(OS_WINDOWS), 0x0
|
||||
|
||||
elif "ELF" in format_name:
|
||||
with contextlib.closing(capa.ida.helpers.IDAIO()) as f:
|
||||
os = capa.features.extractors.elf.detect_elf_os(f)
|
||||
|
||||
yield OS(os), 0x0
|
||||
|
||||
else:
|
||||
raise NotImplementedError("file format: %s", format_name)
|
||||
|
||||
|
||||
class FunctionHandle:
|
||||
|
||||
@@ -14,7 +14,7 @@ import capa.features.extractors
|
||||
import capa.features.extractors.helpers
|
||||
import capa.features.extractors.strings
|
||||
from capa.features.file import Export, Import, Section
|
||||
from capa.features.common import CHARACTERISTIC_PE, CHARACTERISTIC_WINDOWS, String, Characteristic
|
||||
from capa.features.common import OS, Format, String, Characteristic, OS_WINDOWS, FORMAT_PE
|
||||
from capa.features.extractors.base_extractor import FeatureExtractor
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -114,11 +114,11 @@ def extract_file_function_names(pe, file_path):
|
||||
def extract_os(pe, file_path):
|
||||
# assuming PE -> Windows
|
||||
# though i suppose they're also used by UEFI
|
||||
yield CHARACTERISTIC_WINDOWS, 0x0
|
||||
yield OS(OS_WINDOWS), 0x0
|
||||
|
||||
|
||||
def extract_format(pe, file_path):
|
||||
yield CHARACTERISTIC_PE, 0x0
|
||||
yield Format(FORMAT_PE), 0x0
|
||||
|
||||
|
||||
def extract_file_features(pe, file_path):
|
||||
|
||||
@@ -34,15 +34,7 @@ import capa.features.insn
|
||||
import capa.features.common
|
||||
import capa.features.basicblock
|
||||
from capa.engine import Statement, FeatureSet
|
||||
from capa.features.common import (
|
||||
CHARACTERISTIC_PE,
|
||||
CHARACTERISTIC_ELF,
|
||||
CHARACTERISTIC_LINUX,
|
||||
CHARACTERISTIC_MACOS,
|
||||
CHARACTERISTIC_WINDOWS,
|
||||
MAX_BYTES_FEATURE_SIZE,
|
||||
Feature,
|
||||
)
|
||||
from capa.features.common import MAX_BYTES_FEATURE_SIZE, Feature
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -86,11 +78,8 @@ SUPPORTED_FEATURES = {
|
||||
capa.features.file.FunctionName,
|
||||
capa.features.common.Characteristic("embedded pe"),
|
||||
capa.features.common.String,
|
||||
CHARACTERISTIC_WINDOWS,
|
||||
CHARACTERISTIC_LINUX,
|
||||
CHARACTERISTIC_MACOS,
|
||||
CHARACTERISTIC_PE,
|
||||
CHARACTERISTIC_ELF,
|
||||
capa.features.common.Format,
|
||||
capa.features.common.OS,
|
||||
},
|
||||
FUNCTION_SCOPE: {
|
||||
# plus basic block scope features, see below
|
||||
@@ -99,11 +88,7 @@ SUPPORTED_FEATURES = {
|
||||
capa.features.common.Characteristic("calls to"),
|
||||
capa.features.common.Characteristic("loop"),
|
||||
capa.features.common.Characteristic("recursive call"),
|
||||
CHARACTERISTIC_WINDOWS,
|
||||
CHARACTERISTIC_LINUX,
|
||||
CHARACTERISTIC_MACOS,
|
||||
CHARACTERISTIC_PE,
|
||||
CHARACTERISTIC_ELF,
|
||||
capa.features.common.OS,
|
||||
},
|
||||
BASIC_BLOCK_SCOPE: {
|
||||
capa.features.common.MatchedRule,
|
||||
@@ -121,11 +106,7 @@ SUPPORTED_FEATURES = {
|
||||
capa.features.common.Characteristic("tight loop"),
|
||||
capa.features.common.Characteristic("stack string"),
|
||||
capa.features.common.Characteristic("indirect call"),
|
||||
CHARACTERISTIC_WINDOWS,
|
||||
CHARACTERISTIC_LINUX,
|
||||
CHARACTERISTIC_MACOS,
|
||||
CHARACTERISTIC_PE,
|
||||
CHARACTERISTIC_ELF,
|
||||
capa.features.common.OS,
|
||||
},
|
||||
}
|
||||
|
||||
@@ -243,16 +224,16 @@ def parse_feature(key: str):
|
||||
bitness = key.partition("/")[2]
|
||||
# the other handlers here return constructors for features,
|
||||
# and we want to as well,
|
||||
# however, we need to preconfigure one of the arguments (`arch`).
|
||||
# however, we need to preconfigure one of the arguments (`bitness`).
|
||||
# so, instead we return a partially-applied function that
|
||||
# provides `arch` to the feature constructor.
|
||||
# provides `bitness` to the feature constructor.
|
||||
# it forwards any other arguments provided to the closure along to the constructor.
|
||||
return functools.partial(capa.features.insn.Number, arch=bitness)
|
||||
return functools.partial(capa.features.insn.Number, bitness=bitness)
|
||||
elif key == "offset":
|
||||
return capa.features.insn.Offset
|
||||
elif key.startswith("offset/"):
|
||||
bitness = key.partition("/")[2]
|
||||
return functools.partial(capa.features.insn.Offset, arch=bitness)
|
||||
return functools.partial(capa.features.insn.Offset, bitness=bitness)
|
||||
elif key == "mnemonic":
|
||||
return capa.features.insn.Mnemonic
|
||||
elif key == "basic blocks":
|
||||
@@ -269,6 +250,10 @@ def parse_feature(key: str):
|
||||
return capa.features.common.MatchedRule
|
||||
elif key == "function-name":
|
||||
return capa.features.file.FunctionName
|
||||
elif key == "os":
|
||||
return capa.features.common.OS
|
||||
elif key == "format":
|
||||
return capa.features.common.Format
|
||||
else:
|
||||
raise InvalidRule("unexpected statement: %s" % key)
|
||||
|
||||
|
||||
@@ -43,7 +43,7 @@ import capa.rules
|
||||
import capa.engine
|
||||
import capa.features
|
||||
import capa.features.insn
|
||||
from capa.features.common import ARCH_X32, ARCH_X64, String
|
||||
from capa.features.common import BITNESS_X32, BITNESS_X64, String
|
||||
|
||||
logger = logging.getLogger("capa2yara")
|
||||
|
||||
|
||||
@@ -24,10 +24,12 @@ import capa.features.basicblock
|
||||
from capa.features.common import (
|
||||
BITNESS_X32,
|
||||
BITNESS_X64,
|
||||
CHARACTERISTIC_PE,
|
||||
CHARACTERISTIC_ELF,
|
||||
CHARACTERISTIC_LINUX,
|
||||
CHARACTERISTIC_WINDOWS,
|
||||
FORMAT_ELF,
|
||||
FORMAT_PE,
|
||||
Format,
|
||||
OS,
|
||||
OS_LINUX,
|
||||
OS_WINDOWS,
|
||||
)
|
||||
|
||||
CD = os.path.dirname(__file__)
|
||||
@@ -511,17 +513,17 @@ FEATURE_PRESENCE_TESTS = sorted(
|
||||
# file/function-name
|
||||
("pma16-01", "file", capa.features.file.FunctionName("__aulldiv"), True),
|
||||
# os & format
|
||||
("pma16-01", "file", CHARACTERISTIC_WINDOWS, True),
|
||||
("pma16-01", "file", CHARACTERISTIC_LINUX, False),
|
||||
("pma16-01", "function=0x404356", CHARACTERISTIC_WINDOWS, True),
|
||||
("pma16-01", "function=0x404356,bb=0x4043B9", CHARACTERISTIC_WINDOWS, True),
|
||||
("pma16-01", "file", CHARACTERISTIC_PE, True),
|
||||
("pma16-01", "file", CHARACTERISTIC_ELF, False),
|
||||
("pma16-01", "function=0x404356", CHARACTERISTIC_PE, True),
|
||||
("pma16-01", "function=0x404356,bb=0x4043B9", CHARACTERISTIC_PE, True),
|
||||
("pma16-01", "file", OS(OS_WINDOWS), True),
|
||||
("pma16-01", "file", OS(OS_LINUX), False),
|
||||
("pma16-01", "function=0x404356", OS(OS_WINDOWS), True),
|
||||
("pma16-01", "function=0x404356,bb=0x4043B9", OS(OS_WINDOWS), True),
|
||||
("pma16-01", "file", Format(FORMAT_PE), True),
|
||||
("pma16-01", "file", Format(FORMAT_ELF), False),
|
||||
("pma16-01", "function=0x404356", Format(FORMAT_PE), True),
|
||||
("pma16-01", "function=0x404356,bb=0x4043B9", Format(FORMAT_PE), True),
|
||||
# elf support
|
||||
("7351f.elf", "file", CHARACTERISTIC_LINUX, True),
|
||||
("7351f.elf", "file", CHARACTERISTIC_ELF, True),
|
||||
("7351f.elf", "file", OS(OS_LINUX), True),
|
||||
("7351f.elf", "file", OS(OS_WINDOWS), False),
|
||||
("7351f.elf", "function=0x408753", capa.features.common.String("/dev/null"), True),
|
||||
("7351f.elf", "function=0x408753,bb=0x408781", capa.features.insn.API("open"), True),
|
||||
],
|
||||
|
||||
@@ -16,14 +16,15 @@ import capa.features.common
|
||||
from capa.features.file import FunctionName
|
||||
from capa.features.insn import Number, Offset
|
||||
from capa.features.common import (
|
||||
FORMAT_PE,
|
||||
FORMAT_ELF,
|
||||
OS_WINDOWS,
|
||||
OS_LINUX,
|
||||
BITNESS_X32,
|
||||
BITNESS_X64,
|
||||
FORMAT_PE,
|
||||
OS_WINDOWS,
|
||||
CHARACTERISTIC_PE,
|
||||
CHARACTERISTIC_WINDOWS,
|
||||
String,
|
||||
Characteristic,
|
||||
OS,
|
||||
Format
|
||||
)
|
||||
|
||||
|
||||
@@ -964,13 +965,13 @@ def test_os_features():
|
||||
scope: file
|
||||
features:
|
||||
- and:
|
||||
- characteristic: os/windows
|
||||
- os: windows
|
||||
"""
|
||||
)
|
||||
r = capa.rules.Rule.from_yaml(rule)
|
||||
children = list(r.statement.get_children())
|
||||
assert (CHARACTERISTIC_WINDOWS in children) == True
|
||||
assert (CHARACTERISTIC_LINUX not in children) == True
|
||||
assert (OS(OS_WINDOWS) in children) == True
|
||||
assert (OS(OS_LINUX) not in children) == True
|
||||
|
||||
|
||||
def test_format_features():
|
||||
@@ -982,10 +983,10 @@ def test_format_features():
|
||||
scope: file
|
||||
features:
|
||||
- and:
|
||||
- characteristic: format/pe
|
||||
- format: pe
|
||||
"""
|
||||
)
|
||||
r = capa.rules.Rule.from_yaml(rule)
|
||||
children = list(r.statement.get_children())
|
||||
assert (CHARACTERISTIC_PE in children) == True
|
||||
assert (CHARACTERISTIC_ELF not in children) == True
|
||||
assert (Format(FORMAT_PE) in children) == True
|
||||
assert (Format(FORMAT_ELF) not in children) == True
|
||||
|
||||
Reference in New Issue
Block a user