features: move OS and Format to their own features, not characteristics

This commit is contained in:
William Ballenthin
2021-08-16 16:28:26 -06:00
parent f013815b2a
commit ab1326f858
10 changed files with 132 additions and 120 deletions

View File

@@ -8,8 +8,8 @@
- explorer: enforce max column width Features and Editor panes #691 @mike-hunhoff
- explorer: add option to limit features to currently selected disassembly address #692 @mike-hunhoff
- all: add support for ELF files #700 @Adir-Shemesh @TcM1911
- rule format: add characteristic for file format, like `format/pe` @williballenthin
- rule format: add characteristic for operating system, like `os/windows` @701 @williballenthin
- rule format: add feature `format: ` for file format, like `format: pe` @williballenthin
- rule format: add feature `os: ` for operating system, like `os: windows` #701 @williballenthin
### Breaking Changes

View File

@@ -14,6 +14,7 @@ from typing import Set, Dict, Union
import capa.engine
import capa.features
import capa.features.extractors.elf
logger = logging.getLogger(__name__)
MAX_BYTES_FEATURE_SIZE = 0x100
@@ -22,16 +23,6 @@ MAX_BYTES_FEATURE_SIZE = 0x100
THUNK_CHAIN_DEPTH_DELTA = 5
OS_WINDOWS = "os/windows"
OS_LINUX = "os/linux"
OS_MACOS = "os/macos"
VALID_OS = (OS_WINDOWS, OS_LINUX, OS_MACOS)
FORMAT_PE = "format/pe"
FORMAT_ELF = "format/elf"
VALID_FORMAT = (FORMAT_PE, FORMAT_ELF)
def bytes_to_str(b: bytes) -> str:
    """
    render a byte string as lowercase hexadecimal text, two digits per byte.

    args:
      b: the bytes to encode.

    returns:
      the hex string, e.g. b"AB" -> "4142"; empty input gives "".
    """
    # bytes.hex() is the builtin equivalent of the codecs "hex" encode/decode round-trip.
    return b.hex()
@@ -139,14 +130,6 @@ class Characteristic(Feature):
super(Characteristic, self).__init__(value, description=description)
CHARACTERISTIC_WINDOWS = Characteristic(OS_WINDOWS)
CHARACTERISTIC_LINUX = Characteristic(OS_LINUX)
CHARACTERISTIC_MACOS = Characteristic(OS_MACOS)
CHARACTERISTIC_PE = Characteristic(FORMAT_PE)
CHARACTERISTIC_ELF = Characteristic(FORMAT_ELF)
class String(Feature):
def __init__(self, value: str, description=None):
super(String, self).__init__(value, description=description)
@@ -280,15 +263,51 @@ class Bytes(Feature):
BITNESS_X32 = "x32"
BITNESS_X64 = "x64"
VALID_BITNESS = (BITNESS_X32, BITNESS_X64)
ARCH_I386 = "i386"
ARCH_AMD64 = "amd64"
VALID_ARCH = (ARCH_I386, ARCH_AMD64)
class Arch(Feature):
    """feature whose value is one of the architecture names listed in VALID_ARCH."""

    def __init__(self, value: str, description=None):
        # only architecture names declared in VALID_ARCH are accepted.
        assert value in VALID_ARCH
        super().__init__(value, description=description)
        # set after the base initializer has run, so this assignment is the one that sticks.
        self.name = "arch"
# operating system names used as values of the OS feature.
OS_WINDOWS = "windows"
OS_LINUX = "linux"
OS_MACOS = "macos"

# every value of the ELF extractor's OS enum, plus the three names declared above.
VALID_OS = {member.value for member in capa.features.extractors.elf.OS} | {OS_WINDOWS, OS_LINUX, OS_MACOS}
class OS(Feature):
    """feature whose value is one of the operating system names in VALID_OS."""

    def __init__(self, value: str, description=None):
        # only operating system names present in VALID_OS are accepted.
        assert value in VALID_OS
        super().__init__(value, description=description)
        # set after the base initializer has run, so this assignment is the one that sticks.
        self.name = "os"
FORMAT_PE = "pe"
FORMAT_ELF = "elf"
VALID_FORMAT = (FORMAT_PE, FORMAT_ELF)
class Format(Feature):
    """feature whose value is one of the file format names in VALID_FORMAT."""

    def __init__(self, value: str, description=None):
        # only file format names present in VALID_FORMAT are accepted.
        assert value in VALID_FORMAT
        super().__init__(value, description=description)
        # set after the base initializer has run, so this assignment is the one that sticks.
        self.name = "format"
def is_global_feature(feature):
    """
    is this a feature that is extracted at every scope?
    today, these are OS and arch features.

    args:
      feature: the feature instance to classify.

    returns:
      True when the feature is an OS or Arch instance, False otherwise.
    """
    # NOTE(review): Format is not treated as global here; confirm that is intended.
    return isinstance(feature, (OS, Arch))

View File

@@ -4,27 +4,27 @@ import binascii
import contextlib
import capa.features.extractors.elf
from capa.features.common import CHARACTERISTIC_PE, CHARACTERISTIC_ELF, CHARACTERISTIC_WINDOWS, Characteristic
from capa.features.common import OS, FORMAT_PE, FORMAT_ELF, OS_WINDOWS, Format
logger = logging.getLogger(__name__)
def extract_format(buf):
    """
    yield the file format feature for the given file contents.

    args:
      buf: the raw bytes of the file.

    yields:
      a (Format, 0x0) pair: Format(FORMAT_PE) when the buffer starts with `MZ`,
      Format(FORMAT_ELF) when it starts with the ELF magic.

    raises:
      NotImplementedError: when the buffer starts with neither magic value;
        the message carries the first four bytes as hex.
    """
    if buf.startswith(b"MZ"):
        yield Format(FORMAT_PE), 0x0
    elif buf.startswith(b"\x7fELF"):
        yield Format(FORMAT_ELF), 0x0
    else:
        # interpolate explicitly: unlike logging calls, exceptions do not apply
        # %-formatting to additional positional arguments.
        raise NotImplementedError("file format: %s" % binascii.hexlify(buf[:4]).decode("ascii"))
def extract_os(buf):
    """
    yield the operating system feature for the given file contents.

    args:
      buf: the raw bytes of the file.

    yields:
      a (OS, 0x0) pair: OS(OS_WINDOWS) when the buffer starts with `MZ`;
      for a buffer starting with the ELF magic, an OS built from whatever
      `detect_elf_os` returns.

    raises:
      NotImplementedError: when the buffer starts with neither magic value;
        the message carries the first four bytes as hex.
    """
    if buf.startswith(b"MZ"):
        yield OS(OS_WINDOWS), 0x0
    elif buf.startswith(b"\x7fELF"):
        # detect_elf_os takes a file-like object, so wrap the buffer for the call.
        with contextlib.closing(io.BytesIO(buf)) as f:
            elf_os = capa.features.extractors.elf.detect_elf_os(f)

        yield OS(elf_os), 0x0
    else:
        # interpolate explicitly: unlike logging calls, exceptions do not apply
        # %-formatting to additional positional arguments.
        raise NotImplementedError("file format: %s" % binascii.hexlify(buf[:4]).decode("ascii"))

View File

@@ -19,27 +19,25 @@ class CorruptElfFile(ValueError):
class OS(str, Enum):
    """
    operating system names used by the ELF extractor.

    members are also `str` instances, and every value is lowercase.
    """

    HPUX = "hpux"
    NETBSD = "netbsd"
    LINUX = "linux"
    HURD = "hurd"
    # leading underscore because an identifier cannot start with a digit.
    _86OPEN = "86open"
    SOLARIS = "solaris"
    AIX = "aix"
    IRIX = "irix"
    FREEBSD = "freebsd"
    TRU64 = "tru64"
    MODESTO = "modesto"
    OPENBSD = "openbsd"
    OPENVMS = "openvms"
    NSK = "nsk"
    AROS = "aros"
    FENIXOS = "fenixos"
    CLOUD = "cloud"
    SYLLABLE = "syllable"
    NACL = "nacl"
def detect_elf_os(f: BinaryIO) -> str:

View File

@@ -5,8 +5,6 @@
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import logging
import functools
import contextlib
import idaapi
@@ -18,7 +16,7 @@ import capa.features.extractors.ida.file
import capa.features.extractors.ida.insn
import capa.features.extractors.ida.function
import capa.features.extractors.ida.basicblock
from capa.features.common import CHARACTERISTIC_PE, CHARACTERISTIC_ELF, Characteristic
from capa.features.common import OS, FORMAT_PE, FORMAT_ELF, OS_WINDOWS, Format
from capa.features.extractors.base_extractor import FeatureExtractor
@@ -26,20 +24,29 @@ def extract_format():
format_name = ida_loader.get_file_type_name()
if "PE" in format_name:
yield CHARACTERISTIC_PE, 0x0
yield Format(FORMAT_PE), 0x0
elif "ELF64" in format_name:
yield CHARACTERISTIC_ELF, 0x0
yield Format(FORMAT_ELF), 0x0
elif "ELF32" in format_name:
yield CHARACTERISTIC_ELF, 0x0
yield Format(FORMAT_ELF), 0x0
else:
raise NotImplementedError("file format: %s", format_name)
def extract_os():
    """
    yield the operating system feature for the file loaded in IDA.

    the decision is made from the file type name that IDA reports.

    yields:
      a (OS, 0x0) pair: OS(OS_WINDOWS) when the type name contains "PE";
      when it contains "ELF", an OS built from whatever `detect_elf_os` returns.

    raises:
      NotImplementedError: when the type name contains neither "PE" nor "ELF";
        the message carries the reported type name.
    """
    format_name = ida_loader.get_file_type_name()

    if "PE" in format_name:
        yield OS(OS_WINDOWS), 0x0
    elif "ELF" in format_name:
        # detect_elf_os takes a file-like object; IDAIO is the one handed to it here.
        with contextlib.closing(capa.ida.helpers.IDAIO()) as f:
            elf_os = capa.features.extractors.elf.detect_elf_os(f)

        yield OS(elf_os), 0x0
    else:
        # interpolate explicitly: unlike logging calls, exceptions do not apply
        # %-formatting to additional positional arguments.
        raise NotImplementedError("file format: %s" % format_name)
class FunctionHandle:

View File

@@ -14,7 +14,7 @@ import capa.features.extractors
import capa.features.extractors.helpers
import capa.features.extractors.strings
from capa.features.file import Export, Import, Section
from capa.features.common import CHARACTERISTIC_PE, CHARACTERISTIC_WINDOWS, String, Characteristic
from capa.features.common import OS, Format, String, Characteristic, OS_WINDOWS, FORMAT_PE
from capa.features.extractors.base_extractor import FeatureExtractor
logger = logging.getLogger(__name__)
@@ -114,11 +114,11 @@ def extract_file_function_names(pe, file_path):
def extract_os(pe, file_path):
# assuming PE -> Windows
# though i suppose they're also used by UEFI
yield CHARACTERISTIC_WINDOWS, 0x0
yield OS(OS_WINDOWS), 0x0
def extract_format(pe, file_path):
yield CHARACTERISTIC_PE, 0x0
yield Format(FORMAT_PE), 0x0
def extract_file_features(pe, file_path):

View File

@@ -34,15 +34,7 @@ import capa.features.insn
import capa.features.common
import capa.features.basicblock
from capa.engine import Statement, FeatureSet
from capa.features.common import (
CHARACTERISTIC_PE,
CHARACTERISTIC_ELF,
CHARACTERISTIC_LINUX,
CHARACTERISTIC_MACOS,
CHARACTERISTIC_WINDOWS,
MAX_BYTES_FEATURE_SIZE,
Feature,
)
from capa.features.common import MAX_BYTES_FEATURE_SIZE, Feature
logger = logging.getLogger(__name__)
@@ -86,11 +78,8 @@ SUPPORTED_FEATURES = {
capa.features.file.FunctionName,
capa.features.common.Characteristic("embedded pe"),
capa.features.common.String,
CHARACTERISTIC_WINDOWS,
CHARACTERISTIC_LINUX,
CHARACTERISTIC_MACOS,
CHARACTERISTIC_PE,
CHARACTERISTIC_ELF,
capa.features.common.Format,
capa.features.common.OS,
},
FUNCTION_SCOPE: {
# plus basic block scope features, see below
@@ -99,11 +88,7 @@ SUPPORTED_FEATURES = {
capa.features.common.Characteristic("calls to"),
capa.features.common.Characteristic("loop"),
capa.features.common.Characteristic("recursive call"),
CHARACTERISTIC_WINDOWS,
CHARACTERISTIC_LINUX,
CHARACTERISTIC_MACOS,
CHARACTERISTIC_PE,
CHARACTERISTIC_ELF,
capa.features.common.OS,
},
BASIC_BLOCK_SCOPE: {
capa.features.common.MatchedRule,
@@ -121,11 +106,7 @@ SUPPORTED_FEATURES = {
capa.features.common.Characteristic("tight loop"),
capa.features.common.Characteristic("stack string"),
capa.features.common.Characteristic("indirect call"),
CHARACTERISTIC_WINDOWS,
CHARACTERISTIC_LINUX,
CHARACTERISTIC_MACOS,
CHARACTERISTIC_PE,
CHARACTERISTIC_ELF,
capa.features.common.OS,
},
}
@@ -243,16 +224,16 @@ def parse_feature(key: str):
bitness = key.partition("/")[2]
# the other handlers here return constructors for features,
# and we want to as well,
# however, we need to preconfigure one of the arguments (`arch`).
# however, we need to preconfigure one of the arguments (`bitness`).
# so, instead we return a partially-applied function that
# provides `arch` to the feature constructor.
# provides `bitness` to the feature constructor.
# it forwards any other arguments provided to the closure along to the constructor.
return functools.partial(capa.features.insn.Number, arch=bitness)
return functools.partial(capa.features.insn.Number, bitness=bitness)
elif key == "offset":
return capa.features.insn.Offset
elif key.startswith("offset/"):
bitness = key.partition("/")[2]
return functools.partial(capa.features.insn.Offset, arch=bitness)
return functools.partial(capa.features.insn.Offset, bitness=bitness)
elif key == "mnemonic":
return capa.features.insn.Mnemonic
elif key == "basic blocks":
@@ -269,6 +250,10 @@ def parse_feature(key: str):
return capa.features.common.MatchedRule
elif key == "function-name":
return capa.features.file.FunctionName
elif key == "os":
return capa.features.common.OS
elif key == "format":
return capa.features.common.Format
else:
raise InvalidRule("unexpected statement: %s" % key)

View File

@@ -43,7 +43,7 @@ import capa.rules
import capa.engine
import capa.features
import capa.features.insn
from capa.features.common import ARCH_X32, ARCH_X64, String
from capa.features.common import BITNESS_X32, BITNESS_X64, String
logger = logging.getLogger("capa2yara")

View File

@@ -24,10 +24,12 @@ import capa.features.basicblock
from capa.features.common import (
BITNESS_X32,
BITNESS_X64,
CHARACTERISTIC_PE,
CHARACTERISTIC_ELF,
CHARACTERISTIC_LINUX,
CHARACTERISTIC_WINDOWS,
FORMAT_ELF,
FORMAT_PE,
Format,
OS,
OS_LINUX,
OS_WINDOWS,
)
CD = os.path.dirname(__file__)
@@ -511,17 +513,17 @@ FEATURE_PRESENCE_TESTS = sorted(
# file/function-name
("pma16-01", "file", capa.features.file.FunctionName("__aulldiv"), True),
# os & format
("pma16-01", "file", CHARACTERISTIC_WINDOWS, True),
("pma16-01", "file", CHARACTERISTIC_LINUX, False),
("pma16-01", "function=0x404356", CHARACTERISTIC_WINDOWS, True),
("pma16-01", "function=0x404356,bb=0x4043B9", CHARACTERISTIC_WINDOWS, True),
("pma16-01", "file", CHARACTERISTIC_PE, True),
("pma16-01", "file", CHARACTERISTIC_ELF, False),
("pma16-01", "function=0x404356", CHARACTERISTIC_PE, True),
("pma16-01", "function=0x404356,bb=0x4043B9", CHARACTERISTIC_PE, True),
("pma16-01", "file", OS(OS_WINDOWS), True),
("pma16-01", "file", OS(OS_LINUX), False),
("pma16-01", "function=0x404356", OS(OS_WINDOWS), True),
("pma16-01", "function=0x404356,bb=0x4043B9", OS(OS_WINDOWS), True),
("pma16-01", "file", Format(FORMAT_PE), True),
("pma16-01", "file", Format(FORMAT_ELF), False),
("pma16-01", "function=0x404356", Format(FORMAT_PE), True),
("pma16-01", "function=0x404356,bb=0x4043B9", Format(FORMAT_PE), True),
# elf support
("7351f.elf", "file", CHARACTERISTIC_LINUX, True),
("7351f.elf", "file", CHARACTERISTIC_ELF, True),
("7351f.elf", "file", OS(OS_LINUX), True),
("7351f.elf", "file", OS(OS_WINDOWS), False),
("7351f.elf", "function=0x408753", capa.features.common.String("/dev/null"), True),
("7351f.elf", "function=0x408753,bb=0x408781", capa.features.insn.API("open"), True),
],

View File

@@ -16,14 +16,15 @@ import capa.features.common
from capa.features.file import FunctionName
from capa.features.insn import Number, Offset
from capa.features.common import (
FORMAT_PE,
FORMAT_ELF,
OS_WINDOWS,
OS_LINUX,
BITNESS_X32,
BITNESS_X64,
FORMAT_PE,
OS_WINDOWS,
CHARACTERISTIC_PE,
CHARACTERISTIC_WINDOWS,
String,
Characteristic,
OS,
Format
)
@@ -964,13 +965,13 @@ def test_os_features():
scope: file
features:
- and:
- characteristic: os/windows
- os: windows
"""
)
r = capa.rules.Rule.from_yaml(rule)
children = list(r.statement.get_children())
assert (CHARACTERISTIC_WINDOWS in children) == True
assert (CHARACTERISTIC_LINUX not in children) == True
assert (OS(OS_WINDOWS) in children) == True
assert (OS(OS_LINUX) not in children) == True
def test_format_features():
@@ -982,10 +983,10 @@ def test_format_features():
scope: file
features:
- and:
- characteristic: format/pe
- format: pe
"""
)
r = capa.rules.Rule.from_yaml(rule)
children = list(r.statement.get_children())
assert (CHARACTERISTIC_PE in children) == True
assert (CHARACTERISTIC_ELF not in children) == True
assert (Format(FORMAT_PE) in children) == True
assert (Format(FORMAT_ELF) not in children) == True