features: rename legacy term arch to bitness

makes space for upcoming feature `arch: ` for things like i386/amd64/aarch64
This commit is contained in:
William Ballenthin
2021-08-16 12:21:25 -06:00
parent d5c9a5cf3c
commit f013815b2a
10 changed files with 81 additions and 79 deletions

View File

@@ -13,6 +13,8 @@
### Breaking Changes
- legacy term `arch` (e.g., "x32") is now called `bitness` @williballenthin
### New Rules (20)
- collection/webcam/capture-webcam-image johnk3r

View File

@@ -21,11 +21,6 @@ MAX_BYTES_FEATURE_SIZE = 0x100
# thunks may be chained so we specify a delta to control the depth to which these chains are explored
THUNK_CHAIN_DEPTH_DELTA = 5
# identifiers for supported architectures names that tweak a feature
# for example, offset/x32
ARCH_X32 = "x32"
ARCH_X64 = "x64"
VALID_ARCH = (ARCH_X32, ARCH_X64)
OS_WINDOWS = "os/windows"
OS_LINUX = "os/linux"
@@ -61,33 +56,33 @@ def escape_string(s: str) -> str:
class Feature:
def __init__(self, value: Union[str, int, bytes], arch=None, description=None):
def __init__(self, value: Union[str, int, bytes], bitness=None, description=None):
"""
Args:
value (any): the value of the feature, such as the number or string.
arch (str): one of the VALID_ARCH values, or None.
When None, then the feature applies to any architecture.
Modifies the feature name from `feature` to `feature/arch`, like `offset/x32`.
bitness (str): one of the VALID_BITNESS values, or None.
When None, then the feature applies to any bitness.
Modifies the feature name from `feature` to `feature/bitness`, like `offset/x32`.
description (str): a human-readable description that explains the feature value.
"""
super(Feature, self).__init__()
if arch is not None:
if arch not in VALID_ARCH:
raise ValueError("arch '%s' must be one of %s" % (arch, VALID_ARCH))
self.name = self.__class__.__name__.lower() + "/" + arch
if bitness is not None:
if bitness not in VALID_BITNESS:
raise ValueError("bitness '%s' must be one of %s" % (bitness, VALID_BITNESS))
self.name = self.__class__.__name__.lower() + "/" + bitness
else:
self.name = self.__class__.__name__.lower()
self.value = value
self.arch = arch
self.bitness = bitness
self.description = description
def __hash__(self):
return hash((self.name, self.value, self.arch))
return hash((self.name, self.value, self.bitness))
def __eq__(self, other):
return self.name == other.name and self.value == other.value and self.arch == other.arch
return self.name == other.name and self.value == other.value and self.bitness == other.bitness
def get_value_str(self) -> str:
"""
@@ -114,8 +109,8 @@ class Feature:
return capa.engine.Result(self in ctx, self, [], locations=ctx.get(self, []))
def freeze_serialize(self):
if self.arch is not None:
return (self.__class__.__name__, [self.value, {"arch": self.arch}])
if self.bitness is not None:
return (self.__class__.__name__, [self.value, {"bitness": self.bitness}])
else:
return (self.__class__.__name__, [self.value])
@@ -280,6 +275,11 @@ class Bytes(Feature):
return cls(*[codecs.decode(x, "hex") for x in args])
# identifiers for supported bitness names that tweak a feature
# for example, offset/x32
BITNESS_X32 = "x32"
BITNESS_X64 = "x64"
VALID_BITNESS = (BITNESS_X32, BITNESS_X64)
def is_global_feature(feature):
"""
is this a feature that is extracted at every scope?

View File

@@ -14,8 +14,8 @@ import capa.features.extractors.helpers
import capa.features.extractors.ida.helpers
from capa.features.insn import API, Number, Offset, Mnemonic
from capa.features.common import (
ARCH_X32,
ARCH_X64,
BITNESS_X32,
BITNESS_X64,
MAX_BYTES_FEATURE_SIZE,
THUNK_CHAIN_DEPTH_DELTA,
Bytes,
@@ -28,22 +28,22 @@ from capa.features.common import (
SECURITY_COOKIE_BYTES_DELTA = 0x40
def get_arch(ctx):
def get_bitness(ctx):
"""
fetch the ARCH_* constant for the currently open workspace.
fetch the BITNESS_* constant for the currently open workspace.
via Tamir Bahar/@tmr232
https://reverseengineering.stackexchange.com/a/11398/17194
"""
if "arch" not in ctx:
if "bitness" not in ctx:
info = idaapi.get_inf_structure()
if info.is_64bit():
ctx["arch"] = ARCH_X64
ctx["bitness"] = BITNESS_X64
elif info.is_32bit():
ctx["arch"] = ARCH_X32
ctx["bitness"] = BITNESS_X32
else:
raise ValueError("unexpected architecture")
return ctx["arch"]
raise ValueError("unexpected bitness")
return ctx["bitness"]
def get_imports(ctx):
@@ -149,7 +149,7 @@ def extract_insn_number_features(f, bb, insn):
const = op.addr
yield Number(const), insn.ea
yield Number(const, arch=get_arch(f.ctx)), insn.ea
yield Number(const, bitness=get_bitness(f.ctx)), insn.ea
def extract_insn_bytes_features(f, bb, insn):
@@ -218,7 +218,7 @@ def extract_insn_offset_features(f, bb, insn):
op_off = capa.features.extractors.helpers.twos_complement(op_off, 32)
yield Offset(op_off), insn.ea
yield Offset(op_off, arch=get_arch(f.ctx)), insn.ea
yield Offset(op_off, bitness=get_bitness(f.ctx)), insn.ea
def contains_stack_cookie_keywords(s):

View File

@@ -7,8 +7,8 @@ from smda.common.SmdaReport import SmdaReport
import capa.features.extractors.helpers
from capa.features.insn import API, Number, Offset, Mnemonic
from capa.features.common import (
ARCH_X32,
ARCH_X64,
BITNESS_X32,
BITNESS_X64,
MAX_BYTES_FEATURE_SIZE,
THUNK_CHAIN_DEPTH_DELTA,
Bytes,
@@ -23,12 +23,12 @@ PATTERN_HEXNUM = re.compile(r"[+\-] (?P<num>0x[a-fA-F0-9]+)")
PATTERN_SINGLENUM = re.compile(r"[+\-] (?P<num>[0-9])")
def get_arch(smda_report):
def get_bitness(smda_report):
if smda_report.architecture == "intel":
if smda_report.bitness == 32:
return ARCH_X32
return BITNESS_X32
elif smda_report.bitness == 64:
return ARCH_X64
return BITNESS_X64
else:
raise NotImplementedError
@@ -85,7 +85,7 @@ def extract_insn_number_features(f, bb, insn):
for operand in operands:
try:
yield Number(int(operand, 16)), insn.offset
yield Number(int(operand, 16), arch=get_arch(f.smda_report)), insn.offset
yield Number(int(operand, 16), bitness=get_bitness(f.smda_report)), insn.offset
except:
continue
@@ -228,7 +228,7 @@ def extract_insn_offset_features(f, bb, insn):
number = int(number_int.group("num"))
number = -1 * number if number_int.group().startswith("-") else number
yield Offset(number), insn.offset
yield Offset(number, arch=get_arch(f.smda_report)), insn.offset
yield Offset(number, bitness=get_bitness(f.smda_report)), insn.offset
def is_security_cookie(f, bb, insn):

View File

@@ -19,8 +19,8 @@ import capa.features.extractors.helpers
import capa.features.extractors.viv.helpers
from capa.features.insn import API, Number, Offset, Mnemonic
from capa.features.common import (
ARCH_X32,
ARCH_X64,
BITNESS_X32,
BITNESS_X64,
MAX_BYTES_FEATURE_SIZE,
THUNK_CHAIN_DEPTH_DELTA,
Bytes,
@@ -34,12 +34,12 @@ from capa.features.extractors.viv.indirect_calls import NotFoundError, resolve_i
SECURITY_COOKIE_BYTES_DELTA = 0x40
def get_arch(vw):
arch = vw.getMeta("Architecture")
if arch == "i386":
return ARCH_X32
elif arch == "amd64":
return ARCH_X64
def get_bitness(vw):
    """
    translate the workspace's "Architecture" metadata into a BITNESS_* constant.

    Args:
      vw: the workspace; only its `getMeta("Architecture")` value is consulted.

    Returns:
      BITNESS_X32 for "i386", BITNESS_X64 for "amd64",
      and None for any other architecture value.
    """
    # the metadata value is an architecture name, not a bitness,
    # so keep the local named for what it actually holds.
    arch = vw.getMeta("Architecture")
    if arch == "i386":
        return BITNESS_X32
    if arch == "amd64":
        return BITNESS_X64
    # unrecognized architecture: no bitness flavor is reported.
    return None
def interface_extract_instruction_XXX(f, bb, insn):
@@ -193,7 +193,7 @@ def extract_insn_number_features(f, bb, insn):
return
yield Number(v), insn.va
yield Number(v, arch=get_arch(f.vw)), insn.va
yield Number(v, bitness=get_bitness(f.vw)), insn.va
def derefs(vw, p):
@@ -389,7 +389,7 @@ def extract_insn_offset_features(f, bb, insn):
v = oper.disp
yield Offset(v), insn.va
yield Offset(v, arch=get_arch(f.vw)), insn.va
yield Offset(v, bitness=get_bitness(f.vw)), insn.va
# like: [esi + ecx + 16384]
# reg ^ ^
@@ -400,7 +400,7 @@ def extract_insn_offset_features(f, bb, insn):
v = oper.disp
yield Offset(v), insn.va
yield Offset(v, arch=get_arch(f.vw)), insn.va
yield Offset(v, bitness=get_bitness(f.vw)), insn.va
def is_security_cookie(f, bb, insn) -> bool:

View File

@@ -21,16 +21,16 @@ class API(Feature):
class Number(Feature):
def __init__(self, value: int, arch=None, description=None):
super(Number, self).__init__(value, arch=arch, description=description)
def __init__(self, value: int, bitness=None, description=None):
super(Number, self).__init__(value, bitness=bitness, description=description)
def get_value_str(self):
return capa.render.utils.hex(self.value)
class Offset(Feature):
def __init__(self, value: int, arch=None, description=None):
super(Offset, self).__init__(value, arch=arch, description=description)
def __init__(self, value: int, bitness=None, description=None):
super(Offset, self).__init__(value, bitness=bitness, description=description)
def get_value_str(self):
return capa.render.utils.hex(self.value)

View File

@@ -240,19 +240,19 @@ def parse_feature(key: str):
elif key == "number":
return capa.features.insn.Number
elif key.startswith("number/"):
arch = key.partition("/")[2]
bitness = key.partition("/")[2]
# the other handlers here return constructors for features,
# and we want to as well,
# however, we need to preconfigure one of the arguments (`bitness`).
# so, instead we return a partially-applied function that
# provides `bitness` to the feature constructor.
# it forwards any other arguments provided to the closure along to the constructor.
return functools.partial(capa.features.insn.Number, arch=arch)
return functools.partial(capa.features.insn.Number, arch=bitness)
elif key == "offset":
return capa.features.insn.Offset
elif key.startswith("offset/"):
arch = key.partition("/")[2]
return functools.partial(capa.features.insn.Offset, arch=arch)
bitness = key.partition("/")[2]
return functools.partial(capa.features.insn.Offset, arch=bitness)
elif key == "mnemonic":
return capa.features.insn.Mnemonic
elif key == "basic blocks":

View File

@@ -22,8 +22,8 @@ import capa.features.insn
import capa.features.common
import capa.features.basicblock
from capa.features.common import (
ARCH_X32,
ARCH_X64,
BITNESS_X32,
BITNESS_X64,
CHARACTERISTIC_PE,
CHARACTERISTIC_ELF,
CHARACTERISTIC_LINUX,
@@ -390,10 +390,10 @@ FEATURE_PRESENCE_TESTS = sorted(
# insn/number: stack adjustments
("mimikatz", "function=0x40105D", capa.features.insn.Number(0xC), False),
("mimikatz", "function=0x40105D", capa.features.insn.Number(0x10), False),
# insn/number: arch flavors
# insn/number: bitness flavors
("mimikatz", "function=0x40105D", capa.features.insn.Number(0xFF), True),
("mimikatz", "function=0x40105D", capa.features.insn.Number(0xFF, arch=ARCH_X32), True),
("mimikatz", "function=0x40105D", capa.features.insn.Number(0xFF, arch=ARCH_X64), False),
("mimikatz", "function=0x40105D", capa.features.insn.Number(0xFF, bitness=BITNESS_X32), True),
("mimikatz", "function=0x40105D", capa.features.insn.Number(0xFF, bitness=BITNESS_X64), False),
# insn/offset
("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x0), True),
("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x4), True),
@@ -406,10 +406,10 @@ FEATURE_PRESENCE_TESTS = sorted(
# insn/offset: negative
("mimikatz", "function=0x4011FB", capa.features.insn.Offset(-0x1), True),
("mimikatz", "function=0x4011FB", capa.features.insn.Offset(-0x2), True),
# insn/offset: arch flavors
# insn/offset: bitness flavors
("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x0), True),
("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x0, arch=ARCH_X32), True),
("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x0, arch=ARCH_X64), False),
("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x0, bitness=BITNESS_X32), True),
("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x0, bitness=BITNESS_X64), False),
# insn/api
("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptAcquireContextW"), True),
("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptAcquireContext"), True),

View File

@@ -474,11 +474,11 @@ def test_match_namespace():
def test_render_number():
assert str(capa.features.insn.Number(1)) == "number(0x1)"
assert str(capa.features.insn.Number(1, arch=capa.features.common.ARCH_X32)) == "number/x32(0x1)"
assert str(capa.features.insn.Number(1, arch=capa.features.common.ARCH_X64)) == "number/x64(0x1)"
assert str(capa.features.insn.Number(1, bitness=capa.features.common.BITNESS_X32)) == "number/x32(0x1)"
assert str(capa.features.insn.Number(1, bitness=capa.features.common.BITNESS_X64)) == "number/x64(0x1)"
def test_render_offset():
assert str(capa.features.insn.Offset(1)) == "offset(0x1)"
assert str(capa.features.insn.Offset(1, arch=capa.features.common.ARCH_X32)) == "offset/x32(0x1)"
assert str(capa.features.insn.Offset(1, arch=capa.features.common.ARCH_X64)) == "offset/x64(0x1)"
assert str(capa.features.insn.Offset(1, bitness=capa.features.common.BITNESS_X32)) == "offset/x32(0x1)"
assert str(capa.features.insn.Offset(1, bitness=capa.features.common.BITNESS_X64)) == "offset/x64(0x1)"

View File

@@ -16,8 +16,8 @@ import capa.features.common
from capa.features.file import FunctionName
from capa.features.insn import Number, Offset
from capa.features.common import (
ARCH_X32,
ARCH_X64,
BITNESS_X32,
BITNESS_X64,
FORMAT_PE,
OS_WINDOWS,
CHARACTERISTIC_PE,
@@ -526,7 +526,7 @@ def test_invalid_number():
)
def test_number_arch():
def test_number_bitness():
r = capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
@@ -538,13 +538,13 @@ def test_number_arch():
"""
)
)
assert r.evaluate({Number(2, arch=ARCH_X32): {1}}) == True
assert r.evaluate({Number(2, bitness=BITNESS_X32): {1}}) == True
assert r.evaluate({Number(2): {1}}) == False
assert r.evaluate({Number(2, arch=ARCH_X64): {1}}) == False
assert r.evaluate({Number(2, bitness=BITNESS_X64): {1}}) == False
def test_number_arch_symbol():
def test_number_bitness_symbol():
r = capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
@@ -556,7 +556,7 @@ def test_number_arch_symbol():
"""
)
)
assert r.evaluate({Number(2, arch=ARCH_X32, description="some constant"): {1}}) == True
assert r.evaluate({Number(2, bitness=BITNESS_X32, description="some constant"): {1}}) == True
def test_offset_symbol():
@@ -604,7 +604,7 @@ def test_count_offset_symbol():
assert r.evaluate({Offset(0x100, description="symbol name"): {1, 2, 3}}) == True
def test_offset_arch():
def test_offset_bitness():
r = capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
@@ -616,13 +616,13 @@ def test_offset_arch():
"""
)
)
assert r.evaluate({Offset(2, arch=ARCH_X32): {1}}) == True
assert r.evaluate({Offset(2, bitness=BITNESS_X32): {1}}) == True
assert r.evaluate({Offset(2): {1}}) == False
assert r.evaluate({Offset(2, arch=ARCH_X64): {1}}) == False
assert r.evaluate({Offset(2, bitness=BITNESS_X64): {1}}) == False
def test_offset_arch_symbol():
def test_offset_bitness_symbol():
r = capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
@@ -634,7 +634,7 @@ def test_offset_arch_symbol():
"""
)
)
assert r.evaluate({Offset(2, arch=ARCH_X32, description="some constant"): {1}}) == True
assert r.evaluate({Offset(2, bitness=BITNESS_X32, description="some constant"): {1}}) == True
def test_invalid_offset():