mirror of
https://github.com/mandiant/capa.git
synced 2026-01-19 08:01:05 -08:00
Allow to add a description for every feature
Enable associating context with all features. This was previously called symbol and was only enabled for `number`, `offset` and `bytes`. This is not enabled for strings with regular expressions, as they are not features.
This commit is contained in:
49
README.md
49
README.md
@@ -317,6 +317,25 @@ These are the features supported at the function-scope:
|
||||
- [mnemonic](#mnemonic)
|
||||
- [characteristics](#characteristics)
|
||||
|
||||
All of them support an optional description which helps with documenting rules and provides context in capa's output.
|
||||
It can be specified in the following way:
|
||||
|
||||
```
|
||||
- string: This program cannot be run in DOS mode.
|
||||
description: MS-DOS stub message
|
||||
- number: 0x4550
|
||||
description: IMAGE_DOS_SIGNATURE (MZ)
|
||||
```
|
||||
|
||||
For all features except for [string](#string), the description can be specified inline preceded by ` = `.
|
||||
For the previous [number](#number) example:
|
||||
|
||||
```
|
||||
- number: 0x4550 = IMAGE_DOS_SIGNATURE (MZ)
|
||||
```
|
||||
|
||||
The inline syntax is preferred (except for [string](#string)).
|
||||
|
||||
### api
|
||||
A call to a named function, probably an import,
|
||||
though possibly a local function (like `malloc`) extracted via FLIRT.
|
||||
@@ -339,8 +358,8 @@ For example, a crypto constant.
|
||||
|
||||
The parameter is a number; if prefixed with `0x` then in hex format, otherwise, decimal format.
|
||||
|
||||
To associate context with a number, e.g. for constant definitions, append an equal sign and the respective name to
|
||||
the number definition. This helps with documenting rules and provides context in capa's output.
|
||||
It can include an optional description, e.g. for constant definitions.
|
||||
The inline syntax is preferred (` = DESCRIPTION STRING`).
|
||||
|
||||
Examples:
|
||||
|
||||
@@ -362,20 +381,29 @@ Regexes should be surrounded with `/` characters.
|
||||
By default, capa uses case-sensitive matching and assumes leading and trailing wildcards.
|
||||
To perform case-insensitive matching append an `i`. To anchor the regex at the start or end of a string, use `^` and/or `$`.
|
||||
|
||||
Strings can include a description, but the inline syntax is not supported.
|
||||
|
||||
Examples:
|
||||
|
||||
string: This program cannot be run in DOS mode.
|
||||
string: Firefox 64.0
|
||||
string: /SELECT.*FROM.*WHERE/
|
||||
string: /Hardware\\Description\\System\\CentralProcessor/i
|
||||
|
||||
```
|
||||
- string: This program cannot be run in DOS mode.
|
||||
description: MS-DOS stub message
|
||||
- string: '{3E5FC7F9-9A51-4367-9063-A120244FBEC7}'
|
||||
description: CLSID_CMSTPLUA
|
||||
- string: Firefox 64.0
|
||||
- string: /SELECT.*FROM.*WHERE/
|
||||
- string: /Hardware\\Description\\System\\CentralProcessor/i
|
||||
```
|
||||
|
||||
Note that regex matching is expensive (`O(features)` rather than `O(1)`), so regexes should be used sparingly.
|
||||
|
||||
### bytes
|
||||
A sequence of bytes referenced by the logic of the program.
|
||||
The provided sequence must match from the beginning of the referenced bytes and be no more than `0x100` bytes.
|
||||
The parameter is a sequence of hexadecimal bytes followed by an optional description.
|
||||
|
||||
The parameter is a sequence of hexadecimal bytes.
|
||||
It can include an optional description.
|
||||
The inline syntax is preferred (` = DESCRIPTION STRING`).
|
||||
|
||||
|
||||
The example below illustrates byte matching given a COM CLSID pushed onto the stack prior to `CoCreateInstance`.
|
||||
|
||||
@@ -397,6 +425,7 @@ A structure offset referenced by the logic of the program.
|
||||
This should not be a stack offset.
|
||||
|
||||
The parameter is a number; if prefixed with `0x` then in hex format, otherwise, decimal format.
|
||||
It can be followed by an optional description.
|
||||
|
||||
Examples:
|
||||
|
||||
@@ -453,6 +482,8 @@ These are the features supported at the file-scope:
|
||||
- [import](#import)
|
||||
- [section](#section)
|
||||
|
||||
All of them can be followed by an optional description, just like the features in the previous section.
|
||||
|
||||
### file string
|
||||
An ASCII or UTF-16 LE string present in the file.
|
||||
|
||||
|
||||
@@ -17,10 +17,11 @@ def bytes_to_str(b):
|
||||
|
||||
|
||||
class Feature(object):
|
||||
def __init__(self, args):
|
||||
def __init__(self, args, description=None):
|
||||
super(Feature, self).__init__()
|
||||
self.name = self.__class__.__name__
|
||||
self.args = args
|
||||
self.description = description
|
||||
|
||||
def __hash__(self):
|
||||
return hash((self.name, tuple(self.args)))
|
||||
@@ -28,8 +29,17 @@ class Feature(object):
|
||||
def __eq__(self, other):
|
||||
return self.name == other.name and self.args == other.args
|
||||
|
||||
def _str_name(self):
|
||||
return self.name.lower()
|
||||
|
||||
def _str_value(self):
|
||||
return ','.join(self.args)
|
||||
|
||||
def __str__(self):
|
||||
return '%s(%s)' % (self.name.lower(), ','.join(self.args))
|
||||
if self.description:
|
||||
return '%s(%s = %s)' % (self._str_name(), self._str_value(), self.description)
|
||||
else:
|
||||
return '%s(%s)' % (self._str_name(), self._str_value())
|
||||
|
||||
def __repr__(self):
|
||||
return str(self)
|
||||
@@ -50,22 +60,22 @@ class Feature(object):
|
||||
|
||||
|
||||
class MatchedRule(Feature):
    '''
    feature that refers to a rule by its name, with an optional description.
    '''
    def __init__(self, rule_name, description=None):
        super(MatchedRule, self).__init__(args=[rule_name], description=description)
        self.rule_name = rule_name

    def _str_name(self):
        # label used when rendering this feature, instead of the lowercased class name.
        label = 'match'
        return label
|
||||
|
||||
|
||||
class Characteristic(Feature):
|
||||
def __init__(self, name, value=None):
|
||||
def __init__(self, name, value=None, description=None):
|
||||
'''
|
||||
when `value` is not provided, this serves as descriptor for a class of characteristics.
|
||||
this is only used internally, such as in `rules.py` when checking if a statement is
|
||||
supported by a given scope.
|
||||
'''
|
||||
super(Characteristic, self).__init__([name, value])
|
||||
super(Characteristic, self).__init__([name, value], description)
|
||||
self.name = name
|
||||
self.value = value
|
||||
|
||||
@@ -74,27 +84,23 @@ class Characteristic(Feature):
|
||||
raise ValueError('cannot evaluate characteristc %s with empty value' % (str(self)))
|
||||
return super(Characteristic, self).evaluate(ctx)
|
||||
|
||||
def __str__(self):
|
||||
def _str_value(self):
|
||||
if self.value is None:
|
||||
return 'characteristic(%s)' % (self.name)
|
||||
return self.name
|
||||
else:
|
||||
return 'characteristic(%s(%s))' % (self.name, self.value)
|
||||
return '%s(%s)' % (self.name, self.value)
|
||||
|
||||
|
||||
class String(Feature):
    '''
    string feature carrying a single value and an optional description.
    '''
    def __init__(self, value, description=None):
        # the value is stored both as the sole feature argument and as `self.value`.
        super(String, self).__init__(args=[value], description=description)
        self.value = value
|
||||
|
||||
|
||||
class Bytes(Feature):
|
||||
def __init__(self, value, symbol=None):
|
||||
super(Bytes, self).__init__([value])
|
||||
def __init__(self, value, description=None):
|
||||
super(Bytes, self).__init__([value], description)
|
||||
self.value = value
|
||||
self.symbol = symbol
|
||||
|
||||
def evaluate(self, ctx):
|
||||
for feature, locations in ctx.items():
|
||||
@@ -106,11 +112,8 @@ class Bytes(Feature):
|
||||
|
||||
return capa.engine.Result(False, self, [])
|
||||
|
||||
def __str__(self):
|
||||
if self.symbol:
|
||||
return 'bytes(0x%s = %s)' % (bytes_to_str(self.value).upper(), self.symbol)
|
||||
else:
|
||||
return 'bytes(0x%s)' % (bytes_to_str(self.value).upper())
|
||||
def _str_value(self):
|
||||
return '0x%s' % bytes_to_str(self.value).upper()
|
||||
|
||||
def freeze_serialize(self):
|
||||
return (self.__class__.__name__,
|
||||
|
||||
@@ -2,30 +2,21 @@ from capa.features import Feature
|
||||
|
||||
|
||||
class Export(Feature):
    '''
    file-scope feature for an export; `value` is the export name.
    '''
    def __init__(self, value, description=None):
        super(Export, self).__init__(args=[value], description=description)
        self.value = value
|
||||
|
||||
|
||||
class Import(Feature):
    '''
    file-scope feature for an import; `value` is the import name.
    '''
    def __init__(self, value, description=None):
        super(Import, self).__init__(args=[value], description=description)
        self.value = value
|
||||
|
||||
|
||||
class Section(Feature):
    '''
    file-scope feature for a section; `value` is the section name.
    '''
    def __init__(self, value, description=None):
        super(Section, self).__init__(args=[value], description=description)
        self.value = value
|
||||
|
||||
@@ -2,45 +2,34 @@ from capa.features import Feature
|
||||
|
||||
|
||||
class API(Feature):
    '''
    feature for a call to a named function, optionally qualified as `module.function`.
    '''
    def __init__(self, name, description=None):
        '''
        args:
          name: function name, optionally prefixed with a library name and a dot.
          description: optional text giving context for the feature.
        '''
        # Downcase library name if given.
        # split on the last dot only: unpacking `name.split('.')` into two names
        # raises ValueError as soon as the name contains more than one dot.
        if '.' in name:
            modname, _, impname = name.rpartition('.')
            name = modname.lower() + '.' + impname

        super(API, self).__init__([name], description)
|
||||
|
||||
|
||||
class Number(Feature):
    '''
    numeric feature with an optional description, e.g. for constant definitions.
    '''
    def __init__(self, value, description=None):
        super(Number, self).__init__(args=[value], description=description)
        self.value = value

    def _str_value(self):
        # numbers are rendered in hexadecimal.
        rendered = '0x%x' % self.value
        return rendered
|
||||
|
||||
|
||||
class Offset(Feature):
    '''
    structure offset feature with an optional description.
    '''
    def __init__(self, value, description=None):
        '''
        args:
          value: the offset.
          description: optional text giving context for the feature.
        '''
        # forward the description to the base class; otherwise it is accepted
        # here but never stored, and so never shows up when the feature is rendered.
        super(Offset, self).__init__([value], description)
        self.value = value

    def _str_value(self):
        # offsets are rendered in hexadecimal.
        return '0x%x' % self.value
|
||||
|
||||
|
||||
class Mnemonic(Feature):
    '''
    mnemonic feature carrying a single value and an optional description.
    '''
    def __init__(self, value, description=None):
        super(Mnemonic, self).__init__(args=[value], description=description)
        self.value = value
|
||||
|
||||
@@ -207,7 +207,7 @@ def parse_feature(key):
|
||||
return capa.features.basicblock.BasicBlock
|
||||
elif key.startswith('characteristic(') and key.endswith(')'):
|
||||
characteristic = key[len('characteristic('):-len(')')]
|
||||
return lambda v: capa.features.Characteristic(characteristic, v)
|
||||
return lambda v, description=None: capa.features.Characteristic(characteristic, v, description)
|
||||
elif key == 'export':
|
||||
return capa.features.file.Export
|
||||
elif key == 'import':
|
||||
@@ -220,18 +220,18 @@ def parse_feature(key):
|
||||
raise InvalidRule('unexpected statement: %s' % key)
|
||||
|
||||
|
||||
def parse_symbol(s, value_type):
|
||||
def parse_description(s, value_type, description=None):
|
||||
'''
|
||||
s can be an int or a string
|
||||
'''
|
||||
if isinstance(s, str) and '=' in s:
|
||||
value, symbol = s.split('=', 1)
|
||||
symbol = symbol.strip()
|
||||
if symbol == '':
|
||||
raise InvalidRule('unexpected value: "%s", symbol name cannot be empty' % s)
|
||||
if value_type != 'string' and isinstance(s, str) and ' = ' in s:
|
||||
if description:
|
||||
raise InvalidRule('unexpected value: "%s", only one description allowed (inline description with `=`)' % s)
|
||||
value, description = s.split(' = ', 1)
|
||||
if description == '':
|
||||
raise InvalidRule('unexpected value: "%s", description cannot be empty' % s)
|
||||
else:
|
||||
value = s
|
||||
symbol = None
|
||||
|
||||
if isinstance(value, str):
|
||||
if value_type == 'bytes':
|
||||
@@ -244,17 +244,17 @@ def parse_symbol(s, value_type):
|
||||
if len(value) > MAX_BYTES_FEATURE_SIZE:
|
||||
raise InvalidRule('unexpected bytes value: byte sequences must be no larger than %s bytes' %
|
||||
MAX_BYTES_FEATURE_SIZE)
|
||||
else:
|
||||
elif value_type in ['number', 'offset']:
|
||||
try:
|
||||
value = parse_int(value)
|
||||
except ValueError:
|
||||
raise InvalidRule('unexpected value: "%s", must begin with numerical value' % value)
|
||||
|
||||
return value, symbol
|
||||
return value, description
|
||||
|
||||
|
||||
def build_statements(d, scope):
|
||||
if len(d.keys()) != 1:
|
||||
if len(d.keys()) > 2:
|
||||
raise InvalidRule('too many statements')
|
||||
|
||||
key = list(d.keys())[0]
|
||||
@@ -330,10 +330,10 @@ def build_statements(d, scope):
|
||||
#
|
||||
# count(offset(0xC))
|
||||
# count(number(0x11223344))
|
||||
# count(number(0x100 = symbol name))
|
||||
# count(number(0x100 = description))
|
||||
if term in ('number', 'offset', 'bytes'):
|
||||
value, symbol = parse_symbol(arg, term)
|
||||
feature = Feature(value, symbol)
|
||||
value, description = parse_description(arg, term)
|
||||
feature = Feature(value, description)
|
||||
else:
|
||||
# arg is string, like:
|
||||
#
|
||||
@@ -370,13 +370,8 @@ def build_statements(d, scope):
|
||||
raise InvalidRule('invalid regular expression: %s it should use Python syntax, try it at https://pythex.org' % d[key])
|
||||
else:
|
||||
Feature = parse_feature(key)
|
||||
if key in ('number', 'offset', 'bytes'):
|
||||
# parse numbers with symbol description, e.g. 0x4550 = IMAGE_DOS_SIGNATURE
|
||||
# or regular numbers, e.g. 37
|
||||
value, symbol = parse_symbol(d[key], key)
|
||||
feature = Feature(value, symbol)
|
||||
else:
|
||||
feature = Feature(d[key])
|
||||
value, symbol = parse_description(d[key], key, d.get('description'))
|
||||
feature = Feature(value, symbol)
|
||||
ensure_feature_valid_for_scope(scope, feature)
|
||||
return feature
|
||||
|
||||
|
||||
Reference in New Issue
Block a user