Allow adding a description to every feature

Enable associating context with all features. This was previously called
a symbol and was only enabled for `number`, `offset` and `bytes`.

This is not enabled for strings with regular expressions, as they are
not a feature.
This commit is contained in:
Ana María Martínez Gómez
2020-06-17 08:43:04 +02:00
parent 12671ea44b
commit 767a76d814
5 changed files with 100 additions and 91 deletions

View File

@@ -317,6 +317,25 @@ These are the features supported at the function-scope:
- [mnemonic](#mnemonic)
- [characteristics](#characteristics)
All of them support an optional description which helps with documenting rules and provides context in capa's output.
It can be specified in the following way:
```
- string: This program cannot be run in DOS mode.
description: MS-DOS stub message
- number: 0x4550
description: IMAGE_DOS_SIGNATURE (MZ)
```
For all features except for [string](#string), the description can be specified inline preceded by ` = `.
For the previous [number](#number) example:
```
- number: 0x4550 = IMAGE_DOS_SIGNATURE (MZ)
```
The inline syntax is preferred (except for [string](#string)).
### api
A call to a named function, probably an import,
though possibly a local function (like `malloc`) extracted via FLIRT.
@@ -339,8 +358,8 @@ For example, a crypto constant.
The parameter is a number; if prefixed with `0x` then in hex format, otherwise, decimal format.
To associate context with a number, e.g. for constant definitions, append an equal sign and the respective name to
the number definition. This helps with documenting rules and provides context in capa's output.
It can include an optional description, e.g. for constant definitions.
The inline syntax is preferred (` = DESCRIPTION STRING`).
Examples:
@@ -362,20 +381,29 @@ Regexes should be surrounded with `/` characters.
By default, capa uses case-sensitive matching and assumes leading and trailing wildcards.
To perform case-insensitive matching append an `i`. To anchor the regex at the start or end of a string, use `^` and/or `$`.
Strings can include a description, but the inline syntax is not supported.
Examples:
string: This program cannot be run in DOS mode.
string: Firefox 64.0
string: /SELECT.*FROM.*WHERE/
string: /Hardware\\Description\\System\\CentralProcessor/i
```
- string: This program cannot be run in DOS mode.
description: MS-DOS stub message
- string: '{3E5FC7F9-9A51-4367-9063-A120244FBEC7}'
description: CLSID_CMSTPLUA
- string: Firefox 64.0
- string: /SELECT.*FROM.*WHERE/
- string: /Hardware\\Description\\System\\CentralProcessor/i
```
Note that regex matching is expensive (`O(features)` rather than `O(1)`), so regexes should be used sparingly.
### bytes
A sequence of bytes referenced by the logic of the program.
The provided sequence must match from the beginning of the referenced bytes and be no more than `0x100` bytes.
The parameter is a sequence of hexadecimal bytes followed by an optional description.
The parameter is a sequence of hexadecimal bytes.
It can include an optional description.
The inline syntax is preferred (` = DESCRIPTION STRING`).
The example below illustrates byte matching given a COM CLSID pushed onto the stack prior to `CoCreateInstance`.
@@ -397,6 +425,7 @@ A structure offset referenced by the logic of the program.
This should not be a stack offset.
The parameter is a number; if prefixed with `0x` then in hex format, otherwise, decimal format.
It can be followed by an optional description.
Examples:
@@ -453,6 +482,8 @@ These are the features supported at the file-scope:
- [import](#import)
- [section](#section)
All of them can be followed by an optional description, as with the features in the previous section.
### file string
An ASCII or UTF-16 LE string present in the file.

View File

@@ -17,10 +17,11 @@ def bytes_to_str(b):
class Feature(object):
def __init__(self, args):
def __init__(self, args, description=None):
super(Feature, self).__init__()
self.name = self.__class__.__name__
self.args = args
self.description = description
def __hash__(self):
return hash((self.name, tuple(self.args)))
@@ -28,8 +29,17 @@ class Feature(object):
def __eq__(self, other):
return self.name == other.name and self.args == other.args
def _str_name(self):
return self.name.lower()
def _str_value(self):
return ','.join(self.args)
def __str__(self):
return '%s(%s)' % (self.name.lower(), ','.join(self.args))
if self.description:
return '%s(%s = %s)' % (self._str_name(), self._str_value(), self.description)
else:
return '%s(%s)' % (self._str_name(), self._str_value())
def __repr__(self):
return str(self)
@@ -50,22 +60,22 @@ class Feature(object):
class MatchedRule(Feature):
def __init__(self, rule_name):
super(MatchedRule, self).__init__([rule_name])
def __init__(self, rule_name, description=None):
super(MatchedRule, self).__init__([rule_name], description)
self.rule_name = rule_name
def __str__(self):
return 'match(%s)' % (self.rule_name)
def _str_name(self):
return 'match'
class Characteristic(Feature):
def __init__(self, name, value=None):
def __init__(self, name, value=None, description=None):
'''
when `value` is not provided, this serves as descriptor for a class of characteristics.
this is only used internally, such as in `rules.py` when checking if a statement is
supported by a given scope.
'''
super(Characteristic, self).__init__([name, value])
super(Characteristic, self).__init__([name, value], description)
self.name = name
self.value = value
@@ -74,27 +84,23 @@ class Characteristic(Feature):
raise ValueError('cannot evaluate characteristc %s with empty value' % (str(self)))
return super(Characteristic, self).evaluate(ctx)
def __str__(self):
def _str_value(self):
if self.value is None:
return 'characteristic(%s)' % (self.name)
return self.name
else:
return 'characteristic(%s(%s))' % (self.name, self.value)
return '%s(%s)' % (self.name, self.value)
class String(Feature):
def __init__(self, value):
super(String, self).__init__([value])
def __init__(self, value, description=None):
super(String, self).__init__([value], description)
self.value = value
def __str__(self):
return 'string("%s")' % (self.value)
class Bytes(Feature):
def __init__(self, value, symbol=None):
super(Bytes, self).__init__([value])
def __init__(self, value, description=None):
super(Bytes, self).__init__([value], description)
self.value = value
self.symbol = symbol
def evaluate(self, ctx):
for feature, locations in ctx.items():
@@ -106,11 +112,8 @@ class Bytes(Feature):
return capa.engine.Result(False, self, [])
def __str__(self):
if self.symbol:
return 'bytes(0x%s = %s)' % (bytes_to_str(self.value).upper(), self.symbol)
else:
return 'bytes(0x%s)' % (bytes_to_str(self.value).upper())
def _str_value(self):
return '0x%s' % bytes_to_str(self.value).upper()
def freeze_serialize(self):
return (self.__class__.__name__,

View File

@@ -2,30 +2,21 @@ from capa.features import Feature
class Export(Feature):
def __init__(self, value):
def __init__(self, value, description=None):
# value is export name
super(Export, self).__init__([value])
super(Export, self).__init__([value], description)
self.value = value
def __str__(self):
return 'Export(%s)' % (self.value)
class Import(Feature):
def __init__(self, value):
def __init__(self, value, description=None):
# value is import name
super(Import, self).__init__([value])
super(Import, self).__init__([value], description)
self.value = value
def __str__(self):
return 'Import(%s)' % (self.value)
class Section(Feature):
def __init__(self, value):
def __init__(self, value, description=None):
# value is section name
super(Section, self).__init__([value])
super(Section, self).__init__([value], description)
self.value = value
def __str__(self):
return 'Section(%s)' % (self.value)

View File

@@ -2,45 +2,34 @@ from capa.features import Feature
class API(Feature):
def __init__(self, name):
def __init__(self, name, description=None):
# Downcase library name if given
if '.' in name:
modname, impname = name.split('.')
name = modname.lower() + '.' + impname
super(API, self).__init__([name])
super(API, self).__init__([name], description)
class Number(Feature):
def __init__(self, value, symbol=None):
super(Number, self).__init__([value])
def __init__(self, value, description=None):
super(Number, self).__init__([value], description)
self.value = value
self.symbol = symbol
def __str__(self):
if self.symbol:
return 'number(0x%x = %s)' % (self.value, self.symbol)
else:
return 'number(0x%x)' % (self.value)
def _str_value(self):
return '0x%x' % self.value
class Offset(Feature):
    '''
    offset feature: wraps a numeric offset value plus an optional description.

    args:
      value: the offset; rendered in hexadecimal when the feature is printed.
      description: optional text giving context for the offset, or None.
    '''
    def __init__(self, value, description=None):
        # forward the description to the base class, like the sibling features
        # (Number, Mnemonic, ...) do; `Feature.__init__` is what stores it, so
        # omitting it here would silently discard any description given.
        super(Offset, self).__init__([value], description)
        self.value = value

    def _str_value(self):
        # the value part of the string form, rendered as hex.
        return '0x%x' % self.value
class Mnemonic(Feature):
def __init__(self, value):
super(Mnemonic, self).__init__([value])
def __init__(self, value, description=None):
super(Mnemonic, self).__init__([value], description)
self.value = value
def __str__(self):
return 'mnemonic(%s)' % (self.value)

View File

@@ -207,7 +207,7 @@ def parse_feature(key):
return capa.features.basicblock.BasicBlock
elif key.startswith('characteristic(') and key.endswith(')'):
characteristic = key[len('characteristic('):-len(')')]
return lambda v: capa.features.Characteristic(characteristic, v)
return lambda v, description=None: capa.features.Characteristic(characteristic, v, description)
elif key == 'export':
return capa.features.file.Export
elif key == 'import':
@@ -220,18 +220,18 @@ def parse_feature(key):
raise InvalidRule('unexpected statement: %s' % key)
def parse_symbol(s, value_type):
def parse_description(s, value_type, description=None):
'''
s can be an int or a string
'''
if isinstance(s, str) and '=' in s:
value, symbol = s.split('=', 1)
symbol = symbol.strip()
if symbol == '':
raise InvalidRule('unexpected value: "%s", symbol name cannot be empty' % s)
if value_type != 'string' and isinstance(s, str) and ' = ' in s:
if description:
raise InvalidRule('unexpected value: "%s", only one description allowed (inline description with `=`)' % s)
value, description = s.split(' = ', 1)
if description == '':
raise InvalidRule('unexpected value: "%s", description cannot be empty' % s)
else:
value = s
symbol = None
if isinstance(value, str):
if value_type == 'bytes':
@@ -244,17 +244,17 @@ def parse_symbol(s, value_type):
if len(value) > MAX_BYTES_FEATURE_SIZE:
raise InvalidRule('unexpected bytes value: byte sequences must be no larger than %s bytes' %
MAX_BYTES_FEATURE_SIZE)
else:
elif value_type in ['number', 'offset']:
try:
value = parse_int(value)
except ValueError:
raise InvalidRule('unexpected value: "%s", must begin with numerical value' % value)
return value, symbol
return value, description
def build_statements(d, scope):
if len(d.keys()) != 1:
if len(d.keys()) > 2:
raise InvalidRule('too many statements')
key = list(d.keys())[0]
@@ -330,10 +330,10 @@ def build_statements(d, scope):
#
# count(offset(0xC))
# count(number(0x11223344))
# count(number(0x100 = symbol name))
# count(number(0x100 = description))
if term in ('number', 'offset', 'bytes'):
value, symbol = parse_symbol(arg, term)
feature = Feature(value, symbol)
value, description = parse_description(arg, term)
feature = Feature(value, description)
else:
# arg is string, like:
#
@@ -370,13 +370,8 @@ def build_statements(d, scope):
raise InvalidRule('invalid regular expression: %s it should use Python syntax, try it at https://pythex.org' % d[key])
else:
Feature = parse_feature(key)
if key in ('number', 'offset', 'bytes'):
# parse numbers with symbol description, e.g. 0x4550 = IMAGE_DOS_SIGNATURE
# or regular numbers, e.g. 37
value, symbol = parse_symbol(d[key], key)
feature = Feature(value, symbol)
else:
feature = Feature(d[key])
value, symbol = parse_description(d[key], key, d.get('description'))
feature = Feature(value, symbol)
ensure_feature_valid_for_scope(scope, feature)
return feature