Adapt description implementation to new output

As the `__str__` method is not used anymore in the output, the
description implementation needs to be adapted.
This commit is contained in:
Ana María Martínez Gómez
2020-06-30 10:57:58 +02:00
parent 767a76d814
commit 681feebf35
6 changed files with 66 additions and 72 deletions

View File

@@ -84,6 +84,7 @@ Download capa from the [Releases](/releases) page or get the nightly builds here
- [section](#section)
- [counting](#counting)
- [matching prior rule matches](#matching-prior-rule-matches)
- [description](#description)
- [limitations](#limitations)
# installation
@@ -317,25 +318,6 @@ These are the features supported at the function-scope:
- [mnemonic](#mnemonic)
- [characteristics](#characteristics)
All of them support an optional description which helps with documenting rules and provides context in capa's output.
It can be specified in the following way:
```
- string: This program cannot be run in DOS mode.
description: MS-DOS stub message
- number: 0x4550
description: IMAGE_DOS_SIGNATURE (MZ)
```
For all features except for [string](#string), the description can be specified inline preceded by ` = `.
For the previous [number](#number) example:
```
- number: 0x4550 = IMAGE_DOS_SIGNATURE (MZ)
```
The inline syntax is preferred (except for [string](#string)).
### api
A call to a named function, probably an import,
though possibly a local function (like `malloc`) extracted via FLIRT.
@@ -358,8 +340,9 @@ For example, a crypto constant.
The parameter is a number; if prefixed with `0x` then in hex format, otherwise, decimal format.
It can include an optional description, e.g. for constant definitions.
The inline syntax is preferred (` = DESCRIPTION STRING`).
To help humans understand the meaning of a number (for example, that the constant `0x40` means `PAGE_EXECUTE_READWRITE`), you may provide a description alongside the definition.
Use the inline syntax (preferred) by ending the line with ` = DESCRIPTION STRING`.
Check the [description section](#description) for more details.
Examples:
@@ -381,7 +364,9 @@ Regexes should be surrounded with `/` characters.
By default, capa uses case-sensitive matching and assumes leading and trailing wildcards.
To perform case-insensitive matching append an `i`. To anchor the regex at the start or end of a string, use `^` and/or `$`.
Strings can include a description, but the inline syntax is not supported.
To add context to a string use the two-line syntax, using the `description` tag: `description: DESCRIPTION STRING`.
The inline syntax is not supported.
Check the [description section](#description) for more details.
Examples:
@@ -401,9 +386,9 @@ Note that regex matching is expensive (`O(features)` rather than `O(1)`) so they
A sequence of bytes referenced by the logic of the program.
The provided sequence must match from the beginning of the referenced bytes and be no more than `0x100` bytes.
The parameter is a sequence of hexadecimal bytes.
It can include an optional description.
The inline syntax is preferred (` = DESCRIPTION STRING`).
To help humans understand the meaning of the byte sequence, you may provide a description.
Use the inline syntax (preferred) by ending the line with ` = DESCRIPTION STRING`.
Check the [description section](#description) for more details.
The example below illustrates byte matching given a COM CLSID pushed onto the stack prior to `CoCreateInstance`.
@@ -482,7 +467,6 @@ These are the features supported at the file-scope:
- [import](#import)
- [section](#section)
All of them can be followed by an optional description, as the features in the previous section.
### file string
An ASCII or UTF-16 LE string present in the file.
@@ -563,6 +547,28 @@ By default, library rules will not be output to the user as a rule match,
but can be matched by other rules.
When no active rules depend on a library rule, that library rule will not be evaluated - maintaining performance.
## description
All features support an optional description which helps with documenting rules and provides context in capa's output.
For all features except for [strings](#string), the description can be specified inline preceded by ` = `: ` = DESCRIPTION STRING`.
For example:
```
- number: 0x4550 = IMAGE_DOS_SIGNATURE (MZ)
```
The inline syntax is preferred.
For [strings](#string), or if the description is long or contains newlines, use the two-line syntax.
It uses the `description` tag in the following way: `description: DESCRIPTION STRING`.
For example:
```
- string: This program cannot be run in DOS mode.
description: MS-DOS stub message
- number: 0x4550
description: IMAGE_DOS_SIGNATURE (MZ)
```
# limitations
To learn more about capa's current limitations see [here](doc/limitations.md).

View File

@@ -19,7 +19,7 @@ def bytes_to_str(b):
class Feature(object):
def __init__(self, args, description=None):
super(Feature, self).__init__()
self.name = self.__class__.__name__
self.name = self.__class__.__name__.lower()
self.args = args
self.description = description
@@ -29,17 +29,16 @@ class Feature(object):
def __eq__(self, other):
return self.name == other.name and self.args == other.args
def _str_name(self):
return self.name.lower()
def _str_value(self):
# Subclasses override this to customize how the feature args are rendered
# in `__str__` and in the json output
def get_args_str(self):
return ','.join(self.args)
def __str__(self):
if self.description:
return '%s(%s = %s)' % (self._str_name(), self._str_value(), self.description)
return '%s(%s = %s)' % (self.name, self.get_args_str(), self.description)
else:
return '%s(%s)' % (self._str_name(), self._str_value())
return '%s(%s)' % (self.name, self.get_args_str())
def __repr__(self):
return str(self)
@@ -62,21 +61,19 @@ class Feature(object):
class MatchedRule(Feature):
def __init__(self, rule_name, description=None):
super(MatchedRule, self).__init__([rule_name], description)
self.name = 'match'
self.rule_name = rule_name
def _str_name(self):
return 'match'
class Characteristic(Feature):
def __init__(self, name, value=None, description=None):
def __init__(self, attribute, value=None, description=None):
'''
when `value` is not provided, this serves as descriptor for a class of characteristics.
this is only used internally, such as in `rules.py` when checking if a statement is
supported by a given scope.
'''
super(Characteristic, self).__init__([name, value], description)
self.name = name
super(Characteristic, self).__init__([attribute, value], description)
self.attribute = attribute
self.value = value
def evaluate(self, ctx):
@@ -84,11 +81,11 @@ class Characteristic(Feature):
raise ValueError('cannot evaluate characteristc %s with empty value' % (str(self)))
return super(Characteristic, self).evaluate(ctx)
def _str_value(self):
def get_args_str(self):
if self.value is None:
return self.name
return self.attribute
else:
return '%s(%s)' % (self.name, self.value)
return '%s(%s)' % (self.attribute, self.value)
class String(Feature):
@@ -112,8 +109,8 @@ class Bytes(Feature):
return capa.engine.Result(False, self, [])
def _str_value(self):
return '0x%s' % bytes_to_str(self.value).upper()
def get_args_str(self):
return bytes_to_str(self.value).upper()
def freeze_serialize(self):
return (self.__class__.__name__,

View File

@@ -16,8 +16,8 @@ class Number(Feature):
super(Number, self).__init__([value], description)
self.value = value
def _str_value(self):
return '0x%x' % self.value
def get_args_str(self):
return '0x%X' % self.value
class Offset(Feature):
@@ -25,8 +25,8 @@ class Offset(Feature):
super(Offset, self).__init__([value])
self.value = value
def _str_value(self):
return '0x%x' % self.value
def get_args_str(self):
return '0x%X' % self.value
class Mnemonic(Feature):

View File

@@ -86,23 +86,11 @@ def convert_feature_to_result_document(feature):
"type": "characteristic"
},
"""
name, value = feature.freeze_serialize()
result = {'type': feature.name, feature.name: feature.get_args_str()}
if feature.description:
result['description'] = feature.description
# make the terms pretty
name = name.lower()
if name == 'matchedrule':
name = 'match'
# in the common case, there's a single argument
# so use it directly.
# like: name=number value=1
if isinstance(value, list) and len(value) == 1:
value = value[0]
return {
'type': name,
name: value,
}
return result
def convert_node_to_result_document(node):

View File

@@ -48,7 +48,7 @@ def render_statement(ostream, match, statement, indent=0):
elif child['type'] == 'bytes':
feature = '%s(%s)' % (child['type'], rutils.bold2(rutils.hex_string(child[child['type']])))
elif child['type'] == 'characteristic':
feature = 'characteristic(%s)' % (rutils.bold2(child['characteristic'][0]))
feature = 'characteristic(%s)' % (rutils.bold2(child['characteristic']))
else:
raise RuntimeError('unexpected feature type: ' + str(child))
@@ -94,13 +94,16 @@ def render_feature(ostream, match, feature, indent=0):
# it should always be an even number of characters (its hex).
ostream.write(rutils.bold2(rutils.hex_string(feature[feature['type']])))
elif feature['type'] == 'characteristic':
ostream.write('characteristic(%s)' % (rutils.bold2(feature['characteristic'][0])))
ostream.write('characteristic(%s)' % (rutils.bold2(feature['characteristic'])))
# note that regex is found in `render_statement`
else:
raise RuntimeError('unexpected feature type: ' + str(feature))
render_locations(ostream, match)
if 'description' in feature:
ostream.write(' = ')
ostream.write(feature['description'])
render_locations(ostream, match)
ostream.write('\n')

View File

@@ -138,7 +138,7 @@ class InvalidRuleSet(ValueError):
def ensure_feature_valid_for_scope(scope, feature):
if isinstance(feature, capa.features.Characteristic):
if capa.features.Characteristic(feature.name) not in SUPPORTED_FEATURES[scope]:
if capa.features.Characteristic(feature.attribute) not in SUPPORTED_FEATURES[scope]:
raise InvalidRule('feature %s not support for scope %s' % (feature, scope))
elif not isinstance(feature, tuple(filter(lambda t: isinstance(t, type), SUPPORTED_FEATURES[scope]))):
raise InvalidRule('feature %s not support for scope %s' % (feature, scope))
@@ -226,7 +226,7 @@ def parse_description(s, value_type, description=None):
'''
if value_type != 'string' and isinstance(s, str) and ' = ' in s:
if description:
raise InvalidRule('unexpected value: "%s", only one description allowed (inline description with `=`)' % s)
raise InvalidRule('unexpected value: "%s", only one description allowed (inline description with ` = `)' % s)
value, description = s.split(' = ', 1)
if description == '':
raise InvalidRule('unexpected value: "%s", description cannot be empty' % s)
@@ -244,7 +244,7 @@ def parse_description(s, value_type, description=None):
if len(value) > MAX_BYTES_FEATURE_SIZE:
raise InvalidRule('unexpected bytes value: byte sequences must be no larger than %s bytes' %
MAX_BYTES_FEATURE_SIZE)
elif value_type in ['number', 'offset']:
elif value_type in {'number', 'offset'}:
try:
value = parse_int(value)
except ValueError:
@@ -370,8 +370,8 @@ def build_statements(d, scope):
raise InvalidRule('invalid regular expression: %s it should use Python syntax, try it at https://pythex.org' % d[key])
else:
Feature = parse_feature(key)
value, symbol = parse_description(d[key], key, d.get('description'))
feature = Feature(value, symbol)
value, description = parse_description(d[key], key, d.get('description'))
feature = Feature(value, description)
ensure_feature_valid_for_scope(scope, feature)
return feature