mirror of
https://github.com/mandiant/capa.git
synced 2026-01-17 15:22:53 -08:00
Adapt description implementation to new output
As the `__str__` method is not used anymore in the output, the description implementation needs to be adapted.
This commit is contained in:
58
README.md
58
README.md
@@ -84,6 +84,7 @@ Download capa from the [Releases](/releases) page or get the nightly builds here
|
||||
- [section](#section)
|
||||
- [counting](#counting)
|
||||
- [matching prior rule matches](#matching-prior-rule-matches)
|
||||
- [description](#description)
|
||||
- [limitations](#Limitations)
|
||||
|
||||
# installation
|
||||
@@ -317,25 +318,6 @@ These are the features supported at the function-scope:
|
||||
- [mnemonic](#mnemonic)
|
||||
- [characteristics](#characteristics)
|
||||
|
||||
All of them support an optional description which helps with documenting rules and provides context in capa's output.
|
||||
It can be specified in the following way:
|
||||
|
||||
```
|
||||
- string: This program cannot be run in DOS mode.
|
||||
description: MS-DOS stub message
|
||||
- number: 0x4550
|
||||
description: IMAGE_DOS_SIGNATURE (MZ)
|
||||
```
|
||||
|
||||
For all features except for [string](#string), the description can be specified inline preceded by ` = `.
|
||||
For the previous [number](#number) example:
|
||||
|
||||
```
|
||||
- number: 0x4550 = IMAGE_DOS_SIGNATURE (MZ)
|
||||
```
|
||||
|
||||
The inline syntax is preferred (except for [string](#string)).
|
||||
|
||||
### api
|
||||
A call to a named function, probably an import,
|
||||
though possibly a local function (like `malloc`) extracted via FLIRT.
|
||||
@@ -358,8 +340,9 @@ For example, a crypto constant.
|
||||
|
||||
The parameter is a number; if prefixed with `0x` it is interpreted as hexadecimal, otherwise as decimal.
|
||||
|
||||
It can include an optional description, e.g. for constant definitions.
|
||||
The inline syntax is preferred (` = DESCRIPTION STRING`).
|
||||
To help humans understand the meaning of a number, e.g. that the constant `0x40` means `PAGE_EXECUTE_READWRITE`, you may provide a description alongside the definition.
|
||||
Use the inline syntax (preferred) by ending the line with ` = DESCRIPTION STRING`.
|
||||
Check the [description section](#description) for more details.
|
||||
|
||||
Examples:
|
||||
|
||||
@@ -381,7 +364,9 @@ Regexes should be surrounded with `/` characters.
|
||||
By default, capa uses case-sensitive matching and assumes leading and trailing wildcards.
|
||||
To perform case-insensitive matching append an `i`. To anchor the regex at the start or end of a string, use `^` and/or `$`.
|
||||
|
||||
Strings can include a description, but the inline syntax is not supported.
|
||||
To add context to a string use the two-line syntax, using the `description` tag: `description: DESCRIPTION STRING`.
|
||||
The inline syntax is not supported.
|
||||
Check the [description section](#description) for more details.
|
||||
|
||||
Examples:
|
||||
|
||||
@@ -401,9 +386,9 @@ Note that regex matching is expensive (`O(features)` rather than `O(1)`) so they
|
||||
A sequence of bytes referenced by the logic of the program.
|
||||
The provided sequence must match from the beginning of the referenced bytes and be no more than `0x100` bytes.
|
||||
The parameter is a sequence of hexadecimal bytes.
|
||||
It can include an optional description.
|
||||
The inline syntax is preferred (` = DESCRIPTION STRING`).
|
||||
|
||||
To help humans understand the meaning of the byte sequence, you may provide a description.
|
||||
Use the inline syntax (preferred) by ending the line with ` = DESCRIPTION STRING`.
|
||||
Check the [description section](#description) for more details.
|
||||
|
||||
The example below illustrates byte matching given a COM CLSID pushed onto the stack prior to `CoCreateInstance`.
|
||||
|
||||
@@ -482,7 +467,6 @@ These are the features supported at the file-scope:
|
||||
- [import](#import)
|
||||
- [section](#section)
|
||||
|
||||
All of them can be followed by an optional description, as the features in the previous section.
|
||||
|
||||
### file string
|
||||
An ASCII or UTF-16 LE string present in the file.
|
||||
@@ -563,6 +547,28 @@ By default, library rules will not be output to the user as a rule match,
|
||||
but can be matched by other rules.
|
||||
When no active rules depend on a library rule, the library rules will not be evaluated - maintaining performance.
|
||||
|
||||
## description
|
||||
|
||||
All features support an optional description which helps with documenting rules and provides context in capa's output.
|
||||
For all features except for [strings](#string), the description can be specified inline preceded by ` = `: ` = DESCRIPTION STRING`.
|
||||
For example:
|
||||
|
||||
```
|
||||
- number: 0x4550 = IMAGE_DOS_SIGNATURE (MZ)
|
||||
```
|
||||
|
||||
The inline syntax is preferred.
|
||||
For [strings](#string) or if the description is long or contains newlines, use the two-line syntax.
|
||||
It uses the `description` tag in the following way: `description: DESCRIPTION STRING`.
|
||||
For example:
|
||||
|
||||
```
|
||||
- string: This program cannot be run in DOS mode.
|
||||
description: MS-DOS stub message
|
||||
- number: 0x4550
|
||||
description: IMAGE_DOS_SIGNATURE (MZ)
|
||||
```
|
||||
|
||||
# limitations
|
||||
|
||||
To learn more about capa's current limitations see [here](doc/limitations.md).
|
||||
|
||||
@@ -19,7 +19,7 @@ def bytes_to_str(b):
|
||||
class Feature(object):
|
||||
def __init__(self, args, description=None):
|
||||
super(Feature, self).__init__()
|
||||
self.name = self.__class__.__name__
|
||||
self.name = self.__class__.__name__.lower()
|
||||
self.args = args
|
||||
self.description = description
|
||||
|
||||
@@ -29,17 +29,16 @@ class Feature(object):
|
||||
def __eq__(self, other):
|
||||
return self.name == other.name and self.args == other.args
|
||||
|
||||
def _str_name(self):
|
||||
return self.name.lower()
|
||||
|
||||
def _str_value(self):
|
||||
# Used to overwrite the rendering of the feature args in `__str__` and the
|
||||
# json output
|
||||
def get_args_str(self):
|
||||
return ','.join(self.args)
|
||||
|
||||
def __str__(self):
|
||||
if self.description:
|
||||
return '%s(%s = %s)' % (self._str_name(), self._str_value(), self.description)
|
||||
return '%s(%s = %s)' % (self.name, self.get_args_str(), self.description)
|
||||
else:
|
||||
return '%s(%s)' % (self._str_name(), self._str_value())
|
||||
return '%s(%s)' % (self.name, self.get_args_str())
|
||||
|
||||
def __repr__(self):
|
||||
return str(self)
|
||||
@@ -62,21 +61,19 @@ class Feature(object):
|
||||
class MatchedRule(Feature):
|
||||
def __init__(self, rule_name, description=None):
|
||||
super(MatchedRule, self).__init__([rule_name], description)
|
||||
self.name = 'match'
|
||||
self.rule_name = rule_name
|
||||
|
||||
def _str_name(self):
|
||||
return 'match'
|
||||
|
||||
|
||||
class Characteristic(Feature):
|
||||
def __init__(self, name, value=None, description=None):
|
||||
def __init__(self, attribute, value=None, description=None):
|
||||
'''
|
||||
when `value` is not provided, this serves as descriptor for a class of characteristics.
|
||||
this is only used internally, such as in `rules.py` when checking if a statement is
|
||||
supported by a given scope.
|
||||
'''
|
||||
super(Characteristic, self).__init__([name, value], description)
|
||||
self.name = name
|
||||
super(Characteristic, self).__init__([attribute, value], description)
|
||||
self.attribute = attribute
|
||||
self.value = value
|
||||
|
||||
def evaluate(self, ctx):
|
||||
@@ -84,11 +81,11 @@ class Characteristic(Feature):
|
||||
raise ValueError('cannot evaluate characteristc %s with empty value' % (str(self)))
|
||||
return super(Characteristic, self).evaluate(ctx)
|
||||
|
||||
def _str_value(self):
|
||||
def get_args_str(self):
|
||||
if self.value is None:
|
||||
return self.name
|
||||
return self.attribute
|
||||
else:
|
||||
return '%s(%s)' % (self.name, self.value)
|
||||
return '%s(%s)' % (self.attribute, self.value)
|
||||
|
||||
|
||||
class String(Feature):
|
||||
@@ -112,8 +109,8 @@ class Bytes(Feature):
|
||||
|
||||
return capa.engine.Result(False, self, [])
|
||||
|
||||
def _str_value(self):
|
||||
return '0x%s' % bytes_to_str(self.value).upper()
|
||||
def get_args_str(self):
|
||||
return bytes_to_str(self.value).upper()
|
||||
|
||||
def freeze_serialize(self):
|
||||
return (self.__class__.__name__,
|
||||
|
||||
@@ -16,8 +16,8 @@ class Number(Feature):
|
||||
super(Number, self).__init__([value], description)
|
||||
self.value = value
|
||||
|
||||
def _str_value(self):
|
||||
return '0x%x' % self.value
|
||||
def get_args_str(self):
|
||||
return '0x%X' % self.value
|
||||
|
||||
|
||||
class Offset(Feature):
|
||||
@@ -25,8 +25,8 @@ class Offset(Feature):
|
||||
super(Offset, self).__init__([value])
|
||||
self.value = value
|
||||
|
||||
def _str_value(self):
|
||||
return '0x%x' % self.value
|
||||
def get_args_str(self):
|
||||
return '0x%X' % self.value
|
||||
|
||||
|
||||
class Mnemonic(Feature):
|
||||
|
||||
@@ -86,23 +86,11 @@ def convert_feature_to_result_document(feature):
|
||||
"type": "characteristic"
|
||||
},
|
||||
"""
|
||||
name, value = feature.freeze_serialize()
|
||||
result = {'type': feature.name, feature.name: feature.get_args_str()}
|
||||
if feature.description:
|
||||
result['description'] = feature.description
|
||||
|
||||
# make the terms pretty
|
||||
name = name.lower()
|
||||
if name == 'matchedrule':
|
||||
name = 'match'
|
||||
|
||||
# in the common case, there's a single argument
|
||||
# so use it directly.
|
||||
# like: name=number value=1
|
||||
if isinstance(value, list) and len(value) == 1:
|
||||
value = value[0]
|
||||
|
||||
return {
|
||||
'type': name,
|
||||
name: value,
|
||||
}
|
||||
return result
|
||||
|
||||
|
||||
def convert_node_to_result_document(node):
|
||||
|
||||
@@ -48,7 +48,7 @@ def render_statement(ostream, match, statement, indent=0):
|
||||
elif child['type'] == 'bytes':
|
||||
feature = '%s(%s)' % (child['type'], rutils.bold2(rutils.hex_string(child[child['type']])))
|
||||
elif child['type'] == 'characteristic':
|
||||
feature = 'characteristic(%s)' % (rutils.bold2(child['characteristic'][0]))
|
||||
feature = 'characteristic(%s)' % (rutils.bold2(child['characteristic']))
|
||||
else:
|
||||
raise RuntimeError('unexpected feature type: ' + str(child))
|
||||
|
||||
@@ -94,13 +94,16 @@ def render_feature(ostream, match, feature, indent=0):
|
||||
# it should always be an even number of characters (its hex).
|
||||
ostream.write(rutils.bold2(rutils.hex_string(feature[feature['type']])))
|
||||
elif feature['type'] == 'characteristic':
|
||||
ostream.write('characteristic(%s)' % (rutils.bold2(feature['characteristic'][0])))
|
||||
ostream.write('characteristic(%s)' % (rutils.bold2(feature['characteristic'])))
|
||||
# note that regex is found in `render_statement`
|
||||
else:
|
||||
raise RuntimeError('unexpected feature type: ' + str(feature))
|
||||
|
||||
render_locations(ostream, match)
|
||||
if 'description' in feature:
|
||||
ostream.write(' = ')
|
||||
ostream.write(feature['description'])
|
||||
|
||||
render_locations(ostream, match)
|
||||
ostream.write('\n')
|
||||
|
||||
|
||||
|
||||
@@ -138,7 +138,7 @@ class InvalidRuleSet(ValueError):
|
||||
|
||||
def ensure_feature_valid_for_scope(scope, feature):
|
||||
if isinstance(feature, capa.features.Characteristic):
|
||||
if capa.features.Characteristic(feature.name) not in SUPPORTED_FEATURES[scope]:
|
||||
if capa.features.Characteristic(feature.attribute) not in SUPPORTED_FEATURES[scope]:
|
||||
raise InvalidRule('feature %s not support for scope %s' % (feature, scope))
|
||||
elif not isinstance(feature, tuple(filter(lambda t: isinstance(t, type), SUPPORTED_FEATURES[scope]))):
|
||||
raise InvalidRule('feature %s not support for scope %s' % (feature, scope))
|
||||
@@ -226,7 +226,7 @@ def parse_description(s, value_type, description=None):
|
||||
'''
|
||||
if value_type != 'string' and isinstance(s, str) and ' = ' in s:
|
||||
if description:
|
||||
raise InvalidRule('unexpected value: "%s", only one description allowed (inline description with `=`)' % s)
|
||||
raise InvalidRule('unexpected value: "%s", only one description allowed (inline description with ` = `)' % s)
|
||||
value, description = s.split(' = ', 1)
|
||||
if description == '':
|
||||
raise InvalidRule('unexpected value: "%s", description cannot be empty' % s)
|
||||
@@ -244,7 +244,7 @@ def parse_description(s, value_type, description=None):
|
||||
if len(value) > MAX_BYTES_FEATURE_SIZE:
|
||||
raise InvalidRule('unexpected bytes value: byte sequences must be no larger than %s bytes' %
|
||||
MAX_BYTES_FEATURE_SIZE)
|
||||
elif value_type in ['number', 'offset']:
|
||||
elif value_type in {'number', 'offset'}:
|
||||
try:
|
||||
value = parse_int(value)
|
||||
except ValueError:
|
||||
@@ -370,8 +370,8 @@ def build_statements(d, scope):
|
||||
raise InvalidRule('invalid regular expression: %s it should use Python syntax, try it at https://pythex.org' % d[key])
|
||||
else:
|
||||
Feature = parse_feature(key)
|
||||
value, symbol = parse_description(d[key], key, d.get('description'))
|
||||
feature = Feature(value, symbol)
|
||||
value, description = parse_description(d[key], key, d.get('description'))
|
||||
feature = Feature(value, description)
|
||||
ensure_feature_valid_for_scope(scope, feature)
|
||||
return feature
|
||||
|
||||
|
||||
Reference in New Issue
Block a user