From 681feebf356d9d80bfb970c2136cb846eb6c1b10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ana=20Mar=C3=ADa=20Mart=C3=ADnez=20G=C3=B3mez?= Date: Tue, 30 Jun 2020 10:57:58 +0200 Subject: [PATCH] Adapt description implementation to new output As the `__str__` method is not used anymore in the output, the description implementation needs to be adapted. --- README.md | 58 +++++++++++++++++++++------------------ capa/features/__init__.py | 33 ++++++++++------------ capa/features/insn.py | 8 +++--- capa/render/__init__.py | 20 +++----------- capa/render/vverbose.py | 9 ++++-- capa/rules.py | 10 +++---- 6 files changed, 66 insertions(+), 72 deletions(-) diff --git a/README.md b/README.md index 318b6ba6..7f8d3845 100644 --- a/README.md +++ b/README.md @@ -84,6 +84,7 @@ Download capa from the [Releases](/releases) page or get the nightly builds here - [section](#section) - [counting](#counting) - [matching prior rule matches](#matching-prior-rule-matches) + - [description](#description) - [limitations](#Limitations) # installation @@ -317,25 +318,6 @@ These are the features supported at the function-scope: - [mnemonic](#mnemonic) - [characteristics](#characteristics) -All of them support an optional description which helps with documenting rules and provides context in capa's output. -It can be specified in the following way: - -``` -- string: This program cannot be run in DOS mode. - description: MS-DOS stub message -- number: 0x4550 - description: IMAGE_DOS_SIGNATURE (MZ) -``` - -For all features except for [string](#string), the description can be specified inline preceded by ` = `. -For the previous [number](#number) example: - -``` -- number: 0x4550 = IMAGE_DOS_SIGNATURE (MZ) -``` - -The inline syntax is preferred (except for [string](#string)). - ### api A call to a named function, probably an import, though possibly a local function (like `malloc`) extracted via FLIRT. @@ -358,8 +340,9 @@ For example, a crypto constant. 
The parameter is a number; if prefixed with `0x` then in hex format, otherwise, decimal format. -It can include an optional description, e.g. for constant definitions. -The inline syntax is preferred (` = DESCRIPTION STRING`). +To help humans understand the meaning of a number, for example that the constant `0x40` means `PAGE_EXECUTE_READWRITE`, you may provide a description alongside the definition. +Use the inline syntax (preferred) by ending the line with ` = DESCRIPTION STRING`. +Check the [description section](#description) for more details. Examples: @@ -381,7 +364,9 @@ Regexes should be surrounded with `/` characters. By default, capa uses case-sensitive matching and assumes leading and trailing wildcards. To perform case-insensitive matching append an `i`. To anchor the regex at the start or end of a string, use `^` and/or `$`. -Strings can include a description, but the inline syntax is not supported. +To add context to a string, use the two-line syntax with the `description` tag: `description: DESCRIPTION STRING`. +The inline syntax is not supported. +Check the [description section](#description) for more details. Examples: @@ -401,9 +386,9 @@ Note that regex matching is expensive (`O(features)` rather than `O(1)`) so they A sequence of bytes referenced by the logic of the program. The provided sequence must match from the beginning of the referenced bytes and be no more than `0x100` bytes. The parameter is a sequence of hexadecimal bytes. -It can include an optional description. -The inline syntax is preferred (` = DESCRIPTION STRING`). - +To help humans understand the meaning of the byte sequence, you may provide a description. +Use the inline syntax (preferred) by ending the line with ` = DESCRIPTION STRING`. +Check the [description section](#description) for more details. The example below illustrates byte matching given a COM CLSID pushed onto the stack prior to `CoCreateInstance`. 
@@ -482,7 +467,6 @@ These are the features supported at the file-scope: - [import](#import) - [section](#section) -All of them can be followed by an optional description, as the features in the previous section. ### file string An ASCII or UTF-16 LE string present in the file. @@ -563,6 +547,28 @@ By default, library rules will not be output to the user as a rule match, but can be matched by other rules. When no active rules depend on a library rule, these the library rules will not be evaluated - maintaining performance. +## description + +All features support an optional description which helps with documenting rules and provides context in capa's output. +For all features except for [strings](#string), the description can be specified inline preceded by ` = `: ` = DESCRIPTION STRING`. +For example: + +``` +- number: 0x4550 = IMAGE_DOS_SIGNATURE (MZ) +``` + +The inline syntax is preferred. +For [strings](#string), or if the description is long or contains newlines, use the two-line syntax. +It uses the `description` tag in the following way: `description: DESCRIPTION STRING`. +For example: + +``` +- string: This program cannot be run in DOS mode. + description: MS-DOS stub message +- number: 0x4550 + description: IMAGE_DOS_SIGNATURE (MZ) +``` + # limitations To learn more about capa's current limitations see [here](doc/limitations.md). 
diff --git a/capa/features/__init__.py b/capa/features/__init__.py index 182cd514..ee407291 100644 --- a/capa/features/__init__.py +++ b/capa/features/__init__.py @@ -19,7 +19,7 @@ def bytes_to_str(b): class Feature(object): def __init__(self, args, description=None): super(Feature, self).__init__() - self.name = self.__class__.__name__ + self.name = self.__class__.__name__.lower() self.args = args self.description = description @@ -29,17 +29,16 @@ class Feature(object): def __eq__(self, other): return self.name == other.name and self.args == other.args - def _str_name(self): - return self.name.lower() - - def _str_value(self): + # Used to overwrite the rendering of the feature args in `__str__` and the + # json output + def get_args_str(self): return ','.join(self.args) def __str__(self): if self.description: - return '%s(%s = %s)' % (self._str_name(), self._str_value(), self.description) + return '%s(%s = %s)' % (self.name, self.get_args_str(), self.description) else: - return '%s(%s)' % (self._str_name(), self._str_value()) + return '%s(%s)' % (self.name, self.get_args_str()) def __repr__(self): return str(self) @@ -62,21 +61,19 @@ class Feature(object): class MatchedRule(Feature): def __init__(self, rule_name, description=None): super(MatchedRule, self).__init__([rule_name], description) + self.name = 'match' self.rule_name = rule_name - def _str_name(self): - return 'match' - class Characteristic(Feature): - def __init__(self, name, value=None, description=None): + def __init__(self, attribute, value=None, description=None): ''' when `value` is not provided, this serves as descriptor for a class of characteristics. this is only used internally, such as in `rules.py` when checking if a statement is supported by a given scope. 
''' - super(Characteristic, self).__init__([name, value], description) - self.name = name + super(Characteristic, self).__init__([attribute, value], description) + self.attribute = attribute self.value = value def evaluate(self, ctx): @@ -84,11 +81,11 @@ class Characteristic(Feature): raise ValueError('cannot evaluate characteristc %s with empty value' % (str(self))) return super(Characteristic, self).evaluate(ctx) - def _str_value(self): + def get_args_str(self): if self.value is None: - return self.name + return self.attribute else: - return '%s(%s)' % (self.name, self.value) + return '%s(%s)' % (self.attribute, self.value) class String(Feature): @@ -112,8 +109,8 @@ class Bytes(Feature): return capa.engine.Result(False, self, []) - def _str_value(self): - return '0x%s' % bytes_to_str(self.value).upper() + def get_args_str(self): + return bytes_to_str(self.value).upper() def freeze_serialize(self): return (self.__class__.__name__, diff --git a/capa/features/insn.py b/capa/features/insn.py index 122bd0ae..a353cb43 100644 --- a/capa/features/insn.py +++ b/capa/features/insn.py @@ -16,8 +16,8 @@ class Number(Feature): super(Number, self).__init__([value], description) self.value = value - def _str_value(self): - return '0x%x' % self.value + def get_args_str(self): + return '0x%X' % self.value class Offset(Feature): @@ -25,8 +25,8 @@ class Offset(Feature): super(Offset, self).__init__([value]) self.value = value - def _str_value(self): - return '0x%x' % self.value + def get_args_str(self): + return '0x%X' % self.value class Mnemonic(Feature): diff --git a/capa/render/__init__.py b/capa/render/__init__.py index e53aca1f..a1808f6e 100644 --- a/capa/render/__init__.py +++ b/capa/render/__init__.py @@ -86,23 +86,11 @@ def convert_feature_to_result_document(feature): "type": "characteristic" }, """ - name, value = feature.freeze_serialize() + result = {'type': feature.name, feature.name: feature.get_args_str()} + if feature.description: + result['description'] = 
feature.description - # make the terms pretty - name = name.lower() - if name == 'matchedrule': - name = 'match' - - # in the common case, there's a single argument - # so use it directly. - # like: name=number value=1 - if isinstance(value, list) and len(value) == 1: - value = value[0] - - return { - 'type': name, - name: value, - } + return result def convert_node_to_result_document(node): diff --git a/capa/render/vverbose.py b/capa/render/vverbose.py index 46dd09f8..0f0adc2a 100644 --- a/capa/render/vverbose.py +++ b/capa/render/vverbose.py @@ -48,7 +48,7 @@ def render_statement(ostream, match, statement, indent=0): elif child['type'] == 'bytes': feature = '%s(%s)' % (child['type'], rutils.bold2(rutils.hex_string(child[child['type']]))) elif child['type'] == 'characteristic': - feature = 'characteristic(%s)' % (rutils.bold2(child['characteristic'][0])) + feature = 'characteristic(%s)' % (rutils.bold2(child['characteristic'])) else: raise RuntimeError('unexpected feature type: ' + str(child)) @@ -94,13 +94,16 @@ def render_feature(ostream, match, feature, indent=0): # it should always be an even number of characters (its hex). 
ostream.write(rutils.bold2(rutils.hex_string(feature[feature['type']]))) elif feature['type'] == 'characteristic': - ostream.write('characteristic(%s)' % (rutils.bold2(feature['characteristic'][0]))) + ostream.write('characteristic(%s)' % (rutils.bold2(feature['characteristic']))) # note that regex is found in `render_statement` else: raise RuntimeError('unexpected feature type: ' + str(feature)) - render_locations(ostream, match) + if 'description' in feature: + ostream.write(' = ') + ostream.write(feature['description']) + render_locations(ostream, match) ostream.write('\n') diff --git a/capa/rules.py b/capa/rules.py index 7e2ffd6a..155f7127 100644 --- a/capa/rules.py +++ b/capa/rules.py @@ -138,7 +138,7 @@ class InvalidRuleSet(ValueError): def ensure_feature_valid_for_scope(scope, feature): if isinstance(feature, capa.features.Characteristic): - if capa.features.Characteristic(feature.name) not in SUPPORTED_FEATURES[scope]: + if capa.features.Characteristic(feature.attribute) not in SUPPORTED_FEATURES[scope]: raise InvalidRule('feature %s not support for scope %s' % (feature, scope)) elif not isinstance(feature, tuple(filter(lambda t: isinstance(t, type), SUPPORTED_FEATURES[scope]))): raise InvalidRule('feature %s not support for scope %s' % (feature, scope)) @@ -226,7 +226,7 @@ def parse_description(s, value_type, description=None): ''' if value_type != 'string' and isinstance(s, str) and ' = ' in s: if description: - raise InvalidRule('unexpected value: "%s", only one description allowed (inline description with `=`)' % s) + raise InvalidRule('unexpected value: "%s", only one description allowed (inline description with ` = `)' % s) value, description = s.split(' = ', 1) if description == '': raise InvalidRule('unexpected value: "%s", description cannot be empty' % s) @@ -244,7 +244,7 @@ def parse_description(s, value_type, description=None): if len(value) > MAX_BYTES_FEATURE_SIZE: raise InvalidRule('unexpected bytes value: byte sequences must be no larger 
than %s bytes' % MAX_BYTES_FEATURE_SIZE) - elif value_type in ['number', 'offset']: + elif value_type in {'number', 'offset'}: try: value = parse_int(value) except ValueError: @@ -370,8 +370,8 @@ def build_statements(d, scope): raise InvalidRule('invalid regular expression: %s it should use Python syntax, try it at https://pythex.org' % d[key]) else: Feature = parse_feature(key) - value, symbol = parse_description(d[key], key, d.get('description')) - feature = Feature(value, symbol) + value, description = parse_description(d[key], key, d.get('description')) + feature = Feature(value, description) ensure_feature_valid_for_scope(scope, feature) return feature