Merge pull request #327 from fireeye/fix/312-statement-descriptions

parse descriptions for statements
This commit is contained in:
Willi Ballenthin
2020-09-25 11:50:47 -06:00
committed by GitHub
3 changed files with 94 additions and 42 deletions

View File

@@ -6,7 +6,6 @@
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import sys
import copy
import collections

View File

@@ -276,27 +276,63 @@ def parse_description(s, value_type, description=None):
return value, description
def pop_statement_description_entry(d):
    """
    extract the description of a statement and remove the description entry from the document.

    a statement can only have one description.
    the description is written as a dedicated child entry whose only key is `description`;
    children that carry a `description` next to other keys are feature descriptions and are left untouched.

    example:
    the features definition

        - or:
          - description: statement description
          - number: 1
            description: feature description

    is parsed into

        <statement>: [
          { "description": "statement description" },  <-- extracted (and removed) here
          { "number": 1, "description": "feature description" }
        ]

    args:
      d: the children of a statement. when this is a list, it is modified in place:
         the description entry, if any, is removed from it.

    returns:
      the value of the description entry, or None when `d` is not a list
      or contains no description entry.

    raises:
      InvalidRule: when more than one description entry is present.
    """
    if not isinstance(d, list):
        return None

    # identify children of the form '{ "description": <description> }'
    entries = [child for child in d if isinstance(child, dict) and len(child) == 1 and "description" in child]

    if not entries:
        return None

    if len(entries) > 1:
        raise InvalidRule("statements can only have one description")

    (entry,) = entries
    # remove the entry so that callers iterating over the remaining children
    # do not see the description as a child statement.
    d.remove(entry)
    return entry["description"]
def build_statements(d, scope):
if len(d.keys()) > 2:
raise InvalidRule("too many statements")
key = list(d.keys())[0]
description = pop_statement_description_entry(d[key])
if key == "and":
return And([build_statements(dd, scope) for dd in d[key]], description=d.get("description"))
return And([build_statements(dd, scope) for dd in d[key]], description=description)
elif key == "or":
return Or([build_statements(dd, scope) for dd in d[key]], description=d.get("description"))
return Or([build_statements(dd, scope) for dd in d[key]], description=description)
elif key == "not":
if len(d[key]) != 1:
raise InvalidRule("not statement must have exactly one child statement")
return Not(build_statements(d[key][0], scope), description=d.get("description"))
return Not(build_statements(d[key][0], scope), description=description)
elif key.endswith(" or more"):
count = int(key[: -len("or more")])
return Some(count, [build_statements(dd, scope) for dd in d[key]], description=d.get("description"))
return Some(count, [build_statements(dd, scope) for dd in d[key]], description=description)
elif key == "optional":
# `optional` is an alias for `0 or more`
# which is useful for documenting behaviors,
# like with `write file`, we might say that `WriteFile` is optionally found alongside `CreateFileA`.
return Some(0, [build_statements(dd, scope) for dd in d[key]], description=d.get("description"))
return Some(0, [build_statements(dd, scope) for dd in d[key]], description=description)
elif key == "function":
if scope != FILE_SCOPE:
@@ -355,18 +391,18 @@ def build_statements(d, scope):
count = d[key]
if isinstance(count, int):
return Range(feature, min=count, max=count, description=d.get("description"))
return Range(feature, min=count, max=count, description=description)
elif count.endswith(" or more"):
min = parse_int(count[: -len(" or more")])
max = None
return Range(feature, min=min, max=max, description=d.get("description"))
return Range(feature, min=min, max=max, description=description)
elif count.endswith(" or fewer"):
min = None
max = parse_int(count[: -len(" or fewer")])
return Range(feature, min=min, max=max, description=d.get("description"))
return Range(feature, min=min, max=max, description=description)
elif count.startswith("("):
min, max = parse_range(count)
return Range(feature, min=min, max=max, description=d.get("description"))
return Range(feature, min=min, max=max, description=description)
else:
raise InvalidRule("unexpected range: %s" % (count))
elif key == "string" and not isinstance(d[key], six.string_types):

View File

@@ -69,46 +69,63 @@ def test_rule_yaml_complex():
assert r.evaluate({Number(6): {1}, Number(7): {1}, Number(8): {1}}) == False
def test_rule_yaml_descriptions():
def test_rule_descriptions():
rule = textwrap.dedent(
"""
rule:
meta:
name: test rule
features:
meta:
name: test rule
features:
- and:
- description: and description
- number: 1 = number description
- string: mystring
description: string description
- string: '/myregex/'
description: regex description
# TODO - count(number(2 = number description)): 2
- or:
- description: or description
- and:
- number: 1 = This is the number 1
- string: This program cannot be run in DOS mode.
description: MS-DOS stub message
- string: '/SELECT.*FROM.*WHERE/i'
description: SQL WHERE Clause
- count(number(2 = AF_INET/SOCK_DGRAM)): 2
- or:
- and:
- offset: 0x50 = IMAGE_NT_HEADERS.OptionalHeader.SizeOfImage
- offset: 0x34 = IMAGE_NT_HEADERS.OptionalHeader.ImageBase
description: 32-bits
- and:
- offset: 0x50 = IMAGE_NT_HEADERS64.OptionalHeader.SizeOfImage
- offset: 0x30 = IMAGE_NT_HEADERS64.OptionalHeader.ImageBase
description: 64-bits
description: PE headers offsets
- offset: 0x50 = offset description
- offset: 0x34 = offset description
- description: and description
- and:
- description: and description
- offset/x64: 0x50 = offset/x64 description
- offset/x64: 0x30 = offset/x64 description
"""
)
r = capa.rules.Rule.from_yaml(rule)
assert (
r.evaluate(
{
Number(1): {1},
Number(2): {2, 3},
String("This program cannot be run in DOS mode."): {4},
String("SELECT password FROM hidden_table WHERE user == admin"): {5},
Offset(0x50): {6},
Offset(0x30): {7},
}
def rec(statement):
if isinstance(statement, capa.engine.Statement):
assert statement.description == statement.name.lower() + " description"
for child in statement.get_children():
rec(child)
else:
assert statement.description == statement.name + " description"
rec(r.statement)
def test_invalid_rule_statement_descriptions():
    """a statement that carries more than one description entry must be rejected."""
    # statements can only have one description
    with pytest.raises(capa.rules.InvalidRule):
        capa.rules.Rule.from_yaml(
            textwrap.dedent(
                """
                rule:
                    meta:
                        name: test rule
                    features:
                        - or:
                            - number: 1 = This is the number 1
                            - description: description
                            - description: another description (invalid)
                """
            )
        )
== True
)
def test_rule_yaml_not():