Merge pull request #155 from fireeye/ana-desc-regex

Enable descriptions for regular expressions
This commit is contained in:
Willi Ballenthin
2020-07-15 15:22:50 -06:00
committed by GitHub
7 changed files with 78 additions and 68 deletions

View File

@@ -1,6 +1,5 @@
# Copyright (C) 2020 FireEye, Inc. All Rights Reserved.
import re
import sys
import copy
import collections
@@ -176,39 +175,6 @@ class Range(Statement):
return "range(%s, min=%d, max=%d)" % (str(self.child), self.min, self.max)
class Regex(Statement):
    """
    match if the given pattern matches a String feature.

    the pattern is written as it appears in a rule: `/expr/`, or `/expr/i`
    for a case-insensitive match. `re.DOTALL` is always enabled.
    the value of the most recently matched string is kept in `self.match`.
    """

    def __init__(self, pattern):
        super(Regex, self).__init__()
        self.pattern = pattern

        # strip the surrounding slashes (and the trailing `i` flag, if present)
        # to recover the raw expression.
        if pattern.endswith("/i"):
            expression = pattern[len("/") : -len("/i")]
            options = re.DOTALL | re.IGNORECASE
        else:
            expression = pattern[len("/") : -len("/")]
            options = re.DOTALL

        # an invalid expression raises `re.error` from here.
        self.re = re.compile(expression, options)
        self.match = ""

    def evaluate(self, ctx):
        """return a successful Result for the first String feature in `ctx` that the pattern matches."""
        for candidate, where in ctx.items():
            # `re.search` finds a match anywhere in the given string,
            # i.e. the pattern is not anchored.
            # using this mode is more convenient for rule authors,
            # so that they don't have to prefix/suffix their terms like: /.*foo.*/.
            if isinstance(candidate, capa.features.String) and self.re.search(candidate.value):
                self.match = candidate.value
                return Result(True, self, [], locations=where)

        return Result(False, self, [])

    def __str__(self):
        details = (self.pattern, self.match)
        return 'regex(string =~ %s, matched = "%s")' % details
class Subscope(Statement):
"""
a subscope element is a placeholder in a rule - it should not be evaluated directly.

View File

@@ -1,5 +1,6 @@
# Copyright (C) 2020 FireEye, Inc. All Rights Reserved.
import re
import sys
import codecs
import logging
@@ -82,6 +83,50 @@ class String(Feature):
super(String, self).__init__(value, description)
class Regex(String):
    """
    string feature whose value is a slash-delimited regular expression.

    the value is written as `/expr/`, or `/expr/i` for a case-insensitive match.
    `re.DOTALL` is always enabled.
    the value of the most recently matched string is kept in `self.match`.
    """

    def __init__(self, value, description=None):
        """
        raises:
          ValueError: if the expression cannot be compiled by `re`.
        """
        super(Regex, self).__init__(value, description)

        # strip the surrounding slashes (and the trailing `i` flag, if present)
        # to recover the raw expression.
        if value.endswith("/i"):
            expression = self.value[len("/") : -len("/i")]
            options = re.DOTALL | re.IGNORECASE
        else:
            expression = self.value[len("/") : -len("/")]
            options = re.DOTALL

        try:
            self.re = re.compile(expression, options)
        except re.error:
            # report the pattern without its trailing `i` flag.
            shown = value[: -len("i")] if value.endswith("/i") else value
            raise ValueError(
                "invalid regular expression: %s it should use Python syntax, try it at https://pythex.org" % shown
            )

        self.match = None

    def evaluate(self, ctx):
        """return a successful Result for the first String feature in `ctx` that the pattern matches."""
        for candidate, where in ctx.items():
            # `re.search` finds a match anywhere in the given string,
            # i.e. the pattern is not anchored.
            # using this mode is more convenient for rule authors,
            # so that they don't have to prefix/suffix their terms like: /.*foo.*/.
            if isinstance(candidate, capa.features.String) and self.re.search(candidate.value):
                self.match = candidate.value
                return capa.engine.Result(True, self, [], locations=where)

        return capa.engine.Result(False, self, [])

    def __str__(self):
        details = (self.value, self.match)
        return 'regex(string =~ %s, matched = "%s")' % details
class StringFactory(object):
    """
    construct the appropriate feature for a string value.

    values delimited like `/expr/` or `/expr/i` become `Regex` features,
    everything else becomes a literal `String` feature.
    calling this class never yields a `StringFactory` instance.
    """

    def __new__(cls, value, description=None):
        """
        args:
          value (str): a string literal or a slash-delimited regular expression.
          description (str, optional): passed through unchanged to the constructed feature.

        returns:
          Regex or String: the feature constructed from `value`.
        """
        # the closing delimiter must be distinct from the opening slash,
        # otherwise the bare strings "/" and "/i" would be treated as empty
        # regular expressions, which match every string.
        for closer in ("/", "/i"):
            if value.startswith("/") and value.endswith(closer) and len(value) > len(closer):
                return Regex(value, description)

        return String(value, description)
class Bytes(Feature):
def __init__(self, value, description=None):
super(Bytes, self).__init__(value, description)

View File

@@ -373,11 +373,6 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
return parent2
elif statement["type"] == "subscope":
return CapaExplorerSubscopeItem(parent, statement[statement["type"]])
elif statement["type"] == "regex":
# regex is a `Statement` not a `Feature`
# this is because it doesn't get extracted, but applies to all strings in scope.
# so we have to handle it here
return CapaExplorerFeatureItem(parent, "regex(%s)" % statement["pattern"], details=statement["match"])
else:
raise RuntimeError("unexpected match statement type: " + str(statement))
@@ -496,7 +491,9 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
if len(locations) == 1:
# only one location for feature so no need to nest children
parent2 = self.render_capa_doc_feature(parent, feature, next(iter(locations)), doc, display=display)
parent2 = self.render_capa_doc_feature(
parent, feature, next(iter(locations)), doc, display=display,
)
else:
# feature has multiple children, nest under one parent feature node
parent2 = CapaExplorerFeatureItem(parent, display)
@@ -539,6 +536,9 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
parent, display, source=doc["rules"].get(feature[feature["type"]], {}).get("source", "")
)
if feature["type"] == "regex":
return CapaExplorerFeatureItem(parent, display, location, details=feature["match"])
if feature["type"] == "basicblock":
return CapaExplorerBlockItem(parent, location)

View File

@@ -46,13 +46,6 @@ def convert_statement_to_result_document(statement):
"max": statement.max,
"child": convert_feature_to_result_document(statement.child),
}
elif isinstance(statement, capa.engine.Regex):
return {
"type": "regex",
"pattern": statement.pattern,
# the string that was matched
"match": statement.match,
}
elif isinstance(statement, capa.engine.Subscope):
return {
"type": "subscope",
@@ -90,7 +83,8 @@ def convert_feature_to_result_document(feature):
result = {"type": feature.name, feature.name: feature.get_value_str()}
if feature.description:
result["description"] = feature.description
if feature.name == "regex":
result["match"] = feature.match
return result

View File

@@ -70,11 +70,6 @@ def render_statement(ostream, match, statement, indent=0):
elif statement["type"] == "subscope":
ostream.write(statement["subscope"])
ostream.writeln(":")
elif statement["type"] == "regex":
# regex is a `Statement` not a `Feature`
# this is because it doesn't get extracted, but applies to all strings in scope.
# so we have to handle it here
ostream.writeln("string: %s" % (statement["match"]))
else:
raise RuntimeError("unexpected match statement type: " + str(statement))
@@ -82,11 +77,17 @@ def render_statement(ostream, match, statement, indent=0):
def render_feature(ostream, match, feature, indent=0):
ostream.write(" " * indent)
ostream.write(feature["type"])
key = feature["type"]
value = feature[feature["type"]]
if key == "regex":
key = "string" # render string for regex to mirror the rule source
value = feature["match"] # the match provides more information than the value for regex
ostream.write(key)
ostream.write(": ")
if feature[feature["type"]]:
ostream.write(rutils.bold2(feature[feature["type"]]))
if value:
ostream.write(rutils.bold2(value))
if "description" in feature:
ostream.write(capa.rules.DESCRIPTION_SEPARATOR)

View File

@@ -184,7 +184,7 @@ def parse_feature(key):
if key == "api":
return capa.features.insn.API
elif key == "string":
return capa.features.String
return capa.features.StringFactory
elif key == "bytes":
return capa.features.Bytes
elif key == "number":
@@ -348,19 +348,13 @@ def build_statements(d, scope):
raise InvalidRule("unexpected range: %s" % (count))
elif key == "string" and not isinstance(d[key], six.string_types):
raise InvalidRule("ambiguous string value %s, must be defined as explicit string" % d[key])
elif key == "string" and d[key].startswith("/") and (d[key].endswith("/") or d[key].endswith("/i")):
try:
return Regex(d[key])
except re.error:
if d[key].endswith("/i"):
d[key] = d[key][: -len("i")]
raise InvalidRule(
"invalid regular expression: %s it should use Python syntax, try it at https://pythex.org" % d[key]
)
else:
Feature = parse_feature(key)
value, description = parse_description(d[key], key, d.get("description"))
feature = Feature(value, description)
try:
feature = Feature(value, description)
except ValueError as e:
raise InvalidRule(str(e))
ensure_feature_valid_for_scope(scope, feature)
return feature

View File

@@ -74,12 +74,22 @@ def test_rule_yaml_descriptions():
- number: 1 = This is the number 1
- string: This program cannot be run in DOS mode.
description: MS-DOS stub message
- string: '/SELECT.*FROM.*WHERE/i'
description: SQL WHERE Clause
- count(number(2 = AF_INET/SOCK_DGRAM)): 2
"""
)
r = capa.rules.Rule.from_yaml(rule)
assert (
r.evaluate({Number(1): {1}, Number(2): {2, 3}, String("This program cannot be run in DOS mode."): {4}}) == True
r.evaluate(
{
Number(1): {1},
Number(2): {2, 3},
String("This program cannot be run in DOS mode."): {4},
String("SELECT password FROM hidden_table WHERE user == admin"): {5},
}
)
== True
)