mirror of
https://github.com/mandiant/capa.git
synced 2026-01-08 03:11:05 -08:00
Merge pull request #155 from fireeye/ana-desc-regex
Enable descriptions for regular expressions
This commit is contained in:
@@ -1,6 +1,5 @@
|
||||
# Copyright (C) 2020 FireEye, Inc. All Rights Reserved.
|
||||
|
||||
import re
|
||||
import sys
|
||||
import copy
|
||||
import collections
|
||||
@@ -176,39 +175,6 @@ class Range(Statement):
|
||||
return "range(%s, min=%d, max=%d)" % (str(self.child), self.min, self.max)
|
||||
|
||||
|
||||
class Regex(Statement):
    """Statement that succeeds when its pattern is found in a String feature.

    The pattern is written with slash delimiters, ``/body/``, optionally
    followed by ``i`` (``/body/i``) to request case-insensitive matching.
    """

    def __init__(self, pattern):
        """Compile ``pattern`` after stripping its delimiters.

        Args:
          pattern: delimited pattern text, ``/body/`` or ``/body/i``.

        Raises:
          re.error: if the body is not a valid regular expression.
        """
        super(Regex, self).__init__()
        self.pattern = pattern

        # DOTALL is always enabled; IGNORECASE only with the trailing `i`.
        if pattern.endswith("/i"):
            # drop the leading "/" and the trailing "/i"
            body = self.pattern[1:-2]
            options = re.DOTALL | re.IGNORECASE
        else:
            # drop the leading "/" and the trailing "/"
            body = self.pattern[1:-1]
            options = re.DOTALL

        self.re = re.compile(body, options)
        # value of the most recently matched String feature ("" until a match)
        self.match = ""

    def evaluate(self, ctx):
        """Search the String features in ``ctx`` for the compiled pattern.

        Args:
          ctx: mapping from feature to its locations.

        Returns:
          A successful Result carrying the locations of the first matching
          String feature, or a failed Result when none matches.
        """
        for candidate, where in ctx.items():
            # `re.search` finds the pattern anywhere inside the string,
            # so rule authors don't have to write their terms as /.*foo.*/.
            if isinstance(candidate, capa.features.String) and self.re.search(candidate.value):
                # remember what matched so that it can be reported later
                self.match = candidate.value
                return Result(True, self, [], locations=where)

        return Result(False, self, [])

    def __str__(self):
        """Render the pattern together with the last matched string."""
        shown = (self.pattern, self.match)
        return 'regex(string =~ %s, matched = "%s")' % shown
|
||||
|
||||
|
||||
class Subscope(Statement):
|
||||
"""
|
||||
a subscope element is a placeholder in a rule - it should not be evaluated directly.
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
# Copyright (C) 2020 FireEye, Inc. All Rights Reserved.
|
||||
|
||||
import re
|
||||
import sys
|
||||
import codecs
|
||||
import logging
|
||||
@@ -82,6 +83,50 @@ class String(Feature):
|
||||
super(String, self).__init__(value, description)
|
||||
|
||||
|
||||
class Regex(String):
    """String feature whose value is a slash-delimited regular expression.

    The value is written as ``/body/``, optionally followed by ``i``
    (``/body/i``) to request case-insensitive matching.
    """

    def __init__(self, value, description=None):
        """Compile the delimited pattern held in ``value``.

        Args:
          value: delimited pattern text, ``/body/`` or ``/body/i``.
          description: optional description, passed to the String initializer.

        Raises:
          ValueError: if the body is not a valid regular expression.
        """
        super(Regex, self).__init__(value, description)

        ignore_case = value.endswith("/i")
        # DOTALL is always enabled; IGNORECASE only with the trailing `i`.
        if ignore_case:
            # drop the leading "/" and the trailing "/i"
            body = self.value[1:-2]
            options = re.DOTALL | re.IGNORECASE
        else:
            # drop the leading "/" and the trailing "/"
            body = self.value[1:-1]
            options = re.DOTALL

        try:
            self.re = re.compile(body, options)
        except re.error:
            # the message shows the pattern without its trailing `i` flag
            if ignore_case:
                value = value[:-1]
            raise ValueError(
                "invalid regular expression: %s it should use Python syntax, try it at https://pythex.org" % value
            )

        # value of the most recently matched String feature (None until a match)
        self.match = None

    def evaluate(self, ctx):
        """Search the String features in ``ctx`` for the compiled pattern.

        Args:
          ctx: mapping from feature to its locations.

        Returns:
          A successful capa.engine.Result carrying the locations of the first
          matching String feature, or a failed one when none matches.
        """
        for candidate, where in ctx.items():
            # `re.search` finds the pattern anywhere inside the string,
            # so rule authors don't have to write their terms as /.*foo.*/.
            if isinstance(candidate, capa.features.String) and self.re.search(candidate.value):
                # remember what matched so that it can be reported later
                self.match = candidate.value
                return capa.engine.Result(True, self, [], locations=where)

        return capa.engine.Result(False, self, [])

    def __str__(self):
        """Render the pattern together with the last matched string."""
        shown = (self.value, self.match)
        return 'regex(string =~ %s, matched = "%s")' % shown
|
||||
|
||||
|
||||
class StringFactory(object):
    """Factory that builds the right feature for a `string:` rule value.

    Calling ``StringFactory(value, description)`` never returns a
    StringFactory instance: it returns a Regex when ``value`` is a
    slash-delimited pattern (``/body/`` or ``/body/i``) and a plain String
    otherwise.
    """

    def __new__(cls, value, description=None):
        """Create a Regex or String feature for ``value``.

        Args:
          value: the string from the rule.
          description: optional description forwarded to the created feature;
            defaults to None, like the Regex constructor.

        Returns:
          A Regex when ``value`` is delimited as a pattern, else a String.

        Raises:
          ValueError: propagated from Regex when the pattern does not compile.
        """
        if value.startswith("/"):
            # The closing delimiter must be a different character than the
            # opening one. Otherwise the literal strings "/" and "/i" would be
            # compiled as an empty pattern, which matches every string.
            plain = value.endswith("/") and len(value) >= len("//")
            insensitive = value.endswith("/i") and len(value) >= len("//i")
            if plain or insensitive:
                return Regex(value, description)
        return String(value, description)
|
||||
|
||||
|
||||
class Bytes(Feature):
|
||||
def __init__(self, value, description=None):
|
||||
super(Bytes, self).__init__(value, description)
|
||||
|
||||
@@ -373,11 +373,6 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
|
||||
return parent2
|
||||
elif statement["type"] == "subscope":
|
||||
return CapaExplorerSubscopeItem(parent, statement[statement["type"]])
|
||||
elif statement["type"] == "regex":
|
||||
# regex is a `Statement` not a `Feature`
|
||||
# this is because it doesn't get extracted, but applies to all strings in scope.
|
||||
# so we have to handle it here
|
||||
return CapaExplorerFeatureItem(parent, "regex(%s)" % statement["pattern"], details=statement["match"])
|
||||
else:
|
||||
raise RuntimeError("unexpected match statement type: " + str(statement))
|
||||
|
||||
@@ -496,7 +491,9 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
|
||||
|
||||
if len(locations) == 1:
|
||||
# only one location for feature so no need to nest children
|
||||
parent2 = self.render_capa_doc_feature(parent, feature, next(iter(locations)), doc, display=display)
|
||||
parent2 = self.render_capa_doc_feature(
|
||||
parent, feature, next(iter(locations)), doc, display=display,
|
||||
)
|
||||
else:
|
||||
# feature has multiple children, nest under one parent feature node
|
||||
parent2 = CapaExplorerFeatureItem(parent, display)
|
||||
@@ -539,6 +536,9 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
|
||||
parent, display, source=doc["rules"].get(feature[feature["type"]], {}).get("source", "")
|
||||
)
|
||||
|
||||
if feature["type"] == "regex":
|
||||
return CapaExplorerFeatureItem(parent, display, location, details=feature["match"])
|
||||
|
||||
if feature["type"] == "basicblock":
|
||||
return CapaExplorerBlockItem(parent, location)
|
||||
|
||||
|
||||
@@ -46,13 +46,6 @@ def convert_statement_to_result_document(statement):
|
||||
"max": statement.max,
|
||||
"child": convert_feature_to_result_document(statement.child),
|
||||
}
|
||||
elif isinstance(statement, capa.engine.Regex):
|
||||
return {
|
||||
"type": "regex",
|
||||
"pattern": statement.pattern,
|
||||
# the string that was matched
|
||||
"match": statement.match,
|
||||
}
|
||||
elif isinstance(statement, capa.engine.Subscope):
|
||||
return {
|
||||
"type": "subscope",
|
||||
@@ -90,7 +83,8 @@ def convert_feature_to_result_document(feature):
|
||||
result = {"type": feature.name, feature.name: feature.get_value_str()}
|
||||
if feature.description:
|
||||
result["description"] = feature.description
|
||||
|
||||
if feature.name == "regex":
|
||||
result["match"] = feature.match
|
||||
return result
|
||||
|
||||
|
||||
|
||||
@@ -70,11 +70,6 @@ def render_statement(ostream, match, statement, indent=0):
|
||||
elif statement["type"] == "subscope":
|
||||
ostream.write(statement["subscope"])
|
||||
ostream.writeln(":")
|
||||
elif statement["type"] == "regex":
|
||||
# regex is a `Statement` not a `Feature`
|
||||
# this is because it doesn't get extracted, but applies to all strings in scope.
|
||||
# so we have to handle it here
|
||||
ostream.writeln("string: %s" % (statement["match"]))
|
||||
else:
|
||||
raise RuntimeError("unexpected match statement type: " + str(statement))
|
||||
|
||||
@@ -82,11 +77,17 @@ def render_statement(ostream, match, statement, indent=0):
|
||||
def render_feature(ostream, match, feature, indent=0):
|
||||
ostream.write(" " * indent)
|
||||
|
||||
ostream.write(feature["type"])
|
||||
key = feature["type"]
|
||||
value = feature[feature["type"]]
|
||||
if key == "regex":
|
||||
key = "string" # render string for regex to mirror the rule source
|
||||
value = feature["match"] # the match provides more information than the value for regex
|
||||
|
||||
ostream.write(key)
|
||||
ostream.write(": ")
|
||||
|
||||
if feature[feature["type"]]:
|
||||
ostream.write(rutils.bold2(feature[feature["type"]]))
|
||||
if value:
|
||||
ostream.write(rutils.bold2(value))
|
||||
|
||||
if "description" in feature:
|
||||
ostream.write(capa.rules.DESCRIPTION_SEPARATOR)
|
||||
|
||||
@@ -184,7 +184,7 @@ def parse_feature(key):
|
||||
if key == "api":
|
||||
return capa.features.insn.API
|
||||
elif key == "string":
|
||||
return capa.features.String
|
||||
return capa.features.StringFactory
|
||||
elif key == "bytes":
|
||||
return capa.features.Bytes
|
||||
elif key == "number":
|
||||
@@ -348,19 +348,13 @@ def build_statements(d, scope):
|
||||
raise InvalidRule("unexpected range: %s" % (count))
|
||||
elif key == "string" and not isinstance(d[key], six.string_types):
|
||||
raise InvalidRule("ambiguous string value %s, must be defined as explicit string" % d[key])
|
||||
elif key == "string" and d[key].startswith("/") and (d[key].endswith("/") or d[key].endswith("/i")):
|
||||
try:
|
||||
return Regex(d[key])
|
||||
except re.error:
|
||||
if d[key].endswith("/i"):
|
||||
d[key] = d[key][: -len("i")]
|
||||
raise InvalidRule(
|
||||
"invalid regular expression: %s it should use Python syntax, try it at https://pythex.org" % d[key]
|
||||
)
|
||||
else:
|
||||
Feature = parse_feature(key)
|
||||
value, description = parse_description(d[key], key, d.get("description"))
|
||||
feature = Feature(value, description)
|
||||
try:
|
||||
feature = Feature(value, description)
|
||||
except ValueError as e:
|
||||
raise InvalidRule(str(e))
|
||||
ensure_feature_valid_for_scope(scope, feature)
|
||||
return feature
|
||||
|
||||
|
||||
@@ -74,12 +74,22 @@ def test_rule_yaml_descriptions():
|
||||
- number: 1 = This is the number 1
|
||||
- string: This program cannot be run in DOS mode.
|
||||
description: MS-DOS stub message
|
||||
- string: '/SELECT.*FROM.*WHERE/i'
|
||||
description: SQL WHERE Clause
|
||||
- count(number(2 = AF_INET/SOCK_DGRAM)): 2
|
||||
"""
|
||||
)
|
||||
r = capa.rules.Rule.from_yaml(rule)
|
||||
assert (
|
||||
r.evaluate({Number(1): {1}, Number(2): {2, 3}, String("This program cannot be run in DOS mode."): {4}}) == True
|
||||
r.evaluate(
|
||||
{
|
||||
Number(1): {1},
|
||||
Number(2): {2, 3},
|
||||
String("This program cannot be run in DOS mode."): {4},
|
||||
String("SELECT password FROM hidden_table WHERE user == admin"): {5},
|
||||
}
|
||||
)
|
||||
== True
|
||||
)
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user