Files
capa/scripts/lint.py
William Ballenthin 1188103d1c pep8: isort
2020-07-02 10:52:05 -06:00

517 lines
14 KiB
Python

"""
Check the given capa rules for style issues.
Usage:
$ python scripts/lint.py rules/
"""
import os
import sys
import string
import hashlib
import logging
import os.path
import itertools
import posixpath
import argparse
import capa.main
import capa.engine
import capa.features
import capa.features.insn
logger = logging.getLogger("capa.lint")
class Lint(object):
name = "lint"
recommendation = ""
def check_rule(self, ctx, rule):
return False
class NameCasing(Lint):
name = "rule name casing"
recommendation = "Rename rule using to start with lower case letters"
def check_rule(self, ctx, rule):
return rule.name[0] in string.ascii_uppercase and rule.name[1] not in string.ascii_uppercase
class FilenameDoesntMatchRuleName(Lint):
name = "filename doesn't match the rule name"
recommendation = 'Rename rule file to match the rule name, expected: "{:s}", found: "{:s}"'
def check_rule(self, ctx, rule):
expected = rule.name
expected = expected.lower()
expected = expected.replace(" ", "-")
expected = expected.replace("(", "")
expected = expected.replace(")", "")
expected = expected.replace("+", "")
expected = expected.replace("/", "")
expected = expected + ".yml"
found = os.path.basename(rule.meta["capa/path"])
self.recommendation = self.recommendation.format(expected, found)
return expected != found
class MissingNamespace(Lint):
name = "missing rule namespace"
recommendation = "Add meta.namespace so that the rule is emitted correctly"
def check_rule(self, ctx, rule):
return (
"namespace" not in rule.meta
and not is_nursery_rule(rule)
and "maec/malware-category" not in rule.meta
and "lib" not in rule.meta
)
class NamespaceDoesntMatchRulePath(Lint):
name = "file path doesn't match rule namespace"
recommendation = "Move rule to appropriate directory or update the namespace"
def check_rule(self, ctx, rule):
# let the other lints catch namespace issues
if "namespace" not in rule.meta:
return False
if is_nursery_rule(rule):
return False
if "maec/malware-category" in rule.meta:
return False
if "lib" in rule.meta:
return False
return rule.meta["namespace"] not in posixpath.normpath(rule.meta["capa/path"])
class MissingScope(Lint):
name = "missing scope"
recommendation = "Add meta.scope so that the scope is explicit (defaults to `function`)"
def check_rule(self, ctx, rule):
return "scope" not in rule.meta
class InvalidScope(Lint):
name = "invalid scope"
recommendation = "Use only file, function, or basic block rule scopes"
def check_rule(self, ctx, rule):
return rule.meta.get("scope") not in ("file", "function", "basic block")
class MissingAuthor(Lint):
name = "missing author"
recommendation = "Add meta.author so that users know who to contact with questions"
def check_rule(self, ctx, rule):
return "author" not in rule.meta
class MissingExamples(Lint):
name = "missing examples"
recommendation = "Add meta.examples so that the rule can be tested and verified"
def check_rule(self, ctx, rule):
return (
"examples" not in rule.meta
or not isinstance(rule.meta["examples"], list)
or len(rule.meta["examples"]) == 0
or rule.meta["examples"] == [None]
)
class MissingExampleOffset(Lint):
name = "missing example offset"
recommendation = "Add offset of example function"
def check_rule(self, ctx, rule):
if rule.meta.get("scope") in ("function", "basic block"):
for example in rule.meta.get("examples", []):
if example and ":" not in example:
logger.debug("example: %s", example)
return True
class ExampleFileDNE(Lint):
name = "referenced example doesn't exist"
recommendation = "Add the referenced example to samples directory ($capa-root/tests/data or supplied via --samples)"
def check_rule(self, ctx, rule):
if not rule.meta.get("examples"):
# let the MissingExamples lint catch this case, don't double report.
return False
found = False
for example in rule.meta.get("examples", []):
if example:
example_id = example.partition(":")[0]
if example_id in ctx["samples"]:
found = True
break
return not found
class DoesntMatchExample(Lint):
name = "doesn't match on referenced example"
recommendation = "Fix the rule logic or provide a different example"
def check_rule(self, ctx, rule):
if not ctx["is_thorough"]:
return False
for example in rule.meta.get("examples", []):
example_id = example.partition(":")[0]
try:
path = ctx["samples"][example_id]
except KeyError:
# lint ExampleFileDNE will catch this.
# don't double report.
continue
try:
extractor = capa.main.get_extractor(path, "auto")
capabilities = capa.main.find_capabilities(ctx["rules"], extractor, disable_progress=True)
except Exception as e:
logger.error("failed to extract capabilities: %s %s %s", rule.name, path, e)
return True
if rule.name not in capabilities:
return True
class UnusualMetaField(Lint):
name = "unusual meta field"
recommendation = 'Remove the meta field: "{:s}"'
def check_rule(self, ctx, rule):
for key in rule.meta.keys():
if key in capa.rules.META_KEYS:
continue
if key in capa.rules.HIDDEN_META_KEYS:
continue
self.recommendation = self.recommendation.format(key)
return True
return False
class LibRuleNotInLibDirectory(Lint):
name = "lib rule not found in lib directory"
recommendation = "Move the rule to the `lib` subdirectory of the rules path"
def check_rule(self, ctx, rule):
if is_nursery_rule(rule):
return False
if "lib" not in rule.meta:
return False
return "/lib/" not in posixpath.normpath(rule.meta["capa/path"])
class LibRuleHasNamespace(Lint):
name = "lib rule has a namespace"
recommendation = "Remove the namespace from the rule"
def check_rule(self, ctx, rule):
if "lib" not in rule.meta:
return False
return "namespace" in rule.meta
class FeatureStringTooShort(Lint):
name = "feature string too short"
recommendation = 'capa only extracts strings with length >= 4; will not match on "{:s}"'
def check_features(self, ctx, features):
for feature in features:
if isinstance(feature, capa.features.String):
if len(feature.value) < 4:
self.recommendation = self.recommendation.format(feature.value)
return True
return False
class FeatureNegativeNumberOrOffset(Lint):
name = "feature value is negative"
recommendation = (
"capa treats all numbers as unsigned values; you may specify the number's two's complement "
'representation; will not match on "{:d}"'
)
def check_features(self, ctx, features):
for feature in features:
if isinstance(feature, (capa.features.insn.Number, capa.features.insn.Offset)):
if feature.value < 0:
self.recommendation = self.recommendation.format(feature.value)
return True
return False
def run_lints(lints, ctx, rule):
for lint in lints:
if lint.check_rule(ctx, rule):
yield lint
def run_feature_lints(lints, ctx, features):
for lint in lints:
if lint.check_features(ctx, features):
yield lint
NAME_LINTS = (
NameCasing(),
FilenameDoesntMatchRuleName(),
)
def lint_name(ctx, rule):
return run_lints(NAME_LINTS, ctx, rule)
SCOPE_LINTS = (
MissingScope(),
InvalidScope(),
)
def lint_scope(ctx, rule):
return run_lints(SCOPE_LINTS, ctx, rule)
META_LINTS = (
MissingNamespace(),
NamespaceDoesntMatchRulePath(),
MissingAuthor(),
MissingExamples(),
MissingExampleOffset(),
ExampleFileDNE(),
UnusualMetaField(),
LibRuleNotInLibDirectory(),
LibRuleHasNamespace(),
)
def lint_meta(ctx, rule):
return run_lints(META_LINTS, ctx, rule)
FEATURE_LINTS = (
FeatureStringTooShort(),
FeatureNegativeNumberOrOffset(),
)
def lint_features(ctx, rule):
features = get_features(ctx, rule)
return run_feature_lints(FEATURE_LINTS, ctx, features)
def get_features(ctx, rule):
# get features from rule and all dependencies including subscopes and matched rules
features = []
namespaces = capa.rules.index_rules_by_namespace([rule])
deps = [ctx["rules"].rules[dep] for dep in rule.get_dependencies(namespaces)]
for r in [rule] + deps:
features.extend(get_rule_features(r))
return features
def get_rule_features(rule):
features = []
def rec(statement):
if isinstance(statement, capa.engine.Statement):
for child in statement.get_children():
rec(child)
else:
features.append(statement)
rec(rule.statement)
return features
LOGIC_LINTS = (DoesntMatchExample(),)
def lint_logic(ctx, rule):
return run_lints(LOGIC_LINTS, ctx, rule)
def is_nursery_rule(rule):
"""
The nursery is a spot for rules that have not yet been fully polished.
For example, they may not have references to public example of a technique.
Yet, we still want to capture and report on their matches.
"""
return rule.meta.get("capa/nursery")
def lint_rule(ctx, rule):
logger.debug(rule.name)
violations = list(
itertools.chain(
lint_name(ctx, rule),
lint_scope(ctx, rule),
lint_meta(ctx, rule),
lint_logic(ctx, rule),
lint_features(ctx, rule),
)
)
if len(violations) > 0:
category = rule.meta.get("rule-category")
print("")
print(
"%s%s %s"
% (" (nursery) " if is_nursery_rule(rule) else "", rule.name, ("(%s)" % category) if category else "",)
)
level = "WARN" if is_nursery_rule(rule) else "FAIL"
for violation in violations:
print(
"%s %s: %s: %s"
% (" " if is_nursery_rule(rule) else "", level, violation.name, violation.recommendation,)
)
return len(violations) > 0 and not is_nursery_rule(rule)
def lint(ctx, rules):
"""
Args:
samples (Dict[string, string]): map from sample id to path.
for each sample, record sample id of sha256, md5, and filename.
see `collect_samples(path)`.
rules (List[Rule]): the rules to lint.
"""
did_suggest_fix = False
for rule in rules.rules.values():
if rule.meta.get("capa/subscope-rule", False):
continue
did_suggest_fix = lint_rule(ctx, rule) or did_suggest_fix
return did_suggest_fix
def collect_samples(path):
"""
recurse through the given path, collecting all file paths, indexed by their content sha256, md5, and filename.
"""
samples = {}
for root, dirs, files in os.walk(path):
for name in files:
if name.endswith(".viv"):
continue
if name.endswith(".idb"):
continue
if name.endswith(".i64"):
continue
if name.endswith(".frz"):
continue
if name.endswith(".fnames"):
continue
path = os.path.join(root, name)
try:
with open(path, "rb") as f:
buf = f.read()
except IOError:
continue
sha256 = hashlib.sha256()
sha256.update(buf)
md5 = hashlib.md5()
md5.update(buf)
samples[sha256.hexdigest().lower()] = path
samples[sha256.hexdigest().upper()] = path
samples[md5.hexdigest().lower()] = path
samples[md5.hexdigest().upper()] = path
samples[name] = path
return samples
def main(argv=None):
if argv is None:
argv = sys.argv[1:]
samples_path = os.path.join(os.path.dirname(__file__), "..", "tests", "data")
parser = argparse.ArgumentParser(description="A program.")
parser.add_argument("rules", type=str, help="Path to rules")
parser.add_argument("--samples", type=str, default=samples_path, help="Path to samples")
parser.add_argument(
"--thorough", action="store_true", help="Enable thorough linting - takes more time, but does a better job",
)
parser.add_argument("-v", "--verbose", action="store_true", help="Enable debug logging")
parser.add_argument("-q", "--quiet", action="store_true", help="Disable all output but errors")
args = parser.parse_args(args=argv)
if args.verbose:
level = logging.DEBUG
elif args.quiet:
level = logging.ERROR
else:
level = logging.INFO
logging.basicConfig(level=level)
logging.getLogger("capa.lint").setLevel(level)
capa.main.set_vivisect_log_level(logging.CRITICAL)
logging.getLogger("capa").setLevel(logging.CRITICAL)
try:
rules = capa.main.get_rules(args.rules)
rules = capa.rules.RuleSet(rules)
logger.info("successfully loaded %s rules", len(rules))
except IOError as e:
logger.error("%s", str(e))
return -1
except capa.rules.InvalidRule as e:
logger.error("%s", str(e))
return -1
logger.info("collecting potentially referenced samples")
if not os.path.exists(args.samples):
logger.error("samples path %s does not exist", args.samples)
return -1
samples = collect_samples(args.samples)
ctx = {
"samples": samples,
"rules": rules,
"is_thorough": args.thorough,
}
did_violate = lint(ctx, rules)
if not did_violate:
logger.info("no suggestions, nice!")
return 0
else:
return 1
if __name__ == "__main__":
sys.exit(main())