pep8: scripts

This commit is contained in:
William Ballenthin
2020-07-01 12:42:33 -06:00
parent d23ef48bb6
commit 23e70b4e85
3 changed files with 182 additions and 176 deletions

View File

@@ -1,11 +1,11 @@
'''
"""
Reformat the given capa rule into a consistent style.
Use the -i flag to update the rule in-place.
Usage:
$ python capafmt.py -i foo.yml
'''
"""
import sys
import logging
@@ -14,22 +14,24 @@ import argparse
import capa.rules
logger = logging.getLogger('capafmt')
logger = logging.getLogger("capafmt")
def main(argv=None):
if argv is None:
argv = sys.argv[1:]
parser = argparse.ArgumentParser(description='Capa rule formatter.')
parser.add_argument('path', type=str,
help='Path to rule to format')
parser.add_argument('-i', '--in-place', action='store_true', dest='in_place',
help='Format the rule in place, otherwise, write formatted rule to STDOUT')
parser.add_argument('-v', '--verbose', action='store_true',
help='Enable debug logging')
parser.add_argument('-q', '--quiet', action='store_true',
help='Disable all output but errors')
parser = argparse.ArgumentParser(description="Capa rule formatter.")
parser.add_argument("path", type=str, help="Path to rule to format")
parser.add_argument(
"-i",
"--in-place",
action="store_true",
dest="in_place",
help="Format the rule in place, otherwise, write formatted rule to STDOUT",
)
parser.add_argument("-v", "--verbose", action="store_true", help="Enable debug logging")
parser.add_argument("-q", "--quiet", action="store_true", help="Disable all output but errors")
args = parser.parse_args(args=argv)
if args.verbose:
@@ -40,17 +42,17 @@ def main(argv=None):
level = logging.INFO
logging.basicConfig(level=level)
logging.getLogger('capafmt').setLevel(level)
logging.getLogger("capafmt").setLevel(level)
rule = capa.rules.Rule.from_yaml_file(args.path)
if args.in_place:
with open(args.path, 'wb') as f:
f.write(rule.to_yaml().encode('utf-8'))
with open(args.path, "wb") as f:
f.write(rule.to_yaml().encode("utf-8"))
else:
print(rule.to_yaml().rstrip("\n"))
return 0
if __name__ == '__main__':
sys.exit(main())
if __name__ == "__main__":
sys.exit(main())

View File

@@ -1,10 +1,10 @@
'''
"""
Check the given capa rules for style issues.
Usage:
$ python scripts/lint.py rules/
'''
"""
import os
import os.path
import sys
@@ -22,41 +22,40 @@ import capa.engine
import capa.features
import capa.features.insn
logger = logging.getLogger('capa.lint')
logger = logging.getLogger("capa.lint")
class Lint(object):
    """
    Base class for a single style check applied to a capa rule.

    Subclasses override `name` and `recommendation` and implement `check_rule`.
    """

    # short label identifying the check.
    name = "lint"
    # advice shown to the rule author when the check fires.
    recommendation = ""

    def check_rule(self, ctx, rule):
        """
        Report whether `rule` violates this lint.

        Args:
          ctx (dict): shared lint context; lints in this file read the keys
            `samples`, `rules`, and `is_thorough` from it.
          rule: the rule under inspection.

        Returns:
          bool: True when the rule violates the lint.
            The base implementation never reports a violation.
        """
        return False
class NameCasing(Lint):
    """
    Flag rule names that start with an upper case ASCII letter
    that is not followed by a second upper case ASCII letter.
    """

    name = "rule name casing"
    recommendation = "Rename rule using to start with lower case letters"

    def check_rule(self, ctx, rule):
        """
        Args:
          ctx (dict): shared lint context (unused here).
          rule: the rule under inspection; only `rule.name` is read.

        Returns:
          bool: True when the name starts with an upper case letter and the
            second character, if any, is not upper case.
        """
        rule_name = rule.name
        if not rule_name:
            # nothing to inspect; indexing an empty name would raise IndexError.
            return False

        if rule_name[0] not in string.ascii_uppercase:
            return False

        # a one-character name has no second character to exempt it,
        # so guard the index instead of raising IndexError.
        return len(rule_name) == 1 or rule_name[1] not in string.ascii_uppercase
class FilenameDoesntMatchRuleName(Lint):
name = 'filename doesn\'t match the rule name'
name = "filename doesn't match the rule name"
recommendation = 'Rename rule file to match the rule name, expected: "{:s}", found: "{:s}"'
def check_rule(self, ctx, rule):
expected = rule.name
expected = expected.lower()
expected = expected.replace(' ', '-')
expected = expected.replace('(', '')
expected = expected.replace(')', '')
expected = expected.replace('+', '')
expected = expected.replace('/', '')
expected = expected + '.yml'
expected = expected.replace(" ", "-")
expected = expected.replace("(", "")
expected = expected.replace(")", "")
expected = expected.replace("+", "")
expected = expected.replace("/", "")
expected = expected + ".yml"
found = os.path.basename(rule.meta['capa/path'])
found = os.path.basename(rule.meta["capa/path"])
self.recommendation = self.recommendation.format(expected, found)
@@ -64,95 +63,99 @@ class FilenameDoesntMatchRuleName(Lint):
class MissingNamespace(Lint):
    """
    Flag rules that have no `namespace` meta field.

    Nursery rules, and rules carrying a `maec/malware-category` or `lib`
    meta field, are exempt.
    """

    name = "missing rule namespace"
    recommendation = "Add meta.namespace so that the rule is emitted correctly"

    def check_rule(self, ctx, rule):
        # each guard below is one exemption; anything left is a violation.
        if "namespace" in rule.meta:
            return False

        if is_nursery_rule(rule):
            return False

        if "maec/malware-category" in rule.meta:
            return False

        return "lib" not in rule.meta
class NamespaceDoesntMatchRulePath(Lint):
    """
    Flag rules whose `namespace` meta value does not appear in the rule's file path.

    Rules without a namespace, nursery rules, and rules carrying a
    `maec/malware-category` or `lib` meta field are not checked.
    """

    name = "file path doesn't match rule namespace"
    recommendation = "Move rule to appropriate directory or update the namespace"

    def check_rule(self, ctx, rule):
        """
        Args:
          ctx (dict): shared lint context (unused here).
          rule: the rule under inspection; reads `rule.meta`.

        Returns:
          bool: True when the namespace is not a substring of the normalized rule path.
        """
        meta = rule.meta

        # let the other lints catch namespace issues
        if "namespace" not in meta:
            return False

        if is_nursery_rule(rule):
            return False

        if "maec/malware-category" in meta:
            return False

        if "lib" in meta:
            return False

        # namespaces use forward slashes, but posixpath.normpath leaves the
        # platform separator (backslash on Windows) untouched, so convert it
        # first; this is a no-op where os.sep is already "/".
        rule_path = posixpath.normpath(meta["capa/path"].replace(os.sep, "/"))
        return meta["namespace"] not in rule_path
class MissingScope(Lint):
    """
    Flag rules that do not declare a `scope` meta field.
    """

    name = "missing scope"
    recommendation = "Add meta.scope so that the scope is explicit (defaults to `function`)"

    def check_rule(self, ctx, rule):
        has_scope = "scope" in rule.meta
        return not has_scope
class InvalidScope(Lint):
    """
    Flag rules whose `scope` meta value is not one of the supported scopes.
    A rule with no `scope` field at all is flagged as well.
    """

    name = "invalid scope"
    recommendation = "Use only file, function, or basic block rule scopes"

    def check_rule(self, ctx, rule):
        supported_scopes = ("file", "function", "basic block")
        declared_scope = rule.meta.get("scope")
        return declared_scope not in supported_scopes
class MissingAuthor(Lint):
    """
    Flag rules that do not declare an `author` meta field.
    """

    name = "missing author"
    recommendation = "Add meta.author so that users know who to contact with questions"

    def check_rule(self, ctx, rule):
        has_author = "author" in rule.meta
        return not has_author
class MissingExamples(Lint):
    """
    Flag rules whose `examples` meta field is absent, not a list,
    empty, or a list holding only a single null entry.
    """

    name = "missing examples"
    recommendation = "Add meta.examples so that the rule can be tested and verified"

    def check_rule(self, ctx, rule):
        if "examples" not in rule.meta:
            return True

        examples = rule.meta["examples"]
        if not isinstance(examples, list):
            return True

        if len(examples) == 0:
            return True

        # a list holding only a null entry carries no usable example.
        return examples == [None]
class MissingExampleOffset(Lint):
    """
    Flag `function` and `basic block` scoped rules that reference an example
    without a `:` separator in the example string.
    """

    name = "missing example offset"
    recommendation = "Add offset of example function"

    def check_rule(self, ctx, rule):
        """
        Args:
          ctx (dict): shared lint context (unused here).
          rule: the rule under inspection; reads `rule.meta`.

        Returns:
          bool: True when a non-empty example lacks a `:`; False otherwise.
        """
        if rule.meta.get("scope") not in ("function", "basic block"):
            return False

        # `examples` may be present but null; treat that like "no examples"
        # rather than failing when iterating None.
        for example in rule.meta.get("examples") or []:
            if example and ":" not in example:
                logger.debug("example: %s", example)
                return True

        # report "no violation" explicitly, like the base class, instead of
        # falling off the end and returning None.
        return False
class ExampleFileDNE(Lint):
name = 'referenced example doesn\'t exist'
recommendation = 'Add the referenced example to samples directory ($capa-root/tests/data or supplied via --samples)'
name = "referenced example doesn't exist"
recommendation = "Add the referenced example to samples directory ($capa-root/tests/data or supplied via --samples)"
def check_rule(self, ctx, rule):
if not rule.meta.get('examples'):
if not rule.meta.get("examples"):
# let the MissingExamples lint catch this case, don't double report.
return False
found = False
for example in rule.meta.get('examples', []):
for example in rule.meta.get("examples", []):
if example:
example_id = example.partition(':')[0]
if example_id in ctx['samples']:
example_id = example.partition(":")[0]
if example_id in ctx["samples"]:
found = True
break
@@ -160,27 +163,27 @@ class ExampleFileDNE(Lint):
class DoesntMatchExample(Lint):
name = 'doesn\'t match on referenced example'
recommendation = 'Fix the rule logic or provide a different example'
name = "doesn't match on referenced example"
recommendation = "Fix the rule logic or provide a different example"
def check_rule(self, ctx, rule):
if not ctx['is_thorough']:
if not ctx["is_thorough"]:
return False
for example in rule.meta.get('examples', []):
example_id = example.partition(':')[0]
for example in rule.meta.get("examples", []):
example_id = example.partition(":")[0]
try:
path = ctx['samples'][example_id]
path = ctx["samples"][example_id]
except KeyError:
# lint ExampleFileDNE will catch this.
# don't double report.
continue
try:
extractor = capa.main.get_extractor(path, 'auto')
capabilities = capa.main.find_capabilities(ctx['rules'], extractor, disable_progress=True)
extractor = capa.main.get_extractor(path, "auto")
capabilities = capa.main.find_capabilities(ctx["rules"], extractor, disable_progress=True)
except Exception as e:
logger.error('failed to extract capabilities: %s %s %s', rule.name, path, e)
logger.error("failed to extract capabilities: %s %s %s", rule.name, path, e)
return True
if rule.name not in capabilities:
@@ -188,7 +191,7 @@ class DoesntMatchExample(Lint):
class UnusualMetaField(Lint):
name = 'unusual meta field'
name = "unusual meta field"
recommendation = 'Remove the meta field: "{:s}"'
def check_rule(self, ctx, rule):
@@ -204,32 +207,32 @@ class UnusualMetaField(Lint):
class LibRuleNotInLibDirectory(Lint):
    """
    Flag rules that carry a `lib` meta field but whose file path has no `/lib/` component.

    Nursery rules are not checked.
    """

    name = "lib rule not found in lib directory"
    recommendation = "Move the rule to the `lib` subdirectory of the rules path"

    def check_rule(self, ctx, rule):
        """
        Args:
          ctx (dict): shared lint context (unused here).
          rule: the rule under inspection; reads `rule.meta`.

        Returns:
          bool: True when a lib rule's normalized path does not contain `/lib/`.
        """
        if is_nursery_rule(rule):
            return False

        if "lib" not in rule.meta:
            return False

        # posixpath.normpath leaves the platform separator (backslash on
        # Windows) untouched, so convert it before looking for "/lib/";
        # this is a no-op where os.sep is already "/".
        rule_path = posixpath.normpath(rule.meta["capa/path"].replace(os.sep, "/"))
        return "/lib/" not in rule_path
class LibRuleHasNamespace(Lint):
    """
    Flag rules that carry both a `lib` and a `namespace` meta field.
    """

    name = "lib rule has a namespace"
    recommendation = "Remove the namespace from the rule"

    def check_rule(self, ctx, rule):
        meta = rule.meta
        return "lib" in meta and "namespace" in meta
class FeatureStringTooShort(Lint):
name = 'feature string too short'
name = "feature string too short"
recommendation = 'capa only extracts strings with length >= 4; will not match on "{:s}"'
def check_features(self, ctx, features):
@@ -242,9 +245,11 @@ class FeatureStringTooShort(Lint):
class FeatureNegativeNumberOrOffset(Lint):
name = 'feature value is negative'
recommendation = 'capa treats all numbers as unsigned values; you may specify the number\'s two\'s complement ' \
'representation; will not match on "{:d}"'
name = "feature value is negative"
recommendation = (
"capa treats all numbers as unsigned values; you may specify the number's two's complement "
'representation; will not match on "{:d}"'
)
def check_features(self, ctx, features):
for feature in features:
@@ -318,7 +323,7 @@ def lint_features(ctx, rule):
def get_features(ctx, rule):
    """
    Collect the features of `rule` and of every rule it depends on.

    Args:
      ctx (dict): shared lint context; `ctx["rules"].rules` maps a dependency to its rule.
      rule: the rule whose features, and whose dependencies' features, are collected.

    Returns:
      list: features of `rule` first, followed by those of each dependency.
    """
    # get features from rule and all dependencies including subscopes and matched rules
    ruleset = ctx["rules"].rules
    related = [rule]
    related.extend(ruleset[dep] for dep in rule.get_dependencies())

    collected = []
    for candidate in related:
        collected += get_rule_features(candidate)
    return collected
@@ -338,9 +343,7 @@ def get_rule_features(rule):
return features
# lints that exercise a rule's logic; one instance of each.
LOGIC_LINTS = (DoesntMatchExample(),)
def lint_logic(ctx, rule):
@@ -348,53 +351,58 @@ def lint_logic(ctx, rule):
def is_nursery_rule(rule):
    """
    The nursery is a spot for rules that have not yet been fully polished.
    For example, they may not have references to public example of a technique.
    Yet, we still want to capture and report on their matches.

    Args:
      rule: the rule to inspect; only `rule.meta` is read.

    Returns:
      bool: True when the rule's `capa/nursery` meta value is truthy.
    """
    # coerce so callers always get a real bool, not None or the raw meta value.
    return bool(rule.meta.get("capa/nursery"))
def lint_rule(ctx, rule):
    """
    Run every lint group against `rule` and print the violations found.

    Args:
      ctx (dict): shared lint context, passed through to each lint group.
      rule: the rule to lint.

    Returns:
      bool: True when at least one violation was found and the rule is not a nursery rule.
    """
    logger.debug(rule.name)

    violations = []
    for lint_group in (lint_name, lint_scope, lint_meta, lint_logic, lint_features):
        violations.extend(lint_group(ctx, rule))

    if not violations:
        return False

    nursery = is_nursery_rule(rule)
    category = rule.meta.get("rule-category")

    header_prefix = " (nursery) " if nursery else ""
    header_suffix = ("(%s)" % category) if category else ""

    print("")
    print("%s%s %s" % (header_prefix, rule.name, header_suffix))

    # nursery rules are reported as warnings and do not fail the run.
    if nursery:
        level = "WARN"
        indent = " "
    else:
        level = "FAIL"
        indent = ""

    for violation in violations:
        print("%s %s: %s: %s" % (indent, level, violation.name, violation.recommendation))

    return not nursery
def lint(ctx, rules):
'''
"""
Args:
samples (Dict[string, string]): map from sample id to path.
for each sample, record sample id of sha256, md5, and filename.
see `collect_samples(path)`.
rules (List[Rule]): the rules to lint.
'''
"""
did_suggest_fix = False
for rule in rules.rules.values():
if rule.meta.get('capa/subscope-rule', False):
if rule.meta.get("capa/subscope-rule", False):
continue
did_suggest_fix = lint_rule(ctx, rule) or did_suggest_fix
@@ -403,27 +411,27 @@ def lint(ctx, rules):
def collect_samples(path):
'''
"""
recurse through the given path, collecting all file paths, indexed by their content sha256, md5, and filename.
'''
"""
samples = {}
for root, dirs, files in os.walk(path):
for name in files:
if name.endswith('.viv'):
if name.endswith(".viv"):
continue
if name.endswith('.idb'):
if name.endswith(".idb"):
continue
if name.endswith('.i64'):
if name.endswith(".i64"):
continue
if name.endswith('.frz'):
if name.endswith(".frz"):
continue
if name.endswith('.fnames'):
if name.endswith(".fnames"):
continue
path = os.path.join(root, name)
try:
with open(path, 'rb') as f:
with open(path, "rb") as f:
buf = f.read()
except IOError:
continue
@@ -447,19 +455,16 @@ def main(argv=None):
if argv is None:
argv = sys.argv[1:]
samples_path = os.path.join(os.path.dirname(__file__), '..', 'tests', 'data')
samples_path = os.path.join(os.path.dirname(__file__), "..", "tests", "data")
parser = argparse.ArgumentParser(description='A program.')
parser.add_argument('rules', type=str,
help='Path to rules')
parser.add_argument('--samples', type=str, default=samples_path,
help='Path to samples')
parser.add_argument('--thorough', action='store_true',
help='Enable thorough linting - takes more time, but does a better job')
parser.add_argument('-v', '--verbose', action='store_true',
help='Enable debug logging')
parser.add_argument('-q', '--quiet', action='store_true',
help='Disable all output but errors')
parser = argparse.ArgumentParser(description="A program.")
parser.add_argument("rules", type=str, help="Path to rules")
parser.add_argument("--samples", type=str, default=samples_path, help="Path to samples")
parser.add_argument(
"--thorough", action="store_true", help="Enable thorough linting - takes more time, but does a better job",
)
parser.add_argument("-v", "--verbose", action="store_true", help="Enable debug logging")
parser.add_argument("-q", "--quiet", action="store_true", help="Disable all output but errors")
args = parser.parse_args(args=argv)
if args.verbose:
@@ -470,42 +475,42 @@ def main(argv=None):
level = logging.INFO
logging.basicConfig(level=level)
logging.getLogger('capa.lint').setLevel(level)
logging.getLogger("capa.lint").setLevel(level)
capa.main.set_vivisect_log_level(logging.CRITICAL)
logging.getLogger('capa').setLevel(logging.CRITICAL)
logging.getLogger("capa").setLevel(logging.CRITICAL)
try:
rules = capa.main.get_rules(args.rules)
rules = capa.rules.RuleSet(rules)
logger.info('successfully loaded %s rules', len(rules))
logger.info("successfully loaded %s rules", len(rules))
except IOError as e:
logger.error('%s', str(e))
logger.error("%s", str(e))
return -1
except capa.rules.InvalidRule as e:
logger.error('%s', str(e))
logger.error("%s", str(e))
return -1
logger.info('collecting potentially referenced samples')
logger.info("collecting potentially referenced samples")
if not os.path.exists(args.samples):
logger.error('samples path %s does not exist', args.samples)
logger.error("samples path %s does not exist", args.samples)
return -1
samples = collect_samples(args.samples)
ctx = {
'samples': samples,
'rules': rules,
'is_thorough': args.thorough,
"samples": samples,
"rules": rules,
"is_thorough": args.thorough,
}
did_violate = lint(ctx, rules)
if not did_violate:
logger.info('no suggestions, nice!')
logger.info("no suggestions, nice!")
return 0
else:
return 1
if __name__ == '__main__':
if __name__ == "__main__":
sys.exit(main())

View File

@@ -1,7 +1,7 @@
#!/usr/bin/env python2
'''
"""
show the features extracted by capa.
'''
"""
import sys
import logging
@@ -20,28 +20,27 @@ def main(argv=None):
argv = sys.argv[1:]
formats = [
('auto', '(default) detect file type automatically'),
('pe', 'Windows PE file'),
('sc32', '32-bit shellcode'),
('sc64', '64-bit shellcode'),
('freeze', 'features previously frozen by capa'),
("auto", "(default) detect file type automatically"),
("pe", "Windows PE file"),
("sc32", "32-bit shellcode"),
("sc64", "64-bit shellcode"),
("freeze", "features previously frozen by capa"),
]
format_help = ', '.join(['%s: %s' % (f[0], f[1]) for f in formats])
format_help = ", ".join(["%s: %s" % (f[0], f[1]) for f in formats])
parser = argparse.ArgumentParser(description='detect capabilities in programs.')
parser.add_argument('sample', type=str,
help='Path to sample to analyze')
parser.add_argument('-f', '--format', choices=[f[0] for f in formats], default='auto',
help='Select sample format, %s' % format_help)
parser.add_argument('-F', '--function', type=lambda x: int(x, 0),
help='Show features for specific function')
parser = argparse.ArgumentParser(description="detect capabilities in programs.")
parser.add_argument("sample", type=str, help="Path to sample to analyze")
parser.add_argument(
"-f", "--format", choices=[f[0] for f in formats], default="auto", help="Select sample format, %s" % format_help
)
parser.add_argument("-F", "--function", type=lambda x: int(x, 0), help="Show features for specific function")
args = parser.parse_args(args=argv)
logging.basicConfig(level=logging.INFO)
logging.getLogger().setLevel(logging.INFO)
if args.format == 'freeze':
with open(args.sample, 'rb') as f:
if args.format == "freeze":
with open(args.sample, "rb") as f:
extractor = capa.features.freeze.load(f.read())
else:
vw = capa.main.get_workspace(args.sample, args.format)
@@ -50,32 +49,32 @@ def main(argv=None):
if not args.function:
for feature, va in extractor.extract_file_features():
if va:
print('file: 0x%08x: %s' % (va, feature))
print("file: 0x%08x: %s" % (va, feature))
else:
print('file: 0x00000000: %s' % (feature))
print("file: 0x00000000: %s" % (feature))
functions = extractor.get_functions()
if args.function:
if args.format == 'freeze':
if args.format == "freeze":
functions = filter(lambda f: f == args.function, functions)
else:
functions = filter(lambda f: f.va == args.function, functions)
for f in functions:
for feature, va in extractor.extract_function_features(f):
print('func: 0x%08x: %s' % (va, feature))
print("func: 0x%08x: %s" % (va, feature))
for bb in extractor.get_basic_blocks(f):
for feature, va in extractor.extract_basic_block_features(f, bb):
print('bb : 0x%08x: %s' % (va, feature))
print("bb : 0x%08x: %s" % (va, feature))
for insn in extractor.get_instructions(f, bb):
for feature, va in extractor.extract_insn_features(f, bb, insn):
print('insn: 0x%08x: %s' % (va, feature))
print("insn: 0x%08x: %s" % (va, feature))
return 0
if __name__ == '__main__':
if __name__ == "__main__":
sys.exit(main())