#!/usr/bin/env python2
'''
capa - detect capabilities in programs.
'''
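# usage sketch (illustrative only; the paths are placeholders, see main() below for the full CLI):
#
#     capa.exe /path/to/mal.exe
#     capa.exe -r /path/to/rules/ -vv /path/to/mal.exe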
import os
import os.path
import sys
import logging
import argparse
import collections

import tqdm

import capa.rules
import capa.engine
import capa.features
import capa.features.freeze
import capa.features.extractors

from capa.helpers import oint


SUPPORTED_FILE_MAGIC = set(['MZ'])


logger = logging.getLogger('capa')


def set_vivisect_log_level(level):
    logging.getLogger('vivisect').setLevel(level)
    logging.getLogger('vtrace').setLevel(level)
    logging.getLogger('envi').setLevel(level)


def find_function_capabilities(ruleset, extractor, f):
    # contains features from:
    # - insns
    # - function
    function_features = collections.defaultdict(set)
    bb_matches = collections.defaultdict(list)

    for feature, va in extractor.extract_function_features(f):
        function_features[feature].add(va)

    for bb in extractor.get_basic_blocks(f):
        # contains features from:
        # - insns
        # - basic blocks
        bb_features = collections.defaultdict(set)

        for feature, va in extractor.extract_basic_block_features(f, bb):
            bb_features[feature].add(va)

        for insn in extractor.get_instructions(f, bb):
            for feature, va in extractor.extract_insn_features(f, bb, insn):
                bb_features[feature].add(va)
                function_features[feature].add(va)

        _, matches = capa.engine.match(ruleset.basic_block_rules, bb_features, oint(bb))

        for rule_name, res in matches.items():
            bb_matches[rule_name].extend(res)
            for va, _ in res:
                function_features[capa.features.MatchedRule(rule_name)].add(va)

    _, function_matches = capa.engine.match(ruleset.function_rules, function_features, oint(f))
    return function_matches, bb_matches


def find_file_capabilities(ruleset, extractor, function_features):
    file_features = collections.defaultdict(set)

    for feature, va in extractor.extract_file_features():
        # not all file features may have virtual addresses.
        # if not, then at least ensure the feature shows up in the index.
        # the set of addresses will still be empty.
        if va:
            file_features[feature].add(va)
        else:
            if feature not in file_features:
                file_features[feature] = set()

    logger.info('analyzed file and extracted %d features', len(file_features))

    file_features.update(function_features)

    _, matches = capa.engine.match(ruleset.file_rules, file_features, 0x0)
    return matches


def find_capabilities(ruleset, extractor, disable_progress=None):
    all_function_matches = collections.defaultdict(list)
    all_bb_matches = collections.defaultdict(list)

    for f in tqdm.tqdm(extractor.get_functions(), disable=disable_progress, unit=' functions'):
        function_matches, bb_matches = find_function_capabilities(ruleset, extractor, f)
        for rule_name, res in function_matches.items():
            all_function_matches[rule_name].extend(res)
        for rule_name, res in bb_matches.items():
            all_bb_matches[rule_name].extend(res)

    # mapping from matched rule feature to set of addresses at which it matched.
    # type: Dict[MatchedRule, Set[int]]
    function_features = {capa.features.MatchedRule(rule_name): set(map(lambda p: p[0], results))
                         for rule_name, results in all_function_matches.items()}

    all_file_matches = find_file_capabilities(ruleset, extractor, function_features)

    matches = {}
    matches.update(all_bb_matches)
    matches.update(all_function_matches)
    matches.update(all_file_matches)
    return matches
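# a minimal driving sketch (illustrative only, mirroring main() below; the paths are placeholders):
#
#     rules = capa.rules.RuleSet(get_rules('/path/to/rules/'))
#     extractor = get_extractor('/path/to/mal.exe', 'auto')
#     capabilities = find_capabilities(rules, extractor)
#     # `capabilities` maps rule name -> list of (virtual address, Result) pairs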


def pluck_meta(rules, key):
    for rule in rules:
        value = rule.meta.get(key)
        if value:
            yield value


def get_dispositions(matched_rules):
    for disposition in pluck_meta(matched_rules, 'maec/analysis-conclusion'):
        yield disposition

    for disposition in pluck_meta(matched_rules, 'maec/analysis-conclusion-ov'):
        yield disposition


def get_roles(matched_rules):
    for role in pluck_meta(matched_rules, 'maec/malware-category'):
        yield role

    for role in pluck_meta(matched_rules, 'maec/malware-category-ov'):
        yield role


RULE_CATEGORY = 'rule-category'


def is_other_feature_rule(rule):
    '''
    does this rule *not* have any of:
      - maec/malware-category (or -ov)
      - maec/analysis-conclusion (or -ov)
      - rule-category

    if so (and it is not a library rule), it will be placed into the "other features" bucket.
    '''
    if rule.meta.get('lib', False):
        return False

    for meta in ('maec/analysis-conclusion',
                 'maec/analysis-conclusion-ov',
                 'maec/malware-category',
                 'maec/malware-category-ov',
                 RULE_CATEGORY):
        if meta in rule.meta:
            return False
    return True


def render_capabilities_default(ruleset, results):
    rules = [ruleset.rules[rule_name] for rule_name in results.keys()]

    # we render the highest level conclusions first:
    #
    # 1. is it malware?
    # 2. what is the role? (dropper, backdoor, etc.)
    #
    # after this, we'll enumerate the specific objectives, behaviors, and techniques.
    dispositions = list(sorted(get_dispositions(rules)))
    if dispositions:
        print('disposition: ' + ', '.join(dispositions))

    categories = list(sorted(get_roles(rules)))
    if categories:
        print('role: ' + ', '.join(categories))

    # rules may have a meta tag `rule-category` that specifies:
    #
    #     rule-category: $objective[/$behavior[/$technique]]
    #
    # this classification describes a tree of increasingly specific conclusions.
    # the tree allows us to tie a high-level conclusion, e.g. an objective, to
    # the evidence of this - the behaviors, techniques, rules, and ultimately, features.

    # this data structure is a nested map:
    #
    #     objective name -> behavior name -> technique name -> rule name -> rule
    #
    # at each level, a matched rule is also legal.
    # this indicates that only a portion of the rule-category was provided.
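    # for example (an illustrative sketch; the rule name and rule-category value are
    # hypothetical, not taken from the rule set):
    #
    #     a rule named "log keystrokes" tagged `rule-category: collection/keylogging/polling`
    #     is indexed as:
    #
    #         o['collection']['keylogging']['polling']['log keystrokes'] = rule
    #
    #     while a rule tagged only `rule-category: collection` sits directly under
    #     o['collection'].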
    o = collections.defaultdict(
        lambda: collections.defaultdict(
            lambda: collections.defaultdict(
                dict
            )
        )
    )
    objectives = set()
    behaviors = set()
    techniques = set()

    for rule in rules:
        objective = None
        behavior = None
        technique = None

        parts = rule.meta.get(RULE_CATEGORY, '').split('/')
        if len(parts) == 0 or list(parts) == ['']:
            continue
        if len(parts) > 0:
            objective = parts[0].replace('-', ' ')
            objectives.add(objective)
        if len(parts) > 1:
            behavior = parts[1].replace('-', ' ')
            behaviors.add(behavior)
        if len(parts) > 2:
            technique = parts[2].replace('-', ' ')
            techniques.add(technique)
        if len(parts) > 3:
            raise capa.rules.InvalidRule(RULE_CATEGORY + ' tag must have at most three components')

        if technique:
            o[objective][behavior][technique][rule.name] = rule
        elif behavior:
            o[objective][behavior][rule.name] = rule
        elif objective:
            o[objective][rule.name] = rule

    if objectives:
        print('\nobjectives:')
        for objective in sorted(objectives):
            print(' ' + objective)

    if behaviors:
        print('\nbehaviors:')
        for behavior in sorted(behaviors):
            print(' ' + behavior)

    if techniques:
        print('\ntechniques:')
        for technique in sorted(techniques):
            print(' ' + technique)

    other_features = list(filter(is_other_feature_rule, rules))
    if other_features:
        print('\nother features:')
        for rule in sorted(map(lambda r: r.name, other_features)):
            print(' ' + rule)

    # now, render a tree of the objectives, behaviors, techniques, and matched rule names.
    # it will look something like:
    #
    #     details:
    #       load data
    #         load data from self
    #         load data from resource
    #           extract resource via API
    #
    # implementation note:
    # when we enumerate the items in this tree, we have two cases:
    #
    # 1. usually, we'll get a pair (objective name, map of children); but it's possible that
    # 2. we'll get a pair (rule name, rule instance)
    #
    # this is why we do the `isinstance(..., Rule)` check below.
    #
    # i believe the alternative, to have separate data structures for the tree and rules,
    # is probably more code and more confusing.
    if o:
        print('\ndetails:')
        for objective, behaviors in o.items():
            print('  ' + objective)

            if isinstance(behaviors, capa.rules.Rule):
                continue

            for behavior, techniques in behaviors.items():
                print('    ' + behavior)

                if isinstance(techniques, capa.rules.Rule):
                    continue

                for technique, rules in techniques.items():
                    print('      ' + technique)

                    if isinstance(rules, capa.rules.Rule):
                        continue

                    for rule in rules.keys():
                        print('        ' + rule)


def render_capabilities_concise(results):
    '''
    print the matching rules, newline separated.

    example:

        foo
        bar
        mimikatz::kull_m_arc_sendrecv
    '''
    for rule in sorted(results.keys()):
        print(rule)


def render_capabilities_verbose(ruleset, results):
    '''
    print the matching rules, and the functions in which they matched.

    example:

        foo:
         - 0x401000
         - 0x401005
        bar:
         - 0x402044
         - 0x402076
        mimikatz::kull_m_arc_sendrecv:
         - 0x40105d
    '''
    for rule, ress in results.items():
        rule_scope = ruleset.rules[rule].scope
        if rule_scope == capa.rules.FILE_SCOPE:
            # only display rule name at file scope
            print('%s' % rule)
            continue
        print('%s:' % (rule))
        seen = set([])
        for (fva, _) in sorted(ress, key=lambda p: p[0]):
            if fva in seen:
                continue
            print(' - 0x%x' % (fva))
            seen.add(fva)


def render_result(res, indent=''):
    '''
    render the given Result to stdout.

    args:
      res (capa.engine.Result)
      indent (str)
    '''
    # prune failing branches
    if not res.success:
        return

    if isinstance(res.statement, capa.engine.Some):
        if res.statement.count == 0:
            # we asked for optional, so we'll match even if no children matched.
            # but in this case, it's not worth rendering the optional node.
            if sum(map(lambda c: c.success, res.children)) > 0:
                print('%soptional:' % indent)
        else:
            print('%s%d or more' % (indent, res.statement.count))
    elif not isinstance(res.statement, (capa.features.Feature, capa.engine.Range, capa.engine.Regex)):
        # when rendering a structural node (and/or/not),
        # then we only care about the node name.
        #
        # for example:
        #
        #     and:
        #       Number(0x3136b0): True
        #       Number(0x3136b0): True
        print('%s%s:' % (indent, res.statement.name.lower()))
    else:
        # but when rendering a Feature, we want to see any arguments to it.
        #
        # for example:
        #
        #     Number(0x3136b0): True
        print('%s%s:' % (indent, res.statement))
        for location in sorted(res.locations):
            print('%s - virtual address: 0x%x' % (indent, location))

    for child in res.children:
        render_result(child, indent=indent + ' ')


def render_capabilities_vverbose(ruleset, results):
    '''
    print the matching rules, the functions in which they matched,
    and the logic tree with annotated matching features.

    example:

        function mimikatz::kull_m_arc_sendrecv:
         - 0x40105d
          Or:
           And:
            string("ACR > "):
             - virtual address: 0x401089
            number(0x3136b0):
             - virtual address: 0x4010c8
    '''
    for rule, ress in results.items():
        print('rule %s:' % (rule))
        for (va, res) in sorted(ress, key=lambda p: p[0]):
            rule_scope = ruleset.rules[rule].scope
            if rule_scope == capa.rules.FILE_SCOPE:
                # does not make sense to display va at file scope
                print(' - %s:' % rule_scope)
            else:
                print(' - %s 0x%x:' % (rule_scope, va))
            render_result(res, indent=' ')


def appears_rule_cat(rules, capabilities, rule_cat):
    for rule_name in capabilities.keys():
        if rules.rules[rule_name].meta.get('rule-category', '').startswith(rule_cat):
            return True
    return False
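# for example (hypothetical rule-category value): a matched rule tagged
# `rule-category: anti-analysis/packing/packed-with-upx` satisfies
# appears_rule_cat(rules, capabilities, 'anti-analysis/packing/'), since the
# check is a simple prefix match; is_file_limitation() below relies on this.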


def is_file_limitation(rules, capabilities, is_standalone=True):
    file_limitations = {
        # capa will likely detect installer-specific functionality.
        # this is probably not what the user wants.
        'other-features/installer/': [
            ' This sample appears to be an installer.',
            ' ',
            ' capa cannot handle installers well. This means the results may be misleading or incomplete.',
            ' You should try to understand the install mechanism and analyze created files with capa.'
        ],
        # capa won't detect much in .NET samples.
        # it might match some file-level things.
        # for consistency, bail on things that we don't support.
        'other-features/compiled-to-dot-net': [
            ' This sample appears to be a .NET module.',
            ' ',
            ' .NET is a cross-platform framework for running managed applications.',
            ' capa cannot handle non-native files. This means that the results may be misleading or incomplete.',
            ' You may have to analyze the file manually, using a tool like the .NET decompiler dnSpy.'
        ],
        # capa will detect dozens of capabilities for AutoIt samples,
        # but these are due to the AutoIt runtime, not the payload script.
        # so, don't confuse the user with FP matches - bail instead
        'other-features/compiled-with-autoit': [
            ' This sample appears to be compiled with AutoIt.',
            ' ',
            ' AutoIt is a freeware BASIC-like scripting language designed for automating the Windows GUI.',
            ' capa cannot handle AutoIt scripts. This means that the results will be misleading or incomplete.',
            ' You may have to analyze the file manually, using a tool like the AutoIt decompiler MyAut2Exe.'
        ],
        # capa won't detect much in packed samples
        'anti-analysis/packing/': [
            ' This sample appears to be packed.',
            ' ',
            ' Packed samples have often been obfuscated to hide their logic.',
            ' capa cannot handle obfuscation well. This means the results may be misleading or incomplete.',
            ' If possible, you should try to unpack this input file before analyzing it with capa.'
        ]
    }

    for category, dialogue in file_limitations.items():
        if not appears_rule_cat(rules, capabilities, category):
            continue
        logger.warning('-' * 80)
        for line in dialogue:
            logger.warning(line)
        if is_standalone:
            logger.warning(' ')
            logger.warning(' Use -v or -vv if you really want to see the capabilities identified by capa.')
        logger.warning('-' * 80)
        return True
    return False


def is_supported_file_type(sample):
    '''
    Return True if this is a supported file type, based on its magic header value.
    '''
    with open(sample, 'rb') as f:
        magic = f.read(2)
        if magic in SUPPORTED_FILE_MAGIC:
            return True
        else:
            return False


def get_shellcode_vw(sample, arch='auto'):
    '''
    Return a shellcode workspace, using the explicit arch or via auto-detection.
    '''
    import viv_utils
    with open(sample, 'rb') as f:
        sample_bytes = f.read()
    if arch == 'auto':
        # choose arch with most functions, idea by Jay G.
        vw_cands = []
        for arch in ['i386', 'amd64']:
            vw_cands.append(viv_utils.getShellcodeWorkspace(sample_bytes, arch))
        if not vw_cands:
            raise ValueError('could not generate vivisect workspace')
        vw = max(vw_cands, key=lambda vw: len(vw.getFunctions()))
    else:
        vw = viv_utils.getShellcodeWorkspace(sample_bytes, arch)
    vw.setMeta('Format', 'blob')  # TODO fix in viv_utils
    return vw


def get_meta_str(vw):
    '''
    Return the workspace meta information as a string.
    '''
    meta = []
    for k in ['Format', 'Platform', 'Architecture']:
        if k in vw.metadata:
            meta.append('%s: %s' % (k.lower(), vw.metadata[k]))
    return '%s, number of functions: %d' % (', '.join(meta), len(vw.getFunctions()))
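# for example (illustrative values only; not every workspace carries all three keys),
# a 32-bit shellcode workspace might render as:
#
#     'format: blob, architecture: i386, number of functions: 3'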


class UnsupportedFormatError(ValueError):
    pass


def get_workspace(path, format):
    import viv_utils
    logger.info('generating vivisect workspace for: %s', path)
    if format == 'auto':
        if not is_supported_file_type(path):
            raise UnsupportedFormatError()
        vw = viv_utils.getWorkspace(path)
    elif format == 'pe':
        vw = viv_utils.getWorkspace(path)
    elif format == 'sc32':
        vw = get_shellcode_vw(path, arch='i386')
    elif format == 'sc64':
        vw = get_shellcode_vw(path, arch='amd64')
    else:
        # guard against unexpected formats so that `vw` is always defined.
        raise ValueError('unexpected format: ' + format)
    logger.info('%s', get_meta_str(vw))
    return vw


def get_extractor_py2(path, format):
    import capa.features.extractors.viv
    vw = get_workspace(path, format)
    return capa.features.extractors.viv.VivisectFeatureExtractor(vw, path)


class UnsupportedRuntimeError(RuntimeError):
    pass


def get_extractor_py3(path, format):
    raise UnsupportedRuntimeError()


def get_extractor(path, format):
    '''
    raises:
      UnsupportedFormatError: if the file format is not recognized.
      UnsupportedRuntimeError: if running under an unsupported interpreter (Python 3).
    '''
    if sys.version_info >= (3, 0):
        return get_extractor_py3(path, format)
    else:
        return get_extractor_py2(path, format)


def is_nursery_rule_path(path):
    '''
    The nursery is a spot for rules that have not yet been fully polished.
    For example, they may not have references to public examples of a technique.
    Yet, we still want to capture and report on their matches.
    The nursery is currently a subdirectory of the rules directory with that name.

    When nursery rules are loaded, their metadata section should be updated with:
    `nursery=True`.
    '''
    return 'nursery' in path
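# for example (hypothetical rule file paths):
#
#     is_nursery_rule_path('rules/nursery/some-new-technique.yml')   # True
#     is_nursery_rule_path('rules/collection/log-keystrokes.yml')    # False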


def get_rules(rule_path):
    if not os.path.exists(rule_path):
        raise IOError('%s does not exist or cannot be accessed' % rule_path)

    rules = []
    if os.path.isfile(rule_path):
        logger.info('reading rule file: %s', rule_path)
        with open(rule_path, 'rb') as f:
            rule = capa.rules.Rule.from_yaml(f.read().decode('utf-8'))

        if is_nursery_rule_path(rule_path):
            rule.meta['nursery'] = True

        rules.append(rule)
        logger.debug('rule: %s scope: %s', rule.name, rule.scope)

    elif os.path.isdir(rule_path):
        logger.info('reading rules from directory %s', rule_path)
        for root, dirs, files in os.walk(rule_path):
            for file in files:
                if not file.endswith('.yml'):
                    logger.warning('skipping non-.yml file: %s', file)
                    continue

                path = os.path.join(root, file)
                logger.debug('reading rule file: %s', path)
                try:
                    rule = capa.rules.Rule.from_yaml_file(path)
                except capa.rules.InvalidRule:
                    raise
                else:
                    if is_nursery_rule_path(root):
                        rule.meta['nursery'] = True

                    rules.append(rule)
                    logger.debug('rule: %s scope: %s', rule.name, rule.scope)
    return rules


def main(argv=None):
    if argv is None:
        argv = sys.argv[1:]

    formats = [
        ('auto', '(default) detect file type automatically'),
        ('pe', 'Windows PE file'),
        ('sc32', '32-bit shellcode'),
        ('sc64', '64-bit shellcode'),
        ('freeze', 'features previously frozen by capa'),
    ]
    format_help = ', '.join(['%s: %s' % (f[0], f[1]) for f in formats])

    parser = argparse.ArgumentParser(description='detect capabilities in programs.')
    parser.add_argument('sample', type=str,
                        help='Path to sample to analyze')
    parser.add_argument('-r', '--rules', type=str, default='(embedded rules)',
                        help='Path to rule file or directory, use embedded rules by default')
    parser.add_argument('-t', '--tag', type=str,
                        help='Filter on rule meta field values')
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='Enable verbose output')
    parser.add_argument('-vv', '--vverbose', action='store_true',
                        help='Enable very verbose output')
    parser.add_argument('-q', '--quiet', action='store_true',
                        help='Disable all output but errors')
    parser.add_argument('-f', '--format', choices=[f[0] for f in formats], default='auto',
                        help='Select sample format, %s' % format_help)
    args = parser.parse_args(args=argv)

    if args.quiet:
        logging.basicConfig(level=logging.ERROR)
        logging.getLogger().setLevel(logging.ERROR)
    elif args.verbose:
        logging.basicConfig(level=logging.DEBUG)
        logging.getLogger().setLevel(logging.DEBUG)
    else:
        logging.basicConfig(level=logging.INFO)
        logging.getLogger().setLevel(logging.INFO)

    # disable vivisect-related logging, it's verbose and not relevant for capa users
    set_vivisect_log_level(logging.CRITICAL)

    # py2 doesn't know about cp65001, which is a variant of utf-8 on windows.
    # tqdm bails when trying to render the progress bar in this setup.
    # because cp65001 is utf-8, we just map that codepage to the utf-8 codec.
    # see #380 and: https://stackoverflow.com/a/3259271/87207
    import codecs
    codecs.register(lambda name: codecs.lookup('utf-8') if name == 'cp65001' else None)

    if args.rules == '(embedded rules)':
        logger.info('-' * 80)
        logger.info(' Using default embedded rules.')
        logger.info(' To provide your own rules, use the form `capa.exe -r ./path/to/rules/ /path/to/mal.exe`.')
        logger.info(' You can see the current default rule set here:')
        logger.info(' https://github.com/fireeye/capa-rules')
        logger.info('-' * 80)

        if hasattr(sys, 'frozen') and hasattr(sys, '_MEIPASS'):
            logger.debug('detected running under PyInstaller')
            args.rules = os.path.join(sys._MEIPASS, 'rules')
            logger.debug('default rule path (PyInstaller method): %s', args.rules)
        else:
            logger.debug('detected running from source')
            args.rules = os.path.join(os.path.dirname(__file__), '..', 'rules')
            logger.debug('default rule path (source method): %s', args.rules)
    else:
        logger.info('using rules path: %s', args.rules)

    try:
        rules = get_rules(args.rules)
        rules = capa.rules.RuleSet(rules)
        logger.info('successfully loaded %s rules', len(rules))
        if args.tag:
            rules = rules.filter_rules_by_meta(args.tag)
            logger.info('selected %s rules', len(rules))
    except (IOError, capa.rules.InvalidRule, capa.rules.InvalidRuleSet) as e:
        logger.error('%s', str(e))
        return -1

    with open(args.sample, 'rb') as f:
        taste = f.read(8)

    if ((args.format == 'freeze')
            or (args.format == 'auto' and capa.features.freeze.is_freeze(taste))):
        with open(args.sample, 'rb') as f:
            extractor = capa.features.freeze.load(f.read())
    else:
        try:
            extractor = get_extractor(args.sample, args.format)
        except UnsupportedFormatError:
            logger.error('-' * 80)
            logger.error(' Input file does not appear to be a PE file.')
            logger.error(' ')
            logger.error(' capa currently only supports analyzing PE files (or shellcode, when using --format sc32|sc64).')
            logger.error(' If you don\'t know the input file type, you can try using the `file` utility to guess it.')
            logger.error('-' * 80)
            return -1
        except UnsupportedRuntimeError:
            logger.error('-' * 80)
            logger.error(' Unsupported runtime or Python interpreter.')
            logger.error(' ')
            logger.error(' capa supports running under Python 2.7 using Vivisect for binary analysis.')
            logger.error(' It can also run within IDA Pro, using either Python 2.7 or 3.5+.')
            logger.error(' ')
            logger.error(' If you\'re seeing this message on the command line, please ensure you\'re running Python 2.7.')
            logger.error('-' * 80)
            return -1

    capabilities = find_capabilities(rules, extractor)

    if is_file_limitation(rules, capabilities):
        # bail if capa encountered a file limitation, e.g. a packed binary.
        # do show the output in verbose mode, though.
        if not (args.verbose or args.vverbose):
            return -1

    if args.vverbose:
        render_capabilities_vverbose(rules, capabilities)
    elif args.verbose:
        render_capabilities_verbose(rules, capabilities)
    else:
        render_capabilities_default(rules, capabilities)

    logger.info('done.')

    return 0


def ida_main():
    logging.basicConfig(level=logging.INFO)
    logging.getLogger().setLevel(logging.INFO)

    import capa.ida.helpers
    if not capa.ida.helpers.is_supported_file_type():
        return -1

    logger.info('-' * 80)
    logger.info(' Using default embedded rules.')
    logger.info(' ')
    logger.info(' You can see the current default rule set here:')
    logger.info(' https://github.com/fireeye/capa-rules')
    logger.info('-' * 80)

    if hasattr(sys, 'frozen') and hasattr(sys, '_MEIPASS'):
        logger.debug('detected running under PyInstaller')
        rules_path = os.path.join(sys._MEIPASS, 'rules')
        logger.debug('default rule path (PyInstaller method): %s', rules_path)
    else:
        logger.debug('detected running from source')
        rules_path = os.path.join(os.path.dirname(__file__), '..', 'rules')
        logger.debug('default rule path (source method): %s', rules_path)

    rules = get_rules(rules_path)
    import capa.rules
    rules = capa.rules.RuleSet(rules)

    import capa.features.extractors.ida
    capabilities = find_capabilities(rules, capa.features.extractors.ida.IdaFeatureExtractor())

    if is_file_limitation(rules, capabilities, is_standalone=False):
        capa.ida.helpers.inform_user_ida_ui('capa encountered warnings during analysis')

    render_capabilities_default(rules, capabilities)


def is_runtime_ida():
    try:
        import idc
    except ImportError:
        return False
    else:
        return True


if __name__ == '__main__':
    if is_runtime_ida():
        ida_main()
    else:
        sys.exit(main())