mirror of
https://github.com/mandiant/capa.git
synced 2025-12-12 15:49:46 -08:00
extractors: do cast-to-int correctly
This commit is contained in:
@@ -6,9 +6,6 @@
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
import sys
|
||||
import types
|
||||
|
||||
import idaapi
|
||||
|
||||
import capa.features.extractors.ida.file
|
||||
@@ -18,24 +15,40 @@ import capa.features.extractors.ida.basicblock
|
||||
from capa.features.extractors import FeatureExtractor
|
||||
|
||||
|
||||
def get_ea(self):
|
||||
""" """
|
||||
if isinstance(self, (idaapi.BasicBlock, idaapi.func_t)):
|
||||
class FunctionHandle:
|
||||
"""this acts like an idaapi.func_t but with __int__()"""
|
||||
def __init__(self, inner):
|
||||
self._inner = inner
|
||||
|
||||
def __int__(self):
|
||||
return self.start_ea
|
||||
if isinstance(self, idaapi.insn_t):
|
||||
|
||||
def __getattr__(self, name):
|
||||
return getattr(self._inner, name)
|
||||
|
||||
|
||||
class BasicBlockHandle:
|
||||
"""this acts like an idaapi.BasicBlock but with __int__()"""
|
||||
def __init__(self, inner):
|
||||
self._inner = inner
|
||||
|
||||
def __int__(self):
|
||||
return self.start_ea
|
||||
|
||||
def __getattr__(self, name):
|
||||
return getattr(self._inner, name)
|
||||
|
||||
|
||||
class InstructionHandle:
|
||||
"""this acts like an idaapi.insn_t but with __int__()"""
|
||||
def __init__(self, inner):
|
||||
self._inner = inner
|
||||
|
||||
def __int__(self):
|
||||
return self.ea
|
||||
raise TypeError
|
||||
|
||||
|
||||
def add_ea_int_cast(o):
|
||||
"""
|
||||
dynamically add a cast-to-int (`__int__`) method to the given object
|
||||
that returns the value of the `.ea` property.
|
||||
this bit of skullduggery lets us cast viv-utils objects as ints.
|
||||
the correct way of doing this is to update viv-utils (or subclass the objects here).
|
||||
"""
|
||||
setattr(o, "__int__", types.MethodType(get_ea, o))
|
||||
return o
|
||||
def __getattr__(self, name):
|
||||
return getattr(self._inner, name)
|
||||
|
||||
|
||||
class IdaFeatureExtractor(FeatureExtractor):
|
||||
@@ -59,21 +72,23 @@ class IdaFeatureExtractor(FeatureExtractor):
|
||||
# ignore library functions and thunk functions as identified by IDA
|
||||
for f in ida_helpers.get_functions(skip_thunks=True, skip_libs=True):
|
||||
setattr(f, "ctx", ctx)
|
||||
yield add_ea_int_cast(f)
|
||||
yield FunctionHandle(f)
|
||||
|
||||
@staticmethod
|
||||
def get_function(ea):
|
||||
f = idaapi.get_func(ea)
|
||||
setattr(f, "ctx", {})
|
||||
return add_ea_int_cast(f)
|
||||
return FunctionHandle(f)
|
||||
|
||||
def extract_function_features(self, f):
|
||||
for (feature, ea) in capa.features.extractors.ida.function.extract_features(f):
|
||||
yield feature, ea
|
||||
|
||||
def get_basic_blocks(self, f):
|
||||
for bb in capa.features.extractors.ida.helpers.get_function_blocks(f):
|
||||
yield add_ea_int_cast(bb)
|
||||
import capa.features.extractors.ida.helpers as ida_helpers
|
||||
|
||||
for bb in ida_helpers.get_function_blocks(f):
|
||||
yield BasicBlockHandle(bb)
|
||||
|
||||
def extract_basic_block_features(self, f, bb):
|
||||
for (feature, ea) in capa.features.extractors.ida.basicblock.extract_features(f, bb):
|
||||
@@ -83,7 +98,7 @@ class IdaFeatureExtractor(FeatureExtractor):
|
||||
import capa.features.extractors.ida.helpers as ida_helpers
|
||||
|
||||
for insn in ida_helpers.get_instructions_in_range(bb.start_ea, bb.end_ea):
|
||||
yield add_ea_int_cast(insn)
|
||||
yield InstructionHandle(insn)
|
||||
|
||||
def extract_insn_features(self, f, bb, insn):
|
||||
for (feature, ea) in capa.features.extractors.ida.insn.extract_features(f, bb, insn):
|
||||
|
||||
@@ -5,7 +5,6 @@
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
import types
|
||||
import logging
|
||||
|
||||
import viv_utils
|
||||
@@ -22,26 +21,16 @@ __all__ = ["file", "function", "basicblock", "insn"]
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def get_va(self):
|
||||
try:
|
||||
# vivisect type
|
||||
class InstructionHandle:
|
||||
"""this acts like a vivisect.Opcode but with an __int__() method"""
|
||||
def __init__(self, inner):
|
||||
self._inner = inner
|
||||
|
||||
def __int__(self):
|
||||
return self.va
|
||||
except AttributeError:
|
||||
pass
|
||||
|
||||
raise TypeError()
|
||||
|
||||
|
||||
def add_va_int_cast(o):
|
||||
"""
|
||||
dynamically add a cast-to-int (`__int__`) method to the given object
|
||||
that returns the value of the `.va` property.
|
||||
|
||||
this bit of skullduggery lets us cast viv-utils objects as ints.
|
||||
the correct way of doing this is to update viv-utils (or subclass the objects here).
|
||||
"""
|
||||
setattr(o, "__int__", types.MethodType(get_va, o))
|
||||
return o
|
||||
def __getattr__(self, name):
|
||||
return getattr(self._inner, name)
|
||||
|
||||
|
||||
class VivisectFeatureExtractor(FeatureExtractor):
|
||||
@@ -60,15 +49,14 @@ class VivisectFeatureExtractor(FeatureExtractor):
|
||||
|
||||
def get_functions(self):
|
||||
for va in sorted(self.vw.getFunctions()):
|
||||
yield add_va_int_cast(viv_utils.Function(self.vw, va))
|
||||
yield viv_utils.Function(self.vw, va)
|
||||
|
||||
def extract_function_features(self, f):
|
||||
for feature, va in capa.features.extractors.viv.function.extract_features(f):
|
||||
yield feature, va
|
||||
|
||||
def get_basic_blocks(self, f):
|
||||
for bb in f.basic_blocks:
|
||||
yield add_va_int_cast(bb)
|
||||
return f.basic_blocks
|
||||
|
||||
def extract_basic_block_features(self, f, bb):
|
||||
for feature, va in capa.features.extractors.viv.basicblock.extract_features(f, bb):
|
||||
@@ -76,7 +64,7 @@ class VivisectFeatureExtractor(FeatureExtractor):
|
||||
|
||||
def get_instructions(self, f, bb):
|
||||
for insn in bb.instructions:
|
||||
yield add_va_int_cast(insn)
|
||||
yield InstructionHandle(insn)
|
||||
|
||||
def extract_insn_features(self, f, bb, insn):
|
||||
for feature, va in capa.features.extractors.viv.insn.extract_features(f, bb, insn):
|
||||
|
||||
@@ -122,7 +122,7 @@ def dumps(extractor):
|
||||
)
|
||||
|
||||
for insnva, insn in sorted(
|
||||
[(insn.__int__(), insn) for insn in extractor.get_instructions(f, bb)], key=lambda p: p[0]
|
||||
[(int(insn), insn) for insn in extractor.get_instructions(f, bb)], key=lambda p: p[0]
|
||||
):
|
||||
ret["functions"][hex(f)][hex(bb)].append(hex(insnva))
|
||||
|
||||
|
||||
@@ -12,18 +12,7 @@ _hex = hex
|
||||
|
||||
|
||||
def hex(i):
|
||||
return _hex(oint(i))
|
||||
|
||||
|
||||
def oint(i):
|
||||
# there seems to be some trouble with using `int(viv_utils.Function)`
|
||||
# with the black magic we do with binding the `__int__()` routine.
|
||||
# i haven't had a chance to debug this yet (and i have no hotel wifi).
|
||||
# so in the meantime, detect this, and call the method directly.
|
||||
try:
|
||||
return int(i)
|
||||
except TypeError:
|
||||
return i.__int__()
|
||||
return _hex(int(i))
|
||||
|
||||
|
||||
def get_file_taste(sample_path):
|
||||
|
||||
@@ -78,7 +78,7 @@ def find_func_features(f, extractor):
|
||||
_bb_features[feature].add(ea)
|
||||
func_features[feature].add(ea)
|
||||
|
||||
bb_features[capa.helpers.oint(bb)] = _bb_features
|
||||
bb_features[int(bb)] = _bb_features
|
||||
|
||||
return func_features, bb_features
|
||||
|
||||
@@ -100,7 +100,7 @@ def find_func_matches(f, ruleset, func_features, bb_features):
|
||||
func_features[capa.features.MatchedRule(name)].add(ea)
|
||||
|
||||
# find rule matches for function, function features include rule matches for basic blocks
|
||||
_, matches = capa.engine.match(ruleset.function_rules, func_features, capa.helpers.oint(f))
|
||||
_, matches = capa.engine.match(ruleset.function_rules, func_features, int(f))
|
||||
for (name, res) in matches.items():
|
||||
func_matches[name].extend(res)
|
||||
|
||||
|
||||
@@ -32,7 +32,7 @@ import capa.version
|
||||
import capa.features
|
||||
import capa.features.freeze
|
||||
import capa.features.extractors
|
||||
from capa.helpers import oint, get_file_taste
|
||||
from capa.helpers import get_file_taste
|
||||
|
||||
RULES_PATH_DEFAULT_STRING = "(embedded rules)"
|
||||
SUPPORTED_FILE_MAGIC = set([b"MZ"])
|
||||
@@ -87,14 +87,14 @@ def find_function_capabilities(ruleset, extractor, f):
|
||||
bb_features[feature].add(va)
|
||||
function_features[feature].add(va)
|
||||
|
||||
_, matches = capa.engine.match(ruleset.basic_block_rules, bb_features, oint(bb))
|
||||
_, matches = capa.engine.match(ruleset.basic_block_rules, bb_features, int(bb))
|
||||
|
||||
for rule_name, res in matches.items():
|
||||
bb_matches[rule_name].extend(res)
|
||||
for va, _ in res:
|
||||
function_features[capa.features.MatchedRule(rule_name)].add(va)
|
||||
|
||||
_, function_matches = capa.engine.match(ruleset.function_rules, function_features, oint(f))
|
||||
_, function_matches = capa.engine.match(ruleset.function_rules, function_features, int(f))
|
||||
return function_matches, bb_matches, len(function_features)
|
||||
|
||||
|
||||
@@ -139,7 +139,8 @@ def find_capabilities(ruleset, extractor, disable_progress=None):
|
||||
functions = list(extractor.get_functions())
|
||||
|
||||
for f in pbar(functions, desc="matching", unit=" functions"):
|
||||
function_address = f.__int__()
|
||||
#from IPython import embed; embed()
|
||||
function_address = int(f)
|
||||
|
||||
if extractor.is_library_function(function_address):
|
||||
function_name = extractor.get_function_name(function_address)
|
||||
|
||||
@@ -135,9 +135,9 @@ def main(argv=None):
|
||||
if args.format == "freeze":
|
||||
functions = tuple(filter(lambda f: f == args.function, functions))
|
||||
else:
|
||||
functions = tuple(filter(lambda f: capa.helpers.oint(f) == args.function, functions))
|
||||
functions = tuple(filter(lambda f: int(f) == args.function, functions))
|
||||
|
||||
if args.function not in [capa.helpers.oint(f) for f in functions]:
|
||||
if args.function not in [int(f) for f in functions]:
|
||||
print("0x%X not a function" % args.function)
|
||||
return -1
|
||||
|
||||
|
||||
@@ -244,14 +244,14 @@ def sample(request):
|
||||
|
||||
def get_function(extractor, fva):
|
||||
for f in extractor.get_functions():
|
||||
if f.__int__() == fva:
|
||||
if int(f) == fva:
|
||||
return f
|
||||
raise ValueError("function not found")
|
||||
|
||||
|
||||
def get_basic_block(extractor, f, va):
|
||||
for bb in extractor.get_basic_blocks(f):
|
||||
if bb.__int__() == va:
|
||||
if int(bb) == va:
|
||||
return bb
|
||||
raise ValueError("basic block not found")
|
||||
|
||||
|
||||
@@ -115,30 +115,25 @@ def compare_extractors_viv_null(viv_ext, null_ext):
|
||||
null_ext (capa.features.extractors.NullFeatureExtractor)
|
||||
"""
|
||||
assert list(viv_ext.extract_file_features()) == list(null_ext.extract_file_features())
|
||||
assert list(map(to_int, viv_ext.get_functions())) == list(null_ext.get_functions())
|
||||
assert list(map(int, viv_ext.get_functions())) == list(null_ext.get_functions())
|
||||
for f in viv_ext.get_functions():
|
||||
assert list(map(to_int, viv_ext.get_basic_blocks(f))) == list(null_ext.get_basic_blocks(to_int(f)))
|
||||
assert list(viv_ext.extract_function_features(f)) == list(null_ext.extract_function_features(to_int(f)))
|
||||
assert list(map(int, viv_ext.get_basic_blocks(f))) == list(null_ext.get_basic_blocks(int(f)))
|
||||
assert list(viv_ext.extract_function_features(f)) == list(null_ext.extract_function_features(int(f)))
|
||||
|
||||
for bb in viv_ext.get_basic_blocks(f):
|
||||
assert list(map(to_int, viv_ext.get_instructions(f, bb))) == list(
|
||||
null_ext.get_instructions(to_int(f), to_int(bb))
|
||||
assert list(map(int, viv_ext.get_instructions(f, bb))) == list(
|
||||
null_ext.get_instructions(int(f), int(bb))
|
||||
)
|
||||
assert list(viv_ext.extract_basic_block_features(f, bb)) == list(
|
||||
null_ext.extract_basic_block_features(to_int(f), to_int(bb))
|
||||
null_ext.extract_basic_block_features(int(f), int(bb))
|
||||
)
|
||||
|
||||
for insn in viv_ext.get_instructions(f, bb):
|
||||
assert list(viv_ext.extract_insn_features(f, bb, insn)) == list(
|
||||
null_ext.extract_insn_features(to_int(f), to_int(bb), to_int(insn))
|
||||
null_ext.extract_insn_features(int(f), int(bb), int(insn))
|
||||
)
|
||||
|
||||
|
||||
def to_int(o):
|
||||
"""helper to get int value of extractor items"""
|
||||
return capa.helpers.oint(o)
|
||||
|
||||
|
||||
def test_freeze_s_roundtrip():
|
||||
load = capa.features.freeze.loads
|
||||
dump = capa.features.freeze.dumps
|
||||
|
||||
Reference in New Issue
Block a user