extractors: do cast-to-int correctly

This commit is contained in:
William Ballenthin
2021-04-27 13:07:27 -06:00
parent e8457c7abf
commit 9ca1a7ebb6
9 changed files with 69 additions and 81 deletions

View File

@@ -6,9 +6,6 @@
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import sys
import types
import idaapi
import capa.features.extractors.ida.file
@@ -18,24 +15,40 @@ import capa.features.extractors.ida.basicblock
from capa.features.extractors import FeatureExtractor
def get_ea(self):
""" """
if isinstance(self, (idaapi.BasicBlock, idaapi.func_t)):
class FunctionHandle:
"""this acts like an idaapi.func_t but with __int__()"""
def __init__(self, inner):
self._inner = inner
def __int__(self):
return self.start_ea
if isinstance(self, idaapi.insn_t):
def __getattr__(self, name):
return getattr(self._inner, name)
class BasicBlockHandle:
"""this acts like an idaapi.BasicBlock but with __int__()"""
def __init__(self, inner):
self._inner = inner
def __int__(self):
return self.start_ea
def __getattr__(self, name):
return getattr(self._inner, name)
class InstructionHandle:
"""this acts like an idaapi.insn_t but with __int__()"""
def __init__(self, inner):
self._inner = inner
def __int__(self):
return self.ea
raise TypeError
def add_ea_int_cast(o):
"""
dynamically add a cast-to-int (`__int__`) method to the given object
that returns the value of the `.ea` property.
this bit of skullduggery lets us cast viv-utils objects as ints.
the correct way of doing this is to update viv-utils (or subclass the objects here).
"""
setattr(o, "__int__", types.MethodType(get_ea, o))
return o
def __getattr__(self, name):
return getattr(self._inner, name)
class IdaFeatureExtractor(FeatureExtractor):
@@ -59,21 +72,23 @@ class IdaFeatureExtractor(FeatureExtractor):
# ignore library functions and thunk functions as identified by IDA
for f in ida_helpers.get_functions(skip_thunks=True, skip_libs=True):
setattr(f, "ctx", ctx)
yield add_ea_int_cast(f)
yield FunctionHandle(f)
@staticmethod
def get_function(ea):
f = idaapi.get_func(ea)
setattr(f, "ctx", {})
return add_ea_int_cast(f)
return FunctionHandle(f)
def extract_function_features(self, f):
for (feature, ea) in capa.features.extractors.ida.function.extract_features(f):
yield feature, ea
def get_basic_blocks(self, f):
for bb in capa.features.extractors.ida.helpers.get_function_blocks(f):
yield add_ea_int_cast(bb)
import capa.features.extractors.ida.helpers as ida_helpers
for bb in ida_helpers.get_function_blocks(f):
yield BasicBlockHandle(bb)
def extract_basic_block_features(self, f, bb):
for (feature, ea) in capa.features.extractors.ida.basicblock.extract_features(f, bb):
@@ -83,7 +98,7 @@ class IdaFeatureExtractor(FeatureExtractor):
import capa.features.extractors.ida.helpers as ida_helpers
for insn in ida_helpers.get_instructions_in_range(bb.start_ea, bb.end_ea):
yield add_ea_int_cast(insn)
yield InstructionHandle(insn)
def extract_insn_features(self, f, bb, insn):
for (feature, ea) in capa.features.extractors.ida.insn.extract_features(f, bb, insn):

View File

@@ -5,7 +5,6 @@
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import types
import logging
import viv_utils
@@ -22,26 +21,16 @@ __all__ = ["file", "function", "basicblock", "insn"]
logger = logging.getLogger(__name__)
def get_va(self):
try:
# vivisect type
class InstructionHandle:
"""this acts like a vivisect.Opcode but with an __int__() method"""
def __init__(self, inner):
self._inner = inner
def __int__(self):
return self.va
except AttributeError:
pass
raise TypeError()
def add_va_int_cast(o):
"""
dynamically add a cast-to-int (`__int__`) method to the given object
that returns the value of the `.va` property.
this bit of skullduggery lets us cast viv-utils objects as ints.
the correct way of doing this is to update viv-utils (or subclass the objects here).
"""
setattr(o, "__int__", types.MethodType(get_va, o))
return o
def __getattr__(self, name):
return getattr(self._inner, name)
class VivisectFeatureExtractor(FeatureExtractor):
@@ -60,15 +49,14 @@ class VivisectFeatureExtractor(FeatureExtractor):
def get_functions(self):
for va in sorted(self.vw.getFunctions()):
yield add_va_int_cast(viv_utils.Function(self.vw, va))
yield viv_utils.Function(self.vw, va)
def extract_function_features(self, f):
for feature, va in capa.features.extractors.viv.function.extract_features(f):
yield feature, va
def get_basic_blocks(self, f):
for bb in f.basic_blocks:
yield add_va_int_cast(bb)
return f.basic_blocks
def extract_basic_block_features(self, f, bb):
for feature, va in capa.features.extractors.viv.basicblock.extract_features(f, bb):
@@ -76,7 +64,7 @@ class VivisectFeatureExtractor(FeatureExtractor):
def get_instructions(self, f, bb):
for insn in bb.instructions:
yield add_va_int_cast(insn)
yield InstructionHandle(insn)
def extract_insn_features(self, f, bb, insn):
for feature, va in capa.features.extractors.viv.insn.extract_features(f, bb, insn):

View File

@@ -122,7 +122,7 @@ def dumps(extractor):
)
for insnva, insn in sorted(
[(insn.__int__(), insn) for insn in extractor.get_instructions(f, bb)], key=lambda p: p[0]
[(int(insn), insn) for insn in extractor.get_instructions(f, bb)], key=lambda p: p[0]
):
ret["functions"][hex(f)][hex(bb)].append(hex(insnva))

View File

@@ -12,18 +12,7 @@ _hex = hex
def hex(i):
return _hex(oint(i))
def oint(i):
# there seems to be some trouble with using `int(viv_utils.Function)`
# with the black magic we do with binding the `__int__()` routine.
# i haven't had a chance to debug this yet (and i have no hotel wifi).
# so in the meantime, detect this, and call the method directly.
try:
return int(i)
except TypeError:
return i.__int__()
return _hex(int(i))
def get_file_taste(sample_path):

View File

@@ -78,7 +78,7 @@ def find_func_features(f, extractor):
_bb_features[feature].add(ea)
func_features[feature].add(ea)
bb_features[capa.helpers.oint(bb)] = _bb_features
bb_features[int(bb)] = _bb_features
return func_features, bb_features
@@ -100,7 +100,7 @@ def find_func_matches(f, ruleset, func_features, bb_features):
func_features[capa.features.MatchedRule(name)].add(ea)
# find rule matches for function, function features include rule matches for basic blocks
_, matches = capa.engine.match(ruleset.function_rules, func_features, capa.helpers.oint(f))
_, matches = capa.engine.match(ruleset.function_rules, func_features, int(f))
for (name, res) in matches.items():
func_matches[name].extend(res)

View File

@@ -32,7 +32,7 @@ import capa.version
import capa.features
import capa.features.freeze
import capa.features.extractors
from capa.helpers import oint, get_file_taste
from capa.helpers import get_file_taste
RULES_PATH_DEFAULT_STRING = "(embedded rules)"
SUPPORTED_FILE_MAGIC = set([b"MZ"])
@@ -87,14 +87,14 @@ def find_function_capabilities(ruleset, extractor, f):
bb_features[feature].add(va)
function_features[feature].add(va)
_, matches = capa.engine.match(ruleset.basic_block_rules, bb_features, oint(bb))
_, matches = capa.engine.match(ruleset.basic_block_rules, bb_features, int(bb))
for rule_name, res in matches.items():
bb_matches[rule_name].extend(res)
for va, _ in res:
function_features[capa.features.MatchedRule(rule_name)].add(va)
_, function_matches = capa.engine.match(ruleset.function_rules, function_features, oint(f))
_, function_matches = capa.engine.match(ruleset.function_rules, function_features, int(f))
return function_matches, bb_matches, len(function_features)
@@ -139,7 +139,8 @@ def find_capabilities(ruleset, extractor, disable_progress=None):
functions = list(extractor.get_functions())
for f in pbar(functions, desc="matching", unit=" functions"):
function_address = f.__int__()
#from IPython import embed; embed()
function_address = int(f)
if extractor.is_library_function(function_address):
function_name = extractor.get_function_name(function_address)

View File

@@ -135,9 +135,9 @@ def main(argv=None):
if args.format == "freeze":
functions = tuple(filter(lambda f: f == args.function, functions))
else:
functions = tuple(filter(lambda f: capa.helpers.oint(f) == args.function, functions))
functions = tuple(filter(lambda f: int(f) == args.function, functions))
if args.function not in [capa.helpers.oint(f) for f in functions]:
if args.function not in [int(f) for f in functions]:
print("0x%X not a function" % args.function)
return -1

View File

@@ -244,14 +244,14 @@ def sample(request):
def get_function(extractor, fva):
for f in extractor.get_functions():
if f.__int__() == fva:
if int(f) == fva:
return f
raise ValueError("function not found")
def get_basic_block(extractor, f, va):
for bb in extractor.get_basic_blocks(f):
if bb.__int__() == va:
if int(bb) == va:
return bb
raise ValueError("basic block not found")

View File

@@ -115,30 +115,25 @@ def compare_extractors_viv_null(viv_ext, null_ext):
null_ext (capa.features.extractors.NullFeatureExtractor)
"""
assert list(viv_ext.extract_file_features()) == list(null_ext.extract_file_features())
assert list(map(to_int, viv_ext.get_functions())) == list(null_ext.get_functions())
assert list(map(int, viv_ext.get_functions())) == list(null_ext.get_functions())
for f in viv_ext.get_functions():
assert list(map(to_int, viv_ext.get_basic_blocks(f))) == list(null_ext.get_basic_blocks(to_int(f)))
assert list(viv_ext.extract_function_features(f)) == list(null_ext.extract_function_features(to_int(f)))
assert list(map(int, viv_ext.get_basic_blocks(f))) == list(null_ext.get_basic_blocks(int(f)))
assert list(viv_ext.extract_function_features(f)) == list(null_ext.extract_function_features(int(f)))
for bb in viv_ext.get_basic_blocks(f):
assert list(map(to_int, viv_ext.get_instructions(f, bb))) == list(
null_ext.get_instructions(to_int(f), to_int(bb))
assert list(map(int, viv_ext.get_instructions(f, bb))) == list(
null_ext.get_instructions(int(f), int(bb))
)
assert list(viv_ext.extract_basic_block_features(f, bb)) == list(
null_ext.extract_basic_block_features(to_int(f), to_int(bb))
null_ext.extract_basic_block_features(int(f), int(bb))
)
for insn in viv_ext.get_instructions(f, bb):
assert list(viv_ext.extract_insn_features(f, bb, insn)) == list(
null_ext.extract_insn_features(to_int(f), to_int(bb), to_int(insn))
null_ext.extract_insn_features(int(f), int(bb), int(insn))
)
def to_int(o):
"""helper to get int value of extractor items"""
return capa.helpers.oint(o)
def test_freeze_s_roundtrip():
load = capa.features.freeze.loads
dump = capa.features.freeze.dumps