mirror of
https://github.com/mandiant/capa.git
synced 2025-12-12 15:49:46 -08:00
render: add initial proto generator
This commit is contained in:
445
capa/render/proto/__init__.py
Normal file
445
capa/render/proto/__init__.py
Normal file
@@ -0,0 +1,445 @@
|
||||
# Copyright (C) 2023 FireEye, Inc. All Rights Reserved.
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at: [package root]/LICENSE.txt
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
import io
|
||||
import sys
|
||||
from typing import Dict, Union
|
||||
from dataclasses import dataclass
|
||||
|
||||
import pydantic
|
||||
|
||||
import capa.render
|
||||
import capa.render.utils
|
||||
import capa.features.freeze
|
||||
import capa.render.result_document
|
||||
import capa.features.freeze.features
|
||||
from capa.render.utils import StringIO
|
||||
|
||||
|
||||
def emit_proto_enum(out: StringIO, enum):
    """
    write a proto3 `enum` declaration for the given schema enum definition.

    member names are the upper-cased title joined to the upper-cased value,
    with spaces replaced by underscores.
    number zero is always the synthetic `<TITLE>_UNSPECIFIED` member;
    the schema values follow in their listed order, numbered from 1.

    like:

        enum AddressType {
          ADDRESSTYPE_UNSPECIFIED = 0;
          ADDRESSTYPE_ABSOLUTE = 1;
          ADDRESSTYPE_RELATIVE = 2;
          ...
        }

    args:
      out: sink whose `writeln` method receives each rendered line.
      enum: schema definition with a "title" and the list of values under "enum".
    """
    # like: AddressType
    type_name = enum["title"]

    # like: ADDRESSTYPE
    member_prefix = type_name.upper()

    def member_label(raw):
        # like: "dn token" -> ADDRESSTYPE_DN_TOKEN
        return f"{member_prefix}_{raw.upper().replace(' ', '_')}"

    out.writeln(f"enum {type_name} {{")

    # the zero value is not part of the schema, so it is emitted explicitly.
    out.writeln(f" {member_label('unspecified')} = 0;")

    for number, raw in enumerate(enum["enum"], start=1):
        out.writeln(f" {member_label(raw)} = {number};")

    out.writeln("}")
    out.writeln("")
|
||||
|
||||
|
||||
def is_ref(prop):
    """return True when the schema fragment carries a "$ref" key."""
    # like: {"$ref": "#/definitions/Scope"}
    has_reference = "$ref" in prop
    return has_reference
|
||||
|
||||
|
||||
def get_ref_type_name(prop):
    """
    extract the definition name that a "$ref" fragment points at.

    from: {"$ref": "#/definitions/Scope"}
    to:   "Scope"

    asserts that the fragment is a reference into "#/definitions/".
    """
    definitions_prefix = "#/definitions/"

    assert is_ref(prop)

    target = prop["$ref"]
    assert target.startswith(definitions_prefix)

    # everything after the prefix is the type name.
    return target[len(definitions_prefix) :]
|
||||
|
||||
|
||||
def is_primitive_type(prop):
    """
    return True when the schema fragment declares a "type" other than "object".

    things like: string, integer, bool, etc.
    note that "array" fragments also land here, since only "object" is excluded.
    """
    if "type" not in prop:
        return False

    return prop["type"] != "object"
|
||||
|
||||
|
||||
def is_custom_type(prop):
    """
    return True for struct-like things defined in the schema, like Features, etc.

    that is: an "object" fragment that does not declare "additionalProperties".
    """
    if "type" not in prop:
        return False

    if prop["type"] != "object":
        return False

    # objects with additionalProperties are not struct-like.
    return "additionalProperties" not in prop
|
||||
|
||||
|
||||
def get_custom_type_name(prop):
    """return the "title" of the schema fragment, which is used as the type name."""
    type_name = prop["title"]
    return type_name
|
||||
|
||||
|
||||
def is_tuple(prop):
    """
    return True when the schema fragment is an array with a fixed size.

    the types of the elements can vary.
    we'll emit a custom message type for each tuple, like Pair_Address_Match.

    like:

        {"items": [{"$ref": "#/definitions/Address"},
                   {"$ref": "#/definitions/Match"}],
         "maxItems": 2,
         "minItems": 2,
         "type": "array"}
    """
    return (
        "type" in prop
        and prop["type"] == "array"
        # both bounds must be present...
        and "maxItems" in prop
        and "minItems" in prop
        # ...and equal, because tuples have a fixed size.
        and prop["maxItems"] == prop["minItems"]
    )
|
||||
|
||||
|
||||
def get_tuple_type_name(prop):
    """
    build the message name for a tuple, like Pair_Address_Match.

    two-element tuples are named "Pair_...", all other sizes "Tuple_...",
    followed by the type names of the elements joined with underscores.
    """
    assert is_tuple(prop)

    base = "Pair" if prop["maxItems"] == 2 else "Tuple"

    # this won't work for nested tuples, but good enough for here.
    element_names = [get_type_name(element) for element in prop["items"]]

    return base + "_" + "_".join(element_names)
|
||||
|
||||
|
||||
def is_array(prop):
    """
    return True when the schema fragment is a variable-length array of a single element type.

    typically we can use a repeated field for this.
    note: there's a special case within maps, where the array elements are a custom wrapper type.

    like:

        {"items": {"type": "string"},
         "title": "Parts",
         "type": "array"}

    returns False for fixed-size arrays (equal "minItems" and "maxItems"),
    and for arrays whose "items" is missing or is not a single schema dict.
    """
    if "type" not in prop:
        return False

    if prop["type"] != "array":
        return False

    if "maxItems" in prop and "minItems" in prop and prop["maxItems"] == prop["minItems"]:
        # tuples have a fixed size, arrays are variable
        return False

    # array elements have a fixed type, described by a single schema dict.
    # "items" is read with .get() so that an array schema that omits it
    # is reported as "not an array" instead of raising KeyError,
    # consistent with how the other optional keys are guarded here.
    return isinstance(prop.get("items"), dict)
|
||||
|
||||
|
||||
def is_map(prop):
    """
    return True when the schema fragment is an "object" that declares "additionalProperties".

    a map maps from string key to a fixed type.
    the value type cannot be repeated, so we'll emit a custom wrapper type.

    like:

        {"additionalProperties": {"items": {"$ref": "#/definitions/Address"},
                                  "type": "array"},
         "title": "Captures",
         "type": "object"}
    """
    if "type" not in prop:
        return False

    if prop["type"] != "object":
        return False

    return "additionalProperties" in prop
|
||||
|
||||
|
||||
def get_primitive_type_name(prop):
    """
    translate a primitive schema fragment into the proto type used for it.

    scalars map to a fixed name, tuples map to their generated message name,
    and arrays map to "repeated <element type>".

    raises:
      NotImplementedError: the type, or the array element, is not handled here.
    """
    assert is_primitive_type(prop)

    kind = prop["type"]

    # schema type -> proto type.
    #
    # integer: this integer has arbitrary range,
    # but proto supports only i64 and u64.
    # so we hook this specially, including within the translator.
    #
    # number: int | float
    # we hook this specially
    scalar_names = (
        ("string", "string"),
        ("boolean", "bool"),
        ("integer", "Integer"),
        ("number", "Number"),
    )
    for schema_name, proto_name in scalar_names:
        if kind == schema_name:
            return proto_name

    if is_tuple(prop):
        return get_tuple_type_name(prop)

    if not is_array(prop):
        raise NotImplementedError(kind)

    element = prop["items"]

    if is_primitive_type(element):
        element_type = get_primitive_type_name(element)
    elif is_ref(element):
        element_type = get_ref_type_name(element)
    elif is_custom_type(element):
        element_type = get_custom_type_name(element)
    else:
        raise NotImplementedError(element)

    return f"repeated {element_type}"
|
||||
|
||||
|
||||
def get_type_name(prop):
    """
    resolve the proto type name for a primitive, custom, or referenced schema fragment.

    the checks are tried in that order; the first match wins.

    raises:
      NotImplementedError: the fragment is none of the above.
    """
    if is_primitive_type(prop):
        return get_primitive_type_name(prop)

    if is_custom_type(prop):
        return get_custom_type_name(prop)

    if is_ref(prop):
        return get_ref_type_name(prop)

    raise NotImplementedError(prop)
|
||||
|
||||
|
||||
def is_union(prop):
    """return True when the schema fragment carries an "anyOf" key."""
    # a union is a field that can be one of several types.
    has_alternatives = "anyOf" in prop
    return has_alternatives
|
||||
|
||||
|
||||
# characters that show up in schema property names and what replaces them.
_PROP_NAME_TRANSLATION = str.maketrans({"-": "_", "&": "a", "/": "_", " ": "_"})


def sanitize_prop_name(name):
    """
    rewrite a schema property name for use as a proto field name.

    "-", "/" and " " become "_", and "&" becomes "a"; everything else is kept.

    like: "analysis-conclusion" -> "analysis_conclusion"
    like: "att&ck" -> "attack"
    like: "capa/subscope" -> "capa_subscope"
    like: "function name" -> "function_name"
    """
    # a single pass is equivalent to replacing one character after another here,
    # because none of the replacement characters is itself replaced.
    return name.translate(_PROP_NAME_TRANSLATION)
|
||||
|
||||
|
||||
def _find_capa_class(name):
    """
    try to find the capa class that corresponds to the given name.
    we use this to find the class that defines the property order.

    the modules are searched in a fixed order and the first hit wins.

    raises:
      NotImplementedError: none of the modules has an attribute with this name.
    """
    search_order = (
        capa.render.result_document,
        capa.features.freeze,
        capa.features.freeze.features,
    )

    for module in search_order:
        try:
            return getattr(module, name)
        except AttributeError:
            # not here, try the next module.
            continue

    raise NotImplementedError(name)
|
||||
|
||||
|
||||
def _enum_properties(message):
    """
    enumerate the properties of the message definition, ordered by class declaration.

    returns the (name, schema) pairs of message["properties"], sorted so that
    properties matching a parameter of the capa class come first, in signature order,
    followed by the remaining properties in schema order.
    """
    # this is just for convenience.

    # the order of properties provided by the class. guaranteed.
    declared = list(_find_capa_class(message["title"]).__signature__.parameters.keys())

    # order of properties provided by pydantic. not guaranteed. the fallback.
    # used when we can't figure out an alias, such as capa/subscope -> is_subscope.
    schema_props = message["properties"]
    schema_order = list(schema_props.keys())

    def rank(entry):
        raw_name = entry[0]
        candidate = sanitize_prop_name(raw_name)

        if candidate in declared:
            # prefer the order of properties provided by the class.
            return declared.index(candidate)

        # fallback to whatever pydantic extracts.
        # the offset places these after every property found in the class.
        return len(schema_props) + schema_order.index(raw_name)

    return sorted(schema_props.items(), key=rank)
|
||||
|
||||
|
||||
@dataclass
class DeferredArrayType:
    """
    a wrapper message around a repeated value, to be emitted after the top level types.

    like: message Array_Address { repeated Address values = 1; }
    """

    # name of the wrapper message, like: Array_Address
    name: str
    # schema fragment for the array elements, like: {"$ref": "#/definitions/Address"}
    item: dict
|
||||
|
||||
|
||||
@dataclass
class DeferredTupleType:
    """
    a message that stands in for a fixed-size tuple, to be emitted after the top level types.

    like: message Pair_Address_Match { Address v0 = 1; Match v1 = 2; }
    """

    # name of the tuple message, like: Pair_Address_Match
    name: str
    # number of elements, taken from the schema's "minItems".
    count: int
    # one schema fragment per element, in order.
    # this is the tuple's "items" list, so it is annotated as a list, not a dict.
    items: list
|
||||
|
||||
|
||||
def emit_proto_message(out: StringIO, deferred_types: Dict, message):
    """
    write a proto3 `message` declaration for the given schema object definition.

    each property becomes one field, except unions ("anyOf"), which become a `oneof`
    with one member per alternative, named v0, v1, ...

    tuples and map-of-array values need helper messages;
    these are not written here but registered in `deferred_types`, keyed by message name.

    args:
      out: sink whose `writeln` method receives each rendered line.
      deferred_types: updated in place with the helper types this message needs.
      message: schema definition with a "title" and "properties".

    raises:
      NotImplementedError: a union alternative is neither a reference nor a primitive.
      ValueError: a property is of no kind handled here.
    """
    # like: Address
    message_name = message["title"]

    def defer_tuple(type_name, tuple_prop):
        # register the tuple message to be emitted once we're done with the
        # top level custom types in the schema.
        deferred_types[type_name] = DeferredTupleType(type_name, tuple_prop["minItems"], tuple_prop["items"])

    out.writeln(f"message {message_name} {{")

    # we use a shared iterator, rather than enumerate(), so that
    # union/oneof members can consume field numbers, too.
    field_numbers = iter(range(1, sys.maxsize))

    for raw_name, prop in _enum_properties(message):
        number = next(field_numbers)
        field_name = sanitize_prop_name(raw_name)

        if is_ref(prop):
            out.writeln(f" {get_ref_type_name(prop)} {field_name} = {number};")
            continue

        if is_primitive_type(prop):
            field_type = get_primitive_type_name(prop)
            out.writeln(f" {field_type} {field_name} = {number};")

            if is_tuple(prop):
                defer_tuple(field_type, prop)
            elif is_array(prop):
                # an array of tuples still needs the tuple message.
                element = prop["items"]
                if is_tuple(element):
                    defer_tuple(get_tuple_type_name(element), element)
            continue

        if is_custom_type(prop):
            out.writeln(f" {get_custom_type_name(prop)} {field_name} = {number};")
            continue

        if is_union(prop):
            out.writeln(f" oneof {field_name} {{")

            for position, option in enumerate(prop["anyOf"]):
                if is_ref(option):
                    out.writeln(f" {get_ref_type_name(option)} v{position} = {number};")

                elif is_primitive_type(option):
                    option_type = get_primitive_type_name(option)
                    out.writeln(f" {option_type} v{position} = {number};")

                    if is_tuple(option):
                        defer_tuple(option_type, option)

                # pydantic doesn't seem to encode None option
                # fortunately, neither does protobuf.
                # still seems weird not to be explicit.

                else:
                    raise NotImplementedError(option)

                # note: this also runs after the last member, and the outer loop
                # then takes the next number, so one field number is left unused
                # after each oneof (like: members 2 and 3, next field 5).
                number = next(field_numbers)

            out.writeln(" };")
            continue

        if is_map(prop):
            value_prop = prop["additionalProperties"]

            if is_array(value_prop):
                # map values cannot be repeated, see:
                # https://stackoverflow.com/a/41552990/87207
                #
                # so create a wrapper type around the repeated values.
                # like: message Array_Integer { repeated int32 values = 1; }
                #
                # no:
                #
                #     map <string, repeated int32> things = 1;
                #
                # yes:
                #
                #     map <string, Array_Integer> things = 1;
                #
                # we could do this for every array, like Array_Integer and Array_Address,
                # but its less idiomatic and more noisy.
                # so we only create these types when we need them.
                element = value_prop["items"]
                value_type = "Array_" + get_type_name(element)

                # register this type to be emitted once we're done with the
                # top level custom types in the schema.
                deferred_types[value_type] = DeferredArrayType(value_type, element)
            else:
                value_type = get_type_name(value_prop)

            out.writeln(f" map <string, {value_type}> {field_name} = {number};")
            continue

        raise ValueError("unexpected type: %s" % prop)

    out.writeln("}")
    out.writeln("")
|
||||
|
||||
|
||||
def emit_proto_entry(out: StringIO, deferred_types: Dict, schema, name):
    """
    write the proto declaration for one entry of schema["definitions"].

    string definitions with an "enum" list become enums, object definitions become messages.

    args:
      out: sink passed through to the enum/message emitters.
      deferred_types: passed through to the message emitter, which updates it in place.
      schema: the full schema, with a "definitions" mapping.
      name: reference to the definition, like: #/definitions/Address

    raises:
      ValueError: the name is not a "#/definitions/" reference,
        or the definition's title does not match the name.
      NotImplementedError: the definition is neither a string enum nor an object.
    """
    definitions_prefix = "#/definitions/"

    if not name.startswith(definitions_prefix):
        raise ValueError("unexpected name: %s" % name)

    title = name[len(definitions_prefix) :]
    definition = schema["definitions"][title]

    if definition["title"] != title:
        raise ValueError("title mismatch: %s" % definition["title"])

    kind = definition["type"]

    if kind == "object":
        emit_proto_message(out, deferred_types, definition)

    elif kind == "string" and "enum" in definition:
        emit_proto_enum(out, definition)

    else:
        raise NotImplementedError(kind)
|
||||
|
||||
|
||||
def generate_proto_from_pydantic(schema):
    """
    render the given schema as the text of a proto3 file.

    the output has, in order: the file header, one declaration per entry of
    schema["definitions"] (sorted by name), the helper messages registered
    while emitting those (sorted by name), and the Integer/Number messages.

    returns:
      the proto source as a string.
    """
    out: StringIO = capa.render.utils.StringIO()

    header = (
        "// Generated by the capa.render.proto translator. DO NOT EDIT!",
        'syntax = "proto3";',
        "",
    )
    for header_line in header:
        out.writeln(header_line)

    # filled in by the emitters with the helper types they need.
    deferred_types: Dict[str, Union[DeferredArrayType, DeferredTupleType]] = {}

    for title in sorted(schema["definitions"]):
        emit_proto_entry(out, deferred_types, schema, "#/definitions/" + title)

    # the names are unique, so sorting the names orders the entries.
    for type_name in sorted(deferred_types):
        deferred = deferred_types[type_name]

        if isinstance(deferred, DeferredArrayType):
            element_type = get_type_name(deferred.item)
            out.writeln(f"message {type_name} {{ repeated {element_type} values = 1; }}\n")

        elif isinstance(deferred, DeferredTupleType):
            out.writeln(f"message {type_name} {{")
            for number, element in enumerate(deferred.items, start=1):
                # members are named from v0, field numbers start at 1.
                out.writeln(f" {get_type_name(element)} v{number - 1} = {number};")
            out.writeln("}\n")

    # these are additional primitive types that we'll use throughout.
    out.writeln("message Integer { oneof value { uint64 u = 1; int64 i = 2; } }\n")
    out.writeln("message Number { oneof value { uint64 u = 1; int64 i = 2; double f = 3; } }\n")

    return out.getvalue()
|
||||
|
||||
|
||||
def generate_proto() -> str:
    """
    generate a protobuf v3 schema for the ResultDocument format.
    we use introspection of the pydantic schema to generate this.

    note: we *cannot* use the generated proto from version to version of capa,
    because this translator does not guarantee field ordering/numbering.
    that is, if we add a new property to any of the pydantic models,
    the proto field numbers may change, and any clients using the proto will break.

    instead, we should use this method to generate the proto,
    probably once per major version,
    and then commit the proto to the repo.
    """
    schema = pydantic.schema_of(capa.render.result_document.ResultDocument)
    return generate_proto_from_pydantic(schema)
|
||||
392
capa/render/proto/capa.proto
Normal file
392
capa/render/proto/capa.proto
Normal file
@@ -0,0 +1,392 @@
|
||||
// Generated by the capa.render.proto translator. DO NOT EDIT!
|
||||
syntax = "proto3";
|
||||
|
||||
message APIFeature {
|
||||
string type = 1;
|
||||
string api = 2;
|
||||
string description = 3;
|
||||
}
|
||||
|
||||
message Address {
|
||||
AddressType type = 1;
|
||||
oneof value {
|
||||
Integer v0 = 2;
|
||||
Pair_Integer_Integer v1 = 3;
|
||||
};
|
||||
}
|
||||
|
||||
enum AddressType {
|
||||
ADDRESSTYPE_UNSPECIFIED = 0;
|
||||
ADDRESSTYPE_ABSOLUTE = 1;
|
||||
ADDRESSTYPE_RELATIVE = 2;
|
||||
ADDRESSTYPE_FILE = 3;
|
||||
ADDRESSTYPE_DN_TOKEN = 4;
|
||||
ADDRESSTYPE_DN_TOKEN_OFFSET = 5;
|
||||
ADDRESSTYPE_NO_ADDRESS = 6;
|
||||
}
|
||||
|
||||
message Analysis {
|
||||
string format = 1;
|
||||
string arch = 2;
|
||||
string os = 3;
|
||||
string extractor = 4;
|
||||
repeated string rules = 5;
|
||||
Address base_address = 6;
|
||||
Layout layout = 7;
|
||||
FeatureCounts feature_counts = 8;
|
||||
repeated LibraryFunction library_functions = 9;
|
||||
}
|
||||
|
||||
message ArchFeature {
|
||||
string type = 1;
|
||||
string arch = 2;
|
||||
string description = 3;
|
||||
}
|
||||
|
||||
message AttackSpec {
|
||||
repeated string parts = 1;
|
||||
string tactic = 2;
|
||||
string technique = 3;
|
||||
string subtechnique = 4;
|
||||
string id = 5;
|
||||
}
|
||||
|
||||
message BasicBlockFeature {
|
||||
string type = 1;
|
||||
string description = 2;
|
||||
}
|
||||
|
||||
message BasicBlockLayout {
|
||||
Address address = 1;
|
||||
}
|
||||
|
||||
message BytesFeature {
|
||||
string type = 1;
|
||||
string bytes = 2;
|
||||
string description = 3;
|
||||
}
|
||||
|
||||
message CharacteristicFeature {
|
||||
string type = 1;
|
||||
string characteristic = 2;
|
||||
string description = 3;
|
||||
}
|
||||
|
||||
message ClassFeature {
|
||||
string type = 1;
|
||||
string description = 2;
|
||||
string class = 3;
|
||||
}
|
||||
|
||||
message CompoundStatement {
|
||||
string type = 1;
|
||||
string description = 2;
|
||||
}
|
||||
|
||||
message ExportFeature {
|
||||
string type = 1;
|
||||
string export = 2;
|
||||
string description = 3;
|
||||
}
|
||||
|
||||
message FeatureCounts {
|
||||
Integer file = 1;
|
||||
repeated FunctionFeatureCount functions = 2;
|
||||
}
|
||||
|
||||
message FeatureNode {
|
||||
oneof feature {
|
||||
OSFeature v0 = 1;
|
||||
ArchFeature v1 = 2;
|
||||
FormatFeature v2 = 3;
|
||||
MatchFeature v3 = 4;
|
||||
CharacteristicFeature v4 = 5;
|
||||
ExportFeature v5 = 6;
|
||||
ImportFeature v6 = 7;
|
||||
SectionFeature v7 = 8;
|
||||
FunctionNameFeature v8 = 9;
|
||||
SubstringFeature v9 = 10;
|
||||
RegexFeature v10 = 11;
|
||||
StringFeature v11 = 12;
|
||||
ClassFeature v12 = 13;
|
||||
NamespaceFeature v13 = 14;
|
||||
APIFeature v14 = 15;
|
||||
PropertyFeature v15 = 16;
|
||||
NumberFeature v16 = 17;
|
||||
BytesFeature v17 = 18;
|
||||
OffsetFeature v18 = 19;
|
||||
MnemonicFeature v19 = 20;
|
||||
OperandNumberFeature v20 = 21;
|
||||
OperandOffsetFeature v21 = 22;
|
||||
BasicBlockFeature v22 = 23;
|
||||
};
|
||||
string type = 25;
|
||||
}
|
||||
|
||||
message FormatFeature {
|
||||
string type = 1;
|
||||
string format = 2;
|
||||
string description = 3;
|
||||
}
|
||||
|
||||
message FunctionFeatureCount {
|
||||
Address address = 1;
|
||||
Integer count = 2;
|
||||
}
|
||||
|
||||
message FunctionLayout {
|
||||
Address address = 1;
|
||||
repeated BasicBlockLayout matched_basic_blocks = 2;
|
||||
}
|
||||
|
||||
message FunctionNameFeature {
|
||||
string type = 1;
|
||||
string function_name = 2;
|
||||
string description = 3;
|
||||
}
|
||||
|
||||
message ImportFeature {
|
||||
string type = 1;
|
||||
string description = 2;
|
||||
string import = 3;
|
||||
}
|
||||
|
||||
message Layout {
|
||||
repeated FunctionLayout functions = 1;
|
||||
}
|
||||
|
||||
message LibraryFunction {
|
||||
Address address = 1;
|
||||
string name = 2;
|
||||
}
|
||||
|
||||
message MBCSpec {
|
||||
repeated string parts = 1;
|
||||
string objective = 2;
|
||||
string behavior = 3;
|
||||
string method = 4;
|
||||
string id = 5;
|
||||
}
|
||||
|
||||
message MaecMetadata {
|
||||
string analysis_conclusion = 1;
|
||||
string analysis_conclusion_ov = 2;
|
||||
string malware_family = 3;
|
||||
string malware_category = 4;
|
||||
string malware_category_ov = 5;
|
||||
}
|
||||
|
||||
message Match {
|
||||
bool success = 1;
|
||||
oneof node {
|
||||
StatementNode v0 = 2;
|
||||
FeatureNode v1 = 3;
|
||||
};
|
||||
repeated Match children = 5;
|
||||
repeated Address locations = 6;
|
||||
map <string, Array_Address> captures = 7;
|
||||
}
|
||||
|
||||
message MatchFeature {
|
||||
string type = 1;
|
||||
string match = 2;
|
||||
string description = 3;
|
||||
}
|
||||
|
||||
message Metadata {
|
||||
string timestamp = 1;
|
||||
string version = 2;
|
||||
repeated string argv = 3;
|
||||
Sample sample = 4;
|
||||
Analysis analysis = 5;
|
||||
}
|
||||
|
||||
message MnemonicFeature {
|
||||
string type = 1;
|
||||
string mnemonic = 2;
|
||||
string description = 3;
|
||||
}
|
||||
|
||||
message NamespaceFeature {
|
||||
string type = 1;
|
||||
string namespace = 2;
|
||||
string description = 3;
|
||||
}
|
||||
|
||||
message NumberFeature {
|
||||
string type = 1;
|
||||
oneof number {
|
||||
Integer v0 = 2;
|
||||
Number v1 = 3;
|
||||
};
|
||||
string description = 5;
|
||||
}
|
||||
|
||||
message OSFeature {
|
||||
string type = 1;
|
||||
string os = 2;
|
||||
string description = 3;
|
||||
}
|
||||
|
||||
message OffsetFeature {
|
||||
string type = 1;
|
||||
Integer offset = 2;
|
||||
string description = 3;
|
||||
}
|
||||
|
||||
message OperandNumberFeature {
|
||||
string type = 1;
|
||||
Integer index = 2;
|
||||
Integer operand_number = 3;
|
||||
string description = 4;
|
||||
}
|
||||
|
||||
message OperandOffsetFeature {
|
||||
string type = 1;
|
||||
Integer index = 2;
|
||||
Integer operand_offset = 3;
|
||||
string description = 4;
|
||||
}
|
||||
|
||||
message PropertyFeature {
|
||||
string type = 1;
|
||||
string access = 2;
|
||||
string property = 3;
|
||||
string description = 4;
|
||||
}
|
||||
|
||||
message RangeStatement {
|
||||
string description = 1;
|
||||
Integer min = 2;
|
||||
Integer max = 3;
|
||||
oneof child {
|
||||
OSFeature v0 = 4;
|
||||
ArchFeature v1 = 5;
|
||||
FormatFeature v2 = 6;
|
||||
MatchFeature v3 = 7;
|
||||
CharacteristicFeature v4 = 8;
|
||||
ExportFeature v5 = 9;
|
||||
ImportFeature v6 = 10;
|
||||
SectionFeature v7 = 11;
|
||||
FunctionNameFeature v8 = 12;
|
||||
SubstringFeature v9 = 13;
|
||||
RegexFeature v10 = 14;
|
||||
StringFeature v11 = 15;
|
||||
ClassFeature v12 = 16;
|
||||
NamespaceFeature v13 = 17;
|
||||
APIFeature v14 = 18;
|
||||
PropertyFeature v15 = 19;
|
||||
NumberFeature v16 = 20;
|
||||
BytesFeature v17 = 21;
|
||||
OffsetFeature v18 = 22;
|
||||
MnemonicFeature v19 = 23;
|
||||
OperandNumberFeature v20 = 24;
|
||||
OperandOffsetFeature v21 = 25;
|
||||
BasicBlockFeature v22 = 26;
|
||||
};
|
||||
string type = 28;
|
||||
}
|
||||
|
||||
message RegexFeature {
|
||||
string type = 1;
|
||||
string regex = 2;
|
||||
string description = 3;
|
||||
}
|
||||
|
||||
message ResultDocument {
|
||||
Metadata meta = 1;
|
||||
map <string, RuleMatches> rules = 2;
|
||||
}
|
||||
|
||||
message RuleMatches {
|
||||
RuleMetadata meta = 1;
|
||||
string source = 2;
|
||||
repeated Pair_Address_Match matches = 3;
|
||||
}
|
||||
|
||||
message RuleMetadata {
|
||||
string name = 1;
|
||||
string namespace = 2;
|
||||
repeated string authors = 3;
|
||||
Scope scope = 4;
|
||||
repeated AttackSpec attack = 5;
|
||||
repeated MBCSpec mbc = 6;
|
||||
repeated string references = 7;
|
||||
repeated string examples = 8;
|
||||
string description = 9;
|
||||
bool lib = 10;
|
||||
MaecMetadata maec = 11;
|
||||
bool capa_subscope = 12;
|
||||
}
|
||||
|
||||
message Sample {
|
||||
string md5 = 1;
|
||||
string sha1 = 2;
|
||||
string sha256 = 3;
|
||||
string path = 4;
|
||||
}
|
||||
|
||||
enum Scope {
|
||||
SCOPE_UNSPECIFIED = 0;
|
||||
SCOPE_FILE = 1;
|
||||
SCOPE_FUNCTION = 2;
|
||||
SCOPE_BASIC_BLOCK = 3;
|
||||
SCOPE_INSTRUCTION = 4;
|
||||
}
|
||||
|
||||
message SectionFeature {
|
||||
string type = 1;
|
||||
string section = 2;
|
||||
string description = 3;
|
||||
}
|
||||
|
||||
message SomeStatement {
|
||||
string description = 1;
|
||||
Integer count = 2;
|
||||
string type = 3;
|
||||
}
|
||||
|
||||
message StatementNode {
|
||||
oneof statement {
|
||||
RangeStatement v0 = 1;
|
||||
SomeStatement v1 = 2;
|
||||
SubscopeStatement v2 = 3;
|
||||
CompoundStatement v3 = 4;
|
||||
};
|
||||
string type = 6;
|
||||
}
|
||||
|
||||
message StringFeature {
|
||||
string type = 1;
|
||||
string string = 2;
|
||||
string description = 3;
|
||||
}
|
||||
|
||||
message SubscopeStatement {
|
||||
string description = 1;
|
||||
Scope scope = 2;
|
||||
string type = 3;
|
||||
}
|
||||
|
||||
message SubstringFeature {
|
||||
string type = 1;
|
||||
string substring = 2;
|
||||
string description = 3;
|
||||
}
|
||||
|
||||
message Array_Address { repeated Address values = 1; }
|
||||
|
||||
message Pair_Address_Match {
|
||||
Address v0 = 1;
|
||||
Match v1 = 2;
|
||||
}
|
||||
|
||||
message Pair_Integer_Integer {
|
||||
Integer v0 = 1;
|
||||
Integer v1 = 2;
|
||||
}
|
||||
|
||||
message Integer { oneof value { uint64 u = 1; int64 i = 2; } }
|
||||
|
||||
message Number { oneof value { uint64 u = 1; int64 i = 2; double f = 3; } }
|
||||
|
||||
137
capa/render/proto/capa_pb2.py
Normal file
137
capa/render/proto/capa_pb2.py
Normal file
File diff suppressed because one or more lines are too long
37
tests/test_proto.py
Normal file
37
tests/test_proto.py
Normal file
@@ -0,0 +1,37 @@
|
||||
# Copyright (C) 2023 FireEye, Inc. All Rights Reserved.
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at: [package root]/LICENSE.txt
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
import pathlib
|
||||
import subprocess
|
||||
|
||||
import capa.render
|
||||
import capa.render.proto
|
||||
import capa.render.utils
|
||||
import capa.features.freeze
|
||||
import capa.render.result_document
|
||||
import capa.features.freeze.features
|
||||
|
||||
|
||||
def test_generate_proto(tmp_path: pathlib.Path):
    """
    generate the proto, write it to a temporary directory, and compile it with protoc.

    the test fails when protoc exits with a non-zero status (check=True)
    or when the compiled capa_pb2.py cannot be read afterwards.
    the proto and the compiled module are printed to help debugging.
    """
    banner = "====================================="
    rule = "-------------------------------------"

    tmp_path.mkdir(exist_ok=True, parents=True)
    proto_path = tmp_path / "capa.proto"

    proto = capa.render.proto.generate_proto()

    # dump the proto with line numbers, to match up with protoc diagnostics.
    print(banner)
    print(proto_path)
    print(rule)
    for lineno, text in enumerate(proto.split("\n")):
        print(f" {lineno} | {text}")
    print(banner)
    proto_path.write_text(proto)

    protoc_command = ["protoc", f"-I={tmp_path}", f"--python_out={tmp_path}", str(proto_path)]
    subprocess.run(protoc_command, check=True)

    pb = tmp_path / "capa_pb2.py"
    print(pb.read_text())
    print(banner)
|
||||
Reference in New Issue
Block a user