mirror of
https://github.com/mandiant/capa.git
synced 2025-12-12 07:40:38 -08:00
567 lines
19 KiB
JavaScript
567 lines
19 KiB
JavaScript
/**
 * Parses rules data for the CapaTreeTable component.
 *
 * Produces one top-level node per rule. For file-scoped rules the parsed match
 * tree is attached directly to the rule node; for every other rule an
 * intermediate "match location" node is created per match and the parsed match
 * tree is attached underneath it.
 *
 * @param {Object} rules - The rules object from the rdoc JSON data
 * @param {string} flavor - The flavor of the analysis (static or dynamic)
 * @param {Object} _layout - The layout object from the rdoc JSON data
 * @param {number} [maxMatches=300] - Maximum number of matches to parse per rule.
 *     Only applied when `flavor` is "dynamic" (used for optimized rendering).
 * @returns {Array} - Parsed tree data for the TreeTable component
 */
export function parseRules(rules, flavor, _layout, maxMatches = 300) {
    const layout = preprocessLayout(_layout);
    const isDynamic = flavor === "dynamic";
    const treeData = [];

    for (const [index, rule] of Object.values(rules).entries()) {
        const ruleNode = createRuleNode(rule, index);

        // `scopes` is read defensively in both branches below; previously only the
        // file-scope check was guarded and the fallback branch dereferenced it blindly.
        const staticScope = rule.meta.scopes?.static;

        // Dynamic rules can have thousands of matches, only parse `maxMatches` of them
        const matchesToProcess = isDynamic ? rule.matches.slice(0, maxMatches) : rule.matches;

        for (const [matchIndex, match] of matchesToProcess.entries()) {
            const matchKey = `${index}-${matchIndex}`;
            // parseNode returns null for missing or unsuccessful nodes; never store that as a child
            const parsedMatch = parseNode(match[1], matchKey, rules, rule.meta.lib, layout);

            if (staticScope === "file") {
                // File-level scope: no need to show the match location address
                if (parsedMatch !== null) {
                    ruleNode.children.push(parsedMatch);
                }
            } else {
                // Any other scope: group the parsed tree under an intermediate match location node
                const matchNode = createMatchNode(staticScope, match, matchKey, flavor, layout);
                if (parsedMatch !== null) {
                    matchNode.children.push(parsedMatch);
                }
                ruleNode.children.push(matchNode);
            }
        }

        // Add a note for the matches that were left out by the `maxMatches` limit
        const hiddenMatchCount = rule.matches.length - matchesToProcess.length;
        if (hiddenMatchCount > 0) {
            ruleNode.children.push(createAdditionalMatchesNode(index, hiddenMatchCount));
        }

        treeData.push(ruleNode);
    }

    return treeData;
}
|
|
|
|
/**
 * Builds lookup maps over the matched processes, threads and calls of a layout.
 *
 * Keys are the address components joined with "-": `ppid-pid` for processes,
 * `ppid-pid-tid` for threads and `ppid-pid-tid-callId` for calls. Entries whose
 * address is missing, has a falsy value, or carries a different address type
 * are skipped together with everything nested below them.
 *
 * @param {Object} layout - The layout object from rdoc JSON data
 * @returns {Object} An object containing lookup maps for calls, threads, and processes
 */
function preprocessLayout(layout) {
    const maps = { processMap: new Map(), threadMap: new Map(), callMap: new Map() };

    // True when `entry.address` exists, is of the expected type and has a truthy value
    const hasAddressOfType = (entry, type) =>
        Boolean(entry.address && entry.address.type === type && entry.address.value);

    if (!layout || !layout.processes) {
        return maps;
    }

    for (const proc of layout.processes) {
        if (!hasAddressOfType(proc, "process")) continue;

        const [ppid, pid] = proc.address.value;
        maps.processMap.set(`${ppid}-${pid}`, proc);

        if (!proc.matched_threads) continue;

        for (const thread of proc.matched_threads) {
            if (!hasAddressOfType(thread, "thread")) continue;

            // ppid/pid come from the owning process; only the thread id is read here
            const [, , tid] = thread.address.value;
            maps.threadMap.set(`${ppid}-${pid}-${tid}`, thread);

            if (!thread.matched_calls) continue;

            for (const call of thread.matched_calls) {
                if (!hasAddressOfType(call, "call")) continue;

                const [, , , callId] = call.address.value;
                maps.callMap.set(`${ppid}-${pid}-${tid}-${callId}`, call);
            }
        }
    }

    return maps;
}
|
|
/**
 * Creates the top-level tree node for a rule.
 *
 * @param {Object} rule - A rule entry; `meta`, `matches` and `source` are read
 * @param {number} index - Position of the rule; stringified to form the node key
 * @returns {Object} Rule node with an empty `children` array
 */
function createRuleNode(rule, index) {
    const { meta, matches, source } = rule;
    const { name, lib, namespace, mbc, attack } = meta;

    const data = {
        type: "rule",
        name,
        lib,
        matchCount: matches.length,
        namespace,
        mbc,
        source,
        attack
    };

    return { key: `${index}`, data, children: [] };
}
|
|
|
|
/**
 * Creates a match location node, e.g. "basic block @ 0x1000" for static results
 * or "explorer.exe (ppid:1234, pid:5678)" for everything else.
 *
 * @param {string} scope - Static scope of the rule; only used in the static label
 * @param {Array} match - The match pair; its first element is the match location
 * @param {string} matchKey - Key to assign to the node
 * @param {string} flavor - "static" selects the address label, any other value the process label
 * @param {Object} layout - Preprocessed layout maps, used for the process label
 * @returns {Object} Match location node with an empty `children` array
 */
function createMatchNode(scope, match, matchKey, flavor, layout) {
    const location = match[0];

    let name;
    if (flavor === "static") {
        name = `${scope} @ ${formatAddress(location)}`;
    } else {
        name = getProcessName(layout, location);
    }

    return {
        key: matchKey,
        data: { type: "match location", name },
        children: []
    };
}
|
|
|
|
/**
 * Creates the placeholder node that tells the user how many matches of a rule
 * were left out of the tree.
 *
 * @param {number} index - Index of the owning rule; stringified to form the node key
 * @param {number} additionalMatchCount - Number of matches that are not shown
 * @returns {Object} A "match location" node without a `children` property
 */
function createAdditionalMatchesNode(index, additionalMatchCount) {
    const key = `${index}`;
    const name = `... and ${additionalMatchCount} more matches`;

    return { key, data: { type: "match location", name } };
}
|
|
|
|
/**
 * Parses a single `node` object (i.e. statement or feature) of a rule match
 * into a tree node, recursing into its children.
 *
 * Returns null for a missing node, for a node that did not succeed, and for an
 * `optional` statement that ends up without children. Children of a `not`
 * statement have their success inverted before being evaluated.
 *
 * @param {Object} node - The node to parse
 * @param {string} key - The key for this node; children are given the same key
 * @param {Object} rules - The full rules object (used to look up the source of matched rules)
 * @param {boolean} lib - Whether this is a library rule
 * @param {Object} layout - Preprocessed layout maps, used to resolve call information
 * @returns {Object|null} - Parsed node data, or null when the node is not shown
 */
function parseNode(node, key, rules, lib, layout) {
    if (!node) return null;

    const isNotStatement = node.node.statement && node.node.statement.type === "not";
    const current = isNotStatement ? invertNotStatementSuccess(node) : node;
    if (!current.success) return null;

    const { statement, feature } = current.node;
    const featureType = feature ? feature.type : undefined;

    const result = {
        key,
        data: {
            type: current.node.type, // feature or statement
            typeValue: statement?.type || feature?.type,
            success: current.success,
            name: getNodeName(current),
            lib,
            address: getNodeAddress(current),
            description: getNodeDescription(current)
        },
        children: []
    };

    // Keep only the children that produce a node
    if (Array.isArray(current.children)) {
        for (const child of current.children) {
            const parsedChild = parseNode(child, `${key}`, rules, lib, layout);
            if (parsedChild !== null) {
                result.children.push(parsedChild);
            }
        }
    }

    // A `match` feature references another rule: expose its source, hide its subtree
    if (featureType === "match") {
        const matchedRule = rules[feature.match];
        if (matchedRule) {
            result.data.source = matchedRule.source;
        }
        result.children = [];
    }

    // An `optional` statement with nothing underneath is not worth showing
    const isOptional = Boolean(statement) && statement.type === "optional";
    if (isOptional && result.children.length === 0) {
        return null;
    }

    if (featureType === "regex") {
        result.children = processRegexCaptures(current, key);
    }

    if (featureType === "api") {
        const callInfo = getCallInfo(node, layout);
        if (callInfo) {
            result.children.push({
                key,
                data: { type: "call-info", name: callInfo },
                children: []
            });
        }
    }

    return result;
}
|
|
|
|
/**
 * Gets a display label for the process an address belongs to, using the
 * preprocessed `processMap`.
 *
 * Only the first two components of the address value (ppid, pid) are used, so
 * process, thread and call addresses all resolve to their owning process.
 * When the process is not present in the map, the label falls back to
 * "Unknown Process" instead of throwing.
 *
 * @param {Object} layout - The preprocessed layout maps; `processMap` is read
 * @param {Object} address - The address object containing process information
 * @returns {string} The label, formatted as "<name> (ppid:<ppid>, pid:<pid>)"
 */
function getProcessName(layout, address) {
    const [ppid, pid] = address.value;
    const proc = layout.processMap.get(`${ppid}-${pid}`);
    const name = proc ? proc.name : "Unknown Process";

    return `${name} (ppid:${ppid}, pid:${pid})`;
}
|
|
|
|
/**
 * Parses the capabilities of functions from a given rdoc.
 *
 * Function-scoped matches are attributed to the matched function, basic-block
 * matches to the function that the layout lists the block under, and
 * file-scoped rules to a special "file" entry. Rules with any other static
 * scope are ignored. Each rule is listed at most once per function, even when
 * it matched several basic blocks of that function.
 *
 * @param {Object} doc - The document containing function and rule information.
 * @returns {Array} An array of objects, each representing a function with its address and capabilities.
 *     The "file" entry (if any) comes first, followed by functions in
 *     `doc.meta.analysis.feature_counts.functions` order; functions without matches are omitted.
 *
 * @example
 * [
 *   {
 *     "address": "0x14002A690",
 *     "capabilities": [
 *       {
 *         "name": "contain loop",
 *         "lib": true
 *       },
 *       {
 *         "name": "get disk information",
 *         "namespace": "host-interaction/hardware/storage",
 *         "lib": false
 *       }
 *     ]
 *   }
 * ]
 */
export function parseFunctionCapabilities(doc) {
    // Map basic blocks to their parent functions
    const functionsByBB = new Map();
    for (const finfo of doc.meta.analysis.layout.functions) {
        for (const bb of finfo.matched_basic_blocks) {
            functionsByBB.set(formatAddress(bb.address), formatAddress(finfo.address));
        }
    }

    // owner ("file" or a formatted function address) -> (rule name -> capability).
    // Keyed by rule name because a Set of freshly created objects never de-duplicates.
    const matchesByFunction = new Map([["file", new Map()]]);

    // Records the rule described by `meta` under `owner`, once
    const addCapability = (owner, { name, namespace, lib }) => {
        let capabilities = matchesByFunction.get(owner);
        if (!capabilities) {
            capabilities = new Map();
            matchesByFunction.set(owner, capabilities);
        }
        if (!capabilities.has(name)) {
            capabilities.set(name, { name, namespace, lib });
        }
    };

    // Iterate through all rules in the document
    for (const rule of Object.values(doc.rules)) {
        const scope = rule.meta.scopes.static;

        if (scope === "function") {
            for (const [address] of rule.matches) {
                addCapability(formatAddress(address), rule.meta);
            }
        } else if (scope === "basic block") {
            for (const [address] of rule.matches) {
                // Basic blocks that the layout does not map to a function are dropped
                const owner = functionsByBB.get(formatAddress(address));
                if (owner) {
                    addCapability(owner, rule.meta);
                }
            }
        } else if (scope === "file") {
            addCapability("file", rule.meta);
        }
    }

    const result = [];

    // Add file-level matches if there are any
    const fileCapabilities = matchesByFunction.get("file");
    if (fileCapabilities.size > 0) {
        result.push({ address: "file", capabilities: [...fileCapabilities.values()] });
    }

    // Iterate through all functions in the document
    for (const f of doc.meta.analysis.feature_counts.functions) {
        const addr = formatAddress(f.address);
        const matches = matchesByFunction.get(addr);
        // Skip functions with no matches
        if (!matches || matches.size === 0) continue;

        result.push({ address: addr, capabilities: [...matches.values()] });
    }

    return result;
}
|
|
|
|
// Helper functions
|
|
|
|
/**
 * Resolves the process and call names for a node whose first location is a
 * dynamic call address.
 *
 * @param {Object} node - The match node; only `locations[0]` is inspected
 * @param {Object} layout - The preprocessed layout maps
 * @returns {Object|null} `{ processName, callInfo }`, or null when the node has
 *     no locations or its first location is not of type "call"
 */
function getCallInfo(node, layout) {
    const locations = node.locations;
    const hasLocation = Boolean(locations) && locations.length !== 0;
    if (!hasLocation || locations[0].type !== "call") {
        return null;
    }

    const callAddress = locations[0];
    return {
        processName: getProcessName(layout, callAddress),
        callInfo: getCallName(layout, callAddress)
    };
}
|
|
|
|
/**
 * Get the call name from the preprocessed layout maps.
 *
 * @param {Object} layoutMaps - The preprocessed layout maps; `callMap` is read
 * @param {Object} address - The address object containing call information;
 *     its value must hold at least four components (ppid, pid, tid, call id)
 * @returns {string} The call name, or "Unknown Call" when the address is
 *     missing or too short, or when the call is not present in `callMap`
 */
function getCallName(layoutMaps, address) {
    if (!address || !address.value || address.value.length < 4) {
        return "Unknown Call";
    }

    const [ppid, pid, tid, callId] = address.value;
    const call = layoutMaps.callMap.get(`${ppid}-${pid}-${tid}-${callId}`);

    // A call missing from the map falls back to the documented placeholder
    // instead of throwing on `undefined.name`.
    return call ? call.name : "Unknown Call";
}
|
|
|
|
/**
 * Builds one "regex-capture" child node per captured string of a regex feature.
 *
 * @param {Object} node - The regex node; `captures` maps each captured string to its locations
 * @param {string} key - Key assigned to every capture node
 * @returns {Array} Capture nodes (empty when the node has no captures); each shows
 *     the quoted capture and the formatted address of its first location
 */
function processRegexCaptures(node, key) {
    const captureNodes = [];
    if (!node.captures) return captureNodes;

    for (const [capture, locations] of Object.entries(node.captures)) {
        captureNodes.push({
            key,
            data: {
                type: "regex-capture",
                name: `"${escape(capture)}"`,
                address: formatAddress(locations[0])
            }
        });
    }

    return captureNodes;
}
|
|
|
|
/**
 * Formats an rdoc address object for display.
 *
 * @param {Object} address - The address; `type` selects the format and `value` is rendered
 * @returns {string} The formatted address ("" for the "no address" type)
 * @throws {Error} When the address type is not one of the handled types
 */
function formatAddress(address) {
    const { type, value } = address;

    switch (type) {
        case "absolute":
            return formatHex(value);
        case "relative":
            return `base address+${formatHex(value)}`;
        case "file":
            return `file+${formatHex(value)}`;
        case "dn token":
            return `token(${formatHex(value)})`;
        case "dn token offset": {
            const [token, offset] = value;
            return `token(${formatHex(token)})+${formatHex(offset)}`;
        }
        // All dynamic address kinds share one id-list format
        case "process":
        case "thread":
        case "call":
            return formatDynamicAddress(value);
        case "no address":
            return "";
        default:
            throw new Error("Unexpected address type");
    }
}
|
|
|
|
/**
 * Backslash-escapes every double quote in a string so it can be shown inside
 * a double-quoted label.
 *
 * @param {string} str - The text to escape
 * @returns {string} The text with each `"` replaced by `\"`
 */
function escape(str) {
    const escapedQuote = '\\"';
    return str.replaceAll('"', escapedQuote);
}
|
|
|
|
/**
 * Inverts the success values of everything below a 'not' statement.
 *
 * The node itself keeps its own `success`; each descendant gets a shallow copy
 * with `success` negated and a `children` array (empty when it had none).
 * The input is not mutated.
 *
 * @param {Object} node - The node to invert
 * @returns {Object|null} The inverted copy, or null when no node is given
 */
function invertNotStatementSuccess(node) {
    if (!node) return null;

    // Copy of one child with flipped success and recursively flipped descendants
    const flip = (child) => {
        const nested = child.children ? invertNotStatementSuccess(child).children : [];
        return { ...child, success: !child.success, children: nested };
    };

    const children = node.children ? node.children.map((child) => flip(child)) : [];
    return { ...node, children };
}
|
|
|
|
/**
 * Gets the description of a node, preferring the statement over the feature.
 *
 * @param {Object} node - The node to get the description from
 * @returns {string|null|undefined} The `description` of the statement or feature
 *     (undefined when it has none), or null when the node has neither
 */
function getNodeDescription(node) {
    const { statement, feature } = node.node;

    if (statement) return statement.description;
    if (feature) return feature.description;
    return null;
}
|
|
|
|
/**
 * Gets the display name of a node, preferring the statement over the feature.
 *
 * @param {Object} node - The node to get the name from
 * @returns {string|null} The statement or feature name, or null when the node has neither
 */
function getNodeName(node) {
    const { statement, feature } = node.node;

    if (statement) {
        return getStatementName(statement);
    }
    return feature ? getFeatureName(feature) : null;
}
|
|
|
|
/**
 * Gets the display name for a statement node.
 *
 * @param {Object} statement - The statement object
 * @returns {string} The range description for "range", "<count> or more" for
 *     "some", "<scope>:" for "subscope", and "<type>:" for everything else
 */
function getStatementName(statement) {
    const { type } = statement;

    if (type === "range") {
        return getRangeName(statement);
    }
    if (type === "some") {
        return `${statement.count} or more`;
    }

    // "subscope" is labelled by its scope (e.g. "basic block:"); the remaining
    // statements by their own type (e.g. "and:", "or:", "optional:")
    const label = type === "subscope" ? statement.scope : type;
    return `${label}:`;
}
|
|
|
|
/**
 * Gets the display name for a feature node.
 *
 * Numbers, offsets and operand offsets are rendered as upper-case hex; negative
 * values keep the sign in front of the prefix (e.g. "-0x10" rather than "0x-10").
 * Bytes are rendered as space-separated pairs. `class` and `import` read their
 * value from `class_` / `import_`; every other feature type reads it from the
 * property named after the type.
 *
 * @param {Object} feature - The feature object
 * @returns {string} The name of the feature
 */
function getFeatureName(feature) {
    // Sign-aware hex: `(-16).toString(16)` yields "-10", so the sign has to be
    // moved in front of the "0x" prefix.
    const toHex = (value) => {
        const digits = value.toString(16).toUpperCase();
        return digits.startsWith("-") ? `-0x${digits.slice(1)}` : `0x${digits}`;
    };

    switch (feature.type) {
        case "number":
        case "offset":
            // example: "0x1234", "0x3C", "-0x10"
            return toHex(feature[feature.type]);
        case "bytes":
            return formatBytes(feature.bytes);
        case "operand offset":
            return `operand[${feature.index}].offset: ${toHex(feature.operand_offset)}`;
        case "class":
            return `${feature.class_}`;
        case "import":
            return `${feature.import_}`;
        default:
            return `${feature[feature.type]}`;
    }
}
|
|
|
|
/**
 * Formats the name for a range statement, for example
 * "count(mnemonic(xor)): 2 or more ".
 *
 * The counted feature's value is read from the child's property named after
 * its type and is omitted when that value is falsy (0 is still shown). The
 * result always ends with a single trailing space.
 *
 * @param {Object} statement - The range statement object (`child`, `min`, `max`)
 * @returns {string} The formatted range name
 */
function getRangeName(statement) {
    const { child, min, max } = statement;
    const featureType = child.type;
    const featureValue = child[featureType];

    const hasValue = Boolean(featureValue) || featureValue === 0;
    const counted = hasValue ? `count(${featureType}(${featureValue}))` : `count(${featureType})`;

    let bounds = `between ${min} and ${max}`;
    if (min === max) {
        bounds = `${min}`;
    } else if (max >= Number.MAX_SAFE_INTEGER) {
        // an upper bound this large is treated as "no upper bound"
        bounds = `${min} or more`;
    }

    return `${counted}: ${bounds} `;
}
|
|
|
|
/**
 * Gets the formatted address of a node's first location.
 *
 * Regex features never get an address of their own here, regardless of their
 * locations.
 *
 * @param {Object} node - The node to get the address from
 * @returns {string|null} The formatted address, or null for regex features and
 *     nodes without locations
 */
function getNodeAddress(node) {
    const { feature } = node.node;
    const isRegex = Boolean(feature) && feature.type === "regex";
    const hasLocations = Boolean(node.locations) && node.locations.length > 0;

    return !isRegex && hasLocations ? formatAddress(node.locations[0]) : null;
}
|
|
|
|
/**
 * Formats a bytes string for display: characters are grouped in pairs
 * separated by single spaces and the result is upper-cased.
 *
 * @param {string} byteString - The bytes string (e.g. hex digits without separators)
 * @returns {string} - Formatted bytes string, e.g. "DE AD BE EF"
 */
function formatBytes(byteString) {
    // Append a space to every complete pair, then drop the surplus one at the end
    const spaced = byteString.replace(/(.{2})/g, (pair) => `${pair} `);
    return spaced.trim().toUpperCase();
}
|
|
|
|
/**
 * Formats the address for dynamic flavor.
 *
 * Components are labelled positionally as ppid, pid, tid and id, and emitted
 * innermost-first, e.g. [1, 2, 3, 4] becomes "id:4,tid:3,pid:2,ppid:1".
 *
 * @param {Array} value - The address value array
 * @returns {string} - Formatted address string
 */
function formatDynamicAddress(value) {
    const labels = ["ppid", "pid", "tid", "id"];
    const segments = [];

    // Walk from the last component to the first so the most specific id leads
    for (let position = value.length - 1; position >= 0; position--) {
        segments.push(`${labels[position]}:${value[position]}`);
    }

    return segments.join(",");
}
|
|
|
|
/**
 * Formats a numeric address as upper-case hexadecimal with a "0x" prefix.
 *
 * @param {number} address - The value to format
 * @returns {string} The hex string, e.g. "0x14002A690"
 */
function formatHex(address) {
    const digits = address.toString(16).toUpperCase();
    return `0x${digits}`;
}
|