Compare commits


1 Commit

Author SHA1 Message Date
mr-tz
48fc4a54de add gemini cli workflows 2025-08-08 09:24:35 +00:00
61 changed files with 1786 additions and 1553 deletions

View File

@@ -1,27 +0,0 @@
[tool.bumpversion]
current_version = "9.3.1"
[[tool.bumpversion.files]]
filename = "capa/version.py"
search = '__version__ = "{current_version}"'
replace = '__version__ = "{new_version}"'
[[tool.bumpversion.files]]
filename = "capa/ida/plugin/ida-plugin.json"
search = '"version": "{current_version}"'
replace = '"version": "{new_version}"'
[[tool.bumpversion.files]]
filename = "capa/ida/plugin/ida-plugin.json"
search = '"flare-capa=={current_version}"'
replace = '"flare-capa=={new_version}"'
[[tool.bumpversion.files]]
filename = "CHANGELOG.md"
search = "v{current_version}...master"
replace = "v{current_version}...{new_version}"
[[tool.bumpversion.files]]
filename = "CHANGELOG.md"
search = "master (unreleased)"
replace = "v{new_version}"
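For orientation, here is a minimal sketch, assuming a hypothetical 9.3.1 to 9.3.2 bump, of the search-and-replace that each `[[tool.bumpversion.files]]` entry above drives. This illustrates the mechanism only; it is not bump-my-version's actual implementation.

```python
# Sketch only: emulate one [[tool.bumpversion.files]] entry.
# The target version "9.3.2" is a hypothetical example.
from pathlib import Path


def bump_file(filename: str, search: str, replace: str, current: str, new: str) -> None:
    """Render both templates, then substitute the old string for the new one."""
    old = search.format(current_version=current, new_version=new)
    updated = replace.format(current_version=current, new_version=new)
    path = Path(filename)
    path.write_text(path.read_text().replace(old, updated))


bump_file(
    "capa/version.py",
    '__version__ = "{current_version}"',
    '__version__ = "{new_version}"',
    current="9.3.1",
    new="9.3.2",
)
```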

View File

@@ -4,12 +4,6 @@ updates:
directory: "/"
schedule:
interval: "weekly"
groups:
pyasn1-updates:
patterns:
- "pyasn1"
- "pyasn1-modules"
- "vivisect"
ignore:
- dependency-name: "*"
update-types: ["version-update:semver-patch"]

View File

@@ -74,7 +74,6 @@ a = Analysis(
# only be installed locally.
"binaryninja",
"ida",
"ghidra",
# remove once https://github.com/mandiant/capa/issues/2681 has
# been addressed by PyInstaller
"pkg_resources",

View File

@@ -28,11 +28,6 @@ jobs:
artifact_name: capa
asset_name: linux
python_version: '3.10'
# for Ghidra
java-version: '21'
ghidra-version: '12.0'
public-version: 'PUBLIC_20251205'
ghidra-sha256: 'af43e8cfb2fa4490cf6020c3a2bde25c159d83f45236a0542688a024e8fc1941'
- os: ubuntu-22.04-arm
artifact_name: capa
asset_name: linux-arm64
@@ -51,8 +46,8 @@ jobs:
# artifact_name: capa.exe
# asset_name: windows-arm64
# python_version: '3.12'
- os: macos-15-intel
# macos-15-intel is the lowest native intel build
- os: macos-13
# use older macOS for assumed better portability
artifact_name: capa
asset_name: macos
python_version: '3.10'
@@ -111,24 +106,6 @@ jobs:
run: |
7z e "tests/data/dynamic/cape/v2.2/d46900384c78863420fb3e297d0a2f743cd2b6b3f7f82bf64059a168e07aceb7.json.gz"
dist/capa -d "d46900384c78863420fb3e297d0a2f743cd2b6b3f7f82bf64059a168e07aceb7.json"
- name: Set up Java ${{ matrix.java-version }}
if: matrix.os == 'ubuntu-22.04' && matrix.python_version == '3.10'
uses: actions/setup-java@387ac29b308b003ca37ba93a6cab5eb57c8f5f93 # v4.0.0
with:
distribution: 'temurin'
java-version: ${{ matrix.java-version }}
- name: Install Ghidra ${{ matrix.ghidra-version }}
if: matrix.os == 'ubuntu-22.04' && matrix.python_version == '3.10'
run: |
mkdir ./.github/ghidra
wget "https://github.com/NationalSecurityAgency/ghidra/releases/download/Ghidra_${{ matrix.ghidra-version }}_build/ghidra_${{ matrix.ghidra-version }}_${{ matrix.public-version }}.zip" -O ./.github/ghidra/ghidra_${{ matrix.ghidra-version }}_PUBLIC.zip
echo "${{ matrix.ghidra-sha256 }} ./.github/ghidra/ghidra_${{ matrix.ghidra-version }}_PUBLIC.zip" | sha256sum -c -
unzip .github/ghidra/ghidra_${{ matrix.ghidra-version }}_PUBLIC.zip -d .github/ghidra/
- name: Does it run (Ghidra)?
if: matrix.os == 'ubuntu-22.04' && matrix.python_version == '3.10'
env:
GHIDRA_INSTALL_DIR: ${{ github.workspace }}/.github/ghidra/ghidra_${{ matrix.ghidra-version }}_PUBLIC
run: dist/capa -b ghidra -d "tests/data/Practical Malware Analysis Lab 01-01.dll_"
- uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4.3.1
with:
name: ${{ matrix.asset_name }}
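The removed steps above pin a Ghidra release by URL and SHA-256 (`wget`, then `sha256sum -c`). A hedged sketch of that download-and-verify pattern, with placeholder URL and digest values:

```python
# Sketch of the release-verification step: download an archive and check it
# against a pinned SHA-256 before unpacking. URL/digest here are placeholders.
import hashlib
import urllib.request


def fetch_and_verify(url: str, dest: str, expected_sha256: str) -> None:
    urllib.request.urlretrieve(url, dest)
    sha256 = hashlib.sha256()
    with open(dest, "rb") as fh:
        for chunk in iter(lambda: fh.read(1 << 20), b""):
            sha256.update(chunk)
    if sha256.hexdigest() != expected_sha256:
        raise RuntimeError(f"checksum mismatch for {dest}")
```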

304
.github/workflows/gemini-cli.yml vendored Normal file
View File

@@ -0,0 +1,304 @@
name: '💬 Gemini CLI'
on:
pull_request_review_comment:
types:
- 'created'
pull_request_review:
types:
- 'submitted'
issue_comment:
types:
- 'created'
concurrency:
group: '${{ github.workflow }}-${{ github.event.issue.number }}'
cancel-in-progress: |-
${{ github.event.sender.type == 'User' && ( github.event.issue.author_association == 'OWNER' || github.event.issue.author_association == 'MEMBER' || github.event.issue.author_association == 'COLLABORATOR') }}
defaults:
run:
shell: 'bash'
permissions:
contents: 'write'
id-token: 'write'
pull-requests: 'write'
issues: 'write'
jobs:
gemini-cli:
# This condition is complex to ensure we only run when explicitly invoked.
if: |-
github.event_name == 'workflow_dispatch' ||
(
github.event_name == 'issues' && github.event.action == 'opened' &&
contains(github.event.issue.body, '@gemini-cli') &&
!contains(github.event.issue.body, '@gemini-cli /review') &&
!contains(github.event.issue.body, '@gemini-cli /triage') &&
contains(fromJSON('["OWNER", "MEMBER", "COLLABORATOR"]'), github.event.issue.author_association)
) ||
(
(
github.event_name == 'issue_comment' ||
github.event_name == 'pull_request_review_comment'
) &&
contains(github.event.comment.body, '@gemini-cli') &&
!contains(github.event.comment.body, '@gemini-cli /review') &&
!contains(github.event.comment.body, '@gemini-cli /triage') &&
contains(fromJSON('["OWNER", "MEMBER", "COLLABORATOR"]'), github.event.comment.author_association)
) ||
(
github.event_name == 'pull_request_review' &&
contains(github.event.review.body, '@gemini-cli') &&
!contains(github.event.review.body, '@gemini-cli /review') &&
!contains(github.event.review.body, '@gemini-cli /triage') &&
contains(fromJSON('["OWNER", "MEMBER", "COLLABORATOR"]'), github.event.review.author_association)
)
timeout-minutes: 10
runs-on: 'ubuntu-latest'
steps:
- name: 'Generate GitHub App Token'
id: 'generate_token'
if: |-
${{ vars.APP_ID }}
uses: 'actions/create-github-app-token@df432ceedc7162793a195dd1713ff69aefc7379e' # ratchet:actions/create-github-app-token@v2
with:
app-id: '${{ vars.APP_ID }}'
private-key: '${{ secrets.APP_PRIVATE_KEY }}'
- name: 'Get context from event'
id: 'get_context'
env:
EVENT_NAME: '${{ github.event_name }}'
EVENT_PAYLOAD: '${{ toJSON(github.event) }}'
run: |-
set -euo pipefail
USER_REQUEST=""
ISSUE_NUMBER=""
IS_PR="false"
if [[ "${EVENT_NAME}" == "issues" ]]; then
USER_REQUEST=$(echo "${EVENT_PAYLOAD}" | jq -r .issue.body)
ISSUE_NUMBER=$(echo "${EVENT_PAYLOAD}" | jq -r .issue.number)
elif [[ "${EVENT_NAME}" == "issue_comment" ]]; then
USER_REQUEST=$(echo "${EVENT_PAYLOAD}" | jq -r .comment.body)
ISSUE_NUMBER=$(echo "${EVENT_PAYLOAD}" | jq -r .issue.number)
if [[ $(echo "${EVENT_PAYLOAD}" | jq -r .issue.pull_request) != "null" ]]; then
IS_PR="true"
fi
elif [[ "${EVENT_NAME}" == "pull_request_review" ]]; then
USER_REQUEST=$(echo "${EVENT_PAYLOAD}" | jq -r .review.body)
ISSUE_NUMBER=$(echo "${EVENT_PAYLOAD}" | jq -r .pull_request.number)
IS_PR="true"
elif [[ "${EVENT_NAME}" == "pull_request_review_comment" ]]; then
USER_REQUEST=$(echo "${EVENT_PAYLOAD}" | jq -r .comment.body)
ISSUE_NUMBER=$(echo "${EVENT_PAYLOAD}" | jq -r .pull_request.number)
IS_PR="true"
fi
# Clean up user request
USER_REQUEST=$(echo "${USER_REQUEST}" | sed 's/.*@gemini-cli//' | sed 's/^[[:space:]]*//;s/[[:space:]]*$//')
{
echo "user_request=${USER_REQUEST}"
echo "issue_number=${ISSUE_NUMBER}"
echo "is_pr=${IS_PR}"
} >> "${GITHUB_OUTPUT}"
- name: 'Set up git user for commits'
run: |-
git config --global user.name 'gemini-cli[bot]'
git config --global user.email 'gemini-cli[bot]@users.noreply.github.com'
- name: 'Checkout PR branch'
if: |-
${{ steps.get_context.outputs.is_pr == 'true' }}
uses: 'actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683' # ratchet:actions/checkout@v4
with:
token: '${{ steps.generate_token.outputs.token || secrets.GITHUB_TOKEN }}'
repository: '${{ github.repository }}'
ref: 'refs/pull/${{ steps.get_context.outputs.issue_number }}/head'
fetch-depth: 0
- name: 'Checkout main branch'
if: |-
${{ steps.get_context.outputs.is_pr == 'false' }}
uses: 'actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683' # ratchet:actions/checkout@v4
with:
token: '${{ steps.generate_token.outputs.token || secrets.GITHUB_TOKEN }}'
repository: '${{ github.repository }}'
fetch-depth: 0
- name: 'Acknowledge request'
env:
GITHUB_ACTOR: '${{ github.actor }}'
GITHUB_TOKEN: '${{ steps.generate_token.outputs.token || secrets.GITHUB_TOKEN }}'
ISSUE_NUMBER: '${{ steps.get_context.outputs.issue_number }}'
REPOSITORY: '${{ github.repository }}'
REQUEST_TYPE: '${{ steps.get_context.outputs.request_type }}'
run: |-
set -euo pipefail
MESSAGE="@${GITHUB_ACTOR} I've received your request and I'm working on it now! 🤖"
if [[ -n "${MESSAGE}" ]]; then
gh issue comment "${ISSUE_NUMBER}" \
--body "${MESSAGE}" \
--repo "${REPOSITORY}"
fi
- name: 'Get description'
id: 'get_description'
env:
GITHUB_TOKEN: '${{ steps.generate_token.outputs.token || secrets.GITHUB_TOKEN }}'
IS_PR: '${{ steps.get_context.outputs.is_pr }}'
ISSUE_NUMBER: '${{ steps.get_context.outputs.issue_number }}'
run: |-
set -euo pipefail
if [[ "${IS_PR}" == "true" ]]; then
DESCRIPTION=$(gh pr view "${ISSUE_NUMBER}" --json body --template '{{.body}}')
else
DESCRIPTION=$(gh issue view "${ISSUE_NUMBER}" --json body --template '{{.body}}')
fi
{
echo "description<<EOF"
echo "${DESCRIPTION}"
echo "EOF"
} >> "${GITHUB_OUTPUT}"
- name: 'Get comments'
id: 'get_comments'
env:
GITHUB_TOKEN: '${{ steps.generate_token.outputs.token || secrets.GITHUB_TOKEN }}'
IS_PR: '${{ steps.get_context.outputs.is_pr }}'
ISSUE_NUMBER: '${{ steps.get_context.outputs.issue_number }}'
run: |-
set -euo pipefail
if [[ "${IS_PR}" == "true" ]]; then
COMMENTS=$(gh pr view "${ISSUE_NUMBER}" --json comments --template '{{range .comments}}{{.author.login}}: {{.body}}{{"\n"}}{{end}}')
else
COMMENTS=$(gh issue view "${ISSUE_NUMBER}" --json comments --template '{{range .comments}}{{.author.login}}: {{.body}}{{"\n"}}{{end}}')
fi
{
echo "comments<<EOF"
echo "${COMMENTS}"
echo "EOF"
} >> "${GITHUB_OUTPUT}"
- name: 'Run Gemini'
id: 'run_gemini'
uses: 'google-github-actions/run-gemini-cli@v0'
env:
GITHUB_TOKEN: '${{ steps.generate_token.outputs.token || secrets.GITHUB_TOKEN }}'
REPOSITORY: '${{ github.repository }}'
USER_REQUEST: '${{ steps.get_context.outputs.user_request }}'
ISSUE_NUMBER: '${{ steps.get_context.outputs.issue_number }}'
IS_PR: '${{ steps.get_context.outputs.is_pr }}'
with:
gemini_api_key: '${{ secrets.GEMINI_API_KEY }}'
gcp_workload_identity_provider: '${{ vars.GCP_WIF_PROVIDER }}'
gcp_project_id: '${{ vars.GOOGLE_CLOUD_PROJECT }}'
gcp_location: '${{ vars.GOOGLE_CLOUD_LOCATION }}'
gcp_service_account: '${{ vars.SERVICE_ACCOUNT_EMAIL }}'
use_vertex_ai: '${{ vars.GOOGLE_GENAI_USE_VERTEXAI }}'
use_gemini_code_assist: '${{ vars.GOOGLE_GENAI_USE_GCA }}'
settings: |-
{
"maxSessionTurns": 50,
"telemetry": {
"enabled": false,
"target": "gcp"
}
}
prompt: |-
## Role
You are a helpful AI assistant invoked via a CLI interface in a GitHub workflow. You have access to tools to interact with the repository and respond to the user.
## Context
- **Repository**: `${{ github.repository }}`
- **Triggering Event**: `${{ github.event_name }}`
- **Issue/PR Number**: `${{ steps.get_context.outputs.issue_number }}`
- **Is this a PR?**: `${{ steps.get_context.outputs.is_pr }}`
- **Issue/PR Description**:
`${{ steps.get_description.outputs.description }}`
- **Comments**:
`${{ steps.get_comments.outputs.comments }}`
## User Request
The user has sent the following request:
`${{ steps.get_context.outputs.user_request }}`
## How to Respond to Issues, PR Comments, and Questions
This workflow supports three main scenarios:
1. **Creating a Fix for an Issue**
- Carefully read the user request and the related issue or PR description.
- Use available tools to gather all relevant context (e.g., `gh issue view`, `gh pr view`, `gh pr diff`, `cat`, `head`, `tail`).
- Identify the root cause of the problem before proceeding.
- **Show and maintain a plan as a checklist**:
- At the very beginning, outline the steps needed to resolve the issue or address the request and post them as a checklist comment on the issue or PR (use GitHub markdown checkboxes: `- [ ] Task`).
- Example:
```
### Plan
- [ ] Investigate the root cause
- [ ] Implement the fix in `file.py`
- [ ] Add/modify tests
- [ ] Update documentation
- [ ] Verify the fix and close the issue
```
- Use: `gh pr comment "${ISSUE_NUMBER}" --body "<plan>"` or `gh issue comment "${ISSUE_NUMBER}" --body "<plan>"` to post the initial plan.
- As you make progress, keep the checklist visible and up to date by editing the same comment (check off completed tasks with `- [x]`).
- To update the checklist:
1. Find the comment ID for the checklist (use `gh pr comment list "${ISSUE_NUMBER}"` or `gh issue comment list "${ISSUE_NUMBER}"`).
2. Edit the comment with the updated checklist:
- For PRs: `gh pr comment --edit <comment-id> --body "<updated plan>"`
- For Issues: `gh issue comment --edit <comment-id> --body "<updated plan>"`
3. The checklist should only be maintained as a comment on the issue or PR. Do not track or update the checklist in code files.
- If the fix requires code changes, determine which files and lines are affected. If clarification is needed, note any questions for the user.
- Make the necessary code or documentation changes using the available tools (e.g., `write_file`). Ensure all changes follow project conventions and best practices. Reference all shell variables as `"${VAR}"` (with quotes and braces) to prevent errors.
- Run any relevant tests or checks to verify the fix works as intended. If possible, provide evidence (test output, screenshots, etc.) that the issue is resolved.
- **Branching and Committing**:
- **NEVER commit directly to the `main` branch.**
- If you are working on a **pull request** (`IS_PR` is `true`), the correct branch is already checked out. Simply commit and push to it.
- `git add .`
- `git commit -m "feat: <describe the change>"`
- `git push`
- If you are working on an **issue** (`IS_PR` is `false`), create a new branch for your changes. A good branch name would be `issue/${ISSUE_NUMBER}/<short-description>`.
- `git checkout -b issue/${ISSUE_NUMBER}/my-fix`
- `git add .`
- `git commit -m "feat: <describe the fix>"`
- `git push origin issue/${ISSUE_NUMBER}/my-fix`
- After pushing, you can create a pull request: `gh pr create --title "Fixes #${ISSUE_NUMBER}: <short title>" --body "This PR addresses issue #${ISSUE_NUMBER}."`
- Summarize what was changed and why in a markdown file: `write_file("response.md", "<your response here>")`
- Post the response as a comment:
- For PRs: `gh pr comment "${ISSUE_NUMBER}" --body-file response.md`
- For Issues: `gh issue comment "${ISSUE_NUMBER}" --body-file response.md`
2. **Addressing Comments on a Pull Request**
- Read the specific comment and the context of the PR.
- Use tools like `gh pr view`, `gh pr diff`, and `cat` to understand the code and discussion.
- If the comment requests a change or clarification, follow the same process as for fixing an issue: create a checklist plan, implement, test, and commit any required changes, updating the checklist as you go.
- **Committing Changes**: The correct PR branch is already checked out. Simply add, commit, and push your changes.
- `git add .`
- `git commit -m "fix: address review comments"`
- `git push`
- If the comment is a question, answer it directly and clearly, referencing code or documentation as needed.
- Document your response in `response.md` and post it as a PR comment: `gh pr comment "${ISSUE_NUMBER}" --body-file response.md`
3. **Answering Any Question on an Issue**
- Read the question and the full issue context using `gh issue view` and related tools.
- Research or analyze the codebase as needed to provide an accurate answer.
- If the question requires code or documentation changes, follow the fix process above, including creating and updating a checklist plan and **creating a new branch for your changes as described in section 1.**
- Write a clear, concise answer in `response.md` and post it as an issue comment: `gh issue comment "${ISSUE_NUMBER}" --body-file response.md`
## Guidelines
- **Be concise and actionable.** Focus on solving the user's problem efficiently.
- **Always commit and push your changes if you modify code or documentation.**
- **If you are unsure about the fix or answer, explain your reasoning and ask clarifying questions.**
- **Follow project conventions and best practices.**
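The 'Get context from event' step in this workflow normalizes four event shapes with jq before handing the request to Gemini. A rough Python equivalent, shown only as an illustration of that logic:

```python
# Illustration of the jq extraction in 'Get context from event': pull the
# user request, issue/PR number, and is_pr flag out of a GitHub event payload.
def get_context(event_name: str, payload: dict) -> dict:
    user_request, issue_number, is_pr = "", None, False
    if event_name == "issues":
        user_request = payload["issue"]["body"]
        issue_number = payload["issue"]["number"]
    elif event_name == "issue_comment":
        user_request = payload["comment"]["body"]
        issue_number = payload["issue"]["number"]
        is_pr = payload["issue"].get("pull_request") is not None
    elif event_name in ("pull_request_review", "pull_request_review_comment"):
        key = "review" if event_name == "pull_request_review" else "comment"
        user_request = payload[key]["body"]
        issue_number = payload["pull_request"]["number"]
        is_pr = True
    # like sed 's/.*@gemini-cli//': keep only the text after the last mention
    user_request = user_request.rsplit("@gemini-cli", 1)[-1].strip()
    return {"user_request": user_request, "issue_number": issue_number, "is_pr": is_pr}
```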

View File

@@ -0,0 +1,130 @@
name: '🏷️ Gemini Automated Issue Triage'
on:
issues:
types:
- 'opened'
- 'reopened'
issue_comment:
types:
- 'created'
workflow_dispatch:
inputs:
issue_number:
description: 'issue number to triage'
required: true
type: 'number'
concurrency:
group: '${{ github.workflow }}-${{ github.event.issue.number }}'
cancel-in-progress: true
defaults:
run:
shell: 'bash'
permissions:
contents: 'read'
id-token: 'write'
issues: 'write'
statuses: 'write'
jobs:
triage-issue:
if: |-
github.event_name == 'issues' ||
github.event_name == 'workflow_dispatch' ||
(
github.event_name == 'issue_comment' &&
contains(github.event.comment.body, '@gemini-cli /triage') &&
contains(fromJSON('["OWNER", "MEMBER", "COLLABORATOR"]'), github.event.comment.author_association)
)
timeout-minutes: 5
runs-on: 'ubuntu-latest'
steps:
- name: 'Checkout repository'
uses: 'actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683' # ratchet:actions/checkout@v4
- name: 'Generate GitHub App Token'
id: 'generate_token'
if: |-
${{ vars.APP_ID }}
uses: 'actions/create-github-app-token@df432ceedc7162793a195dd1713ff69aefc7379e' # ratchet:actions/create-github-app-token@v2
with:
app-id: '${{ vars.APP_ID }}'
private-key: '${{ secrets.APP_PRIVATE_KEY }}'
- name: 'Run Gemini Issue Triage'
uses: 'google-github-actions/run-gemini-cli@v0'
id: 'gemini_issue_triage'
env:
GITHUB_TOKEN: '${{ steps.generate_token.outputs.token || secrets.GITHUB_TOKEN }}'
ISSUE_TITLE: '${{ github.event.issue.title }}'
ISSUE_BODY: '${{ github.event.issue.body }}'
ISSUE_NUMBER: '${{ github.event.issue.number }}'
REPOSITORY: '${{ github.repository }}'
with:
gemini_cli_version: '${{ vars.GEMINI_CLI_VERSION }}'
gcp_workload_identity_provider: '${{ vars.GCP_WIF_PROVIDER }}'
gcp_project_id: '${{ vars.GOOGLE_CLOUD_PROJECT }}'
gcp_location: '${{ vars.GOOGLE_CLOUD_LOCATION }}'
gcp_service_account: '${{ vars.SERVICE_ACCOUNT_EMAIL }}'
gemini_api_key: '${{ secrets.GEMINI_API_KEY }}'
use_vertex_ai: '${{ vars.GOOGLE_GENAI_USE_VERTEXAI }}'
use_gemini_code_assist: '${{ vars.GOOGLE_GENAI_USE_GCA }}'
settings: |-
{
"maxSessionTurns": 25,
"coreTools": [
"run_shell_command(echo)",
"run_shell_command(gh label list)",
"run_shell_command(gh issue edit)"
],
"telemetry": {
"enabled": false,
"target": "gcp"
}
}
prompt: |-
## Role
You are an issue triage assistant. Analyze the current GitHub issue
and apply the most appropriate existing labels. Use the available
tools to gather information; do not ask for information to be
provided.
## Steps
1. Run: `gh label list` to get all available labels.
2. Review the issue title and body provided in the environment
variables: "${ISSUE_TITLE}" and "${ISSUE_BODY}".
3. Select the most relevant labels from the existing labels. If
available, set labels that follow the `kind/*`, `area/*`, and
`priority/*` patterns.
4. Apply the selected labels to this issue using:
`gh issue edit "${ISSUE_NUMBER}" --add-label "label1,label2"`
5. If the "status/needs-triage" label is present, remove it using:
`gh issue edit "${ISSUE_NUMBER}" --remove-label "status/needs-triage"`
## Guidelines
- Only use labels that already exist in the repository
- Do not add comments or modify the issue content
- Triage only the current issue
- Assign all applicable labels based on the issue content
- Reference all shell variables as "${VAR}" (with quotes and braces)
- name: 'Post Issue Triage Failure Comment'
if: |-
${{ failure() && steps.gemini_issue_triage.outcome == 'failure' }}
uses: 'actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea'
with:
github-token: '${{ steps.generate_token.outputs.token || secrets.GITHUB_TOKEN }}'
script: |-
github.rest.issues.createComment({
owner: '${{ github.repository }}'.split('/')[0],
repo: '${{ github.repository }}'.split('/')[1],
issue_number: '${{ github.event.issue.number }}',
body: 'There is a problem with the Gemini CLI issue triaging. Please check the [action logs](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) for details.'
})
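The triage prompt above boils down to: list existing labels, pick matching `kind/*`, `area/*`, and `priority/*` labels, and apply them with `gh issue edit`. A sketch of that flow outside the workflow, assuming `gh label list` supports `--json`/`--jq` output as in recent gh releases; the candidate labels in the example are hypothetical:

```python
# Sketch of the triage flow the prompt describes; not part of the workflow.
import subprocess


def apply_triage_labels(issue_number: int, candidates: list[str]) -> None:
    """Apply only labels that already exist and follow the triage patterns."""
    existing = subprocess.run(
        ["gh", "label", "list", "--json", "name", "--jq", ".[].name"],
        capture_output=True, text=True, check=True,
    ).stdout.splitlines()
    selected = [
        label for label in candidates
        if label in existing and label.split("/")[0] in ("kind", "area", "priority")
    ]
    if selected:
        subprocess.run(
            ["gh", "issue", "edit", str(issue_number), "--add-label", ",".join(selected)],
            check=True,
        )


# e.g. apply_triage_labels(2800, ["kind/bug", "area/loader"])  # hypothetical labels
```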

View File

@@ -0,0 +1,123 @@
name: '📋 Gemini Scheduled Issue Triage'
on:
schedule:
- cron: '0 * * * *' # Runs every hour
workflow_dispatch:
concurrency:
group: '${{ github.workflow }}'
cancel-in-progress: true
defaults:
run:
shell: 'bash'
permissions:
contents: 'read'
id-token: 'write'
issues: 'write'
statuses: 'write'
jobs:
triage-issues:
timeout-minutes: 5
runs-on: 'ubuntu-latest'
steps:
- name: 'Checkout repository'
uses: 'actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683' # ratchet:actions/checkout@v4
- name: 'Generate GitHub App Token'
id: 'generate_token'
if: |-
${{ vars.APP_ID }}
uses: 'actions/create-github-app-token@df432ceedc7162793a195dd1713ff69aefc7379e' # ratchet:actions/create-github-app-token@v2
with:
app-id: '${{ vars.APP_ID }}'
private-key: '${{ secrets.APP_PRIVATE_KEY }}'
- name: 'Find untriaged issues'
id: 'find_issues'
env:
GITHUB_TOKEN: '${{ steps.generate_token.outputs.token || secrets.GITHUB_TOKEN }}'
GITHUB_REPOSITORY: '${{ github.repository }}'
run: |-
set -euo pipefail
echo '🔍 Finding issues without labels...'
NO_LABEL_ISSUES="$(gh issue list --repo "${GITHUB_REPOSITORY}" \
--search 'is:open is:issue no:label' --json number,title,body)"
echo '🏷️ Finding issues that need triage...'
NEED_TRIAGE_ISSUES="$(gh issue list --repo "${GITHUB_REPOSITORY}" \
--search 'is:open is:issue label:"status/needs-triage"' --json number,title,body)"
echo '🔄 Merging and deduplicating issues...'
ISSUES="$(echo "${NO_LABEL_ISSUES}" "${NEED_TRIAGE_ISSUES}" | jq -c -s 'add | unique_by(.number)')"
echo '📝 Setting output for GitHub Actions...'
echo "issues_to_triage=${ISSUES}" >> "${GITHUB_OUTPUT}"
ISSUE_COUNT="$(echo "${ISSUES}" | jq 'length')"
echo "✅ Found ${ISSUE_COUNT} issues to triage! 🎯"
- name: 'Run Gemini Issue Triage'
if: |-
${{ steps.find_issues.outputs.issues_to_triage != '[]' }}
uses: 'google-github-actions/run-gemini-cli@v0'
id: 'gemini_issue_triage'
env:
GITHUB_TOKEN: '${{ steps.generate_token.outputs.token || secrets.GITHUB_TOKEN }}'
ISSUES_TO_TRIAGE: '${{ steps.find_issues.outputs.issues_to_triage }}'
REPOSITORY: '${{ github.repository }}'
with:
gemini_cli_version: '${{ vars.GEMINI_CLI_VERSION }}'
gcp_workload_identity_provider: '${{ vars.GCP_WIF_PROVIDER }}'
gcp_project_id: '${{ vars.GOOGLE_CLOUD_PROJECT }}'
gcp_location: '${{ vars.GOOGLE_CLOUD_LOCATION }}'
gcp_service_account: '${{ vars.SERVICE_ACCOUNT_EMAIL }}'
gemini_api_key: '${{ secrets.GEMINI_API_KEY }}'
use_vertex_ai: '${{ vars.GOOGLE_GENAI_USE_VERTEXAI }}'
use_gemini_code_assist: '${{ vars.GOOGLE_GENAI_USE_GCA }}'
settings: |-
{
"maxSessionTurns": 25,
"coreTools": [
"run_shell_command(echo)",
"run_shell_command(gh label list)",
"run_shell_command(gh issue edit)",
"run_shell_command(gh issue list)"
],
"telemetry": {
"enabled": false,
"target": "gcp"
}
}
prompt: |-
## Role
You are an issue triage assistant. Analyze issues and apply
appropriate labels. Use the available tools to gather information;
do not ask for information to be provided.
## Steps
1. Run: `gh label list`
2. Check environment variable: "${ISSUES_TO_TRIAGE}" (JSON array
of issues)
3. For each issue, apply labels:
`gh issue edit "${ISSUE_NUMBER}" --add-label "label1,label2"`.
If available, set labels that follow the `kind/*`, `area/*`,
and `priority/*` patterns.
4. For each issue, if the `status/needs-triage` label is present,
remove it using:
`gh issue edit "${ISSUE_NUMBER}" --remove-label "status/needs-triage"`
## Guidelines
- Only use existing repository labels
- Do not add comments
- Triage each issue independently
- Reference all shell variables as "${VAR}" (with quotes and braces)
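The 'Find untriaged issues' step merges two `gh issue list` results with the jq pipeline `add | unique_by(.number)`. A Python rendering of that merge-and-deduplicate step, for illustration only:

```python
# Python equivalent of jq's `add | unique_by(.number)` used above.
def merge_issue_lists(no_label: list[dict], needs_triage: list[dict]) -> list[dict]:
    by_number: dict[int, dict] = {}
    for issue in no_label + needs_triage:
        by_number.setdefault(issue["number"], issue)  # keep first occurrence
    # jq's unique_by also sorts by the key
    return [by_number[n] for n in sorted(by_number)]
```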

456
.github/workflows/gemini-pr-review.yml vendored Normal file
View File

@@ -0,0 +1,456 @@
name: '🧐 Gemini Pull Request Review'
on:
pull_request:
types:
- 'opened'
- 'reopened'
issue_comment:
types:
- 'created'
pull_request_review_comment:
types:
- 'created'
pull_request_review:
types:
- 'submitted'
workflow_dispatch:
inputs:
pr_number:
description: 'PR number to review'
required: true
type: 'number'
concurrency:
group: '${{ github.workflow }}-${{ github.head_ref || github.ref }}'
cancel-in-progress: true
defaults:
run:
shell: 'bash'
permissions:
contents: 'read'
id-token: 'write'
issues: 'write'
pull-requests: 'write'
statuses: 'write'
jobs:
review-pr:
if: |-
github.event_name == 'workflow_dispatch' ||
(
github.event_name == 'pull_request' &&
contains(fromJSON('["OWNER", "MEMBER", "COLLABORATOR"]'), github.event.pull_request.author_association)
) ||
(
(
(
github.event_name == 'issue_comment' &&
github.event.issue.pull_request
) ||
github.event_name == 'pull_request_review_comment'
) &&
contains(github.event.comment.body, '@gemini-cli /review') &&
contains(fromJSON('["OWNER", "MEMBER", "COLLABORATOR"]'), github.event.comment.author_association)
) ||
(
github.event_name == 'pull_request_review' &&
contains(github.event.review.body, '@gemini-cli /review') &&
contains(fromJSON('["OWNER", "MEMBER", "COLLABORATOR"]'), github.event.review.author_association)
)
timeout-minutes: 5
runs-on: 'ubuntu-latest'
steps:
- name: 'Checkout PR code'
uses: 'actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683' # ratchet:actions/checkout@v4
- name: 'Generate GitHub App Token'
id: 'generate_token'
if: |-
${{ vars.APP_ID }}
uses: 'actions/create-github-app-token@df432ceedc7162793a195dd1713ff69aefc7379e' # ratchet:actions/create-github-app-token@v2
with:
app-id: '${{ vars.APP_ID }}'
private-key: '${{ secrets.APP_PRIVATE_KEY }}'
- name: 'Get PR details (pull_request & workflow_dispatch)'
id: 'get_pr'
if: |-
${{ github.event_name == 'pull_request' || github.event_name == 'workflow_dispatch' }}
env:
GITHUB_TOKEN: '${{ steps.generate_token.outputs.token || secrets.GITHUB_TOKEN }}'
EVENT_NAME: '${{ github.event_name }}'
WORKFLOW_PR_NUMBER: '${{ github.event.inputs.pr_number }}'
PULL_REQUEST_NUMBER: '${{ github.event.pull_request.number }}'
run: |-
set -euo pipefail
if [[ "${EVENT_NAME}" = "workflow_dispatch" ]]; then
PR_NUMBER="${WORKFLOW_PR_NUMBER}"
else
PR_NUMBER="${PULL_REQUEST_NUMBER}"
fi
echo "pr_number=${PR_NUMBER}" >> "${GITHUB_OUTPUT}"
# Get PR details
PR_DATA="$(gh pr view "${PR_NUMBER}" --json title,body,additions,deletions,changedFiles,baseRefName,headRefName)"
echo "pr_data=${PR_DATA}" >> "${GITHUB_OUTPUT}"
# Get file changes
CHANGED_FILES="$(gh pr diff "${PR_NUMBER}" --name-only)"
{
echo "changed_files<<EOF"
echo "${CHANGED_FILES}"
echo "EOF"
} >> "${GITHUB_OUTPUT}"
- name: 'Get PR details (issue_comment)'
id: 'get_pr_comment'
if: |-
${{ github.event_name == 'issue_comment' }}
env:
GITHUB_TOKEN: '${{ steps.generate_token.outputs.token || secrets.GITHUB_TOKEN }}'
COMMENT_BODY: '${{ github.event.comment.body }}'
PR_NUMBER: '${{ github.event.issue.number }}'
run: |-
set -euo pipefail
echo "pr_number=${PR_NUMBER}" >> "${GITHUB_OUTPUT}"
# Extract additional instructions from comment
ADDITIONAL_INSTRUCTIONS="$(
echo "${COMMENT_BODY}" | sed 's/.*@gemini-cli \/review//' | xargs
)"
echo "additional_instructions=${ADDITIONAL_INSTRUCTIONS}" >> "${GITHUB_OUTPUT}"
# Get PR details
PR_DATA="$(gh pr view "${PR_NUMBER}" --json title,body,additions,deletions,changedFiles,baseRefName,headRefName)"
echo "pr_data=${PR_DATA}" >> "${GITHUB_OUTPUT}"
# Get file changes
CHANGED_FILES="$(gh pr diff "${PR_NUMBER}" --name-only)"
{
echo "changed_files<<EOF"
echo "${CHANGED_FILES}"
echo "EOF"
} >> "${GITHUB_OUTPUT}"
- name: 'Run Gemini PR Review'
uses: 'google-github-actions/run-gemini-cli@v0'
id: 'gemini_pr_review'
env:
GITHUB_TOKEN: '${{ steps.generate_token.outputs.token || secrets.GITHUB_TOKEN }}'
PR_NUMBER: '${{ steps.get_pr.outputs.pr_number || steps.get_pr_comment.outputs.pr_number }}'
PR_DATA: '${{ steps.get_pr.outputs.pr_data || steps.get_pr_comment.outputs.pr_data }}'
CHANGED_FILES: '${{ steps.get_pr.outputs.changed_files || steps.get_pr_comment.outputs.changed_files }}'
ADDITIONAL_INSTRUCTIONS: '${{ steps.get_pr.outputs.additional_instructions || steps.get_pr_comment.outputs.additional_instructions }}'
REPOSITORY: '${{ github.repository }}'
with:
gemini_cli_version: '${{ vars.GEMINI_CLI_VERSION }}'
gcp_workload_identity_provider: '${{ vars.GCP_WIF_PROVIDER }}'
gcp_project_id: '${{ vars.GOOGLE_CLOUD_PROJECT }}'
gcp_location: '${{ vars.GOOGLE_CLOUD_LOCATION }}'
gcp_service_account: '${{ vars.SERVICE_ACCOUNT_EMAIL }}'
gemini_api_key: '${{ secrets.GEMINI_API_KEY }}'
use_vertex_ai: '${{ vars.GOOGLE_GENAI_USE_VERTEXAI }}'
use_gemini_code_assist: '${{ vars.GOOGLE_GENAI_USE_GCA }}'
settings: |-
{
"maxSessionTurns": 20,
"mcpServers": {
"github": {
"command": "docker",
"args": [
"run",
"-i",
"--rm",
"-e",
"GITHUB_PERSONAL_ACCESS_TOKEN",
"ghcr.io/github/github-mcp-server"
],
"includeTools": [
"create_pending_pull_request_review",
"add_comment_to_pending_review",
"submit_pending_pull_request_review"
],
"env": {
"GITHUB_PERSONAL_ACCESS_TOKEN": "${GITHUB_TOKEN}"
}
}
},
"coreTools": [
"run_shell_command(echo)",
"run_shell_command(gh pr view)",
"run_shell_command(gh pr diff)",
"run_shell_command(cat)",
"run_shell_command(head)",
"run_shell_command(tail)",
"run_shell_command(grep)"
],
"telemetry": {
"enabled": false,
"target": "gcp"
}
}
prompt: |-
## Role
You are an expert code reviewer. You have access to tools to gather
PR information and perform the review on GitHub. Use the available tools to
gather information; do not ask for information to be provided.
## Requirements
1. All feedback must be left on GitHub.
2. Any output that is not left in GitHub will not be seen.
## Steps
Start by running these commands to gather the required data:
1. Run: echo "${REPOSITORY}" to get the GitHub repository in <OWNER>/<REPO> format
2. Run: echo "${PR_DATA}" to get PR details (JSON format)
3. Run: echo "${CHANGED_FILES}" to get the list of changed files
4. Run: echo "${PR_NUMBER}" to get the PR number
5. Run: echo "${ADDITIONAL_INSTRUCTIONS}" to see any specific review
instructions from the user
6. Run: gh pr diff "${PR_NUMBER}" to see the full diff, and reference the
Context section to understand it
7. For any specific files, use: cat filename, head -50 filename, or
tail -50 filename
8. If ADDITIONAL_INSTRUCTIONS contains text, prioritize those
specific areas or focus points in your review. Common instruction
examples: "focus on security", "check performance", "review error
handling", "check for breaking changes"
## Guidelines
### Core Guidelines (always applicable)
1. Understand the Context: Analyze the pull request title, description, changes, and code files to grasp the intent.
2. Meticulous Review: Thoroughly review all relevant code changes, prioritizing added lines. Consider the specified
focus areas and any provided style guide.
3. Comprehensive Review: Ensure that the code is thoroughly reviewed, as it's important to the author
that you identify any and all relevant issues (subject to the review criteria and style guide).
Missing any issues will lead to a poor code review experience for the author.
4. Constructive Feedback:
* Provide clear explanations for each concern.
* Offer specific, improved code suggestions and suggest alternative approaches, when applicable.
Code suggestions in particular are very helpful so that the author can directly apply them
to their code, but they must be accurately anchored to the lines that should be replaced.
5. Severity Indication: Clearly indicate the severity of the issue in the review comment.
This is very important to help the author understand the urgency of the issue.
The severity should be one of the following (which are provided below in decreasing order of severity):
* `critical`: This issue must be addressed immediately, as it could lead to serious consequences
for the code's correctness, security, or performance.
* `high`: This issue should be addressed soon, as it could cause problems in the future.
* `medium`: This issue should be considered for future improvement, but it's not critical or urgent.
* `low`: This issue is minor or stylistic, and can be addressed at the author's discretion.
6. Avoid commenting on hardcoded dates and times being in the future or not (for example, "this date is in the future").
* Remember that you don't have access to the current date and time; leave that to the author.
7. Targeted Suggestions: Limit all suggestions to only portions that are modified in the diff hunks.
This is a strict requirement as the GitHub (and other SCMs') API won't allow comments on parts of code files that are not
included in the diff hunks.
8. Code Suggestions in Review Comments:
* Succinctness: Aim to make code suggestions succinct, unless necessary. Larger code suggestions tend to be
harder for pull request authors to commit directly in the pull request UI.
* Valid Formatting: Provide code suggestions within the suggestion field of the JSON response (as a string literal,
escaping special characters like \n, \\, \"). Do not include markdown code blocks in the suggestion field.
Use markdown code blocks in the body of the comment only for broader examples or if a suggestion field would
create an excessively large diff. Prefer the suggestion field for specific, targeted code changes.
* Line Number Accuracy: Code suggestions need to align perfectly with the code they intend to replace.
Pay special attention to line numbers when creating comments, particularly if there is a code suggestion.
Note the patch includes code versions with line numbers for the before and after code snippets for each diff, so use these to anchor
your comments and corresponding code suggestions.
* Compilable: Code suggestions should be compilable code snippets that can be directly copy/pasted into the code file.
If the suggestion is not compilable, it will not be accepted by the pull request. Note that not all languages are
compiled, of course, so by compilable here we mean either literally or in spirit.
* Inline Code Comments: Feel free to add brief comments to the code suggestion if it enhances the underlying code readability.
Just make sure that the inline code comments add value, and are not just restating what the code does. Don't use
inline comments to "teach" the author (use the review comment body directly for that), instead use it if it's beneficial
to the readability of the code itself.
9. Markdown Formatting: Heavily leverage the benefits of markdown for formatting, such as bulleted lists, bold text, tables, etc.
10. Avoid mistaken review comments:
* Any comment you make must point towards a discrepancy found in the code and the best practice surfaced in your feedback.
For example, if you are pointing out that constants need to be named in all caps with underscores,
ensure that the code selected by the comment does not already do this, otherwise the comment is confusing as well as unnecessary.
11. Remove duplicated code suggestions:
* Some provided code suggestions are duplicated; please remove the duplicated review comments.
12. Don't Approve The Pull Request
13. Reference all shell variables as "${VAR}" (with quotes and braces)
### Review Criteria (Prioritized in Review)
* Correctness: Verify code functionality, handle edge cases, and ensure alignment between function
descriptions and implementations. Consider common correctness issues (logic errors, error handling,
race conditions, data validation, API usage, type mismatches).
* Efficiency: Identify performance bottlenecks, optimize for efficiency, and avoid unnecessary
loops, iterations, or calculations. Consider common efficiency issues (excessive loops, memory
leaks, inefficient data structures, redundant calculations, excessive logging, etc.).
* Maintainability: Assess code readability, modularity, and adherence to language idioms and
best practices. Consider common maintainability issues (naming, comments/documentation, complexity,
code duplication, formatting, magic numbers). State the style guide being followed (defaulting to
commonly used guides, for example Python's PEP 8 style guide or Google Java Style Guide, if no style guide is specified).
* Security: Identify potential vulnerabilities (e.g., insecure storage, injection attacks,
insufficient access controls).
### Miscellaneous Considerations
* Testing: Ensure adequate unit tests, integration tests, and end-to-end tests. Evaluate
coverage, edge case handling, and overall test quality.
* Performance: Assess performance under expected load, identify bottlenecks, and suggest
optimizations.
* Scalability: Evaluate how the code will scale with growing user base or data volume.
* Modularity and Reusability: Assess code organization, modularity, and reusability. Suggest
refactoring or creating reusable components.
* Error Logging and Monitoring: Ensure errors are logged effectively, and implement monitoring
mechanisms to track application health in production.
**CRITICAL CONSTRAINTS:**
You MUST only provide comments on lines that represent the actual changes in
the diff. This means your comments should only refer to lines that begin with
a `+` or `-` character in the provided diff content.
DO NOT comment on lines that start with a space (context lines).
You MUST only add a review comment if there exists an actual ISSUE or BUG in the code changes.
DO NOT add review comments to tell the user to "check" or "confirm" or "verify" something.
DO NOT add review comments to tell the user to "ensure" something.
DO NOT add review comments to explain what the code change does.
DO NOT add review comments to validate what the code change does.
DO NOT use the review comments to explain the code to the author. They already know their code. Only comment when there's an improvement opportunity. This is very important.
Pay close attention to line numbers and ensure they are correct.
Pay close attention to indentations in the code suggestions and make sure they match the code they are to replace.
Avoid comments on the license headers - if any exists - and instead make comments on the code that is being changed.
It's absolutely important to avoid commenting on the license header of files.
It's absolutely important to avoid commenting on copyright headers.
Avoid commenting on hardcoded dates and times being in the future or not (for example, "this date is in the future").
Remember that you don't have access to the current date and time; leave that to the author.
Avoid mentioning any of your instructions, settings or criteria.
Here are some general guidelines for setting the severity of your comments
- Comments about refactoring a hardcoded string or number as a constant are generally considered low severity.
- Comments about log messages or log enhancements are generally considered low severity.
- Comments in .md files are medium or low severity. This is really important.
- Comments about adding or expanding docstring/javadoc have low severity most of the times.
- Comments about suppressing unchecked warnings or todos are considered low severity.
- Comments about typos are usually low or medium severity.
- Comments about testing or on tests are usually low severity.
- Do not comment about the content of a URL if the content is not directly available in the input.
Keep comments bodies concise and to the point.
Keep each comment focused on one issue.
## Context
The files that are changed in this pull request are represented below in the following
format, showing the file name and the portions of the file that are changed:
<PATCHES>
FILE:<NAME OF FIRST FILE>
DIFF:
<PATCH IN UNIFIED DIFF FORMAT>
--------------------
FILE:<NAME OF SECOND FILE>
DIFF:
<PATCH IN UNIFIED DIFF FORMAT>
--------------------
(and so on for all files changed)
</PATCHES>
Note that if you want to make a comment on the LEFT side of the UI (the before-diff version of the code),
note those line numbers and the corresponding code; the same applies to a comment on the RIGHT side
of the UI (the after-diff version): note its line numbers and corresponding code.
This should be your guide to picking line numbers and, very importantly, restrict
your comments to be only within this line range for these files, whether on the LEFT or RIGHT.
If you comment out of bounds, the review will fail, so you must pay attention to the file name,
line numbers, and pre/post-diff versions when crafting your comment.
Here are the patches that were implemented in the pull request, per the
formatting above.
To get the patches for the files changed in this pull request, run:
gh pr diff "${PR_NUMBER}" --patch
## Review
Once you have the information and are ready to leave a review on GitHub, post the review to GitHub using the GitHub MCP tool by:
1. Creating a pending review: Use the mcp__github__create_pending_pull_request_review to create a Pending Pull Request Review.
2. Adding review comments:
2.1 Use the mcp__github__add_comment_to_pending_review to add comments to the Pending Pull Request Review. Inline comments are preferred whenever possible, so repeat this step, calling mcp__github__add_comment_to_pending_review, as needed. All comments about specific lines of code should use inline comments. It is preferred to use code suggestions when possible, which include a code block that is labeled "suggestion", which contains what the new code should be. All comments should also have a severity. The syntax is:
Normal Comment Syntax:
<COMMENT>
{{SEVERITY}} {{COMMENT_TEXT}}
</COMMENT>
Inline Comment Syntax: (Preferred):
<COMMENT>
{{SEVERITY}} {{COMMENT_TEXT}}
```suggestion
{{CODE_SUGGESTION}}
```
</COMMENT>
Prepend a severity emoji to each comment:
- 🟢 for low severity
- 🟡 for medium severity
- 🟠 for high severity
- 🔴 for critical severity
- 🔵 if severity is unclear
Including all of this, an example inline comment would be:
<COMMENT>
🟢 Use camelCase for function names
```suggestion
myFooBarFunction
```
</COMMENT>
A critical severity example would be:
<COMMENT>
🔴 Remove storage key from GitHub
```suggestion
```
</COMMENT>
3. Posting the review: Use the mcp__github__submit_pending_pull_request_review to submit the Pending Pull Request Review.
3.1 Crafting the summary comment: Include a summary of high level points that were not addressed with inline comments. Be concise. Do not repeat details mentioned inline.
Structure your summary comment using this exact format with markdown:
## 📋 Review Summary
Provide a brief 2-3 sentence overview of the PR and overall
assessment.
## 🔍 General Feedback
- List general observations about code quality
- Mention overall patterns or architectural decisions
- Highlight positive aspects of the implementation
- Note any recurring themes across files
## Final Instructions
Remember, you are running in a VM and no one is reviewing your output directly. Your review must be posted to GitHub using the MCP tools to create a pending review, add comments to the pending review, and submit the pending review.
- name: 'Post PR review failure comment'
if: |-
${{ failure() && steps.gemini_pr_review.outcome == 'failure' }}
uses: 'actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea'
with:
github-token: '${{ steps.generate_token.outputs.token || secrets.GITHUB_TOKEN }}'
script: |-
github.rest.issues.createComment({
owner: '${{ github.repository }}'.split('/')[0],
repo: '${{ github.repository }}'.split('/')[1],
issue_number: '${{ steps.get_pr.outputs.pr_number || steps.get_pr_comment.outputs.pr_number }}',
body: 'There is a problem with the Gemini CLI PR review. Please check the [action logs](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) for details.'
})
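Both 'Get PR details' steps above use the `name<<EOF` heredoc convention to pass multi-line values (PR data, the changed-file list) through `$GITHUB_OUTPUT`. A minimal sketch of that convention:

```python
# Sketch of the `name<<EOF` pattern used above to write a multi-line value
# into the GITHUB_OUTPUT file that GitHub Actions provides to each step.
import os


def write_multiline_output(name: str, value: str) -> None:
    delimiter = "EOF"  # assumes the value never contains a bare "EOF" line
    with open(os.environ["GITHUB_OUTPUT"], "a", encoding="utf-8") as fh:
        fh.write(f"{name}<<{delimiter}\n{value}\n{delimiter}\n")
```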

View File

@@ -42,10 +42,10 @@ jobs:
- name: Checkout capa
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
# use latest available python to take advantage of best performance
- name: Set up Python 3.13
- name: Set up Python 3.12
uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
with:
python-version: "3.13"
python-version: "3.12"
- name: Install dependencies
run: |
pip install -r requirements.txt
@@ -70,10 +70,10 @@ jobs:
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
with:
submodules: recursive
- name: Set up Python 3.13
- name: Set up Python 3.12
uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
with:
python-version: "3.13"
python-version: "3.12"
- name: Install capa
run: |
pip install -r requirements.txt
@@ -88,11 +88,13 @@ jobs:
strategy:
fail-fast: false
matrix:
os: [ubuntu-22.04, ubuntu-22.04-arm, windows-2022, macos-15-intel, macos-14]
os: [ubuntu-22.04, windows-2022, macos-13]
# across all operating systems
python-version: ["3.10", "3.13"]
python-version: ["3.10", "3.11"]
include:
# on Ubuntu run these as well
- os: ubuntu-22.04
python-version: "3.10"
- os: ubuntu-22.04
python-version: "3.11"
- os: ubuntu-22.04
@@ -129,7 +131,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: ["3.10", "3.13"]
python-version: ["3.10", "3.11"]
steps:
- name: Checkout capa with submodules
# do only run if BN_SERIAL is available, have to do this in every step, see https://github.com/orgs/community/discussions/26726#discussioncomment-3253118
@@ -171,11 +173,11 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: ["3.10", "3.13"]
java-version: ["21"]
ghidra-version: ["12.0"]
public-version: ["PUBLIC_20251205"] # for ghidra releases
ghidra-sha256: ['af43e8cfb2fa4490cf6020c3a2bde25c159d83f45236a0542688a024e8fc1941']
python-version: ["3.10", "3.11"]
java-version: ["17"]
ghidra-version: ["11.0.1"]
public-version: ["PUBLIC_20240130"] # for ghidra releases
ghidrathon-version: ["4.0.0"]
steps:
- name: Checkout capa with submodules
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
@@ -194,66 +196,26 @@ jobs:
run: |
mkdir ./.github/ghidra
wget "https://github.com/NationalSecurityAgency/ghidra/releases/download/Ghidra_${{ matrix.ghidra-version }}_build/ghidra_${{ matrix.ghidra-version }}_${{ matrix.public-version }}.zip" -O ./.github/ghidra/ghidra_${{ matrix.ghidra-version }}_PUBLIC.zip
echo "${{ matrix.ghidra-sha256 }} ./.github/ghidra/ghidra_${{ matrix.ghidra-version }}_PUBLIC.zip" | sha256sum -c -
unzip .github/ghidra/ghidra_${{ matrix.ghidra-version }}_PUBLIC.zip -d .github/ghidra/
- name: Install Ghidrathon
run: |
mkdir ./.github/ghidrathon
wget "https://github.com/mandiant/Ghidrathon/releases/download/v${{ matrix.ghidrathon-version }}/Ghidrathon-v${{ matrix.ghidrathon-version}}.zip" -O ./.github/ghidrathon/ghidrathon-v${{ matrix.ghidrathon-version }}.zip
unzip .github/ghidrathon/ghidrathon-v${{ matrix.ghidrathon-version }}.zip -d .github/ghidrathon/
python -m pip install -r .github/ghidrathon/requirements.txt
python .github/ghidrathon/ghidrathon_configure.py $(pwd)/.github/ghidra/ghidra_${{ matrix.ghidra-version }}_PUBLIC
unzip .github/ghidrathon/Ghidrathon-v${{ matrix.ghidrathon-version }}.zip -d .github/ghidra/ghidra_${{ matrix.ghidra-version }}_PUBLIC/Ghidra/Extensions
- name: Install pyyaml
run: sudo apt-get install -y libyaml-dev
- name: Install capa with Ghidra extra
run: |
pip install -e .[dev,ghidra]
- name: Run tests
env:
GHIDRA_INSTALL_DIR: ${{ github.workspace }}/.github/ghidra/ghidra_${{ matrix.ghidra-version }}_PUBLIC
run: pytest -v tests/test_ghidra_features.py
idalib-tests:
name: IDA ${{ matrix.ida.version }} tests for ${{ matrix.python-version }}
runs-on: ubuntu-22.04
needs: [tests]
env:
IDA_LICENSE_ID: ${{ secrets.IDA_LICENSE_ID }}
strategy:
fail-fast: false
matrix:
python-version: ["3.10", "3.13"]
ida:
- version: 9.0
slug: "release/9.0/ida-essential/ida-essential_90_x64linux.run"
- version: 9.1
slug: "release/9.1/ida-essential/ida-essential_91_x64linux.run"
- version: 9.2
slug: "release/9.2/ida-essential/ida-essential_92_x64linux.run"
steps:
- name: Checkout capa with submodules
# do only run if IDA_LICENSE_ID is available, have to do this in every step, see https://github.com/orgs/community/discussions/26726#discussioncomment-3253118
if: ${{ env.IDA_LICENSE_ID != 0 }}
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
with:
submodules: recursive
- name: Set up Python ${{ matrix.python-version }}
if: ${{ env.IDA_LICENSE_ID != 0 }}
uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
with:
python-version: ${{ matrix.python-version }}
- name: Setup uv
if: ${{ env.IDA_LICENSE_ID != 0 }}
uses: astral-sh/setup-uv@61cb8a9741eeb8a550a1b8544337180c0fc8476b # v7.2.0
- name: Install dependencies
if: ${{ env.IDA_LICENSE_ID != 0 }}
run: sudo apt-get install -y libyaml-dev
- name: Install capa
if: ${{ env.IDA_LICENSE_ID != 0 }}
run: |
pip install -r requirements.txt
pip install -e .[dev,scripts]
pip install idapro
- name: Install IDA ${{ matrix.ida.version }}
if: ${{ env.IDA_LICENSE_ID != 0 }}
run: |
uv run hcli --disable-updates ida install --download-id ${{ matrix.ida.slug }} --license-id ${{ secrets.IDA_LICENSE_ID }} --set-default --yes
env:
HCLI_API_KEY: ${{ secrets.HCLI_API_KEY }}
IDA_LICENSE_ID: ${{ secrets.IDA_LICENSE_ID }}
- name: Run tests
if: ${{ env.IDA_LICENSE_ID != 0 }}
run: pytest -v tests/test_idalib_features.py # explicitly refer to the idalib tests for performance. other tests run above.
run: |
mkdir ./.github/ghidra/project
.github/ghidra/ghidra_${{ matrix.ghidra-version }}_PUBLIC/support/analyzeHeadless .github/ghidra/project ghidra_test -Import ./tests/data/mimikatz.exe_ -ScriptPath ./tests/ -PostScript test_ghidra_features.py > ../output.log
cat ../output.log
exit_code=$(cat ../output.log | grep exit | awk '{print $NF}')
exit $exit_code
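The removed headless-Ghidra test step scrapes its exit code out of the analyzer log with `grep`/`awk`. A Python equivalent of that extraction, for illustration only:

```python
# Python rendering of: grep exit | awk '{print $NF}' over the headless log.
def exit_code_from_log(log_text: str) -> int:
    code = 1  # assume failure unless an exit marker is found
    for line in log_text.splitlines():
        if "exit" in line:
            code = int(line.split()[-1])  # awk '{print $NF}' on the match
    return code
```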

1
.gitignore vendored
View File

@@ -122,7 +122,6 @@ scripts/perf/*.zip
*/.DS_Store
Pipfile
Pipfile.lock
uv.lock
/cache/
.github/binja/binaryninja
.github/binja/download_headless.py

View File

@@ -138,7 +138,6 @@ repos:
- "--ignore=tests/test_ghidra_features.py"
- "--ignore=tests/test_ida_features.py"
- "--ignore=tests/test_viv_features.py"
- "--ignore=tests/test_idalib_features.py"
- "--ignore=tests/test_main.py"
- "--ignore=tests/test_scripts.py"
always_run: true

View File

@@ -3,111 +3,29 @@
## master (unreleased)
### New Features
- ghidra: support PyGhidra @mike-hunhoff #2788
### Breaking Changes
### New Rules (5)
- nursery/run-as-nodejs-native-module mehunhoff@google.com
- nursery/inject-shellcode-using-thread-pool-work-insertion-with-tp_io still@teamt5.org
- nursery/inject-shellcode-using-thread-pool-work-insertion-with-tp_timer still@teamt5.org
- nursery/inject-shellcode-using-thread-pool-work-insertion-with-tp_work still@teamt5.org
- data-manipulation/encryption/hc-256/encrypt-data-using-hc-256 wballenthin@hex-rays.com
### Bug Fixes
- Fixed insecure deserialization vulnerability in YAML loading @0x1622 (#2770)
- loader: gracefully handle ELF files with unsupported architectures kamranulhaq2002@gmail.com #2800
### capa Explorer Web
### capa Explorer IDA Pro plugin
### Development
- ci: deprecate macos-13 runner and use Python v3.13 for testing @mike-hunhoff #2777
### Raw diffs
- [capa v9.3.1...master](https://github.com/mandiant/capa/compare/v9.3.1...master)
- [capa-rules v9.3.1...master](https://github.com/mandiant/capa-rules/compare/v9.3.1...master)
## v9.3.1
This patch release fixes a missing import for the capa explorer plugin for IDA Pro.
### Bug Fixes
- add missing ida-netnode dependency to project.toml @mike-hunhoff #2765
### Development
- ci: bump binja min version @mike-hunhoff #2763
### Raw diffs
- [capa v9.3.0...master](https://github.com/mandiant/capa/compare/v9.3.0...master)
- [capa-rules v9.3.0...master](https://github.com/mandiant/capa-rules/compare/v9.3.0...master)
## v9.3.0
capa v9.3.0 comes with over 20 new and/or improved rules.
For IDA users, the capa explorer plugin is now available via the IDA Pro plugin repository and contains a Qt compatibility layer for PyQt5 and PySide6 support.
Additionally, a Binary Ninja bug has been fixed. Released binaries now include ARM64 builds (Linux and macOS).
### New Features
- ci: add support for arm64 binary releases
- tests: run tests against IDA via idalib @williballenthin #2742
### Breaking Changes
### New Rules (24)
### New Rules (2)
- anti-analysis/anti-vm/vm-detection/detect-mouse-movement-via-activity-checks-on-windows tevajdr@gmail.com
- nursery/create-executable-heap moritz.raabe@mandiant.com
- anti-analysis/packer/dxpack/packed-with-dxpack jakubjozwiak@google.com
- anti-analysis/anti-av/patch-bitdefender-hooking-dll-function jakubjozwiak@google.com
- nursery/acquire-load-driver-privileges mehunhoff@google.com
- nursery/communicate-using-ftp mehunhoff@google.com
- linking/static/eclipse-paho-mqtt-c/linked-against-eclipse-paho-mqtt-c jakubjozwiak@google.com
- linking/static/qmqtt/linked-against-qmqtt jakubjozwiak@google.com
- anti-analysis/anti-forensic/disable-powershell-transcription jakubjozwiak@google.com
- host-interaction/powershell/bypass-powershell-constrained-language-mode-via-getsystemlockdownpolicy-patch jakubjozwiak@google.com
- linking/static/grpc/linked-against-grpc jakubjozwiak@google.com
- linking/static/hp-socket/linked-against-hp-socket jakubjozwiak@google.com
- load-code/execute-jscript-via-vsaengine-in-dotnet jakubjozwiak@google.com
- linking/static/funchook/linked-against-funchook jakubjozwiak@google.com
- linking/static/plthook/linked-against-plthook jakubjozwiak@google.com
- host-interaction/network/enumerate-tcp-connections-via-wmi-com-api jakubjozwiak@google.com
- host-interaction/network/routing-table/create-routing-table-entry jakubjozwiak@google.com
- host-interaction/network/routing-table/get-routing-table michael.hunhoff@mandiant.com
- host-interaction/file-system/use-io_uring-io-interface-on-linux jakubjozwiak@google.com
- collection/keylog/log-keystrokes-via-direct-input zeze-zeze
- nursery/compiled-from-fsharp mehunhoff@google.com
- nursery/decrypt-data-using-aes-via-dotnet mehunhoff@google.com
- nursery/get-dotnet-assembly-entry-point mehunhoff@google.com
### Bug Fixes
- binja: fix a crash during feature extraction when the MLIL is unavailable @xusheng6 #2714
### capa Explorer Web
### capa Explorer IDA Pro plugin
- add `ida-plugin.json` for inclusion in the IDA Pro plugin repository @williballenthin
- ida plugin: add Qt compatibility layer for PyQt5 and PySide6 support @williballenthin #2707
- delay import to not load Qt* when running under idalib @mr-tz #2752
### Development
- ci: remove redundant "test_run" action from build workflow @mike-hunhoff #2692
- dev: add bumpmyversion to bump and sync versions across the project @mr-tz
### Raw diffs
- [capa v9.2.1...9.3.0](https://github.com/mandiant/capa/compare/v9.2.1...9.3.0)
- [capa-rules v9.2.1...9.3.0](https://github.com/mandiant/capa-rules/compare/v9.2.1...9.3.0)
- [capa v9.2.1...master](https://github.com/mandiant/capa/compare/v9.2.1...master)
- [capa-rules v9.2.1...master](https://github.com/mandiant/capa-rules/compare/v9.2.1...master)
## v9.2.1

View File

@@ -291,17 +291,11 @@ It also uses your local changes to the .idb to extract better features, such as
![capa + IDA Pro integration](https://github.com/mandiant/capa/blob/master/doc/img/explorer_expanded.png)
# Ghidra integration
capa supports using Ghidra (via [PyGhidra](https://github.com/NationalSecurityAgency/ghidra/tree/master/Ghidra/Features/PyGhidra)) as a feature extraction backend. This allows you to run capa against binaries using Ghidra's analysis engine.
You can run and view capa results in the Ghidra UI using [capa explorer for Ghidra](https://github.com/mandiant/capa/tree/master/capa/ghidra/plugin).
If you use Ghidra, then you can use the [capa + Ghidra integration](/capa/ghidra/) to run capa's analysis directly on your Ghidra database and render the results in Ghidra's user interface.
<img src="https://github.com/mandiant/capa/assets/66766340/eeae33f4-99d4-42dc-a5e8-4c1b8c661492" width=300>
You can also run capa from the command line using the [Ghidra backend](https://github.com/mandiant/capa/tree/master/capa/ghidra).
# blog posts
- [Riding Dragons: capa Harnesses Ghidra](https://www.mandiant.com/resources/blog/capa-harnesses-ghidra)
- [Dynamic capa: Exploring Executable Run-Time Behavior with the CAPE Sandbox](https://www.mandiant.com/resources/blog/dynamic-capa-executable-behavior-cape-sandbox)
- [capa v4: casting a wider .NET](https://www.mandiant.com/resources/blog/capa-v4-casting-wider-net) (.NET support)
- [ELFant in the Room capa v3](https://www.mandiant.com/resources/elfant-in-the-room-capa-v3) (ELF support)
@@ -321,6 +315,3 @@ You can also run capa from the command line using the [Ghidra backend](https://g
## capa testfiles
The [capa-testfiles repository](https://github.com/mandiant/capa-testfiles) contains the data we use to test capa's code and rules.
## mailing list
Subscribe to the FLARE mailing list for community announcements! Email "subscribe" to [flare-external@google.com](mailto:flare-external@google.com?subject=subscribe).

View File

@@ -19,6 +19,7 @@ from binaryninja import (
Function,
BinaryView,
SymbolType,
ILException,
RegisterValueType,
VariableSourceType,
LowLevelILOperation,
@@ -191,8 +192,9 @@ def extract_stackstring(fh: FunctionHandle):
if bv is None:
return
mlil = func.mlil
if mlil is None:
try:
mlil = func.mlil
except ILException:
return
for block in mlil.basic_blocks:
View File
@@ -35,7 +35,7 @@ from capa.features.extractors.base_extractor import (
logger = logging.getLogger(__name__)
TESTED_VERSIONS = {"2.2-CAPE", "2.4-CAPE", "2.5-CAPE"}
TESTED_VERSIONS = {"2.2-CAPE", "2.4-CAPE"}
class CapeExtractor(DynamicFeatureExtractor):
View File
@@ -83,7 +83,7 @@ def bb_contains_stackstring(bb: ghidra.program.model.block.CodeBlock) -> bool:
true if basic block contains enough moves of constant bytes to the stack
"""
count = 0
for insn in capa.features.extractors.ghidra.helpers.get_current_program().getListing().getInstructions(bb, True):
for insn in currentProgram().getListing().getInstructions(bb, True): # type: ignore [name-defined] # noqa: F821
if is_mov_imm_to_stack(insn):
count += get_printable_len(insn.getScalar(1))
if count > MIN_STACKSTRING_LEN:
@@ -96,9 +96,7 @@ def _bb_has_tight_loop(bb: ghidra.program.model.block.CodeBlock):
parse tight loops, true if last instruction in basic block branches to bb start
"""
# Reverse Ordered, first InstructionDB
last_insn = (
capa.features.extractors.ghidra.helpers.get_current_program().getListing().getInstructions(bb, False).next()
)
last_insn = currentProgram().getListing().getInstructions(bb, False).next() # type: ignore [name-defined] # noqa: F821
if last_insn.getFlowType().isJump():
return last_insn.getAddress(0) == bb.getMinAddress()
@@ -142,3 +140,20 @@ def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Featur
for bb_handler in BASIC_BLOCK_HANDLERS:
for feature, addr in bb_handler(fh, bbh):
yield feature, addr
def main():
features = []
from capa.features.extractors.ghidra.extractor import GhidraFeatureExtractor
for fh in GhidraFeatureExtractor().get_functions():
for bbh in capa.features.extractors.ghidra.helpers.get_function_blocks(fh):
features.extend(list(extract_features(fh, bbh)))
import pprint
pprint.pprint(features) # noqa: T203
if __name__ == "__main__":
main()
View File
@@ -1,44 +0,0 @@
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Optional
class GhidraContext:
"""
State holder for the Ghidra backend to avoid passing state to every function.
PyGhidra uses a context manager to set up the Ghidra environment (program, transaction, etc.).
We store the relevant objects here to allow easy access throughout the extractor
without needing to pass them as arguments to every feature extraction method.
"""
def __init__(self, program, flat_api, monitor):
self.program = program
self.flat_api = flat_api
self.monitor = monitor
_context: Optional[GhidraContext] = None
def set_context(program, flat_api, monitor):
global _context
_context = GhidraContext(program, flat_api, monitor)
def get_context() -> GhidraContext:
if _context is None:
raise RuntimeError("GhidraContext not initialized")
return _context
View File
@@ -12,14 +12,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import weakref
import contextlib
from typing import Iterator
import capa.features.extractors.ghidra.file
import capa.features.extractors.ghidra.insn
import capa.features.extractors.ghidra.global_
import capa.features.extractors.ghidra.helpers as ghidra_helpers
import capa.features.extractors.ghidra.function
import capa.features.extractors.ghidra.basicblock
from capa.features.common import Feature
@@ -34,20 +31,19 @@ from capa.features.extractors.base_extractor import (
class GhidraFeatureExtractor(StaticFeatureExtractor):
def __init__(self, ctx_manager=None, tmpdir=None):
self.ctx_manager = ctx_manager
self.tmpdir = tmpdir
def __init__(self):
import capa.features.extractors.ghidra.helpers as ghidra_helpers
super().__init__(
SampleHashes(
md5=ghidra_helpers.get_current_program().getExecutableMD5(),
md5=capa.ghidra.helpers.get_file_md5(),
# ghidra doesn't expose this hash.
# https://ghidra.re/ghidra_docs/api/ghidra/program/model/listing/Program.html
#
# the hashes are stored in the database, not computed on the fly,
# so it's probably not trivial to add SHA1.
sha1="",
sha256=ghidra_helpers.get_current_program().getExecutableSHA256(),
sha256=capa.ghidra.helpers.get_file_sha256(),
)
)
@@ -59,14 +55,8 @@ class GhidraFeatureExtractor(StaticFeatureExtractor):
self.externs = ghidra_helpers.get_file_externs()
self.fakes = ghidra_helpers.map_fake_import_addrs()
# Register cleanup to run when the extractor is garbage collected or when the program exits.
# We use weakref.finalize instead of __del__ to avoid issues with reference cycles and
# to ensure deterministic cleanup on interpreter shutdown.
if self.ctx_manager or self.tmpdir:
weakref.finalize(self, cleanup, self.ctx_manager, self.tmpdir)
def get_base_address(self):
return AbsoluteVirtualAddress(ghidra_helpers.get_current_program().getImageBase().getOffset())
return AbsoluteVirtualAddress(currentProgram().getImageBase().getOffset()) # type: ignore [name-defined] # noqa: F821
def extract_global_features(self):
yield from self.global_features
@@ -75,6 +65,7 @@ class GhidraFeatureExtractor(StaticFeatureExtractor):
yield from capa.features.extractors.ghidra.file.extract_features()
def get_functions(self) -> Iterator[FunctionHandle]:
import capa.features.extractors.ghidra.helpers as ghidra_helpers
for fhandle in ghidra_helpers.get_function_symbols():
fh: FunctionHandle = FunctionHandle(
@@ -86,14 +77,14 @@ class GhidraFeatureExtractor(StaticFeatureExtractor):
@staticmethod
def get_function(addr: int) -> FunctionHandle:
func = ghidra_helpers.get_flat_api().getFunctionContaining(ghidra_helpers.get_flat_api().toAddr(addr))
func = getFunctionContaining(toAddr(addr)) # type: ignore [name-defined] # noqa: F821
return FunctionHandle(address=AbsoluteVirtualAddress(func.getEntryPoint().getOffset()), inner=func)
def extract_function_features(self, fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
yield from capa.features.extractors.ghidra.function.extract_features(fh)
def get_basic_blocks(self, fh: FunctionHandle) -> Iterator[BBHandle]:
import capa.features.extractors.ghidra.helpers as ghidra_helpers
yield from ghidra_helpers.get_function_blocks(fh)
@@ -101,17 +92,9 @@ class GhidraFeatureExtractor(StaticFeatureExtractor):
yield from capa.features.extractors.ghidra.basicblock.extract_features(fh, bbh)
def get_instructions(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[InsnHandle]:
import capa.features.extractors.ghidra.helpers as ghidra_helpers
yield from ghidra_helpers.get_insn_in_range(bbh)
def extract_insn_features(self, fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle):
yield from capa.features.extractors.ghidra.insn.extract_features(fh, bbh, ih)
def cleanup(ctx_manager, tmpdir):
if ctx_manager:
with contextlib.suppress(Exception):
ctx_manager.__exit__(None, None, None)
if tmpdir:
with contextlib.suppress(Exception):
tmpdir.cleanup()
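As an aside, the cleanup hook above follows a general finalizer pattern; here is a minimal, self-contained sketch of it (the `Extractor` class and its temporary directory are illustrative, not part of capa's API):
```python
import weakref
import tempfile
import contextlib


def cleanup(tmpdir):
    # tolerate a tmpdir that has already been cleaned up or removed
    with contextlib.suppress(Exception):
        tmpdir.cleanup()


class Extractor:
    def __init__(self):
        self.tmpdir = tempfile.TemporaryDirectory()
        # weakref.finalize invokes the callback once, when the object is
        # garbage collected or at interpreter shutdown, avoiding the
        # reference-cycle pitfalls of __del__ and giving deterministic
        # cleanup on exit
        weakref.finalize(self, cleanup, self.tmpdir)
```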
View File
@@ -80,54 +80,22 @@ def extract_file_embedded_pe() -> Iterator[tuple[Feature, Address]]:
for i in range(256)
]
for block in capa.features.extractors.ghidra.helpers.get_current_program().getMemory().getBlocks():
for block in currentProgram().getMemory().getBlocks(): # type: ignore [name-defined] # noqa: F821
if not all((block.isLoaded(), block.isInitialized(), "Headers" not in block.getName())):
continue
for off, _ in find_embedded_pe(capa.features.extractors.ghidra.helpers.get_block_bytes(block), mz_xor):
# add offset back to block start
ea_addr = block.getStart().add(off)
ea = ea_addr.getOffset()
f_offset = capa.features.extractors.ghidra.helpers.get_file_offset(ea_addr)
if f_offset != -1:
ea = f_offset
ea: int = block.getStart().add(off).getOffset()
yield Characteristic("embedded pe"), FileOffsetAddress(ea)
def extract_file_export_names() -> Iterator[tuple[Feature, Address]]:
"""extract function exports"""
program = capa.features.extractors.ghidra.helpers.get_current_program()
st = program.getSymbolTable()
st = currentProgram().getSymbolTable() # type: ignore [name-defined] # noqa: F821
for addr in st.getExternalEntryPointIterator():
sym = st.getPrimarySymbol(addr)
name = sym.getName()
# Check for forwarded export
is_forwarded = False
refs = program.getReferenceManager().getReferencesFrom(addr)
for ref in refs:
if ref.getToAddress().isExternalAddress():
ext_sym = st.getPrimarySymbol(ref.getToAddress())
if ext_sym:
ext_loc = program.getExternalManager().getExternalLocation(ext_sym)
if ext_loc:
# It is a forwarded export
libname = ext_loc.getLibraryName()
if libname.lower().endswith(".dll"):
libname = libname[:-4]
forwarded_name = f"{libname}.{ext_loc.getLabel()}"
forwarded_name = capa.features.extractors.helpers.reformat_forwarded_export_name(forwarded_name)
yield Export(forwarded_name), AbsoluteVirtualAddress(addr.getOffset())
yield Characteristic("forwarded export"), AbsoluteVirtualAddress(addr.getOffset())
is_forwarded = True
break
if not is_forwarded:
yield Export(name), AbsoluteVirtualAddress(addr.getOffset())
yield Export(st.getPrimarySymbol(addr).getName()), AbsoluteVirtualAddress(addr.getOffset())
def extract_file_import_names() -> Iterator[tuple[Feature, Address]]:
@@ -142,7 +110,7 @@ def extract_file_import_names() -> Iterator[tuple[Feature, Address]]:
- importname
"""
for f in capa.features.extractors.ghidra.helpers.get_current_program().getFunctionManager().getExternalFunctions():
for f in currentProgram().getFunctionManager().getExternalFunctions(): # type: ignore [name-defined] # noqa: F821
for r in f.getSymbol().getReferences():
if r.getReferenceType().isData():
addr = r.getFromAddress().getOffset() # gets pointer to fake external addr
@@ -158,14 +126,14 @@ def extract_file_import_names() -> Iterator[tuple[Feature, Address]]:
def extract_file_section_names() -> Iterator[tuple[Feature, Address]]:
"""extract section names"""
for block in capa.features.extractors.ghidra.helpers.get_current_program().getMemory().getBlocks():
for block in currentProgram().getMemory().getBlocks(): # type: ignore [name-defined] # noqa: F821
yield Section(block.getName()), AbsoluteVirtualAddress(block.getStart().getOffset())
def extract_file_strings() -> Iterator[tuple[Feature, Address]]:
"""extract ASCII and UTF-16 LE strings"""
for block in capa.features.extractors.ghidra.helpers.get_current_program().getMemory().getBlocks():
for block in currentProgram().getMemory().getBlocks(): # type: ignore [name-defined] # noqa: F821
if not block.isInitialized():
continue
@@ -185,8 +153,7 @@ def extract_file_function_names() -> Iterator[tuple[Feature, Address]]:
extract the names of statically-linked library functions.
"""
for sym in capa.features.extractors.ghidra.helpers.get_current_program().getSymbolTable().getAllSymbols(True):
for sym in currentProgram().getSymbolTable().getAllSymbols(True): # type: ignore [name-defined] # noqa: F821
# .isExternal() misses more than this config for the function symbols
if sym.getSymbolType() == SymbolType.FUNCTION and sym.getSource() == SourceType.ANALYSIS and sym.isGlobal():
name = sym.getName() # starts to resolve names based on Ghidra's FidDB
@@ -203,7 +170,7 @@ def extract_file_function_names() -> Iterator[tuple[Feature, Address]]:
def extract_file_format() -> Iterator[tuple[Feature, Address]]:
ef = capa.features.extractors.ghidra.helpers.get_current_program().getExecutableFormat()
ef = currentProgram().getExecutableFormat() # type: ignore [name-defined] # noqa: F821
if "PE" in ef:
yield Format(FORMAT_PE), NO_ADDRESS
elif "ELF" in ef:
@@ -231,3 +198,14 @@ FILE_HANDLERS = (
extract_file_function_names,
extract_file_format,
)
def main():
""" """
import pprint
pprint.pprint(list(extract_features())) # noqa: T203
if __name__ == "__main__":
main()
View File
@@ -26,25 +26,21 @@ from capa.features.extractors.base_extractor import FunctionHandle
def extract_function_calls_to(fh: FunctionHandle):
"""extract callers to a function"""
f: "ghidra.program.database.function.FunctionDB" = fh.inner
f: ghidra.program.database.function.FunctionDB = fh.inner
for ref in f.getSymbol().getReferences():
if ref.getReferenceType().isCall():
yield Characteristic("calls to"), AbsoluteVirtualAddress(ref.getFromAddress().getOffset())
def extract_function_loop(fh: FunctionHandle):
f: "ghidra.program.database.function.FunctionDB" = fh.inner
f: ghidra.program.database.function.FunctionDB = fh.inner
edges = []
for block in SimpleBlockIterator(
BasicBlockModel(capa.features.extractors.ghidra.helpers.get_current_program()),
f.getBody(),
capa.features.extractors.ghidra.helpers.get_monitor(),
):
dests = block.getDestinations(capa.features.extractors.ghidra.helpers.get_monitor())
for block in SimpleBlockIterator(BasicBlockModel(currentProgram()), f.getBody(), monitor()): # type: ignore [name-defined] # noqa: F821
dests = block.getDestinations(monitor()) # type: ignore [name-defined] # noqa: F821
s_addrs = block.getStartAddresses()
while dests.hasNext():
while dests.hasNext(): # For loop throws Python TypeError
for addr in s_addrs:
edges.append((addr.getOffset(), dests.next().getDestinationAddress().getOffset()))
@@ -53,17 +49,32 @@ def extract_function_loop(fh: FunctionHandle):
def extract_recursive_call(fh: FunctionHandle):
f: "ghidra.program.database.function.FunctionDB" = fh.inner
f: ghidra.program.database.function.FunctionDB = fh.inner
for func in f.getCalledFunctions(capa.features.extractors.ghidra.helpers.get_monitor()):
for func in f.getCalledFunctions(monitor()): # type: ignore [name-defined] # noqa: F821
if func.getEntryPoint().getOffset() == f.getEntryPoint().getOffset():
yield Characteristic("recursive call"), AbsoluteVirtualAddress(f.getEntryPoint().getOffset())
def extract_features(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
for function_handler in FUNCTION_HANDLERS:
for feature, addr in function_handler(fh):
for func_handler in FUNCTION_HANDLERS:
for feature, addr in func_handler(fh):
yield feature, addr
FUNCTION_HANDLERS = (extract_function_calls_to, extract_function_loop, extract_recursive_call)
def main():
""" """
features = []
for fhandle in capa.features.extractors.ghidra.helpers.get_function_symbols():
features.extend(list(extract_features(fhandle)))
import pprint
pprint.pprint(features) # noqa: T203
if __name__ == "__main__":
main()
View File
@@ -26,7 +26,7 @@ logger = logging.getLogger(__name__)
def extract_os() -> Iterator[tuple[Feature, Address]]:
format_name: str = capa.features.extractors.ghidra.helpers.get_current_program().getExecutableFormat()
format_name: str = currentProgram().getExecutableFormat() # type: ignore [name-defined] # noqa: F821
if "PE" in format_name:
yield OS(OS_WINDOWS), NO_ADDRESS
@@ -53,7 +53,7 @@ def extract_os() -> Iterator[tuple[Feature, Address]]:
def extract_arch() -> Iterator[tuple[Feature, Address]]:
lang_id = capa.features.extractors.ghidra.helpers.get_current_program().getMetadata().get("Language ID")
lang_id = currentProgram().getMetadata().get("Language ID") # type: ignore [name-defined] # noqa: F821
if "x86" in lang_id and "64" in lang_id:
yield Arch(ARCH_AMD64), NO_ADDRESS
View File
@@ -22,22 +22,9 @@ from ghidra.program.model.symbol import SourceType, SymbolType
from ghidra.program.model.address import AddressSpace
import capa.features.extractors.helpers
import capa.features.extractors.ghidra.context as ghidra_context
from capa.features.common import THUNK_CHAIN_DEPTH_DELTA
from capa.features.address import AbsoluteVirtualAddress
from capa.features.extractors.base_extractor import BBHandle, InsnHandle
def get_current_program():
return ghidra_context.get_context().program
def get_monitor():
return ghidra_context.get_context().monitor
def get_flat_api():
return ghidra_context.get_context().flat_api
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle
def ints_to_bytes(bytez: list[int]) -> bytes:
@@ -49,7 +36,7 @@ def ints_to_bytes(bytez: list[int]) -> bytes:
return bytes([b & 0xFF for b in bytez])
def find_byte_sequence(addr: "ghidra.program.model.address.Address", seq: bytes) -> Iterator[int]:
def find_byte_sequence(addr: ghidra.program.model.address.Address, seq: bytes) -> Iterator[int]:
"""yield all ea of a given byte sequence
args:
@@ -57,25 +44,12 @@ def find_byte_sequence(addr: "ghidra.program.model.address.Address", seq: bytes)
seq: bytes to search e.g. b"\x01\x03"
"""
seqstr = "".join([f"\\x{b:02x}" for b in seq])
eas = get_flat_api().findBytes(addr, seqstr, java.lang.Integer.MAX_VALUE, 1)
eas = findBytes(addr, seqstr, java.lang.Integer.MAX_VALUE, 1) # type: ignore [name-defined] # noqa: F821
yield from eas
def get_file_offset(addr: "ghidra.program.model.address.Address") -> int:
"""get file offset for an address"""
block = get_current_program().getMemory().getBlock(addr)
if not block:
return -1
for info in block.getSourceInfos():
if info.contains(addr):
return info.getFileBytesOffset(addr)
return -1
def get_bytes(addr: "ghidra.program.model.address.Address", length: int) -> bytes:
def get_bytes(addr: ghidra.program.model.address.Address, length: int) -> bytes:
"""yield length bytes at addr
args:
@@ -83,12 +57,12 @@ def get_bytes(addr: "ghidra.program.model.address.Address", length: int) -> byte
length: length of bytes to pull
"""
try:
return ints_to_bytes(get_flat_api().getBytes(addr, int(length)))
except Exception:
return ints_to_bytes(getBytes(addr, length)) # type: ignore [name-defined] # noqa: F821
except RuntimeError:
return b""
def get_block_bytes(block: "ghidra.program.model.mem.MemoryBlock") -> bytes:
def get_block_bytes(block: ghidra.program.model.mem.MemoryBlock) -> bytes:
"""yield all bytes in a given block
args:
@@ -99,21 +73,20 @@ def get_block_bytes(block: "ghidra.program.model.mem.MemoryBlock") -> bytes:
def get_function_symbols():
"""yield all non-external function symbols"""
yield from get_current_program().getFunctionManager().getFunctionsNoStubs(True)
yield from currentProgram().getFunctionManager().getFunctionsNoStubs(True) # type: ignore [name-defined] # noqa: F821
def get_function_blocks(fh: "capa.features.extractors.base_extractor.FunctionHandle") -> Iterator[BBHandle]:
"""
yield the basic blocks of the function
"""
def get_function_blocks(fh: FunctionHandle) -> Iterator[BBHandle]:
"""yield BBHandle for each bb in a given function"""
for block in SimpleBlockIterator(BasicBlockModel(get_current_program()), fh.inner.getBody(), get_monitor()):
yield BBHandle(address=AbsoluteVirtualAddress(block.getMinAddress().getOffset()), inner=block)
func: ghidra.program.database.function.FunctionDB = fh.inner
for bb in SimpleBlockIterator(BasicBlockModel(currentProgram()), func.getBody(), monitor()): # type: ignore [name-defined] # noqa: F821
yield BBHandle(address=AbsoluteVirtualAddress(bb.getMinAddress().getOffset()), inner=bb)
def get_insn_in_range(bbh: BBHandle) -> Iterator[InsnHandle]:
"""yield InshHandle for each insn in a given basicblock"""
for insn in get_current_program().getListing().getInstructions(bbh.inner, True):
for insn in currentProgram().getListing().getInstructions(bbh.inner, True): # type: ignore [name-defined] # noqa: F821
yield InsnHandle(address=AbsoluteVirtualAddress(insn.getAddress().getOffset()), inner=insn)
@@ -122,7 +95,7 @@ def get_file_imports() -> dict[int, list[str]]:
import_dict: dict[int, list[str]] = {}
for f in get_current_program().getFunctionManager().getExternalFunctions():
for f in currentProgram().getFunctionManager().getExternalFunctions(): # type: ignore [name-defined] # noqa: F821
for r in f.getSymbol().getReferences():
if r.getReferenceType().isData():
addr = r.getFromAddress().getOffset() # gets pointer to fake external addr
@@ -160,7 +133,7 @@ def get_file_externs() -> dict[int, list[str]]:
extern_dict: dict[int, list[str]] = {}
for sym in get_current_program().getSymbolTable().getAllSymbols(True):
for sym in currentProgram().getSymbolTable().getAllSymbols(True): # type: ignore [name-defined] # noqa: F821
# .isExternal() misses more than this config for the function symbols
if sym.getSymbolType() == SymbolType.FUNCTION and sym.getSource() == SourceType.ANALYSIS and sym.isGlobal():
name = sym.getName() # starts to resolve names based on Ghidra's FidDB
@@ -198,7 +171,7 @@ def map_fake_import_addrs() -> dict[int, list[int]]:
"""
fake_dict: dict[int, list[int]] = {}
for f in get_current_program().getFunctionManager().getExternalFunctions():
for f in currentProgram().getFunctionManager().getExternalFunctions(): # type: ignore [name-defined] # noqa: F821
for r in f.getSymbol().getReferences():
if r.getReferenceType().isData():
fake_dict.setdefault(f.getEntryPoint().getOffset(), []).append(r.getFromAddress().getOffset())
@@ -207,7 +180,7 @@ def map_fake_import_addrs() -> dict[int, list[int]]:
def check_addr_for_api(
addr: "ghidra.program.model.address.Address",
addr: ghidra.program.model.address.Address,
fakes: dict[int, list[int]],
imports: dict[int, list[str]],
externs: dict[int, list[str]],
@@ -229,18 +202,18 @@ def check_addr_for_api(
return False
def is_call_or_jmp(insn: "ghidra.program.database.code.InstructionDB") -> bool:
def is_call_or_jmp(insn: ghidra.program.database.code.InstructionDB) -> bool:
return any(mnem in insn.getMnemonicString() for mnem in ["CALL", "J"]) # JMP, JNE, JNZ, etc
def is_sp_modified(insn: "ghidra.program.database.code.InstructionDB") -> bool:
def is_sp_modified(insn: ghidra.program.database.code.InstructionDB) -> bool:
for i in range(insn.getNumOperands()):
if insn.getOperandType(i) == OperandType.REGISTER:
return "SP" in insn.getRegister(i).getName() and insn.getOperandRefType(i).isWrite()
return False
def is_stack_referenced(insn: "ghidra.program.database.code.InstructionDB") -> bool:
def is_stack_referenced(insn: ghidra.program.database.code.InstructionDB) -> bool:
"""generic catch-all for stack references"""
for i in range(insn.getNumOperands()):
if insn.getOperandType(i) == OperandType.REGISTER:
@@ -252,7 +225,7 @@ def is_stack_referenced(insn: "ghidra.program.database.code.InstructionDB") -> b
return any(ref.isStackReference() for ref in insn.getReferencesFrom())
def is_zxor(insn: "ghidra.program.database.code.InstructionDB") -> bool:
def is_zxor(insn: ghidra.program.database.code.InstructionDB) -> bool:
# assume XOR insn
# XOR's against the same operand zero out
ops = []
@@ -268,29 +241,29 @@ def is_zxor(insn: "ghidra.program.database.code.InstructionDB") -> bool:
return all(n == operands[0] for n in operands)
def handle_thunk(addr: "ghidra.program.model.address.Address"):
def handle_thunk(addr: ghidra.program.model.address.Address):
"""Follow thunk chains down to a reasonable depth"""
ref = addr
for _ in range(THUNK_CHAIN_DEPTH_DELTA):
thunk_jmp = get_flat_api().getInstructionAt(ref)
thunk_jmp = getInstructionAt(ref) # type: ignore [name-defined] # noqa: F821
if thunk_jmp and is_call_or_jmp(thunk_jmp):
if OperandType.isAddress(thunk_jmp.getOperandType(0)):
ref = thunk_jmp.getAddress(0)
else:
thunk_dat = get_flat_api().getDataContaining(ref)
thunk_dat = getDataContaining(ref) # type: ignore [name-defined] # noqa: F821
if thunk_dat and thunk_dat.isDefined() and thunk_dat.isPointer():
ref = thunk_dat.getValue()
break # end of thunk chain reached
return ref
def dereference_ptr(insn: "ghidra.program.database.code.InstructionDB"):
def dereference_ptr(insn: ghidra.program.database.code.InstructionDB):
addr_code = OperandType.ADDRESS | OperandType.CODE
to_deref = insn.getAddress(0)
dat = get_flat_api().getDataContaining(to_deref)
dat = getDataContaining(to_deref) # type: ignore [name-defined] # noqa: F821
if insn.getOperandType(0) == addr_code:
thfunc = get_flat_api().getFunctionContaining(to_deref)
thfunc = getFunctionContaining(to_deref) # type: ignore [name-defined] # noqa: F821
if thfunc and thfunc.isThunk():
return handle_thunk(to_deref)
else:
@@ -321,7 +294,7 @@ def find_data_references_from_insn(insn, max_depth: int = 10):
to_addr = reference.getToAddress()
for _ in range(max_depth - 1):
data = get_flat_api().getDataAt(to_addr)
data = getDataAt(to_addr) # type: ignore [name-defined] # noqa: F821
if data and data.isPointer():
ptr_value = data.getValue()
View File
@@ -234,7 +234,7 @@ def extract_insn_bytes_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle
push offset iid_004118d4_IShellLinkA ; riid
"""
for addr in capa.features.extractors.ghidra.helpers.find_data_references_from_insn(ih.inner):
data = capa.features.extractors.ghidra.helpers.get_flat_api().getDataAt(addr)
data = getDataAt(addr) # type: ignore [name-defined] # noqa: F821
if data and not data.hasStringValue():
extracted_bytes = capa.features.extractors.ghidra.helpers.get_bytes(addr, MAX_BYTES_FEATURE_SIZE)
if extracted_bytes and not capa.features.extractors.helpers.all_zeros(extracted_bytes):
@@ -249,9 +249,9 @@ def extract_insn_string_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandl
push offset aAcr ; "ACR > "
"""
for addr in capa.features.extractors.ghidra.helpers.find_data_references_from_insn(ih.inner):
data = capa.features.extractors.ghidra.helpers.get_flat_api().getDataAt(addr)
data = getDataAt(addr) # type: ignore [name-defined] # noqa: F821
if data and data.hasStringValue():
yield String(str(data.getValue())), ih.address
yield String(data.getValue()), ih.address
def extract_insn_mnemonic_features(
@@ -361,8 +361,8 @@ def extract_insn_cross_section_cflow(
if capa.features.extractors.ghidra.helpers.check_addr_for_api(ref, fakes, imports, externs):
return
this_mem_block = capa.features.extractors.ghidra.helpers.get_flat_api().getMemoryBlock(insn.getAddress())
ref_block = capa.features.extractors.ghidra.helpers.get_flat_api().getMemoryBlock(ref)
this_mem_block = getMemoryBlock(insn.getAddress()) # type: ignore [name-defined] # noqa: F821
ref_block = getMemoryBlock(ref) # type: ignore [name-defined] # noqa: F821
if ref_block != this_mem_block:
yield Characteristic("cross section flow"), ih.address
@@ -425,19 +425,19 @@ def check_nzxor_security_cookie_delta(
Check if insn within last addr of last bb - delta
"""
model = SimpleBlockModel(capa.features.extractors.ghidra.helpers.get_current_program())
model = SimpleBlockModel(currentProgram()) # type: ignore [name-defined] # noqa: F821
insn_addr = insn.getAddress()
func_asv = fh.getBody()
first_addr = func_asv.getMinAddress()
if insn_addr < first_addr.add(SECURITY_COOKIE_BYTES_DELTA):
first_bb = model.getFirstCodeBlockContaining(first_addr, capa.features.extractors.ghidra.helpers.get_monitor())
first_bb = model.getFirstCodeBlockContaining(first_addr, monitor()) # type: ignore [name-defined] # noqa: F821
if first_bb.contains(insn_addr):
return True
last_addr = func_asv.getMaxAddress()
if insn_addr > last_addr.add(SECURITY_COOKIE_BYTES_DELTA * -1):
last_bb = model.getFirstCodeBlockContaining(last_addr, capa.features.extractors.ghidra.helpers.get_monitor())
last_bb = model.getFirstCodeBlockContaining(last_addr, monitor()) # type: ignore [name-defined] # noqa: F821
if last_bb.contains(insn_addr):
return True
@@ -488,3 +488,22 @@ INSTRUCTION_HANDLERS = (
extract_function_calls_from,
extract_function_indirect_call_characteristic_features,
)
def main():
""" """
features = []
from capa.features.extractors.ghidra.extractor import GhidraFeatureExtractor
for fh in GhidraFeatureExtractor().get_functions():
for bb in capa.features.extractors.ghidra.helpers.get_function_blocks(fh):
for insn in capa.features.extractors.ghidra.helpers.get_insn_in_range(bb):
features.extend(list(extract_features(fh, bb, insn)))
import pprint
pprint.pprint(features) # noqa: T203
if __name__ == "__main__":
main()
View File
@@ -18,7 +18,6 @@ import idaapi
import idautils
import capa.features.extractors.ida.helpers
from capa.features.file import FunctionName
from capa.features.common import Feature, Characteristic
from capa.features.address import Address, AbsoluteVirtualAddress
from capa.features.extractors import loops
@@ -51,39 +50,10 @@ def extract_recursive_call(fh: FunctionHandle):
yield Characteristic("recursive call"), fh.address
def extract_function_name(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
ea = fh.inner.start_ea
name = idaapi.get_name(ea)
if name.startswith("sub_"):
# skip default names, like "sub_401000"
return
yield FunctionName(name), fh.address
if name.startswith("_"):
# some linkers may prefix linked routines with a `_` to avoid name collisions.
# extract features for both the mangled and un-mangled representations.
# e.g. `_fwrite` -> `fwrite`
# see: https://stackoverflow.com/a/2628384/87207
yield FunctionName(name[1:]), fh.address
def extract_function_alternative_names(fh: FunctionHandle):
"""Get all alternative names for an address."""
for aname in capa.features.extractors.ida.helpers.get_function_alternative_names(fh.inner.start_ea):
yield FunctionName(aname), fh.address
def extract_features(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
for func_handler in FUNCTION_HANDLERS:
for feature, addr in func_handler(fh):
yield feature, addr
FUNCTION_HANDLERS = (
extract_function_calls_to,
extract_function_loop,
extract_recursive_call,
extract_function_name,
extract_function_alternative_names,
)
FUNCTION_HANDLERS = (extract_function_calls_to, extract_function_loop, extract_recursive_call)
View File
@@ -20,7 +20,6 @@ import idaapi
import ida_nalt
import idautils
import ida_bytes
import ida_funcs
import ida_segment
from capa.features.address import AbsoluteVirtualAddress
@@ -437,16 +436,3 @@ def is_basic_block_return(bb: idaapi.BasicBlock) -> bool:
def has_sib(oper: idaapi.op_t) -> bool:
# via: https://reverseengineering.stackexchange.com/a/14300
return oper.specflag1 == 1
def find_alternative_names(cmt: str):
for line in cmt.split("\n"):
if line.startswith("Alternative name is '") and line.endswith("'"):
name = line[len("Alternative name is '") : -1] # Extract name between quotes
yield name
def get_function_alternative_names(fva: int):
"""Get all alternative names for an address."""
yield from find_alternative_names(ida_bytes.get_cmt(fva, False) or "")
yield from find_alternative_names(ida_funcs.get_func_cmt(idaapi.get_func(fva), False) or "")
View File
@@ -22,7 +22,6 @@ import idautils
import capa.features.extractors.helpers
import capa.features.extractors.ida.helpers
from capa.features.file import FunctionName
from capa.features.insn import API, MAX_STRUCTURE_SIZE, Number, Offset, Mnemonic, OperandNumber, OperandOffset
from capa.features.common import MAX_BYTES_FEATURE_SIZE, THUNK_CHAIN_DEPTH_DELTA, Bytes, String, Feature, Characteristic
from capa.features.address import Address, AbsoluteVirtualAddress
@@ -130,8 +129,8 @@ def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle)
# not a function (start)
return
name = idaapi.get_name(target_func.start_ea)
if target_func.flags & idaapi.FUNC_LIB or not name.startswith("sub_"):
if target_func.flags & idaapi.FUNC_LIB:
name = idaapi.get_name(target_func.start_ea)
yield API(name), ih.address
if name.startswith("_"):
# some linkers may prefix linked routines with a `_` to avoid name collisions.
@@ -140,10 +139,6 @@ def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle)
# see: https://stackoverflow.com/a/2628384/87207
yield API(name[1:]), ih.address
for altname in capa.features.extractors.ida.helpers.get_function_alternative_names(target_func.start_ea):
yield FunctionName(altname), ih.address
yield API(altname), ih.address
def extract_insn_number_features(
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
View File
@@ -1,75 +1,107 @@
# capa analysis using Ghidra
<div align="center">
<img src="../../doc/img/ghidra_backend_logo.png" width=240 height=125>
</div>
capa supports using Ghidra (via [PyGhidra](https://github.com/NationalSecurityAgency/ghidra/tree/master/Ghidra/Features/PyGhidra)) as a feature extraction backend. This enables you to run capa against binaries using Ghidra's analysis engine.
# capa + Ghidra
[capa](https://github.com/mandiant/capa) is the FLARE team's open-source tool that detects capabilities in executable files. [Ghidra](https://github.com/NationalSecurityAgency/ghidra) is an open-source software reverse engineering framework created and maintained by the National Security Agency Research Directorate. capa + Ghidra brings capa's detection capabilities directly to Ghidra's user interface, helping speed up your reverse engineering tasks by identifying what parts of a program suggest interesting behavior, such as setting a registry value. You can execute the included Python 3 scripts [capa_explorer.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/capa_explorer.py) or [capa_ghidra.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/capa_ghidra.py) to run capa's analysis and view the results in Ghidra. You may be asking yourself, “Python 3 scripts in Ghidra?”. You read that correctly. This integration is written entirely in Python 3 and relies on [Ghidrathon](https://github.com/mandiant/ghidrathon), an open-source Ghidra extension that adds Python 3 scripting to Ghidra.
Check out our capa + Ghidra blog posts:
* [Riding Dragons: capa Harnesses Ghidra](https://www.mandiant.com/resources/blog/capa-harnesses-ghidra)
## UI Integration
[capa_explorer.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/capa_explorer.py) renders capa results in Ghidra's UI to help you quickly navigate them. This includes adding matched functions to Ghidra's Symbol Tree and Bookmarks windows and adding comments to functions that indicate matched capabilities and features. You can execute this script using Ghidra's Script Manager window.
### Symbol Tree Window
Matched functions are added to Ghidra's Symbol Tree window under a custom namespace that maps to the capabilities' [capa namespace](https://github.com/mandiant/capa-rules/blob/master/doc/format.md#rule-namespace).
<div align="center">
<img src="https://github.com/mandiant/capa/assets/66766340/eeae33f4-99d4-42dc-a5e8-4c1b8c661492" width=300>
</div>
### Comments
Comments are added at the beginning of matched functions indicating matched capabilities, and inline comments are added to functions indicating matched features. You can view these comments in Ghidra's Disassembly Listing and Decompile windows.
<div align="center">
<img src="https://github.com/mandiant/capa/assets/66766340/bb2b4170-7fd4-45fc-8c7b-ff8f2e2f101b" width=1000>
</div>
### Bookmarks
Bookmarks are added to functions that matched a capability that is mapped to a MITRE ATT&CK and/or Malware Behavior Catalog (MBC) technique. You can view these bookmarks in Ghidra's Bookmarks window.
<div align="center">
<img src="https://github.com/mandiant/capa/assets/66766340/7f9a66a9-7be7-4223-91c6-4b8fc4651336" width=825>
</div>
## Text-based Integration
[capa_ghidra.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/capa_ghidra.py) outputs text-based capa results that mirror the output of capa's standalone tool. You can execute this script using Ghidra's Script Manager and view its output in Ghidra's Console window.
<div align="center">
<img src="../../doc/img/ghidra_script_mngr_output.png" width=700>
</div>
You can also execute [capa_ghidra.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/capa_ghidra.py) using Ghidra's Headless Analyzer to view its output in a terminal window.
<div align="center">
<img src="../../doc/img/ghidra_headless_analyzer.png">
</div>
# Getting Started
## Requirements
| Tool | Version | Source |
|------------|---------|--------|
| capa | `>= 7.0.0` | https://github.com/mandiant/capa/releases |
| Ghidrathon | `>= 3.0.0` | https://github.com/mandiant/Ghidrathon/releases |
| Ghidra | `>= 10.3.2` | https://github.com/NationalSecurityAgency/ghidra/releases |
| Python | `>= 3.10.0` | https://www.python.org/downloads |
## Installation
**Note**: capa + Ghidra relies on [Ghidrathon](https://github.com/mandiant/ghidrathon) to execute Python 3 code in Ghidra. You must first install and configure Ghidrathon using the [steps outlined in its README](https://github.com/mandiant/ghidrathon?tab=readme-ov-file#installing-ghidrathon). Then, you must use the Python 3 interpreter that you configured with Ghidrathon to complete the following steps:
1. Install capa and its dependencies from PyPI using the following command:
```bash
$ capa -b ghidra Practical\ Malware\ Analysis\ Lab\ 01-01.exe_
┌──────────┬──────────────────────────────────────────────────────────────────────────────────────────────────────┐
│ md5 │ bb7425b82141a1c0f7d60e5106676bb1 │
│ sha1 │ │
│ sha256 │ 58898bd42c5bd3bf9b1389f0eee5b39cd59180e8370eb9ea838a0b327bd6fe47 │
│ analysis │ static │
│ os │ windows │
│ format │ pe │
│ arch │ i386 │
│ path │ ~/Documents/capa/tests/data/Practical Malware Analysis Lab 01-01.exe_ │
└──────────┴──────────────────────────────────────────────────────────────────────────────────────────────────────┘
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ ATT&CK Tactic ┃ ATT&CK Technique ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
│ DISCOVERY                          │ File and Directory Discovery [T1083]                        │
└────────────────────────────────────┴─────────────────────────────────────────────────────────────┘
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ MBC Objective ┃ MBC Behavior ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
│ DISCOVERY                          │ File and Directory Discovery [E1083]                        │
│ FILE SYSTEM                        │ Copy File [C0045]                                           │
│                                    │ Read File [C0051]                                           │
│ PROCESS                            │ Terminate Process [C0018]                                   │
└────────────────────────────────────┴─────────────────────────────────────────────────────────────┘
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ Capability ┃ Namespace ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
│ copy file │ host-interaction/file-system/copy │
│ enumerate files recursively │ host-interaction/file-system/files/list │
│ read file via mapping (2 matches)              │ host-interaction/file-system/read               │
│ terminate process (2 matches) │ host-interaction/process/terminate │
│ resolve function by parsing PE exports │ load-code/pe │
└────────────────────────────────────────────────┴─────────────────────────────────────────────────┘
$ pip install flare-capa
```
## getting started
### requirements
- [Ghidra](https://github.com/NationalSecurityAgency/ghidra) >= 12.0 must be installed and available via the `GHIDRA_INSTALL_DIR` environment variable.
#### standalone binary (recommended)
The capa [standalone binary](https://github.com/mandiant/capa/releases) is the preferred way to run capa with the Ghidra backend.
Although the binary does not bundle the Java environment or Ghidra itself, it will dynamically load them at runtime.
#### python package
You can also use the Ghidra backend with the capa Python package by installing `flare-capa` with the `ghidra` extra.
2. Download and extract the [official capa rules](https://github.com/mandiant/capa-rules/releases) that match the capa version you have installed. You can use the following command to view the version of capa you have installed:
```bash
$ pip install "flare-capa[ghidra]"
$ pip show flare-capa
OR
$ capa --version
```
### usage
3. Copy [capa_explorer.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/capa_explorer.py) and [capa_ghidra.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/capa_ghidra.py) to your `ghidra_scripts` directory or manually add the parent directory of each script using Ghidra's Script Manager.
To use the Ghidra backend, specify it with the `-b` or `--backend` flag:
## Usage
You can execute [capa_explorer.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/capa_explorer.py) and [capa_ghidra.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/capa_ghidra.py) using Ghidra's Script Manager. [capa_ghidra.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/capa_ghidra.py) can also be executed using Ghidra's Headless Analyzer.
### Execution using Ghidra's Script Manager
You can execute [capa_explorer.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/capa_explorer.py) and [capa_ghidra.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/capa_ghidra.py) using Ghidra's Script Manager as follows:
1. Navigate to `Window > Script Manager`
2. Expand the `Python 3 > capa` category
3. Double-click a script to execute it
Both scripts ask you to provide the path of your capa rules directory (see installation step 2). [capa_ghidra.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/capa_ghidra.py) also has you choose one of `default`, `verbose`, and `vverbose` output formats, which mirror the output formats of capa's standalone tool.
### Execution using Ghidra's Headless Analyzer
You can execute [capa_ghidra.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/capa_ghidra.py) using Ghidra's Headless Analyzer by invoking the `analyzeHeadless` script included with Ghidra in its `support` directory. The following arguments must be provided:
| Argument | Description |
|----|----|
|`<project_path>`| Path to Ghidra project|
| `<project_name>`| Name of Ghidra Project|
| `-Process <sample_name>` OR `-Import <sample_path>`| Name of sample `<sample_name>` already imported into `<project_name>` OR absolute path of sample `<sample_path>` to import into `<project_name>`|
| `-ScriptPath <script_path>`| OPTIONAL parent directory `<script_path>` of [capa_ghidra.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/capa_ghidra.py)|
| `-PostScript capa_ghidra.py`| Execute [capa_ghidra.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/capa_ghidra.py) after Ghidra analysis|
| `"<script_args>"`| Quoted string `"<script_args>"` containing script arguments passed to [capa_ghidra.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/capa_ghidra.py) that must specify a capa rules path and optionally the output format (`--verbose`, `--vverbose`, `--json`) you can specify `”help”` to view the scripts help message |
The following is an example of combining these arguments into a single `analyzeHeadless` script command:
```bash
$ capa -b ghidra /path/to/sample
$ analyzeHeadless /home/wumbo/demo demo -Import /home/wumbo/capa/tests/data/Practical\ Malware\ Analysis\ Lab\ 01-01.dll_ -PostScript capa_ghidra.py "/home/wumbo/capa/rules --verbose"
```
capa will:
1. Initialize a headless Ghidra instance.
2. Create a temporary project.
3. Import and analyze the sample.
4. Extract features and match rules.
5. Clean up the temporary project.
**Note:** The first time you run this, it may take a few moments to initialize the Ghidra environment.
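To watch these steps as they happen, you can enable capa's debug output, e.g. (sample path illustrative):
```bash
capa -b ghidra -d /path/to/sample
```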
View File
@@ -1,3 +1,7 @@
# Run capa against loaded Ghidra database and render results in Ghidra UI
# @author Colton Gabertan (gabertan.colton@gmail.com)
# @category Python 3.capa
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -12,63 +16,36 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# Run capa against loaded Ghidra database and render results in Ghidra UI
# @author Colton Gabertan (gabertan.colton@gmail.com)
# @category capa
# @runtime PyGhidra
import sys
import json
import logging
import pathlib
from typing import Any
from java.util import ArrayList
from ghidra.util import Msg
from ghidra.app.cmd.label import AddLabelCmd, CreateNamespacesCmd
from ghidra.util.exception import CancelledException
from ghidra.program.flatapi import FlatProgramAPI
from ghidra.program.model.symbol import Namespace, SourceType, SymbolType
import capa
import capa.main
import capa.rules
import capa.version
import capa.render.json
import capa.ghidra.helpers
import capa.capabilities.common
import capa.features.extractors.ghidra.context
import capa.features.extractors.ghidra.extractor
logger = logging.getLogger("capa_explorer")
def show_monitor_message(msg):
capa.ghidra.helpers.get_monitor().checkCanceled()
capa.ghidra.helpers.get_monitor().setMessage(msg)
def show_error(msg):
Msg.showError(None, None, "capa explorer", msg)
def show_warn(msg):
Msg.showWarn(None, None, "capa explorer", msg)
def show_info(msg):
Msg.showInfo(None, None, "capa explorer", msg)
def add_bookmark(addr, txt, category="CapaExplorer"):
"""create bookmark at addr"""
capa.ghidra.helpers.get_current_program().getBookmarkManager().setBookmark(addr, "Info", category, txt)
currentProgram().getBookmarkManager().setBookmark(addr, "Info", category, txt) # type: ignore [name-defined] # noqa: F821
def create_namespace(namespace_str):
"""create new Ghidra namespace for each capa namespace"""
cmd = CreateNamespacesCmd(namespace_str, SourceType.USER_DEFINED)
cmd.applyTo(capa.ghidra.helpers.get_current_program())
cmd.applyTo(currentProgram()) # type: ignore [name-defined] # noqa: F821
return cmd.getNamespace()
@@ -76,7 +53,7 @@ def create_label(ghidra_addr, name, capa_namespace):
"""custom label cmd to overlay symbols under capa-generated namespaces"""
# prevent duplicate labels under the same capa-generated namespace
symbol_table = capa.ghidra.helpers.get_current_program().getSymbolTable()
symbol_table = currentProgram().getSymbolTable() # type: ignore [name-defined] # noqa: F821
for sym in symbol_table.getSymbols(ghidra_addr):
if sym.getName(True) == capa_namespace.getName(True) + Namespace.DELIMITER + name:
return
@@ -84,7 +61,7 @@ def create_label(ghidra_addr, name, capa_namespace):
# create SymbolType.LABEL at addr
# prioritize capa-generated namespace (duplicate match @ new addr), else put under global Ghidra one (new match)
cmd = AddLabelCmd(ghidra_addr, name, True, SourceType.USER_DEFINED)
cmd.applyTo(capa.ghidra.helpers.get_current_program())
cmd.applyTo(currentProgram()) # type: ignore [name-defined] # noqa: F821
# assign new match overlay label to capa-generated namespace
cmd.getSymbol().setNamespace(capa_namespace)
@@ -115,8 +92,8 @@ class CapaMatchData:
return
for key in self.matches.keys():
addr = capa.ghidra.helpers.get_flat_api().toAddr(hex(key))
func = capa.ghidra.helpers.get_flat_api().getFunctionContaining(addr)
addr = toAddr(hex(key)) # type: ignore [name-defined] # noqa: F821
func = getFunctionContaining(addr) # type: ignore [name-defined] # noqa: F821
# bookmark & tag MITRE ATT&CK tactics & MBC @ function scope
if func is not None:
@@ -140,160 +117,140 @@ class CapaMatchData:
def set_plate_comment(self, ghidra_addr):
"""set plate comments at matched functions"""
comment = capa.ghidra.helpers.get_flat_api().getPlateComment(ghidra_addr)
comment = getPlateComment(ghidra_addr) # type: ignore [name-defined] # noqa: F821
rule_path = self.namespace.replace(Namespace.DELIMITER, "/")
# 2 calls to avoid duplicate comments via subsequent script runs
if comment is None:
# first comment @ function
comment = rule_path + "\n"
capa.ghidra.helpers.get_flat_api().setPlateComment(ghidra_addr, comment)
setPlateComment(ghidra_addr, comment) # type: ignore [name-defined] # noqa: F821
elif rule_path not in comment:
comment = comment + rule_path + "\n"
capa.ghidra.helpers.get_flat_api().setPlateComment(ghidra_addr, comment)
setPlateComment(ghidra_addr, comment) # type: ignore [name-defined] # noqa: F821
else:
return
def set_pre_comment(self, ghidra_addr, sub_type, description):
"""set pre comments at subscoped matches of main rules"""
comment = capa.ghidra.helpers.get_flat_api().getPreComment(ghidra_addr)
comment = getPreComment(ghidra_addr) # type: ignore [name-defined] # noqa: F821
if comment is None:
comment = "capa: " + sub_type + "(" + description + ")" + ' matched in "' + self.capability + '"\n'
capa.ghidra.helpers.get_flat_api().setPreComment(ghidra_addr, comment)
setPreComment(ghidra_addr, comment) # type: ignore [name-defined] # noqa: F821
elif self.capability not in comment:
comment = (
comment + "capa: " + sub_type + "(" + description + ")" + ' matched in "' + self.capability + '"\n'
)
capa.ghidra.helpers.get_flat_api().setPreComment(ghidra_addr, comment)
setPreComment(ghidra_addr, comment) # type: ignore [name-defined] # noqa: F821
else:
return
def label_matches(self, do_namespaces, do_comments):
def label_matches(self):
"""label findings at function scopes and comment on subscope matches"""
capa_namespace = None
if do_namespaces:
capa_namespace = create_namespace(self.namespace)
symbol_table = capa.ghidra.helpers.get_current_program().getSymbolTable()
capa_namespace = create_namespace(self.namespace)
symbol_table = currentProgram().getSymbolTable() # type: ignore [name-defined] # noqa: F821
# handle function main scope of matched rule
# these will typically contain further matches within
if self.scope == "function":
for addr in self.matches.keys():
ghidra_addr = capa.ghidra.helpers.get_flat_api().toAddr(hex(addr))
ghidra_addr = toAddr(hex(addr)) # type: ignore [name-defined] # noqa: F821
# classify new function label under capa-generated namespace
if do_namespaces:
sym = symbol_table.getPrimarySymbol(ghidra_addr)
if sym is not None:
if sym.getSymbolType() == SymbolType.FUNCTION:
create_label(ghidra_addr, sym.getName(), capa_namespace)
sym = symbol_table.getPrimarySymbol(ghidra_addr)
if sym is not None:
if sym.getSymbolType() == SymbolType.FUNCTION:
create_label(ghidra_addr, sym.getName(), capa_namespace)
self.set_plate_comment(ghidra_addr)
if do_comments:
self.set_plate_comment(ghidra_addr)
# parse the corresponding nodes, and pre-comment subscope matched features
# under the encompassing function(s)
for sub_match in self.matches.get(addr):
for loc, node in sub_match.items():
sub_ghidra_addr = toAddr(hex(loc)) # type: ignore [name-defined] # noqa: F821
if sub_ghidra_addr == ghidra_addr:
# skip duplicates
continue
# parse the corresponding nodes, and pre-comment subscope matched features
# under the encompassing function(s)
for sub_match in self.matches.get(addr):
for loc, node in sub_match.items():
sub_ghidra_addr = capa.ghidra.helpers.get_flat_api().toAddr(hex(loc))
if sub_ghidra_addr == ghidra_addr:
# skip duplicates
continue
# precomment subscope matches under the function
if node != {} and do_comments:
for sub_type, description in parse_node(node):
self.set_pre_comment(sub_ghidra_addr, sub_type, description)
# precomment subscope matches under the function
if node != {}:
for sub_type, description in parse_node(node):
self.set_pre_comment(sub_ghidra_addr, sub_type, description)
else:
# resolve the encompassing function for the capa namespace
# of non-function scoped main matches
for addr in self.matches.keys():
ghidra_addr = capa.ghidra.helpers.get_flat_api().toAddr(hex(addr))
ghidra_addr = toAddr(hex(addr)) # type: ignore [name-defined] # noqa: F821
# basic block / insn scoped main matches
# Ex. See "Create Process on Windows" Rule
func = capa.ghidra.helpers.get_flat_api().getFunctionContaining(ghidra_addr)
func = getFunctionContaining(ghidra_addr) # type: ignore [name-defined] # noqa: F821
if func is not None:
func_addr = func.getEntryPoint()
if do_namespaces:
create_label(func_addr, func.getName(), capa_namespace)
if do_comments:
self.set_plate_comment(func_addr)
create_label(func_addr, func.getName(), capa_namespace)
self.set_plate_comment(func_addr)
# create subscope match precomments
for sub_match in self.matches.get(addr):
for loc, node in sub_match.items():
sub_ghidra_addr = capa.ghidra.helpers.get_flat_api().toAddr(hex(loc))
sub_ghidra_addr = toAddr(hex(loc)) # type: ignore [name-defined] # noqa: F821
if node != {}:
if func is not None:
# basic block/ insn scope under resolved function
if do_comments:
for sub_type, description in parse_node(node):
self.set_pre_comment(sub_ghidra_addr, sub_type, description)
for sub_type, description in parse_node(node):
self.set_pre_comment(sub_ghidra_addr, sub_type, description)
else:
# this would be a global/file scoped main match
# try to resolve the encompassing function via the subscope match, instead
# Ex. "run as service" rule
sub_func = capa.ghidra.helpers.get_flat_api().getFunctionContaining(sub_ghidra_addr)
sub_func = getFunctionContaining(sub_ghidra_addr) # type: ignore [name-defined] # noqa: F821
if sub_func is not None:
sub_func_addr = sub_func.getEntryPoint()
# place function in capa namespace & create the subscope match label in Ghidra's global namespace
if do_namespaces:
create_label(sub_func_addr, sub_func.getName(), capa_namespace)
if do_comments:
self.set_plate_comment(sub_func_addr)
if do_comments:
for sub_type, description in parse_node(node):
self.set_pre_comment(sub_ghidra_addr, sub_type, description)
create_label(sub_func_addr, sub_func.getName(), capa_namespace)
self.set_plate_comment(sub_func_addr)
for sub_type, description in parse_node(node):
self.set_pre_comment(sub_ghidra_addr, sub_type, description)
else:
# addr is in some other file section like .data
# represent this location with a label symbol under the capa namespace
# Ex. See "Reference Base64 String" rule
if do_namespaces:
for _sub_type, _description in parse_node(node):
# in many cases, these will be ghidra-labeled data, so just add the existing
# label symbol to the capa namespace
for sym in symbol_table.getSymbols(sub_ghidra_addr):
if sym.getSymbolType() == SymbolType.LABEL:
sym.setNamespace(capa_namespace)
if do_comments:
for sub_type, description in parse_node(node):
self.set_pre_comment(sub_ghidra_addr, sub_type, description)
for sub_type, description in parse_node(node):
# in many cases, these will be ghidra-labeled data, so just add the existing
# label symbol to the capa namespace
for sym in symbol_table.getSymbols(sub_ghidra_addr):
if sym.getSymbolType() == SymbolType.LABEL:
sym.setNamespace(capa_namespace)
self.set_pre_comment(sub_ghidra_addr, sub_type, description)
def get_capabilities():
rules_dir = ""
show_monitor_message(f"requesting capa {capa.version.__version__} rules directory")
selected_dir = askDirectory(f"choose capa {capa.version.__version__} rules directory", "Ok") # type: ignore [name-defined] # noqa: F821
if selected_dir:
rules_dir = selected_dir.getPath()
rules_dir: str = ""
try:
selected_dir = askDirectory("Choose capa rules directory", "Ok") # type: ignore [name-defined] # noqa: F821
if selected_dir:
rules_dir = selected_dir.getPath()
except RuntimeError:
# RuntimeError thrown when user selects "Cancel"
pass
if not rules_dir:
raise CancelledException
logger.info("You must choose a capa rules directory before running capa.")
return "" # return empty str to avoid handling both int and str types
rules_path: pathlib.Path = pathlib.Path(rules_dir)
logger.info("running capa using rules from %s", str(rules_path))
show_monitor_message(f"loading rules from {rules_path}")
rules = capa.rules.get_rules([rules_path])
show_monitor_message("collecting binary metadata")
meta = capa.ghidra.helpers.collect_metadata([rules_path])
show_monitor_message("running capa analysis")
extractor = capa.features.extractors.ghidra.extractor.GhidraFeatureExtractor()
capabilities = capa.capabilities.common.find_capabilities(rules, extractor, True)
show_monitor_message("checking for static limitations")
if capa.capabilities.common.has_static_limitation(rules, capabilities, is_standalone=False):
show_warn(
"capa explorer encountered warnings during analysis. Please check the console output for more information.",
)
popup("capa explorer encountered warnings during analysis. Please check the console output for more information.") # type: ignore [name-defined] # noqa: F821
logger.info("capa encountered warnings during analysis")
show_monitor_message("rendering results")
return capa.render.json.render(meta, rules, capabilities.matches)
@@ -371,12 +328,12 @@ def parse_json(capa_data):
# this requires the correct delimiter used by Ghidra
# Ex. 'communication/named-pipe/create/create pipe' -> capa::communication::named-pipe::create::create-pipe
namespace_str = Namespace.DELIMITER.join(meta["namespace"].split("/"))
namespace = "capa_explorer" + Namespace.DELIMITER + namespace_str + fmt_rule
namespace = "capa" + Namespace.DELIMITER + namespace_str + fmt_rule
else:
# lib rules via the official rules repo will not contain data
# for the "namespaces" key, so format using rule itself
# Ex. 'contain loop' -> capa::lib::contain-loop
namespace = "capa_explorer" + Namespace.DELIMITER + "lib" + fmt_rule
namespace = "capa" + Namespace.DELIMITER + "lib" + fmt_rule
yield CapaMatchData(namespace, scope, rule, rule_matches, attack, mbc)
@@ -385,79 +342,44 @@ def main():
logging.basicConfig(level=logging.INFO)
logging.getLogger().setLevel(logging.INFO)
choices = ["namespaces", "bookmarks", "comments"]
# use ArrayList to resolve ambiguous askChoices overloads (List vs List, List) in PyGhidra
choices_java = ArrayList()
for c in choices:
choices_java.add(c)
if isRunningHeadless(): # type: ignore [name-defined] # noqa: F821
logger.error("unsupported Ghidra execution mode")
return capa.main.E_UNSUPPORTED_GHIDRA_EXECUTION_MODE
choice_labels = [
'add "capa_explorer" namespace for matched functions',
"add bookmarks for matched functions",
"add comments to matched functions",
]
# use ArrayList to resolve ambiguous askChoices overloads (List vs List, List) in PyGhidra
choice_labels_java = ArrayList()
for c in choice_labels:
choice_labels_java.add(c)
selected = list(askChoices("capa explorer", "select actions:", choices_java, choice_labels_java)) # type: ignore [name-defined] # noqa: F821
do_namespaces = "namespaces" in selected
do_comments = "comments" in selected
do_bookmarks = "bookmarks" in selected
if not any((do_namespaces, do_comments, do_bookmarks)):
raise CancelledException("no actions selected")
# initialize the context for the extractor/helpers
capa.features.extractors.ghidra.context.set_context(
currentProgram, # type: ignore [name-defined] # noqa: F821
FlatProgramAPI(currentProgram), # type: ignore [name-defined] # noqa: F821
monitor, # type: ignore [name-defined] # noqa: F821
)
show_monitor_message("checking supported Ghidra version")
if not capa.ghidra.helpers.is_supported_ghidra_version():
show_error("unsupported Ghidra version")
logger.error("unsupported Ghidra version")
return capa.main.E_UNSUPPORTED_GHIDRA_VERSION
show_monitor_message("checking supported file type")
if not capa.ghidra.helpers.is_supported_file_type():
show_error("unsupported file type")
logger.error("unsupported file type")
return capa.main.E_INVALID_FILE_TYPE
show_monitor_message("checking supported file architecture")
if not capa.ghidra.helpers.is_supported_arch_type():
show_error("unsupported file architecture")
logger.error("unsupported file architecture")
return capa.main.E_INVALID_FILE_ARCH
# capa_data will always contain {'meta':..., 'rules':...}
# if the 'rules' key contains no values, then there were no matches
capa_data = json.loads(get_capabilities())
if capa_data.get("rules") is None:
show_info("capa explorer found no matches.")
logger.info("capa explorer found no matches")
popup("capa explorer found no matches.") # type: ignore [name-defined] # noqa: F821
return capa.main.E_EMPTY_REPORT
show_monitor_message("processing matches")
for item in parse_json(capa_data):
if do_bookmarks:
show_monitor_message("adding bookmarks")
item.bookmark_functions()
if do_namespaces or do_comments:
show_monitor_message("adding labels")
item.label_matches(do_namespaces, do_comments)
show_info("capa explorer analysis complete.")
item.bookmark_functions()
item.label_matches()
logger.info("capa explorer analysis complete")
popup("capa explorer analysis complete.\nPlease see results in the Bookmarks Window and Namespaces section of the Symbol Tree Window.") # type: ignore [name-defined] # noqa: F821
return 0
if __name__ == "__main__":
try:
if main() != 0:
show_error(
"capa explorer encountered errors during analysis. Please check the console output for more information.",
)
except CancelledException:
show_info("capa explorer analysis cancelled.")
if sys.version_info < (3, 10):
from capa.exceptions import UnsupportedRuntimeError
raise UnsupportedRuntimeError("This version of capa can only be used with Python 3.10+")
exit_code = main()
if exit_code != 0:
popup("capa explorer encountered errors during analysis. Please check the console output for more information.") # type: ignore [name-defined] # noqa: F821
sys.exit(exit_code)

174
capa/ghidra/capa_ghidra.py Normal file
View File

@@ -0,0 +1,174 @@
# Run capa against loaded Ghidra database and render results in Ghidra Console window
# @author Mike Hunhoff (mehunhoff@google.com)
# @category Python 3.capa
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import logging
import pathlib
import argparse
import capa
import capa.main
import capa.rules
import capa.ghidra.helpers
import capa.render.default
import capa.capabilities.common
import capa.features.extractors.ghidra.extractor
logger = logging.getLogger("capa_ghidra")
def run_headless():
parser = argparse.ArgumentParser(description="The FLARE team's open-source tool to integrate capa with Ghidra.")
parser.add_argument(
"rules",
type=str,
help="path to rule file or directory",
)
parser.add_argument(
"-v", "--verbose", action="store_true", help="enable verbose result document (no effect with --json)"
)
parser.add_argument(
"-vv", "--vverbose", action="store_true", help="enable very verbose result document (no effect with --json)"
)
parser.add_argument("-d", "--debug", action="store_true", help="enable debugging output on STDERR")
parser.add_argument("-q", "--quiet", action="store_true", help="disable all output but errors")
parser.add_argument("-j", "--json", action="store_true", help="emit JSON instead of text")
script_args = list(getScriptArgs()) # type: ignore [name-defined] # noqa: F821
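# Ghidra passes headless script arguments through as strings; this script
# expects a single quoted string (e.g. "/path/to/rules --verbose") that is
# split below into argv-style tokens for argparse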
if not script_args or len(script_args) > 1:
script_args = []
else:
script_args = script_args[0].split()
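# translate the bare word "help" into argparse's --help flag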
for idx, arg in enumerate(script_args):
if arg.lower() == "help":
script_args[idx] = "--help"
args = parser.parse_args(args=script_args)
if args.quiet:
logging.basicConfig(level=logging.WARNING)
logging.getLogger().setLevel(logging.WARNING)
elif args.debug:
logging.basicConfig(level=logging.DEBUG)
logging.getLogger().setLevel(logging.DEBUG)
else:
logging.basicConfig(level=logging.INFO)
logging.getLogger().setLevel(logging.INFO)
logger.debug("running in Ghidra headless mode")
rules_path = pathlib.Path(args.rules)
logger.debug("rule path: %s", rules_path)
rules = capa.rules.get_rules([rules_path])
meta = capa.ghidra.helpers.collect_metadata([rules_path])
extractor = capa.features.extractors.ghidra.extractor.GhidraFeatureExtractor()
capabilities = capa.capabilities.common.find_capabilities(rules, extractor, False)
meta.analysis.feature_counts = capabilities.feature_counts
meta.analysis.library_functions = capabilities.library_functions
meta.analysis.layout = capa.loader.compute_layout(rules, extractor, capabilities.matches)
if capa.capabilities.common.has_static_limitation(rules, capabilities, is_standalone=True):
logger.info("capa encountered warnings during analysis")
if args.json:
print(capa.render.json.render(meta, rules, capabilities.matches)) # noqa: T201
elif args.vverbose:
print(capa.render.vverbose.render(meta, rules, capabilities.matches)) # noqa: T201
elif args.verbose:
print(capa.render.verbose.render(meta, rules, capabilities.matches)) # noqa: T201
else:
print(capa.render.default.render(meta, rules, capabilities.matches)) # noqa: T201
return 0
def run_ui():
logging.basicConfig(level=logging.INFO)
logging.getLogger().setLevel(logging.INFO)
rules_dir: str = ""
try:
selected_dir = askDirectory("Choose capa rules directory", "Ok") # type: ignore [name-defined] # noqa: F821
if selected_dir:
rules_dir = selected_dir.getPath()
except RuntimeError:
# RuntimeError thrown when user selects "Cancel"
pass
if not rules_dir:
logger.info("You must choose a capa rules directory before running capa.")
return capa.main.E_MISSING_RULES
verbose = askChoice( # type: ignore [name-defined] # noqa: F821
"capa output verbosity", "Choose capa output verbosity", ["default", "verbose", "vverbose"], "default"
)
rules_path: pathlib.Path = pathlib.Path(rules_dir)
logger.info("running capa using rules from %s", str(rules_path))
rules = capa.rules.get_rules([rules_path])
meta = capa.ghidra.helpers.collect_metadata([rules_path])
extractor = capa.features.extractors.ghidra.extractor.GhidraFeatureExtractor()
capabilities = capa.capabilities.common.find_capabilities(rules, extractor, True)
meta.analysis.feature_counts = capabilities.feature_counts
meta.analysis.library_functions = capabilities.library_functions
meta.analysis.layout = capa.loader.compute_layout(rules, extractor, capabilities.matches)
if capa.capabilities.common.has_static_limitation(rules, capabilities, is_standalone=False):
logger.info("capa encountered warnings during analysis")
if verbose == "vverbose":
print(capa.render.vverbose.render(meta, rules, capabilities.matches)) # noqa: T201
elif verbose == "verbose":
print(capa.render.verbose.render(meta, rules, capabilities.matches)) # noqa: T201
else:
print(capa.render.default.render(meta, rules, capabilities.matches)) # noqa: T201
return 0
def main():
if not capa.ghidra.helpers.is_supported_ghidra_version():
return capa.main.E_UNSUPPORTED_GHIDRA_VERSION
if not capa.ghidra.helpers.is_supported_file_type():
return capa.main.E_INVALID_FILE_TYPE
if not capa.ghidra.helpers.is_supported_arch_type():
return capa.main.E_INVALID_FILE_ARCH
if isRunningHeadless(): # type: ignore [name-defined] # noqa: F821
return run_headless()
else:
return run_ui()
if __name__ == "__main__":
if sys.version_info < (3, 10):
from capa.exceptions import UnsupportedRuntimeError
raise UnsupportedRuntimeError("This version of capa can only be used with Python 3.10+")
sys.exit(main())

View File

@@ -22,7 +22,6 @@ import capa.version
import capa.features.common
import capa.features.freeze
import capa.render.result_document as rdoc
import capa.features.extractors.ghidra.context as ghidra_context
import capa.features.extractors.ghidra.helpers
from capa.features.address import AbsoluteVirtualAddress
@@ -32,18 +31,6 @@ logger = logging.getLogger("capa")
SUPPORTED_FILE_TYPES = ("Executable and Linking Format (ELF)", "Portable Executable (PE)", "Raw Binary")
def get_current_program():
return ghidra_context.get_context().program
def get_flat_api():
return ghidra_context.get_context().flat_api
def get_monitor():
return ghidra_context.get_context().monitor
class GHIDRAIO:
"""
An object that acts as a file-like object,
@@ -61,12 +48,7 @@ class GHIDRAIO:
self.offset = offset
def read(self, size):
logger.debug(
"reading 0x%x bytes at 0x%x (ea: 0x%x)",
size,
self.offset,
get_current_program().getImageBase().add(self.offset).getOffset(),
)
logger.debug("reading 0x%x bytes at 0x%x (ea: 0x%x)", size, self.offset, currentProgram().getImageBase().add(self.offset).getOffset()) # type: ignore [name-defined] # noqa: F821
if size > len(self.bytes_) - self.offset:
logger.debug("cannot read 0x%x bytes at 0x%x (ea: BADADDR)", size, self.offset)
@@ -78,7 +60,7 @@ class GHIDRAIO:
return
def get_bytes(self):
file_bytes = get_current_program().getMemory().getAllFileBytes()[0]
file_bytes = currentProgram().getMemory().getAllFileBytes()[0] # type: ignore [name-defined] # noqa: F821
# getOriginalByte() allows for raw file parsing on the Ghidra side
# other functions will fail as Ghidra will think that it's reading uninitialized memory
@@ -88,32 +70,21 @@ class GHIDRAIO:
def is_supported_ghidra_version():
import ghidra.framework
version = ghidra.framework.Application.getApplicationVersion()
try:
# version format example: "11.1.2" or "11.4"
major, minor = map(int, version.split(".")[:2])
if major < 12:
logger.error("-" * 80)
logger.error(" Ghidra version %s is not supported.", version)
logger.error(" ")
logger.error(" capa requires Ghidra 12.0 or higher.")
logger.error("-" * 80)
return False
except ValueError:
logger.warning("could not parse Ghidra version: %s", version)
version = float(getGhidraVersion()[:4]) # type: ignore [name-defined] # noqa: F821
if version < 10.2:
warning_msg = "capa does not support this Ghidra version"
logger.warning(warning_msg)
logger.warning("Your Ghidra version is: %s. Supported versions are: Ghidra >= 10.2", version)
return False
return True
def is_running_headless():
return True # PyGhidra is always headless in this context
return isRunningHeadless() # type: ignore [name-defined] # noqa: F821
def is_supported_file_type():
file_info = get_current_program().getExecutableFormat()
file_info = currentProgram().getExecutableFormat() # type: ignore [name-defined] # noqa: F821
if file_info not in SUPPORTED_FILE_TYPES:
logger.error("-" * 80)
logger.error(" Input file does not appear to be a supported file type.")
@@ -128,7 +99,7 @@ def is_supported_file_type():
def is_supported_arch_type():
lang_id = str(get_current_program().getLanguageID()).lower()
lang_id = str(currentProgram().getLanguageID()).lower() # type: ignore [name-defined] # noqa: F821
if not all((lang_id.startswith("x86"), any(arch in lang_id for arch in ("32", "64")))):
logger.error("-" * 80)
@@ -141,18 +112,18 @@ def is_supported_arch_type():
def get_file_md5():
return get_current_program().getExecutableMD5()
return currentProgram().getExecutableMD5() # type: ignore [name-defined] # noqa: F821
def get_file_sha256():
return get_current_program().getExecutableSHA256()
return currentProgram().getExecutableSHA256() # type: ignore [name-defined] # noqa: F821
def collect_metadata(rules: list[Path]):
md5 = get_file_md5()
sha256 = get_file_sha256()
info = get_current_program().getLanguageID().toString()
info = currentProgram().getLanguageID().toString() # type: ignore [name-defined] # noqa: F821
if "x86" in info and "64" in info:
arch = "x86_64"
elif "x86" in info and "32" in info:
@@ -160,11 +131,11 @@ def collect_metadata(rules: list[Path]):
else:
arch = "unknown arch"
format_name: str = get_current_program().getExecutableFormat()
format_name: str = currentProgram().getExecutableFormat() # type: ignore [name-defined] # noqa: F821
if "PE" in format_name:
os = "windows"
elif "ELF" in format_name:
with contextlib.closing(GHIDRAIO()) as f:
with contextlib.closing(capa.ghidra.helpers.GHIDRAIO()) as f:
os = capa.features.extractors.elf.detect_elf_os(f)
else:
os = "unknown os"
@@ -177,18 +148,16 @@ def collect_metadata(rules: list[Path]):
md5=md5,
sha1="",
sha256=sha256,
path=get_current_program().getExecutablePath(),
path=currentProgram().getExecutablePath(), # type: ignore [name-defined] # noqa: F821
),
flavor=rdoc.Flavor.STATIC,
analysis=rdoc.StaticAnalysis(
format=get_current_program().getExecutableFormat(),
format=currentProgram().getExecutableFormat(), # type: ignore [name-defined] # noqa: F821
arch=arch,
os=os,
extractor="ghidra",
rules=tuple(r.resolve().absolute().as_posix() for r in rules),
base_address=capa.features.freeze.Address.from_capa(
AbsoluteVirtualAddress(get_current_program().getImageBase().getOffset())
),
base_address=capa.features.freeze.Address.from_capa(AbsoluteVirtualAddress(currentProgram().getImageBase().getOffset())), # type: ignore [name-defined] # noqa: F821
layout=rdoc.StaticLayout(
functions=(),
),

View File

@@ -1,54 +0,0 @@
<div align="center">
<img src="https://github.com/mandiant/capa/blob/master/doc/img/ghidra_backend_logo.png" width=240 height=125>
</div>
# capa explorer for Ghidra
capa explorer for Ghidra brings capa's detection capabilities directly to Ghidra's user interface, helping speed up your reverse engineering tasks by identifying which parts of a program suggest interesting behavior, such as setting a registry value. You can execute the script [capa_explorer.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/plugin/capa_explorer.py) (via [PyGhidra](https://github.com/NationalSecurityAgency/ghidra/tree/master/Ghidra/Features/PyGhidra)) using Ghidra's Script Manager window to run capa's analysis and view the results in Ghidra.
## ui integration
[capa_explorer.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/capa_explorer.py) renders capa results in Ghidra's UI to help you quickly navigate them. This includes adding matched functions to Ghidra's Symbol Tree and Bookmarks windows and adding comments to functions that indicate matched capabilities and features. You can execute this script using Ghidra's Script Manager window.
### symbol tree window
Matched functions are added to Ghidra's Symbol Tree window under a custom namespace that maps to the capabilities' [capa namespace](https://github.com/mandiant/capa-rules/blob/master/doc/format.md#rule-namespace).
<div align="center">
<img src="https://github.com/mandiant/capa/assets/66766340/eeae33f4-99d4-42dc-a5e8-4c1b8c661492" width=300>
</div>
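Under the hood, the script converts a rule's capa namespace into a Ghidra namespace path by swapping capa's `/` separator for Ghidra's namespace delimiter. A minimal sketch of that mapping, assuming the `::` delimiter defined by `ghidra.program.model.symbol.Namespace`:

```python
# illustrative sketch only; mirrors the mapping performed by capa_explorer.py
rule_namespace = "communication/named-pipe/create"
delimiter = "::"  # assumed value of Ghidra's Namespace.DELIMITER
root = "capa"  # the plugin has used both "capa" and "capa_explorer" as the root namespace
ghidra_namespace = root + delimiter + delimiter.join(rule_namespace.split("/"))
print(ghidra_namespace)  # capa::communication::named-pipe::create
```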
### comments
Comments are added at the beginning of matched functions to indicate matched capabilities, and inline comments are added to indicate matched features. You can view these comments in Ghidra's Disassembly Listing and Decompile windows.
<div align="center">
<img src="https://github.com/mandiant/capa/assets/66766340/bb2b4170-7fd4-45fc-8c7b-ff8f2e2f101b" width=1000>
</div>
### bookmarks
Bookmarks are added to functions that match a capability mapped to a MITRE ATT&CK and/or Malware Behavior Catalog (MBC) technique. You can view these bookmarks in Ghidra's Bookmarks window.
<div align="center">
<img src="https://github.com/mandiant/capa/assets/66766340/7f9a66a9-7be7-4223-91c6-4b8fc4651336" width=825>
</div>
# getting started
## requirements
- [Ghidra](https://github.com/NationalSecurityAgency/ghidra) >= 12.0 must be installed.
- [flare-capa](https://pypi.org/project/flare-capa/) >= 10.0 must be installed (virtual environment recommended) with the `ghidra` extra (e.g., `pip install "flare-capa[ghidra]"`).
- [capa rules](https://github.com/mandiant/capa-rules) must be downloaded for the version of capa you are using.
## execution
### 1. run Ghidra with PyGhidra
You must start Ghidra using the `pyghidraRun` script provided in the support directory of your Ghidra installation to ensure the Python environment is correctly loaded. You should execute `pyghidraRun` from within the Python environment that you used to install capa.
```bash
<ghidra_install>/support/pyghidraRun
```
### 2. run capa_explorer.py
1. Open your Ghidra project and CodeBrowser.
2. Open the Script Manager.
3. Add [capa_explorer.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ghidra/plugin/capa_explorer.py) to the script directories.
4. Filter for capa and run the script.
5. When prompted, select the directory containing the downloaded capa rules.

View File

@@ -96,7 +96,11 @@ def is_runtime_ida():
def is_runtime_ghidra():
return importlib.util.find_spec("ghidra") is not None
try:
currentProgram # type: ignore [name-defined] # noqa: F821
except NameError:
return False
return True
def assert_never(value) -> NoReturn:

View File

@@ -17,6 +17,7 @@ import logging
import idaapi
import ida_kernwin
from capa.ida.plugin.form import CapaExplorerForm
from capa.ida.plugin.icon import ICON
logger = logging.getLogger(__name__)
@@ -73,9 +74,6 @@ class CapaExplorerPlugin(idaapi.plugin_t):
arg (int): bitflag. Setting LSB enables automatic analysis upon
loading. The other bits are currently undefined. See `form.Options`.
"""
# delay import to not trigger load of Qt components when not running in idaq, i.e., in idalib
from capa.ida.plugin.form import CapaExplorerForm
if not self.form:
self.form = CapaExplorerForm(self.PLUGIN_NAME, arg)
else:

View File

@@ -14,9 +14,9 @@
import ida_kernwin
from PyQt5 import QtCore
from capa.ida.plugin.error import UserCancelledError
from capa.ida.plugin.qt_compat import QtCore, Signal
from capa.features.extractors.ida.extractor import IdaFeatureExtractor
from capa.features.extractors.base_extractor import FunctionHandle
@@ -24,7 +24,7 @@ from capa.features.extractors.base_extractor import FunctionHandle
class CapaExplorerProgressIndicator(QtCore.QObject):
"""implement progress signal, used during feature extraction"""
progress = Signal(str)
progress = QtCore.pyqtSignal(str)
def update(self, text):
"""emit progress update

View File

@@ -23,6 +23,7 @@ from pathlib import Path
import idaapi
import ida_kernwin
import ida_settings
from PyQt5 import QtGui, QtCore, QtWidgets
import capa.main
import capa.rules
@@ -50,7 +51,6 @@ from capa.ida.plugin.hooks import CapaExplorerIdaHooks
from capa.ida.plugin.model import CapaExplorerDataModel
from capa.ida.plugin.proxy import CapaExplorerRangeProxyModel, CapaExplorerSearchProxyModel
from capa.ida.plugin.extractor import CapaExplorerFeatureExtractor
from capa.ida.plugin.qt_compat import QtGui, QtCore, QtWidgets
from capa.features.extractors.base_extractor import FunctionHandle
logger = logging.getLogger(__name__)
@@ -1358,7 +1358,7 @@ class CapaExplorerForm(idaapi.PluginForm):
@param state: checked state
"""
if state:
if state == QtCore.Qt.Checked:
self.limit_results_to_function(idaapi.get_func(idaapi.get_screen_ea()))
else:
self.range_model_proxy.reset_address_range_filter()
@@ -1367,7 +1367,7 @@ class CapaExplorerForm(idaapi.PluginForm):
def slot_checkbox_limit_features_by_ea(self, state):
""" """
if state:
if state == QtCore.Qt.Checked:
self.view_rulegen_features.filter_items_by_ea(idaapi.get_screen_ea())
else:
self.view_rulegen_features.show_all_items()

View File

@@ -1,38 +0,0 @@
{
"IDAMetadataDescriptorVersion": 1,
"plugin": {
"name": "capa",
"entryPoint": "capa_explorer.py",
"version": "9.3.1",
"idaVersions": ">=7.4",
"description": "Identify capabilities in executable files using FLARE's capa framework",
"license": "Apache-2.0",
"categories": [
"malware-analysis",
"api-scripting-and-automation",
"ui-ux-and-visualization"
],
"pythonDependencies": ["flare-capa==9.3.1"],
"urls": {
"repository": "https://github.com/mandiant/capa"
},
"authors": [
{"name": "Willi Ballenthin", "email": "wballenthin@hex-rays.com"},
{"name": "Moritz Raabe", "email": "moritzraabe@google.com"},
{"name": "Mike Hunhoff", "email": "mike.hunhoff@gmail.com"},
{"name": "Yacine Elhamer", "email": "elhamer.yacine@gmail.com"}
],
"keywords": [
"capability-detection",
"malware-analysis",
"behavior-analysis",
"reverse-engineering",
"att&ck",
"rule-engine",
"feature-extraction",
"yara-like-rules",
"static-analysis",
"dynamic-analysis"
]
}
}

View File

@@ -18,10 +18,10 @@ from typing import Iterator, Optional
import idc
import idaapi
from PyQt5 import QtCore
import capa.ida.helpers
from capa.features.address import Address, FileOffsetAddress, AbsoluteVirtualAddress
from capa.ida.plugin.qt_compat import QtCore, qt_get_item_flag_tristate
def info_to_name(display):
@@ -55,7 +55,7 @@ class CapaExplorerDataItem:
self.flags = QtCore.Qt.ItemIsEnabled | QtCore.Qt.ItemIsSelectable
if self._can_check:
self.flags = self.flags | QtCore.Qt.ItemIsUserCheckable | qt_get_item_flag_tristate()
self.flags = self.flags | QtCore.Qt.ItemIsUserCheckable | QtCore.Qt.ItemIsTristate
if self.pred:
self.pred.appendChild(self)

View File

@@ -18,6 +18,7 @@ from collections import deque
import idc
import idaapi
from PyQt5 import QtGui, QtCore
import capa.rules
import capa.ida.helpers
@@ -41,7 +42,6 @@ from capa.ida.plugin.item import (
CapaExplorerInstructionViewItem,
)
from capa.features.address import Address, AbsoluteVirtualAddress
from capa.ida.plugin.qt_compat import QtGui, QtCore
# default highlight color used in IDA window
DEFAULT_HIGHLIGHT = 0xE6C700
@@ -269,7 +269,7 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
visited.add(child_index)
for idx in range(self.rowCount(child_index)):
stack.append(self.index(idx, 0, child_index))
stack.append(child_index.child(idx, 0))
def reset_ida_highlighting(self, item, checked):
"""reset IDA highlight for item

View File

@@ -12,8 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from PyQt5 import QtCore
from PyQt5.QtCore import Qt
from capa.ida.plugin.model import CapaExplorerDataModel
from capa.ida.plugin.qt_compat import Qt, QtCore
class CapaExplorerRangeProxyModel(QtCore.QSortFilterProxyModel):

View File

@@ -1,79 +0,0 @@
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Qt compatibility layer for capa IDA Pro plugin.
Handles PyQt5 (IDA < 9.2) vs PySide6 (IDA >= 9.2) differences.
This module provides a unified import interface for Qt modules and handles
API changes between Qt5 and Qt6.
"""
try:
# IDA 9.2+ uses PySide6
from PySide6 import QtGui, QtCore, QtWidgets
from PySide6.QtGui import QAction
QT_LIBRARY = "PySide6"
Signal = QtCore.Signal
except ImportError:
# Older IDA versions use PyQt5
try:
from PyQt5 import QtGui, QtCore, QtWidgets
from PyQt5.QtWidgets import QAction
QT_LIBRARY = "PyQt5"
Signal = QtCore.pyqtSignal
except ImportError:
raise ImportError("Neither PySide6 nor PyQt5 is available. Cannot initialize capa IDA plugin.")
Qt = QtCore.Qt
def qt_get_item_flag_tristate():
"""
Get the tristate item flag compatible with Qt5 and Qt6.
Qt5 (PyQt5): Uses Qt.ItemIsTristate
Qt6 (PySide6): Qt.ItemIsTristate was removed, uses Qt.ItemIsAutoTristate
ItemIsAutoTristate automatically manages tristate based on child checkboxes,
matching the original ItemIsTristate behavior where parent checkboxes reflect
the check state of their children.
Returns:
int: The appropriate flag value for the Qt version
Raises:
AttributeError: If the tristate flag cannot be found in the Qt library
"""
if QT_LIBRARY == "PySide6":
# Qt6: ItemIsTristate was removed, replaced with ItemIsAutoTristate
# Try different possible locations (API varies slightly across PySide6 versions)
if hasattr(Qt, "ItemIsAutoTristate"):
return Qt.ItemIsAutoTristate
elif hasattr(Qt, "ItemFlag") and hasattr(Qt.ItemFlag, "ItemIsAutoTristate"):
return Qt.ItemFlag.ItemIsAutoTristate
else:
raise AttributeError(
"Cannot find ItemIsAutoTristate in PySide6. "
+ "Your PySide6 version may be incompatible with capa. "
+ f"Available Qt attributes: {[attr for attr in dir(Qt) if 'Item' in attr]}"
)
else:
# Qt5: Use the original ItemIsTristate flag
return Qt.ItemIsTristate
__all__ = ["qt_get_item_flag_tristate", "Signal", "QAction", "QtGui", "QtCore", "QtWidgets"]
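Consumers import Qt types from this shim rather than from PyQt5 or PySide6 directly. A minimal sketch of the intended usage, mirroring the `CapaExplorerProgressIndicator` change elsewhere in this diff (the `ProgressReporter` name here is hypothetical):

```python
from capa.ida.plugin.qt_compat import Signal, QtCore

class ProgressReporter(QtCore.QObject):
    # Signal resolves to QtCore.Signal under PySide6 (IDA >= 9.2)
    # and to QtCore.pyqtSignal under PyQt5 (older IDA versions)
    progress = Signal(str)

    def update(self, text):
        self.progress.emit(text)
```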

View File

@@ -18,6 +18,7 @@ from collections import Counter
import idc
import idaapi
from PyQt5 import QtGui, QtCore, QtWidgets
import capa.rules
import capa.engine
@@ -27,7 +28,6 @@ import capa.features.basicblock
from capa.ida.plugin.item import CapaExplorerFunctionItem
from capa.features.address import AbsoluteVirtualAddress, _NoAddress
from capa.ida.plugin.model import CapaExplorerDataModel
from capa.ida.plugin.qt_compat import QtGui, QtCore, Signal, QAction, QtWidgets
MAX_SECTION_SIZE = 750
@@ -147,7 +147,7 @@ def calc_item_depth(o):
def build_action(o, display, data, slot):
""" """
action = QAction(display, o)
action = QtWidgets.QAction(display, o)
action.setData(data)
action.triggered.connect(lambda checked: slot(action))
@@ -312,7 +312,7 @@ class CapaExplorerRulegenPreview(QtWidgets.QTextEdit):
class CapaExplorerRulegenEditor(QtWidgets.QTreeWidget):
updated = Signal()
updated = QtCore.pyqtSignal()
def __init__(self, preview, parent=None):
""" """

View File

@@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import io
import os
import logging
import datetime
@@ -22,13 +23,24 @@ from pathlib import Path
from rich.console import Console
from typing_extensions import assert_never
import capa.perf
import capa.rules
import capa.engine
import capa.helpers
import capa.version
import capa.render.json
import capa.rules.cache
import capa.render.default
import capa.render.verbose
import capa.features.common
import capa.features.freeze as frz
import capa.render.vverbose
import capa.features.extractors
import capa.render.result_document
import capa.render.result_document as rdoc
import capa.features.extractors.common
import capa.features.extractors.base_extractor
import capa.features.extractors.cape.extractor
from capa.rules import RuleSet
from capa.engine import MatchResults
from capa.exceptions import UnsupportedOSError, UnsupportedArchError, UnsupportedFormatError
@@ -67,7 +79,6 @@ BACKEND_VMRAY = "vmray"
BACKEND_FREEZE = "freeze"
BACKEND_BINEXPORT2 = "binexport2"
BACKEND_IDA = "ida"
BACKEND_GHIDRA = "ghidra"
class CorruptFile(ValueError):
@@ -167,15 +178,8 @@ def get_workspace(path: Path, input_format: str, sigpaths: list[Path]):
except Exception as e:
# vivisect raises raw Exception instances, and we don't want
# to do a subclass check via isinstance.
if type(e) is Exception and e.args:
error_msg = str(e.args[0])
if "Couldn't convert rva" in error_msg:
raise CorruptFile(error_msg) from e
elif "Unsupported Architecture" in error_msg:
# Extract architecture number if available
arch_info = e.args[1] if len(e.args) > 1 else "unknown"
raise CorruptFile(f"Unsupported architecture: {arch_info}") from e
if type(e) is Exception and "Couldn't convert rva" in e.args[0]:
raise CorruptFile(e.args[0]) from e
raise
viv_utils.flirt.register_flirt_signature_analyzers(vw, [str(s) for s in sigpaths])
@@ -334,24 +338,12 @@ def get_extractor(
import capa.features.extractors.ida.extractor
logger.debug("idalib: opening database...")
idapro.enable_console_messages(False)
with console.status("analyzing program...", spinner="dots"):
# we set the primary and secondary Lumina servers to 0.0.0.0 to disable Lumina,
# which sometimes provides bad names, including overwriting names from debug info.
#
# use -R to load resources, which can help us find embedded PE files.
#
# return values from open_database:
# 0 - Success
# 2 - User cancelled or 32-64 bit conversion failed
# 4 - Database initialization failed
# -1 - Generic errors (database already open, auto-analysis failed, etc.)
# -2 - User cancelled operation
ret = idapro.open_database(
str(input_path), run_auto_analysis=True, args="-Olumina:host=0.0.0.0 -Osecondary_lumina:host=0.0.0.0 -R"
)
if ret != 0:
raise RuntimeError("failed to analyze input file")
# idalib writes to stdout (ugh), so we have to capture that
# so as not to screw up structured output.
with capa.helpers.stdout_redirector(io.BytesIO()):
with console.status("analyzing program...", spinner="dots"):
if idapro.open_database(str(input_path), run_auto_analysis=True):
raise RuntimeError("failed to analyze input file")
logger.debug("idalib: waiting for analysis...")
ida_auto.auto_wait()
@@ -359,69 +351,6 @@ def get_extractor(
return capa.features.extractors.ida.extractor.IdaFeatureExtractor()
elif backend == BACKEND_GHIDRA:
import pyghidra
with console.status("analyzing program...", spinner="dots"):
if not pyghidra.started():
pyghidra.start()
import capa.ghidra.helpers
if not capa.ghidra.helpers.is_supported_ghidra_version():
raise RuntimeError("unsupported Ghidra version")
import tempfile
tmpdir = tempfile.TemporaryDirectory()
project_cm = pyghidra.open_project(tmpdir.name, "CapaProject", create=True)
project = project_cm.__enter__()
try:
from ghidra.util.task import TaskMonitor
monitor = TaskMonitor.DUMMY
# Import file
loader = pyghidra.program_loader().project(project).source(str(input_path)).name(input_path.name)
with loader.load() as load_results:
load_results.save(monitor)
# Open program
program, consumer = pyghidra.consume_program(project, "/" + input_path.name)
# Analyze
pyghidra.analyze(program, monitor)
from ghidra.program.flatapi import FlatProgramAPI
flat_api = FlatProgramAPI(program)
import capa.features.extractors.ghidra.context as ghidra_context
ghidra_context.set_context(program, flat_api, monitor)
# Wrapper to handle cleanup of program (consumer) and project
class GhidraContextWrapper:
def __init__(self, project_cm, program, consumer):
self.project_cm = project_cm
self.program = program
self.consumer = consumer
def __exit__(self, exc_type, exc_val, exc_tb):
self.program.release(self.consumer)
self.project_cm.__exit__(exc_type, exc_val, exc_tb)
cm = GhidraContextWrapper(project_cm, program, consumer)
except Exception:
project_cm.__exit__(None, None, None)
tmpdir.cleanup()
raise
import capa.features.extractors.ghidra.extractor
return capa.features.extractors.ghidra.extractor.GhidraFeatureExtractor(ctx_manager=cm, tmpdir=tmpdir)
else:
raise ValueError("unexpected backend: " + backend)
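For reference, the test fixtures later in this diff obtain a Ghidra-backed extractor through this same code path; a caller would select the backend roughly like this (a sketch using the signature shown in the fixtures change; `GHIDRA_INSTALL_DIR` must point at a Ghidra installation so PyGhidra can start):

```python
# hypothetical driver code, not part of this change
from pathlib import Path

import capa.loader
from capa.features.common import FORMAT_AUTO, OS_AUTO

extractor = capa.loader.get_extractor(
    Path("sample.exe_"), FORMAT_AUTO, OS_AUTO, capa.loader.BACKEND_GHIDRA, [], disable_progress=True
)
```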

View File

@@ -55,7 +55,6 @@ from capa.loader import (
BACKEND_VMRAY,
BACKEND_DOTNET,
BACKEND_FREEZE,
BACKEND_GHIDRA,
BACKEND_PEFILE,
BACKEND_DRAKVUF,
BACKEND_BINEXPORT2,
@@ -299,7 +298,6 @@ def install_common_args(parser, wanted=None):
(BACKEND_BINJA, "Binary Ninja"),
(BACKEND_DOTNET, ".NET"),
(BACKEND_BINEXPORT2, "BinExport2"),
(BACKEND_GHIDRA, "Ghidra"),
(BACKEND_FREEZE, "capa freeze"),
(BACKEND_CAPE, "CAPE"),
(BACKEND_DRAKVUF, "DRAKVUF"),
@@ -394,7 +392,6 @@ class ShouldExitError(Exception):
"""raised when a main-related routine indicates the program should exit."""
def __init__(self, status_code: int):
super().__init__(status_code)
self.status_code = status_code
@@ -1107,26 +1104,14 @@ def ida_main():
def ghidra_main():
from ghidra.program.flatapi import FlatProgramAPI
import capa.rules
import capa.ghidra.helpers
import capa.render.default
import capa.features.extractors.ghidra.context
import capa.features.extractors.ghidra.extractor
logging.basicConfig(level=logging.INFO)
logging.getLogger().setLevel(logging.INFO)
# These are provided by the Ghidra scripting environment
# but are not available when running standard python
# so we have to ignore the linting errors
program = currentProgram # type: ignore [name-defined] # noqa: F821
monitor_ = monitor # type: ignore [name-defined] # noqa: F821
flat_api = FlatProgramAPI(program)
capa.features.extractors.ghidra.context.set_context(program, flat_api, monitor_)
logger.debug("-" * 80)
logger.debug(" Using default embedded rules.")
logger.debug(" ")

View File

@@ -274,8 +274,12 @@ SUPPORTED_FEATURES[Scope.FUNCTION].update(SUPPORTED_FEATURES[Scope.BASIC_BLOCK])
class InvalidRule(ValueError):
def __init__(self, msg):
super().__init__()
self.msg = msg
def __str__(self):
return f"invalid rule: {super().__str__()}"
return f"invalid rule: {self.msg}"
def __repr__(self):
return str(self)
@@ -285,15 +289,20 @@ class InvalidRuleWithPath(InvalidRule):
def __init__(self, path, msg):
super().__init__(msg)
self.path = path
self.msg = msg
self.__cause__ = None
def __str__(self):
return f"invalid rule: {self.path}: {super(InvalidRule, self).__str__()}"
return f"invalid rule: {self.path}: {self.msg}"
class InvalidRuleSet(ValueError):
def __init__(self, msg):
super().__init__()
self.msg = msg
def __str__(self):
return f"invalid rule set: {super().__str__()}"
return f"invalid rule set: {self.msg}"
def __repr__(self):
return str(self)
@@ -1093,15 +1102,15 @@ class Rule:
@lru_cache()
def _get_yaml_loader():
try:
# prefer to use CLoader to be fast, see #306 / CSafeLoader is the same as CLoader but with safe loading
# prefer to use CLoader to be fast, see #306
# on Linux, make sure you install libyaml-dev or similar
# on Windows, get WHLs from pyyaml.org/pypi
logger.debug("using libyaml CSafeLoader.")
return yaml.CSafeLoader
logger.debug("using libyaml CLoader.")
return yaml.CLoader
except Exception:
logger.debug("unable to import libyaml CSafeLoader, falling back to Python yaml parser.")
logger.debug("unable to import libyaml CLoader, falling back to Python yaml parser.")
logger.debug("this will be slower to load rules.")
return yaml.SafeLoader
return yaml.Loader
@staticmethod
def _get_ruamel_yaml_parser():

View File

@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
__version__ = "9.3.1"
__version__ = "9.2.1"
def get_major_version():

Binary file not shown. (image, 210 KiB)

Binary file not shown. (image, 108 KiB)

Binary file not shown. (image, 110 KiB)

Binary file not shown. (image, 79 KiB)

View File

@@ -7,7 +7,6 @@
- [ ] Review changes
- capa https://github.com/mandiant/capa/compare/\<last-release\>...master
- capa-rules https://github.com/mandiant/capa-rules/compare/\<last-release>\...master
- [ ] Run `$ bump-my-version bump {patch/minor/major} [--allow-dirty]` to update [capa/version.py](https://github.com/mandiant/capa/blob/master/capa/version.py) and other version files
- [ ] Update [CHANGELOG.md](https://github.com/mandiant/capa/blob/master/CHANGELOG.md)
- Do not forget to add a nice introduction thanking contributors
- Remember that we need a major release if we introduce breaking changes
@@ -37,6 +36,7 @@
- [capa <release>...master](https://github.com/mandiant/capa/compare/<release>...master)
- [capa-rules <release>...master](https://github.com/mandiant/capa-rules/compare/<release>...master)
```
- [ ] Update [capa/version.py](https://github.com/mandiant/capa/blob/master/capa/version.py)
- [ ] Create a PR with the updated [CHANGELOG.md](https://github.com/mandiant/capa/blob/master/CHANGELOG.md) and [capa/version.py](https://github.com/mandiant/capa/blob/master/capa/version.py). Copy this checklist in the PR description.
- [ ] Update the [homepage](https://github.com/mandiant/capa/blob/master/web/public/index.html) (i.e. What's New section)
- [ ] After PR review, merge the PR and [create the release in GH](https://github.com/mandiant/capa/releases/new) using text from the [CHANGELOG.md](https://github.com/mandiant/capa/blob/master/CHANGELOG.md).

View File

@@ -74,8 +74,7 @@ dependencies = [
# comments and context.
"pyyaml>=6",
"colorama>=0.4",
"ida-netnode>=3.0",
"ida-settings>=3.1.0",
"ida-settings>=2",
"ruamel.yaml>=0.18",
"pefile>=2023.2.7",
"pyelftools>=0.31",
@@ -105,17 +104,10 @@ dependencies = [
"networkx>=3",
"dnfile>=0.17.0",
"dnfile>=0.15.0",
]
dynamic = ["version"]
[tool.pytest.ini_options]
filterwarnings = [
"ignore:builtin type SwigPyPacked has no __module__ attribute:DeprecationWarning",
"ignore:builtin type SwigPyObject has no __module__ attribute:DeprecationWarning",
"ignore:builtin type swigvarlink has no __module__ attribute:DeprecationWarning",
]
[tool.setuptools.dynamic]
version = {attr = "capa.version.__version__"}
@@ -129,57 +121,51 @@ dev = [
# we want all developer environments to be consistent.
# These dependencies are not used in production environments
# and should not conflict with other libraries/tooling.
"pre-commit==4.5.0",
"pytest==9.0.2",
"pytest-sugar==1.1.1",
"pre-commit==4.2.0",
"pytest==8.0.0",
"pytest-sugar==1.0.0",
"pytest-instafail==0.5.0",
"flake8==7.3.0",
"flake8-bugbear==25.11.29",
"flake8-bugbear==24.12.12",
"flake8-encodings==0.5.1",
"flake8-comprehensions==3.17.0",
"flake8-comprehensions==3.16.0",
"flake8-logging-format==0.9.0",
"flake8-no-implicit-concat==0.3.5",
"flake8-print==5.0.0",
"flake8-todos==0.3.1",
"flake8-simplify==0.30.0",
"flake8-simplify==0.22.0",
"flake8-use-pathlib==0.3.0",
"flake8-copyright==0.2.4",
"ruff==0.14.7",
"black==25.12.0",
"isort==7.0.0",
"mypy==1.19.1",
"mypy-protobuf==5.0.0",
"PyGithub==2.8.1",
"bump-my-version==1.2.4",
"ruff==0.12.0",
"black==25.1.0",
"isort==6.0.0",
"mypy==1.16.0",
"mypy-protobuf==3.6.0",
"PyGithub==2.6.0",
# type stubs for mypy
"types-backports==0.1.3",
"types-colorama==0.4.15.11",
"types-PyYAML==6.0.8",
"types-psutil==7.2.0.20251228",
"types-psutil==7.0.0.20250218",
"types_requests==2.32.0.20240712",
"types-protobuf==6.32.1.20250918",
"deptry==0.24.0"
"types-protobuf==6.30.2.20250516",
"deptry==0.23.0"
]
build = [
# Dev and build dependencies are not relaxed because
# we want all developer environments to be consistent.
# These dependencies are not used in production environments
# and should not conflict with other libraries/tooling.
"pyinstaller==6.18.0",
"pyinstaller==6.14.1",
"setuptools==80.9.0",
"build==1.4.0"
"build==1.2.2"
]
scripts = [
# can (optionally) be more lenient on dependencies here
# see comment on dependencies for more context
"jschema_to_python==1.2.3",
"psutil==7.2.1",
"psutil==7.0.0",
"stix2==3.0.1",
"sarif_om==1.0.4",
"requests>=2.32.4",
]
ghidra = [
"pyghidra>=3.0.0",
"requests==2.32.3",
]
[tool.deptry]
@@ -211,8 +197,7 @@ known_first_party = [
"idc",
"java",
"netnode",
"PyQt5",
"PySide6"
"PyQt5"
]
[tool.deptry.per_rule_ignores]
@@ -220,7 +205,6 @@ known_first_party = [
DEP002 = [
"black",
"build",
"bump-my-version",
"deptry",
"flake8",
"flake8-bugbear",

View File

@@ -10,40 +10,38 @@ annotated-types==0.7.0
colorama==0.4.6
cxxfilt==0.3.0
dncil==1.0.2
dnfile==0.17.0
dnfile==0.15.0
funcy==2.0
humanize==4.15.0
humanize==4.12.0
ida-netnode==3.0
ida-settings==3.2.2
intervaltree==3.2.1
markdown-it-py==4.0.0
ida-settings==2.1.0
intervaltree==3.1.0
markdown-it-py==3.0.0
mdurl==0.1.2
msgpack==1.1.2
msgpack==1.0.8
networkx==3.4.2
pefile==2024.8.26
pip==25.3
protobuf==6.33.1
pip==25.1.1
protobuf==6.31.1
pyasn1==0.5.1
pyasn1-modules==0.3.0
pycparser==2.23
pydantic==2.12.4
pycparser==2.22
pydantic==2.11.4
# pydantic pins pydantic-core,
# but dependabot updates these separately (which is broken) and is annoying,
# so we rely on pydantic to pull in the right version of pydantic-core.
# pydantic-core==2.23.4
xmltodict==1.0.2
xmltodict==0.14.2
pyelftools==0.32
pygments==2.19.1
pyghidra==3.0.0
python-flirt==0.9.2
pyyaml==6.0.2
rich==14.2.0
ruamel-yaml==0.19.1
ruamel-yaml-clib==0.2.14
rich==14.0.0
ruamel-yaml==0.18.6
ruamel-yaml-clib==0.2.8
setuptools==80.9.0
six==1.17.0
sortedcontainers==2.4.0
viv-utils==0.8.0
vivisect==1.3.0
msgspec==0.20.0
bump-my-version==1.2.4
vivisect==1.2.1
msgspec==0.19.0

2
rules

Submodule rules updated: 6a0d506713...2f09b4d471

View File

@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import contextlib
import collections
from pathlib import Path
@@ -20,7 +20,7 @@ from functools import lru_cache
import pytest
import capa.loader
import capa.main
import capa.features.file
import capa.features.insn
import capa.features.common
@@ -53,7 +53,6 @@ from capa.features.extractors.base_extractor import (
)
from capa.features.extractors.dnfile.extractor import DnfileFeatureExtractor
logger = logging.getLogger(__name__)
CD = Path(__file__).resolve().parent
DOTNET_DIR = CD / "data" / "dotnet"
DNFILE_TESTFILES = DOTNET_DIR / "dnfile-testfiles"
@@ -201,73 +200,6 @@ def get_binja_extractor(path: Path):
return extractor
# we can't easily cache this because the extractor relies on global state (the opened database)
# which also has to be closed elsewhere. so, the idalib tests will just take a little bit to run.
def get_idalib_extractor(path: Path):
import capa.features.extractors.ida.idalib as idalib
if not idalib.has_idalib():
raise RuntimeError("cannot find IDA idalib module.")
if not idalib.load_idalib():
raise RuntimeError("failed to load IDA idalib module.")
import idapro
import ida_auto
import capa.features.extractors.ida.extractor
logger.debug("idalib: opening database...")
idapro.enable_console_messages(False)
# we set the primary and secondary Lumina servers to 0.0.0.0 to disable Lumina,
# which sometimes provides bad names, including overwriting names from debug info.
#
# use -R to load resources, which can help us find embedded PE files.
#
# return values from open_database:
# 0 - Success
# 2 - User cancelled or 32-64 bit conversion failed
# 4 - Database initialization failed
# -1 - Generic errors (database already open, auto-analysis failed, etc.)
# -2 - User cancelled operation
ret = idapro.open_database(
str(path), run_auto_analysis=True, args="-Olumina:host=0.0.0.0 -Osecondary_lumina:host=0.0.0.0 -R"
)
if ret != 0:
raise RuntimeError("failed to analyze input file")
logger.debug("idalib: waiting for analysis...")
ida_auto.auto_wait()
logger.debug("idalib: opened database.")
extractor = capa.features.extractors.ida.extractor.IdaFeatureExtractor()
fixup_idalib(path, extractor)
return extractor
def fixup_idalib(path: Path, extractor):
"""
IDA fixups to overcome differences between backends
"""
import idaapi
import ida_funcs
def remove_library_id_flag(fva):
f = idaapi.get_func(fva)
f.flags &= ~ida_funcs.FUNC_LIB
ida_funcs.update_func(f)
if "kernel32-64" in path.name:
# remove (correct) library function id, so we can test x64 thunk
remove_library_id_flag(0x1800202B0)
if "al-khaser_x64" in path.name:
# remove (correct) library function id, so we can test x64 nested thunk
remove_library_id_flag(0x14004B4F0)
@lru_cache(maxsize=1)
def get_cape_extractor(path):
from capa.helpers import load_json_from_path
@@ -295,33 +227,13 @@ def get_vmray_extractor(path):
return VMRayExtractor.from_zipfile(path)
GHIDRA_CACHE: dict[Path, tuple] = {}
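# maps sample path -> (extractor, program, flat_api, monitor) so each binary
# is analyzed by Ghidra only once per test session; the context is re-set on
# every cache hit because the extractor relies on process-global state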
@lru_cache(maxsize=1)
def get_ghidra_extractor(path: Path):
# we need to start PyGhidra before importing the extractor
# because the extractor imports Ghidra modules that are only available after PyGhidra is started
import pyghidra
if not pyghidra.started():
pyghidra.start()
import capa.features.extractors.ghidra.context
import capa.features.extractors.ghidra.extractor
if path in GHIDRA_CACHE:
extractor, program, flat_api, monitor = GHIDRA_CACHE[path]
capa.features.extractors.ghidra.context.set_context(program, flat_api, monitor)
return extractor
extractor = capa.features.extractors.ghidra.extractor.GhidraFeatureExtractor()
setattr(extractor, "path", path.as_posix())
# We use a larger cache size to avoid re-opening the same file multiple times
# which is very slow with Ghidra.
extractor = capa.loader.get_extractor(
path, FORMAT_AUTO, OS_AUTO, capa.loader.BACKEND_GHIDRA, [], disable_progress=True
)
ctx = capa.features.extractors.ghidra.context.get_context()
GHIDRA_CACHE[path] = (extractor, ctx.program, ctx.flat_api, ctx.monitor)
return extractor
@@ -982,8 +894,20 @@ FEATURE_PRESENCE_TESTS = sorted(
("mimikatz", "function=0x4556E5", capa.features.insn.API("advapi32.LsaQueryInformationPolicy"), False),
("mimikatz", "function=0x4556E5", capa.features.insn.API("LsaQueryInformationPolicy"), True),
# insn/api: x64
(
"kernel32-64",
"function=0x180001010",
capa.features.insn.API("RtlVirtualUnwind"),
True,
),
("kernel32-64", "function=0x180001010", capa.features.insn.API("RtlVirtualUnwind"), True),
# insn/api: x64 thunk
(
"kernel32-64",
"function=0x1800202B0",
capa.features.insn.API("RtlCaptureContext"),
True,
),
("kernel32-64", "function=0x1800202B0", capa.features.insn.API("RtlCaptureContext"), True),
# insn/api: x64 nested thunk
("al-khaser x64", "function=0x14004B4F0", capa.features.insn.API("__vcrt_GetModuleHandle"), True),
@@ -1071,20 +995,20 @@ FEATURE_PRESENCE_TESTS = sorted(
("pma16-01", "file", OS(OS_WINDOWS), True),
("pma16-01", "file", OS(OS_LINUX), False),
("mimikatz", "file", OS(OS_WINDOWS), True),
("pma16-01", "function=0x401100", OS(OS_WINDOWS), True),
("pma16-01", "function=0x401100,bb=0x401130", OS(OS_WINDOWS), True),
("pma16-01", "function=0x404356", OS(OS_WINDOWS), True),
("pma16-01", "function=0x404356,bb=0x4043B9", OS(OS_WINDOWS), True),
("mimikatz", "function=0x40105D", OS(OS_WINDOWS), True),
("pma16-01", "file", Arch(ARCH_I386), True),
("pma16-01", "file", Arch(ARCH_AMD64), False),
("mimikatz", "file", Arch(ARCH_I386), True),
("pma16-01", "function=0x401100", Arch(ARCH_I386), True),
("pma16-01", "function=0x401100,bb=0x401130", Arch(ARCH_I386), True),
("pma16-01", "function=0x404356", Arch(ARCH_I386), True),
("pma16-01", "function=0x404356,bb=0x4043B9", Arch(ARCH_I386), True),
("mimikatz", "function=0x40105D", Arch(ARCH_I386), True),
("pma16-01", "file", Format(FORMAT_PE), True),
("pma16-01", "file", Format(FORMAT_ELF), False),
("mimikatz", "file", Format(FORMAT_PE), True),
# format is also a global feature
("pma16-01", "function=0x401100", Format(FORMAT_PE), True),
("pma16-01", "function=0x404356", Format(FORMAT_PE), True),
("mimikatz", "function=0x456BB9", Format(FORMAT_PE), True),
# elf support
("7351f.elf", "file", OS(OS_LINUX), True),

View File

@@ -70,4 +70,4 @@ def test_standalone_binja_backend():
@pytest.mark.skipif(binja_present is False, reason="Skip binja tests if the binaryninja Python API is not installed")
def test_binja_version():
version = binaryninja.core_version_info()
assert version.major == 5 and version.minor == 2
assert version.major == 5 and version.minor == 0

View File

@@ -11,42 +11,95 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import importlib.util
"""
Must invoke this script from within the Ghidra Runtime Environment
"""
import sys
import logging
from pathlib import Path
import pytest
import fixtures
import capa.features.common
ghidra_present = importlib.util.find_spec("pyghidra") is not None and "GHIDRA_INSTALL_DIR" in os.environ
try:
sys.path.append(str(Path(__file__).parent))
import fixtures
finally:
sys.path.pop()
@pytest.mark.skipif(ghidra_present is False, reason="PyGhidra not installed")
@fixtures.parametrize(
"sample,scope,feature,expected",
[
(
pytest.param(
*t,
marks=pytest.mark.xfail(
reason="specific to Vivisect and basic blocks do not align with Ghidra's analysis"
),
)
if t[0] == "294b8d..." and t[2] == capa.features.common.String("\r\n\x00:ht")
else t
)
for t in fixtures.FEATURE_PRESENCE_TESTS
],
indirect=["sample", "scope"],
)
logger = logging.getLogger("test_ghidra_features")
ghidra_present: bool = False
try:
import ghidra # noqa: F401
ghidra_present = True
except ImportError:
pass
def standardize_posix_str(psx_str):
"""fixture test passes the PosixPath to the test data
params: psx_str - PosixPath() to the test data
return: string that matches test-id sample name
"""
if "Practical Malware Analysis Lab" in str(psx_str):
# <PosixPath>/'Practical Malware Analysis Lab 16-01.exe_' -> 'pma16-01'
wanted_str = "pma" + str(psx_str).split("/")[-1][len("Practical Malware Analysis Lab ") : -5]
else:
# <PosixPath>/mimikatz.exe_ -> mimikatz
wanted_str = str(psx_str).split("/")[-1][:-5]
if "_" in wanted_str:
# al-khaser_x86 -> al-khaser x86
wanted_str = wanted_str.replace("_", " ")
return wanted_str
def check_input_file(wanted):
"""check that test is running on the loaded sample
params: wanted - PosixPath() passed from test arg
"""
import capa.ghidra.helpers as ghidra_helpers
found = ghidra_helpers.get_file_md5()
sample_name = standardize_posix_str(wanted)
if not found.startswith(fixtures.get_sample_md5_by_name(sample_name)):
raise RuntimeError(f"please run the tests against sample with MD5: `{found}`")
@pytest.mark.skipif(ghidra_present is False, reason="Ghidra tests must be ran within Ghidra")
@fixtures.parametrize("sample,scope,feature,expected", fixtures.FEATURE_PRESENCE_TESTS, indirect=["sample", "scope"])
def test_ghidra_features(sample, scope, feature, expected):
try:
check_input_file(sample)
except RuntimeError:
pytest.skip(reason="Test must be run against the sample loaded in Ghidra")
fixtures.do_test_feature_presence(fixtures.get_ghidra_extractor, sample, scope, feature, expected)
@pytest.mark.skipif(ghidra_present is False, reason="PyGhidra not installed")
@pytest.mark.skipif(ghidra_present is False, reason="Ghidra tests must be ran within Ghidra")
@fixtures.parametrize(
"sample,scope,feature,expected", fixtures.FEATURE_COUNT_TESTS_GHIDRA, indirect=["sample", "scope"]
)
def test_ghidra_feature_counts(sample, scope, feature, expected):
try:
check_input_file(sample)
except RuntimeError:
pytest.skip(reason="Test must be run against the sample loaded in Ghidra")
fixtures.do_test_feature_count(fixtures.get_ghidra_extractor, sample, scope, feature, expected)
if __name__ == "__main__":
# No support for faulthandler module in Ghidrathon, see:
# https://github.com/mandiant/Ghidrathon/issues/70
sys.exit(pytest.main(["--pyargs", "-p no:faulthandler", "test_ghidra_features"]))

View File

@@ -1,86 +0,0 @@
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
from pathlib import Path
import pytest
import fixtures
import capa.features.extractors.ida.idalib
from capa.features.file import FunctionName
from capa.features.insn import API
from capa.features.common import Characteristic
logger = logging.getLogger(__name__)
idalib_present = capa.features.extractors.ida.idalib.has_idalib()
if idalib_present:
try:
import idapro # noqa: F401 [imported but unused]
import ida_kernwin
kernel_version: str = ida_kernwin.get_kernel_version()
except ImportError:
idalib_present = False
kernel_version = "0.0"
@pytest.mark.skipif(idalib_present is False, reason="Skip idalib tests if the idalib Python API is not installed")
@fixtures.parametrize(
"sample,scope,feature,expected",
fixtures.FEATURE_PRESENCE_TESTS + fixtures.FEATURE_SYMTAB_FUNC_TESTS,
indirect=["sample", "scope"],
)
def test_idalib_features(sample: Path, scope, feature, expected):
if kernel_version in {"9.0", "9.1"} and sample.name.startswith("2bf18d"):
if isinstance(feature, (API, FunctionName)) and feature.value == "__libc_connect":
# see discussion here: https://github.com/mandiant/capa/pull/2742#issuecomment-3674146335
#
# > i confirmed that there were changes in 9.2 related to the ELF loader handling names,
# > so I think its reasonable to conclude that 9.1 and older had a bug that
# > prevented this name from surfacing.
pytest.xfail(f"IDA {kernel_version} does not extract all ELF symbols")
if kernel_version in {"9.0"} and sample.name.startswith("Practical Malware Analysis Lab 12-04.exe_"):
if isinstance(feature, Characteristic) and feature.value == "embedded pe":
# see discussion here: https://github.com/mandiant/capa/pull/2742#issuecomment-3667086165
#
# idalib for IDA 9.0 doesn't support argv arguments, so we can't ask that resources are loaded
pytest.xfail("idalib 9.0 does not support loading resource segments")
try:
fixtures.do_test_feature_presence(fixtures.get_idalib_extractor, sample, scope, feature, expected)
finally:
logger.debug("closing database...")
import idapro
idapro.close_database(save=False)
logger.debug("closed database.")
@pytest.mark.skipif(idalib_present is False, reason="Skip idalib tests if the idalib Python API is not installed")
@fixtures.parametrize(
"sample,scope,feature,expected",
fixtures.FEATURE_COUNT_TESTS,
indirect=["sample", "scope"],
)
def test_idalib_feature_counts(sample, scope, feature, expected):
try:
fixtures.do_test_feature_count(fixtures.get_idalib_extractor, sample, scope, feature, expected)
finally:
logger.debug("closing database...")
import idapro
idapro.close_database(save=False)
logger.debug("closed database.")

View File

@@ -27,7 +27,7 @@
"eslint-plugin-vue": "^9.23.0",
"jsdom": "^24.1.0",
"prettier": "^3.2.5",
"vite": "^6.4.1",
"vite": "^6.3.4",
"vite-plugin-singlefile": "^2.2.0",
"vitest": "^3.0.9"
}
@@ -1416,20 +1416,6 @@
"node": ">=8"
}
},
"node_modules/call-bind-apply-helpers": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/call-bind-apply-helpers/-/call-bind-apply-helpers-1.0.2.tgz",
"integrity": "sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==",
"dev": true,
"license": "MIT",
"dependencies": {
"es-errors": "^1.3.0",
"function-bind": "^1.1.2"
},
"engines": {
"node": ">= 0.4"
}
},
"node_modules/callsites": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/callsites/-/callsites-3.1.0.tgz",
@@ -1660,21 +1646,6 @@
"node": ">=6.0.0"
}
},
"node_modules/dunder-proto": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz",
"integrity": "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==",
"dev": true,
"license": "MIT",
"dependencies": {
"call-bind-apply-helpers": "^1.0.1",
"es-errors": "^1.3.0",
"gopd": "^1.2.0"
},
"engines": {
"node": ">= 0.4"
}
},
"node_modules/eastasianwidth": {
"version": "0.2.0",
"resolved": "https://registry.npmjs.org/eastasianwidth/-/eastasianwidth-0.2.0.tgz",
@@ -1740,26 +1711,6 @@
"url": "https://github.com/fb55/entities?sponsor=1"
}
},
"node_modules/es-define-property": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.1.tgz",
"integrity": "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==",
"dev": true,
"license": "MIT",
"engines": {
"node": ">= 0.4"
}
},
"node_modules/es-errors": {
"version": "1.3.0",
"resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz",
"integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==",
"dev": true,
"license": "MIT",
"engines": {
"node": ">= 0.4"
}
},
"node_modules/es-module-lexer": {
"version": "1.6.0",
"resolved": "https://registry.npmjs.org/es-module-lexer/-/es-module-lexer-1.6.0.tgz",
@@ -1767,35 +1718,6 @@
"dev": true,
"license": "MIT"
},
"node_modules/es-object-atoms": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/es-object-atoms/-/es-object-atoms-1.1.1.tgz",
"integrity": "sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==",
"dev": true,
"license": "MIT",
"dependencies": {
"es-errors": "^1.3.0"
},
"engines": {
"node": ">= 0.4"
}
},
"node_modules/es-set-tostringtag": {
"version": "2.1.0",
"resolved": "https://registry.npmjs.org/es-set-tostringtag/-/es-set-tostringtag-2.1.0.tgz",
"integrity": "sha512-j6vWzfrGVfyXxge+O0x5sh6cvxAog0a/4Rdd2K36zCMV5eJ+/+tOAngRO8cODMNWbVRdVlmGZQL2YS3yR8bIUA==",
"dev": true,
"license": "MIT",
"dependencies": {
"es-errors": "^1.3.0",
"get-intrinsic": "^1.2.6",
"has-tostringtag": "^1.0.2",
"hasown": "^2.0.2"
},
"engines": {
"node": ">= 0.4"
}
},
"node_modules/esbuild": {
"version": "0.25.1",
"resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.25.1.tgz",
@@ -2186,16 +2108,13 @@
}
},
"node_modules/form-data": {
"version": "4.0.4",
"resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.4.tgz",
"integrity": "sha512-KrGhL9Q4zjj0kiUt5OO4Mr/A/jlI2jDYs5eHBpYHPcBEVSiipAvn2Ko2HnPe20rmcuuvMHNdZFp+4IlGTMF0Ow==",
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz",
"integrity": "sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==",
"dev": true,
"license": "MIT",
"dependencies": {
"asynckit": "^0.4.0",
"combined-stream": "^1.0.8",
"es-set-tostringtag": "^2.1.0",
"hasown": "^2.0.2",
"mime-types": "^2.1.12"
},
"engines": {
@@ -2222,61 +2141,11 @@
"node": "^8.16.0 || ^10.6.0 || >=11.0.0"
}
},
"node_modules/function-bind": {
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz",
"integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==",
"dev": true,
"license": "MIT",
"funding": {
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/get-intrinsic": {
"version": "1.3.0",
"resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.3.0.tgz",
"integrity": "sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ==",
"dev": true,
"license": "MIT",
"dependencies": {
"call-bind-apply-helpers": "^1.0.2",
"es-define-property": "^1.0.1",
"es-errors": "^1.3.0",
"es-object-atoms": "^1.1.1",
"function-bind": "^1.1.2",
"get-proto": "^1.0.1",
"gopd": "^1.2.0",
"has-symbols": "^1.1.0",
"hasown": "^2.0.2",
"math-intrinsics": "^1.1.0"
},
"engines": {
"node": ">= 0.4"
},
"funding": {
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/get-proto": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/get-proto/-/get-proto-1.0.1.tgz",
"integrity": "sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==",
"dev": true,
"license": "MIT",
"dependencies": {
"dunder-proto": "^1.0.1",
"es-object-atoms": "^1.0.0"
},
"engines": {
"node": ">= 0.4"
}
},
"node_modules/glob": {
"version": "10.5.0",
"resolved": "https://registry.npmjs.org/glob/-/glob-10.5.0.tgz",
"integrity": "sha512-DfXN8DfhJ7NH3Oe7cFmu3NCu1wKbkReJ8TorzSAFbSKrlNaQSKfIzqYqVY8zlbs2NLBbWpRiU52GX2PbaBVNkg==",
"version": "10.4.2",
"resolved": "https://registry.npmjs.org/glob/-/glob-10.4.2.tgz",
"integrity": "sha512-GwMlUF6PkPo3Gk21UxkCohOv0PLcIXVtKyLlpEI28R/cO/4eNOdmLk3CMW1wROV/WR/EsZOWAfBbBOqYvs88/w==",
"dev": true,
"license": "ISC",
"dependencies": {
"foreground-child": "^3.1.0",
"jackspeak": "^3.1.2",
@@ -2288,6 +2157,9 @@
"bin": {
"glob": "dist/esm/bin.mjs"
},
"engines": {
"node": ">=16 || 14 >=14.18"
},
"funding": {
"url": "https://github.com/sponsors/isaacs"
}
@@ -2343,19 +2215,6 @@
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/gopd": {
"version": "1.2.0",
"resolved": "https://registry.npmjs.org/gopd/-/gopd-1.2.0.tgz",
"integrity": "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==",
"dev": true,
"license": "MIT",
"engines": {
"node": ">= 0.4"
},
"funding": {
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/graphemer": {
"version": "1.4.0",
"resolved": "https://registry.npmjs.org/graphemer/-/graphemer-1.4.0.tgz",
@@ -2371,48 +2230,6 @@
"node": ">=8"
}
},
"node_modules/has-symbols": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.1.0.tgz",
"integrity": "sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==",
"dev": true,
"license": "MIT",
"engines": {
"node": ">= 0.4"
},
"funding": {
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/has-tostringtag": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/has-tostringtag/-/has-tostringtag-1.0.2.tgz",
"integrity": "sha512-NqADB8VjPFLM2V0VvHUewwwsw0ZWBaIdgo+ieHtK3hasLz4qeCRjYcqfB6AQrBggRKppKF8L52/VqdVsO47Dlw==",
"dev": true,
"license": "MIT",
"dependencies": {
"has-symbols": "^1.0.3"
},
"engines": {
"node": ">= 0.4"
},
"funding": {
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/hasown": {
"version": "2.0.2",
"resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz",
"integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==",
"dev": true,
"license": "MIT",
"dependencies": {
"function-bind": "^1.1.2"
},
"engines": {
"node": ">= 0.4"
}
},
"node_modules/highlight.js": {
"version": "11.9.0",
"resolved": "https://registry.npmjs.org/highlight.js/-/highlight.js-11.9.0.tgz",
@@ -2639,11 +2456,10 @@
}
},
"node_modules/js-yaml": {
"version": "4.1.1",
"resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.1.tgz",
"integrity": "sha512-qQKT4zQxXl8lLwBtHMWwaTcGfFOZviOJet3Oy/xmGk2gZH677CJM9EvtfdSkgWcATZhj/55JZ0rmy3myCT5lsA==",
"version": "4.1.0",
"resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.0.tgz",
"integrity": "sha512-wpxZs9NoxZaJESJGIZTyDEaYpl0FKSA+FB9aJiyemKhMwkxQg63h4T1KJgUGHpTqPDNRcmmYLugrRjJlBtWvRA==",
"dev": true,
"license": "MIT",
"dependencies": {
"argparse": "^2.0.1"
},
@@ -2792,16 +2608,6 @@
"@jridgewell/sourcemap-codec": "^1.5.0"
}
},
"node_modules/math-intrinsics": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/math-intrinsics/-/math-intrinsics-1.1.0.tgz",
"integrity": "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==",
"dev": true,
"license": "MIT",
"engines": {
"node": ">= 0.4"
}
},
"node_modules/micromatch": {
"version": "4.0.8",
"resolved": "https://registry.npmjs.org/micromatch/-/micromatch-4.0.8.tgz",
@@ -3800,9 +3606,9 @@
"dev": true
},
"node_modules/vite": {
"version": "6.4.1",
"resolved": "https://registry.npmjs.org/vite/-/vite-6.4.1.tgz",
"integrity": "sha512-+Oxm7q9hDoLMyJOYfUYBuHQo+dkAloi33apOPP56pzj+vsdJDzr+j1NISE5pyaAuKL4A3UD34qd0lx5+kfKp2g==",
"version": "6.3.4",
"resolved": "https://registry.npmjs.org/vite/-/vite-6.3.4.tgz",
"integrity": "sha512-BiReIiMS2fyFqbqNT/Qqt4CVITDU9M9vE+DKcVAsB+ZV0wvTKd+3hMbkpxz1b+NmEDMegpVbisKiAZOnvO92Sw==",
"dev": true,
"license": "MIT",
"dependencies": {

View File

@@ -33,7 +33,7 @@
"eslint-plugin-vue": "^9.23.0",
"jsdom": "^24.1.0",
"prettier": "^3.2.5",
"vite": "^6.4.1",
"vite": "^6.3.4",
"vite-plugin-singlefile": "^2.2.0",
"vitest": "^3.0.9"
}

View File

@@ -212,18 +212,6 @@
<h2 class="mt-3">Tool Updates</h2>
<h3 class="mt-2">v9.3.1 (<em>2025-11-19</em>)</h3>
<p class="mt-0">
This patch release fixes a missing import in the capa explorer plugin for IDA Pro.
</p>
<h3 class="mt-2">v9.3.0 (<em>2025-11-12</em>)</h3>
<p class="mt-0">
capa v9.3.0 comes with over 20 new and/or improved rules.
For IDA users, the capa explorer plugin is now available via the IDA Pro plugin repository and contains a Qt compatibility layer for PyQt5 and PySide6 support.
Additionally, a Binary Ninja bug has been fixed. Released binaries now include ARM64 builds (Linux and macOS).
</p>
<h3 class="mt-2">v9.2.1 (<em>2025-06-06</em>)</h3>
<p class="mt-0">
This point release fixes bugs including removing an unnecessary PyInstaller warning message and enabling the standalone binary to execute on systems running older versions of glibc.