feat(repo): add git repository metadata to reports (#9252)

Co-authored-by: knqyf263 <knqyf263@users.noreply.github.com>
Co-authored-by: DmitriyLewen <91113035+DmitriyLewen@users.noreply.github.com>
This commit is contained in:
Teppei Fukuda
2025-07-29 15:49:58 +04:00
committed by GitHub
parent b4193d0d31
commit f4b2cf10e9
8 changed files with 165 additions and 36 deletions

View File

@@ -109,6 +109,26 @@ $ trivy repo --scanners license (REPO_PATH | REPO_URL)
Trivy can generate SBOM for code repositories.
See [here](../supply-chain/sbom.md) for details.
## Git Metadata
When scanning git repositories (both local and remote), Trivy automatically extracts and includes git metadata in the scan results.
This metadata provides context about the scanned repository.
The metadata includes information such as:
- Repository URL
- Branch name
- Tags
- Commit details (hash, message, committer)
- Author information
This feature works automatically for any git repository.
When using JSON format output, the git metadata will be included in the `Metadata` field.
For detailed information about the available fields, please refer to the JSON output of your scan results.
```bash
$ trivy repo --format json <repo-name>
```
## Scan Cache
When scanning git repositories, Trivy stores analysis results in the cache, using the latest commit hash as the key.
Note that the cache is not used when the repository is dirty, because cached results keyed by the commit hash would miss files that are not committed.

View File

@@ -313,6 +313,10 @@ func TestRepository(t *testing.T) {
input: "testdata/fixtures/repo/trivy-ci-test",
},
golden: "testdata/test-repo.json.golden",
override: func(_ *testing.T, want, _ *types.Report) {
// Clear all metadata as this is a local directory scan without git info
want.Metadata = types.Metadata{}
},
},
{
name: "installed.json",

View File

@@ -13,7 +13,13 @@
"diff_ids": null
},
"config": {}
}
},
"RepoURL": "https://github.com/knqyf263/trivy-ci-test",
"Branch": "master",
"Commit": "5ae342eb2802672402d9b2c26f09e2051bbd91b8",
"CommitMsg": "Use COPY instead of ADD in Dockerfile (#4)",
"Author": "gy741 <gy741.kim@gmail.com>",
"Committer": "knqyf263 <knqyf263@gmail.com>"
},
"Results": [
{

View File

@@ -92,6 +92,7 @@ type Reference struct {
ID string
BlobIDs []string
ImageMetadata ImageMetadata
RepoMetadata RepoMetadata
// SBOM
BOM *core.BOM
@@ -104,3 +105,13 @@ type ImageMetadata struct {
RepoDigests []string
ConfigFile v1.ConfigFile
}
// RepoMetadata holds git metadata describing a scanned repository.
// It is carried on Reference alongside ImageMetadata.
type RepoMetadata struct {
RepoURL string // repository URL (taken from the "upstream" remote, falling back to "origin")
Branch string // current branch name (only set when HEAD refers to a branch)
Tags []string // tag names pointing to HEAD
Commit string // commit hash of HEAD
CommitMsg string // commit message
Author string // commit author
Committer string // commit committer
}

View File

@@ -13,6 +13,7 @@ import (
"sync"
"github.com/go-git/go-git/v5"
"github.com/go-git/go-git/v5/plumbing"
"github.com/google/wire"
"github.com/samber/lo"
"golang.org/x/xerrors"
@@ -56,7 +57,8 @@ type Artifact struct {
artifactOption artifact.Option
commitHash string // only set when the git repository is clean
isClean bool // whether git repository is clean (for caching)
repoMetadata artifact.RepoMetadata // git repository metadata
}
func NewArtifact(rootPath string, c cache.ArtifactCache, w Walker, opt artifact.Option) (artifact.Artifact, error) {
@@ -86,10 +88,14 @@ func NewArtifact(rootPath string, c cache.ArtifactCache, w Walker, opt artifact.
art.logger.Debug("Analyzing...", log.String("root", art.rootPath),
lo.Ternary(opt.Original != "", log.String("original", opt.Original), log.Nil))
// Check if the directory is a git repository and clean
if hash, err := gitCommitHash(art.rootPath); err == nil {
art.logger.Debug("Using the latest commit hash for calculating cache key", log.String("commit_hash", hash))
art.commitHash = hash
// Check if the directory is a git repository and extract metadata
if art.isClean, art.repoMetadata, err = extractGitInfo(art.rootPath); err == nil {
if art.isClean {
art.logger.Debug("Using the latest commit hash for calculating cache key",
log.String("commit_hash", art.repoMetadata.Commit))
} else {
art.logger.Debug("Repository is dirty, random cache key will be used")
}
} else if !errors.Is(err, git.ErrRepositoryNotExists) {
// Only log if the file path is a git repository
art.logger.Debug("Random cache key will be used", log.Err(err))
@@ -98,36 +104,72 @@ func NewArtifact(rootPath string, c cache.ArtifactCache, w Walker, opt artifact.
return art, nil
}
// gitCommitHash returns the latest commit hash if the git repository is clean, otherwise returns an error
func gitCommitHash(dir string) (string, error) {
// extractGitInfo extracts git repository information including clean status and metadata
// Returns clean status (for caching), metadata, and error
func extractGitInfo(dir string) (bool, artifact.RepoMetadata, error) {
var metadata artifact.RepoMetadata
repo, err := git.PlainOpen(dir)
if err != nil {
return "", xerrors.Errorf("failed to open git repository: %w", err)
return false, metadata, xerrors.Errorf("failed to open git repository: %w", err)
}
// Get the working tree
worktree, err := repo.Worktree()
if err != nil {
return "", xerrors.Errorf("failed to get worktree: %w", err)
}
// Get the current status
status, err := worktree.Status()
if err != nil {
return "", xerrors.Errorf("failed to get status: %w", err)
}
if !status.IsClean() {
return "", xerrors.New("repository is dirty")
}
// Get the HEAD commit hash
// Get HEAD commit
head, err := repo.Head()
if err != nil {
return "", xerrors.Errorf("failed to get HEAD: %w", err)
return false, metadata, xerrors.Errorf("failed to get HEAD: %w", err)
}
return head.Hash().String(), nil
commit, err := repo.CommitObject(head.Hash())
if err != nil {
return false, metadata, xerrors.Errorf("failed to get commit object: %w", err)
}
// Extract basic commit metadata
metadata.Commit = head.Hash().String()
metadata.CommitMsg = strings.TrimSpace(commit.Message)
metadata.Author = commit.Author.String()
metadata.Committer = commit.Committer.String()
// Get branch name
if head.Name().IsBranch() {
metadata.Branch = head.Name().Short()
}
// Get all tag names that point to HEAD
if tags, err := repo.Tags(); err == nil {
var headTags []string
_ = tags.ForEach(func(tag *plumbing.Reference) error {
if tag.Hash() == head.Hash() {
headTags = append(headTags, tag.Name().Short())
}
return nil
})
metadata.Tags = headTags
}
// Get repository URL - prefer upstream, fallback to origin
remoteConfig, err := repo.Remote("upstream")
if err != nil {
remoteConfig, err = repo.Remote("origin")
}
if err == nil && len(remoteConfig.Config().URLs) > 0 {
metadata.RepoURL = remoteConfig.Config().URLs[0]
}
// Check if repository is clean for caching purposes
worktree, err := repo.Worktree()
if err != nil {
return false, metadata, xerrors.Errorf("failed to get worktree: %w", err)
}
status, err := worktree.Status()
if err != nil {
return false, metadata, xerrors.Errorf("failed to get status: %w", err)
}
// Return clean status and metadata
return status.IsClean(), metadata, nil
}
func (a Artifact) Inspect(ctx context.Context) (artifact.Reference, error) {
@@ -138,7 +180,7 @@ func (a Artifact) Inspect(ctx context.Context) (artifact.Reference, error) {
}
// Check if the cache exists only when it's a clean git repository
if a.commitHash != "" {
if a.isClean && a.repoMetadata.Commit != "" {
_, missingBlobs, err := a.cache.MissingBlobs(cacheKey, []string{cacheKey})
if err != nil {
return artifact.Reference{}, xerrors.Errorf("unable to get missing blob: %w", err)
@@ -231,10 +273,11 @@ func (a Artifact) Inspect(ctx context.Context) (artifact.Reference, error) {
}
return artifact.Reference{
Name: hostName,
Type: a.artifactOption.Type,
ID: cacheKey, // use a cache key as pseudo artifact ID
BlobIDs: []string{cacheKey},
Name: hostName,
Type: a.artifactOption.Type,
ID: cacheKey, // use a cache key as pseudo artifact ID
BlobIDs: []string{cacheKey},
RepoMetadata: a.repoMetadata,
}, nil
}
@@ -295,7 +338,7 @@ func (a Artifact) analyzeWithTraversal(ctx context.Context, root, relativePath s
func (a Artifact) Clean(reference artifact.Reference) error {
// Don't delete cache if it's a clean git repository
if a.commitHash != "" {
if a.isClean && a.repoMetadata.Commit != "" {
return nil
}
return a.cache.DeleteBlobs(reference.BlobIDs)
@@ -303,8 +346,8 @@ func (a Artifact) Clean(reference artifact.Reference) error {
func (a Artifact) calcCacheKey() (string, error) {
// If this is a clean git repository, use the commit hash as cache key
if a.commitHash != "" {
return cache.CalcKey(a.commitHash, artifactVersion, a.analyzer.AnalyzerVersions(), a.handlerManager.Versions(), a.artifactOption)
if a.isClean && a.repoMetadata.Commit != "" {
return cache.CalcKey(a.repoMetadata.Commit, artifactVersion, a.analyzer.AnalyzerVersions(), a.handlerManager.Versions(), a.artifactOption)
}
// For non-git repositories or dirty git repositories, use UUID as cache key

View File

@@ -185,6 +185,15 @@ func TestArtifact_Inspect(t *testing.T) {
BlobIDs: []string{
"sha256:dc7c6039424c9fce969d3c2972d261af442a33f13e7494464386dbe280612d4c", // Calculated from commit hash
},
RepoMetadata: artifact.RepoMetadata{
RepoURL: ts.URL + "/test-repo.git",
Branch: "main",
Tags: []string{"v0.0.1"},
Commit: "8a19b492a589955c3e70c6ad8efd1e4ec6ae0d35",
CommitMsg: "Update README.md",
Author: "Teppei Fukuda <knqyf263@gmail.com>",
Committer: "GitHub <noreply@github.com>",
},
},
wantBlobInfo: &types.BlobInfo{
SchemaVersion: types.BlobJSONSchemaVersion,
@@ -200,6 +209,15 @@ func TestArtifact_Inspect(t *testing.T) {
BlobIDs: []string{
"sha256:dc7c6039424c9fce969d3c2972d261af442a33f13e7494464386dbe280612d4c", // Calculated from commit hash
},
RepoMetadata: artifact.RepoMetadata{
RepoURL: "https://github.com/aquasecurity/trivy-test-repo/",
Branch: "main",
Tags: []string{"v0.0.1"},
Commit: "8a19b492a589955c3e70c6ad8efd1e4ec6ae0d35",
CommitMsg: "Update README.md",
Author: "Teppei Fukuda <knqyf263@gmail.com>",
Committer: "GitHub <noreply@github.com>",
},
},
wantBlobInfo: &types.BlobInfo{
SchemaVersion: types.BlobJSONSchemaVersion,
@@ -221,6 +239,15 @@ func TestArtifact_Inspect(t *testing.T) {
BlobIDs: []string{
"sha256:6f4672e139d4066fd00391df614cdf42bda5f7a3f005d39e1d8600be86157098",
},
RepoMetadata: artifact.RepoMetadata{
RepoURL: "https://github.com/aquasecurity/trivy-test-repo/",
Branch: "main",
Tags: []string{"v0.0.1"},
Commit: "8a19b492a589955c3e70c6ad8efd1e4ec6ae0d35",
CommitMsg: "Update README.md",
Author: "Teppei Fukuda <knqyf263@gmail.com>",
Committer: "GitHub <noreply@github.com>",
},
},
wantBlobInfo: &types.BlobInfo{
SchemaVersion: types.BlobJSONSchemaVersion,

View File

@@ -207,6 +207,15 @@ func (s Service) ScanArtifact(ctx context.Context, options types.ScanOptions) (t
ImageConfig: artifactInfo.ImageMetadata.ConfigFile,
Size: scanResponse.Layers.TotalSize(),
Layers: lo.Ternary(len(scanResponse.Layers) > 0, scanResponse.Layers, nil),
// Git repository
RepoURL: artifactInfo.RepoMetadata.RepoURL,
Branch: artifactInfo.RepoMetadata.Branch,
Tags: artifactInfo.RepoMetadata.Tags,
Commit: artifactInfo.RepoMetadata.Commit,
CommitMsg: artifactInfo.RepoMetadata.CommitMsg,
Author: artifactInfo.RepoMetadata.Author,
Committer: artifactInfo.RepoMetadata.Committer,
},
Results: scanResponse.Results,
BOM: artifactInfo.BOM,

View File

@@ -34,6 +34,15 @@ type Metadata struct {
RepoDigests []string `json:",omitempty"`
ImageConfig v1.ConfigFile `json:",omitzero"`
Layers ftypes.Layers `json:",omitzero"`
// Git repository
RepoURL string `json:",omitzero"`
Branch string `json:",omitzero"`
Tags []string `json:",omitzero"`
Commit string `json:",omitzero"`
CommitMsg string `json:",omitzero"`
Author string `json:",omitzero"`
Committer string `json:",omitzero"`
}
// Results to hold list of Result