From b5062f3ae20044d1452bf293f210a24cd1d419b3 Mon Sep 17 00:00:00 2001 From: Teppei Fukuda Date: Mon, 27 Jan 2025 17:53:49 +0900 Subject: [PATCH] feat(fs): use git commit hash as cache key for clean repositories (#8278) Signed-off-by: knqyf263 --- .gitignore | 1 + docs/docs/configuration/cache.md | 6 +- .../configuration/cli/trivy_repository.md | 2 +- docs/docs/target/container_image.md | 6 + docs/docs/target/filesystem.md | 10 ++ docs/docs/target/repository.md | 6 + docs/docs/target/vm.md | 8 + internal/gittest/server.go | 47 ++++++ internal/gittest/testdata/fixture.go | 43 ++++++ magefiles/magefile.go | 8 +- pkg/commands/app.go | 2 - pkg/fanal/artifact/artifact.go | 5 + pkg/fanal/artifact/local/fs.go | 112 ++++++++++++-- pkg/fanal/artifact/repo/git.go | 40 +---- pkg/fanal/artifact/repo/git_test.go | 144 +++++++++++++----- .../repo/testdata/test-repo/anothertest.txt | 1 - .../artifact/repo/testdata/test-repo/test.txt | 1 - pkg/log/handler.go | 1 + 18 files changed, 345 insertions(+), 98 deletions(-) create mode 100644 internal/gittest/testdata/fixture.go delete mode 100644 pkg/fanal/artifact/repo/testdata/test-repo/anothertest.txt delete mode 100644 pkg/fanal/artifact/repo/testdata/test-repo/test.txt diff --git a/.gitignore b/.gitignore index 88070261a6..a2fc00ad08 100644 --- a/.gitignore +++ b/.gitignore @@ -26,6 +26,7 @@ thumbs.db coverage.txt integration/testdata/fixtures/images integration/testdata/fixtures/vm-images +internal/gittest/testdata/test-repo # SBOMs generated during CI /bom.json diff --git a/docs/docs/configuration/cache.md b/docs/docs/configuration/cache.md index 2ad5086d0e..87618ddc02 100644 --- a/docs/docs/configuration/cache.md +++ b/docs/docs/configuration/cache.md @@ -51,9 +51,7 @@ It supports three types of backends for this cache: - TTL can be configured via `--cache-ttl` ### Local File System -The local file system backend is the default choice for container and VM image scans. 
-When scanning container images, it stores analysis results on a per-layer basis, using layer IDs as keys. -This approach enables faster scans of the same container image or different images that share layers. +The local file system backend is the default choice for container image, VM image and repository scans. !!! note Internally, this backend uses [BoltDB][boltdb], which has an important limitation: only one process can access the cache at a time. @@ -63,7 +61,7 @@ This approach enables faster scans of the same container image or different imag ### Memory The memory backend stores analysis results in memory, which means the cache is discarded when the process ends. This makes it useful in scenarios where caching is not required or desired. -It serves as the default for repository, filesystem and SBOM scans and can also be employed for container image scans when caching is unnecessary. +It serves as the default for filesystem and SBOM scans and can also be employed for container image scans when caching is unnecessary. To use the memory backend for a container image scan, you can use the following command: diff --git a/docs/docs/references/configuration/cli/trivy_repository.md b/docs/docs/references/configuration/cli/trivy_repository.md index 38ae6611b5..1b756d9003 100644 --- a/docs/docs/references/configuration/cli/trivy_repository.md +++ b/docs/docs/references/configuration/cli/trivy_repository.md @@ -19,7 +19,7 @@ trivy repository [flags] (REPO_PATH | REPO_URL) ``` --branch string pass the branch name to be scanned - --cache-backend string [EXPERIMENTAL] cache backend (e.g. redis://localhost:6379) (default "memory") + --cache-backend string [EXPERIMENTAL] cache backend (e.g. 
redis://localhost:6379) (default "fs") --cache-ttl duration cache TTL when using redis as cache backend --cf-params strings specify paths to override the CloudFormation parameters files --check-namespaces strings Rego namespaces diff --git a/docs/docs/target/container_image.md b/docs/docs/target/container_image.md index 9e8a7d6c73..b5066cde58 100644 --- a/docs/docs/target/container_image.md +++ b/docs/docs/target/container_image.md @@ -463,6 +463,12 @@ trivy image --compliance docker-cis-1.6.0 [YOUR_IMAGE_NAME] ## Authentication Please reference [this page](../advanced/private-registries/index.md). +## Scan Cache +When scanning container images, it stores analysis results in the cache, using the image ID and the layer IDs as the key. +This approach enables faster scans of the same container image or different images that share layers. + +More details are available in the [cache documentation](../configuration/cache.md#scan-cache-backend). + ## Options ### Scan Image on a specific Architecture and OS By default, Trivy loads an image on a "linux/amd64" machine. diff --git a/docs/docs/target/filesystem.md b/docs/docs/target/filesystem.md index 72e47f0e0c..273a511887 100644 --- a/docs/docs/target/filesystem.md +++ b/docs/docs/target/filesystem.md @@ -91,3 +91,13 @@ $ trivy fs --scanners license /path/to/project ## SBOM generation Trivy can generate SBOM for local projects. See [here](../supply-chain/sbom.md) for the detail. + +## Scan Cache +When scanning local projects, it doesn't use the cache by default. +However, when the local project is a git repository with a clean working tree and a cache backend other than the memory backend is enabled, it stores analysis results using the latest commit hash as the key. + +```shell +$ trivy fs --cache-backend fs /path/to/git/repo +``` + +More details are available in the [cache documentation](../configuration/cache.md#scan-cache-backend).
diff --git a/docs/docs/target/repository.md b/docs/docs/target/repository.md index a4385825e3..9a8a3165d2 100644 --- a/docs/docs/target/repository.md +++ b/docs/docs/target/repository.md @@ -109,6 +109,12 @@ $ trivy repo --scanners license (REPO_PATH | REPO_URL) Trivy can generate SBOM for code repositories. See [here](../supply-chain/sbom.md) for the detail. +## Scan Cache +When scanning git repositories, it stores analysis results in the cache, using the latest commit hash as the key. +Note that the cache is not used when the repository is dirty; otherwise, Trivy would miss the files that are not committed. + +More details are available in the [cache documentation](../configuration/cache.md#scan-cache-backend). + ## References The following flags and environmental variables are available for remote git repositories. diff --git a/docs/docs/target/vm.md b/docs/docs/target/vm.md index 44ab945ce3..4bba676264 100644 --- a/docs/docs/target/vm.md +++ b/docs/docs/target/vm.md @@ -182,6 +182,14 @@ $ trivy vm --scanners license [YOUR_VM_IMAGE] Trivy can generate SBOM for VM images. See [here](../supply-chain/sbom.md) for the detail. +## Scan Cache +When scanning AMI or EBS snapshots, it stores analysis results in the cache, using the snapshot ID as the key. +Scanning the same snapshot several times skips analysis if the cache is already available. + +When scanning local files, it doesn't use the cache by default. + +More details are available in the [cache documentation](../configuration/cache.md#scan-cache-backend).
+ ## Supported Architectures ### Virtual machine images diff --git a/internal/gittest/server.go b/internal/gittest/server.go index 277d645de2..0a0012b297 100644 --- a/internal/gittest/server.go +++ b/internal/gittest/server.go @@ -5,7 +5,9 @@ package gittest import ( "errors" "net/http/httptest" + "os" "path/filepath" + "runtime" "testing" "time" @@ -59,6 +61,51 @@ func NewServer(t *testing.T, repo, dir string) *httptest.Server { return httptest.NewServer(service) } +// NewServerWithRepository creates a git server with an existing repository +func NewServerWithRepository(t *testing.T, repo, dir string) *httptest.Server { + // Create a bare repository + bareDir := t.TempDir() + gitDir := filepath.Join(bareDir, repo+".git") + + // Clone the existing repository as a bare repository + r, err := git.PlainClone(gitDir, true, &git.CloneOptions{ + URL: dir, + Tags: git.AllTags, + }) + require.NoError(t, err) + + // Fetch all remote branches and create local branches + err = r.Fetch(&git.FetchOptions{ + RefSpecs: []config.RefSpec{ + "+refs/remotes/origin/*:refs/heads/*", + }, + Tags: git.AllTags, + }) + if err != nil && !errors.Is(err, git.NoErrAlreadyUpToDate) { + require.NoError(t, err) + } + + // Set up a git server + service := gitkit.New(gitkit.Config{Dir: bareDir}) + err = service.Setup() + require.NoError(t, err) + + return httptest.NewServer(service) +} + +// NewTestServer creates a git server with the local copy of "github.com/aquasecurity/trivy-test-repo". +// If the test repository doesn't exist, it suggests running 'mage test:unit'. +func NewTestServer(t *testing.T) *httptest.Server { + _, filePath, _, _ := runtime.Caller(0) + dir := filepath.Join(filepath.Dir(filePath), "testdata", "test-repo") + + if _, err := os.Stat(dir); os.IsNotExist(err) { + require.Fail(t, "test-repo not found. 
Please run 'mage test:unit' to set up the test fixtures") + } + + return NewServerWithRepository(t, "test-repo", dir) +} + func Clone(t *testing.T, ts *httptest.Server, repo, worktree string) *git.Repository { cloneOptions := git.CloneOptions{ URL: ts.URL + "/" + repo + ".git", diff --git a/internal/gittest/testdata/fixture.go b/internal/gittest/testdata/fixture.go new file mode 100644 index 0000000000..75b0d75b62 --- /dev/null +++ b/internal/gittest/testdata/fixture.go @@ -0,0 +1,43 @@ +package gittest + +import ( + "log/slog" + "path/filepath" + "runtime" + + "github.com/go-git/go-git/v5" + "github.com/magefile/mage/target" + "golang.org/x/xerrors" +) + +const ( + repoURL = "https://github.com/aquasecurity/trivy-test-repo/" + repoDir = "test-repo" // subdirectory for the cloned repository +) + +// Fixtures clones a Git repository for unit tests +func Fixtures() error { + _, filePath, _, _ := runtime.Caller(0) + dir := filepath.Dir(filePath) + cloneDir := filepath.Join(dir, repoDir) + + // Check if the directory already exists and is up to date + if updated, err := target.Path(cloneDir, filePath); err != nil { + return err + } else if !updated { + return nil + } + + slog.Info("Cloning...", slog.String("url", repoURL)) + + // Clone the repository with all branches and tags + _, err := git.PlainClone(cloneDir, false, &git.CloneOptions{ + URL: repoURL, + Tags: git.AllTags, + }) + if err != nil { + return xerrors.Errorf("error cloning repository: %w", err) + } + + return nil +} diff --git a/magefiles/magefile.go b/magefiles/magefile.go index a95b715095..9e8ffba136 100644 --- a/magefiles/magefile.go +++ b/magefiles/magefile.go @@ -16,10 +16,12 @@ import ( "github.com/magefile/mage/sh" "github.com/magefile/mage/target" - //mage:import rpm - rpm "github.com/aquasecurity/trivy/pkg/fanal/analyzer/pkg/rpm/testdata" // Trivy packages should not be imported in Mage (see https://github.com/aquasecurity/trivy/pull/4242), // but this package doesn't have so many dependencies, 
and Mage is still fast. + //mage:import gittest + gittest "github.com/aquasecurity/trivy/internal/gittest/testdata" + //mage:import rpm + rpm "github.com/aquasecurity/trivy/pkg/fanal/analyzer/pkg/rpm/testdata" "github.com/aquasecurity/trivy/pkg/log" ) @@ -286,7 +288,7 @@ func compileWasmModules(pattern string) error { // Unit runs unit tests func (t Test) Unit() error { - mg.Deps(t.GenerateModules, rpm.Fixtures) + mg.Deps(t.GenerateModules, rpm.Fixtures, gittest.Fixtures) return sh.RunWithV(ENV, "go", "test", "-v", "-short", "-coverprofile=coverage.txt", "-covermode=atomic", "./...") } diff --git a/pkg/commands/app.go b/pkg/commands/app.go index 922ba6e29b..7bedce99ff 100644 --- a/pkg/commands/app.go +++ b/pkg/commands/app.go @@ -478,8 +478,6 @@ func NewRepositoryCommand(globalFlags *flag.GlobalFlagGroup) *cobra.Command { repoFlags.ScanFlagGroup.DistroFlag = nil // `repo` subcommand doesn't support scanning OS packages, so we can disable `--distro` - repoFlags.CacheFlagGroup.CacheBackend.Default = string(cache.TypeMemory) // Use memory cache by default - cmd := &cobra.Command{ Use: "repository [flags] (REPO_PATH | REPO_URL)", Aliases: []string{"repo"}, diff --git a/pkg/fanal/artifact/artifact.go b/pkg/fanal/artifact/artifact.go index b6034cb5ac..6dfa81fa73 100644 --- a/pkg/fanal/artifact/artifact.go +++ b/pkg/fanal/artifact/artifact.go @@ -14,6 +14,7 @@ import ( ) type Option struct { + Type Type AnalyzerGroup analyzer.Group // It is empty in OSS DisabledAnalyzers []analyzer.Type DisabledHandlers []types.HandlerType @@ -30,6 +31,10 @@ type Option struct { FileChecksum bool // For SPDX DetectionPriority types.DetectionPriority + // Original is the original target location, e.g. 
"github.com/aquasecurity/trivy" + // Currently, it is used only for remote git repositories + Original string + // Git repositories RepoBranch string RepoCommit string diff --git a/pkg/fanal/artifact/local/fs.go b/pkg/fanal/artifact/local/fs.go index 2de864b0a3..a0f220bbe9 100644 --- a/pkg/fanal/artifact/local/fs.go +++ b/pkg/fanal/artifact/local/fs.go @@ -1,16 +1,20 @@ package local import ( + "cmp" "context" "crypto/sha256" + "errors" "os" "path" "path/filepath" "strings" "sync" + "github.com/go-git/go-git/v5" "github.com/google/wire" "github.com/opencontainers/go-digest" + "github.com/samber/lo" "golang.org/x/xerrors" "github.com/aquasecurity/trivy/pkg/cache" @@ -19,6 +23,7 @@ import ( "github.com/aquasecurity/trivy/pkg/fanal/handler" "github.com/aquasecurity/trivy/pkg/fanal/types" "github.com/aquasecurity/trivy/pkg/fanal/walker" + "github.com/aquasecurity/trivy/pkg/log" "github.com/aquasecurity/trivy/pkg/semaphore" "github.com/aquasecurity/trivy/pkg/uuid" ) @@ -39,12 +44,15 @@ type Walker interface { type Artifact struct { rootPath string + logger *log.Logger cache cache.ArtifactCache walker Walker analyzer analyzer.AnalyzerGroup handlerManager handler.Manager artifactOption artifact.Option + + commitHash string // only set when the git repository is clean } func NewArtifact(rootPath string, c cache.ArtifactCache, w Walker, opt artifact.Option) (artifact.Artifact, error) { @@ -58,17 +66,92 @@ func NewArtifact(rootPath string, c cache.ArtifactCache, w Walker, opt artifact. 
return nil, xerrors.Errorf("analyzer group error: %w", err) } - return Artifact{ + opt.Type = cmp.Or(opt.Type, artifact.TypeFilesystem) + prefix := lo.Ternary(opt.Type == artifact.TypeRepository, "repo", "fs") + + art := Artifact{ rootPath: filepath.ToSlash(filepath.Clean(rootPath)), + logger: log.WithPrefix(prefix), cache: c, walker: w, analyzer: a, handlerManager: handlerManager, artifactOption: opt, - }, nil + } + + art.logger.Debug("Analyzing...", log.String("root", art.rootPath), + lo.Ternary(opt.Original != "", log.String("original", opt.Original), log.Nil)) + + // Check if the directory is a git repository and clean + if hash, err := gitCommitHash(art.rootPath); err == nil { + art.logger.Debug("Using the latest commit hash for calculating cache key", log.String("commit_hash", hash)) + art.commitHash = hash + } else if !errors.Is(err, git.ErrRepositoryNotExists) { + // Only log if the file path is a git repository + art.logger.Debug("Random cache key will be used", log.Err(err)) + } + + return art, nil +} + +// gitCommitHash returns the latest commit hash if the git repository is clean, otherwise returns an error +func gitCommitHash(dir string) (string, error) { + repo, err := git.PlainOpen(dir) + if err != nil { + return "", xerrors.Errorf("failed to open git repository: %w", err) + } + + // Get the working tree + worktree, err := repo.Worktree() + if err != nil { + return "", xerrors.Errorf("failed to get worktree: %w", err) + } + + // Get the current status + status, err := worktree.Status() + if err != nil { + return "", xerrors.Errorf("failed to get status: %w", err) + } + + if !status.IsClean() { + return "", xerrors.New("repository is dirty") + } + + // Get the HEAD commit hash + head, err := repo.Head() + if err != nil { + return "", xerrors.Errorf("failed to get HEAD: %w", err) + } + + return head.Hash().String(), nil } func (a Artifact) Inspect(ctx context.Context) (artifact.Reference, error) { + // Calculate cache key + cacheKey, err := 
a.calcCacheKey() + if err != nil { + return artifact.Reference{}, xerrors.Errorf("failed to calculate a cache key: %w", err) + } + + // Check if the cache exists only when it's a clean git repository + if a.commitHash != "" { + _, missingBlobs, err := a.cache.MissingBlobs(cacheKey, []string{cacheKey}) + if err != nil { + return artifact.Reference{}, xerrors.Errorf("unable to get missing blob: %w", err) + } + + if len(missingBlobs) == 0 { + // Cache hit + a.logger.DebugContext(ctx, "Cache hit", log.String("key", cacheKey)) + return artifact.Reference{ + Name: cmp.Or(a.artifactOption.Original, a.rootPath), + Type: a.artifactOption.Type, + ID: cacheKey, + BlobIDs: []string{cacheKey}, + }, nil + } + } + var wg sync.WaitGroup result := analyzer.NewAnalysisResult() limit := semaphore.New(a.artifactOption.Parallel) @@ -141,11 +224,6 @@ func (a Artifact) Inspect(ctx context.Context) (artifact.Reference, error) { return artifact.Reference{}, xerrors.Errorf("failed to call hooks: %w", err) } - cacheKey, err := a.calcCacheKey() - if err != nil { - return artifact.Reference{}, xerrors.Errorf("failed to calculate a cache key: %w", err) - } - if err = a.cache.PutBlob(cacheKey, blobInfo); err != nil { return artifact.Reference{}, xerrors.Errorf("failed to store blob (%s) in cache: %w", cacheKey, err) } @@ -156,29 +234,35 @@ func (a Artifact) Inspect(ctx context.Context) (artifact.Reference, error) { if err == nil && len(b) != 0 { hostName = strings.TrimSpace(string(b)) } else { - // To slash for Windows - hostName = filepath.ToSlash(a.rootPath) + target := cmp.Or(a.artifactOption.Original, a.rootPath) + hostName = filepath.ToSlash(target) // To slash for Windows } return artifact.Reference{ Name: hostName, - Type: artifact.TypeFilesystem, + Type: a.artifactOption.Type, ID: cacheKey, // use a cache key as pseudo artifact ID BlobIDs: []string{cacheKey}, }, nil } func (a Artifact) Clean(reference artifact.Reference) error { + // Don't delete cache if it's a clean git repository + if 
a.commitHash != "" { + return nil + } return a.cache.DeleteBlobs(reference.BlobIDs) } func (a Artifact) calcCacheKey() (string, error) { - // Generate a random UUID for the cache key - id := uuid.New() + // If this is a clean git repository, use the commit hash as cache key + if a.commitHash != "" { + return cache.CalcKey(a.commitHash, a.analyzer.AnalyzerVersions(), a.handlerManager.Versions(), a.artifactOption) + } - // Calculate sha256 hash from UUID + // For non-git repositories or dirty git repositories, use UUID as cache key h := sha256.New() - if _, err := h.Write([]byte(id.String())); err != nil { + if _, err := h.Write([]byte(uuid.New().String())); err != nil { return "", xerrors.Errorf("sha256 calculation error: %w", err) } diff --git a/pkg/fanal/artifact/repo/git.go b/pkg/fanal/artifact/repo/git.go index 4532b631c6..ebd7458f33 100644 --- a/pkg/fanal/artifact/repo/git.go +++ b/pkg/fanal/artifact/repo/git.go @@ -1,7 +1,6 @@ package repo import ( - "context" "net/url" "os" @@ -32,17 +31,12 @@ type Walker interface { Walk(root string, opt walker.Option, fn walker.WalkFunc) error } -type Artifact struct { - url string - local artifact.Artifact -} - -func NewArtifact(target string, c cache.ArtifactCache, w Walker, artifactOpt artifact.Option) ( - artifact.Artifact, func(), error) { - +func NewArtifact(target string, c cache.ArtifactCache, w Walker, artifactOpt artifact.Option) (artifact.Artifact, func(), error) { var cleanup func() var errs error + artifactOpt.Type = artifact.TypeRepository + // Try the local repository art, err := tryLocalRepo(target, c, w, artifactOpt) if err == nil { @@ -61,24 +55,6 @@ func NewArtifact(target string, c cache.ArtifactCache, w Walker, artifactOpt art return nil, cleanup, errs } -func (a Artifact) Inspect(ctx context.Context) (artifact.Reference, error) { - ref, err := a.local.Inspect(ctx) - if err != nil { - return artifact.Reference{}, xerrors.Errorf("remote repository error: %w", err) - } - - if a.url != "" { - ref.Name = 
a.url - } - ref.Type = artifact.TypeRepository - - return ref, nil -} - -func (Artifact) Clean(_ artifact.Reference) error { - return nil -} - func tryLocalRepo(target string, c cache.ArtifactCache, w Walker, artifactOpt artifact.Option) (artifact.Artifact, error) { if _, err := os.Stat(target); err != nil { return nil, xerrors.Errorf("no such path: %w", err) @@ -88,9 +64,7 @@ func tryLocalRepo(target string, c cache.ArtifactCache, w Walker, artifactOpt ar if err != nil { return nil, xerrors.Errorf("local repo artifact error: %w", err) } - return Artifact{ - local: art, - }, nil + return art, nil } func tryRemoteRepo(target string, c cache.ArtifactCache, w Walker, artifactOpt artifact.Option) (artifact.Artifact, func(), error) { @@ -107,15 +81,13 @@ func tryRemoteRepo(target string, c cache.ArtifactCache, w Walker, artifactOpt a cleanup = func() { _ = os.RemoveAll(tmpDir) } + artifactOpt.Original = target art, err := local.NewArtifact(tmpDir, c, w, artifactOpt) if err != nil { return nil, cleanup, xerrors.Errorf("fs artifact: %w", err) } - return Artifact{ - url: target, - local: art, - }, cleanup, nil + return art, cleanup, nil } diff --git a/pkg/fanal/artifact/repo/git_test.go b/pkg/fanal/artifact/repo/git_test.go index 3835e9ae0a..8d2dbd037f 100644 --- a/pkg/fanal/artifact/repo/git_test.go +++ b/pkg/fanal/artifact/repo/git_test.go @@ -4,16 +4,17 @@ package repo import ( "context" - "net/http/httptest" + "os" + "path/filepath" "testing" - "github.com/go-git/go-git/v5" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/aquasecurity/trivy/internal/gittest" "github.com/aquasecurity/trivy/pkg/cache" "github.com/aquasecurity/trivy/pkg/fanal/artifact" + "github.com/aquasecurity/trivy/pkg/fanal/types" "github.com/aquasecurity/trivy/pkg/fanal/walker" "github.com/aquasecurity/trivy/pkg/uuid" @@ -21,29 +22,10 @@ import ( _ "github.com/aquasecurity/trivy/pkg/fanal/analyzer/secret" ) -func setupGitRepository(t *testing.T, repo, dir string) 
(*httptest.Server, *git.Repository) { - gs := gittest.NewServer(t, repo, dir) - - worktree := t.TempDir() - r := gittest.Clone(t, gs, repo, worktree) - - // Branch - gittest.CreateRemoteBranch(t, r, "valid-branch") - - // Tag - gittest.SetTag(t, r, "v1.0.0") - gittest.PushTags(t, r) - - return gs, r -} - func TestNewArtifact(t *testing.T) { - ts, repo := setupGitRepository(t, "test-repo", "testdata/test-repo") + ts := gittest.NewTestServer(t) defer ts.Close() - head, err := repo.Head() - require.NoError(t, err) - type args struct { target string c cache.ArtifactCache @@ -69,7 +51,7 @@ func TestNewArtifact(t *testing.T) { { name: "local repo", args: args{ - target: "testdata", + target: "../../../../internal/gittest/testdata/test-repo", c: nil, noProgress: false, }, @@ -98,7 +80,7 @@ func TestNewArtifact(t *testing.T) { args: args{ target: ts.URL + "/test-repo.git", c: nil, - repoTag: "v1.0.0", + repoTag: "v0.0.1", }, assertion: assert.NoError, }, @@ -107,7 +89,7 @@ func TestNewArtifact(t *testing.T) { args: args{ target: ts.URL + "/test-repo.git", c: nil, - repoCommit: head.String(), + repoCommit: "8a19b492a589955c3e70c6ad8efd1e4ec6ae0d35", }, assertion: assert.NoError, }, @@ -183,44 +165,132 @@ func TestNewArtifact(t *testing.T) { } func TestArtifact_Inspect(t *testing.T) { - ts, _ := setupGitRepository(t, "test-repo", "testdata/test-repo") + ts := gittest.NewTestServer(t) defer ts.Close() tests := []struct { - name string - rawurl string - want artifact.Reference - wantErr bool + name string + rawurl string + setup func(t *testing.T, dir string, c cache.ArtifactCache) + want artifact.Reference + wantBlobInfo *types.BlobInfo + wantErr bool }{ { - name: "happy path", + name: "remote repo", rawurl: ts.URL + "/test-repo.git", want: artifact.Reference{ Name: ts.URL + "/test-repo.git", Type: artifact.TypeRepository, + ID: "sha256:dc7c6039424c9fce969d3c2972d261af442a33f13e7494464386dbe280612d4c", // Calculated from commit hash + BlobIDs: []string{ + 
"sha256:dc7c6039424c9fce969d3c2972d261af442a33f13e7494464386dbe280612d4c", // Calculated from commit hash + }, + }, + wantBlobInfo: &types.BlobInfo{ + SchemaVersion: types.BlobJSONSchemaVersion, + }, + }, + { + name: "local repo", + rawurl: "../../../../internal/gittest/testdata/test-repo", + want: artifact.Reference{ + Name: "../../../../internal/gittest/testdata/test-repo", + Type: artifact.TypeRepository, + ID: "sha256:dc7c6039424c9fce969d3c2972d261af442a33f13e7494464386dbe280612d4c", // Calculated from commit hash + BlobIDs: []string{ + "sha256:dc7c6039424c9fce969d3c2972d261af442a33f13e7494464386dbe280612d4c", // Calculated from commit hash + }, + }, + wantBlobInfo: &types.BlobInfo{ + SchemaVersion: types.BlobJSONSchemaVersion, + }, + }, + { + name: "dirty repository", + rawurl: "../../../../internal/gittest/testdata/test-repo", + setup: func(t *testing.T, dir string, _ cache.ArtifactCache) { + require.NoError(t, os.WriteFile(filepath.Join(dir, "new-file.txt"), []byte("test"), 0644)) + t.Cleanup(func() { + require.NoError(t, os.Remove(filepath.Join(dir, "new-file.txt"))) + }) + }, + want: artifact.Reference{ + Name: "../../../../internal/gittest/testdata/test-repo", + Type: artifact.TypeRepository, ID: "sha256:6f4672e139d4066fd00391df614cdf42bda5f7a3f005d39e1d8600be86157098", BlobIDs: []string{ "sha256:6f4672e139d4066fd00391df614cdf42bda5f7a3f005d39e1d8600be86157098", }, }, + wantBlobInfo: &types.BlobInfo{ + SchemaVersion: types.BlobJSONSchemaVersion, + }, + }, + { + name: "cache hit", + rawurl: "../../../../internal/gittest/testdata/test-repo", + setup: func(t *testing.T, dir string, c cache.ArtifactCache) { + blobInfo := types.BlobInfo{ + SchemaVersion: types.BlobJSONSchemaVersion, + OS: types.OS{ + Family: types.Alpine, + Name: "3.16.0", + }, + } + // Store the blob info in the cache to test cache hit + cacheKey := "sha256:dc7c6039424c9fce969d3c2972d261af442a33f13e7494464386dbe280612d4c" + err := c.PutBlob(cacheKey, blobInfo) + require.NoError(t, err) + }, + 
want: artifact.Reference{ + Name: "../../../../internal/gittest/testdata/test-repo", + Type: artifact.TypeRepository, + ID: "sha256:dc7c6039424c9fce969d3c2972d261af442a33f13e7494464386dbe280612d4c", + BlobIDs: []string{ + "sha256:dc7c6039424c9fce969d3c2972d261af442a33f13e7494464386dbe280612d4c", + }, + }, + wantBlobInfo: &types.BlobInfo{ + SchemaVersion: types.BlobJSONSchemaVersion, + OS: types.OS{ + Family: types.Alpine, + Name: "3.16.0", + }, + }, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - // Set fake UUID for consistent test results + // Set fake UUID for consistency uuid.SetFakeUUID(t, "3ff14136-e09f-4df9-80ea-%012d") - fsCache, err := cache.NewFSCache(t.TempDir()) - require.NoError(t, err) + // Create memory cache + c := cache.NewMemoryCache() - art, cleanup, err := NewArtifact(tt.rawurl, fsCache, walker.NewFS(), artifact.Option{}) + // Apply setup if specified + if tt.setup != nil { + tt.setup(t, tt.rawurl, c) + } + + art, cleanup, err := NewArtifact(tt.rawurl, c, walker.NewFS(), artifact.Option{}) require.NoError(t, err) defer cleanup() ref, err := art.Inspect(context.Background()) + if tt.wantErr { + assert.Error(t, err) + return + } + require.NoError(t, err) assert.Equal(t, tt.want, ref) + + // Verify cache contents after inspection + blobInfo, err := c.GetBlob(tt.want.BlobIDs[0]) + require.NoError(t, err) + assert.Equal(t, tt.wantBlobInfo, &blobInfo, "cache content mismatch") }) } } @@ -263,10 +333,8 @@ func Test_newURL(t *testing.T) { if tt.wantErr != "" { require.ErrorContains(t, err, tt.wantErr) return - } else { - require.NoError(t, err) } - + require.NoError(t, err) assert.Equal(t, tt.want, got.String()) }) } diff --git a/pkg/fanal/artifact/repo/testdata/test-repo/anothertest.txt b/pkg/fanal/artifact/repo/testdata/test-repo/anothertest.txt deleted file mode 100644 index f4836be649..0000000000 --- a/pkg/fanal/artifact/repo/testdata/test-repo/anothertest.txt +++ /dev/null @@ -1 +0,0 @@ -this is another text file. 
\ No newline at end of file diff --git a/pkg/fanal/artifact/repo/testdata/test-repo/test.txt b/pkg/fanal/artifact/repo/testdata/test-repo/test.txt deleted file mode 100644 index c042cd14d2..0000000000 --- a/pkg/fanal/artifact/repo/testdata/test-repo/test.txt +++ /dev/null @@ -1 +0,0 @@ -this is a text file. \ No newline at end of file diff --git a/pkg/log/handler.go b/pkg/log/handler.go index d5e75a7704..9bbfd8060e 100644 --- a/pkg/log/handler.go +++ b/pkg/log/handler.go @@ -264,6 +264,7 @@ var ( Duration = slog.Duration Group = slog.Group Any = slog.Any + Nil = slog.Attr{Key: "", Value: slog.AnyValue(nil)} // Nil will not be logged ) // Err returns an Attr that represents an error.