From d70d994d8882a6e7a8b0c9a9b08524a2cae32ea4 Mon Sep 17 00:00:00 2001 From: Teppei Fukuda Date: Thu, 6 Nov 2025 17:32:06 +0400 Subject: [PATCH] feat(db): enable concurrent access to vulnerability database (#9750) Co-authored-by: DmitriyLewen <91113035+DmitriyLewen@users.noreply.github.com> --- docs/docs/references/troubleshooting.md | 47 +++++++++++++++---------- internal/dbtest/db.go | 34 ++++++++++++++++++ pkg/db/db.go | 8 ++++- pkg/db/db_test.go | 19 +++------- pkg/rpc/server/listen_test.go | 18 ++++------ 5 files changed, 81 insertions(+), 45 deletions(-) diff --git a/docs/docs/references/troubleshooting.md b/docs/docs/references/troubleshooting.md index a81520a599..9d39a55340 100644 --- a/docs/docs/references/troubleshooting.md +++ b/docs/docs/references/troubleshooting.md @@ -125,50 +125,61 @@ $ trivy image --download-java-db-only $ trivy image [YOUR_JAVA_IMAGE] ``` -### Database and cache lock errors +### Cache lock errors !!! error ``` cache may be in use by another process ``` -!!! error - ``` - vulnerability database may be in use by another process - ``` +Trivy's vulnerability database is opened in read-only mode, so it does not cause lock issues. Lock errors occur only when using filesystem cache for scan cache storage. -By default, Trivy uses BoltDB for its vulnerability database and cache storage. BoltDB creates file locks to prevent data corruption, which means only one process can access the same database file at a time. - -As stated in the BoltDB documentation: +Filesystem cache uses BoltDB internally, which creates file locks to prevent data corruption. As stated in the BoltDB documentation: > Please note that Bolt obtains a file lock on the data file so multiple processes cannot open the same database at the same time. Opening an already open Bolt database will cause it to hang until the other process closes it. Reference: [BoltDB README](https://github.com/boltdb/bolt#opening-a-database) +If you're using memory cache (default for some commands like `fs`, `rootfs`, `config`, and `sbom`) or external cache (Redis), you will not encounter lock errors. Lock issues only occur when using filesystem cache with multiple concurrent processes. +See [Cache Backend](../configuration/cache.md#scan-cache-backend) for more details. + These errors occur when: -- Multiple Trivy processes try to use the same cache directory simultaneously +- Multiple Trivy processes try to use the same filesystem cache directory simultaneously - A previous Trivy process did not shut down cleanly -- Trivy server is running and holding locks on the database and cache - -#### Important Note - -Running multiple Trivy processes on the same machine is **not recommended**. Using the same cache directory for multiple processes does not improve performance and can cause unexpected errors due to BoltDB's locking mechanism. +- Trivy server is running with filesystem cache and holding a lock on the cache #### Solutions -**Solution 1: Terminate conflicting processes** (Recommended) +**Solution 1: Use memory cache or Redis cache** (Recommended) -Check for running Trivy processes and terminate them: +Memory cache is the default for some commands (e.g., `fs`, `rootfs`, `config`, `sbom`). For other commands like image scanning, you can use `--cache-backend memory` to enable concurrent execution: + +```bash +$ trivy image --cache-backend memory debian:11 & +$ trivy image --cache-backend memory debian:12 & +``` + +Note that memory cache does not persist scan results, so subsequent scans will take longer as layers need to be scanned again each time. + +For server mode or persistent cache with concurrent access, use Redis cache: + +```bash +$ trivy server --cache-backend redis://localhost:6379 +``` + +**Solution 2: Terminate conflicting processes** + +If you need to use filesystem cache, check for running Trivy processes and terminate them: ```bash $ ps aux | grep trivy $ kill [process_id] ``` -**Solution 2: Use different cache directories** (If multiple processes are absolutely necessary) +**Solution 3: Use different cache directories** -If you must run multiple Trivy processes on the same machine, specify different cache directories for each process: +If you must run multiple Trivy processes with filesystem cache, specify different cache directories for each process: ```bash $ trivy image --cache-dir /tmp/trivy-cache-1 debian:11 & diff --git a/internal/dbtest/db.go b/internal/dbtest/db.go index b8cbf58196..05c1478408 100644 --- a/internal/dbtest/db.go +++ b/internal/dbtest/db.go @@ -6,10 +6,12 @@ import ( "testing" "time" + "github.com/samber/lo" "github.com/stretchr/testify/require" fixtures "github.com/aquasecurity/bolt-fixtures" trivydb "github.com/aquasecurity/trivy-db/pkg/db" + "github.com/aquasecurity/trivy-db/pkg/metadata" jdb "github.com/aquasecurity/trivy-java-db/pkg/db" "github.com/aquasecurity/trivy/pkg/db" ) @@ -40,6 +42,38 @@ func Close() error { return db.Close() } +// InitWithMetadata initializes a database with optional metadata and DB file creation. +// If meta is empty, no metadata file is created. +// If createDBFile is false, no DB file is created (useful for testing "DB not found" scenarios). +// Returns the dbDir path. +func InitWithMetadata(t *testing.T, meta metadata.Metadata, createDBFile bool) string { + t.Helper() + + cacheDir := t.TempDir() + dbDir := db.Dir(cacheDir) + + // Create metadata if provided + if !lo.IsEmpty(meta) { + metaClient := metadata.NewClient(dbDir) + err := metaClient.Update(meta) + require.NoError(t, err) + } + + // Create DB file if requested + if createDBFile { + // First, create the DB file using trivy-db directly in write mode + err := trivydb.Init(dbDir) + require.NoError(t, err) + require.NoError(t, trivydb.Close()) + + // Then open it in read-only mode using our wrapper + err = db.Init(dbDir) + require.NoError(t, err) + } + + return dbDir +} + func InitJavaDB(t *testing.T, cacheDir string) { dbDir := filepath.Join(cacheDir, "java-db") javaDB, err := jdb.New(dbDir) diff --git a/pkg/db/db.go b/pkg/db/db.go index 906fd3fb73..35c5ceed0f 100644 --- a/pkg/db/db.go +++ b/pkg/db/db.go @@ -10,6 +10,7 @@ import ( "github.com/google/go-containerregistry/pkg/name" "github.com/samber/lo" + bolt "go.etcd.io/bbolt" "golang.org/x/xerrors" "github.com/aquasecurity/trivy-db/pkg/db" @@ -34,11 +35,16 @@ var ( DefaultGCRRepository = fmt.Sprintf("%s:%d", "mirror.gcr.io/aquasec/trivy-db", db.SchemaVersion) defaultGCRRepository = lo.Must(name.NewTag(DefaultGCRRepository)) - Init = db.Init Close = db.Close Path = db.Path ) +// Init initializes the vulnerability database with read-only mode +func Init(dbDir string, opts ...db.Option) error { + opts = append(opts, db.WithBoltOptions(&bolt.Options{ReadOnly: true})) + return db.Init(dbDir, opts...) +} + type options struct { artifact *oci.Artifact dbRepositories []name.Reference diff --git a/pkg/db/db_test.go b/pkg/db/db_test.go index 04f78449a7..6d6e8a8b81 100644 --- a/pkg/db/db_test.go +++ b/pkg/db/db_test.go @@ -240,20 +240,11 @@ func TestClient_NeedsUpdate(t *testing.T) { logger := log.New(log.NewHandler(out, &log.Options{Level: log.LevelDebug})) log.SetDefault(logger) - dbDir := db.Dir(t.TempDir()) - if tt.metadata != (metadata.Metadata{}) { - meta := metadata.NewClient(dbDir) - err := meta.Update(tt.metadata) - require.NoError(t, err) - } - - if tt.dbFileExists { - err := db.Init(dbDir) - require.NoError(t, err) - t.Cleanup(func() { - require.NoError(t, db.Close()) - }) - } + // Initialize DB with metadata and optionally create DB file + dbDir := dbtest.InitWithMetadata(t, tt.metadata, tt.dbFileExists) + t.Cleanup(func() { + require.NoError(t, db.Close()) + }) // Set a fake time ctx := clock.With(t.Context(), time.Date(2019, 10, 1, 0, 0, 0, 0, time.UTC)) diff --git a/pkg/rpc/server/listen_test.go b/pkg/rpc/server/listen_test.go index 2d82914774..233d561099 100644 --- a/pkg/rpc/server/listen_test.go +++ b/pkg/rpc/server/listen_test.go @@ -73,17 +73,11 @@ func Test_dbWorker_update(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - dbDir := db.Dir(t.TempDir()) - - // Initialize the cache - meta := metadata.NewClient(dbDir) - err := meta.Update(cachedMetadata) - require.NoError(t, err) - - err = db.Init(dbDir) - require.NoError(t, err) - - defer func() { _ = db.Close() }() + // Initialize DB with metadata + dbDir := dbtest.InitWithMetadata(t, cachedMetadata, true) + t.Cleanup(func() { + require.NoError(t, db.Close()) + }) // Set a fake time ctx := clock.With(t.Context(), tt.now) @@ -97,7 +91,7 @@ func Test_dbWorker_update(t *testing.T) { w := newDBWorker(client) var dbUpdateWg, requestWg sync.WaitGroup - err = w.update(ctx, "1.2.3", dbDir, + err := w.update(ctx, "1.2.3", dbDir, tt.skipUpdate, &dbUpdateWg, &requestWg, ftypes.RegistryOptions{}) if tt.wantErr != "" { require.ErrorContains(t, err, tt.wantErr, tt.name)