feat(debian): detect third-party packages using maintainer list (#9917)

This commit is contained in:
Teppei Fukuda
2025-12-11 14:18:31 +09:00
committed by GitHub
parent 335cc993fa
commit effc1c0d4d
5 changed files with 162 additions and 15 deletions

View File

@@ -676,7 +676,7 @@ func TestAnalyzerGroup_AnalyzerVersions(t *testing.T) {
"ubuntu-esm": 1,
},
PostAnalyzers: map[string]int{
"dpkg": 5,
"dpkg": 6,
"jar": 1,
"poetry": 1,
},

View File

@@ -41,7 +41,7 @@ func newDpkgAnalyzer(_ analyzer.AnalyzerOptions) (analyzer.PostAnalyzer, error)
}
const (
analyzerVersion = 5
analyzerVersion = 6
statusFile = "var/lib/dpkg/status"
statusDir = "var/lib/dpkg/status.d/"
@@ -54,6 +54,56 @@ const (
var (
// dpkgSrcCaptureRegexp captures a name (a run of non-whitespace characters) and an
// optional version enclosed in parentheses, exposed as the named groups "name" and "version".
dpkgSrcCaptureRegexp = regexp.MustCompile(`(?P<name>[^\s]*)( \((?P<version>.*)\))?`)
// dpkgSrcCaptureRegexpNames holds the subexpression names of dpkgSrcCaptureRegexp.
dpkgSrcCaptureRegexpNames = dpkgSrcCaptureRegexp.SubexpNames()
// thirdPartyMaintainerPatterns contains patterns that indicate a package is from a third-party repository.
// Packages with maintainers matching these patterns will NOT have their InstalledFiles tracked,
// allowing language scanners to properly analyze files installed by those packages.
// Each entry is matched as a case-sensitive substring of the Maintainer value
// (see isThirdPartyPackage), so an entry starting with "@" matches any address at that domain.
// See https://github.com/aquasecurity/trivy/issues/9916 for more details.
thirdPartyMaintainerPatterns = []string{
// Container & orchestration
"support@docker.com", // Docker
// Cloud providers & infrastructure
"@nvidia.com", // NVIDIA CUDA
"Google Cloud CLI Authors", // Google Cloud SDK
"sapmachine@sap.com", // SAP Machine JDK
"@hashicorp.com", // HashiCorp (Terraform, Vault, Consul, etc.)
"@microsoft.com", // Microsoft (VS Code, Azure CLI, .NET, etc.)
// Databases
"@mongodb.com", // MongoDB
"developers@lists.mariadb.org", // MariaDB
"dev@couchdb.apache.org", // Apache CouchDB
"info@elastic.co", // Elastic (Elasticsearch, Kibana, etc.)
// Web servers & API gateways
"nginx-packaging@f5.com", // NGINX (from nginx.org, not Debian)
"@konghq.com", // Kong
"@cloudflare.com", // Cloudflare (cloudflared, WARP)
// Monitoring & observability
"support@influxdb.com", // InfluxData (InfluxDB, Telegraf)
"support@gitlab.com", // GitLab
"contact@grafana.com", // Grafana Labs
"@datadoghq.com", // Datadog
// Language runtimes (third-party repos)
"@nodesource.com", // NodeSource (Node.js)
// Networking & VPN
"info@tailscale.com", // Tailscale
// Robotics
"@openrobotics.org", // ROS (Robot Operating System)
"@osrfoundation.org", // ROS (Robot Operating System)
}
// thirdPartyMaintainerExact contains maintainer strings that require exact match.
// These are too short or generic for substring matching.
// An entry only matches when the whole Maintainer value equals it (case-sensitive).
thirdPartyMaintainerExact = []string{
"GitHub", // GitHub CLI
"HashiCorp", // HashiCorp (Terraform, Vault, Consul, etc.)
}
)
func (a dpkgAnalyzer) PostAnalyze(_ context.Context, input analyzer.PostAnalysisInput) (*analyzer.AnalysisResult, error) {
@@ -82,7 +132,7 @@ func (a dpkgAnalyzer) PostAnalyze(_ context.Context, input analyzer.PostAnalysis
return xerrors.Errorf("failed to parse %s file: %w", path, err)
}
packageFiles[strings.TrimSuffix(filepath.Base(path), md5sumsExtension)] = systemFiles
systemInstalledFiles = append(systemInstalledFiles, systemFiles...)
// Note: systemInstalledFiles will be populated later based on maintainer check
return nil
}
// parse status files
@@ -97,14 +147,26 @@ func (a dpkgAnalyzer) PostAnalyze(_ context.Context, input analyzer.PostAnalysis
return nil, xerrors.Errorf("dpkg walk error: %w", err)
}
// map the packages to their respective files
// Map packages to their respective files.
// Third-party packages will NOT have their InstalledFiles populated to avoid filtering out
// language packages (npm, pip, etc.) installed by those third-party OS packages.
for i, pkgInfo := range packageInfos {
for j, pkg := range pkgInfo.Packages {
installedFiles, found := packageFiles[pkg.Name]
if !found {
installedFiles = packageFiles[pkg.Name+":"+pkg.Arch]
}
// Skip InstalledFiles for third-party packages
if isThirdPartyPackage(pkg.Maintainer) {
a.logger.Debug("Third-party package detected",
log.String("package", pkg.Name),
log.String("maintainer", pkg.Maintainer))
continue
}
packageInfos[i].Packages[j].InstalledFiles = installedFiles
systemInstalledFiles = append(systemInstalledFiles, installedFiles...)
}
}
@@ -349,6 +411,21 @@ func (a dpkgAnalyzer) isMd5SumsFile(dir, fileName string) bool {
return strings.HasSuffix(fileName, md5sumsExtension)
}
// isThirdPartyPackage reports whether the given Maintainer value belongs to a
// known third-party package publisher.
//
// dpkg has no counterpart to RPM's dedicated "Vendor" field, so a package's origin
// cannot be determined reliably. As a heuristic, the maintainer is accepted when it
// equals an entry of thirdPartyMaintainerExact, or when it contains an entry of
// thirdPartyMaintainerPatterns as a substring. Both comparisons are case-sensitive.
// See https://github.com/aquasecurity/trivy/issues/9916 for more details.
func isThirdPartyPackage(maintainer string) bool {
	// Short or generic names are only accepted as a whole-string match.
	for _, exact := range thirdPartyMaintainerExact {
		if maintainer == exact {
			return true
		}
	}

	// Every other known publisher is identified by a fragment of the maintainer string.
	containsFragment := func(fragment string) bool {
		return strings.Contains(maintainer, fragment)
	}
	return slices.IndexFunc(thirdPartyMaintainerPatterns, containsFragment) >= 0
}
// Type returns analyzer.TypeDpkg, the analyzer type reported by the dpkg analyzer.
func (a dpkgAnalyzer) Type() analyzer.Type {
return analyzer.TypeDpkg
}

View File

@@ -1420,9 +1420,45 @@ func Test_dpkgAnalyzer_Analyze(t *testing.T) {
},
},
{
name: "md5sums",
testFiles: map[string]string{"./testdata/tar.md5sums": "var/lib/dpkg/info/tar.md5sums"},
name: "md5sums",
testFiles: map[string]string{
"./testdata/tar-status": "var/lib/dpkg/status",
"./testdata/tar.md5sums": "var/lib/dpkg/info/tar.md5sums",
},
want: &analyzer.AnalysisResult{
PackageInfos: []types.PackageInfo{
{
FilePath: "var/lib/dpkg/status",
Packages: types.Packages{
{
ID: "tar@1.29b-2",
Name: "tar",
Version: "1.29b",
Release: "2",
Arch: "amd64",
SrcName: "tar",
SrcVersion: "1.29b",
SrcRelease: "2",
Maintainer: "Ubuntu Developers <ubuntu-devel-discuss@lists.ubuntu.com>",
InstalledFiles: []string{
"/usr/bin/tar",
"/usr/lib/mime/packages/tar",
"/usr/sbin/rmt-tar",
"/usr/sbin/tarcat",
"/usr/share/doc/tar/AUTHORS",
"/usr/share/doc/tar/NEWS.gz",
"/usr/share/doc/tar/README.Debian",
"/usr/share/doc/tar/THANKS.gz",
"/usr/share/doc/tar/changelog.Debian.gz",
"/usr/share/doc/tar/copyright",
"/usr/share/man/man1/tar.1.gz",
"/usr/share/man/man1/tarcat.1.gz",
"/usr/share/man/man8/rmt-tar.8.gz",
},
},
},
},
},
SystemInstalledFiles: []string{
"/usr/bin/tar",
"/usr/lib/mime/packages/tar",
@@ -1470,6 +1506,23 @@ func Test_dpkgAnalyzer_Analyze(t *testing.T) {
}
}
// Test_isThirdPartyPackage verifies the maintainer-based third-party heuristic.
//
// It covers both matching modes of isThirdPartyPackage: substring patterns
// (full addresses and "@domain" suffixes) and exact-only names. The negative cases
// pin that an exact-only name is not accepted when it merely appears inside a longer
// maintainer string, and that an empty maintainer is never treated as third-party.
func Test_isThirdPartyPackage(t *testing.T) {
	tests := []struct {
		name       string
		maintainer string
		want       bool
	}{
		{"third-party (Docker)", "Docker <support@docker.com>", true},
		{"third-party (NVIDIA - domain pattern)", "cudatools <cudatools@nvidia.com>", true},
		{"third-party (GitHub - exact match)", "GitHub", true},
		{"third-party (HashiCorp - exact match)", "HashiCorp", true},
		{"exact-only name inside a longer maintainer", "GitHub Fans <fans@example.org>", false},
		{"official (Ubuntu)", "Ubuntu Developers <ubuntu-devel-discuss@lists.ubuntu.com>", false},
		{"empty maintainer", "", false},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			assert.Equal(t, tt.want, isThirdPartyPackage(tt.maintainer))
		})
	}
}
func Test_dpkgAnalyzer_Required(t *testing.T) {
tests := []struct {
name string

View File

@@ -0,0 +1,17 @@
Package: tar
Essential: yes
Status: install ok installed
Priority: required
Section: utils
Installed-Size: 864
Maintainer: Ubuntu Developers <ubuntu-devel-discuss@lists.ubuntu.com>
Architecture: amd64
Multi-Arch: foreign
Version: 1.29b-2
Replaces: cpio (<< 2.4.2-39)
Pre-Depends: libacl1 (>= 2.2.51-8), libc6 (>= 2.17), libselinux1 (>= 1.32)
Suggests: bzip2, ncompress, xz-utils, tar-scripts, tar-doc
Breaks: dpkg-dev (<< 1.14.26)
Conflicts: cpio (<= 2.4.2-38)
Description: GNU version of the tar archiving utility

View File

@@ -36,7 +36,7 @@ import (
// Common blob IDs used across multiple test cases to reduce duplication
const (
alpineBaseLayerID = "sha256:5fa8e7300cfe1b8f70c304e3b04f9b1f022942a0dc57d3fc0d4d3f04327e6d2a"
alpineBaseLayerID = "sha256:6c42077a82b21707f581759b12a99cc9a593ce35a0d7be4c19c01eb48bd5ba33"
alpineBaseLayerDiffID = "sha256:beee9f30bc1f711043e78d4a2be0668955d4b761d587d6f60c2c8dc081efb203"
alpineArtifactID = "sha256:3c709d2a158be3a97051e10cd0e30f047225cb9505101feb3fadcd395c2e0408"
composerImageID = "sha256:a187dde48cd289ac374ad8539930628314bc581a481cdb41409c9289419ddb72"
@@ -510,7 +510,7 @@ func TestArtifact_Inspect(t *testing.T) {
},
wantBlobs: []cachetest.WantBlob{
{
ID: "sha256:5b61242ed7786d642c7037c5d42c97ef4eb77e79b5cee7d47c3a2476bdd37e54",
ID: "sha256:75a461ca76eecc6cea981889d69aa1c2dd78c436108be8be1bbc29295520c7d4",
BlobInfo: types.BlobInfo{
SchemaVersion: types.BlobJSONSchemaVersion,
Size: 3061760,
@@ -598,7 +598,7 @@ func TestArtifact_Inspect(t *testing.T) {
},
},
{
ID: "sha256:1a8ac8af11a039295f3fffd3e058c034dae966ac7ace649121f0559146133ee5",
ID: "sha256:81afc1747d0fdec7a606c27570313634ae331fab6f13566b23d0f6b3e498c050",
BlobInfo: types.BlobInfo{
SchemaVersion: types.BlobJSONSchemaVersion,
Size: 15441920,
@@ -693,7 +693,7 @@ func TestArtifact_Inspect(t *testing.T) {
},
},
{
ID: "sha256:a686ab4c4132800a0d67a8ddf33dd89387d750a7b3427c01b9ce7bf3219cadfb",
ID: "sha256:0778c3e388c54f736a3d6e74ed390a91fdb42c6809f8fb743d4f72acb41a5d6d",
BlobInfo: types.BlobInfo{
SchemaVersion: types.BlobJSONSchemaVersion,
Size: 29696,
@@ -900,7 +900,7 @@ func TestArtifact_Inspect(t *testing.T) {
},
},
{
ID: "sha256:789b01e58c608d3a3021ce18cf6c8bd21e701116134089d949da35a25f73d9ec",
ID: "sha256:5a3e3f25fdc97a14d69d99c63dd640cd2d38af5b987b7a95084cce3d835970fb",
BlobInfo: types.BlobInfo{
SchemaVersion: types.BlobJSONSchemaVersion,
Size: 6656,
@@ -1763,10 +1763,10 @@ func TestArtifact_Inspect(t *testing.T) {
Type: types.TypeContainerImage,
ID: "sha256:0bebf0773ffd87baa7c64fbdbdf79a24ae125e3f99a8adebe52d1ccbe6bed16b",
BlobIDs: []string{
"sha256:5b61242ed7786d642c7037c5d42c97ef4eb77e79b5cee7d47c3a2476bdd37e54",
"sha256:1a8ac8af11a039295f3fffd3e058c034dae966ac7ace649121f0559146133ee5",
"sha256:a686ab4c4132800a0d67a8ddf33dd89387d750a7b3427c01b9ce7bf3219cadfb",
"sha256:789b01e58c608d3a3021ce18cf6c8bd21e701116134089d949da35a25f73d9ec",
"sha256:75a461ca76eecc6cea981889d69aa1c2dd78c436108be8be1bbc29295520c7d4",
"sha256:81afc1747d0fdec7a606c27570313634ae331fab6f13566b23d0f6b3e498c050",
"sha256:0778c3e388c54f736a3d6e74ed390a91fdb42c6809f8fb743d4f72acb41a5d6d",
"sha256:5a3e3f25fdc97a14d69d99c63dd640cd2d38af5b987b7a95084cce3d835970fb",
},
ImageMetadata: artifact.ImageMetadata{
ID: "sha256:58701fd185bda36cab0557bb6438661831267aa4a9e0b54211c4d5317a48aff4",