Files
trivy/pkg/fanal/analyzer/analyzer.go
Teppei Fukuda 983ac15f22 ci: add depguard (#6963)
Signed-off-by: knqyf263 <knqyf263@gmail.com>
2024-06-20 02:48:08 +00:00

518 lines
14 KiB
Go

package analyzer
import (
"context"
"errors"
"io/fs"
"os"
"regexp"
"slices"
"sort"
"strings"
"sync"
"github.com/samber/lo"
"golang.org/x/sync/semaphore"
"golang.org/x/xerrors"
fos "github.com/aquasecurity/trivy/pkg/fanal/analyzer/os"
"github.com/aquasecurity/trivy/pkg/fanal/types"
"github.com/aquasecurity/trivy/pkg/log"
"github.com/aquasecurity/trivy/pkg/misconf"
xio "github.com/aquasecurity/trivy/pkg/x/io"
)
var (
// analyzers holds the registered analyzers keyed by their type.
// Entries are added by RegisterAnalyzer and removed by DeregisterAnalyzer.
analyzers = make(map[Type]analyzer)
// postAnalyzers holds the registered post-analyzer constructors keyed by type.
// Entries are added by RegisterPostAnalyzer.
postAnalyzers = make(map[Type]postAnalyzerInitialize)
// ErrUnknownOS occurs when an unknown OS is analyzed.
ErrUnknownOS = xerrors.New("unknown OS")
// ErrPkgAnalysis occurs when the analysis of packages fails.
ErrPkgAnalysis = xerrors.New("failed to analyze packages")
// ErrNoPkgsDetected occurs when the required files for an OS package manager are not detected.
ErrNoPkgsDetected = xerrors.New("no packages detected")
)
//////////////////////
// Analyzer options //
//////////////////////
// AnalyzerOptions is used to initialize analyzers.
// The same value is handed to every analyzer that implements Initializer
// and to every registered post-analyzer constructor.
type AnalyzerOptions struct {
// Group selects which analyzer group to build; empty means GroupBuiltin.
Group Group
// Parallel is not read in this file.
// NOTE(review): presumably a concurrency setting for analyzers — confirm against the consumers.
Parallel int
// FilePatterns are extra "fileType:regexPattern" entries,
// e.g. "dockerfile:my_dockerfile_*", that force files to be passed to the given analyzer type.
FilePatterns []string
// DisabledAnalyzers lists analyzer types that must not be added to the group.
DisabledAnalyzers []Type
// The scanner options below are not interpreted in this file.
MisconfScannerOption misconf.ScannerOption
SecretScannerOption SecretScannerOption
LicenseScannerOption LicenseScannerOption
}
// SecretScannerOption carries the secret scanning settings stored in AnalyzerOptions.
type SecretScannerOption struct {
// ConfigPath is not read in this file.
// NOTE(review): presumably the path to the secret scanner configuration file — confirm.
ConfigPath string
}
// LicenseScannerOption carries the license scanning settings stored in AnalyzerOptions.
type LicenseScannerOption struct {
// Use license classifier to get better results though the classification is expensive.
Full bool
// ClassifierConfidenceLevel is not read in this file.
// NOTE(review): presumably the minimum confidence accepted from the license classifier — confirm.
ClassifierConfidenceLevel float64
}
////////////////
// Interfaces //
////////////////
// Initializer represents analyzers that need to take parameters from users.
// NewAnalyzerGroup calls Init on every selected analyzer that implements it
// and aborts group construction if Init returns an error.
type Initializer interface {
Init(AnalyzerOptions) error
}
// analyzer is the contract for per-file analyzers registered via RegisterAnalyzer.
type analyzer interface {
// Type identifies the analyzer; it is the key used for registration,
// for disabling analyzers and for looking up user-supplied file patterns.
Type() Type
// Version is the value reported for this analyzer by AnalyzerGroup.AnalyzerVersions.
Version() int
// Analyze inspects a single file. AnalyzeFile runs it in its own goroutine
// and merges the returned result into the shared AnalysisResult.
Analyze(ctx context.Context, input AnalysisInput) (*AnalysisResult, error)
// Required reports whether the analyzer wants to inspect the file.
// AnalyzeFile passes the path with leading slashes removed.
Required(filePath string, info os.FileInfo) bool
}
// PostAnalyzer is the contract for analyzers that run on a filesystem of
// collected files rather than on one file at a time.
type PostAnalyzer interface {
// Type identifies the post-analyzer; it is used to disable it, to look up
// user-supplied file patterns and to fetch its filesystem from the CompositeFS.
Type() Type
// Version is the value reported for this post-analyzer by AnalyzerGroup.AnalyzerVersions.
Version() int
// PostAnalyze inspects the filtered filesystem handed over by AnalyzerGroup.PostAnalyze.
PostAnalyze(ctx context.Context, input PostAnalysisInput) (*AnalysisResult, error)
// Required reports whether the post-analyzer needs the given file.
Required(filePath string, info os.FileInfo) bool
}
////////////////////
// Analyzer group //
////////////////////
// Group names a set of analyzers that are enabled together.
type Group string
// GroupBuiltin is the group of every analyzer that does not implement CustomGroup,
// and the group used when AnalyzerOptions.Group is empty.
const GroupBuiltin Group = "builtin"
// RegisterAnalyzer adds the analyzer to the package-level registry under its type.
// Registering a second analyzer with the same type is reported through log.Fatal.
func RegisterAnalyzer(analyzer analyzer) {
	t := analyzer.Type()
	if _, exists := analyzers[t]; exists {
		log.Fatal("Analyzer is registered twice", log.String("type", string(t)))
	}
	analyzers[t] = analyzer
}
// postAnalyzerInitialize constructs a post-analyzer from the user-supplied options.
// NewAnalyzerGroup invokes it once for every registered post-analyzer type.
type postAnalyzerInitialize func(options AnalyzerOptions) (PostAnalyzer, error)
// RegisterPostAnalyzer stores the constructor of a post-analyzer under type t.
// Registering a second constructor for the same type is reported through log.Fatal.
func RegisterPostAnalyzer(t Type, initializer postAnalyzerInitialize) {
	_, registered := postAnalyzers[t]
	if registered {
		log.Fatal("Analyzer is registered twice", log.String("type", string(t)))
	}
	postAnalyzers[t] = initializer
}
// DeregisterAnalyzer removes the analyzer registered under type t, if any.
// It is mainly for testing.
func DeregisterAnalyzer(t Type) {
delete(analyzers, t)
}
// CustomGroup is implemented by custom analyzers to report the group they belong to.
// Analyzers that do not implement it are treated as members of GroupBuiltin.
// This is mainly intended to be used in Aqua products.
type CustomGroup interface {
Group() Group
}
// Opener opens the file under analysis on demand.
// AnalyzeFile calls it once for every analyzer that requires the file and
// closes the returned handle after that analyzer has finished.
type Opener func() (xio.ReadSeekCloserAt, error)
// AnalyzerGroup is the set of analyzers and post-analyzers selected by NewAnalyzerGroup.
type AnalyzerGroup struct {
// logger is created with the "analyzer" prefix.
logger *log.Logger
// analyzers are the enabled per-file analyzers.
analyzers []analyzer
// postAnalyzers are the enabled post-analyzers.
postAnalyzers []PostAnalyzer
// filePatterns maps an analyzer type to the user-supplied regular expressions
// that force matching files to be passed to that analyzer.
filePatterns map[Type][]*regexp.Regexp
}
///////////////////////////
// Analyzer input/output //
///////////////////////////
// AnalysisInput is what AnalyzeFile hands to an analyzer for a single file.
type AnalysisInput struct {
// Dir is the dir argument of AnalyzeFile, passed through unchanged.
Dir string
// FilePath is the path as given to AnalyzeFile (leading slashes are not trimmed here).
FilePath string
// Info is the file's metadata.
Info os.FileInfo
// Content is the opened file; AnalyzeFile closes it after Analyze returns.
Content xio.ReadSeekerAt
// Options are the per-scan options forwarded from the caller.
Options AnalysisOptions
}
// PostAnalysisInput is what AnalyzerGroup.PostAnalyze hands to a post-analyzer.
type PostAnalysisInput struct {
// FS is the post-analyzer's filesystem with already-covered files filtered out.
FS fs.FS
// Options are the per-scan options forwarded from the caller.
Options AnalysisOptions
}
// AnalysisOptions are per-scan options forwarded unchanged to analyzers and post-analyzers.
// Neither field is interpreted in this file.
type AnalysisOptions struct {
// NOTE(review): presumably disables network access during analysis — confirm against the analyzers.
Offline bool
// NOTE(review): presumably enables computing file checksums — confirm against the analyzers.
FileChecksum bool
}
// AnalysisResult accumulates the findings of analyzers and post-analyzers.
// Results are combined with Merge, which takes the mutex below.
type AnalysisResult struct {
// m is held by Merge while the receiver's fields are updated.
m sync.Mutex
// OS is the detected operating system; Merge combines it via types.OS.Merge.
OS types.OS
// Repository is replaced by Merge whenever the incoming result carries one.
Repository *types.Repository
// PackageInfos holds OS packages.
PackageInfos []types.PackageInfo
// Applications holds language-specific packages.
Applications []types.Application
Misconfigurations []types.Misconfiguration
Secrets []types.Secret
Licenses []types.LicenseFile
SystemInstalledFiles []string // A list of files installed by OS package manager
// Digests contains SHA-256 digests of unpackaged files
// used to search for SBOM attestation.
Digests map[string]string
// For Red Hat
BuildInfo *types.BuildInfo
// CustomResources hold analysis results from custom analyzers.
// It is for extensibility and not used in OSS.
CustomResources []types.CustomResource
}
// NewAnalysisResult returns a pointer to a zero-valued AnalysisResult.
func NewAnalysisResult() *AnalysisResult {
	return &AnalysisResult{}
}
// isEmpty reports whether the result carries no findings at all:
// no OS, repository or build info, and every collection is empty.
func (r *AnalysisResult) isEmpty() bool {
	switch {
	case !lo.IsEmpty(r.OS),
		r.Repository != nil,
		r.BuildInfo != nil:
		return false
	case len(r.PackageInfos) > 0,
		len(r.Applications) > 0,
		len(r.Misconfigurations) > 0,
		len(r.Secrets) > 0,
		len(r.Licenses) > 0,
		len(r.SystemInstalledFiles) > 0,
		len(r.Digests) > 0,
		len(r.CustomResources) > 0:
		return false
	}
	return true
}
// Sort orders the collections of the result in place:
//   - package infos by file path, and the packages inside each of them;
//   - applications by file path, then type, and the packages inside each of them;
//   - custom resources, misconfigurations and secrets by file path;
//   - the findings of each secret by rule ID, then start line;
//   - license files by type, then file path, then layer diff ID.
func (r *AnalysisResult) Sort() {
	// OS packages
	pkgInfos := r.PackageInfos
	sort.Slice(pkgInfos, func(a, b int) bool {
		return pkgInfos[a].FilePath < pkgInfos[b].FilePath
	})
	for idx := range pkgInfos {
		sort.Sort(pkgInfos[idx].Packages)
	}

	// Language-specific packages
	apps := r.Applications
	sort.Slice(apps, func(a, b int) bool {
		x, y := &apps[a], &apps[b]
		if x.FilePath == y.FilePath {
			return x.Type < y.Type
		}
		return x.FilePath < y.FilePath
	})
	for idx := range apps {
		sort.Sort(apps[idx].Packages)
	}

	// Custom resources
	resources := r.CustomResources
	sort.Slice(resources, func(a, b int) bool {
		return resources[a].FilePath < resources[b].FilePath
	})

	// Misconfigurations
	misconfs := r.Misconfigurations
	sort.Slice(misconfs, func(a, b int) bool {
		return misconfs[a].FilePath < misconfs[b].FilePath
	})

	// Secrets
	secrets := r.Secrets
	sort.Slice(secrets, func(a, b int) bool {
		return secrets[a].FilePath < secrets[b].FilePath
	})
	for idx := range secrets {
		findings := secrets[idx].Findings
		sort.Slice(findings, func(a, b int) bool {
			x, y := &findings[a], &findings[b]
			if x.RuleID == y.RuleID {
				return x.StartLine < y.StartLine
			}
			return x.RuleID < y.RuleID
		})
	}

	// License files
	licenses := r.Licenses
	sort.Slice(licenses, func(a, b int) bool {
		x, y := &licenses[a], &licenses[b]
		if x.Type != y.Type {
			return x.Type < y.Type
		}
		if x.FilePath != y.FilePath {
			return x.FilePath < y.FilePath
		}
		return x.Layer.DiffID < y.Layer.DiffID
	})
}
// Merge folds newResult into r. A nil or empty newResult is ignored.
//
// Slice fields are appended, Digests are combined into a new map, Repository is
// replaced when the incoming one is set, and OS is combined via types.OS.Merge.
// Merge takes the receiver's mutex because several goroutines merge into the
// same result.
func (r *AnalysisResult) Merge(newResult *AnalysisResult) {
	if newResult == nil || newResult.isEmpty() {
		return
	}

	r.m.Lock()
	defer r.m.Unlock()

	r.OS.Merge(newResult.OS)

	if repo := newResult.Repository; repo != nil {
		r.Repository = repo
	}

	// Appending an empty slice leaves the destination untouched,
	// so none of these needs a length guard.
	r.PackageInfos = append(r.PackageInfos, newResult.PackageInfos...)
	r.Applications = append(r.Applications, newResult.Applications...)
	r.Misconfigurations = append(r.Misconfigurations, newResult.Misconfigurations...)
	r.Secrets = append(r.Secrets, newResult.Secrets...)
	r.Licenses = append(r.Licenses, newResult.Licenses...)
	r.SystemInstalledFiles = append(r.SystemInstalledFiles, newResult.SystemInstalledFiles...)
	r.CustomResources = append(r.CustomResources, newResult.CustomResources...)

	// SHA-256 digests of unpackaged files
	if digests := newResult.Digests; digests != nil {
		r.Digests = lo.Assign(r.Digests, digests)
	}

	incoming := newResult.BuildInfo
	switch {
	case incoming == nil:
		// Nothing to take over.
	case r.BuildInfo == nil:
		r.BuildInfo = incoming
	default:
		// Build info is overwritten field by field rather than combined:
		// Nvr and Arch are taken together as soon as either is set,
		// and ContentSets only when the incoming list is not empty.
		if incoming.Nvr != "" || incoming.Arch != "" {
			r.BuildInfo.Nvr = incoming.Nvr
			r.BuildInfo.Arch = incoming.Arch
		}
		if len(incoming.ContentSets) > 0 {
			r.BuildInfo.ContentSets = incoming.ContentSets
		}
	}
}
// belongToGroup reports whether the analyzer should be part of the group named groupName.
// Disabled analyzer types never belong to any group. An analyzer that implements
// CustomGroup belongs to the group it reports; every other analyzer belongs to GroupBuiltin.
func belongToGroup(groupName Group, analyzerType Type, disabledAnalyzers []Type, analyzer any) bool {
	if slices.Contains(disabledAnalyzers, analyzerType) {
		return false
	}

	owner := GroupBuiltin
	if custom, isCustom := analyzer.(CustomGroup); isCustom {
		owner = custom.Group()
	}
	return owner == groupName
}
// separator splits a user-supplied file pattern into "fileType" and "regexPattern".
const separator = ":"
// NewAnalyzerGroup builds the group of analyzers and post-analyzers selected by opt.
//
// It compiles the user-supplied file patterns ("fileType:regexPattern"), then
// collects every registered analyzer that belongs to the requested group
// (GroupBuiltin when opt.Group is empty) and is not disabled, calling Init on
// those that implement Initializer. Every registered post-analyzer constructor
// is invoked, and the resulting post-analyzer is kept when it belongs to the group.
//
// The returned group must not be used when the error is non-nil.
func NewAnalyzerGroup(opt AnalyzerOptions) (AnalyzerGroup, error) {
	targetGroup := GroupBuiltin
	if opt.Group != "" {
		targetGroup = opt.Group
	}

	group := AnalyzerGroup{
		logger:       log.WithPrefix("analyzer"),
		filePatterns: make(map[Type][]*regexp.Regexp),
	}

	for _, spec := range opt.FilePatterns {
		// e.g. "dockerfile:my_dockerfile_*"
		fileType, expr, found := strings.Cut(spec, separator)
		if !found {
			return group, xerrors.Errorf("invalid file pattern (%s) expected format: \"fileType:regexPattern\" e.g. \"dockerfile:my_dockerfile_*\"", spec)
		}

		re, err := regexp.Compile(expr)
		if err != nil {
			return group, xerrors.Errorf("invalid file regexp (%s): %w", spec, err)
		}

		// Appending to the missing (nil) entry creates it.
		key := Type(fileType)
		group.filePatterns[key] = append(group.filePatterns[key], re)
	}

	for t, candidate := range analyzers {
		if !belongToGroup(targetGroup, t, opt.DisabledAnalyzers, candidate) {
			continue
		}
		// Initialize only scanners that have Init()
		if initializer, needsInit := candidate.(Initializer); needsInit {
			if err := initializer.Init(opt); err != nil {
				return AnalyzerGroup{}, xerrors.Errorf("analyzer initialization error: %w", err)
			}
		}
		group.analyzers = append(group.analyzers, candidate)
	}

	for t, construct := range postAnalyzers {
		// The instance is needed to decide group membership, so it is built first.
		candidate, err := construct(opt)
		if err != nil {
			return AnalyzerGroup{}, xerrors.Errorf("post-analyzer init error: %w", err)
		}
		if belongToGroup(targetGroup, t, opt.DisabledAnalyzers, candidate) {
			group.postAnalyzers = append(group.postAnalyzers, candidate)
		}
	}

	return group, nil
}
// Versions maps analyzer and post-analyzer type names to their versions,
// as returned by AnalyzerGroup.AnalyzerVersions.
type Versions struct {
Analyzers map[string]int
PostAnalyzers map[string]int
}
// AnalyzerVersions returns analyzer version identifier used for cache keys.
// Both maps are keyed by the type name and are never nil.
func (ag AnalyzerGroup) AnalyzerVersions() Versions {
	versions := Versions{
		Analyzers:     make(map[string]int, len(ag.analyzers)),
		PostAnalyzers: make(map[string]int, len(ag.postAnalyzers)),
	}
	for _, a := range ag.analyzers {
		versions.Analyzers[string(a.Type())] = a.Version()
	}
	for _, pa := range ag.postAnalyzers {
		versions.PostAnalyzers[string(pa.Type())] = pa.Version()
	}
	return versions
}
// AnalyzeFile determines which files are required by the analyzers based on the file name and attributes,
// and passes only those files to the analyzer for analysis.
// This function may be called concurrently and must be thread-safe.
//
// Directories are skipped. For every enabled analyzer that either matches a
// user-supplied file pattern or reports the file as required, the file is
// opened through opener and analyzed in a new goroutine. Each goroutine is
// tracked by wg and holds one slot of limit while it runs; its result is merged
// into result. The caller must wait on wg before reading result.
//
// A permission error from opener stops processing of the file without an
// error. Any other open error, or a failure to acquire a slot of limit, is
// returned. Errors returned by an analyzer are logged at debug level and not
// returned; the analyzer's result is dropped unless the error is
// fos.AnalyzeOSError, in which case it is still merged.
func (ag AnalyzerGroup) AnalyzeFile(ctx context.Context, wg *sync.WaitGroup, limit *semaphore.Weighted, result *AnalysisResult,
	dir, filePath string, info os.FileInfo, opener Opener, disabled []Type, opts AnalysisOptions) error {
	if info.IsDir() {
		return nil
	}

	// filepath extracted from tar file doesn't have the prefix "/"
	cleanPath := strings.TrimLeft(filePath, "/")

	for _, a := range ag.analyzers {
		// Skip disabled analyzers
		if slices.Contains(disabled, a.Type()) {
			continue
		}

		if !ag.filePatternMatch(a.Type(), cleanPath) && !a.Required(cleanPath, info) {
			continue
		}

		rc, err := opener()
		if errors.Is(err, fs.ErrPermission) {
			ag.logger.Debug("Permission error", log.String("file_path", filePath))
			break
		} else if err != nil {
			return xerrors.Errorf("unable to open %s: %w", filePath, err)
		}

		if err = limit.Acquire(ctx, 1); err != nil {
			// The goroutine that normally closes rc is never started on this
			// path, so release the handle here to avoid leaking it.
			rc.Close()
			return xerrors.Errorf("semaphore acquire: %w", err)
		}
		wg.Add(1)

		go func(a analyzer, rc xio.ReadSeekCloserAt) {
			defer limit.Release(1)
			defer wg.Done()
			defer rc.Close()

			// Analyzers receive the original path, not the trimmed one.
			ret, err := a.Analyze(ctx, AnalysisInput{
				Dir:      dir,
				FilePath: filePath,
				Info:     info,
				Content:  rc,
				Options:  opts,
			})
			if err != nil && !errors.Is(err, fos.AnalyzeOSError) {
				ag.logger.Debug("Analysis error", log.Err(err))
				return
			}
			// Merge ignores a nil result.
			result.Merge(ret)
		}(a, rc)
	}

	return nil
}
// RequiredPostAnalyzers returns a list of analyzer types that require the given file.
// A post-analyzer requires the file when a user-supplied file pattern for its type
// matches the path or when its Required method says so. Directories yield nil.
func (ag AnalyzerGroup) RequiredPostAnalyzers(filePath string, info os.FileInfo) []Type {
	if info.IsDir() {
		return nil
	}

	var required []Type
	for _, pa := range ag.postAnalyzers {
		if !ag.filePatternMatch(pa.Type(), filePath) && !pa.Required(filePath, info) {
			continue
		}
		required = append(required, pa.Type())
	}
	return required
}
// PostAnalyze runs every post-analyzer of the group against its own virtual
// filesystem taken from compositeFS, which contains only the files that
// post-analyzer required. Post-analyzers without a filesystem are skipped.
// The obtained results are merged into the "result".
// This function may be called concurrently and must be thread-safe.
//
// Files already covered by the current result are filtered out of the
// filesystem first: files installed by the OS package manager, application
// files, and files of packages that carry a file path. The list is rebuilt for
// each post-analyzer because merging an earlier result may extend it.
//
// It returns an error when filtering fails or a post-analyzer returns one.
func (ag AnalyzerGroup) PostAnalyze(ctx context.Context, compositeFS *CompositeFS, result *AnalysisResult, opts AnalysisOptions) error {
	for _, a := range ag.postAnalyzers {
		fsys, ok := compositeFS.Get(a.Type())
		if !ok {
			continue
		}

		// Work on a copy: appending to the shared slice directly could write into
		// the spare capacity of result.SystemInstalledFiles' backing array.
		skippedFiles := slices.Clone(result.SystemInstalledFiles)
		for _, app := range result.Applications {
			skippedFiles = append(skippedFiles, app.FilePath)
			for _, pkg := range app.Packages {
				// The analysis result could contain packages listed in SBOM.
				// The files of those packages don't have to be analyzed.
				// This is especially helpful for expensive post-analyzers such as the JAR analyzer.
				if pkg.FilePath != "" {
					skippedFiles = append(skippedFiles, pkg.FilePath)
				}
			}
		}

		filteredFS, err := fsys.Filter(skippedFiles)
		if err != nil {
			return xerrors.Errorf("unable to filter filesystem: %w", err)
		}

		res, err := a.PostAnalyze(ctx, PostAnalysisInput{
			FS:      filteredFS,
			Options: opts,
		})
		if err != nil {
			return xerrors.Errorf("post analysis error: %w", err)
		}
		result.Merge(res)
	}
	return nil
}
// PostAnalyzerFS returns a composite filesystem that contains multiple filesystems for each post-analyzer.
// It delegates to NewCompositeFS, passing the group itself.
func (ag AnalyzerGroup) PostAnalyzerFS() (*CompositeFS, error) {
return NewCompositeFS(ag)
}
// filePatternMatch reports whether filePath matches at least one of the
// user-supplied patterns registered for analyzerType.
// It returns false when no pattern is registered for that type.
func (ag AnalyzerGroup) filePatternMatch(analyzerType Type, filePath string) bool {
	matched := false
	for _, re := range ag.filePatterns[analyzerType] {
		if matched = re.MatchString(filePath); matched {
			break
		}
	}
	return matched
}