mirror of
https://github.com/aquasecurity/trivy.git
synced 2025-12-12 15:50:15 -08:00
fix(secret): add UTF-8 validation in secret scanner to prevent protobuf marshalling errors (#9253)
Co-authored-by: knqyf263 <knqyf263@users.noreply.github.com>
This commit is contained in:
@@ -10,6 +10,7 @@ import (
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/samber/lo"
|
||||
"golang.org/x/xerrors"
|
||||
@@ -19,7 +20,12 @@ import (
|
||||
"github.com/aquasecurity/trivy/pkg/log"
|
||||
)
|
||||
|
||||
var lineSep = []byte{'\n'}
|
||||
var (
|
||||
lineSep = []byte{'\n'}
|
||||
warnUTF8Once = sync.OnceFunc(func() {
|
||||
log.WithPrefix(log.PrefixSecret).Warn("Invalid UTF-8 sequences detected in file content, replacing with empty string")
|
||||
})
|
||||
)
|
||||
|
||||
type Scanner struct {
|
||||
logger *log.Logger
|
||||
@@ -280,7 +286,7 @@ func ParseConfig(configPath string) (*Config, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
logger := log.WithPrefix("secret").With("config_path", configPath)
|
||||
logger := log.WithPrefix(log.PrefixSecret).With("config_path", configPath)
|
||||
f, err := os.Open(configPath)
|
||||
if errors.Is(err, os.ErrNotExist) {
|
||||
// If the specified file doesn't exist, it just uses built-in rules and allow rules.
|
||||
@@ -318,7 +324,7 @@ func convertSeverity(logger *log.Logger, severity string) string {
|
||||
}
|
||||
|
||||
func NewScanner(config *Config) Scanner {
|
||||
logger := log.WithPrefix("secret")
|
||||
logger := log.WithPrefix(log.PrefixSecret)
|
||||
|
||||
// Use the default rules
|
||||
if config == nil {
|
||||
@@ -512,7 +518,7 @@ func findLocation(start, end int, content []byte) (int, int, types.Code, string)
|
||||
lineStart = lo.Ternary(start-lineStart-30 < 0, lineStart, start-30)
|
||||
lineEnd = lo.Ternary(end+20 > lineEnd, lineEnd, end+20)
|
||||
}
|
||||
matchLine := string(content[lineStart:lineEnd])
|
||||
matchLine := sanitizeUTF8String(content[lineStart:lineEnd])
|
||||
endLineNum := startLineNum + bytes.Count(content[start:end], lineSep)
|
||||
|
||||
var code types.Code
|
||||
@@ -529,9 +535,9 @@ func findLocation(start, end int, content []byte) (int, int, types.Code, string)
|
||||
|
||||
var strRawLine string
|
||||
if len(rawLine) > maxLineLength {
|
||||
strRawLine = lo.Ternary(inCause, matchLine, string(rawLine[:maxLineLength]))
|
||||
strRawLine = lo.Ternary(inCause, matchLine, sanitizeUTF8String(rawLine[:maxLineLength]))
|
||||
} else {
|
||||
strRawLine = string(rawLine)
|
||||
strRawLine = sanitizeUTF8String(rawLine)
|
||||
}
|
||||
|
||||
code.Lines = append(code.Lines, types.Line{
|
||||
@@ -555,3 +561,14 @@ func findLocation(start, end int, content []byte) (int, int, types.Code, string)
|
||||
|
||||
return startLineNum + 1, endLineNum + 1, code, matchLine
|
||||
}
|
||||
|
||||
// sanitizeUTF8String converts bytes to a valid UTF-8 string, logging a warning once if invalid sequences are found
|
||||
func sanitizeUTF8String(data []byte) string {
|
||||
if utf8.Valid(data) {
|
||||
return string(data)
|
||||
}
|
||||
|
||||
warnUTF8Once()
|
||||
|
||||
return strings.ToValidUTF8(string(data), string(utf8.RuneError))
|
||||
}
|
||||
|
||||
@@ -1406,6 +1406,42 @@ func TestSecretScanner(t *testing.T) {
|
||||
Findings: []types.SecretFinding{wantFindingTokenInsideJs},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "invalid UTF-8 sequences in secrets",
|
||||
configPath: filepath.Join("testdata", "skip-test.yaml"),
|
||||
inputFilePath: filepath.Join("testdata", "invalid-utf8.txt"),
|
||||
want: types.Secret{
|
||||
FilePath: filepath.Join("testdata", "invalid-utf8.txt"),
|
||||
Findings: []types.SecretFinding{
|
||||
{
|
||||
RuleID: "github-pat",
|
||||
Category: secret.CategoryGitHub,
|
||||
Title: "GitHub Personal Access Token",
|
||||
Severity: "CRITICAL",
|
||||
StartLine: 1,
|
||||
EndLine: 1,
|
||||
Match: "token=****************************************",
|
||||
Code: types.Code{
|
||||
Lines: []types.Line{
|
||||
{
|
||||
Number: 1,
|
||||
Content: "token=****************************************",
|
||||
Highlighted: "token=****************************************",
|
||||
IsCause: true,
|
||||
FirstCause: true,
|
||||
LastCause: true,
|
||||
},
|
||||
{
|
||||
Number: 2,
|
||||
Content: "# Comment with invalid UTF-8: <20>",
|
||||
Highlighted: "# Comment with invalid UTF-8: <20>",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
|
||||
3
pkg/fanal/secret/testdata/invalid-utf8.txt
vendored
Normal file
3
pkg/fanal/secret/testdata/invalid-utf8.txt
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
token=ghp_abcdef1234567890ABCDEF1234567890abcd
|
||||
# Comment with invalid UTF-8: <20><><EFBFBD><EFBFBD>
|
||||
token2=ghp_1234567890abcdef1234567890ABCDEF<45><46>abcd
|
||||
Reference in New Issue
Block a user