mirror of
https://github.com/aquasecurity/trivy.git
synced 2025-12-12 15:50:15 -08:00
feat: add JSONC support for comments and trailing commas (#8862)
This commit is contained in:
248
pkg/x/json/jsonc.go
Normal file
248
pkg/x/json/jsonc.go
Normal file
@@ -0,0 +1,248 @@
|
||||
package json
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"io"
|
||||
)
|
||||
|
||||
// TokenType identifies the lexical context the JSONC scanner is currently in.
type TokenType int

// Scanner contexts, numbered from zero in declaration order.
const (
	TokenNormal            TokenType = iota // outside any string literal or comment
	TokenString                             // inside a double-quoted string literal
	TokenSingleLineComment                  // inside a comment opened by "//"
	TokenMultiLineComment                   // inside a comment opened by "/*"
)
|
||||
|
||||
// jsoncParser manages the state and processing of JSONC content
|
||||
type jsoncParser struct {
|
||||
reader *bytes.Reader // Source reader
|
||||
dst []byte // Destination buffer
|
||||
pos int // Current position in destination
|
||||
tokenType TokenType // Current token type being processed
|
||||
escaped bool // Whether the previous character was an escape character
|
||||
lastChar byte // Last processed character
|
||||
}
|
||||
|
||||
// ToRFC8259 converts JSONC (JSON with Comments) to valid JSON following RFC8259.
|
||||
// It strips out comments and trailing commas while maintaining the exact character
|
||||
// offsets as the input. This ensures that any JSON parser locations will map
|
||||
// directly back to the original source file positions.
|
||||
//
|
||||
// Both line numbers and character positions are preserved in the output.
|
||||
// Comments and trailing commas are replaced with spaces without changing line counts.
|
||||
//
|
||||
// Comments can be either:
|
||||
// - Single-line: starting with // and continuing to the end of the line
|
||||
// - Multi-line: starting with /* and ending with */
|
||||
//
|
||||
// Trailing commas are allowed in JSONC but not in standard JSON, so they are replaced
|
||||
// with spaces to maintain character offsets.
|
||||
func ToRFC8259(src []byte) []byte {
|
||||
dst := make([]byte, len(src))
|
||||
copy(dst, src) // Copy input to maintain same length and offsets
|
||||
|
||||
parser := newJSONCParser(src, dst)
|
||||
parser.process()
|
||||
|
||||
return dst
|
||||
}
|
||||
|
||||
// UnmarshalJSONC parses JSONC (JSON with Comments) data into the specified value.
|
||||
// It first converts JSONC to standard JSON following RFC8259 and then unmarshals it.
|
||||
// This is a convenience function that combines ToRFC8259 and Unmarshal.
|
||||
//
|
||||
// The parser preserves line number information, which is essential for reporting
|
||||
// errors at their correct locations in the original file.
|
||||
//
|
||||
// Usage example:
|
||||
//
|
||||
// type Config struct {
|
||||
// Name string `json:"name"`
|
||||
// Version string `json:"version"`
|
||||
// xjson.Location // Embed Location to get line number info
|
||||
// }
|
||||
//
|
||||
// var config Config
|
||||
// if err := xjson.UnmarshalJSONC(data, &config); err != nil {
|
||||
// return err
|
||||
// }
|
||||
func UnmarshalJSONC(data []byte, v any) error {
|
||||
jsonData := ToRFC8259(data)
|
||||
return Unmarshal(jsonData, v)
|
||||
}
|
||||
|
||||
// newJSONCParser creates a new JSONC parser
|
||||
func newJSONCParser(src, dst []byte) *jsoncParser {
|
||||
return &jsoncParser{
|
||||
reader: bytes.NewReader(src),
|
||||
dst: dst,
|
||||
pos: 0,
|
||||
tokenType: TokenNormal,
|
||||
}
|
||||
}
|
||||
|
||||
// process processes the input JSONC content
|
||||
func (p *jsoncParser) process() {
|
||||
for {
|
||||
b, err := p.reader.ReadByte()
|
||||
if errors.Is(err, io.EOF) {
|
||||
break
|
||||
} else if err != nil {
|
||||
// Ignore other errors (not expected to occur)
|
||||
break
|
||||
}
|
||||
p.processChar(b)
|
||||
}
|
||||
}
|
||||
|
||||
// processChar processes a single character based on current state
|
||||
func (p *jsoncParser) processChar(b byte) {
|
||||
switch p.tokenType {
|
||||
case TokenString:
|
||||
p.processStringToken(b)
|
||||
case TokenSingleLineComment:
|
||||
p.processSingleLineComment(b)
|
||||
case TokenMultiLineComment:
|
||||
p.processMultiLineComment(b)
|
||||
default:
|
||||
p.processNormalToken(b)
|
||||
}
|
||||
}
|
||||
|
||||
// processStringToken processes a character within a string literal
|
||||
func (p *jsoncParser) processStringToken(b byte) {
|
||||
switch {
|
||||
case p.escaped:
|
||||
p.escaped = false
|
||||
case b == '\\':
|
||||
p.escaped = true
|
||||
case b == '"':
|
||||
p.tokenType = TokenNormal
|
||||
}
|
||||
|
||||
p.lastChar = b
|
||||
p.pos++
|
||||
}
|
||||
|
||||
// processSingleLineComment processes a character within a single-line comment
|
||||
func (p *jsoncParser) processSingleLineComment(b byte) {
|
||||
if b == '\n' {
|
||||
// End of single-line comment at newline
|
||||
p.tokenType = TokenNormal
|
||||
} else if !isPreservedWhitespace(b) {
|
||||
// Replace non-whitespace characters with spaces
|
||||
if p.pos < len(p.dst) {
|
||||
p.dst[p.pos] = ' '
|
||||
}
|
||||
}
|
||||
|
||||
p.lastChar = b
|
||||
p.pos++
|
||||
}
|
||||
|
||||
// processMultiLineComment processes a character within a multi-line comment
|
||||
func (p *jsoncParser) processMultiLineComment(b byte) {
|
||||
if p.lastChar == '*' && b == '/' {
|
||||
// End of multi-line comment
|
||||
p.tokenType = TokenNormal
|
||||
if p.pos < len(p.dst) {
|
||||
p.dst[p.pos] = ' ' // Replace '/' with space
|
||||
}
|
||||
} else if !isPreservedWhitespace(b) {
|
||||
// Replace non-whitespace with space
|
||||
if p.pos < len(p.dst) {
|
||||
p.dst[p.pos] = ' '
|
||||
}
|
||||
}
|
||||
|
||||
p.lastChar = b
|
||||
p.pos++
|
||||
}
|
||||
|
||||
// processNormalToken processes a character outside of string literals and comments
|
||||
func (p *jsoncParser) processNormalToken(b byte) {
|
||||
switch b {
|
||||
case '"':
|
||||
// Start of string literal
|
||||
p.tokenType = TokenString
|
||||
case '/':
|
||||
// Potential start of comment - look ahead
|
||||
nextByte, err := p.reader.ReadByte()
|
||||
if err != nil {
|
||||
// End of file after '/' character
|
||||
return
|
||||
}
|
||||
|
||||
switch nextByte {
|
||||
case '/':
|
||||
// Start of single-line comment
|
||||
p.tokenType = TokenSingleLineComment
|
||||
if p.pos < len(p.dst) {
|
||||
p.dst[p.pos] = ' ' // Replace '/' with space
|
||||
}
|
||||
if p.pos+1 < len(p.dst) {
|
||||
p.dst[p.pos+1] = ' ' // Replace second '/' with space
|
||||
}
|
||||
p.lastChar = nextByte
|
||||
p.pos += 2
|
||||
return
|
||||
case '*':
|
||||
// Start of multi-line comment
|
||||
p.tokenType = TokenMultiLineComment
|
||||
if p.pos < len(p.dst) {
|
||||
p.dst[p.pos] = ' ' // Replace '/' with space
|
||||
}
|
||||
if p.pos+1 < len(p.dst) {
|
||||
p.dst[p.pos+1] = ' ' // Replace '*' with space
|
||||
}
|
||||
p.lastChar = nextByte
|
||||
p.pos += 2
|
||||
return
|
||||
}
|
||||
|
||||
// Not a comment, put the byte back
|
||||
p.reader.UnreadByte()
|
||||
case ']', '}':
|
||||
// Handle trailing comma - look backward
|
||||
p.handleTrailingComma()
|
||||
}
|
||||
p.lastChar = b
|
||||
p.pos++
|
||||
}
|
||||
|
||||
// handleTrailingComma handles the trailing comma by looking backward from the current position
|
||||
func (p *jsoncParser) handleTrailingComma() {
|
||||
// Start from one position before the current bracket
|
||||
startPos := p.pos - 1
|
||||
if startPos < 0 {
|
||||
return
|
||||
}
|
||||
|
||||
// Find the previous significant (non-whitespace) character
|
||||
for i := startPos; i >= 0; i-- {
|
||||
if i >= len(p.dst) {
|
||||
continue
|
||||
}
|
||||
|
||||
c := p.dst[i]
|
||||
switch c {
|
||||
case ' ', '\t', '\n', '\r':
|
||||
// Skip whitespace
|
||||
continue
|
||||
case ',':
|
||||
// If it's a comma, replace it with a space
|
||||
p.dst[i] = ' '
|
||||
default:
|
||||
// Stop after finding the first non-whitespace character
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// isPreservedWhitespace reports whether c is a newline, tab or carriage
// return, i.e. whitespace that is kept as-is inside comments.
func isPreservedWhitespace(c byte) bool {
	switch c {
	case '\n', '\t', '\r':
		return true
	default:
		return false
	}
}
|
||||
190
pkg/x/json/jsonc_test.go
Normal file
190
pkg/x/json/jsonc_test.go
Normal file
@@ -0,0 +1,190 @@
|
||||
package json_test
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
xjson "github.com/aquasecurity/trivy/pkg/x/json"
|
||||
)
|
||||
|
||||
func TestToRFC8259(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
input string
|
||||
want string
|
||||
}{
|
||||
{
|
||||
name: "no comments",
|
||||
input: `{"a": 1, "b": 2}`,
|
||||
want: `{"a": 1, "b": 2}`,
|
||||
},
|
||||
{
|
||||
name: "single-line comment",
|
||||
input: "{\n \"a\": 1, // This is a comment\n \"b\": 2\n}",
|
||||
want: "{\n \"a\": 1, \n \"b\": 2\n}",
|
||||
},
|
||||
{
|
||||
name: "multi-line comment",
|
||||
input: "{\n \"a\": 1, /* This is\n a multi-line\n comment */ \"b\": 2\n}",
|
||||
want: "{\n \"a\": 1, \n \n \"b\": 2\n}",
|
||||
},
|
||||
{
|
||||
name: "comment with forward slash in string",
|
||||
input: "{\n \"url\": \"http://example.com\", // Comment\n \"value\": 123\n}",
|
||||
want: "{\n \"url\": \"http://example.com\", \n \"value\": 123\n}",
|
||||
},
|
||||
{
|
||||
name: "trailing comma in object",
|
||||
input: `{"a": 1, "b": 2,}`,
|
||||
want: `{"a": 1, "b": 2 }`,
|
||||
},
|
||||
{
|
||||
name: "trailing comma in array",
|
||||
input: `[1, 2, 3,]`,
|
||||
want: `[1, 2, 3 ]`,
|
||||
},
|
||||
{
|
||||
name: "nested trailing commas",
|
||||
input: `{"a": [1, 2,], "b": {"x": 1, "y": 2,},}`,
|
||||
want: `{"a": [1, 2 ], "b": {"x": 1, "y": 2 } }`,
|
||||
},
|
||||
{
|
||||
name: "single-line comment at end of file without newline",
|
||||
input: `{"a": 1} // Comment`,
|
||||
want: `{"a": 1} `,
|
||||
},
|
||||
{
|
||||
name: "multi-line comment at end of file",
|
||||
input: `{"a": 1} /* Comment */`,
|
||||
want: `{"a": 1} `,
|
||||
},
|
||||
{
|
||||
name: "comment within string",
|
||||
input: `{"text": "This string has // comment syntax"}`,
|
||||
want: `{"text": "This string has // comment syntax"}`,
|
||||
},
|
||||
{
|
||||
name: "quoted comment markers",
|
||||
input: `{"a": "//", "b": "/*", "c": "*/"}`,
|
||||
want: `{"a": "//", "b": "/*", "c": "*/"}`,
|
||||
},
|
||||
{
|
||||
name: "escaped quotes in string",
|
||||
input: `{"text": "String with \"escaped quotes\" // not a comment"}`,
|
||||
want: `{"text": "String with \"escaped quotes\" // not a comment"}`,
|
||||
},
|
||||
{
|
||||
name: "complex escaped quotes",
|
||||
input: `{"text": "String with \\\"double escaped\\\" quotes"}`,
|
||||
want: `{"text": "String with \\\"double escaped\\\" quotes"}`,
|
||||
},
|
||||
{
|
||||
name: "real world example",
|
||||
input: `{
|
||||
"name": "my-package", // Package name
|
||||
"version": "1.0.0", /* Version number */
|
||||
"dependencies": {
|
||||
"lodash": "^4.17.21",
|
||||
"express": "^4.17.1", // Latest express
|
||||
},
|
||||
"scripts": {
|
||||
"start": "node index.js",
|
||||
"test": "jest",
|
||||
}
|
||||
}`,
|
||||
want: `{
|
||||
"name": "my-package",
|
||||
"version": "1.0.0",
|
||||
"dependencies": {
|
||||
"lodash": "^4.17.21",
|
||||
"express": "^4.17.1"
|
||||
},
|
||||
"scripts": {
|
||||
"start": "node index.js",
|
||||
"test": "jest"
|
||||
}
|
||||
}`,
|
||||
},
|
||||
{
|
||||
name: "preserves newlines in multiline comments",
|
||||
input: `{
|
||||
"name": "test", // Comment
|
||||
/*
|
||||
* Multi-line
|
||||
* comment
|
||||
*/
|
||||
"value": 42
|
||||
}`,
|
||||
want: `{
|
||||
"name": "test",
|
||||
|
||||
|
||||
|
||||
|
||||
"value": 42
|
||||
}`,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
// Test ToRFC8259 (allocates new buffer)
|
||||
got := xjson.ToRFC8259([]byte(tt.input))
|
||||
|
||||
// Check length preservation
|
||||
require.Len(t, got, len(tt.input), "output length should match input length")
|
||||
|
||||
// Check content
|
||||
assert.Equal(t, tt.want, string(got))
|
||||
|
||||
// Verify newline count is preserved
|
||||
inputNewlines := bytes.Count([]byte(tt.input), []byte{'\n'})
|
||||
outputNewlines := bytes.Count(got, []byte{'\n'})
|
||||
assert.Equal(t, inputNewlines, outputNewlines, "number of newlines should be preserved")
|
||||
|
||||
// Make sure the output is valid JSON
|
||||
var jsonMap any
|
||||
err := xjson.Unmarshal(got, &jsonMap)
|
||||
require.NoError(t, err, "result should be valid JSON")
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestUnmarshalJSONC(t *testing.T) {
|
||||
jsonc := `{
|
||||
"name": "test", // This is a comment
|
||||
"dependencies": {
|
||||
"lodash": "^4.17.21", /* Another comment */
|
||||
"express": "^4.17.1", // Comment
|
||||
}, // Trailing comment
|
||||
/* Multi-line
|
||||
comment */
|
||||
"version": "1.0.0"
|
||||
}`
|
||||
|
||||
type Config struct {
|
||||
Name string `json:"name"`
|
||||
Dependencies map[string]string `json:"dependencies"`
|
||||
Version string `json:"version"`
|
||||
xjson.Location
|
||||
}
|
||||
|
||||
var config Config
|
||||
err := xjson.UnmarshalJSONC([]byte(jsonc), &config)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Verify the parsed content
|
||||
assert.Equal(t, "test", config.Name)
|
||||
assert.Equal(t, "1.0.0", config.Version)
|
||||
assert.Equal(t, map[string]string{
|
||||
"lodash": "^4.17.21",
|
||||
"express": "^4.17.1",
|
||||
}, config.Dependencies)
|
||||
|
||||
// Verify location information
|
||||
assert.Equal(t, 1, config.StartLine)
|
||||
assert.Equal(t, 10, config.EndLine)
|
||||
}
|
||||
Reference in New Issue
Block a user