diff --git a/pkg/yqlib/front_matter.go b/pkg/yqlib/front_matter.go index 4c56aabd6f..0f9efb26ee 100644 --- a/pkg/yqlib/front_matter.go +++ b/pkg/yqlib/front_matter.go @@ -2,11 +2,26 @@ package yqlib import ( "bufio" + "bytes" "errors" "io" "os" ) +var utf8BOM = []byte{0xEF, 0xBB, 0xBF} + +// stripUTF8BOM returns a reader that skips a leading UTF-8 BOM, if present. +func stripUTF8BOM(r io.Reader) io.Reader { + br := bufio.NewReader(r) + + peek, err := br.Peek(3) + if err == nil && bytes.Equal(peek, utf8BOM) { + _, _ = br.Discard(3) + } + + return br +} + type frontMatterHandler interface { Split() error GetYamlFrontMatterFilename() string @@ -43,13 +58,13 @@ func (f *frontMatterHandlerImpl) Split() error { var reader *bufio.Reader var err error if f.originalFilename == "-" { - reader = bufio.NewReader(os.Stdin) + reader = bufio.NewReader(stripUTF8BOM(os.Stdin)) } else { file, err := os.Open(f.originalFilename) // #nosec if err != nil { return err } - reader = bufio.NewReader(file) + reader = bufio.NewReader(stripUTF8BOM(file)) } f.contentReader = reader diff --git a/pkg/yqlib/front_matter_test.go b/pkg/yqlib/front_matter_test.go index 65c17c5053..2680aa2863 100644 --- a/pkg/yqlib/front_matter_test.go +++ b/pkg/yqlib/front_matter_test.go @@ -157,6 +157,92 @@ Some content fmHandler.CleanUp() } +func TestFrontMatterSplitWithBOM(t *testing.T) { + // Regression test for https://github.com/mikefarah/yq/issues/2496 + // A UTF-8 BOM before the opening --- must be skipped, otherwise the + // separator isn't recognised and the opening --- is lost. + file := createTestFile("\ufeff---\na: apple\nb: banana\n---\nnot a\nyaml: doc\n") + + expectedYamlFm := `--- +a: apple +b: banana +` + + expectedContent := `--- +not a +yaml: doc +` + + fmHandler := NewFrontMatterHandler(file) + err := fmHandler.Split() + if err != nil { + panic(err) + } + + yamlFm := readFile(fmHandler.GetYamlFrontMatterFilename()) + + test.AssertResult(t, expectedYamlFm, yamlFm) + + contentBytes, err := io.ReadAll(fmHandler.GetContentReader()) + if err != nil { + panic(err) + } + test.AssertResult(t, expectedContent, string(contentBytes)) + + tryRemoveTempFile(file) + fmHandler.CleanUp() +} + +func TestFrontMatterSplitWithBOMFromStdin(t *testing.T) { + // Regression test for https://github.com/mikefarah/yq/issues/2496 + // A UTF-8 BOM must also be skipped when reading front matter from stdin. + originalStdin := os.Stdin + defer func() { os.Stdin = originalStdin }() + + r, w, err := os.Pipe() + if err != nil { + panic(err) + } + os.Stdin = r + defer safelyCloseFile(r) + + go func() { + _, writeErr := w.WriteString("\ufeff---\na: apple\nb: banana\n---\nnot a\nyaml: doc\n") + if writeErr != nil { + t.Errorf("failed to write front matter to the stdin pipe: %v", writeErr) + } + safelyCloseFile(w) + }() + + expectedYamlFm := `--- +a: apple +b: banana +` + + expectedContent := `--- +not a +yaml: doc +` + + fmHandler := NewFrontMatterHandler("-") + err = fmHandler.Split() + if err != nil { + panic(err) + } + + yamlFm := readFile(fmHandler.GetYamlFrontMatterFilename()) + + test.AssertResult(t, expectedYamlFm, yamlFm) + + contentBytes, err := io.ReadAll(fmHandler.GetContentReader()) + if err != nil { + panic(err) + } + test.AssertResult(t, expectedContent, string(contentBytes)) + + fmHandler.CleanUp() +} + func TestFrontMatterSplitWithArray(t *testing.T) { file := createTestFile(`[1,2,3] ---