From 5b3a82d621e75ef9198213d20081da8b28511403 Mon Sep 17 00:00:00 2001 From: Gusted Date: Wed, 21 Feb 2024 22:18:44 +0100 Subject: [PATCH] [FEAT] Enable ambiguous character detection in configured contexts - The ambiguous character detection is an important security feature to combat against sourcebase attacks (https://trojansource.codes/). - However there are a few problems with the feature as it stands today (i) it's apparantly an big performance hitter, it's twice as slow as syntax highlighting (ii) it contains false positives, because it's reporting valid problems but not valid within the context of a programming language (ambiguous charachters in code comments being a prime example) that can lead to security issues (iii) charachters from certain languages always being marked as ambiguous. It's a lot of effort to fix the aforementioned issues. - Therefore, make it configurable in which context the ambiguous character detection should be run, this avoids running detection in all contexts such as file views, but still enable it in commits and pull requests diffs where it matters the most. Ideally this also becomes an per-repository setting, but the code architecture doesn't allow for a clean implementation of that. - Adds unit test. - Adds integration tests to ensure that the contexts and instance-wide is respected (and that ambigious charachter detection actually work in different places). - Ref: https://codeberg.org/forgejo/forgejo/pulls/2395#issuecomment-1575547 - Ref: https://codeberg.org/forgejo/forgejo/issues/564 --- modules/charset/escape.go | 22 +++++-- modules/charset/escape_test.go | 22 ++++++- modules/setting/ui.go | 2 + routers/web/repo/blame.go | 2 +- routers/web/repo/setting/lfs.go | 2 +- routers/web/repo/view.go | 6 +- routers/web/repo/wiki.go | 2 +- services/gitdiff/gitdiff.go | 4 +- tests/integration/view_test.go | 104 ++++++++++++++++++++++++++++++++ 9 files changed, 151 insertions(+), 15 deletions(-) diff --git a/modules/charset/escape.go b/modules/charset/escape.go index 92e417d1f..ba0eb73a3 100644 --- a/modules/charset/escape.go +++ b/modules/charset/escape.go @@ -10,6 +10,7 @@ package charset import ( "html/template" "io" + "slices" "strings" "code.gitea.io/gitea/modules/log" @@ -20,16 +21,29 @@ import ( // RuneNBSP is the codepoint for NBSP const RuneNBSP = 0xa0 +type escapeContext string + +// Keep this consistent with the documentation of [ui].SKIP_ESCAPE_CONTEXTS +// Defines the different contexts that could be used to escape in. +const ( + // Wiki pages. + WikiContext escapeContext = "wiki" + // Rendered content (except markup), source code and blames. + FileviewContext escapeContext = "file-view" + // Commits or pull requet's diff. + DiffContext escapeContext = "diff" +) + // EscapeControlHTML escapes the unicode control sequences in a provided html document -func EscapeControlHTML(html template.HTML, locale translation.Locale, allowed ...rune) (escaped *EscapeStatus, output template.HTML) { +func EscapeControlHTML(html template.HTML, locale translation.Locale, context escapeContext, allowed ...rune) (escaped *EscapeStatus, output template.HTML) { sb := &strings.Builder{} - escaped, _ = EscapeControlReader(strings.NewReader(string(html)), sb, locale, allowed...) // err has been handled in EscapeControlReader + escaped, _ = EscapeControlReader(strings.NewReader(string(html)), sb, locale, context, allowed...) // err has been handled in EscapeControlReader return escaped, template.HTML(sb.String()) } // EscapeControlReader escapes the unicode control sequences in a provided reader of HTML content and writer in a locale and returns the findings as an EscapeStatus -func EscapeControlReader(reader io.Reader, writer io.Writer, locale translation.Locale, allowed ...rune) (escaped *EscapeStatus, err error) { - if !setting.UI.AmbiguousUnicodeDetection { +func EscapeControlReader(reader io.Reader, writer io.Writer, locale translation.Locale, context escapeContext, allowed ...rune) (escaped *EscapeStatus, err error) { + if !setting.UI.AmbiguousUnicodeDetection || slices.Contains(setting.UI.SkipEscapeContexts, string(context)) { _, err = io.Copy(writer, reader) return &EscapeStatus{}, err } diff --git a/modules/charset/escape_test.go b/modules/charset/escape_test.go index a353ced63..7442a80d7 100644 --- a/modules/charset/escape_test.go +++ b/modules/charset/escape_test.go @@ -4,6 +4,7 @@ package charset import ( + "html/template" "strings" "testing" @@ -14,6 +15,8 @@ import ( "github.com/stretchr/testify/assert" ) +var testContext = escapeContext("test") + type escapeControlTest struct { name string text string @@ -159,7 +162,7 @@ func TestEscapeControlReader(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { output := &strings.Builder{} - status, err := EscapeControlReader(strings.NewReader(tt.text), output, &translation.MockLocale{}) + status, err := EscapeControlReader(strings.NewReader(tt.text), output, &translation.MockLocale{}, testContext) assert.NoError(t, err) assert.Equal(t, tt.status, *status) assert.Equal(t, tt.result, output.String()) @@ -169,9 +172,22 @@ func TestEscapeControlReader(t *testing.T) { func TestSettingAmbiguousUnicodeDetection(t *testing.T) { defer test.MockVariableValue(&setting.UI.AmbiguousUnicodeDetection, true)() - _, out := EscapeControlHTML("a test", &translation.MockLocale{}) + + _, out := EscapeControlHTML("a test", &translation.MockLocale{}, testContext) assert.EqualValues(t, `a test`, out) setting.UI.AmbiguousUnicodeDetection = false - _, out = EscapeControlHTML("a test", &translation.MockLocale{}) + _, out = EscapeControlHTML("a test", &translation.MockLocale{}, testContext) assert.EqualValues(t, `a test`, out) } + +func TestAmbiguousUnicodeDetectionContext(t *testing.T) { + defer test.MockVariableValue(&setting.UI.SkipEscapeContexts, []string{"test"})() + + input := template.HTML("a test") + + _, out := EscapeControlHTML(input, &translation.MockLocale{}, escapeContext("not-test")) + assert.EqualValues(t, `a test`, out) + + _, out = EscapeControlHTML(input, &translation.MockLocale{}, testContext) + assert.EqualValues(t, input, out) +} diff --git a/modules/setting/ui.go b/modules/setting/ui.go index 02a213d47..47e1393ef 100644 --- a/modules/setting/ui.go +++ b/modules/setting/ui.go @@ -38,6 +38,7 @@ var UI = struct { PreferredTimestampTense string AmbiguousUnicodeDetection bool + SkipEscapeContexts []string Notification struct { MinTimeout time.Duration @@ -89,6 +90,7 @@ var UI = struct { PreferredTimestampTense: "mixed", AmbiguousUnicodeDetection: true, + SkipEscapeContexts: []string{}, Notification: struct { MinTimeout time.Duration diff --git a/routers/web/repo/blame.go b/routers/web/repo/blame.go index d7c861c42..dca963c8e 100644 --- a/routers/web/repo/blame.go +++ b/routers/web/repo/blame.go @@ -279,7 +279,7 @@ func renderBlame(ctx *context.Context, blameParts []*git.BlamePart, commitNames lexerName = lexerNameForLine } - br.EscapeStatus, br.Code = charset.EscapeControlHTML(line, ctx.Locale) + br.EscapeStatus, br.Code = charset.EscapeControlHTML(line, ctx.Locale, charset.FileviewContext) rows = append(rows, br) escapeStatus = escapeStatus.Or(br.EscapeStatus) } diff --git a/routers/web/repo/setting/lfs.go b/routers/web/repo/setting/lfs.go index cd0f11d54..e360ae2b8 100644 --- a/routers/web/repo/setting/lfs.go +++ b/routers/web/repo/setting/lfs.go @@ -307,7 +307,7 @@ func LFSFileGet(ctx *context.Context) { // Building code view blocks with line number on server side. escapedContent := &bytes.Buffer{} - ctx.Data["EscapeStatus"], _ = charset.EscapeControlReader(rd, escapedContent, ctx.Locale) + ctx.Data["EscapeStatus"], _ = charset.EscapeControlReader(rd, escapedContent, ctx.Locale, charset.FileviewContext) var output bytes.Buffer lines := strings.Split(escapedContent.String(), "\n") diff --git a/routers/web/repo/view.go b/routers/web/repo/view.go index e48865a2f..86977062c 100644 --- a/routers/web/repo/view.go +++ b/routers/web/repo/view.go @@ -338,7 +338,7 @@ func renderReadmeFile(ctx *context.Context, subfolder string, readmeFile *git.Tr log.Error("Read readme content failed: %v", err) } contentEscaped := template.HTMLEscapeString(util.UnsafeBytesToString(content)) - ctx.Data["EscapeStatus"], ctx.Data["FileContent"] = charset.EscapeControlHTML(template.HTML(contentEscaped), ctx.Locale) + ctx.Data["EscapeStatus"], ctx.Data["FileContent"] = charset.EscapeControlHTML(template.HTML(contentEscaped), ctx.Locale, charset.FileviewContext) } if !fInfo.isLFSFile && ctx.Repo.CanEnableEditor(ctx, ctx.Doer) { @@ -572,7 +572,7 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry) { status := &charset.EscapeStatus{} statuses := make([]*charset.EscapeStatus, len(fileContent)) for i, line := range fileContent { - statuses[i], fileContent[i] = charset.EscapeControlHTML(line, ctx.Locale) + statuses[i], fileContent[i] = charset.EscapeControlHTML(line, ctx.Locale, charset.FileviewContext) status = status.Or(statuses[i]) } ctx.Data["EscapeStatus"] = status @@ -678,7 +678,7 @@ func markupRender(ctx *context.Context, renderCtx *markup.RenderContext, input i go func() { sb := &strings.Builder{} // We allow NBSP here this is rendered - escaped, _ = charset.EscapeControlReader(markupRd, sb, ctx.Locale, charset.RuneNBSP) + escaped, _ = charset.EscapeControlReader(markupRd, sb, ctx.Locale, charset.FileviewContext, charset.RuneNBSP) output = template.HTML(sb.String()) close(done) }() diff --git a/routers/web/repo/wiki.go b/routers/web/repo/wiki.go index 79f446ea8..157ebd4d5 100644 --- a/routers/web/repo/wiki.go +++ b/routers/web/repo/wiki.go @@ -254,7 +254,7 @@ func renderViewPage(ctx *context.Context) (*git.Repository, *git.TreeEntry) { done := make(chan struct{}) go func() { // We allow NBSP here this is rendered - escaped, _ = charset.EscapeControlReader(markupRd, buf, ctx.Locale, charset.RuneNBSP) + escaped, _ = charset.EscapeControlReader(markupRd, buf, ctx.Locale, charset.WikiContext, charset.RuneNBSP) output = buf.String() buf.Reset() close(done) diff --git a/services/gitdiff/gitdiff.go b/services/gitdiff/gitdiff.go index 241d849c9..715c5bf9b 100644 --- a/services/gitdiff/gitdiff.go +++ b/services/gitdiff/gitdiff.go @@ -284,14 +284,14 @@ type DiffInline struct { // DiffInlineWithUnicodeEscape makes a DiffInline with hidden unicode characters escaped func DiffInlineWithUnicodeEscape(s template.HTML, locale translation.Locale) DiffInline { - status, content := charset.EscapeControlHTML(s, locale) + status, content := charset.EscapeControlHTML(s, locale, charset.DiffContext) return DiffInline{EscapeStatus: status, Content: content} } // DiffInlineWithHighlightCode makes a DiffInline with code highlight and hidden unicode characters escaped func DiffInlineWithHighlightCode(fileName, language, code string, locale translation.Locale) DiffInline { highlighted, _ := highlight.Code(fileName, language, code) - status, content := charset.EscapeControlHTML(highlighted, locale) + status, content := charset.EscapeControlHTML(highlighted, locale, charset.DiffContext) return DiffInline{EscapeStatus: status, Content: content} } diff --git a/tests/integration/view_test.go b/tests/integration/view_test.go index f43444680..cd63304a9 100644 --- a/tests/integration/view_test.go +++ b/tests/integration/view_test.go @@ -5,8 +5,16 @@ package integration import ( "net/http" + "net/url" + "strings" "testing" + unit_model "code.gitea.io/gitea/models/unit" + "code.gitea.io/gitea/models/unittest" + user_model "code.gitea.io/gitea/models/user" + "code.gitea.io/gitea/modules/setting" + "code.gitea.io/gitea/modules/test" + files_service "code.gitea.io/gitea/services/repository/files" "code.gitea.io/gitea/tests" "github.com/stretchr/testify/assert" @@ -25,3 +33,99 @@ func TestRenderFileSVGIsInImgTag(t *testing.T) { assert.True(t, exists, "The SVG image should be in an tag so that scripts in the SVG are not run") assert.Equal(t, "/user2/repo2/raw/branch/master/line.svg", src) } + +func TestAmbiguousCharacterDetection(t *testing.T) { + onGiteaRun(t, func(t *testing.T, u *url.URL) { + user2 := unittest.AssertExistsAndLoadBean(t, &user_model.User{ID: 2}) + session := loginUser(t, user2.Name) + + // Prepare the environments. File view, commit view (diff), wiki page. + repo, commitID, f := CreateDeclarativeRepo(t, user2, "", + []unit_model.Type{unit_model.TypeCode, unit_model.TypeWiki}, nil, + []*files_service.ChangeRepoFile{ + { + Operation: "create", + TreePath: "test.sh", + ContentReader: strings.NewReader("Hello there!\nline western"), + }, + }, + ) + defer f() + + req := NewRequestWithValues(t, "POST", repo.Link()+"/wiki?action=new", map[string]string{ + "_csrf": GetCSRF(t, session, repo.Link()+"/wiki?action=new"), + "title": "Normal", + "content": "Hello – Hello", + }) + session.MakeRequest(t, req, http.StatusSeeOther) + + assertCase := func(t *testing.T, fileContext, commitContext, wikiContext bool) { + t.Helper() + + t.Run("File context", func(t *testing.T) { + defer tests.PrintCurrentTest(t)() + + req := NewRequest(t, "GET", repo.Link()+"/src/branch/main/test.sh") + resp := session.MakeRequest(t, req, http.StatusOK) + + htmlDoc := NewHTMLParser(t, resp.Body) + htmlDoc.AssertElement(t, ".unicode-escape-prompt", fileContext) + }) + t.Run("Commit context", func(t *testing.T) { + defer tests.PrintCurrentTest(t)() + + req := NewRequest(t, "GET", repo.Link()+"/commit/"+commitID) + resp := session.MakeRequest(t, req, http.StatusOK) + + htmlDoc := NewHTMLParser(t, resp.Body) + htmlDoc.AssertElement(t, ".lines-escape .toggle-escape-button", commitContext) + }) + t.Run("Wiki context", func(t *testing.T) { + defer tests.PrintCurrentTest(t)() + + req := NewRequest(t, "GET", repo.Link()+"/wiki/Normal") + resp := session.MakeRequest(t, req, http.StatusOK) + + htmlDoc := NewHTMLParser(t, resp.Body) + htmlDoc.AssertElement(t, ".unicode-escape-prompt", wikiContext) + }) + } + + t.Run("Enabled all context", func(t *testing.T) { + defer test.MockVariableValue(&setting.UI.SkipEscapeContexts, []string{})() + + assertCase(t, true, true, true) + }) + + t.Run("Enabled file context", func(t *testing.T) { + defer test.MockVariableValue(&setting.UI.SkipEscapeContexts, []string{"diff", "wiki"})() + + assertCase(t, true, false, false) + }) + + t.Run("Enabled commit context", func(t *testing.T) { + defer test.MockVariableValue(&setting.UI.SkipEscapeContexts, []string{"file-view", "wiki"})() + + assertCase(t, false, true, false) + }) + + t.Run("Enabled wiki context", func(t *testing.T) { + defer test.MockVariableValue(&setting.UI.SkipEscapeContexts, []string{"file-view", "diff"})() + + assertCase(t, false, false, true) + }) + + t.Run("No context", func(t *testing.T) { + defer test.MockVariableValue(&setting.UI.SkipEscapeContexts, []string{"file-view", "wiki", "diff"})() + + assertCase(t, false, false, false) + }) + + t.Run("Disabled detection", func(t *testing.T) { + defer test.MockVariableValue(&setting.UI.SkipEscapeContexts, []string{})() + defer test.MockVariableValue(&setting.UI.AmbiguousUnicodeDetection, false)() + + assertCase(t, false, false, false) + }) + }) +}