From b01dce2a6e98c25915a8e98afb741a1c34d05aba Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Thu, 16 Jun 2022 11:33:23 +0800 Subject: [PATCH] Allow render HTML with css/js external links (#19017) * Allow render HTML with css/js external links * Fix bug because of filename escape chars * Fix lint * Update docs about new configuration item * Fix bug of render HTML in sub directory * Add CSP head for displaying iframe in rendering file * Fix test * Apply suggestions from code review Co-authored-by: delvh * Some improvements * some improvement * revert change in SanitizerDisabled of external renderer * Add sandbox for iframe and support allow-scripts and allow-same-origin * refactor * fix * fix lint * fine tune * use single option RENDER_CONTENT_MODE, use sandbox=allow-scripts * fine tune CSP * Apply suggestions from code review Co-authored-by: wxiaoguang Co-authored-by: delvh Co-authored-by: wxiaoguang --- custom/conf/app.example.ini | 7 +- .../doc/advanced/config-cheat-sheet.en-us.md | 7 +- .../doc/advanced/config-cheat-sheet.zh-cn.md | 7 +- modules/csv/csv.go | 2 +- modules/csv/csv_test.go | 2 +- modules/markup/console/console.go | 8 -- modules/markup/csv/csv.go | 8 -- modules/markup/external/external.go | 12 ++- modules/markup/html_test.go | 26 +++--- modules/markup/markdown/markdown.go | 9 +-- modules/markup/orgmode/orgmode.go | 9 +-- modules/markup/renderer.go | 81 +++++++++++++++---- modules/setting/markup.go | 37 +++++++-- routers/web/repo/compare.go | 4 +- routers/web/repo/render.go | 79 ++++++++++++++++++ routers/web/repo/view.go | 36 +++++---- routers/web/web.go | 7 ++ 17 files changed, 248 insertions(+), 93 deletions(-) create mode 100644 routers/web/repo/render.go diff --git a/custom/conf/app.example.ini b/custom/conf/app.example.ini index 8e082233c..065c57ef5 100644 --- a/custom/conf/app.example.ini +++ b/custom/conf/app.example.ini @@ -2181,8 +2181,11 @@ PATH = ;RENDER_COMMAND = "asciidoc --out-file=- -" ;; Don't pass the file on STDIN, pass the 
filename as argument instead. ;IS_INPUT_FILE = false -; Don't filter html tags and attributes if true -;DISABLE_SANITIZER = false +;; How the content will be rendered. +;; * sanitized: Sanitize the content and render it inside current page, default to only allow a few HTML tags and attributes. Customized sanitizer rules can be defined in [markup.sanitizer.*] . +;; * no-sanitizer: Disable the sanitizer and render the content inside current page. It's **insecure** and may lead to XSS attack if the content contains malicious code. +;; * iframe: Render the content in a separate standalone page and embed it into current page by iframe. The iframe is in sandbox mode with same-origin disabled, and the JS code are safely isolated from parent page. +;RENDER_CONTENT_MODE=sanitized ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/docs/content/doc/advanced/config-cheat-sheet.en-us.md b/docs/content/doc/advanced/config-cheat-sheet.en-us.md index 4e32ca00b..4f041d417 100644 --- a/docs/content/doc/advanced/config-cheat-sheet.en-us.md +++ b/docs/content/doc/advanced/config-cheat-sheet.en-us.md @@ -1026,13 +1026,16 @@ IS_INPUT_FILE = false command. Multiple extensions needs a comma as splitter. - RENDER\_COMMAND: External command to render all matching extensions. - IS\_INPUT\_FILE: **false** Input is not a standard input but a file param followed `RENDER_COMMAND`. -- DISABLE_SANITIZER: **false** Don't filter html tags and attributes if true. Don't change this to true except you know what that means. +- RENDER_CONTENT_MODE: **sanitized** How the content will be rendered. + - sanitized: Sanitize the content and render it inside current page, default to only allow a few HTML tags and attributes. Customized sanitizer rules can be defined in `[markup.sanitizer.*]`. + - no-sanitizer: Disable the sanitizer and render the content inside current page. 
It's **insecure** and may lead to XSS attack if the content contains malicious code. + - iframe: Render the content in a separate standalone page and embed it into current page by iframe. The iframe is in sandbox mode with same-origin disabled, and the JS code are safely isolated from parent page. Two special environment variables are passed to the render command: - `GITEA_PREFIX_SRC`, which contains the current URL prefix in the `src` path tree. To be used as prefix for links. - `GITEA_PREFIX_RAW`, which contains the current URL prefix in the `raw` path tree. To be used as prefix for image paths. -If `DISABLE_SANITIZER` is false, Gitea supports customizing the sanitization policy for rendered HTML. The example below will support KaTeX output from pandoc. +If `RENDER_CONTENT_MODE` is `sanitized`, Gitea supports customizing the sanitization policy for rendered HTML. The example below will support KaTeX output from pandoc. ```ini [markup.sanitizer.TeX] diff --git a/docs/content/doc/advanced/config-cheat-sheet.zh-cn.md b/docs/content/doc/advanced/config-cheat-sheet.zh-cn.md index cc6e950fb..ef1504bc9 100644 --- a/docs/content/doc/advanced/config-cheat-sheet.zh-cn.md +++ b/docs/content/doc/advanced/config-cheat-sheet.zh-cn.md @@ -318,14 +318,17 @@ IS_INPUT_FILE = false - FILE_EXTENSIONS: 关联的文档的扩展名,多个扩展名用都好分隔。 - RENDER_COMMAND: 工具的命令行命令及参数。 - IS_INPUT_FILE: 输入方式是最后一个参数为文件路径还是从标准输入读取。 -- DISABLE_SANITIZER: **false** 如果为 true 则不过滤 HTML 标签和属性。除非你知道这意味着什么,否则不要设置为 true。 +- RENDER_CONTENT_MODE: **sanitized** 内容如何被渲染。 + - sanitized: 对内容进行净化并渲染到当前页面中,仅有一部分 HTML 标签和属性是被允许的。 + - no-sanitizer: 禁用净化器,把内容渲染到当前页面中。此模式是**不安全**的,如果内容中含有恶意代码,可能会导致 XSS 攻击。 + - iframe: 把内容渲染在一个独立的页面中并使用 iframe 嵌入到当前页面中。使用的 iframe 工作在沙箱模式并禁用了同源请求,JS 代码被安全的从父页面中隔离出去。 以下两个环境变量将会被传递给渲染命令: - `GITEA_PREFIX_SRC`:包含当前的`src`路径的URL前缀,可以被用于链接的前缀。 - `GITEA_PREFIX_RAW`:包含当前的`raw`路径的URL前缀,可以被用于图片的前缀。 -如果 `DISABLE_SANITIZER` 为 false,则 Gitea 支持自定义渲染 HTML 的净化策略。以下例子将用 pandoc 支持 KaTeX 输出。 +如果 `RENDER_CONTENT_MODE` 为 
`sanitized`,则 Gitea 支持自定义渲染 HTML 的净化策略。以下例子将用 pandoc 支持 KaTeX 输出。 ```ini [markup.sanitizer.TeX] diff --git a/modules/csv/csv.go b/modules/csv/csv.go index 0dd54271f..fe0c35096 100644 --- a/modules/csv/csv.go +++ b/modules/csv/csv.go @@ -54,7 +54,7 @@ func CreateReaderAndDetermineDelimiter(ctx *markup.RenderContext, rd io.Reader) func determineDelimiter(ctx *markup.RenderContext, data []byte) rune { extension := ".csv" if ctx != nil { - extension = strings.ToLower(filepath.Ext(ctx.Filename)) + extension = strings.ToLower(filepath.Ext(ctx.RelativePath)) } var delimiter rune diff --git a/modules/csv/csv_test.go b/modules/csv/csv_test.go index b1e928ae9..9d0848ae5 100644 --- a/modules/csv/csv_test.go +++ b/modules/csv/csv_test.go @@ -230,7 +230,7 @@ John Doe john@doe.com This,note,had,a,lot,of,commas,to,test,delimiters`, } for n, c := range cases { - delimiter := determineDelimiter(&markup.RenderContext{Filename: c.filename}, []byte(decodeSlashes(t, c.csv))) + delimiter := determineDelimiter(&markup.RenderContext{RelativePath: c.filename}, []byte(decodeSlashes(t, c.csv))) assert.EqualValues(t, c.expectedDelimiter, delimiter, "case %d: delimiter should be equal, expected '%c' got '%c'", n, c.expectedDelimiter, delimiter) } } diff --git a/modules/markup/console/console.go b/modules/markup/console/console.go index b59594acb..597593eee 100644 --- a/modules/markup/console/console.go +++ b/modules/markup/console/console.go @@ -33,9 +33,6 @@ func (Renderer) Name() string { return MarkupName } -// NeedPostProcess implements markup.Renderer -func (Renderer) NeedPostProcess() bool { return false } - // Extensions implements markup.Renderer func (Renderer) Extensions() []string { return []string{".sh-session"} @@ -48,11 +45,6 @@ func (Renderer) SanitizerRules() []setting.MarkupSanitizerRule { } } -// SanitizerDisabled disabled sanitize if return true -func (Renderer) SanitizerDisabled() bool { - return false -} - // CanRender implements markup.RendererContentDetector func 
(Renderer) CanRender(filename string, input io.Reader) bool { buf, err := io.ReadAll(input) diff --git a/modules/markup/csv/csv.go b/modules/markup/csv/csv.go index 17c3fe6f4..5095b8546 100644 --- a/modules/markup/csv/csv.go +++ b/modules/markup/csv/csv.go @@ -29,9 +29,6 @@ func (Renderer) Name() string { return "csv" } -// NeedPostProcess implements markup.Renderer -func (Renderer) NeedPostProcess() bool { return false } - // Extensions implements markup.Renderer func (Renderer) Extensions() []string { return []string{".csv", ".tsv"} @@ -46,11 +43,6 @@ func (Renderer) SanitizerRules() []setting.MarkupSanitizerRule { } } -// SanitizerDisabled disabled sanitize if return true -func (Renderer) SanitizerDisabled() bool { - return false -} - func writeField(w io.Writer, element, class, field string) error { if _, err := io.WriteString(w, "<"); err != nil { return err diff --git a/modules/markup/external/external.go b/modules/markup/external/external.go index a587abcc3..23dd45ba0 100644 --- a/modules/markup/external/external.go +++ b/modules/markup/external/external.go @@ -34,6 +34,11 @@ type Renderer struct { *setting.MarkupRenderer } +var ( + _ markup.PostProcessRenderer = (*Renderer)(nil) + _ markup.ExternalRenderer = (*Renderer)(nil) +) + // Name returns the external tool name func (p *Renderer) Name() string { return p.MarkupName @@ -56,7 +61,12 @@ func (p *Renderer) SanitizerRules() []setting.MarkupSanitizerRule { // SanitizerDisabled disabled sanitize if return true func (p *Renderer) SanitizerDisabled() bool { - return p.DisableSanitizer + return p.RenderContentMode == setting.RenderContentModeNoSanitizer || p.RenderContentMode == setting.RenderContentModeIframe +} + +// DisplayInIFrame represents whether render the content with an iframe +func (p *Renderer) DisplayInIFrame() bool { + return p.RenderContentMode == setting.RenderContentModeIframe } func envMark(envName string) string { diff --git a/modules/markup/html_test.go b/modules/markup/html_test.go index 
f6aabc627..f494998c5 100644 --- a/modules/markup/html_test.go +++ b/modules/markup/html_test.go @@ -29,10 +29,10 @@ func TestRender_Commits(t *testing.T) { setting.AppURL = TestAppURL test := func(input, expected string) { buffer, err := RenderString(&RenderContext{ - Ctx: git.DefaultContext, - Filename: ".md", - URLPrefix: TestRepoURL, - Metas: localMetas, + Ctx: git.DefaultContext, + RelativePath: ".md", + URLPrefix: TestRepoURL, + Metas: localMetas, }, input) assert.NoError(t, err) assert.Equal(t, strings.TrimSpace(expected), strings.TrimSpace(buffer)) @@ -80,9 +80,9 @@ func TestRender_CrossReferences(t *testing.T) { test := func(input, expected string) { buffer, err := RenderString(&RenderContext{ - Filename: "a.md", - URLPrefix: setting.AppSubURL, - Metas: localMetas, + RelativePath: "a.md", + URLPrefix: setting.AppSubURL, + Metas: localMetas, }, input) assert.NoError(t, err) assert.Equal(t, strings.TrimSpace(expected), strings.TrimSpace(buffer)) @@ -124,8 +124,8 @@ func TestRender_links(t *testing.T) { test := func(input, expected string) { buffer, err := RenderString(&RenderContext{ - Filename: "a.md", - URLPrefix: TestRepoURL, + RelativePath: "a.md", + URLPrefix: TestRepoURL, }, input) assert.NoError(t, err) assert.Equal(t, strings.TrimSpace(expected), strings.TrimSpace(buffer)) @@ -223,8 +223,8 @@ func TestRender_email(t *testing.T) { test := func(input, expected string) { res, err := RenderString(&RenderContext{ - Filename: "a.md", - URLPrefix: TestRepoURL, + RelativePath: "a.md", + URLPrefix: TestRepoURL, }, input) assert.NoError(t, err) assert.Equal(t, strings.TrimSpace(expected), strings.TrimSpace(res)) @@ -281,8 +281,8 @@ func TestRender_emoji(t *testing.T) { test := func(input, expected string) { expected = strings.ReplaceAll(expected, "&", "&") buffer, err := RenderString(&RenderContext{ - Filename: "a.md", - URLPrefix: TestRepoURL, + RelativePath: "a.md", + URLPrefix: TestRepoURL, }, input) assert.NoError(t, err) assert.Equal(t, 
strings.TrimSpace(expected), strings.TrimSpace(buffer)) diff --git a/modules/markup/markdown/markdown.go b/modules/markup/markdown/markdown.go index 7ebdfea6c..37e11e606 100644 --- a/modules/markup/markdown/markdown.go +++ b/modules/markup/markdown/markdown.go @@ -205,12 +205,14 @@ func init() { // Renderer implements markup.Renderer type Renderer struct{} +var _ markup.PostProcessRenderer = (*Renderer)(nil) + // Name implements markup.Renderer func (Renderer) Name() string { return MarkupName } -// NeedPostProcess implements markup.Renderer +// NeedPostProcess implements markup.PostProcessRenderer func (Renderer) NeedPostProcess() bool { return true } // Extensions implements markup.Renderer @@ -223,11 +225,6 @@ func (Renderer) SanitizerRules() []setting.MarkupSanitizerRule { return []setting.MarkupSanitizerRule{} } -// SanitizerDisabled disabled sanitize if return true -func (Renderer) SanitizerDisabled() bool { - return false -} - // Render implements markup.Renderer func (Renderer) Render(ctx *markup.RenderContext, input io.Reader, output io.Writer) error { return render(ctx, input, output) diff --git a/modules/markup/orgmode/orgmode.go b/modules/markup/orgmode/orgmode.go index 2f394b992..8c9f3b3da 100644 --- a/modules/markup/orgmode/orgmode.go +++ b/modules/markup/orgmode/orgmode.go @@ -29,12 +29,14 @@ func init() { // Renderer implements markup.Renderer for orgmode type Renderer struct{} +var _ markup.PostProcessRenderer = (*Renderer)(nil) + // Name implements markup.Renderer func (Renderer) Name() string { return "orgmode" } -// NeedPostProcess implements markup.Renderer +// NeedPostProcess implements markup.PostProcessRenderer func (Renderer) NeedPostProcess() bool { return true } // Extensions implements markup.Renderer @@ -47,11 +49,6 @@ func (Renderer) SanitizerRules() []setting.MarkupSanitizerRule { return []setting.MarkupSanitizerRule{} } -// SanitizerDisabled disabled sanitize if return true -func (Renderer) SanitizerDisabled() bool { - return false 
-} - // Render renders orgmode rawbytes to HTML func Render(ctx *markup.RenderContext, input io.Reader, output io.Writer) error { htmlWriter := org.NewHTMLWriter() diff --git a/modules/markup/renderer.go b/modules/markup/renderer.go index 6e4ae4e08..e88fa3118 100644 --- a/modules/markup/renderer.go +++ b/modules/markup/renderer.go @@ -10,6 +10,7 @@ import ( "errors" "fmt" "io" + "net/url" "path/filepath" "strings" "sync" @@ -43,17 +44,18 @@ type Header struct { // RenderContext represents a render context type RenderContext struct { - Ctx context.Context - Filename string - Type string - IsWiki bool - URLPrefix string - Metas map[string]string - DefaultLink string - GitRepo *git.Repository - ShaExistCache map[string]bool - cancelFn func() - TableOfContents []Header + Ctx context.Context + RelativePath string // relative path from tree root of the branch + Type string + IsWiki bool + URLPrefix string + Metas map[string]string + DefaultLink string + GitRepo *git.Repository + ShaExistCache map[string]bool + cancelFn func() + TableOfContents []Header + InStandalonePage bool // used by external render. the router "/org/repo/render/..." 
will output the rendered content in a standalone page } // Cancel runs any cleanup functions that have been registered for this Ctx @@ -88,12 +90,24 @@ func (ctx *RenderContext) AddCancel(fn func()) { type Renderer interface { Name() string // markup format name Extensions() []string - NeedPostProcess() bool SanitizerRules() []setting.MarkupSanitizerRule - SanitizerDisabled() bool Render(ctx *RenderContext, input io.Reader, output io.Writer) error } +// PostProcessRenderer defines an interface for renderers who need post process +type PostProcessRenderer interface { + NeedPostProcess() bool +} + +// ExternalRenderer defines an interface for external renderers +type ExternalRenderer interface { + // SanitizerDisabled disabled sanitize if return true + SanitizerDisabled() bool + + // DisplayInIFrame represents whether render the content with an iframe + DisplayInIFrame() bool +} + // RendererContentDetector detects if the content can be rendered // by specified renderer type RendererContentDetector interface { @@ -142,7 +156,7 @@ func DetectRendererType(filename string, input io.Reader) string { func Render(ctx *RenderContext, input io.Reader, output io.Writer) error { if ctx.Type != "" { return renderByType(ctx, input, output) - } else if ctx.Filename != "" { + } else if ctx.RelativePath != "" { return renderFile(ctx, input, output) } return errors.New("Render options both filename and type missing") @@ -163,6 +177,27 @@ type nopCloser struct { func (nopCloser) Close() error { return nil } +func renderIFrame(ctx *RenderContext, output io.Writer) error { + // set height="0" ahead, otherwise the scrollHeight would be max(150, realHeight) + // at the moment, only "allow-scripts" is allowed for sandbox mode. 
+ // "allow-same-origin" should never be used, it leads to XSS attack, and it makes the JS in iframe can access parent window's config and CSRF token + // TODO: when using dark theme, if the rendered content doesn't have proper style, the default text color is black, which is not easy to read + _, err := io.WriteString(output, fmt.Sprintf(` +`, + setting.AppSubURL, + url.PathEscape(ctx.Metas["user"]), + url.PathEscape(ctx.Metas["repo"]), + ctx.Metas["BranchNameSubURL"], + url.PathEscape(ctx.RelativePath), + )) + return err +} + func render(ctx *RenderContext, renderer Renderer, input io.Reader, output io.Writer) error { var wg sync.WaitGroup var err error @@ -175,7 +210,12 @@ func render(ctx *RenderContext, renderer Renderer, input io.Reader, output io.Wr var pr2 io.ReadCloser var pw2 io.WriteCloser - if !renderer.SanitizerDisabled() { + var sanitizerDisabled bool + if r, ok := renderer.(ExternalRenderer); ok { + sanitizerDisabled = r.SanitizerDisabled() + } + + if !sanitizerDisabled { pr2, pw2 = io.Pipe() defer func() { _ = pr2.Close() @@ -194,7 +234,7 @@ func render(ctx *RenderContext, renderer Renderer, input io.Reader, output io.Wr wg.Add(1) go func() { - if renderer.NeedPostProcess() { + if r, ok := renderer.(PostProcessRenderer); ok && r.NeedPostProcess() { err = PostProcess(ctx, pr, pw2) } else { _, err = io.Copy(pw2, pr) @@ -239,8 +279,15 @@ func (err ErrUnsupportedRenderExtension) Error() string { } func renderFile(ctx *RenderContext, input io.Reader, output io.Writer) error { - extension := strings.ToLower(filepath.Ext(ctx.Filename)) + extension := strings.ToLower(filepath.Ext(ctx.RelativePath)) if renderer, ok := extRenderers[extension]; ok { + if r, ok := renderer.(ExternalRenderer); ok && r.DisplayInIFrame() { + if !ctx.InStandalonePage { + // for an external render, it could only output its content in a standalone page + // otherwise, a