From 52e11b24bf5e395d83ea58c1b0fd6922efe16add Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Sun, 17 Sep 2017 01:17:57 +0800 Subject: [PATCH] =?UTF-8?q?Restructure=20markup=20&=20markdown=20to=20prep?= =?UTF-8?q?are=20for=20multiple=20markup=20language=E2=80=A6=20(#2411)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * restructure markup & markdown to prepare for multiple markup languages support * adjust some functions between markdown and markup * fix tests * improve the comments --- models/issue_comment.go | 4 +- models/issue_mail.go | 4 +- models/mail.go | 3 +- models/migrations/v16.go | 4 +- models/repo.go | 10 +- models/repo_test.go | 12 +- modules/markdown/markdown.go | 574 +---------------- modules/markdown/markdown_test.go | 598 +++--------------- modules/markup/html.go | 517 +++++++++++++++ modules/markup/html_test.go | 460 ++++++++++++++ modules/markup/markup.go | 80 ++- modules/markup/markup_test.go | 5 +- modules/{markdown => markup}/sanitizer.go | 2 +- .../{markdown => markup}/sanitizer_test.go | 2 +- modules/templates/helper.go | 6 +- routers/api/v1/misc/markdown.go | 3 +- routers/api/v1/misc/markdown_test.go | 20 +- routers/init.go | 6 +- routers/repo/view.go | 5 +- 19 files changed, 1206 insertions(+), 1109 deletions(-) create mode 100644 modules/markup/html.go create mode 100644 modules/markup/html_test.go rename modules/{markdown => markup}/sanitizer.go (99%) rename modules/{markdown => markup}/sanitizer_test.go (99%) diff --git a/models/issue_comment.go b/models/issue_comment.go index 6c9c75b1e..675143437 100644 --- a/models/issue_comment.go +++ b/models/issue_comment.go @@ -16,7 +16,7 @@ import ( api "code.gitea.io/sdk/gitea" "code.gitea.io/gitea/modules/log" - "code.gitea.io/gitea/modules/markdown" + "code.gitea.io/gitea/modules/markup" ) // CommentType defines whether a comment is just a simple comment, an action (like close) or a reference. 
@@ -272,7 +272,7 @@ func (c *Comment) LoadAssignees() error { // MailParticipants sends new comment emails to repository watchers // and mentioned people. func (c *Comment) MailParticipants(e Engine, opType ActionType, issue *Issue) (err error) { - mentions := markdown.FindAllMentions(c.Content) + mentions := markup.FindAllMentions(c.Content) if err = UpdateIssueMentions(e, c.IssueID, mentions); err != nil { return fmt.Errorf("UpdateIssueMentions [%d]: %v", c.IssueID, err) } diff --git a/models/issue_mail.go b/models/issue_mail.go index 74ef66055..e4a1a40e6 100644 --- a/models/issue_mail.go +++ b/models/issue_mail.go @@ -10,7 +10,7 @@ import ( "github.com/Unknwon/com" "code.gitea.io/gitea/modules/log" - "code.gitea.io/gitea/modules/markdown" + "code.gitea.io/gitea/modules/markup" "code.gitea.io/gitea/modules/setting" ) @@ -104,7 +104,7 @@ func (issue *Issue) MailParticipants() (err error) { } func (issue *Issue) mailParticipants(e Engine) (err error) { - mentions := markdown.FindAllMentions(issue.Content) + mentions := markup.FindAllMentions(issue.Content) if err = UpdateIssueMentions(e, issue.ID, mentions); err != nil { return fmt.Errorf("UpdateIssueMentions [%d]: %v", issue.ID, err) } diff --git a/models/mail.go b/models/mail.go index 41fddad2b..b7d24a06a 100644 --- a/models/mail.go +++ b/models/mail.go @@ -14,6 +14,7 @@ import ( "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/mailer" "code.gitea.io/gitea/modules/markdown" + "code.gitea.io/gitea/modules/markup" "code.gitea.io/gitea/modules/setting" "gopkg.in/gomail.v2" "gopkg.in/macaron.v1" @@ -150,7 +151,7 @@ func composeTplData(subject, body, link string) map[string]interface{} { func composeIssueCommentMessage(issue *Issue, doer *User, comment *Comment, tplName base.TplName, tos []string, info string) *mailer.Message { subject := issue.mailSubject() - body := string(markdown.RenderString(issue.Content, issue.Repo.HTMLURL(), issue.Repo.ComposeMetas())) + body := 
string(markup.RenderByType(markdown.MarkupName, []byte(issue.Content), issue.Repo.HTMLURL(), issue.Repo.ComposeMetas())) data := make(map[string]interface{}, 10) if comment != nil { diff --git a/models/migrations/v16.go b/models/migrations/v16.go index 2a6d71de4..ef342a5f8 100644 --- a/models/migrations/v16.go +++ b/models/migrations/v16.go @@ -8,7 +8,7 @@ import ( "fmt" "time" - "code.gitea.io/gitea/modules/markdown" + "code.gitea.io/gitea/modules/markup" "github.com/go-xorm/xorm" ) @@ -101,7 +101,7 @@ func addUnitsToTables(x *xorm.Engine) error { config["ExternalTrackerURL"] = repo.ExternalTrackerURL config["ExternalTrackerFormat"] = repo.ExternalTrackerFormat if len(repo.ExternalTrackerStyle) == 0 { - repo.ExternalTrackerStyle = markdown.IssueNameStyleNumeric + repo.ExternalTrackerStyle = markup.IssueNameStyleNumeric } config["ExternalTrackerStyle"] = repo.ExternalTrackerStyle case V16UnitTypeExternalWiki: diff --git a/models/repo.go b/models/repo.go index 8d5b3b87c..4b3b0322d 100644 --- a/models/repo.go +++ b/models/repo.go @@ -22,7 +22,7 @@ import ( "code.gitea.io/git" "code.gitea.io/gitea/modules/log" - "code.gitea.io/gitea/modules/markdown" + "code.gitea.io/gitea/modules/markup" "code.gitea.io/gitea/modules/options" "code.gitea.io/gitea/modules/process" "code.gitea.io/gitea/modules/setting" @@ -480,10 +480,10 @@ func (repo *Repository) ComposeMetas() map[string]string { "repo": repo.Name, } switch unit.ExternalTrackerConfig().ExternalTrackerStyle { - case markdown.IssueNameStyleAlphanumeric: - repo.ExternalMetas["style"] = markdown.IssueNameStyleAlphanumeric + case markup.IssueNameStyleAlphanumeric: + repo.ExternalMetas["style"] = markup.IssueNameStyleAlphanumeric default: - repo.ExternalMetas["style"] = markdown.IssueNameStyleNumeric + repo.ExternalMetas["style"] = markup.IssueNameStyleNumeric } } @@ -708,7 +708,7 @@ func (repo *Repository) DescriptionHTML() template.HTML { sanitize := func(s string) string { return fmt.Sprintf(`%[1]s`, s) } - return 
template.HTML(descPattern.ReplaceAllStringFunc(markdown.Sanitize(repo.Description), sanitize)) + return template.HTML(descPattern.ReplaceAllStringFunc(markup.Sanitize(repo.Description), sanitize)) } // LocalCopyPath returns the local repository copy path diff --git a/models/repo_test.go b/models/repo_test.go index c1eb4e2a7..34eaa16c0 100644 --- a/models/repo_test.go +++ b/models/repo_test.go @@ -8,7 +8,7 @@ import ( "path" "testing" - "code.gitea.io/gitea/modules/markdown" + "code.gitea.io/gitea/modules/markup" "code.gitea.io/gitea/modules/setting" "github.com/Unknwon/com" @@ -39,13 +39,13 @@ func TestRepo(t *testing.T) { assert.Equal(t, "https://someurl.com/{user}/{repo}/{issue}", metas["format"]) } - testSuccess(markdown.IssueNameStyleNumeric) + testSuccess(markup.IssueNameStyleNumeric) - externalTracker.ExternalTrackerConfig().ExternalTrackerStyle = markdown.IssueNameStyleAlphanumeric - testSuccess(markdown.IssueNameStyleAlphanumeric) + externalTracker.ExternalTrackerConfig().ExternalTrackerStyle = markup.IssueNameStyleAlphanumeric + testSuccess(markup.IssueNameStyleAlphanumeric) - externalTracker.ExternalTrackerConfig().ExternalTrackerStyle = markdown.IssueNameStyleNumeric - testSuccess(markdown.IssueNameStyleNumeric) + externalTracker.ExternalTrackerConfig().ExternalTrackerStyle = markup.IssueNameStyleNumeric + testSuccess(markup.IssueNameStyleNumeric) } func TestGetRepositoryCount(t *testing.T) { diff --git a/modules/markdown/markdown.go b/modules/markdown/markdown.go index ed673f205..6cf2d9eaa 100644 --- a/modules/markdown/markdown.go +++ b/modules/markdown/markdown.go @@ -6,107 +6,14 @@ package markdown import ( "bytes" - "fmt" - "io" - "net/url" - "path" - "path/filepath" - "regexp" "strings" - "code.gitea.io/gitea/modules/base" - "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/markup" "code.gitea.io/gitea/modules/setting" - "github.com/Unknwon/com" "github.com/russross/blackfriday" - "golang.org/x/net/html" ) -// Issue name styles -const ( 
- IssueNameStyleNumeric = "numeric" - IssueNameStyleAlphanumeric = "alphanumeric" -) - -// IsMarkdownFile reports whether name looks like a Markdown file -// based on its extension. -func IsMarkdownFile(name string) bool { - extension := strings.ToLower(filepath.Ext(name)) - for _, ext := range setting.Markdown.FileExtensions { - if strings.ToLower(ext) == extension { - return true - } - } - return false -} - -var ( - // NOTE: All below regex matching do not perform any extra validation. - // Thus a link is produced even if the user does not exist, the issue does not exist, the commit does not exist, etc. - // While fast, this is also incorrect and lead to false positives. - - // MentionPattern matches string that mentions someone, e.g. @Unknwon - MentionPattern = regexp.MustCompile(`(\s|^|\W)@[0-9a-zA-Z-_\.]+`) - - // IssueNumericPattern matches string that references to a numeric issue, e.g. #1287 - IssueNumericPattern = regexp.MustCompile(`( |^|\()#[0-9]+\b`) - // IssueAlphanumericPattern matches string that references to an alphanumeric issue, e.g. ABC-1234 - IssueAlphanumericPattern = regexp.MustCompile(`( |^|\()[A-Z]{1,10}-[1-9][0-9]*\b`) - // CrossReferenceIssueNumericPattern matches string that references a numeric issue in a different repository - // e.g. gogits/gogs#12345 - CrossReferenceIssueNumericPattern = regexp.MustCompile(`( |^)[0-9a-zA-Z]+/[0-9a-zA-Z]+#[0-9]+\b`) - - // Sha1CurrentPattern matches string that represents a commit SHA, e.g. d8a994ef243349f321568f9e36d5c3f444b99cae - // Although SHA1 hashes are 40 chars long, the regex matches the hash from 7 to 40 chars in length - // so that abbreviated hash links can be used as well. This matches git and github useability. 
- Sha1CurrentPattern = regexp.MustCompile(`(?:^|\s|\()([0-9a-f]{7,40})\b`) - - // ShortLinkPattern matches short but difficult to parse [[name|link|arg=test]] syntax - ShortLinkPattern = regexp.MustCompile(`(\[\[.*?\]\]\w*)`) - - // AnySHA1Pattern allows to split url containing SHA into parts - AnySHA1Pattern = regexp.MustCompile(`(http\S*)://(\S+)/(\S+)/(\S+)/(\S+)/([0-9a-f]{40})(?:/?([^#\s]+)?(?:#(\S+))?)?`) - - validLinksPattern = regexp.MustCompile(`^[a-z][\w-]+://`) -) - -// regexp for full links to issues/pulls -var issueFullPattern *regexp.Regexp - -// InitMarkdown initialize regexps for markdown parsing -func InitMarkdown() { - getIssueFullPattern() -} - -func getIssueFullPattern() *regexp.Regexp { - if issueFullPattern == nil { - appURL := setting.AppURL - if len(appURL) > 0 && appURL[len(appURL)-1] != '/' { - appURL += "/" - } - issueFullPattern = regexp.MustCompile(appURL + - `\w+/\w+/(?:issues|pulls)/((?:\w{1,10}-)?[1-9][0-9]*)([\?|#]\S+.(\S+)?)?\b`) - } - return issueFullPattern -} - -// isLink reports whether link fits valid format. -func isLink(link []byte) bool { - return validLinksPattern.Match(link) -} - -// FindAllMentions matches mention patterns in given content -// and returns a list of found user names without @ prefix. -func FindAllMentions(content string) []string { - mentions := MentionPattern.FindAllString(content, -1) - for i := range mentions { - mentions[i] = mentions[i][strings.Index(mentions[i], "@")+1:] // Strip @ character - } - return mentions -} - // Renderer is a extended version of underlying render object. type Renderer struct { blackfriday.Renderer @@ -116,13 +23,13 @@ type Renderer struct { // Link defines how formal links should be processed to produce corresponding HTML elements. 
func (r *Renderer) Link(out *bytes.Buffer, link []byte, title []byte, content []byte) { - if len(link) > 0 && !isLink(link) { + if len(link) > 0 && !markup.IsLink(link) { if link[0] != '#' { lnk := string(link) if r.isWikiMarkdown { - lnk = URLJoin("wiki", lnk) + lnk = markup.URLJoin("wiki", lnk) } - mLink := URLJoin(r.urlPrefix, lnk) + mLink := markup.URLJoin(r.urlPrefix, lnk) link = []byte(mLink) } } @@ -190,11 +97,11 @@ var ( func (r *Renderer) Image(out *bytes.Buffer, link []byte, title []byte, alt []byte) { prefix := r.urlPrefix if r.isWikiMarkdown { - prefix = URLJoin(prefix, "wiki", "src") + prefix = markup.URLJoin(prefix, "wiki", "src") } prefix = strings.Replace(prefix, "/src/", "/raw/", 1) if len(link) > 0 { - if isLink(link) { + if markup.IsLink(link) { // External link with .svg suffix usually means CI status. // TODO: define a keyword to allow non-svg images render as external link. if bytes.HasSuffix(link, svgSuffix) || bytes.Contains(link, svgSuffixWithMark) { @@ -203,7 +110,7 @@ func (r *Renderer) Image(out *bytes.Buffer, link []byte, title []byte, alt []byt } } else { lnk := string(link) - lnk = URLJoin(prefix, lnk) + lnk = markup.URLJoin(prefix, lnk) lnk = strings.Replace(lnk, " ", "+", -1) link = []byte(lnk) } @@ -216,351 +123,6 @@ func (r *Renderer) Image(out *bytes.Buffer, link []byte, title []byte, alt []byt out.WriteString("") } -// cutoutVerbosePrefix cutouts URL prefix including sub-path to -// return a clean unified string of request URL path. 
-func cutoutVerbosePrefix(prefix string) string { - if len(prefix) == 0 || prefix[0] != '/' { - return prefix - } - count := 0 - for i := 0; i < len(prefix); i++ { - if prefix[i] == '/' { - count++ - } - if count >= 3+setting.AppSubURLDepth { - return prefix[:i] - } - } - return prefix -} - -// URLJoin joins url components, like path.Join, but preserving contents -func URLJoin(base string, elems ...string) string { - u, err := url.Parse(base) - if err != nil { - log.Error(4, "URLJoin: Invalid base URL %s", base) - return "" - } - joinArgs := make([]string, 0, len(elems)+1) - joinArgs = append(joinArgs, u.Path) - joinArgs = append(joinArgs, elems...) - u.Path = path.Join(joinArgs...) - return u.String() -} - -// RenderIssueIndexPattern renders issue indexes to corresponding links. -func RenderIssueIndexPattern(rawBytes []byte, urlPrefix string, metas map[string]string) []byte { - urlPrefix = cutoutVerbosePrefix(urlPrefix) - - pattern := IssueNumericPattern - if metas["style"] == IssueNameStyleAlphanumeric { - pattern = IssueAlphanumericPattern - } - - ms := pattern.FindAll(rawBytes, -1) - for _, m := range ms { - if m[0] == ' ' || m[0] == '(' { - m = m[1:] // ignore leading space or opening parentheses - } - var link string - if metas == nil { - link = fmt.Sprintf(`%s`, URLJoin(urlPrefix, "issues", string(m[1:])), m) - } else { - // Support for external issue tracker - if metas["style"] == IssueNameStyleAlphanumeric { - metas["index"] = string(m) - } else { - metas["index"] = string(m[1:]) - } - link = fmt.Sprintf(`%s`, com.Expand(metas["format"], metas), m) - } - rawBytes = bytes.Replace(rawBytes, m, []byte(link), 1) - } - return rawBytes -} - -// IsSameDomain checks if given url string has the same hostname as current Gitea instance -func IsSameDomain(s string) bool { - if strings.HasPrefix(s, "/") { - return true - } - if uapp, err := url.Parse(setting.AppURL); err == nil { - if u, err := url.Parse(s); err == nil { - return u.Host == uapp.Host - } - return false 
- } - return false -} - -// renderFullSha1Pattern renders SHA containing URLs -func renderFullSha1Pattern(rawBytes []byte, urlPrefix string) []byte { - ms := AnySHA1Pattern.FindAllSubmatch(rawBytes, -1) - for _, m := range ms { - all := m[0] - protocol := string(m[1]) - paths := string(m[2]) - path := protocol + "://" + paths - author := string(m[3]) - repoName := string(m[4]) - path = URLJoin(path, author, repoName) - ltype := "src" - itemType := m[5] - if IsSameDomain(paths) { - ltype = string(itemType) - } else if string(itemType) == "commit" { - ltype = "commit" - } - sha := m[6] - var subtree string - if len(m) > 7 && len(m[7]) > 0 { - subtree = string(m[7]) - } - var line []byte - if len(m) > 8 && len(m[8]) > 0 { - line = m[8] - } - urlSuffix := "" - text := base.ShortSha(string(sha)) - if subtree != "" { - urlSuffix = "/" + subtree - text += urlSuffix - } - if line != nil { - value := string(line) - urlSuffix += "#" - urlSuffix += value - text += " (" - text += value - text += ")" - } - rawBytes = bytes.Replace(rawBytes, all, []byte(fmt.Sprintf( - `%s`, URLJoin(path, ltype, string(sha))+urlSuffix, text)), -1) - } - return rawBytes -} - -// RenderFullIssuePattern renders issues-like URLs -func RenderFullIssuePattern(rawBytes []byte) []byte { - ms := getIssueFullPattern().FindAllSubmatch(rawBytes, -1) - for _, m := range ms { - all := m[0] - id := string(m[1]) - text := "#" + id - // TODO if m[2] is not nil, then link is to a comment, - // and we should indicate that in the text somehow - rawBytes = bytes.Replace(rawBytes, all, []byte(fmt.Sprintf( - `%s`, string(all), text)), -1) - } - return rawBytes -} - -func firstIndexOfByte(sl []byte, target byte) int { - for i := 0; i < len(sl); i++ { - if sl[i] == target { - return i - } - } - return -1 -} - -func lastIndexOfByte(sl []byte, target byte) int { - for i := len(sl) - 1; i >= 0; i-- { - if sl[i] == target { - return i - } - } - return -1 -} - -// RenderShortLinks processes [[syntax]] -// -// noLink flag 
disables making link tags when set to true -// so this function just replaces the whole [[...]] with the content text -// -// isWikiMarkdown is a flag to choose linking url prefix -func RenderShortLinks(rawBytes []byte, urlPrefix string, noLink bool, isWikiMarkdown bool) []byte { - ms := ShortLinkPattern.FindAll(rawBytes, -1) - for _, m := range ms { - orig := bytes.TrimSpace(m) - m = orig[2:] - tailPos := lastIndexOfByte(m, ']') + 1 - tail := []byte{} - if tailPos < len(m) { - tail = m[tailPos:] - m = m[:tailPos-1] - } - m = m[:len(m)-2] - props := map[string]string{} - - // MediaWiki uses [[link|text]], while GitHub uses [[text|link]] - // It makes page handling terrible, but we prefer GitHub syntax - // And fall back to MediaWiki only when it is obvious from the look - // Of text and link contents - sl := bytes.Split(m, []byte("|")) - for _, v := range sl { - switch bytes.Count(v, []byte("=")) { - - // Piped args without = sign, these are mandatory arguments - case 0: - { - sv := string(v) - if props["name"] == "" { - if isLink(v) { - // If we clearly see it is a link, we save it so - - // But first we need to ensure, that if both mandatory args provided - // look like links, we stick to GitHub syntax - if props["link"] != "" { - props["name"] = props["link"] - } - - props["link"] = strings.TrimSpace(sv) - } else { - props["name"] = sv - } - } else { - props["link"] = strings.TrimSpace(sv) - } - } - - // Piped args with = sign, these are optional arguments - case 1: - { - sep := firstIndexOfByte(v, '=') - key, val := string(v[:sep]), html.UnescapeString(string(v[sep+1:])) - lastCharIndex := len(val) - 1 - if (val[0] == '"' || val[0] == '\'') && (val[lastCharIndex] == '"' || val[lastCharIndex] == '\'') { - val = val[1:lastCharIndex] - } - props[key] = val - } - } - } - - var name string - var link string - if props["link"] != "" { - link = props["link"] - } else if props["name"] != "" { - link = props["name"] - } - if props["title"] != "" { - name = 
props["title"] - } else if props["name"] != "" { - name = props["name"] - } else { - name = link - } - - name += string(tail) - image := false - ext := filepath.Ext(string(link)) - if ext != "" { - switch ext { - case ".jpg", ".jpeg", ".png", ".tif", ".tiff", ".webp", ".gif", ".bmp", ".ico", ".svg": - { - image = true - } - } - } - absoluteLink := isLink([]byte(link)) - if !absoluteLink { - link = strings.Replace(link, " ", "+", -1) - } - if image { - if !absoluteLink { - if IsSameDomain(urlPrefix) { - urlPrefix = strings.Replace(urlPrefix, "/src/", "/raw/", 1) - } - if isWikiMarkdown { - link = URLJoin("wiki", "raw", link) - } - link = URLJoin(urlPrefix, link) - } - title := props["title"] - if title == "" { - title = props["alt"] - } - if title == "" { - title = path.Base(string(name)) - } - alt := props["alt"] - if alt == "" { - alt = name - } - if alt != "" { - alt = `alt="` + alt + `"` - } - name = fmt.Sprintf(``, link, alt, title) - } else if !absoluteLink { - if isWikiMarkdown { - link = URLJoin("wiki", link) - } - link = URLJoin(urlPrefix, link) - } - if noLink { - rawBytes = bytes.Replace(rawBytes, orig, []byte(name), -1) - } else { - rawBytes = bytes.Replace(rawBytes, orig, - []byte(fmt.Sprintf(`%s`, link, name)), -1) - } - } - return rawBytes -} - -// RenderCrossReferenceIssueIndexPattern renders issue indexes from other repositories to corresponding links. 
-func RenderCrossReferenceIssueIndexPattern(rawBytes []byte, urlPrefix string, metas map[string]string) []byte { - ms := CrossReferenceIssueNumericPattern.FindAll(rawBytes, -1) - for _, m := range ms { - if m[0] == ' ' || m[0] == '(' { - m = m[1:] // ignore leading space or opening parentheses - } - - repo := string(bytes.Split(m, []byte("#"))[0]) - issue := string(bytes.Split(m, []byte("#"))[1]) - - link := fmt.Sprintf(`%s`, URLJoin(setting.AppURL, repo, "issues", issue), m) - rawBytes = bytes.Replace(rawBytes, m, []byte(link), 1) - } - return rawBytes -} - -// renderSha1CurrentPattern renders SHA1 strings to corresponding links that assumes in the same repository. -func renderSha1CurrentPattern(rawBytes []byte, urlPrefix string) []byte { - ms := Sha1CurrentPattern.FindAllSubmatch(rawBytes, -1) - for _, m := range ms { - hash := m[1] - // The regex does not lie, it matches the hash pattern. - // However, a regex cannot know if a hash actually exists or not. - // We could assume that a SHA1 hash should probably contain alphas AND numerics - // but that is not always the case. - // Although unlikely, deadbeef and 1234567 are valid short forms of SHA1 hash - // as used by git and github for linking and thus we have to do similar. - rawBytes = bytes.Replace(rawBytes, hash, []byte(fmt.Sprintf( - `%s`, URLJoin(urlPrefix, "commit", string(hash)), base.ShortSha(string(hash)))), -1) - } - return rawBytes -} - -// RenderSpecialLink renders mentions, indexes and SHA1 strings to corresponding links. 
-func RenderSpecialLink(rawBytes []byte, urlPrefix string, metas map[string]string, isWikiMarkdown bool) []byte { - ms := MentionPattern.FindAll(rawBytes, -1) - for _, m := range ms { - m = m[bytes.Index(m, []byte("@")):] - rawBytes = bytes.Replace(rawBytes, m, - []byte(fmt.Sprintf(`%s`, URLJoin(setting.AppURL, string(m[1:])), m)), -1) - } - - rawBytes = RenderFullIssuePattern(rawBytes) - rawBytes = RenderShortLinks(rawBytes, urlPrefix, false, isWikiMarkdown) - rawBytes = RenderIssueIndexPattern(rawBytes, urlPrefix, metas) - rawBytes = RenderCrossReferenceIssueIndexPattern(rawBytes, urlPrefix, metas) - rawBytes = renderFullSha1Pattern(rawBytes, urlPrefix) - rawBytes = renderSha1CurrentPattern(rawBytes, urlPrefix) - return rawBytes -} - // RenderRaw renders Markdown to HTML without handling special links. func RenderRaw(body []byte, urlPrefix string, wikiMarkdown bool) []byte { htmlFlags := 0 @@ -588,107 +150,6 @@ func RenderRaw(body []byte, urlPrefix string, wikiMarkdown bool) []byte { return body } -var ( - leftAngleBracket = []byte("") -) - -var noEndTags = []string{"img", "input", "br", "hr"} - -// PostProcess treats different types of HTML differently, -// and only renders special links for plain text blocks. -func PostProcess(rawHTML []byte, urlPrefix string, metas map[string]string, isWikiMarkdown bool) []byte { - startTags := make([]string, 0, 5) - var buf bytes.Buffer - tokenizer := html.NewTokenizer(bytes.NewReader(rawHTML)) - -OUTER_LOOP: - for html.ErrorToken != tokenizer.Next() { - token := tokenizer.Token() - switch token.Type { - case html.TextToken: - buf.Write(RenderSpecialLink([]byte(token.String()), urlPrefix, metas, isWikiMarkdown)) - - case html.StartTagToken: - buf.WriteString(token.String()) - tagName := token.Data - // If this is an excluded tag, we skip processing all output until a close tag is encountered. 
- if strings.EqualFold("a", tagName) || strings.EqualFold("code", tagName) || strings.EqualFold("pre", tagName) { - stackNum := 1 - for html.ErrorToken != tokenizer.Next() { - token = tokenizer.Token() - - // Copy the token to the output verbatim - buf.Write(RenderShortLinks([]byte(token.String()), urlPrefix, true, isWikiMarkdown)) - - if token.Type == html.StartTagToken && !com.IsSliceContainsStr(noEndTags, token.Data) { - stackNum++ - } - - // If this is the close tag to the outer-most, we are done - if token.Type == html.EndTagToken { - stackNum-- - - if stackNum <= 0 && strings.EqualFold(tagName, token.Data) { - break - } - } - } - continue OUTER_LOOP - } - - if !com.IsSliceContainsStr(noEndTags, tagName) { - startTags = append(startTags, tagName) - } - - case html.EndTagToken: - if len(startTags) == 0 { - buf.WriteString(token.String()) - break - } - - buf.Write(leftAngleBracket) - buf.WriteString(startTags[len(startTags)-1]) - buf.Write(rightAngleBracket) - startTags = startTags[:len(startTags)-1] - default: - buf.WriteString(token.String()) - } - } - - if io.EOF == tokenizer.Err() { - return buf.Bytes() - } - - // If we are not at the end of the input, then some other parsing error has occurred, - // so return the input verbatim. - return rawHTML -} - -// Render renders Markdown to HTML with all specific handling stuff. -func render(rawBytes []byte, urlPrefix string, metas map[string]string, isWikiMarkdown bool) []byte { - urlPrefix = strings.Replace(urlPrefix, " ", "+", -1) - result := RenderRaw(rawBytes, urlPrefix, isWikiMarkdown) - result = PostProcess(result, urlPrefix, metas, isWikiMarkdown) - result = SanitizeBytes(result) - return result -} - -// Render renders Markdown to HTML with all specific handling stuff. -func Render(rawBytes []byte, urlPrefix string, metas map[string]string) []byte { - return render(rawBytes, urlPrefix, metas, false) -} - -// RenderString renders Markdown to HTML with special links and returns string type. 
-func RenderString(raw, urlPrefix string, metas map[string]string) string { - return string(render([]byte(raw), urlPrefix, metas, false)) -} - -// RenderWiki renders markdown wiki page to HTML and return HTML string -func RenderWiki(rawBytes []byte, urlPrefix string, metas map[string]string) string { - return string(render(rawBytes, urlPrefix, metas, true)) -} - var ( // MarkupName describes markup's name MarkupName = "markdown" @@ -714,5 +175,26 @@ func (Parser) Extensions() []string { // Render implements markup.Parser func (Parser) Render(rawBytes []byte, urlPrefix string, metas map[string]string, isWiki bool) []byte { - return render(rawBytes, urlPrefix, metas, isWiki) + return RenderRaw(rawBytes, urlPrefix, isWiki) +} + +// Render renders Markdown to HTML with all specific handling stuff. +func Render(rawBytes []byte, urlPrefix string, metas map[string]string) []byte { + return markup.Render("a.md", rawBytes, urlPrefix, metas) +} + +// RenderString renders Markdown to HTML with special links and returns string type. +func RenderString(raw, urlPrefix string, metas map[string]string) string { + return markup.RenderString("a.md", raw, urlPrefix, metas) +} + +// RenderWiki renders markdown wiki page to HTML and return HTML string +func RenderWiki(rawBytes []byte, urlPrefix string, metas map[string]string) string { + return markup.RenderWiki("a.md", rawBytes, urlPrefix, metas) +} + +// IsMarkdownFile reports whether name looks like a Markdown file +// based on its extension. +func IsMarkdownFile(name string) bool { + return markup.IsMarkupFile(name, MarkupName) } diff --git a/modules/markdown/markdown_test.go b/modules/markdown/markdown_test.go index 4506a29b1..1b57e4f20 100644 --- a/modules/markdown/markdown_test.go +++ b/modules/markdown/markdown_test.go @@ -7,12 +7,13 @@ package markdown_test import ( "fmt" "strconv" + "strings" "testing" - "strings" - . 
"code.gitea.io/gitea/modules/markdown" + "code.gitea.io/gitea/modules/markup" "code.gitea.io/gitea/modules/setting" + "github.com/stretchr/testify/assert" ) @@ -24,24 +25,24 @@ var numericMetas = map[string]string{ "format": "https://someurl.com/{user}/{repo}/{index}", "user": "someUser", "repo": "someRepo", - "style": IssueNameStyleNumeric, + "style": markup.IssueNameStyleNumeric, } var alphanumericMetas = map[string]string{ "format": "https://someurl.com/{user}/{repo}/{index}", "user": "someUser", "repo": "someRepo", - "style": IssueNameStyleAlphanumeric, + "style": markup.IssueNameStyleAlphanumeric, } // numericLink an HTML to a numeric-style issue func numericIssueLink(baseURL string, index int) string { - return link(URLJoin(baseURL, strconv.Itoa(index)), fmt.Sprintf("#%d", index)) + return link(markup.URLJoin(baseURL, strconv.Itoa(index)), fmt.Sprintf("#%d", index)) } // alphanumLink an HTML link to an alphanumeric-style issue func alphanumIssueLink(baseURL string, name string) string { - return link(URLJoin(baseURL, name), name) + return link(markup.URLJoin(baseURL, name), name) } // urlContentsLink an HTML link whose contents is the target URL @@ -56,175 +57,7 @@ func link(href, contents string) string { func testRenderIssueIndexPattern(t *testing.T, input, expected string, metas map[string]string) { assert.Equal(t, expected, - string(RenderIssueIndexPattern([]byte(input), AppSubURL, metas))) -} - -func TestURLJoin(t *testing.T) { - type test struct { - Expected string - Base string - Elements []string - } - newTest := func(expected, base string, elements ...string) test { - return test{Expected: expected, Base: base, Elements: elements} - } - for _, test := range []test{ - newTest("https://try.gitea.io/a/b/c", - "https://try.gitea.io", "a/b", "c"), - newTest("https://try.gitea.io/a/b/c", - "https://try.gitea.io/", "/a/b/", "/c/"), - newTest("https://try.gitea.io/a/c", - "https://try.gitea.io/", "/a/./b/", "../c/"), - newTest("a/b/c", - "a", "b/c/"), - 
newTest("a/b/d", - "a/", "b/c/", "/../d/"), - } { - assert.Equal(t, test.Expected, URLJoin(test.Base, test.Elements...)) - } -} - -func TestRender_IssueIndexPattern(t *testing.T) { - // numeric: render inputs without valid mentions - test := func(s string) { - testRenderIssueIndexPattern(t, s, s, nil) - testRenderIssueIndexPattern(t, s, s, numericMetas) - } - - // should not render anything when there are no mentions - test("") - test("this is a test") - test("test 123 123 1234") - test("#") - test("# # #") - test("# 123") - test("#abcd") - test("##1234") - test("test#1234") - test("#1234test") - test(" test #1234test") - - // should not render issue mention without leading space - test("test#54321 issue") - - // should not render issue mention without trailing space - test("test #54321issue") -} - -func TestRender_IssueIndexPattern2(t *testing.T) { - setting.AppURL = AppURL - setting.AppSubURL = AppSubURL - - // numeric: render inputs with valid mentions - test := func(s, expectedFmt string, indices ...int) { - links := make([]interface{}, len(indices)) - for i, index := range indices { - links[i] = numericIssueLink(URLJoin(setting.AppSubURL, "issues"), index) - } - expectedNil := fmt.Sprintf(expectedFmt, links...) - testRenderIssueIndexPattern(t, s, expectedNil, nil) - - for i, index := range indices { - links[i] = numericIssueLink("https://someurl.com/someUser/someRepo/", index) - } - expectedNum := fmt.Sprintf(expectedFmt, links...) 
- testRenderIssueIndexPattern(t, s, expectedNum, numericMetas) - } - - // should render freestanding mentions - test("#1234 test", "%s test", 1234) - test("test #8 issue", "test %s issue", 8) - test("test issue #1234", "test issue %s", 1234) - - // should render mentions in parentheses - test("(#54321 issue)", "(%s issue)", 54321) - test("test (#9801 extra) issue", "test (%s extra) issue", 9801) - test("test (#1)", "test (%s)", 1) - - // should render multiple issue mentions in the same line - test("#54321 #1243", "%s %s", 54321, 1243) - test("wow (#54321 #1243)", "wow (%s %s)", 54321, 1243) - test("(#4)(#5)", "(%s)(%s)", 4, 5) - test("#1 (#4321) test", "%s (%s) test", 1, 4321) -} - -func TestRender_IssueIndexPattern3(t *testing.T) { - setting.AppURL = AppURL - setting.AppSubURL = AppSubURL - - // alphanumeric: render inputs without valid mentions - test := func(s string) { - testRenderIssueIndexPattern(t, s, s, alphanumericMetas) - } - test("") - test("this is a test") - test("test 123 123 1234") - test("#") - test("##1234") - test("# 123") - test("#abcd") - test("test #123") - test("abc-1234") // issue prefix must be capital - test("ABc-1234") // issue prefix must be _all_ capital - test("ABCDEFGHIJK-1234") // the limit is 10 characters in the prefix - test("ABC1234") // dash is required - test("test ABC- test") // number is required - test("test -1234 test") // prefix is required - test("testABC-123 test") // leading space is required - test("test ABC-123test") // trailing space is required - test("ABC-0123") // no leading zero -} - -func TestRender_IssueIndexPattern4(t *testing.T) { - setting.AppURL = AppURL - setting.AppSubURL = AppSubURL - - // alphanumeric: render inputs with valid mentions - test := func(s, expectedFmt string, names ...string) { - links := make([]interface{}, len(names)) - for i, name := range names { - links[i] = alphanumIssueLink("https://someurl.com/someUser/someRepo/", name) - } - expected := fmt.Sprintf(expectedFmt, links...) 
- testRenderIssueIndexPattern(t, s, expected, alphanumericMetas) - } - test("OTT-1234 test", "%s test", "OTT-1234") - test("test T-12 issue", "test %s issue", "T-12") - test("test issue ABCDEFGHIJ-1234567890", "test issue %s", "ABCDEFGHIJ-1234567890") -} - -func TestRender_AutoLink(t *testing.T) { - setting.AppURL = AppURL - setting.AppSubURL = AppSubURL - - test := func(input, expected string) { - buffer := RenderSpecialLink([]byte(input), setting.AppSubURL, nil, false) - assert.Equal(t, strings.TrimSpace(expected), strings.TrimSpace(string(buffer))) - buffer = RenderSpecialLink([]byte(input), setting.AppSubURL, nil, true) - assert.Equal(t, strings.TrimSpace(expected), strings.TrimSpace(string(buffer))) - } - - // render valid issue URLs - test(URLJoin(setting.AppSubURL, "issues", "3333"), - numericIssueLink(URLJoin(setting.AppSubURL, "issues"), 3333)) - - // render external issue URLs - for _, externalURL := range []string{ - "http://1111/2222/ssss-issues/3333?param=blah&blahh=333", - "http://test.com/issues/33333", - "https://issues/333"} { - test(externalURL, externalURL) - } - - // render valid commit URLs - tmp := URLJoin(AppSubURL, "commit", "d8a994ef243349f321568f9e36d5c3f444b99cae") - test(tmp, "d8a994ef24") - tmp += "#diff-2" - test(tmp, "d8a994ef24 (diff-2)") - - // render other commit URLs - tmp = "https://external-link.gogs.io/gogs/gogs/commit/d8a994ef243349f321568f9e36d5c3f444b99cae#diff-2" - test(tmp, "d8a994ef24 (diff-2)") + string(markup.RenderIssueIndexPattern([]byte(input), AppSubURL, metas))) } func TestRender_StandardLinks(t *testing.T) { @@ -241,8 +74,8 @@ func TestRender_StandardLinks(t *testing.T) { googleRendered := `

https://google.com/

` test("", googleRendered, googleRendered) - lnk := URLJoin(AppSubURL, "WikiPage") - lnkWiki := URLJoin(AppSubURL, "wiki", "WikiPage") + lnk := markup.URLJoin(AppSubURL, "WikiPage") + lnkWiki := markup.URLJoin(AppSubURL, "wiki", "WikiPage") test("[WikiPage](WikiPage)", `

WikiPage

`, `

WikiPage

`) @@ -251,7 +84,7 @@ func TestRender_StandardLinks(t *testing.T) { func TestRender_ShortLinks(t *testing.T) { setting.AppURL = AppURL setting.AppSubURL = AppSubURL - tree := URLJoin(AppSubURL, "src", "master") + tree := markup.URLJoin(AppSubURL, "src", "master") test := func(input, expected, expectedWiki string) { buffer := RenderString(input, tree, nil) @@ -260,13 +93,13 @@ func TestRender_ShortLinks(t *testing.T) { assert.Equal(t, strings.TrimSpace(expectedWiki), strings.TrimSpace(string(buffer))) } - rawtree := URLJoin(AppSubURL, "raw", "master") - url := URLJoin(tree, "Link") - otherUrl := URLJoin(tree, "OtherLink") - imgurl := URLJoin(rawtree, "Link.jpg") - urlWiki := URLJoin(AppSubURL, "wiki", "Link") - otherUrlWiki := URLJoin(AppSubURL, "wiki", "OtherLink") - imgurlWiki := URLJoin(AppSubURL, "wiki", "raw", "Link.jpg") + rawtree := markup.URLJoin(AppSubURL, "raw", "master") + url := markup.URLJoin(tree, "Link") + otherUrl := markup.URLJoin(tree, "OtherLink") + imgurl := markup.URLJoin(rawtree, "Link.jpg") + urlWiki := markup.URLJoin(AppSubURL, "wiki", "Link") + otherUrlWiki := markup.URLJoin(AppSubURL, "wiki", "OtherLink") + imgurlWiki := markup.URLJoin(AppSubURL, "wiki", "raw", "Link.jpg") favicon := "http://google.com/favicon.ico" test( @@ -311,271 +144,6 @@ func TestRender_ShortLinks(t *testing.T) { `

Link OtherLink

`) } -func TestRender_Commits(t *testing.T) { - setting.AppURL = AppURL - setting.AppSubURL = AppSubURL - - test := func(input, expected string) { - buffer := RenderString(input, setting.AppSubURL, nil) - assert.Equal(t, strings.TrimSpace(expected), strings.TrimSpace(string(buffer))) - } - - var sha = "b6dd6210eaebc915fd5be5579c58cce4da2e2579" - var commit = URLJoin(AppSubURL, "commit", sha) - var subtree = URLJoin(commit, "src") - var tree = strings.Replace(subtree, "/commit/", "/tree/", -1) - var src = strings.Replace(subtree, "/commit/", "/src/", -1) - - test(sha, `

b6dd6210ea

`) - test(sha[:7], `

b6dd621

`) - test(sha[:39], `

b6dd6210ea

`) - test(commit, `

b6dd6210ea

`) - test(tree, `

b6dd6210ea/src

`) - test("commit "+sha, `

commit b6dd6210ea

`) -} - -func TestRender_Images(t *testing.T) { - setting.AppURL = AppURL - setting.AppSubURL = AppSubURL - - test := func(input, expected string) { - buffer := RenderString(input, setting.AppSubURL, nil) - assert.Equal(t, strings.TrimSpace(expected), strings.TrimSpace(string(buffer))) - } - - url := "../../.images/src/02/train.jpg" - title := "Train" - result := URLJoin(AppSubURL, url) - - test( - "!["+title+"]("+url+")", - `

`+title+`

`) - - test( - "[["+title+"|"+url+"]]", - `

`+title+`

`) -} - -func TestRender_CrossReferences(t *testing.T) { - setting.AppURL = AppURL - setting.AppSubURL = AppSubURL - - test := func(input, expected string) { - buffer := RenderString(input, setting.AppSubURL, nil) - assert.Equal(t, strings.TrimSpace(expected), strings.TrimSpace(string(buffer))) - } - - test( - "gogits/gogs#12345", - `

gogits/gogs#12345

`) -} - -func TestRender_FullIssueURLs(t *testing.T) { - setting.AppURL = AppURL - setting.AppSubURL = AppSubURL - - test := func(input, expected string) { - result := RenderFullIssuePattern([]byte(input)) - assert.Equal(t, expected, string(result)) - } - test("Here is a link https://git.osgeo.org/gogs/postgis/postgis/pulls/6", - "Here is a link https://git.osgeo.org/gogs/postgis/postgis/pulls/6") - test("Look here http://localhost:3000/person/repo/issues/4", - `Look here #4`) - test("http://localhost:3000/person/repo/issues/4#issuecomment-1234", - `#4`) -} - -func TestRegExp_MentionPattern(t *testing.T) { - trueTestCases := []string{ - "@Unknwon", - "@ANT_123", - "@xxx-DiN0-z-A..uru..s-xxx", - " @lol ", - " @Te/st", - } - falseTestCases := []string{ - "@ 0", - "@ ", - "@", - "", - "ABC", - } - - for _, testCase := range trueTestCases { - res := MentionPattern.MatchString(testCase) - if !res { - println() - println(testCase) - } - assert.True(t, res) - } - for _, testCase := range falseTestCases { - res := MentionPattern.MatchString(testCase) - if res { - println() - println(testCase) - } - assert.False(t, res) - } -} - -func TestRegExp_IssueNumericPattern(t *testing.T) { - trueTestCases := []string{ - "#1234", - "#0", - "#1234567890987654321", - } - falseTestCases := []string{ - "# 1234", - "# 0", - "# ", - "#", - "#ABC", - "#1A2B", - "", - "ABC", - } - - for _, testCase := range trueTestCases { - assert.True(t, IssueNumericPattern.MatchString(testCase)) - } - for _, testCase := range falseTestCases { - assert.False(t, IssueNumericPattern.MatchString(testCase)) - } -} - -func TestRegExp_IssueAlphanumericPattern(t *testing.T) { - trueTestCases := []string{ - "ABC-1234", - "A-1", - "RC-80", - "ABCDEFGHIJ-1234567890987654321234567890", - } - falseTestCases := []string{ - "RC-08", - "PR-0", - "ABCDEFGHIJK-1", - "PR_1", - "", - "#ABC", - "", - "ABC", - "GG-", - "rm-1", - } - - for _, testCase := range trueTestCases { - assert.True(t, 
IssueAlphanumericPattern.MatchString(testCase)) - } - for _, testCase := range falseTestCases { - assert.False(t, IssueAlphanumericPattern.MatchString(testCase)) - } -} - -func TestRegExp_Sha1CurrentPattern(t *testing.T) { - trueTestCases := []string{ - "d8a994ef243349f321568f9e36d5c3f444b99cae", - "abcdefabcdefabcdefabcdefabcdefabcdefabcd", - } - falseTestCases := []string{ - "test", - "abcdefg", - "abcdefghijklmnopqrstuvwxyzabcdefghijklmn", - "abcdefghijklmnopqrstuvwxyzabcdefghijklmO", - } - - for _, testCase := range trueTestCases { - assert.True(t, Sha1CurrentPattern.MatchString(testCase)) - } - for _, testCase := range falseTestCases { - assert.False(t, Sha1CurrentPattern.MatchString(testCase)) - } -} - -func TestRegExp_ShortLinkPattern(t *testing.T) { - trueTestCases := []string{ - "[[stuff]]", - "[[]]", - "[[stuff|title=Difficult name with spaces*!]]", - } - falseTestCases := []string{ - "test", - "abcdefg", - "[[]", - "[[", - "[]", - "]]", - "abcdefghijklmnopqrstuvwxyz", - } - - for _, testCase := range trueTestCases { - assert.True(t, ShortLinkPattern.MatchString(testCase)) - } - for _, testCase := range falseTestCases { - assert.False(t, ShortLinkPattern.MatchString(testCase)) - } -} - -func TestRegExp_AnySHA1Pattern(t *testing.T) { - testCases := map[string][]string{ - "https://github.com/jquery/jquery/blob/a644101ed04d0beacea864ce805e0c4f86ba1cd1/test/unit/event.js#L2703": { - "https", - "github.com", - "jquery", - "jquery", - "blob", - "a644101ed04d0beacea864ce805e0c4f86ba1cd1", - "test/unit/event.js", - "L2703", - }, - "https://github.com/jquery/jquery/blob/a644101ed04d0beacea864ce805e0c4f86ba1cd1/test/unit/event.js": { - "https", - "github.com", - "jquery", - "jquery", - "blob", - "a644101ed04d0beacea864ce805e0c4f86ba1cd1", - "test/unit/event.js", - "", - }, - "https://github.com/jquery/jquery/commit/0705be475092aede1eddae01319ec931fb9c65fc": { - "https", - "github.com", - "jquery", - "jquery", - "commit", - 
"0705be475092aede1eddae01319ec931fb9c65fc", - "", - "", - }, - "https://github.com/jquery/jquery/tree/0705be475092aede1eddae01319ec931fb9c65fc/src": { - "https", - "github.com", - "jquery", - "jquery", - "tree", - "0705be475092aede1eddae01319ec931fb9c65fc", - "src", - "", - }, - "https://try.gogs.io/gogs/gogs/commit/d8a994ef243349f321568f9e36d5c3f444b99cae#diff-2": { - "https", - "try.gogs.io", - "gogs", - "gogs", - "commit", - "d8a994ef243349f321568f9e36d5c3f444b99cae", - "", - "diff-2", - }, - } - - for k, v := range testCases { - assert.Equal(t, AnySHA1Pattern.FindStringSubmatch(k)[1:], v) - } -} - func TestMisc_IsMarkdownFile(t *testing.T) { setting.Markdown.FileExtensions = []string{".md", ".markdown", ".mdown", ".mkd"} trueTestCases := []string{ @@ -598,49 +166,50 @@ func TestMisc_IsMarkdownFile(t *testing.T) { } } -func TestMisc_IsSameDomain(t *testing.T) { +func TestRender_Images(t *testing.T) { setting.AppURL = AppURL setting.AppSubURL = AppSubURL - var sha = "b6dd6210eaebc915fd5be5579c58cce4da2e2579" - var commit = URLJoin(AppSubURL, "commit", sha) + test := func(input, expected string) { + buffer := RenderString(input, setting.AppSubURL, nil) + assert.Equal(t, strings.TrimSpace(expected), strings.TrimSpace(string(buffer))) + } - assert.True(t, IsSameDomain(commit)) - assert.False(t, IsSameDomain("http://google.com/ncr")) - assert.False(t, IsSameDomain("favicon.ico")) + url := "../../.images/src/02/train.jpg" + title := "Train" + result := markup.URLJoin(AppSubURL, url) + + test( + "!["+title+"]("+url+")", + `

`+title+`

`) + + test( + "[["+title+"|"+url+"]]", + `

`+title+`

`) } -// Test cases without ambiguous links -var sameCases = []string{ - // dear imgui wiki markdown extract: special wiki syntax - `Wiki! Enjoy :) -- [[Links, Language bindings, Engine bindings|Links]] -- [[Tips]] +func TestRegExp_ShortLinkPattern(t *testing.T) { + trueTestCases := []string{ + "[[stuff]]", + "[[]]", + "[[stuff|title=Difficult name with spaces*!]]", + } + falseTestCases := []string{ + "test", + "abcdefg", + "[[]", + "[[", + "[]", + "]]", + "abcdefghijklmnopqrstuvwxyz", + } -Ideas and codes - -- Bezier widget (by @r-lyeh) ` + AppURL + `ocornut/imgui/issues/786 -- Node graph editors https://github.com/ocornut/imgui/issues/306 -- [[Memory Editor|memory_editor_example]] -- [[Plot var helper|plot_var_example]]`, - // wine-staging wiki home extract: tables, special wiki syntax, images - `## What is Wine Staging? -**Wine Staging** on website [wine-staging.com](http://wine-staging.com). - -## Quick Links -Here are some links to the most important topics. You can find the full list of pages at the sidebar. - -| [[images/icon-install.png]] | [[Installation]] | -|--------------------------------|----------------------------------------------------------| -| [[images/icon-usage.png]] | [[Usage]] | -`, - // libgdx wiki page: inline images with special syntax - `[Excelsior JET](http://www.excelsiorjet.com/) allows you to create native executables for Windows, Linux and Mac OS X. - -1. [Package your libGDX application](https://github.com/libgdx/libgdx/wiki/Gradle-on-the-Commandline#packaging-for-the-desktop) -[[images/1.png]] -2. Perform a test run by hitting the Run! button. 
-[[images/2.png]]`, + for _, testCase := range trueTestCases { + assert.True(t, markup.ShortLinkPattern.MatchString(testCase)) + } + for _, testCase := range falseTestCases { + assert.False(t, markup.ShortLinkPattern.MatchString(testCase)) + } } func testAnswers(baseURLContent, baseURLImages string) []string { @@ -697,24 +266,41 @@ func testAnswers(baseURLContent, baseURLImages string) []string { } } -func TestTotal_RenderString(t *testing.T) { - answers := testAnswers(URLJoin(AppSubURL, "src", "master/"), URLJoin(AppSubURL, "raw", "master/")) +// Test cases without ambiguous links +var sameCases = []string{ + // dear imgui wiki markdown extract: special wiki syntax + `Wiki! Enjoy :) +- [[Links, Language bindings, Engine bindings|Links]] +- [[Tips]] - for i := 0; i < len(sameCases); i++ { - line := RenderString(sameCases[i], URLJoin(AppSubURL, "src", "master/"), nil) - assert.Equal(t, answers[i], line) - } +Ideas and codes - testCases := []string{} +- Bezier widget (by @r-lyeh) ` + AppURL + `ocornut/imgui/issues/786 +- Node graph editors https://github.com/ocornut/imgui/issues/306 +- [[Memory Editor|memory_editor_example]] +- [[Plot var helper|plot_var_example]]`, + // wine-staging wiki home extract: tables, special wiki syntax, images + `## What is Wine Staging? +**Wine Staging** on website [wine-staging.com](http://wine-staging.com). - for i := 0; i < len(testCases); i += 2 { - line := RenderString(testCases[i], AppSubURL, nil) - assert.Equal(t, testCases[i+1], line) - } +## Quick Links +Here are some links to the most important topics. You can find the full list of pages at the sidebar. + +| [[images/icon-install.png]] | [[Installation]] | +|--------------------------------|----------------------------------------------------------| +| [[images/icon-usage.png]] | [[Usage]] | +`, + // libgdx wiki page: inline images with special syntax + `[Excelsior JET](http://www.excelsiorjet.com/) allows you to create native executables for Windows, Linux and Mac OS X. + +1. 
[Package your libGDX application](https://github.com/libgdx/libgdx/wiki/Gradle-on-the-Commandline#packaging-for-the-desktop) +[[images/1.png]] +2. Perform a test run by hitting the Run! button. +[[images/2.png]]`, } func TestTotal_RenderWiki(t *testing.T) { - answers := testAnswers(URLJoin(AppSubURL, "wiki/"), URLJoin(AppSubURL, "wiki", "raw/")) + answers := testAnswers(markup.URLJoin(AppSubURL, "wiki/"), markup.URLJoin(AppSubURL, "wiki", "raw/")) for i := 0; i < len(sameCases); i++ { line := RenderWiki([]byte(sameCases[i]), AppSubURL, nil) @@ -739,3 +325,19 @@ func TestTotal_RenderWiki(t *testing.T) { assert.Equal(t, testCases[i+1], line) } } + +func TestTotal_RenderString(t *testing.T) { + answers := testAnswers(markup.URLJoin(AppSubURL, "src", "master/"), markup.URLJoin(AppSubURL, "raw", "master/")) + + for i := 0; i < len(sameCases); i++ { + line := RenderString(sameCases[i], markup.URLJoin(AppSubURL, "src", "master/"), nil) + assert.Equal(t, answers[i], line) + } + + testCases := []string{} + + for i := 0; i < len(testCases); i += 2 { + line := RenderString(testCases[i], AppSubURL, nil) + assert.Equal(t, testCases[i+1], line) + } +} diff --git a/modules/markup/html.go b/modules/markup/html.go new file mode 100644 index 000000000..9daf0b0c6 --- /dev/null +++ b/modules/markup/html.go @@ -0,0 +1,517 @@ +// Copyright 2017 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package markup + +import ( + "bytes" + "fmt" + "io" + "net/url" + "path" + "path/filepath" + "regexp" + "strings" + + "code.gitea.io/gitea/modules/base" + "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/setting" + + "github.com/Unknwon/com" + "golang.org/x/net/html" +) + +// Issue name styles +const ( + IssueNameStyleNumeric = "numeric" + IssueNameStyleAlphanumeric = "alphanumeric" +) + +var ( + // NOTE: All below regex matching do not perform any extra validation. 
+ // Thus a link is produced even if the linked entity does not exist. + // While fast, this is also incorrect and lead to false positives. + // TODO: fix invalid linking issue + + // MentionPattern matches string that mentions someone, e.g. @Unknwon + MentionPattern = regexp.MustCompile(`(\s|^|\W)@[0-9a-zA-Z-_\.]+`) + + // IssueNumericPattern matches string that references to a numeric issue, e.g. #1287 + IssueNumericPattern = regexp.MustCompile(`( |^|\()#[0-9]+\b`) + // IssueAlphanumericPattern matches string that references to an alphanumeric issue, e.g. ABC-1234 + IssueAlphanumericPattern = regexp.MustCompile(`( |^|\()[A-Z]{1,10}-[1-9][0-9]*\b`) + // CrossReferenceIssueNumericPattern matches string that references a numeric issue in a different repository + // e.g. gogits/gogs#12345 + CrossReferenceIssueNumericPattern = regexp.MustCompile(`( |^)[0-9a-zA-Z]+/[0-9a-zA-Z]+#[0-9]+\b`) + + // Sha1CurrentPattern matches string that represents a commit SHA, e.g. d8a994ef243349f321568f9e36d5c3f444b99cae + // Although SHA1 hashes are 40 chars long, the regex matches the hash from 7 to 40 chars in length + // so that abbreviated hash links can be used as well. This matches git and github useability. + Sha1CurrentPattern = regexp.MustCompile(`(?:^|\s|\()([0-9a-f]{7,40})\b`) + + // ShortLinkPattern matches short but difficult to parse [[name|link|arg=test]] syntax + ShortLinkPattern = regexp.MustCompile(`(\[\[.*?\]\]\w*)`) + + // AnySHA1Pattern allows to split url containing SHA into parts + AnySHA1Pattern = regexp.MustCompile(`(http\S*)://(\S+)/(\S+)/(\S+)/(\S+)/([0-9a-f]{40})(?:/?([^#\s]+)?(?:#(\S+))?)?`) + + validLinksPattern = regexp.MustCompile(`^[a-z][\w-]+://`) +) + +// regexp for full links to issues/pulls +var issueFullPattern *regexp.Regexp + +// IsLink reports whether link fits valid format. +func IsLink(link []byte) bool { + return isLink(link) +} + +// isLink reports whether link fits valid format. 
+func isLink(link []byte) bool { + return validLinksPattern.Match(link) +} + +func getIssueFullPattern() *regexp.Regexp { + if issueFullPattern == nil { + appURL := setting.AppURL + if len(appURL) > 0 && appURL[len(appURL)-1] != '/' { + appURL += "/" + } + issueFullPattern = regexp.MustCompile(appURL + + `\w+/\w+/(?:issues|pulls)/((?:\w{1,10}-)?[1-9][0-9]*)([\?|#]\S+.(\S+)?)?\b`) + } + return issueFullPattern +} + +// FindAllMentions matches mention patterns in given content +// and returns a list of found user names without @ prefix. +func FindAllMentions(content string) []string { + mentions := MentionPattern.FindAllString(content, -1) + for i := range mentions { + mentions[i] = mentions[i][strings.Index(mentions[i], "@")+1:] // Strip @ character + } + return mentions +} + +// cutoutVerbosePrefix cutouts URL prefix including sub-path to +// return a clean unified string of request URL path. +func cutoutVerbosePrefix(prefix string) string { + if len(prefix) == 0 || prefix[0] != '/' { + return prefix + } + count := 0 + for i := 0; i < len(prefix); i++ { + if prefix[i] == '/' { + count++ + } + if count >= 3+setting.AppSubURLDepth { + return prefix[:i] + } + } + return prefix +} + +// URLJoin joins url components, like path.Join, but preserving contents +func URLJoin(base string, elems ...string) string { + u, err := url.Parse(base) + if err != nil { + log.Error(4, "URLJoin: Invalid base URL %s", base) + return "" + } + joinArgs := make([]string, 0, len(elems)+1) + joinArgs = append(joinArgs, u.Path) + joinArgs = append(joinArgs, elems...) + u.Path = path.Join(joinArgs...) + return u.String() +} + +// RenderIssueIndexPattern renders issue indexes to corresponding links. 
+func RenderIssueIndexPattern(rawBytes []byte, urlPrefix string, metas map[string]string) []byte { + urlPrefix = cutoutVerbosePrefix(urlPrefix) + + pattern := IssueNumericPattern + if metas["style"] == IssueNameStyleAlphanumeric { + pattern = IssueAlphanumericPattern + } + + ms := pattern.FindAll(rawBytes, -1) + for _, m := range ms { + if m[0] == ' ' || m[0] == '(' { + m = m[1:] // ignore leading space or opening parentheses + } + var link string + if metas == nil { + link = fmt.Sprintf(`%s`, URLJoin(urlPrefix, "issues", string(m[1:])), m) + } else { + // Support for external issue tracker + if metas["style"] == IssueNameStyleAlphanumeric { + metas["index"] = string(m) + } else { + metas["index"] = string(m[1:]) + } + link = fmt.Sprintf(`%s`, com.Expand(metas["format"], metas), m) + } + rawBytes = bytes.Replace(rawBytes, m, []byte(link), 1) + } + return rawBytes +} + +// IsSameDomain checks if given url string has the same hostname as current Gitea instance +func IsSameDomain(s string) bool { + if strings.HasPrefix(s, "/") { + return true + } + if uapp, err := url.Parse(setting.AppURL); err == nil { + if u, err := url.Parse(s); err == nil { + return u.Host == uapp.Host + } + return false + } + return false +} + +// renderFullSha1Pattern renders SHA containing URLs +func renderFullSha1Pattern(rawBytes []byte, urlPrefix string) []byte { + ms := AnySHA1Pattern.FindAllSubmatch(rawBytes, -1) + for _, m := range ms { + all := m[0] + protocol := string(m[1]) + paths := string(m[2]) + path := protocol + "://" + paths + author := string(m[3]) + repoName := string(m[4]) + path = URLJoin(path, author, repoName) + ltype := "src" + itemType := m[5] + if IsSameDomain(paths) { + ltype = string(itemType) + } else if string(itemType) == "commit" { + ltype = "commit" + } + sha := m[6] + var subtree string + if len(m) > 7 && len(m[7]) > 0 { + subtree = string(m[7]) + } + var line []byte + if len(m) > 8 && len(m[8]) > 0 { + line = m[8] + } + urlSuffix := "" + text := 
base.ShortSha(string(sha)) + if subtree != "" { + urlSuffix = "/" + subtree + text += urlSuffix + } + if line != nil { + value := string(line) + urlSuffix += "#" + urlSuffix += value + text += " (" + text += value + text += ")" + } + rawBytes = bytes.Replace(rawBytes, all, []byte(fmt.Sprintf( + `%s`, URLJoin(path, ltype, string(sha))+urlSuffix, text)), -1) + } + return rawBytes +} + +// RenderFullIssuePattern renders issues-like URLs +func RenderFullIssuePattern(rawBytes []byte) []byte { + ms := getIssueFullPattern().FindAllSubmatch(rawBytes, -1) + for _, m := range ms { + all := m[0] + id := string(m[1]) + text := "#" + id + // TODO if m[2] is not nil, then link is to a comment, + // and we should indicate that in the text somehow + rawBytes = bytes.Replace(rawBytes, all, []byte(fmt.Sprintf( + `%s`, string(all), text)), -1) + } + return rawBytes +} + +func firstIndexOfByte(sl []byte, target byte) int { + for i := 0; i < len(sl); i++ { + if sl[i] == target { + return i + } + } + return -1 +} + +func lastIndexOfByte(sl []byte, target byte) int { + for i := len(sl) - 1; i >= 0; i-- { + if sl[i] == target { + return i + } + } + return -1 +} + +// RenderShortLinks processes [[syntax]] +// +// noLink flag disables making link tags when set to true +// so this function just replaces the whole [[...]] with the content text +// +// isWikiMarkdown is a flag to choose linking url prefix +func RenderShortLinks(rawBytes []byte, urlPrefix string, noLink bool, isWikiMarkdown bool) []byte { + ms := ShortLinkPattern.FindAll(rawBytes, -1) + for _, m := range ms { + orig := bytes.TrimSpace(m) + m = orig[2:] + tailPos := lastIndexOfByte(m, ']') + 1 + tail := []byte{} + if tailPos < len(m) { + tail = m[tailPos:] + m = m[:tailPos-1] + } + m = m[:len(m)-2] + props := map[string]string{} + + // MediaWiki uses [[link|text]], while GitHub uses [[text|link]] + // It makes page handling terrible, but we prefer GitHub syntax + // And fall back to MediaWiki only when it is obvious from the 
look + // Of text and link contents + sl := bytes.Split(m, []byte("|")) + for _, v := range sl { + switch bytes.Count(v, []byte("=")) { + + // Piped args without = sign, these are mandatory arguments + case 0: + { + sv := string(v) + if props["name"] == "" { + if isLink(v) { + // If we clearly see it is a link, we save it so + + // But first we need to ensure, that if both mandatory args provided + // look like links, we stick to GitHub syntax + if props["link"] != "" { + props["name"] = props["link"] + } + + props["link"] = strings.TrimSpace(sv) + } else { + props["name"] = sv + } + } else { + props["link"] = strings.TrimSpace(sv) + } + } + + // Piped args with = sign, these are optional arguments + case 1: + { + sep := firstIndexOfByte(v, '=') + key, val := string(v[:sep]), html.UnescapeString(string(v[sep+1:])) + lastCharIndex := len(val) - 1 + if (val[0] == '"' || val[0] == '\'') && (val[lastCharIndex] == '"' || val[lastCharIndex] == '\'') { + val = val[1:lastCharIndex] + } + props[key] = val + } + } + } + + var name string + var link string + if props["link"] != "" { + link = props["link"] + } else if props["name"] != "" { + link = props["name"] + } + if props["title"] != "" { + name = props["title"] + } else if props["name"] != "" { + name = props["name"] + } else { + name = link + } + + name += string(tail) + image := false + ext := filepath.Ext(string(link)) + if ext != "" { + switch ext { + case ".jpg", ".jpeg", ".png", ".tif", ".tiff", ".webp", ".gif", ".bmp", ".ico", ".svg": + { + image = true + } + } + } + absoluteLink := isLink([]byte(link)) + if !absoluteLink { + link = strings.Replace(link, " ", "+", -1) + } + if image { + if !absoluteLink { + if IsSameDomain(urlPrefix) { + urlPrefix = strings.Replace(urlPrefix, "/src/", "/raw/", 1) + } + if isWikiMarkdown { + link = URLJoin("wiki", "raw", link) + } + link = URLJoin(urlPrefix, link) + } + title := props["title"] + if title == "" { + title = props["alt"] + } + if title == "" { + title = 
path.Base(string(name)) + } + alt := props["alt"] + if alt == "" { + alt = name + } + if alt != "" { + alt = `alt="` + alt + `"` + } + name = fmt.Sprintf(``, link, alt, title) + } else if !absoluteLink { + if isWikiMarkdown { + link = URLJoin("wiki", link) + } + link = URLJoin(urlPrefix, link) + } + if noLink { + rawBytes = bytes.Replace(rawBytes, orig, []byte(name), -1) + } else { + rawBytes = bytes.Replace(rawBytes, orig, + []byte(fmt.Sprintf(`%s`, link, name)), -1) + } + } + return rawBytes +} + +// RenderCrossReferenceIssueIndexPattern renders issue indexes from other repositories to corresponding links. +func RenderCrossReferenceIssueIndexPattern(rawBytes []byte, urlPrefix string, metas map[string]string) []byte { + ms := CrossReferenceIssueNumericPattern.FindAll(rawBytes, -1) + for _, m := range ms { + if m[0] == ' ' || m[0] == '(' { + m = m[1:] // ignore leading space or opening parentheses + } + + repo := string(bytes.Split(m, []byte("#"))[0]) + issue := string(bytes.Split(m, []byte("#"))[1]) + + link := fmt.Sprintf(`%s`, URLJoin(setting.AppURL, repo, "issues", issue), m) + rawBytes = bytes.Replace(rawBytes, m, []byte(link), 1) + } + return rawBytes +} + +// renderSha1CurrentPattern renders SHA1 strings to corresponding links that assumes in the same repository. +func renderSha1CurrentPattern(rawBytes []byte, urlPrefix string) []byte { + ms := Sha1CurrentPattern.FindAllSubmatch(rawBytes, -1) + for _, m := range ms { + hash := m[1] + // The regex does not lie, it matches the hash pattern. + // However, a regex cannot know if a hash actually exists or not. + // We could assume that a SHA1 hash should probably contain alphas AND numerics + // but that is not always the case. + // Although unlikely, deadbeef and 1234567 are valid short forms of SHA1 hash + // as used by git and github for linking and thus we have to do similar. 
+ rawBytes = bytes.Replace(rawBytes, hash, []byte(fmt.Sprintf( + `%s`, URLJoin(urlPrefix, "commit", string(hash)), base.ShortSha(string(hash)))), -1) + } + return rawBytes +} + +// RenderSpecialLink renders mentions, indexes and SHA1 strings to corresponding links. +func RenderSpecialLink(rawBytes []byte, urlPrefix string, metas map[string]string, isWikiMarkdown bool) []byte { + ms := MentionPattern.FindAll(rawBytes, -1) + for _, m := range ms { + m = m[bytes.Index(m, []byte("@")):] + rawBytes = bytes.Replace(rawBytes, m, + []byte(fmt.Sprintf(`%s`, URLJoin(setting.AppURL, string(m[1:])), m)), -1) + } + + rawBytes = RenderFullIssuePattern(rawBytes) + rawBytes = RenderShortLinks(rawBytes, urlPrefix, false, isWikiMarkdown) + rawBytes = RenderIssueIndexPattern(rawBytes, urlPrefix, metas) + rawBytes = RenderCrossReferenceIssueIndexPattern(rawBytes, urlPrefix, metas) + rawBytes = renderFullSha1Pattern(rawBytes, urlPrefix) + rawBytes = renderSha1CurrentPattern(rawBytes, urlPrefix) + return rawBytes +} + +var ( + leftAngleBracket = []byte("") +) + +var noEndTags = []string{"img", "input", "br", "hr"} + +// PostProcess treats different types of HTML differently, +// and only renders special links for plain text blocks. +func PostProcess(rawHTML []byte, urlPrefix string, metas map[string]string, isWikiMarkdown bool) []byte { + startTags := make([]string, 0, 5) + var buf bytes.Buffer + tokenizer := html.NewTokenizer(bytes.NewReader(rawHTML)) + +OUTER_LOOP: + for html.ErrorToken != tokenizer.Next() { + token := tokenizer.Token() + switch token.Type { + case html.TextToken: + buf.Write(RenderSpecialLink([]byte(token.String()), urlPrefix, metas, isWikiMarkdown)) + + case html.StartTagToken: + buf.WriteString(token.String()) + tagName := token.Data + // If this is an excluded tag, we skip processing all output until a close tag is encountered. 
+ if strings.EqualFold("a", tagName) || strings.EqualFold("code", tagName) || strings.EqualFold("pre", tagName) { + stackNum := 1 + for html.ErrorToken != tokenizer.Next() { + token = tokenizer.Token() + + // Copy the token to the output verbatim + buf.Write(RenderShortLinks([]byte(token.String()), urlPrefix, true, isWikiMarkdown)) + + if token.Type == html.StartTagToken && !com.IsSliceContainsStr(noEndTags, token.Data) { + stackNum++ + } + + // If this is the close tag to the outer-most, we are done + if token.Type == html.EndTagToken { + stackNum-- + + if stackNum <= 0 && strings.EqualFold(tagName, token.Data) { + break + } + } + } + continue OUTER_LOOP + } + + if !com.IsSliceContainsStr(noEndTags, tagName) { + startTags = append(startTags, tagName) + } + + case html.EndTagToken: + if len(startTags) == 0 { + buf.WriteString(token.String()) + break + } + + buf.Write(leftAngleBracket) + buf.WriteString(startTags[len(startTags)-1]) + buf.Write(rightAngleBracket) + startTags = startTags[:len(startTags)-1] + default: + buf.WriteString(token.String()) + } + } + + if io.EOF == tokenizer.Err() { + return buf.Bytes() + } + + // If we are not at the end of the input, then some other parsing error has occurred, + // so return the input verbatim. + return rawHTML +} diff --git a/modules/markup/html_test.go b/modules/markup/html_test.go new file mode 100644 index 000000000..407115526 --- /dev/null +++ b/modules/markup/html_test.go @@ -0,0 +1,460 @@ +// Copyright 2017 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package markup_test + +import ( + "fmt" + "strconv" + "strings" + "testing" + + _ "code.gitea.io/gitea/modules/markdown" + . 
"code.gitea.io/gitea/modules/markup" + "code.gitea.io/gitea/modules/setting" + + "github.com/stretchr/testify/assert" +) + +const AppURL = "http://localhost:3000/" +const Repo = "gogits/gogs" +const AppSubURL = AppURL + Repo + "/" + +var numericMetas = map[string]string{ + "format": "https://someurl.com/{user}/{repo}/{index}", + "user": "someUser", + "repo": "someRepo", + "style": IssueNameStyleNumeric, +} + +var alphanumericMetas = map[string]string{ + "format": "https://someurl.com/{user}/{repo}/{index}", + "user": "someUser", + "repo": "someRepo", + "style": IssueNameStyleAlphanumeric, +} + +// numericLink an HTML to a numeric-style issue +func numericIssueLink(baseURL string, index int) string { + return link(URLJoin(baseURL, strconv.Itoa(index)), fmt.Sprintf("#%d", index)) +} + +// alphanumLink an HTML link to an alphanumeric-style issue +func alphanumIssueLink(baseURL string, name string) string { + return link(URLJoin(baseURL, name), name) +} + +// urlContentsLink an HTML link whose contents is the target URL +func urlContentsLink(href string) string { + return link(href, href) +} + +// link an HTML link +func link(href, contents string) string { + return fmt.Sprintf("%s", href, contents) +} + +func testRenderIssueIndexPattern(t *testing.T, input, expected string, metas map[string]string) { + assert.Equal(t, expected, + string(RenderIssueIndexPattern([]byte(input), AppSubURL, metas))) +} + +func TestURLJoin(t *testing.T) { + type test struct { + Expected string + Base string + Elements []string + } + newTest := func(expected, base string, elements ...string) test { + return test{Expected: expected, Base: base, Elements: elements} + } + for _, test := range []test{ + newTest("https://try.gitea.io/a/b/c", + "https://try.gitea.io", "a/b", "c"), + newTest("https://try.gitea.io/a/b/c", + "https://try.gitea.io/", "/a/b/", "/c/"), + newTest("https://try.gitea.io/a/c", + "https://try.gitea.io/", "/a/./b/", "../c/"), + newTest("a/b/c", + "a", "b/c/"), + 
newTest("a/b/d", + "a/", "b/c/", "/../d/"), + } { + assert.Equal(t, test.Expected, URLJoin(test.Base, test.Elements...)) + } +} + +func TestRender_IssueIndexPattern(t *testing.T) { + // numeric: render inputs without valid mentions + test := func(s string) { + testRenderIssueIndexPattern(t, s, s, nil) + testRenderIssueIndexPattern(t, s, s, numericMetas) + } + + // should not render anything when there are no mentions + test("") + test("this is a test") + test("test 123 123 1234") + test("#") + test("# # #") + test("# 123") + test("#abcd") + test("##1234") + test("test#1234") + test("#1234test") + test(" test #1234test") + + // should not render issue mention without leading space + test("test#54321 issue") + + // should not render issue mention without trailing space + test("test #54321issue") +} + +func TestRender_IssueIndexPattern2(t *testing.T) { + setting.AppURL = AppURL + setting.AppSubURL = AppSubURL + + // numeric: render inputs with valid mentions + test := func(s, expectedFmt string, indices ...int) { + links := make([]interface{}, len(indices)) + for i, index := range indices { + links[i] = numericIssueLink(URLJoin(setting.AppSubURL, "issues"), index) + } + expectedNil := fmt.Sprintf(expectedFmt, links...) + testRenderIssueIndexPattern(t, s, expectedNil, nil) + + for i, index := range indices { + links[i] = numericIssueLink("https://someurl.com/someUser/someRepo/", index) + } + expectedNum := fmt.Sprintf(expectedFmt, links...) 
+ testRenderIssueIndexPattern(t, s, expectedNum, numericMetas) + } + + // should render freestanding mentions + test("#1234 test", "%s test", 1234) + test("test #8 issue", "test %s issue", 8) + test("test issue #1234", "test issue %s", 1234) + + // should render mentions in parentheses + test("(#54321 issue)", "(%s issue)", 54321) + test("test (#9801 extra) issue", "test (%s extra) issue", 9801) + test("test (#1)", "test (%s)", 1) + + // should render multiple issue mentions in the same line + test("#54321 #1243", "%s %s", 54321, 1243) + test("wow (#54321 #1243)", "wow (%s %s)", 54321, 1243) + test("(#4)(#5)", "(%s)(%s)", 4, 5) + test("#1 (#4321) test", "%s (%s) test", 1, 4321) +} + +func TestRender_IssueIndexPattern3(t *testing.T) { + setting.AppURL = AppURL + setting.AppSubURL = AppSubURL + + // alphanumeric: render inputs without valid mentions + test := func(s string) { + testRenderIssueIndexPattern(t, s, s, alphanumericMetas) + } + test("") + test("this is a test") + test("test 123 123 1234") + test("#") + test("##1234") + test("# 123") + test("#abcd") + test("test #123") + test("abc-1234") // issue prefix must be capital + test("ABc-1234") // issue prefix must be _all_ capital + test("ABCDEFGHIJK-1234") // the limit is 10 characters in the prefix + test("ABC1234") // dash is required + test("test ABC- test") // number is required + test("test -1234 test") // prefix is required + test("testABC-123 test") // leading space is required + test("test ABC-123test") // trailing space is required + test("ABC-0123") // no leading zero +} + +func TestRender_IssueIndexPattern4(t *testing.T) { + setting.AppURL = AppURL + setting.AppSubURL = AppSubURL + + // alphanumeric: render inputs with valid mentions + test := func(s, expectedFmt string, names ...string) { + links := make([]interface{}, len(names)) + for i, name := range names { + links[i] = alphanumIssueLink("https://someurl.com/someUser/someRepo/", name) + } + expected := fmt.Sprintf(expectedFmt, links...) 
+ testRenderIssueIndexPattern(t, s, expected, alphanumericMetas) + } + test("OTT-1234 test", "%s test", "OTT-1234") + test("test T-12 issue", "test %s issue", "T-12") + test("test issue ABCDEFGHIJ-1234567890", "test issue %s", "ABCDEFGHIJ-1234567890") +} + +func TestRender_AutoLink(t *testing.T) { + setting.AppURL = AppURL + setting.AppSubURL = AppSubURL + + test := func(input, expected string) { + buffer := RenderSpecialLink([]byte(input), setting.AppSubURL, nil, false) + assert.Equal(t, strings.TrimSpace(expected), strings.TrimSpace(string(buffer))) + buffer = RenderSpecialLink([]byte(input), setting.AppSubURL, nil, true) + assert.Equal(t, strings.TrimSpace(expected), strings.TrimSpace(string(buffer))) + } + + // render valid issue URLs + test(URLJoin(setting.AppSubURL, "issues", "3333"), + numericIssueLink(URLJoin(setting.AppSubURL, "issues"), 3333)) + + // render external issue URLs + for _, externalURL := range []string{ + "http://1111/2222/ssss-issues/3333?param=blah&blahh=333", + "http://test.com/issues/33333", + "https://issues/333"} { + test(externalURL, externalURL) + } + + // render valid commit URLs + tmp := URLJoin(AppSubURL, "commit", "d8a994ef243349f321568f9e36d5c3f444b99cae") + test(tmp, "d8a994ef24") + tmp += "#diff-2" + test(tmp, "d8a994ef24 (diff-2)") + + // render other commit URLs + tmp = "https://external-link.gogs.io/gogs/gogs/commit/d8a994ef243349f321568f9e36d5c3f444b99cae#diff-2" + test(tmp, "d8a994ef24 (diff-2)") +} + +func TestRender_Commits(t *testing.T) { + setting.AppURL = AppURL + setting.AppSubURL = AppSubURL + + test := func(input, expected string) { + buffer := RenderString(".md", input, setting.AppSubURL, nil) + assert.Equal(t, strings.TrimSpace(expected), strings.TrimSpace(string(buffer))) + } + + var sha = "b6dd6210eaebc915fd5be5579c58cce4da2e2579" + var commit = URLJoin(AppSubURL, "commit", sha) + var subtree = URLJoin(commit, "src") + var tree = strings.Replace(subtree, "/commit/", "/tree/", -1) + var src = 
strings.Replace(subtree, "/commit/", "/src/", -1) + + test(sha, `

b6dd6210ea

`) + test(sha[:7], `

b6dd621

`) + test(sha[:39], `

b6dd6210ea

`) + test(commit, `

b6dd6210ea

`) + test(tree, `

b6dd6210ea/src

`) + test("commit "+sha, `

commit b6dd6210ea

`) +} + +func TestRender_CrossReferences(t *testing.T) { + setting.AppURL = AppURL + setting.AppSubURL = AppSubURL + + test := func(input, expected string) { + buffer := RenderString("a.md", input, setting.AppSubURL, nil) + assert.Equal(t, strings.TrimSpace(expected), strings.TrimSpace(string(buffer))) + } + + test( + "gogits/gogs#12345", + `

gogits/gogs#12345

`) +} + +func TestRender_FullIssueURLs(t *testing.T) { + setting.AppURL = AppURL + setting.AppSubURL = AppSubURL + + test := func(input, expected string) { + result := RenderFullIssuePattern([]byte(input)) + assert.Equal(t, expected, string(result)) + } + test("Here is a link https://git.osgeo.org/gogs/postgis/postgis/pulls/6", + "Here is a link https://git.osgeo.org/gogs/postgis/postgis/pulls/6") + test("Look here http://localhost:3000/person/repo/issues/4", + `Look here #4`) + test("http://localhost:3000/person/repo/issues/4#issuecomment-1234", + `#4`) +} + +func TestRegExp_MentionPattern(t *testing.T) { + trueTestCases := []string{ + "@Unknwon", + "@ANT_123", + "@xxx-DiN0-z-A..uru..s-xxx", + " @lol ", + " @Te/st", + } + falseTestCases := []string{ + "@ 0", + "@ ", + "@", + "", + "ABC", + } + + for _, testCase := range trueTestCases { + res := MentionPattern.MatchString(testCase) + if !res { + println() + println(testCase) + } + assert.True(t, res) + } + for _, testCase := range falseTestCases { + res := MentionPattern.MatchString(testCase) + if res { + println() + println(testCase) + } + assert.False(t, res) + } +} + +func TestRegExp_IssueNumericPattern(t *testing.T) { + trueTestCases := []string{ + "#1234", + "#0", + "#1234567890987654321", + } + falseTestCases := []string{ + "# 1234", + "# 0", + "# ", + "#", + "#ABC", + "#1A2B", + "", + "ABC", + } + + for _, testCase := range trueTestCases { + assert.True(t, IssueNumericPattern.MatchString(testCase)) + } + for _, testCase := range falseTestCases { + assert.False(t, IssueNumericPattern.MatchString(testCase)) + } +} + +func TestRegExp_IssueAlphanumericPattern(t *testing.T) { + trueTestCases := []string{ + "ABC-1234", + "A-1", + "RC-80", + "ABCDEFGHIJ-1234567890987654321234567890", + } + falseTestCases := []string{ + "RC-08", + "PR-0", + "ABCDEFGHIJK-1", + "PR_1", + "", + "#ABC", + "", + "ABC", + "GG-", + "rm-1", + } + + for _, testCase := range trueTestCases { + assert.True(t, 
IssueAlphanumericPattern.MatchString(testCase)) + } + for _, testCase := range falseTestCases { + assert.False(t, IssueAlphanumericPattern.MatchString(testCase)) + } +} + +func TestRegExp_Sha1CurrentPattern(t *testing.T) { + trueTestCases := []string{ + "d8a994ef243349f321568f9e36d5c3f444b99cae", + "abcdefabcdefabcdefabcdefabcdefabcdefabcd", + } + falseTestCases := []string{ + "test", + "abcdefg", + "abcdefghijklmnopqrstuvwxyzabcdefghijklmn", + "abcdefghijklmnopqrstuvwxyzabcdefghijklmO", + } + + for _, testCase := range trueTestCases { + assert.True(t, Sha1CurrentPattern.MatchString(testCase)) + } + for _, testCase := range falseTestCases { + assert.False(t, Sha1CurrentPattern.MatchString(testCase)) + } +} + +func TestRegExp_AnySHA1Pattern(t *testing.T) { + testCases := map[string][]string{ + "https://github.com/jquery/jquery/blob/a644101ed04d0beacea864ce805e0c4f86ba1cd1/test/unit/event.js#L2703": { + "https", + "github.com", + "jquery", + "jquery", + "blob", + "a644101ed04d0beacea864ce805e0c4f86ba1cd1", + "test/unit/event.js", + "L2703", + }, + "https://github.com/jquery/jquery/blob/a644101ed04d0beacea864ce805e0c4f86ba1cd1/test/unit/event.js": { + "https", + "github.com", + "jquery", + "jquery", + "blob", + "a644101ed04d0beacea864ce805e0c4f86ba1cd1", + "test/unit/event.js", + "", + }, + "https://github.com/jquery/jquery/commit/0705be475092aede1eddae01319ec931fb9c65fc": { + "https", + "github.com", + "jquery", + "jquery", + "commit", + "0705be475092aede1eddae01319ec931fb9c65fc", + "", + "", + }, + "https://github.com/jquery/jquery/tree/0705be475092aede1eddae01319ec931fb9c65fc/src": { + "https", + "github.com", + "jquery", + "jquery", + "tree", + "0705be475092aede1eddae01319ec931fb9c65fc", + "src", + "", + }, + "https://try.gogs.io/gogs/gogs/commit/d8a994ef243349f321568f9e36d5c3f444b99cae#diff-2": { + "https", + "try.gogs.io", + "gogs", + "gogs", + "commit", + "d8a994ef243349f321568f9e36d5c3f444b99cae", + "", + "diff-2", + }, + } + + for k, v := range testCases { + 
assert.Equal(t, AnySHA1Pattern.FindStringSubmatch(k)[1:], v) + } +} + +func TestMisc_IsSameDomain(t *testing.T) { + setting.AppURL = AppURL + setting.AppSubURL = AppSubURL + + var sha = "b6dd6210eaebc915fd5be5579c58cce4da2e2579" + var commit = URLJoin(AppSubURL, "commit", sha) + + assert.True(t, IsSameDomain(commit)) + assert.False(t, IsSameDomain("http://google.com/ncr")) + assert.False(t, IsSameDomain("favicon.ico")) +} diff --git a/modules/markup/markup.go b/modules/markup/markup.go index 185cdc727..ba28ec53c 100644 --- a/modules/markup/markup.go +++ b/modules/markup/markup.go @@ -9,6 +9,12 @@ import ( "strings" ) +// Init initializes the full issue pattern and the sanitizer +func Init() { + getIssueFullPattern() + NewSanitizer() +} + // Parser defines an interface for parsering markup file to HTML type Parser interface { Name() string // markup format name @@ -17,66 +23,94 @@ type Parser interface { } var ( - parsers = make(map[string]Parser) + extParsers = make(map[string]Parser) + parsers = make(map[string]Parser) ) // RegisterParser registers a new markup file parser func RegisterParser(parser Parser) { + parsers[parser.Name()] = parser for _, ext := range parser.Extensions() { - parsers[strings.ToLower(ext)] = parser + extParsers[strings.ToLower(ext)] = parser } } +// GetParserByFileName returns the parser registered for the extension of filename, or nil if there is none +func GetParserByFileName(filename string) Parser { + extension := strings.ToLower(filepath.Ext(filename)) + return extParsers[extension] +} + +// GetParserByType returns the parser registered under the name tp, or nil if there is none +func GetParserByType(tp string) Parser { + return parsers[tp] +} + // Render renders markup file to HTML with all specific handling stuff. 
func Render(filename string, rawBytes []byte, urlPrefix string, metas map[string]string) []byte { - return render(filename, rawBytes, urlPrefix, metas, false) + return renderFile(filename, rawBytes, urlPrefix, metas, false) } -func render(filename string, rawBytes []byte, urlPrefix string, metas map[string]string, isWiki bool) []byte { - extension := strings.ToLower(filepath.Ext(filename)) - if parser, ok := parsers[extension]; ok { - return parser.Render(rawBytes, urlPrefix, metas, isWiki) - } - return nil +// RenderByType renders markup of the given type to HTML with special links and returns the result as bytes. +func RenderByType(tp string, rawBytes []byte, urlPrefix string, metas map[string]string) []byte { + return renderByType(tp, rawBytes, urlPrefix, metas, false) } // RenderString renders Markdown to HTML with special links and returns string type. func RenderString(filename string, raw, urlPrefix string, metas map[string]string) string { - return string(render(filename, []byte(raw), urlPrefix, metas, false)) + return string(renderFile(filename, []byte(raw), urlPrefix, metas, false)) } // RenderWiki renders markdown wiki page to HTML and return HTML string func RenderWiki(filename string, rawBytes []byte, urlPrefix string, metas map[string]string) string { - return string(render(filename, rawBytes, urlPrefix, metas, true)) + return string(renderFile(filename, rawBytes, urlPrefix, metas, true)) +} + +func render(parser Parser, rawBytes []byte, urlPrefix string, metas map[string]string, isWiki bool) []byte { + urlPrefix = strings.Replace(urlPrefix, " ", "+", -1) + result := parser.Render(rawBytes, urlPrefix, metas, isWiki) + result = PostProcess(result, urlPrefix, metas, isWiki) + return SanitizeBytes(result) +} + +func renderByType(tp string, rawBytes []byte, urlPrefix string, metas map[string]string, isWiki bool) []byte { + if parser, ok := parsers[tp]; ok { + return render(parser, rawBytes, urlPrefix, metas, isWiki) + } + return nil +} + +func 
renderFile(filename string, rawBytes 
[]byte, urlPrefix string, metas map[string]string, isWiki bool) []byte { + extension := strings.ToLower(filepath.Ext(filename)) + if parser, ok := extParsers[extension]; ok { + return render(parser, rawBytes, urlPrefix, metas, isWiki) + } + return nil } // Type returns if markup format via the filename func Type(filename string) string { - extension := strings.ToLower(filepath.Ext(filename)) - if parser, ok := parsers[extension]; ok { + if parser := GetParserByFileName(filename); parser != nil { return parser.Name() } return "" } -// ReadmeFileType reports whether name looks like a README file -// based on its name and find the parser via its ext name -func ReadmeFileType(name string) (string, bool) { - if IsReadmeFile(name) { - return Type(name), true +// IsMarkupFile reports whether name is handled by the parser of the given markup type +func IsMarkupFile(name, markup string) bool { + if parser := GetParserByFileName(name); parser != nil { + return parser.Name() == markup } - return "", false + return false } // IsReadmeFile reports whether name looks like a README file // based on its name. func IsReadmeFile(name string) bool { + name = strings.ToLower(name) if len(name) < 6 { return false - } - - name = strings.ToLower(name) - if len(name) == 6 { + } else if len(name) == 6 { return name == "readme" } return name[:7] == "readme." diff --git a/modules/markup/markup_test.go b/modules/markup/markup_test.go index 0dd2be356..8d061ae39 100644 --- a/modules/markup/markup_test.go +++ b/modules/markup/markup_test.go @@ -2,11 +2,14 @@ // Use of this source code is governed by a MIT-style // license that can be found in the LICENSE file. -package markup +package markup_test import ( "testing" + _ "code.gitea.io/gitea/modules/markdown" + . 
"code.gitea.io/gitea/modules/markup" + "github.com/stretchr/testify/assert" ) diff --git a/modules/markdown/sanitizer.go b/modules/markup/sanitizer.go similarity index 99% rename from modules/markdown/sanitizer.go rename to modules/markup/sanitizer.go index cc00c9a1a..2ec43cf4f 100644 --- a/modules/markdown/sanitizer.go +++ b/modules/markup/sanitizer.go @@ -3,7 +3,7 @@ // Use of this source code is governed by a MIT-style // license that can be found in the LICENSE file. -package markdown +package markup import ( "regexp" diff --git a/modules/markdown/sanitizer_test.go b/modules/markup/sanitizer_test.go similarity index 99% rename from modules/markdown/sanitizer_test.go rename to modules/markup/sanitizer_test.go index 77a4b33c8..211201d20 100644 --- a/modules/markdown/sanitizer_test.go +++ b/modules/markup/sanitizer_test.go @@ -3,7 +3,7 @@ // Use of this source code is governed by a MIT-style // license that can be found in the LICENSE file. -package markdown +package markup import ( "testing" diff --git a/modules/templates/helper.go b/modules/templates/helper.go index 5ac0f6ee5..181d3ff15 100644 --- a/modules/templates/helper.go +++ b/modules/templates/helper.go @@ -24,7 +24,7 @@ import ( "code.gitea.io/gitea/models" "code.gitea.io/gitea/modules/base" "code.gitea.io/gitea/modules/log" - "code.gitea.io/gitea/modules/markdown" + "code.gitea.io/gitea/modules/markup" "code.gitea.io/gitea/modules/setting" ) @@ -173,7 +173,7 @@ func SafeJS(raw string) template.JS { // Str2html render Markdown text to HTML func Str2html(raw string) template.HTML { - return template.HTML(markdown.Sanitize(raw)) + return template.HTML(markup.Sanitize(raw)) } // List traversings the list @@ -253,7 +253,7 @@ func ReplaceLeft(s, old, new string) string { // RenderCommitMessage renders commit message with XSS-safe and special links. 
func RenderCommitMessage(full bool, msg, urlPrefix string, metas map[string]string) template.HTML { cleanMsg := template.HTMLEscapeString(msg) - fullMessage := string(markdown.RenderIssueIndexPattern([]byte(cleanMsg), urlPrefix, metas)) + fullMessage := string(markup.RenderIssueIndexPattern([]byte(cleanMsg), urlPrefix, metas)) msgLines := strings.Split(strings.TrimSpace(fullMessage), "\n") numLines := len(msgLines) if numLines == 0 { diff --git a/routers/api/v1/misc/markdown.go b/routers/api/v1/misc/markdown.go index 933488301..a2e65ecb0 100644 --- a/routers/api/v1/misc/markdown.go +++ b/routers/api/v1/misc/markdown.go @@ -9,6 +9,7 @@ import ( "code.gitea.io/gitea/modules/context" "code.gitea.io/gitea/modules/markdown" + "code.gitea.io/gitea/modules/markup" "code.gitea.io/gitea/modules/setting" ) @@ -39,7 +40,7 @@ func Markdown(ctx *context.APIContext, form api.MarkdownOption) { switch form.Mode { case "gfm": md := []byte(form.Text) - context := markdown.URLJoin(setting.AppURL, form.Context) + context := markup.URLJoin(setting.AppURL, form.Context) if form.Wiki { ctx.Write([]byte(markdown.RenderWiki(md, context, nil))) } else { diff --git a/routers/api/v1/misc/markdown_test.go b/routers/api/v1/misc/markdown_test.go index d6e619347..8c3051ea8 100644 --- a/routers/api/v1/misc/markdown_test.go +++ b/routers/api/v1/misc/markdown_test.go @@ -1,23 +1,21 @@ package misc import ( + "io/ioutil" "net/http" "net/http/httptest" + "net/url" + "strings" "testing" - macaron "gopkg.in/macaron.v1" - - "net/url" - - "io/ioutil" - "strings" - "code.gitea.io/gitea/modules/context" - "code.gitea.io/gitea/modules/markdown" + "code.gitea.io/gitea/modules/markup" "code.gitea.io/gitea/modules/setting" api "code.gitea.io/sdk/gitea" + "github.com/go-macaron/inject" "github.com/stretchr/testify/assert" + macaron "gopkg.in/macaron.v1" ) const AppURL = "http://localhost:3000/" @@ -55,7 +53,7 @@ func TestAPI_RenderGFM(t *testing.T) { Context: Repo, Wiki: true, } - requrl, _ := 
url.Parse(markdown.URLJoin(AppURL, "api", "v1", "markdown")) + requrl, _ := url.Parse(markup.URLJoin(AppURL, "api", "v1", "markdown")) req := &http.Request{ Method: "POST", URL: requrl, @@ -149,7 +147,7 @@ func TestAPI_RenderSimple(t *testing.T) { Text: "", Context: Repo, } - requrl, _ := url.Parse(markdown.URLJoin(AppURL, "api", "v1", "markdown")) + requrl, _ := url.Parse(markup.URLJoin(AppURL, "api", "v1", "markdown")) req := &http.Request{ Method: "POST", URL: requrl, @@ -168,7 +166,7 @@ func TestAPI_RenderSimple(t *testing.T) { func TestAPI_RenderRaw(t *testing.T) { setting.AppURL = AppURL - requrl, _ := url.Parse(markdown.URLJoin(AppURL, "api", "v1", "markdown")) + requrl, _ := url.Parse(markup.URLJoin(AppURL, "api", "v1", "markdown")) req := &http.Request{ Method: "POST", URL: requrl, diff --git a/routers/init.go b/routers/init.go index bd46abdf9..5113ec4ce 100644 --- a/routers/init.go +++ b/routers/init.go @@ -16,7 +16,7 @@ import ( "code.gitea.io/gitea/modules/indexer" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/mailer" - "code.gitea.io/gitea/modules/markdown" + "code.gitea.io/gitea/modules/markup" "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/ssh" macaron "gopkg.in/macaron.v1" @@ -50,8 +50,8 @@ func GlobalInit() { if setting.InstallLock { highlight.NewContext() - markdown.InitMarkdown() - markdown.NewSanitizer() + markup.Init() + if err := models.NewEngine(migrations.Migrate); err != nil { log.Fatal(4, "Failed to initialize ORM engine: %v", err) } diff --git a/routers/repo/view.go b/routers/repo/view.go index 3a9e0e1d3..991d2c040 100644 --- a/routers/repo/view.go +++ b/routers/repo/view.go @@ -61,13 +61,12 @@ func renderDirectory(ctx *context.Context, treeLink string) { continue } - tp, ok := markup.ReadmeFileType(entry.Name()) - if !ok { + if !markup.IsReadmeFile(entry.Name()) { continue } readmeFile = entry.Blob() - if tp != "" { + if markup.Type(entry.Name()) != "" { break } }