forgejo-tickets/internal/markdown/markdown.go

142 lines
4.1 KiB
Go

package markdown
import (
"bytes"
"fmt"
"html/template"
"regexp"
"strings"
"github.com/microcosm-cc/bluemonday"
"github.com/yuin/goldmark"
highlighting "github.com/yuin/goldmark-highlighting/v2"
"github.com/yuin/goldmark/extension"
"github.com/yuin/goldmark/renderer/html"
)
// Package-level renderer state and mention patterns. md and policy are
// assigned in init.
var (
// md is the goldmark instance RenderMarkdown uses to convert markdown to HTML.
md goldmark.Markdown
// policy is the bluemonday policy RenderMarkdown applies to the converted HTML.
policy *bluemonday.Policy
// mentionRegex matches @username when it sits at the start of the input or
// directly after whitespace, "(" or ">". Capture group 1 is "@username" and
// group 2 is the bare username.
// NOTE(review): not referenced anywhere in the visible part of this file —
// confirm it is still needed.
mentionRegex = regexp.MustCompile(`(?:^|[\s(>])(@(\w+))`)
// RawMentionRegex matches @username in raw markdown when it sits at the start
// of the input or directly after whitespace or "(". Capture group 1 is the
// bare username; ExtractMentions reads it from there.
RawMentionRegex = regexp.MustCompile(`(?:^|[\s(])@(\w+)`)
)
func init() {
md = goldmark.New(
goldmark.WithExtensions(
extension.GFM,
highlighting.NewHighlighting(
highlighting.WithStyle("github"),
),
),
goldmark.WithRendererOptions(
html.WithHardWraps(),
),
)
policy = bluemonday.UGCPolicy()
policy.AllowAttrs("class").OnElements("code", "pre", "span", "div", "ul", "li")
policy.AllowAttrs("style").OnElements("span", "pre", "code")
// Allow task list checkboxes generated by goldmark GFM
policy.AllowAttrs("type").Matching(regexp.MustCompile(`^checkbox$`)).OnElements("input")
policy.AllowAttrs("checked", "disabled").OnElements("input")
}
// ExtractMentions scans every given raw markdown text with RawMentionRegex and
// returns the mentioned usernames (without the leading "@"). Each username is
// reported once, in order of first appearance across the texts. The result is
// nil when no mention is found.
func ExtractMentions(texts ...string) []string {
	var usernames []string
	known := make(map[string]bool)
	for i := range texts {
		matches := RawMentionRegex.FindAllStringSubmatch(texts[i], -1)
		for j := range matches {
			// Capture group 1 holds the bare username.
			name := matches[j][1]
			if known[name] {
				continue
			}
			known[name] = true
			usernames = append(usernames, name)
		}
	}
	return usernames
}
// RenderMarkdown converts markdown input to HTML with md, sanitizes the result
// with policy, and returns it as template.HTML.
//
// mentions maps username -> display name. When it is non-empty the sanitized
// HTML is additionally passed through processMentions so @mentions are styled;
// a nil or empty map skips that step. If the markdown conversion itself fails,
// the HTML-escaped input is returned instead.
func RenderMarkdown(input string, mentions map[string]string) template.HTML {
	var rendered bytes.Buffer
	err := md.Convert([]byte(input), &rendered)
	if err != nil {
		return template.HTML(template.HTMLEscapeString(input))
	}

	safe := string(policy.SanitizeBytes(rendered.Bytes()))
	if len(mentions) == 0 {
		return template.HTML(safe)
	}
	return template.HTML(processMentions(safe, mentions))
}
// processMentions wraps each known @username found in html in a
// <span class="mention"> whose title attribute is the HTML-escaped display
// name. mentions maps username -> display name.
//
// The substitution itself is delegated to replaceOutsideCode, one username at
// a time; that helper decides which regions of the HTML are left untouched.
// Usernames are processed in map iteration order.
func processMentions(html string, mentions map[string]string) string {
	const spanFormat = `<span class="mention" title="%s">@%s</span>`

	out := html
	for username, displayName := range mentions {
		span := fmt.Sprintf(
			spanFormat,
			template.HTMLEscapeString(displayName),
			template.HTMLEscapeString(username),
		)
		out = replaceOutsideCode(out, "@"+username, span)
	}
	return out
}
// mentionSkipElements lists the elements whose whole content (opening tag
// through closing tag) replaceOutsideCode copies through untouched.
var mentionSkipElements = [...]string{"code", "pre", "a"}

// replaceOutsideCode returns html with every whole-word occurrence of old
// replaced by replacement.
//
// Regions that are never rewritten:
//   - everything from an opening <code>, <pre> or <a> tag through its matching
//     closing tag (tag names are matched ASCII case-insensitively);
//   - the markup of any other tag, i.e. the bytes from "<" through the next
//     ">", so attribute values such as href="…/@name" are left intact.
//
// An occurrence counts as whole-word when it is neither preceded nor followed
// by a character accepted by isWordChar. An empty old returns html unchanged.
//
// The scanner is intended for sanitized HTML, where "<" and ">" in text and
// attribute values are expected to be entity-escaped. A "<" that does not
// look like the start of a tag is treated as ordinary text.
func replaceOutsideCode(html, old, replacement string) string {
	if old == "" {
		// An empty needle would match at every position without ever
		// advancing the scan.
		return html
	}

	var result strings.Builder
	result.Grow(len(html))
	for i := 0; i < len(html); {
		if html[i] == '<' {
			if end := protectedEnd(html, i); end > i {
				result.WriteString(html[i:end])
				i = end
				continue
			}
		}
		if strings.HasPrefix(html[i:], old) {
			next := i + len(old)
			// Reject matches glued to a longer word (e.g. "bob@alice" or
			// "@alicex" when looking for "@alice").
			precededByWord := i > 0 && isWordChar(html[i-1])
			followedByWord := next < len(html) && isWordChar(html[next])
			if !precededByWord && !followedByWord {
				result.WriteString(replacement)
				i = next
				continue
			}
		}
		result.WriteByte(html[i])
		i++
	}
	return result.String()
}

// protectedEnd reports where the protected region starting at html[i] (which
// must be '<') ends, as an index one past its last byte. It returns i when no
// tag starts at i, so the caller treats the '<' as plain text.
func protectedEnd(html string, i int) int {
	if i+1 >= len(html) || !isTagStart(html[i+1]) {
		return i
	}
	for _, name := range mentionSkipElements {
		if !opensElement(html, i, name) {
			continue
		}
		closeTag := "</" + name + ">"
		if rel := indexFold(html[i:], closeTag); rel >= 0 {
			return i + rel + len(closeTag)
		}
		// Unclosed element: protect only the opening tag itself.
		break
	}
	if rel := strings.IndexByte(html[i:], '>'); rel >= 0 {
		return i + rel + 1
	}
	return i
}

// opensElement reports whether an opening tag for the element called name
// starts at html[i]. The name must be followed by '>', '/' or whitespace so
// that, for example, "<abbr>" is not mistaken for "<a>".
func opensElement(html string, i int, name string) bool {
	end := i + 1 + len(name)
	if end >= len(html) || !strings.EqualFold(html[i+1:end], name) {
		return false
	}
	switch html[end] {
	case '>', '/', ' ', '\t', '\n', '\r', '\f':
		return true
	}
	return false
}

// indexFold returns the byte offset in s of the first ASCII case-insensitive
// occurrence of tag (which must start with '<'), or -1 if there is none.
// Offsets refer to s itself; no lowercased copy is made, because lowercasing
// can change the byte length of non-ASCII text and shift every offset.
func indexFold(s, tag string) int {
	for k := 0; k+len(tag) <= len(s); k++ {
		// Both operands have the same byte length and tag is ASCII, so only
		// an ASCII spelling of the tag can compare equal here.
		if s[k] == '<' && strings.EqualFold(s[k:k+len(tag)], tag) {
			return k
		}
	}
	return -1
}

// isTagStart reports whether b, the byte right after a '<', can begin a tag:
// an ASCII letter, '/' for closing tags, or '!' for comments and doctypes.
func isTagStart(b byte) bool {
	return b == '/' || b == '!' || (b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z')
}

// isWordChar reports whether b is an ASCII letter, an ASCII digit or '_'.
func isWordChar(b byte) bool {
	switch {
	case b >= 'a' && b <= 'z', b >= 'A' && b <= 'Z', b >= '0' && b <= '9':
		return true
	}
	return b == '_'
}