forgejo-tickets/internal/markdown/markdown.go

package markdown

import (
	"bytes"
	"fmt"
	"html/template"
	"regexp"
	"strings"

	"github.com/microcosm-cc/bluemonday"
	"github.com/yuin/goldmark"
	highlighting "github.com/yuin/goldmark-highlighting/v2"
	"github.com/yuin/goldmark/extension"
	"github.com/yuin/goldmark/renderer/html"
)

var (
	// md is the goldmark converter and policy the bluemonday sanitizer used by
	// RenderMarkdown; both are assigned in init.
	md     goldmark.Markdown
	policy *bluemonday.Policy

	// mentionRegex matches an @username located at the start of the input or
	// directly after whitespace, "(" or ">". Capture group 1 is "@username",
	// group 2 is the bare username. The pattern alone cannot tell text from
	// tag markup, and it is not referenced by the code visible in this file.
	mentionRegex = regexp.MustCompile(`(?:^|[\s(>])(@(\w+))`)

	// RawMentionRegex matches an @username in raw markdown when it sits at the
	// start of the input or directly after whitespace or "(". Capture group 1
	// is the username without the leading "@". ExtractMentions relies on it.
	RawMentionRegex = regexp.MustCompile(`(?:^|[\s(])@(\w+)`)
)

func init() {
	md = goldmark.New(
		goldmark.WithExtensions(
			extension.GFM,
			highlighting.NewHighlighting(
				highlighting.WithStyle("github"),
			),
		),
		goldmark.WithRendererOptions(
			html.WithHardWraps(),
		),
	)

	policy = bluemonday.UGCPolicy()
	policy.AllowAttrs("class").OnElements("code", "pre", "span", "div", "ul", "li")
	policy.AllowAttrs("style").OnElements("span", "pre", "code")
	// Allow task list checkboxes generated by goldmark GFM
	policy.AllowAttrs("type").Matching(regexp.MustCompile(`^checkbox$`)).OnElements("input")
	policy.AllowAttrs("checked", "disabled").OnElements("input")
}

// ExtractMentions collects the usernames (without the leading "@") that
// RawMentionRegex finds across all texts. Each username is reported once, in
// order of first appearance; the result is nil when nothing matches.
func ExtractMentions(texts ...string) []string {
	var usernames []string
	known := make(map[string]bool)
	for _, text := range texts {
		matches := RawMentionRegex.FindAllStringSubmatch(text, -1)
		for _, match := range matches {
			name := match[1] // capture group 1: the bare username
			if known[name] {
				continue
			}
			known[name] = true
			usernames = append(usernames, name)
		}
	}
	return usernames
}

// RenderMarkdown turns markdown input into HTML that has been passed through
// the package sanitization policy.
//
// mentions maps username -> display name; when it is non-empty the sanitized
// HTML is post-processed by processMentions so those @usernames are styled.
// If the markdown conversion fails, the HTML-escaped input is returned instead.
func RenderMarkdown(input string, mentions map[string]string) template.HTML {
	var rendered bytes.Buffer
	err := md.Convert([]byte(input), &rendered)
	if err != nil {
		// Fall back to the raw text, escaped so it is still safe to embed.
		return template.HTML(template.HTMLEscapeString(input))
	}

	clean := string(policy.SanitizeBytes(rendered.Bytes()))
	if len(mentions) == 0 {
		return template.HTML(clean)
	}
	return template.HTML(processMentions(clean, mentions))
}

// processMentions wraps every @username listed in mentions (username ->
// display name) in a <span class="mention"> whose title attribute is the
// HTML-escaped display name.
//
// The substitution itself is delegated to replaceOutsideCode, so occurrences
// inside <code> and <pre> elements are left alone. Usernames are handled one
// after another in map iteration order, which Go leaves unspecified.
func processMentions(html string, mentions map[string]string) string {
	const spanFormat = `<span class="mention" title="%s">@%s</span>`

	out := html
	for handle, name := range mentions {
		span := fmt.Sprintf(spanFormat, template.HTMLEscapeString(name), template.HTMLEscapeString(handle))
		out = replaceOutsideCode(out, "@"+handle, span)
	}
	return out
}

// replaceOutsideCode replaces every whole-word occurrence of old in html with
// replacement, touching text content only.
//
// The following regions are copied through verbatim:
//   - tag markup itself (everything from "<" up to the closing ">"), so
//     attribute values such as href or title are never rewritten;
//   - the entire content of <code>, <pre> and <a> elements.
//
// An occurrence counts as whole-word when the bytes directly before and after
// it are not ASCII word characters (see isWordChar). Tag names are compared
// ASCII-case-insensitively on the original bytes, so offsets always refer to
// html itself. An empty old returns html unchanged.
func replaceOutsideCode(html, old, replacement string) string {
	if old == "" {
		// An empty needle would match everywhere without consuming input.
		return html
	}

	var out strings.Builder
	out.Grow(len(html))
	for i := 0; i < len(html); {
		if html[i] == '<' {
			if end := htmlVerbatimEnd(html, i); end > i {
				out.WriteString(html[i:end])
				i = end
				continue
			}
		}

		if strings.HasPrefix(html[i:], old) {
			wordBefore := i > 0 && isWordChar(html[i-1])
			wordAfter := i+len(old) < len(html) && isWordChar(html[i+len(old)])
			if !wordBefore && !wordAfter {
				out.WriteString(replacement)
				i += len(old)
				continue
			}
		}

		out.WriteByte(html[i])
		i++
	}
	return out.String()
}

// htmlVerbatimEnd returns the index just past the region beginning at
// html[start] == '<' that must be copied unchanged: the whole element for
// <code>, <pre> and <a> (when a closing tag exists), otherwise just the tag
// markup. It returns start when the '<' does not begin a tag.
func htmlVerbatimEnd(html string, start int) int {
	tagEnd := htmlTagEnd(html, start)
	if tagEnd < 0 {
		return start
	}
	for _, name := range [...]string{"code", "pre", "a"} {
		if !isOpenTagOf(html[start:tagEnd], name) {
			continue
		}
		if end := htmlCloseTagEnd(html, tagEnd, name); end >= 0 {
			return end
		}
		// Unclosed element: protect only the opening tag itself.
		break
	}
	return tagEnd
}

// htmlTagEnd returns the index just past the '>' that closes the tag starting
// at html[start], ignoring any '>' inside quoted attribute values. It returns
// -1 when html[start:] does not look like a complete tag.
func htmlTagEnd(html string, start int) int {
	if start+1 >= len(html) {
		return -1
	}
	next := html[start+1]
	isTagStart := next == '/' || next == '!' ||
		('a' <= next && next <= 'z') || ('A' <= next && next <= 'Z')
	if !isTagStart {
		return -1
	}

	var quote byte // active attribute quote character, 0 when outside quotes
	for j := start + 1; j < len(html); j++ {
		c := html[j]
		switch {
		case quote != 0:
			if c == quote {
				quote = 0
			}
		case c == '"' || c == '\'':
			quote = c
		case c == '>':
			return j + 1
		}
	}
	return -1
}

// isOpenTagOf reports whether tag (complete markup such as `<a href="x">`) is
// an opening tag of the element name, compared case-insensitively. The name
// must end at whitespace, '/' or '>' so that "a" does not match "<abbr>".
func isOpenTagOf(tag, name string) bool {
	nameEnd := 1 + len(name)
	if len(tag) <= nameEnd || !strings.EqualFold(tag[1:nameEnd], name) {
		return false
	}
	switch tag[nameEnd] {
	case ' ', '\t', '\n', '\r', '\f', '/', '>':
		return true
	}
	return false
}

// htmlCloseTagEnd returns the index just past the first "</name>" found at or
// after from (case-insensitive), or -1 when there is none.
func htmlCloseTagEnd(html string, from int, name string) int {
	for j := from; ; j++ {
		rel := strings.Index(html[j:], "</")
		if rel < 0 {
			return -1
		}
		j += rel
		end := j + len(name) + 3 // len("</") + len(name) + len(">")
		if end > len(html) {
			return -1
		}
		if html[end-1] == '>' && strings.EqualFold(html[j+2:end-1], name) {
			return end
		}
	}
}

// isWordChar reports whether b is an ASCII letter, an ASCII digit or an
// underscore.
func isWordChar(b byte) bool {
	switch {
	case 'a' <= b && b <= 'z', 'A' <= b && b <= 'Z', '0' <= b && b <= '9':
		return true
	}
	return b == '_'
}