forgejo-tickets/internal/markdown/markdown.go

510 lines
18 KiB
Go
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package markdown
import (
"bytes"
"fmt"
"html/template"
"regexp"
"strings"
"github.com/microcosm-cc/bluemonday"
"github.com/yuin/goldmark"
highlighting "github.com/yuin/goldmark-highlighting/v2"
"github.com/yuin/goldmark/extension"
"github.com/yuin/goldmark/renderer/html"
)
var (
	// md is the shared goldmark renderer; it is configured in init.
	md goldmark.Markdown
	// policy is the shared bluemonday sanitisation policy; it is configured in init.
	policy *bluemonday.Policy

	// rawMentionRegex matches @username in raw markdown for extraction. The
	// mention must sit at the start of the text or follow whitespace or "(".
	rawMentionRegex = regexp.MustCompile(`(?:^|[\s(])@(\w+)`)

	// emojiRegex matches :shortcode: patterns for emoji replacement. Besides
	// word characters the name may contain "+" and "-" so that the ":+1:" and
	// ":-1:" entries of emojiMap can actually be reached.
	emojiRegex = regexp.MustCompile(`:([\w+-]+):`)

	// mermaidBlockRegex matches mermaid code blocks in rendered HTML and
	// captures the block's content.
	mermaidBlockRegex = regexp.MustCompile(`(?s)<pre[^>]*><code[^>]*class="[^"]*language-mermaid[^"]*"[^>]*>(.*?)</code>\s*</pre>`)

	// emojiMap maps shortcode names (without the surrounding colons) to
	// Unicode emoji characters.
	emojiMap = map[string]string{
		// Smileys & Emotion
		"smile": "😄",
		"laughing": "😆",
		"blush": "😊",
		"smiley": "😃",
		"relaxed": "☺️",
		"smirk": "😏",
		"heart_eyes": "😍",
		"kissing_heart": "😘",
		"kissing_closed_eyes": "😚",
		"flushed": "😳",
		"relieved": "😌",
		"satisfied": "😆",
		"grin": "😁",
		"wink": "😉",
		"stuck_out_tongue_winking_eye": "😜",
		"stuck_out_tongue": "😛",
		"sleeping": "😴",
		"worried": "😟",
		"frowning": "😦",
		"anguished": "😧",
		"open_mouth": "😮",
		"grimacing": "😬",
		"confused": "😕",
		"hushed": "😯",
		"expressionless": "😑",
		"unamused": "😒",
		"sweat_smile": "😅",
		"sweat": "😓",
		"disappointed_relieved": "😥",
		"weary": "😩",
		"pensive": "😔",
		"disappointed": "😞",
		"confounded": "😖",
		"fearful": "😨",
		"cold_sweat": "😰",
		"persevere": "😣",
		"cry": "😢",
		"sob": "😭",
		"joy": "😂",
		"astonished": "😲",
		"scream": "😱",
		"tired_face": "😫",
		"angry": "😠",
		"rage": "😡",
		"triumph": "😤",
		"sleepy": "😪",
		"yum": "😋",
		"mask": "😷",
		"sunglasses": "😎",
		"dizzy_face": "😵",
		"imp": "👿",
		"smiling_imp": "😈",
		"neutral_face": "😐",
		"no_mouth": "😶",
		"innocent": "😇",
		"alien": "👽",
		"yellow_heart": "💛",
		"blue_heart": "💙",
		"purple_heart": "💜",
		"heart": "❤️",
		"green_heart": "💚",
		"broken_heart": "💔",
		"heartbeat": "💓",
		"heartpulse": "💗",
		"two_hearts": "💕",
		"sparkling_heart": "💖",
		"star": "⭐",
		"star2": "🌟",
		"dizzy": "💫",
		"boom": "💥",
		"anger": "💢",
		"exclamation": "❗",
		"question": "❓",
		"grey_exclamation": "❕",
		"grey_question": "❔",
		"zzz": "💤",
		"dash": "💨",
		"sweat_drops": "💦",
		"notes": "🎶",
		"musical_note": "🎵",
		"fire": "🔥",
		"poop": "💩",
		"thumbsup": "👍",
		"+1": "👍",
		"thumbsdown": "👎",
		"-1": "👎",
		"ok_hand": "👌",
		"punch": "👊",
		"fist": "✊",
		"v": "✌️",
		"wave": "👋",
		"hand": "✋",
		"open_hands": "👐",
		"point_up": "☝️",
		"point_down": "👇",
		"point_left": "👈",
		"point_right": "👉",
		"raised_hands": "🙌",
		"pray": "🙏",
		"clap": "👏",
		"muscle": "💪",
		"eyes": "👀",
		"tongue": "👅",
		"lips": "👄",
		// People
		"boy": "👦",
		"girl": "👧",
		"woman": "👩",
		"man": "👨",
		"baby": "👶",
		"older_man": "👴",
		"older_woman": "👵",
		"skull": "💀",
		"ghost": "👻",
		"robot": "🤖",
		// Nature
		"sunny": "☀️",
		"umbrella": "☂️",
		"cloud": "☁️",
		"snowflake": "❄️",
		"snowman": "⛄",
		"zap": "⚡",
		"cyclone": "🌀",
		"foggy": "🌁",
		"rainbow": "🌈",
		"ocean": "🌊",
		"dog": "🐶",
		"cat": "🐱",
		"mouse": "🐭",
		"hamster": "🐹",
		"rabbit": "🐰",
		"bear": "🐻",
		"panda_face": "🐼",
		"pig": "🐷",
		"frog": "🐸",
		"monkey_face": "🐵",
		"see_no_evil": "🙈",
		"hear_no_evil": "🙉",
		"speak_no_evil": "🙊",
		"chicken": "🐔",
		"penguin": "🐧",
		"bird": "🐦",
		"fish": "🐟",
		"whale": "🐳",
		"bug": "🐛",
		"honeybee": "🐝",
		"beetle": "🐞",
		"snail": "🐌",
		"octopus": "🐙",
		"turtle": "🐢",
		"snake": "🐍",
		"crab": "🦀",
		"unicorn": "🦄",
		// Food & Drink
		"apple": "🍎",
		"green_apple": "🍏",
		"pear": "🍐",
		"tangerine": "🍊",
		"lemon": "🍋",
		"banana": "🍌",
		"watermelon": "🍉",
		"grapes": "🍇",
		"strawberry": "🍓",
		"peach": "🍑",
		"cherries": "🍒",
		"pizza": "🍕",
		"hamburger": "🍔",
		"fries": "🍟",
		"hotdog": "🌭",
		"taco": "🌮",
		"burrito": "🌯",
		"egg": "🥚",
		"coffee": "☕",
		"tea": "🍵",
		"beer": "🍺",
		"beers": "🍻",
		"wine_glass": "🍷",
		"cocktail": "🍸",
		"cake": "🍰",
		"cookie": "🍪",
		"chocolate_bar": "🍫",
		"candy": "🍬",
		"icecream": "🍦",
		"doughnut": "🍩",
		// Objects
		"rocket": "🚀",
		"airplane": "✈️",
		"car": "🚗",
		"taxi": "🚕",
		"bus": "🚌",
		"ambulance": "🚑",
		"fire_engine": "🚒",
		"bike": "🚲",
		"ship": "🚢",
		"phone": "📱",
		"computer": "💻",
		"keyboard": "⌨️",
		"desktop_computer": "🖥️",
		"tv": "📺",
		"camera": "📷",
		"mag": "🔍",
		"bulb": "💡",
		"flashlight": "🔦",
		"wrench": "🔧",
		"hammer": "🔨",
		"nut_and_bolt": "🔩",
		"gear": "⚙️",
		"lock": "🔒",
		"unlock": "🔓",
		"key": "🔑",
		"bell": "🔔",
		"bookmark": "🔖",
		"link": "🔗",
		"bomb": "💣",
		"gem": "💎",
		"knife": "🔪",
		"shield": "🛡️",
		"trophy": "🏆",
		"medal": "🏅",
		"crown": "👑",
		"moneybag": "💰",
		"dollar": "💵",
		"credit_card": "💳",
		"envelope": "✉️",
		"email": "📧",
		"inbox_tray": "📥",
		"outbox_tray": "📤",
		"package": "📦",
		"memo": "📝",
		"pencil": "✏️",
		"pencil2": "✏️",
		"book": "📖",
		"books": "📚",
		"clipboard": "📋",
		"calendar": "📅",
		"chart_with_upwards_trend": "📈",
		"chart_with_downwards_trend": "📉",
		"bar_chart": "📊",
		"pushpin": "📌",
		"paperclip": "📎",
		"scissors": "✂️",
		"file_folder": "📁",
		"open_file_folder": "📂",
		"wastebasket": "🗑️",
		// Symbols
		"white_check_mark": "✅",
		"ballot_box_with_check": "☑️",
		"heavy_check_mark": "✔️",
		"x": "❌",
		"negative_squared_cross_mark": "❎",
		"bangbang": "‼️",
		"interrobang": "⁉️",
		"warning": "⚠️",
		"no_entry": "⛔",
		"recycle": "♻️",
		"100": "💯",
		"arrow_up": "⬆️",
		"arrow_down": "⬇️",
		"arrow_left": "⬅️",
		"arrow_right": "➡️",
		"arrow_upper_right": "↗️",
		"arrow_lower_right": "↘️",
		"arrow_upper_left": "↖️",
		"arrow_lower_left": "↙️",
		"arrows_counterclockwise": "🔄",
		"hash": "#️⃣",
		// INFORMATION SOURCE followed by the emoji variation selector. Spelled
		// with escapes because the glyph is easily lost or mistaken for "i".
		"information_source": "\u2139\ufe0f",
		"abc": "🔤",
		"red_circle": "🔴",
		"blue_circle": "🔵",
		"large_orange_diamond": "🔶",
		"large_blue_diamond": "🔷",
		"white_circle": "⚪",
		"black_circle": "⚫",
		// Flags
		"checkered_flag": "🏁",
		"triangular_flag_on_post": "🚩",
		"crossed_flags": "🎌",
		"flag_white": "🏳️",
		"flag_black": "🏴",
		// Celebration
		"tada": "🎉",
		"confetti_ball": "🎊",
		"balloon": "🎈",
		"birthday": "🎂",
		"gift": "🎁",
		"sparkles": "✨",
		"sparkler": "🎇",
		"fireworks": "🎆",
		"ribbon": "🎀",
		"art": "🎨",
		"performing_arts": "🎭",
		"microphone": "🎤",
		"headphones": "🎧",
		"musical_keyboard": "🎹",
		"guitar": "🎸",
		"soccer": "⚽",
		"basketball": "🏀",
		"football": "🏈",
		"baseball": "⚾",
		"tennis": "🎾",
		"golf": "⛳",
		// Places
		"house": "🏠",
		"office": "🏢",
		"hospital": "🏥",
		"school": "🏫",
		"earth_americas": "🌎",
		"earth_africa": "🌍",
		"earth_asia": "🌏",
		"globe_with_meridians": "🌐",
		"camping": "🏕️",
		"mount_fuji": "🗻",
		"sunrise": "🌅",
		"sunset": "🌇",
		// Clock
		"hourglass": "⌛",
		"watch": "⌚",
		"alarm_clock": "⏰",
		"stopwatch": "⏱️",
		"timer_clock": "⏲️",
		"clock": "🕐",
	}
)
func init() {
md = goldmark.New(
goldmark.WithExtensions(
extension.GFM,
extension.Footnote,
highlighting.NewHighlighting(
highlighting.WithStyle("github"),
),
),
goldmark.WithRendererOptions(
html.WithHardWraps(),
),
)
policy = bluemonday.UGCPolicy()
policy.AllowAttrs("class").OnElements("code", "pre", "span", "div", "ul", "li")
policy.AllowAttrs("style").OnElements("span", "pre", "code")
// Allow task list checkboxes generated by goldmark GFM
policy.AllowAttrs("type").Matching(regexp.MustCompile(`^checkbox$`)).OnElements("input")
policy.AllowAttrs("checked", "disabled").OnElements("input")
// Footnote support
policy.AllowElements("sup", "hr")
policy.AllowAttrs("id").Matching(regexp.MustCompile(`^fnref\d*:\d+$`)).OnElements("sup")
policy.AllowAttrs("id").Matching(regexp.MustCompile(`^fn:\d+$`)).OnElements("li")
policy.AllowAttrs("class").Matching(regexp.MustCompile(`^footnote-ref$`)).OnElements("a")
policy.AllowAttrs("class").Matching(regexp.MustCompile(`^footnote-backref$`)).OnElements("a")
policy.AllowAttrs("class").Matching(regexp.MustCompile(`^footnotes$`)).OnElements("div")
policy.AllowAttrs("role").Matching(regexp.MustCompile(`^doc-(noteref|backlink|endnotes)$`)).OnElements("a", "div")
policy.AllowAttrs("href").Matching(regexp.MustCompile(`^#fn(ref\d*)?:\d+$`)).OnElements("a")
}
// ExtractMentions scans every given raw markdown text for @username mentions
// (as recognised by rawMentionRegex) and returns the usernames without the
// leading "@". Each username appears once, in order of first occurrence
// across all texts. The result is nil when nothing matched.
func ExtractMentions(texts ...string) []string {
	var usernames []string
	known := make(map[string]struct{})
	for _, body := range texts {
		matches := rawMentionRegex.FindAllStringSubmatch(body, -1)
		for _, groups := range matches {
			// groups[1] is the username capture, without the "@".
			name := groups[1]
			if _, dup := known[name]; dup {
				continue
			}
			known[name] = struct{}{}
			usernames = append(usernames, name)
		}
	}
	return usernames
}
// RenderMarkdown converts markdown text to sanitized HTML.
//
// The input is rendered with the package renderer and sanitized with the
// package policy; mermaid blocks and emoji shortcodes are then post-processed.
// If mentions (username -> display name) is non-empty, matching @mentions are
// styled as well. Should rendering fail, the HTML-escaped input is returned
// instead.
func RenderMarkdown(input string, mentions map[string]string) template.HTML {
	var rendered bytes.Buffer
	err := md.Convert([]byte(input), &rendered)
	if err != nil {
		return template.HTML(template.HTMLEscapeString(input))
	}

	// Post-processing runs on the already-sanitized markup.
	out := string(policy.SanitizeBytes(rendered.Bytes()))
	out = processEmojis(processMermaid(out))
	if len(mentions) != 0 {
		out = processMentions(out, mentions)
	}
	return template.HTML(out)
}
// processMentions wraps every "@username" whose username is a key of mentions
// in <span class="mention" title="DISPLAY NAME">@username</span>, where the
// display name is the map value (HTML-escaped).
//
// The HTML is rewritten in a single pass, so text inserted for one mention is
// never rescanned for another. The following are copied through untouched:
//   - <code> and <pre> elements, including their content;
//   - the markup of any other tag, so attribute values such as
//     href="https://example.org/@bob" are not corrupted.
//
// Text inside other elements, including link text, is still processed.
// A mention only counts when it is not directly adjacent to a word character
// on either side; when several usernames match at one position the longest
// wins. Empty usernames are ignored.
func processMentions(html string, mentions map[string]string) string {
	if len(mentions) == 0 {
		return html
	}
	var out strings.Builder
	out.Grow(len(html))
	for i := 0; i < len(html); {
		if end := codeElementEnd(html, i); end > i {
			out.WriteString(html[i:end])
			i = end
			continue
		}
		if end := tagEnd(html, i); end > i {
			out.WriteString(html[i:end])
			i = end
			continue
		}
		if html[i] == '@' {
			if name, ok := mentionAt(html, i, mentions); ok {
				fmt.Fprintf(&out, `<span class="mention" title="%s">@%s</span>`,
					template.HTMLEscapeString(mentions[name]),
					template.HTMLEscapeString(name))
				i += 1 + len(name)
				continue
			}
		}
		out.WriteByte(html[i])
		i++
	}
	return out.String()
}

// replaceOutsideCode replaces old with replacement in html, but skips content
// inside <code> and <pre> elements. An occurrence is only replaced when it is
// not directly adjacent to a word character on either side. An empty old
// leaves html unchanged.
func replaceOutsideCode(html, old, replacement string) string {
	if old == "" {
		// Nothing to search for; an empty match would never advance.
		return html
	}
	var out strings.Builder
	out.Grow(len(html))
	for i := 0; i < len(html); {
		if end := codeElementEnd(html, i); end > i {
			out.WriteString(html[i:end])
			i = end
			continue
		}
		if strings.HasPrefix(html[i:], old) && standsAlone(html, i, i+len(old)) {
			out.WriteString(replacement)
			i += len(old)
			continue
		}
		out.WriteByte(html[i])
		i++
	}
	return out.String()
}

// codeElementEnd returns the offset just past the closing tag of the <code>
// or <pre> element opening at html[i]. It returns i when no such element
// starts there or when its closing tag is missing. Tag names are compared
// ASCII case-insensitively on the original bytes, so offsets always refer to
// html itself.
func codeElementEnd(html string, i int) int {
	if html[i] != '<' {
		return i
	}
	rest := html[i:]
	var closeTag string
	switch {
	case hasPrefixFoldASCII(rest, "<code"):
		closeTag = "</code>"
	case hasPrefixFoldASCII(rest, "<pre"):
		closeTag = "</pre>"
	default:
		return i
	}
	for j := 0; j+len(closeTag) <= len(rest); j++ {
		if hasPrefixFoldASCII(rest[j:], closeTag) {
			return i + j + len(closeTag)
		}
	}
	return i
}

// tagEnd returns the offset just past the ">" that closes the tag opening at
// html[i], or i when html[i] is not "<" or no ">" follows.
func tagEnd(html string, i int) int {
	if html[i] != '<' {
		return i
	}
	closing := strings.IndexByte(html[i:], '>')
	if closing < 0 {
		return i
	}
	return i + closing + 1
}

// mentionAt returns the longest non-empty key of mentions that follows the
// "@" at html[at] as a stand-alone word, if there is one.
func mentionAt(html string, at int, mentions map[string]string) (string, bool) {
	if at > 0 && isWordChar(html[at-1]) {
		// Part of a longer word, e.g. the "@" of an e-mail address.
		return "", false
	}
	rest := html[at+1:]
	best := ""
	for name := range mentions {
		if len(name) <= len(best) || !strings.HasPrefix(rest, name) {
			continue
		}
		if end := at + 1 + len(name); end < len(html) && isWordChar(html[end]) {
			continue
		}
		best = name
	}
	return best, best != ""
}

// standsAlone reports whether html[start:end] has no word character directly
// before or after it.
func standsAlone(html string, start, end int) bool {
	if start > 0 && isWordChar(html[start-1]) {
		return false
	}
	if end < len(html) && isWordChar(html[end]) {
		return false
	}
	return true
}

// hasPrefixFoldASCII reports whether s starts with prefix, ignoring the case
// of ASCII letters in s. prefix must already be lower-case ASCII.
func hasPrefixFoldASCII(s, prefix string) bool {
	if len(s) < len(prefix) {
		return false
	}
	for k := 0; k < len(prefix); k++ {
		c := s[k]
		if c >= 'A' && c <= 'Z' {
			c += 'a' - 'A'
		}
		if c != prefix[k] {
			return false
		}
	}
	return true
}
// processEmojis swaps :shortcode: patterns for the Unicode emoji registered
// under that name in emojiMap. Shortcodes without an entry are left as they
// are. The substitution itself is delegated to replaceOutsideCode, so content
// inside <code> and <pre> tags is skipped.
func processEmojis(html string) string {
	handled := make(map[string]struct{})
	// The candidates are collected once from the incoming markup; each
	// distinct shortcode is then substituted across the whole document.
	for _, code := range emojiRegex.FindAllString(html, -1) {
		if _, done := handled[code]; done {
			continue
		}
		handled[code] = struct{}{}

		// Drop the leading and trailing colon to get the lookup key.
		emoji, known := emojiMap[code[1:len(code)-1]]
		if !known {
			continue
		}
		html = replaceOutsideCode(html, code, emoji)
	}
	return html
}
// processMermaid rewrites every rendered mermaid code block matched by
// mermaidBlockRegex into the form mermaid.js expects:
// <pre class="mermaid">...content...</pre>. The block's content is carried
// over unchanged.
func processMermaid(html string) string {
	// $1 is the content captured between the <code> tags.
	const mermaidPre = `<pre class="mermaid">$1</pre>`
	return mermaidBlockRegex.ReplaceAllString(html, mermaidPre)
}
// isWordChar reports whether b is an ASCII letter, an ASCII digit or an
// underscore.
func isWordChar(b byte) bool {
	switch {
	case 'a' <= b && b <= 'z', 'A' <= b && b <= 'Z', '0' <= b && b <= '9':
		return true
	default:
		return b == '_'
	}
}