package markdown import ( "bytes" "fmt" "html/template" "regexp" "strings" "github.com/microcosm-cc/bluemonday" "github.com/yuin/goldmark" highlighting "github.com/yuin/goldmark-highlighting/v2" "github.com/yuin/goldmark/extension" "github.com/yuin/goldmark/renderer/html" ) var ( md goldmark.Markdown policy *bluemonday.Policy // Matches @username in raw markdown for extraction rawMentionRegex = regexp.MustCompile(`(?:^|[\s(])@(\w+)`) // Matches :shortcode: patterns for emoji replacement emojiRegex = regexp.MustCompile(`:(\w+):`) // Matches mermaid code blocks in rendered HTML mermaidBlockRegex = regexp.MustCompile(`(?s)
]*>]*class="[^"]*language-mermaid[^"]*"[^>]*>(.*?)\s*`)
// emojiMap maps shortcode names to Unicode emoji characters.
emojiMap = map[string]string{
// Smileys & Emotion
"smile": "đ",
"laughing": "đ",
"blush": "đ",
"smiley": "đ",
"relaxed": "âēī¸",
"smirk": "đ",
"heart_eyes": "đ",
"kissing_heart": "đ",
"kissing_closed_eyes": "đ",
"flushed": "đŗ",
"relieved": "đ",
"satisfied": "đ",
"grin": "đ",
"wink": "đ",
"stuck_out_tongue_winking_eye": "đ",
"stuck_out_tongue": "đ",
"sleeping": "đ´",
"worried": "đ",
"frowning": "đĻ",
"anguished": "đ§",
"open_mouth": "đŽ",
"grimacing": "đŦ",
"confused": "đ",
"hushed": "đ¯",
"expressionless": "đ",
"unamused": "đ",
"sweat_smile": "đ
",
"sweat": "đ",
"disappointed_relieved": "đĨ",
"weary": "đŠ",
"pensive": "đ",
"disappointed": "đ",
"confounded": "đ",
"fearful": "đ¨",
"cold_sweat": "đ°",
"persevere": "đŖ",
"cry": "đĸ",
"sob": "đ",
"joy": "đ",
"astonished": "đ˛",
"scream": "đą",
"tired_face": "đĢ",
"angry": "đ ",
"rage": "đĄ",
"triumph": "đ¤",
"sleepy": "đĒ",
"yum": "đ",
"mask": "đˇ",
"sunglasses": "đ",
"dizzy_face": "đĩ",
"imp": "đŋ",
"smiling_imp": "đ",
"neutral_face": "đ",
"no_mouth": "đļ",
"innocent": "đ",
"alien": "đŊ",
"yellow_heart": "đ",
"blue_heart": "đ",
"purple_heart": "đ",
"heart": "â¤ī¸",
"green_heart": "đ",
"broken_heart": "đ",
"heartbeat": "đ",
"heartpulse": "đ",
"two_hearts": "đ",
"sparkling_heart": "đ",
"star": "â",
"star2": "đ",
"dizzy": "đĢ",
"boom": "đĨ",
"anger": "đĸ",
"exclamation": "â",
"question": "â",
"grey_exclamation": "â",
"grey_question": "â",
"zzz": "đ¤",
"dash": "đ¨",
"sweat_drops": "đĻ",
"notes": "đļ",
"musical_note": "đĩ",
"fire": "đĨ",
"poop": "đŠ",
"thumbsup": "đ",
"+1": "đ",
"thumbsdown": "đ",
"-1": "đ",
"ok_hand": "đ",
"punch": "đ",
"fist": "â",
"v": "âī¸",
"wave": "đ",
"hand": "â",
"open_hands": "đ",
"point_up": "âī¸",
"point_down": "đ",
"point_left": "đ",
"point_right": "đ",
"raised_hands": "đ",
"pray": "đ",
"clap": "đ",
"muscle": "đĒ",
"eyes": "đ",
"tongue": "đ
",
"lips": "đ",
// People
"boy": "đĻ",
"girl": "đ§",
"woman": "đŠ",
"man": "đ¨",
"baby": "đļ",
"older_man": "đ´",
"older_woman": "đĩ",
"skull": "đ",
"ghost": "đģ",
"robot": "đ¤",
// Nature
"sunny": "âī¸",
"umbrella": "âī¸",
"cloud": "âī¸",
"snowflake": "âī¸",
"snowman": "â",
"zap": "âĄ",
"cyclone": "đ",
"foggy": "đ",
"rainbow": "đ",
"ocean": "đ",
"dog": "đļ",
"cat": "đą",
"mouse": "đ",
"hamster": "đš",
"rabbit": "đ°",
"bear": "đģ",
"panda_face": "đŧ",
"pig": "đˇ",
"frog": "đ¸",
"monkey_face": "đĩ",
"see_no_evil": "đ",
"hear_no_evil": "đ",
"speak_no_evil": "đ",
"chicken": "đ",
"penguin": "đ§",
"bird": "đĻ",
"fish": "đ",
"whale": "đŗ",
"bug": "đ",
"honeybee": "đ",
"beetle": "đ",
"snail": "đ",
"octopus": "đ",
"turtle": "đĸ",
"snake": "đ",
"crab": "đĻ",
"unicorn": "đĻ",
// Food & Drink
"apple": "đ",
"green_apple": "đ",
"pear": "đ",
"tangerine": "đ",
"lemon": "đ",
"banana": "đ",
"watermelon": "đ",
"grapes": "đ",
"strawberry": "đ",
"peach": "đ",
"cherries": "đ",
"pizza": "đ",
"hamburger": "đ",
"fries": "đ",
"hotdog": "đ",
"taco": "đŽ",
"burrito": "đ¯",
"egg": "đĨ",
"coffee": "â",
"tea": "đĩ",
"beer": "đē",
"beers": "đģ",
"wine_glass": "đˇ",
"cocktail": "đ¸",
"cake": "đ°",
"cookie": "đĒ",
"chocolate_bar": "đĢ",
"candy": "đŦ",
"icecream": "đĻ",
"doughnut": "đŠ",
// Objects
"rocket": "đ",
"airplane": "âī¸",
"car": "đ",
"taxi": "đ",
"bus": "đ",
"ambulance": "đ",
"fire_engine": "đ",
"bike": "đ˛",
"ship": "đĸ",
"phone": "đą",
"computer": "đģ",
"keyboard": "â¨ī¸",
"desktop_computer": "đĨī¸",
"tv": "đē",
"camera": "đˇ",
"mag": "đ",
"bulb": "đĄ",
"flashlight": "đĻ",
"wrench": "đ§",
"hammer": "đ¨",
"nut_and_bolt": "đŠ",
"gear": "âī¸",
"lock": "đ",
"unlock": "đ",
"key": "đ",
"bell": "đ",
"bookmark": "đ",
"link": "đ",
"bomb": "đŖ",
"gem": "đ",
"knife": "đĒ",
"shield": "đĄī¸",
"trophy": "đ",
"medal": "đ
",
"crown": "đ",
"moneybag": "đ°",
"dollar": "đĩ",
"credit_card": "đŗ",
"envelope": "âī¸",
"email": "đ§",
"inbox_tray": "đĨ",
"outbox_tray": "đ¤",
"package": "đĻ",
"memo": "đ",
"pencil": "âī¸",
"pencil2": "âī¸",
"book": "đ",
"books": "đ",
"clipboard": "đ",
"calendar": "đ
",
"chart_with_upwards_trend": "đ",
"chart_with_downwards_trend": "đ",
"bar_chart": "đ",
"pushpin": "đ",
"paperclip": "đ",
"scissors": "âī¸",
"file_folder": "đ",
"open_file_folder": "đ",
"wastebasket": "đī¸",
// Symbols
"white_check_mark": "â
",
"ballot_box_with_check": "âī¸",
"heavy_check_mark": "âī¸",
"x": "â",
"negative_squared_cross_mark": "â",
"bangbang": "âŧī¸",
"interrobang": "âī¸",
"warning": "â ī¸",
"no_entry": "â",
"recycle": "âģī¸",
"100": "đ¯",
"arrow_up": "âŦī¸",
"arrow_down": "âŦī¸",
"arrow_left": "âŦ
ī¸",
"arrow_right": "âĄī¸",
"arrow_upper_right": "âī¸",
"arrow_lower_right": "âī¸",
"arrow_upper_left": "âī¸",
"arrow_lower_left": "âī¸",
"arrows_counterclockwise": "đ",
"hash": "#ī¸âŖ",
"information_source": "âšī¸",
"abc": "đ¤",
"red_circle": "đ´",
"blue_circle": "đĩ",
"large_orange_diamond": "đļ",
"large_blue_diamond": "đˇ",
"white_circle": "âĒ",
"black_circle": "âĢ",
// Flags
"checkered_flag": "đ",
"triangular_flag_on_post": "đŠ",
"crossed_flags": "đ",
"flag_white": "đŗī¸",
"flag_black": "đ´",
// Celebration
"tada": "đ",
"confetti_ball": "đ",
"balloon": "đ",
"birthday": "đ",
"gift": "đ",
"sparkles": "â¨",
"sparkler": "đ",
"fireworks": "đ",
"ribbon": "đ",
"art": "đ¨",
"performing_arts": "đ",
"microphone": "đ¤",
"headphones": "đ§",
"musical_keyboard": "đš",
"guitar": "đ¸",
"soccer": "âŊ",
"basketball": "đ",
"football": "đ",
"baseball": "âž",
"tennis": "đž",
"golf": "âŗ",
// Places
"house": "đ ",
"office": "đĸ",
"hospital": "đĨ",
"school": "đĢ",
"earth_americas": "đ",
"earth_africa": "đ",
"earth_asia": "đ",
"globe_with_meridians": "đ",
"camping": "đī¸",
"mount_fuji": "đģ",
"sunrise": "đ
",
"sunset": "đ",
// Clock
"hourglass": "â",
"watch": "â",
"alarm_clock": "â°",
"stopwatch": "âąī¸",
"timer_clock": "â˛ī¸",
"clock": "đ",
}
)
func init() {
md = goldmark.New(
goldmark.WithExtensions(
extension.GFM,
extension.Footnote,
highlighting.NewHighlighting(
highlighting.WithStyle("github"),
),
),
goldmark.WithRendererOptions(
html.WithHardWraps(),
),
)
policy = bluemonday.UGCPolicy()
policy.AllowAttrs("class").OnElements("code", "pre", "span", "div", "ul", "li")
policy.AllowAttrs("style").OnElements("span", "pre", "code")
// Allow task list checkboxes generated by goldmark GFM
policy.AllowAttrs("type").Matching(regexp.MustCompile(`^checkbox$`)).OnElements("input")
policy.AllowAttrs("checked", "disabled").OnElements("input")
// Footnote support
policy.AllowElements("sup", "hr")
policy.AllowAttrs("id").Matching(regexp.MustCompile(`^fnref\d*:\d+$`)).OnElements("sup")
policy.AllowAttrs("id").Matching(regexp.MustCompile(`^fn:\d+$`)).OnElements("li")
policy.AllowAttrs("class").Matching(regexp.MustCompile(`^footnote-ref$`)).OnElements("a")
policy.AllowAttrs("class").Matching(regexp.MustCompile(`^footnote-backref$`)).OnElements("a")
policy.AllowAttrs("class").Matching(regexp.MustCompile(`^footnotes$`)).OnElements("div")
policy.AllowAttrs("role").Matching(regexp.MustCompile(`^doc-(noteref|backlink|endnotes)$`)).OnElements("a", "div")
policy.AllowAttrs("href").Matching(regexp.MustCompile(`^#fn(ref\d*)?:\d+$`)).OnElements("a")
}
// ExtractMentions scans every given raw markdown text for @username mentions
// and returns each distinct username once (without the leading "@"), in order
// of first appearance. The result is nil when no mention is found.
func ExtractMentions(texts ...string) []string {
	var usernames []string
	known := make(map[string]struct{})
	for _, body := range texts {
		matches := rawMentionRegex.FindAllStringSubmatch(body, -1)
		for _, groups := range matches {
			// Capture group 1 holds the username.
			name := groups[1]
			if _, dup := known[name]; dup {
				continue
			}
			known[name] = struct{}{}
			usernames = append(usernames, name)
		}
	}
	return usernames
}
// RenderMarkdown turns markdown text into sanitized HTML.
//
// The converted output is sanitized first and then post-processed for mermaid
// blocks and emoji shortcodes. When mentions (username -> display name) is
// non-empty, @mentions of those users are styled as well. If the markdown
// conversion fails, the HTML-escaped input is returned instead.
func RenderMarkdown(input string, mentions map[string]string) template.HTML {
	rendered := new(bytes.Buffer)
	if convErr := md.Convert([]byte(input), rendered); convErr != nil {
		return template.HTML(template.HTMLEscapeString(input))
	}

	// Sanitize before post-processing so the markup added below is kept.
	out := string(policy.SanitizeBytes(rendered.Bytes()))
	out = processEmojis(processMermaid(out))
	if len(mentions) == 0 {
		return template.HTML(out)
	}
	return template.HTML(processMentions(out, mentions))
}
// processMentions replaces @username in HTML text with styled spans.
//
// mentions maps a username to its display name. Every standalone "@username"
// becomes a <span> whose title attribute is the display name; both values are
// HTML-escaped. Replacement is delegated to replaceOutsideCode, so text inside
// <code> and <pre> elements is left untouched. Entries with an empty username
// are ignored because a bare "@" would otherwise match.
func processMentions(html string, mentions map[string]string) string {
	for username, displayName := range mentions {
		if username == "" {
			continue
		}
		old := "@" + username

		// Encode "@" inside the title as a character reference. Map iteration
		// order is random, and a display name such as "boss of @bob" must not
		// be rewritten by a later pass for another user.
		title := strings.ReplaceAll(template.HTMLEscapeString(displayName), "@", "&#64;")

		// NOTE(review): the span's class name was reconstructed; confirm it
		// matches the stylesheet.
		replacement := fmt.Sprintf(`<span class="mention" title="%s">@%s</span>`,
			title, template.HTMLEscapeString(username))
		html = replaceOutsideCode(html, old, replacement)
	}
	return html
}
// replaceOutsideCode replaces old with new in html, but skips content inside and tags.
func replaceOutsideCode(html, old, replacement string) string {
var result strings.Builder
i := 0
for i < len(html) {
// Check if we're entering a code or pre block
if i < len(html)-1 && html[i] == '<' {
lower := strings.ToLower(html[i:])
if strings.HasPrefix(lower, " 0 && isWordChar(html[i-1])
after := i+len(old) < len(html) && isWordChar(html[i+len(old)])
if !before && !after {
result.WriteString(replacement)
i += len(old)
continue
}
}
result.WriteByte(html[i])
i++
}
return result.String()
}
// processEmojis swaps :shortcode: patterns for their Unicode emoji.
//
// Candidate shortcodes are collected once from the incoming HTML. Each
// distinct shortcode with an emojiMap entry is then replaced through
// replaceOutsideCode, which leaves <code> and <pre> content alone. Unknown
// shortcodes stay as written.
func processEmojis(html string) string {
	handled := make(map[string]struct{})
	for _, shortcode := range emojiRegex.FindAllString(html, -1) {
		if _, dup := handled[shortcode]; dup {
			continue
		}
		handled[shortcode] = struct{}{}

		// Drop the surrounding colons to get the lookup key.
		glyph, known := emojiMap[shortcode[1:len(shortcode)-1]]
		if !known {
			continue
		}
		html = replaceOutsideCode(html, shortcode, glyph)
	}
	return html
}
// processMermaid transforms mermaid code blocks from goldmark's rendered format
// (a <pre> wrapping a <code class="language-mermaid"> element) into the format
// mermaid.js expects: <pre class="mermaid">...content...</pre>
//
// The content captured by mermaidBlockRegex is inserted unchanged.
// NOTE(review): the wrapper markup was reconstructed; confirm the element and
// class against the page's mermaid initialisation.
func processMermaid(html string) string {
	return mermaidBlockRegex.ReplaceAllString(html, `<pre class="mermaid">$1</pre>`)
}
// isWordChar reports whether b is an ASCII letter, an ASCII digit or an
// underscore.
func isWordChar(b byte) bool {
	switch {
	case b == '_':
		return true
	case '0' <= b && b <= '9':
		return true
	case 'a' <= b && b <= 'z', 'A' <= b && b <= 'Z':
		return true
	default:
		return false
	}
}