package converter
import (
"bytes"
"fmt"
htmlpkg "html"
"strings"
"golang.org/x/net/html"
)
// ConfluenceToMarkdown converts Confluence storage format XML to Markdown.
func ConfluenceToMarkdown(source string) (string, error) {
// Preprocess: extract CDATA content and replace with escaped text,
// because x/net/html doesn't handle CDATA sections.
preprocessed := preprocessCDATA(source)
// Wrap in a root element so the HTML parser handles it correctly.
wrapped := "
" + preprocessed + "
"
doc, err := html.Parse(strings.NewReader(wrapped))
if err != nil {
return "", fmt.Errorf("parsing confluence xml: %w", err)
}
var buf bytes.Buffer
c := &xmlConverter{buf: &buf}
// Navigate to the wrapper div: html > head > body > div
body := findNode(doc, "body")
if body == nil {
return "", fmt.Errorf("unexpected parse structure")
}
wrapper := body.FirstChild
if wrapper != nil {
c.walkChildren(wrapper, 0)
}
result := buf.String()
// Clean up excessive blank lines
for strings.Contains(result, "\n\n\n") {
result = strings.ReplaceAll(result, "\n\n\n", "\n\n")
}
return strings.TrimSpace(result) + "\n", nil
}
// preprocessCDATA rewrites every <![CDATA[...]]> section of s as a
// <cdatacontent> element whose text is the HTML-escaped CDATA payload, since
// x/net/html doesn't parse CDATA sections. Text outside CDATA sections is
// copied through unchanged.
//
// If a section is opened but never closed, the opening marker is dropped and
// the remaining text is copied through as-is.
func preprocessCDATA(s string) string {
	const (
		cdataOpen  = "<![CDATA["
		cdataClose = "]]>"
	)

	var result strings.Builder
	for {
		idx := strings.Index(s, cdataOpen)
		if idx == -1 {
			// No further CDATA sections: emit the tail and stop.
			result.WriteString(s)
			break
		}
		// Copy everything before the section, then step past the marker.
		result.WriteString(s[:idx])
		s = s[idx+len(cdataOpen):]

		endIdx := strings.Index(s, cdataClose)
		if endIdx == -1 {
			result.WriteString(s)
			break
		}

		// Write CDATA content as a special element that we can detect.
		content := s[:endIdx]
		result.WriteString("<cdatacontent>")
		result.WriteString(htmlpkg.EscapeString(content))
		result.WriteString("</cdatacontent>")
		s = s[endIdx+len(cdataClose):]
	}
	return result.String()
}
// xmlConverter carries the output buffer and the list state used while
// walking the parsed node tree.
type xmlConverter struct {
// buf receives the generated Markdown.
buf *bytes.Buffer
// listDepth is the current list nesting level; the ul, ol and task-list
// handlers increment it while their children are walked.
listDepth int
// inListItem is true while an li element is being rendered; paragraph
// handling skips its trailing blank line when it is set.
inListItem bool
}
// walkChildren hands every direct child of n to walk, in document order,
// passing depth through unchanged.
func (c *xmlConverter) walkChildren(n *html.Node, depth int) {
	cur := n.FirstChild
	for cur != nil {
		c.walk(cur, depth)
		cur = cur.NextSibling
	}
}
// walk converts a single node to Markdown and appends the result to c.buf.
//
// Text nodes are written with their whitespace collapsed (whitespace-only
// text is dropped inside lists and written verbatim elsewhere). Nodes that
// are neither text nor elements only have their children walked. Elements are
// dispatched on their lowercased tag name; tags not handled here are passed
// to handleConfluenceElement.
func (c *xmlConverter) walk(n *html.Node, depth int) {
	if n.Type == html.TextNode {
		blank := strings.TrimSpace(n.Data) == ""
		switch {
		case blank && c.listDepth > 0:
			// Whitespace between list elements carries no content.
		case blank:
			c.buf.WriteString(n.Data)
		default:
			// Collapse runs of whitespace (XML indentation artifacts).
			c.buf.WriteString(collapseWhitespace(n.Data))
		}
		return
	}
	if n.Type != html.ElementNode {
		c.walkChildren(n, depth)
		return
	}

	// surround writes the children of n between two fixed strings.
	surround := func(before, after string) {
		c.buf.WriteString(before)
		c.walkChildren(n, depth)
		c.buf.WriteString(after)
	}

	switch tag := strings.ToLower(n.Data); tag {
	// Headings: the digit in the tag name is the number of '#' characters.
	case "h1", "h2", "h3", "h4", "h5", "h6":
		level := int(tag[1] - '0')
		surround("\n"+strings.Repeat("#", level)+" ", "\n\n")

	// Paragraphs: no blank line after a paragraph inside a list item.
	case "p":
		c.walkChildren(n, depth)
		if !c.inListItem {
			c.buf.WriteString("\n\n")
		}

	// Inline formatting
	case "strong", "b":
		surround("**", "**")
	case "em", "i":
		surround("*", "*")
	case "del", "s":
		surround("~~", "~~")
	case "code":
		surround("`", "`")

	// Links
	case "a":
		surround("[", "]("+getAttr(n, "href")+")")

	// Line break
	case "br":
		c.buf.WriteString(" \n")

	// Horizontal rule
	case "hr":
		c.buf.WriteString("\n---\n\n")

	// Lists: only the outermost list is padded with newlines.
	case "ul", "ol":
		c.listDepth++
		if c.listDepth == 1 {
			c.buf.WriteString("\n")
		}
		if tag == "ol" {
			c.walkOL(n, depth)
		} else {
			c.walkChildren(n, depth)
		}
		c.listDepth--
		if c.listDepth == 0 {
			c.buf.WriteString("\n")
		}

	case "li":
		outer := c.inListItem
		c.inListItem = true
		// When the item holds a task-status child, that element's handler
		// writes the checkbox prefix, so no bullet is written here.
		if !hasTaskStatus(n) {
			c.buf.WriteString(strings.Repeat(" ", max(0, c.listDepth-1)))
			c.buf.WriteString("- ")
		}
		c.walkChildrenInline(n, depth)
		c.buf.WriteString("\n")
		c.inListItem = outer

	// Tables are converted to a pipe table.
	case "table":
		c.renderTable(n, depth)

	// Layout/structural elements: skip the element, keep its children.
	case "div", "span", "tbody", "thead", "colgroup", "col", "content-wrapper":
		c.walkChildren(n, depth)

	// Everything else, including the Confluence ac:*/ri:* elements.
	default:
		c.handleConfluenceElement(n, tag, depth)
	}
}
// handleConfluenceElement converts the elements that walk does not handle
// itself. It dispatches on substrings of the (already lowercased) tag name:
// layout sections/cells, structured macros (code, info, note, warning, toc),
// images, links, emoticons, task lists, inline comment markers, user
// references and time elements. Any other tag just has its children walked.
// Attribute lookups try the namespaced key (for example "ac:name") first and
// fall back to the bare key.
//
// Case order matters because the matches are substring tests; for example
// the "task-list", "task-body", "task-status" and "task-id" cases come before
// the bare "task" case.
//
// NOTE(review): both branches of the "ac:link" case do the same thing.
// NOTE(review): several string literals in this function look empty or
// truncated in this copy (for example Fprintf calls whose format string has
// no verb for the argument that is passed, and raw-string literals that are
// not closed on the same line) — confirm against the canonical file before
// changing any code here.
func (c *xmlConverter) handleConfluenceElement(n *html.Node, tag string, depth int) {
switch {
// Layout elements — preserve as HTML comments for round-trip
case strings.Contains(tag, "ac:layout-section") || strings.Contains(tag, "layout-section"):
sectionType := getAttr(n, "ac:type")
if sectionType == "" {
sectionType = getAttr(n, "type")
}
fmt.Fprintf(c.buf, "\n", sectionType)
c.walkChildren(n, depth)
c.buf.WriteString("\n")
case strings.Contains(tag, "ac:layout-cell") || strings.Contains(tag, "layout-cell"):
c.buf.WriteString("\n")
c.walkChildren(n, depth)
c.buf.WriteString("\n")
case tag == "ac:layout" || strings.Contains(tag, "layout") && !strings.Contains(tag, "layout-"):
c.buf.WriteString("\n")
c.walkChildren(n, depth)
c.buf.WriteString("\n")
// Confluence structured macros (code blocks, panels, etc.)
case strings.Contains(tag, "structured-macro") || strings.Contains(tag, "ac:structured-macro"):
macroName := getAttr(n, "ac:name")
if macroName == "" {
macroName = getAttr(n, "name")
}
macroID := getAttr(n, "ac:macro-id")
if macroID == "" {
macroID = getAttr(n, "macro-id")
}
switch macroName {
case "code":
c.renderCodeMacro(n, macroID)
case "info":
c.renderPanelAsBlockquote(n, depth)
case "note":
c.renderPanelAsBlockquote(n, depth)
case "warning":
c.renderPanelAsBlockquote(n, depth)
case "toc":
// Preserve TOC macro as HTML comment
if macroID != "" {
fmt.Fprintf(c.buf, "\n", macroID)
} else {
c.buf.WriteString("\n")
}
default:
c.walkChildren(n, depth)
}
// Confluence images
case strings.Contains(tag, "image") || strings.Contains(tag, "ac:image"):
alt := getAttr(n, "ac:alt")
if alt == "" {
alt = getAttr(n, "alt")
}
imgRef := c.findImageRef(n)
if imgRef.isAttachment {
// Preserve attachment reference as round-trippable HTML
fmt.Fprintf(c.buf, `")
} else {
c.buf.WriteString("
c.buf.WriteString(imgRef.url)
c.buf.WriteString(")")
}
// Confluence links (user mentions, page links)
case strings.Contains(tag, "ac:link"):
if c.hasUserChild(n) {
c.walkChildren(n, depth)
} else {
c.walkChildren(n, depth)
}
// Confluence emoticons
case strings.Contains(tag, "emoticon") || strings.Contains(tag, "ac:emoticon"):
name := getAttr(n, "ac:name")
if name == "" {
name = getAttr(n, "name")
}
switch name {
case "plus":
c.buf.WriteString("(+)")
case "minus":
c.buf.WriteString("(-)")
case "question":
c.buf.WriteString("(?)")
case "tick":
c.buf.WriteString("(v)")
case "cross":
c.buf.WriteString("(x)")
}
// Confluence task lists
case strings.Contains(tag, "task-list"):
c.listDepth++
c.walkChildren(n, depth)
c.listDepth--
case strings.Contains(tag, "task-body"):
c.walkChildren(n, depth)
c.buf.WriteString("\n")
case strings.Contains(tag, "task-status"):
status := strings.TrimSpace(getTextContent(n))
indent := strings.Repeat(" ", max(0, c.listDepth-1))
if status == "complete" {
c.buf.WriteString(indent + "- [x] ")
} else {
c.buf.WriteString(indent + "- [ ] ")
}
case strings.Contains(tag, "task-id"):
// Skip task IDs
case strings.Contains(tag, "task") && !strings.Contains(tag, "task-"):
c.walkChildren(n, depth)
// Confluence inline comment markers — preserve as span with data attribute
case strings.Contains(tag, "inline-comment-marker"):
ref := getAttr(n, "ac:ref")
if ref == "" {
ref = getAttr(n, "ref")
}
if ref != "" {
fmt.Fprintf(c.buf, ``, ref)
c.walkChildren(n, depth)
c.buf.WriteString("")
} else {
c.walkChildren(n, depth)
}
// User references — preserve as round-trippable HTML span
case strings.Contains(tag, "ri:user"):
userKey := getAttr(n, "ri:userkey")
if userKey == "" {
userKey = getAttr(n, "userkey")
}
if userKey != "" {
fmt.Fprintf(c.buf, ``, userKey)
}
// Time elements
case tag == "time":
datetime := getAttr(n, "datetime")
if datetime != "" {
c.buf.WriteString(datetime)
}
// Fallback: just walk children
default:
c.walkChildren(n, depth)
}
}
// renderCodeMacro writes a Confluence "code" structured macro as a fenced
// Markdown code block. It searches the whole subtree of n for a parameter
// element named "language" (used as the fence info string) and for a
// plain-text-body element (whose content, read via getCDATAContent, becomes
// the block body). A line is written before the fence; when macroID is
// non-empty that line is produced with Fprintf and macroID as argument.
//
// NOTE(review): the format string passed to Fprintf contains no verb for
// macroID, so fmt appends an %!(EXTRA ...) marker to the output. The literal
// looks truncated in this copy — confirm the intended text.
func (c *xmlConverter) renderCodeMacro(n *html.Node, macroID string) {
language := ""
code := ""
// Walk children to find parameters and body
var walkMacro func(*html.Node)
walkMacro = func(node *html.Node) {
if node.Type == html.ElementNode {
tag := strings.ToLower(node.Data)
if strings.Contains(tag, "parameter") || strings.Contains(tag, "ac:parameter") {
name := getAttr(node, "ac:name")
if name == "" {
name = getAttr(node, "name")
}
if name == "language" {
language = getTextContent(node)
}
}
if strings.Contains(tag, "plain-text-body") || strings.Contains(tag, "ac:plain-text-body") {
code = getCDATAContent(node)
}
}
// Recursion continues below matched elements too, so the last matching
// parameter/body in document order wins.
for child := node.FirstChild; child != nil; child = child.NextSibling {
walkMacro(child)
}
}
walkMacro(n)
if macroID != "" {
fmt.Fprintf(c.buf, "\n\n", macroID)
} else {
c.buf.WriteString("\n")
}
c.buf.WriteString("```")
c.buf.WriteString(language)
c.buf.WriteString("\n")
c.buf.WriteString(code)
// Make sure the closing fence starts on its own line.
if !strings.HasSuffix(code, "\n") {
c.buf.WriteString("\n")
}
c.buf.WriteString("```\n\n")
}
// renderPanelAsBlockquote renders a panel macro as a Markdown blockquote.
// The children of every rich-text-body element found under n are converted
// into a temporary buffer; the trimmed result is then written to the real
// output with each line prefixed by "> ", followed by one empty line.
func (c *xmlConverter) renderPanelAsBlockquote(n *html.Node, depth int) {
	// Redirect output into a scratch buffer while the body is converted.
	saved := c.buf
	var captured bytes.Buffer
	c.buf = &captured

	var visit func(*html.Node)
	visit = func(cur *html.Node) {
		if cur.Type == html.ElementNode && strings.Contains(strings.ToLower(cur.Data), "rich-text-body") {
			// Convert the body; do not search inside it for further bodies.
			c.walkChildren(cur, depth)
			return
		}
		for kid := cur.FirstChild; kid != nil; kid = kid.NextSibling {
			visit(kid)
		}
	}
	visit(n)
	c.buf = saved

	for _, line := range strings.Split(strings.TrimSpace(captured.String()), "\n") {
		c.buf.WriteString("> " + line + "\n")
	}
	c.buf.WriteString("\n")
}
// renderTable writes the table rooted at n as a pipe-delimited Markdown
// table. Rows are gathered with collectTableRows; the column count is the
// widest row, and shorter rows are padded by writeTableRow. Nothing is
// written when there are no rows or no cells. The depth parameter is not used
// in the body.
//
// NOTE(review): when extractTableAttrs returns a non-empty string it is
// passed to Fprintf with a format string that contains no verb, so fmt
// appends an %!(EXTRA ...) marker. The literal looks truncated in this copy
// — confirm the intended text.
func (c *xmlConverter) renderTable(n *html.Node, depth int) {
rows := collectTableRows(n)
if len(rows) == 0 {
return
}
// Determine column count
cols := 0
for _, row := range rows {
if len(row.cells) > cols {
cols = len(row.cells)
}
}
if cols == 0 {
return
}
// Preserve table attributes and colgroup as HTML comment
tableAttrs := extractTableAttrs(n)
if tableAttrs != "" {
fmt.Fprintf(c.buf, "\n\n", tableAttrs)
} else {
c.buf.WriteString("\n")
}
// If first row is a header
isFirstRowHeader := len(rows) > 0 && rows[0].isHeader
startIdx := 0
if isFirstRowHeader {
c.writeTableRow(rows[0].cells, cols)
c.writeTableSep(cols)
startIdx = 1
} else {
// Write empty header and separator
empty := make([]string, cols)
c.writeTableRow(empty, cols)
c.writeTableSep(cols)
}
// Remaining rows become body rows; only the first row is ever used as header.
for i := startIdx; i < len(rows); i++ {
c.writeTableRow(rows[i].cells, cols)
}
c.buf.WriteString("\n")
}
// writeTableRow writes one table line of exactly cols cells in the form
// "| a | b |". Missing trailing cells are written as empty; cells beyond
// cols are ignored.
func (c *xmlConverter) writeTableRow(cells []string, cols int) {
	c.buf.WriteString("|")
	for col := 0; col < cols; col++ {
		var text string
		if col < len(cells) {
			text = cells[col]
		}
		c.buf.WriteString(" " + text + " |")
	}
	c.buf.WriteString("\n")
}
// writeTableSep writes the header separator line for a table with cols
// columns, e.g. "|---|---|" for two columns.
func (c *xmlConverter) writeTableSep(cols int) {
	// max guards strings.Repeat against a negative count, for which the
	// separator is just "|".
	c.buf.WriteString("|" + strings.Repeat("---|", max(cols, 0)) + "\n")
}
// walkOL renders the li children of an ordered list n as "1. ", "2. ", ...
// items, one per line, indented according to the current list depth.
// Children that are not li elements are skipped and do not consume a number.
//
// While an item is rendered, inListItem is set (and restored afterwards) in
// the same way as for unordered list items, so that paragraphs reached
// through nested elements do not emit a blank line in the middle of the item.
func (c *xmlConverter) walkOL(n *html.Node, depth int) {
	// listDepth is restored by nested lists, so the indent is the same for
	// every item of this list.
	indent := strings.Repeat(" ", max(0, c.listDepth-1))
	idx := 1
	for child := n.FirstChild; child != nil; child = child.NextSibling {
		if child.Type != html.ElementNode || strings.ToLower(child.Data) != "li" {
			continue
		}
		prev := c.inListItem
		c.inListItem = true
		c.buf.WriteString(indent)
		fmt.Fprintf(c.buf, "%d. ", idx)
		c.walkChildrenInline(child, depth)
		c.buf.WriteString("\n")
		c.inListItem = prev
		idx++
	}
}
// walkChildrenInline renders the children of n on the current output line.
// Text is whitespace-collapsed, with the leading space removed from a text
// node that is the first child and the trailing space removed from one that
// is the last child. Paragraph children are unwrapped, nested lists start on
// a new line, and every other element is passed to walk. Nodes that are
// neither text nor elements are ignored.
func (c *xmlConverter) walkChildrenInline(n *html.Node, depth int) {
	for cur := n.FirstChild; cur != nil; cur = cur.NextSibling {
		switch cur.Type {
		case html.TextNode:
			text := collapseWhitespace(cur.Data)
			if cur == n.FirstChild {
				text = strings.TrimLeft(text, " ")
			}
			if cur.NextSibling == nil {
				text = strings.TrimRight(text, " ")
			}
			// Writing an empty string is a no-op, so no emptiness check.
			c.buf.WriteString(text)

		case html.ElementNode:
			switch strings.ToLower(cur.Data) {
			case "p":
				c.walkChildrenInline(cur, depth)
			case "ul", "ol":
				c.buf.WriteString("\n")
				fallthrough
			default:
				c.walk(cur, depth)
			}
		}
	}
}
// extractTableAttrs summarises the presentation attributes of a table as a
// single space-separated string: class="..." and style="..." for the table's
// own attributes (each only when non-empty), followed by cols=[a|b|...]
// built from the non-empty style attributes of the col elements found in
// direct colgroup children. It returns "" when none of these are present.
func extractTableAttrs(table *html.Node) string {
	var parts []string

	if v := getAttr(table, "class"); v != "" {
		parts = append(parts, fmt.Sprintf("class=%q", v))
	}
	if v := getAttr(table, "style"); v != "" {
		parts = append(parts, fmt.Sprintf("style=%q", v))
	}

	// Column widths come from <colgroup><col style="..."> entries.
	var widths []string
	for group := table.FirstChild; group != nil; group = group.NextSibling {
		if group.Type != html.ElementNode || strings.ToLower(group.Data) != "colgroup" {
			continue
		}
		for col := group.FirstChild; col != nil; col = col.NextSibling {
			if col.Type != html.ElementNode || strings.ToLower(col.Data) != "col" {
				continue
			}
			if w := getAttr(col, "style"); w != "" {
				widths = append(widths, w)
			}
		}
	}
	if len(widths) > 0 {
		parts = append(parts, fmt.Sprintf("cols=[%s]", strings.Join(widths, "|")))
	}

	return strings.Join(parts, " ")
}
// tableRow is one table row as collected by collectTableRows.
type tableRow struct {
// isHeader is true when the row was found under thead or contains a th cell.
isHeader bool
// cells holds the trimmed inline-Markdown rendering of each th/td cell,
// in document order.
cells []string
}
// collectTableRows returns the rows of table in document order. Every tr
// element found anywhere below table (outside another tr) yields one row
// whose cells are the rendered th/td children. A row is marked as header
// when it sits inside thead or contains at least one th cell.
func collectTableRows(table *html.Node) []tableRow {
	var rows []tableRow

	var visit func(node *html.Node, inHeader bool)
	visit = func(node *html.Node, inHeader bool) {
		if node.Type == html.ElementNode {
			switch strings.ToLower(node.Data) {
			case "thead":
				inHeader = true
			case "tbody":
				inHeader = false
			case "tr":
				row := tableRow{isHeader: inHeader}
				for cell := node.FirstChild; cell != nil; cell = cell.NextSibling {
					if cell.Type != html.ElementNode {
						continue
					}
					name := strings.ToLower(cell.Data)
					if name != "th" && name != "td" {
						continue
					}
					if name == "th" {
						row.isHeader = true
					}
					row.cells = append(row.cells, strings.TrimSpace(renderCellMarkdown(cell)))
				}
				rows = append(rows, row)
				// Cells were rendered above; do not descend into the row.
				return
			}
		}
		for kid := node.FirstChild; kid != nil; kid = kid.NextSibling {
			visit(kid, inHeader)
		}
	}

	visit(table, false)
	return rows
}
// renderCellMarkdown returns the inline Markdown produced by renderCellNode
// for the children of cell.
func renderCellMarkdown(cell *html.Node) string {
	out := new(bytes.Buffer)
	renderCellNode(out, cell)
	return out.String()
}
// renderCellNode appends an inline-Markdown rendering of the children of n
// to buf. Text is whitespace-collapsed. Bold, italic, strikethrough, code and
// link elements are wrapped in the matching Markdown markers; p and div are
// unwrapped; user references, images, task lists, emoticons and inline
// comment markers have dedicated cases; any other element just has its
// children rendered. Nodes that are neither text nor elements are ignored.
//
// NOTE(review): the "user" case is tested before the "ac:link" and "image"
// cases, and all of them are substring matches on the tag name.
// NOTE(review): several string literals in this function look empty or
// truncated in this copy (a WriteString argument split across two lines,
// unclosed raw-string literals, Fprintf formats with no verb for their
// argument) — confirm against the canonical file before changing any code
// here.
func renderCellNode(buf *bytes.Buffer, n *html.Node) {
for child := n.FirstChild; child != nil; child = child.NextSibling {
switch child.Type {
case html.TextNode:
text := collapseWhitespace(child.Data)
buf.WriteString(text)
case html.ElementNode:
tag := strings.ToLower(child.Data)
switch {
case tag == "strong" || tag == "b":
buf.WriteString("**")
renderCellNode(buf, child)
buf.WriteString("**")
case tag == "em" || tag == "i":
buf.WriteString("*")
renderCellNode(buf, child)
buf.WriteString("*")
case tag == "del" || tag == "s":
buf.WriteString("~~")
renderCellNode(buf, child)
buf.WriteString("~~")
case tag == "code":
buf.WriteString("`")
buf.WriteString(getTextContent(child))
buf.WriteString("`")
case tag == "a":
href := getAttr(child, "href")
buf.WriteString("[")
renderCellNode(buf, child)
buf.WriteString("](")
buf.WriteString(href)
buf.WriteString(")")
case tag == "br":
buf.WriteString("
")
case tag == "p":
// Unwrap inside cells
renderCellNode(buf, child)
case tag == "div":
renderCellNode(buf, child)
case strings.Contains(tag, "user"):
userKey := getAttr(child, "ri:userkey")
if userKey == "" {
userKey = getAttr(child, "userkey")
}
if userKey != "" {
fmt.Fprintf(buf, ``, userKey)
}
case strings.Contains(tag, "ac:link"):
renderCellNode(buf, child)
case strings.Contains(tag, "image"):
// Handle images in cells
alt := getAttr(child, "ac:alt")
if alt == "" {
alt = getAttr(child, "alt")
}
var imgBuf bytes.Buffer
c := &xmlConverter{buf: &imgBuf}
ref := c.findImageRef(child)
if ref.isAttachment {
fmt.Fprintf(buf, `")
} else if ref.url != "" {
buf.WriteString("
buf.WriteString(ref.url)
buf.WriteString(")")
}
case strings.Contains(tag, "task-list"):
renderCellTaskList(buf, child)
case strings.Contains(tag, "emoticon"):
name := getAttr(child, "ac:name")
if name == "" {
name = getAttr(child, "name")
}
switch name {
case "plus":
buf.WriteString("(+)")
case "minus":
buf.WriteString("(-)")
case "question":
buf.WriteString("(?)")
case "tick":
buf.WriteString("(v)")
case "cross":
buf.WriteString("(x)")
}
case strings.Contains(tag, "inline-comment-marker"):
ref := getAttr(child, "ac:ref")
if ref == "" {
ref = getAttr(child, "ref")
}
if ref != "" {
fmt.Fprintf(buf, ``, ref)
renderCellNode(buf, child)
buf.WriteString("")
} else {
renderCellNode(buf, child)
}
default:
renderCellNode(buf, child)
}
}
}
}
// renderCellTaskList renders a task list inside a table cell as inline markdown.
//
// Each direct element child of n whose tag contains "task" but not
// "task-list" is treated as one task. Its task-status child supplies the
// checkbox state ("complete" gives "[x]", anything else "[ ]") and its
// task-body child, rendered with renderCellMarkdown and trimmed, supplies the
// text. One "- [ ] text" entry is written per task.
//
// NOTE(review): the Fprintf format string is split across two lines in this
// copy, so the text that separates entries looks lost — confirm against the
// canonical file.
func renderCellTaskList(buf *bytes.Buffer, n *html.Node) {
for child := n.FirstChild; child != nil; child = child.NextSibling {
if child.Type != html.ElementNode {
continue
}
tag := strings.ToLower(child.Data)
if !strings.Contains(tag, "task") || strings.Contains(tag, "task-list") {
continue
}
// This is an ac:task element
status := ""
var bodyContent string
for tc := child.FirstChild; tc != nil; tc = tc.NextSibling {
if tc.Type != html.ElementNode {
continue
}
tcTag := strings.ToLower(tc.Data)
if strings.Contains(tcTag, "task-status") {
status = strings.TrimSpace(getTextContent(tc))
} else if strings.Contains(tcTag, "task-body") {
bodyContent = strings.TrimSpace(renderCellMarkdown(tc))
}
}
check := "[ ]"
if status == "complete" {
check = "[x]"
}
fmt.Fprintf(buf, "- %s %s
", check, bodyContent)
}
}
// imageRef describes the image source located by findImageRef.
type imageRef struct {
// url is taken from the ri:value (or value) attribute of an element whose
// tag name contains "url".
url string
// filename is taken from the ri:filename (or filename) attribute of an
// element whose tag name contains "attachment".
filename string
// isAttachment is set to true whenever filename is set.
isAttachment bool
}
// findImageRef searches n and its descendants for the source of an image.
// An element whose tag name contains "url" supplies the URL through its
// ri:value (or value) attribute; an element whose tag name contains
// "attachment" supplies an attachment file name through its ri:filename (or
// filename) attribute. An element that yields a value is not searched
// further, but the search continues through the rest of the tree, so a later
// match overwrites an earlier one. The receiver's state is not used.
func (c *xmlConverter) findImageRef(n *html.Node) imageRef {
	// firstAttr returns the first non-empty attribute value among keys.
	firstAttr := func(node *html.Node, keys ...string) string {
		for _, key := range keys {
			if val := getAttr(node, key); val != "" {
				return val
			}
		}
		return ""
	}

	var found imageRef
	var visit func(*html.Node)
	visit = func(node *html.Node) {
		if node.Type == html.ElementNode {
			name := strings.ToLower(node.Data)
			if strings.Contains(name, "url") {
				if val := firstAttr(node, "ri:value", "value"); val != "" {
					found.url = val
					return
				}
			}
			if strings.Contains(name, "attachment") {
				if file := firstAttr(node, "ri:filename", "filename"); file != "" {
					found.filename = file
					found.isAttachment = true
					return
				}
			}
		}
		for kid := node.FirstChild; kid != nil; kid = kid.NextSibling {
			visit(kid)
		}
	}
	visit(n)
	return found
}
// hasUserChild reports whether any direct element child of n has a tag name
// containing "user" (compared in lowercase).
func (c *xmlConverter) hasUserChild(n *html.Node) bool {
	for kid := n.FirstChild; kid != nil; kid = kid.NextSibling {
		if kid.Type != html.ElementNode {
			continue
		}
		if strings.Contains(strings.ToLower(kid.Data), "user") {
			return true
		}
	}
	return false
}
// Helper functions
// findNode returns the first element, in document (pre-order) order starting
// at n itself, whose tag name equals tag exactly, or nil when there is none.
func findNode(n *html.Node, tag string) *html.Node {
	// Explicit stack instead of recursion; children are pushed last-to-first
	// so that they are popped in document order.
	pending := []*html.Node{n}
	for len(pending) > 0 {
		cur := pending[len(pending)-1]
		pending = pending[:len(pending)-1]
		if cur.Type == html.ElementNode && cur.Data == tag {
			return cur
		}
		for kid := cur.LastChild; kid != nil; kid = kid.PrevSibling {
			pending = append(pending, kid)
		}
	}
	return nil
}
// getAttr returns the value of the first attribute of n whose name equals
// key, or "" when there is no such attribute. For an attribute that carries
// a namespace, the name compared is "namespace:key".
func getAttr(n *html.Node, key string) string {
	for i := range n.Attr {
		a := &n.Attr[i]
		name := a.Key
		if a.Namespace != "" {
			name = a.Namespace + ":" + a.Key
		}
		if name == key {
			return a.Val
		}
	}
	return ""
}
// collapseWhitespace replaces every run of spaces, tabs, newlines and
// carriage returns in s with a single space. Leading and trailing runs are
// collapsed too, not removed, so a string that starts or ends with
// whitespace keeps exactly one space there.
func collapseWhitespace(s string) string {
	var out strings.Builder
	prevSpace := false
	for _, r := range s {
		switch r {
		case ' ', '\t', '\n', '\r':
			if prevSpace {
				continue
			}
			out.WriteByte(' ')
			prevSpace = true
		default:
			out.WriteRune(r)
			prevSpace = false
		}
	}
	return out.String()
}
// hasTaskStatus reports whether any direct element child of n has a tag name
// containing "task-status" (compared in lowercase). Deeper descendants are
// not inspected.
func hasTaskStatus(n *html.Node) bool {
	for kid := n.FirstChild; kid != nil; kid = kid.NextSibling {
		if kid.Type != html.ElementNode {
			continue
		}
		if strings.Contains(strings.ToLower(kid.Data), "task-status") {
			return true
		}
	}
	return false
}
// getCDATAContent retrieves content from preprocessed CDATA sections.
// It returns the concatenated text found under n, which includes the text of
// the <cdatacontent> elements that preprocessCDATA substitutes for CDATA
// sections as well as any plain text around them.
//
// The text is returned exactly as stored in the parsed nodes. The escaping
// applied by preprocessCDATA has already been reversed by the HTML parser
// when it built the text nodes, so unescaping a second time here would
// corrupt content that legitimately contains entity-like text (a code sample
// containing the five characters "&lt;" would come out as "<").
func getCDATAContent(n *html.Node) string {
	var buf bytes.Buffer
	var walk func(*html.Node)
	walk = func(node *html.Node) {
		if node.Type == html.TextNode {
			buf.WriteString(node.Data)
			return
		}
		for child := node.FirstChild; child != nil; child = child.NextSibling {
			walk(child)
		}
	}
	walk(n)
	return buf.String()
}
// getTextContent returns the concatenation, in document order, of the data
// of every text node at or below n.
func getTextContent(n *html.Node) string {
	var out bytes.Buffer
	var collect func(*html.Node)
	collect = func(cur *html.Node) {
		if cur.Type == html.TextNode {
			out.WriteString(cur.Data)
		}
		kid := cur.FirstChild
		for kid != nil {
			collect(kid)
			kid = kid.NextSibling
		}
	}
	collect(n)
	return out.String()
}