package main import ( "fmt" "io" "os" "regexp" "strings" "github.com/spf13/cobra" "go.bigb.es/confluence-md-utilities/converter" "go.bigb.es/confluence-md-utilities/format" ) var ( verifyIndent string ) var verifyCmd = &cobra.Command{ Use: "verify [input.xml]", Short: "Verify round-trip fidelity of XML → Markdown → XML conversion", Long: `Check that Confluence XML survives a round-trip through Markdown and back. Compares: A = fmt(input XML) B = fmt(xml2md(input XML) → md2xml → XML) If A and B match, the round-trip is lossless. Otherwise, prints a diff. Reads from stdin if no file is specified.`, Args: cobra.MaximumNArgs(1), RunE: func(cmd *cobra.Command, args []string) error { var input []byte var err error if len(args) > 0 { input, err = os.ReadFile(args[0]) } else { input, err = io.ReadAll(os.Stdin) } if err != nil { return fmt.Errorf("reading input: %w", err) } xmlInput := string(input) // Normalize input: remove elements that cannot survive round-trip xmlInput = normalizeForVerify(xmlInput) // A: format the original XML formatted := format.PrettyXML(xmlInput, verifyIndent) // B: XML → Markdown → XML → format md, err := converter.ConfluenceToMarkdown(xmlInput) if err != nil { return fmt.Errorf("xml→markdown: %w", err) } xmlRoundTrip, err := converter.MarkdownToConfluence([]byte(md)) if err != nil { return fmt.Errorf("markdown→xml: %w", err) } formattedRoundTrip := format.PrettyXML(xmlRoundTrip, verifyIndent) if formatted == formattedRoundTrip { fmt.Fprintln(os.Stderr, "OK: round-trip is lossless") return nil } // Print unified diff with colored inline highlights linesA := strings.Split(formatted, "\n") linesB := strings.Split(formattedRoundTrip, "\n") fmt.Fprintln(os.Stderr, "MISMATCH: round-trip produced different output") fmt.Fprintln(os.Stderr, "") fmt.Fprintf(os.Stderr, "%s--- original (formatted)%s\n", ansiRed, ansiReset) fmt.Fprintf(os.Stderr, "%s+++ round-trip (formatted)%s\n", ansiGreen, ansiReset) ops := computeDiffOps(linesA, linesB) hunks := 
buildHunks(ops, 3) for _, h := range hunks { printHunk(h) } os.Exit(1) return nil }, } func init() { verifyCmd.Flags().StringVar(&verifyIndent, "indent", " ", "Indentation string (default: 2 spaces)") rootCmd.AddCommand(verifyCmd) } var ( // reEmptyParagraph matches empty paragraphs like
\s*
\s*
, unwrapping to just the text.
reSpanInCode = regexp.MustCompile(`([^<]*)]*>([^<]*)`)
// reAdjacentCode matches (directly adjacent), merging into one span.
reAdjacentCode = regexp.MustCompile(``)
)
// normalizeForVerify prepares the input XML for comparison by removing the
// patterns that are not expected to survive a trip through Markdown. Three
// package-level regular expressions are applied in a fixed order:
//
//  1. every reEmptyParagraph match is deleted;
//  2. every reSpanInCode match is replaced by its capture groups 1 and 2,
//     repeated until the pattern no longer matches;
//  3. every reAdjacentCode match is deleted.
//
// The rewritten string is returned; the argument is not modified.
func normalizeForVerify(xml string) string {
	cleaned := reEmptyParagraph.ReplaceAllString(xml, "")

	// The original author notes that repeated passes are needed for nested
	// cases, so keep rewriting until nothing is left to unwrap.
	for {
		if !reSpanInCode.MatchString(cleaned) {
			break
		}
		cleaned = reSpanInCode.ReplaceAllString(cleaned, "${1}${2}")
	}

	return reAdjacentCode.ReplaceAllString(cleaned, "")
}
// ANSI escape sequences used to colorize the diff output.
const (
	// ansiReset is emitted after colored text to end the styling.
	ansiReset = "\033[0m"

	// Foreground colors.
	ansiRed   = "\033[31m"
	ansiGreen = "\033[32m"
	ansiCyan  = "\033[36m"

	// Bold text attribute.
	ansiBold = "\033[1m"

	// Background/foreground pairs.
	ansiRedBg = "\033[41;37m" // red background, white text
	ansiGrnBg = "\033[42;30m" // green background, black text
)
// diffOp says how one line of the diff relates to the two inputs A and B.
type diffOp int

// The possible diffOp values. opEqual is the zero value.
const (
	// opEqual marks a line that the diff treats as common to A and B.
	opEqual diffOp = iota
	// opRemove marks a line only in A.
	opRemove diffOp = iota
	// opAdd marks a line only in B.
	opAdd diffOp = iota
)
// diffLine describes one line of the diff: what happened to it, its text, and
// where it sits in each input.
type diffLine struct {
	// op is the operation this line represents.
	op diffOp
	// text is the line content.
	text string
	// lineA and lineB are the 1-based line numbers in A and B respectively;
	// -1 means the line has no position on that side.
	lineA, lineB int
}
// hunk is one section of a unified diff: a run of diff lines together with
// the ranges it covers in A and B.
type hunk struct {
	// startA is the 1-based first line of the hunk in A.
	startA int
	// countA is the number of lines of A the hunk covers.
	countA int
	// startB is the 1-based first line of the hunk in B.
	startB int
	// countB is the number of lines of B the hunk covers.
	countB int
	// lines holds the hunk's diff lines, context included.
	lines []diffLine
}
// computeDiffOps compares two line slices and returns the line-level edit
// script that turns a into b, in forward order.
//
// It fills a longest-common-subsequence table of (len(a)+1) x (len(b)+1)
// cells and then walks it back from the bottom-right corner. Every returned
// entry carries the 1-based position of the line in a and/or b; the side that
// does not apply is set to -1. When both inputs are empty the result is nil.
func computeDiffOps(a, b []string) []diffLine {
	rows, cols := len(a)+1, len(b)+1

	// lcs[r][c] is the LCS length of a[:r] and b[:c]. All rows share one
	// backing array.
	cells := make([]int, rows*cols)
	lcs := make([][]int, rows)
	for r := range lcs {
		lcs[r] = cells[r*cols : (r+1)*cols]
	}
	for r := 1; r < rows; r++ {
		for c := 1; c < cols; c++ {
			if a[r-1] == b[c-1] {
				lcs[r][c] = lcs[r-1][c-1] + 1
				continue
			}
			lcs[r][c] = max(lcs[r-1][c], lcs[r][c-1])
		}
	}

	// Walk back from the corner. On a tie the add is emitted first, so that
	// after reversal removals come before additions.
	var script []diffLine
	r, c := len(a), len(b)
	for r > 0 || c > 0 {
		switch {
		case r > 0 && c > 0 && a[r-1] == b[c-1]:
			script = append(script, diffLine{op: opEqual, text: a[r-1], lineA: r, lineB: c})
			r--
			c--
		case c > 0 && (r == 0 || lcs[r][c-1] >= lcs[r-1][c]):
			script = append(script, diffLine{op: opAdd, text: b[c-1], lineA: -1, lineB: c})
			c--
		default:
			script = append(script, diffLine{op: opRemove, text: a[r-1], lineA: r, lineB: -1})
			r--
		}
	}

	// The walk produced the script back to front; flip it in place.
	for k, last := 0, len(script)-1; k < len(script)/2; k++ {
		script[k], script[last-k] = script[last-k], script[k]
	}
	return script
}
// buildHunks groups diff operations into unified-diff hunks with `ctx` context lines.
//
// ops is the complete edit script in forward order. Changed lines separated
// by at most 2*ctx unchanged lines end up in the same hunk. Each hunk is then
// widened by up to ctx lines on both sides, clipped to the bounds of ops. A
// negative ctx is treated as 0. The result is nil when ops contains no change.
//
// For every hunk, startA/startB are taken independently from the first line
// of the hunk that has a position on that side. If a side contributes no
// lines at all (countA or countB is 0), its start is the number of the line
// preceding the hunk on that side, or 0 at the top of the file; this is the
// unified-diff convention for an empty range.
//
// The returned hunks alias ops: hunk.lines is a sub-slice, not a copy.
func buildHunks(ops []diffLine, ctx int) []hunk {
	if ctx < 0 {
		ctx = 0 // a negative context would make lo exceed hi below
	}

	// Find ranges of changed lines; ranges that are close enough to share
	// context are merged.
	type span struct{ start, end int } // half-open index range into ops
	var changed []span
	for i, op := range ops {
		if op.op == opEqual {
			continue
		}
		if n := len(changed); n > 0 && i-changed[n-1].end <= 2*ctx {
			// Merge with previous span
			changed[n-1].end = i + 1
			continue
		}
		changed = append(changed, span{i, i + 1})
	}

	var hunks []hunk
	for _, ch := range changed {
		lo := max(ch.start-ctx, 0)
		hi := min(ch.end+ctx, len(ops))
		h := hunk{lines: ops[lo:hi]}

		// Start from the "empty range" values and overwrite each side with
		// the first real line number found inside the hunk.
		h.startA, h.startB = linesBefore(ops[:lo])
		foundA, foundB := false, false
		for _, dl := range h.lines {
			if !foundA && dl.lineA > 0 {
				h.startA, foundA = dl.lineA, true
			}
			if !foundB && dl.lineB > 0 {
				h.startB, foundB = dl.lineB, true
			}
			if dl.op == opEqual || dl.op == opRemove {
				h.countA++
			}
			if dl.op == opEqual || dl.op == opAdd {
				h.countB++
			}
		}
		hunks = append(hunks, h)
	}
	return hunks
}

// linesBefore returns the last A and B line numbers recorded in prefix, or 0
// for a side on which prefix holds no line.
func linesBefore(prefix []diffLine) (lastA, lastB int) {
	for i := len(prefix) - 1; i >= 0 && (lastA == 0 || lastB == 0); i-- {
		if lastA == 0 && prefix[i].lineA > 0 {
			lastA = prefix[i].lineA
		}
		if lastB == 0 && prefix[i].lineB > 0 {
			lastB = prefix[i].lineB
		}
	}
	return lastA, lastB
}
// printHunk writes one hunk to standard output in unified-diff form, colored
// with ANSI escapes.
//
// The "@@ -startA,countA +startB,countB @@" header is printed in cyan.
// Unchanged lines get a leading space and no color. A run of removed lines
// immediately followed by a run of added lines is paired up position by
// position; each pair is printed as a "-" line and a "+" line with the
// differing part marked by inlineHighlight. Lines left over on either side,
// and added lines not preceded by a removal, are printed whole in red or
// green.
func printHunk(h hunk) {
	out := os.Stdout
	fmt.Fprintf(out, "%s@@ -%d,%d +%d,%d @@%s\n",
		ansiCyan, h.startA, h.countA, h.startB, h.countB, ansiReset)

	rows := h.lines
	for idx := 0; idx < len(rows); {
		switch rows[idx].op {
		case opEqual:
			fmt.Fprintf(out, " %s\n", rows[idx].text)
			idx++

		case opAdd:
			// An addition that does not follow a removal.
			fmt.Fprintf(out, "%s+%s%s\n", ansiGreen, rows[idx].text, ansiReset)
			idx++

		case opRemove:
			// Collect the whole run of removals, then the run of additions
			// that directly follows it.
			remEnd := idx
			for remEnd < len(rows) && rows[remEnd].op == opRemove {
				remEnd++
			}
			addEnd := remEnd
			for addEnd < len(rows) && rows[addEnd].op == opAdd {
				addEnd++
			}
			removed, added := rows[idx:remEnd], rows[remEnd:addEnd]

			// Matching positions are shown side by side with inline marks.
			paired := min(len(removed), len(added))
			for k := 0; k < paired; k++ {
				oldText, newText := inlineHighlight(removed[k].text, added[k].text)
				fmt.Fprintf(out, "%s-%s%s\n", ansiRed, oldText, ansiReset)
				fmt.Fprintf(out, "%s+%s%s\n", ansiGreen, newText, ansiReset)
			}

			// Whatever has no partner is printed plainly.
			for _, rem := range removed[paired:] {
				fmt.Fprintf(out, "%s-%s%s\n", ansiRed, rem.text, ansiReset)
			}
			for _, add := range added[paired:] {
				fmt.Fprintf(out, "%s+%s%s\n", ansiGreen, add.text, ansiReset)
			}
			idx = addEnd

		default:
			// Unknown operations produce no output.
			idx++
		}
	}
}
// inlineHighlight decorates a removed line a and an added line b so that the
// part in which they differ stands out.
//
// The two strings are compared rune by rune. Their longest common prefix and
// the longest common suffix that does not overlap it are left untouched. The
// differing middle of a is wrapped in bold plus red background, the differing
// middle of b in bold plus green background. After the middle the line color
// (red for a, green for b) is switched back on for the suffix. If there is no
// differing middle, a and b are returned unchanged.
func inlineHighlight(a, b string) (string, string) {
	left, right := []rune(a), []rune(b)
	shorter := min(len(left), len(right))

	// Length of the shared head.
	head := 0
	for head < shorter && left[head] == right[head] {
		head++
	}

	// Length of the shared tail; it may not reach back into the head.
	tail := 0
	for tail < shorter-head && left[len(left)-1-tail] == right[len(right)-1-tail] {
		tail++
	}

	diffA := left[head : len(left)-tail]
	diffB := right[head : len(right)-tail]
	if len(diffA)+len(diffB) == 0 {
		// Nothing differs, so there is nothing to mark.
		return a, b
	}

	before := string(left[:head])
	after := string(left[len(left)-tail:])

	markedA := before + ansiBold + ansiRedBg + string(diffA) + ansiReset + ansiRed + after
	markedB := before + ansiBold + ansiGrnBg + string(diffB) + ansiReset + ansiGreen + after
	return markedA, markedB
}