package main import ( "fmt" "io" "os" "regexp" "strings" "github.com/spf13/cobra" "sourcecraft.dev/bigbes/confluence-md-utilities/converter" "sourcecraft.dev/bigbes/confluence-md-utilities/format" ) var ( verifyIndent string ) var verifyCmd = &cobra.Command{ Use: "verify [input.xml]", Short: "Verify round-trip fidelity of XML → Markdown → XML conversion", Long: `Check that Confluence XML survives a round-trip through Markdown and back. Compares: A = fmt(input XML) B = fmt(xml2md(input XML) → md2xml → XML) If A and B match, the round-trip is lossless. Otherwise, prints a diff. Reads from stdin if no file is specified.`, Args: cobra.MaximumNArgs(1), RunE: func(cmd *cobra.Command, args []string) error { var input []byte var err error if len(args) > 0 { input, err = os.ReadFile(args[0]) } else { input, err = io.ReadAll(os.Stdin) } if err != nil { return fmt.Errorf("reading input: %w", err) } xmlInput := string(input) // Normalize input: remove elements that cannot survive round-trip xmlInput = normalizeForVerify(xmlInput) // A: format the original XML formatted := format.PrettyXML(xmlInput, verifyIndent) // B: XML → Markdown → XML → format md, err := converter.ConfluenceToMarkdown(xmlInput) if err != nil { return fmt.Errorf("xml→markdown: %w", err) } xmlRoundTrip, err := converter.MarkdownToConfluence([]byte(md)) if err != nil { return fmt.Errorf("markdown→xml: %w", err) } formattedRoundTrip := format.PrettyXML(xmlRoundTrip, verifyIndent) if formatted == formattedRoundTrip { fmt.Fprintln(os.Stderr, "OK: round-trip is lossless") return nil } // Print unified diff with colored inline highlights linesA := strings.Split(formatted, "\n") linesB := strings.Split(formattedRoundTrip, "\n") fmt.Fprintln(os.Stderr, "MISMATCH: round-trip produced different output") fmt.Fprintln(os.Stderr, "") fmt.Fprintf(os.Stderr, "%s--- original (formatted)%s\n", ansiRed, ansiReset) fmt.Fprintf(os.Stderr, "%s+++ round-trip (formatted)%s\n", ansiGreen, ansiReset) ops := computeDiffOps(linesA, linesB) hunks := buildHunks(ops, 3) for _, h := range hunks { printHunk(h) } os.Exit(1) return nil }, } func init() { verifyCmd.Flags().StringVar(&verifyIndent, "indent", " ", "Indentation string (default: 2 spaces)") rootCmd.AddCommand(verifyCmd) } var ( // reEmptyParagraph matches empty paragraphs like


,


, etc. reEmptyParagraph = regexp.MustCompile(`

\s*\s*

`) // reSpanInCode matches ... inside , unwrapping to just the text. reSpanInCode = regexp.MustCompile(`([^<]*)]*>([^<]*)`) // reAdjacentCode matches (directly adjacent), merging into one span. reAdjacentCode = regexp.MustCompile(``) ) // normalizeForVerify strips XML patterns that cannot survive a round-trip // through Markdown, so verify compares only what the converter can preserve. func normalizeForVerify(xml string) string { xml = reEmptyParagraph.ReplaceAllString(xml, "") // Unwrap inside (apply repeatedly for nested cases) for reSpanInCode.MatchString(xml) { xml = reSpanInCode.ReplaceAllString(xml, "${1}${2}") } // Merge adjacent elements xml = reAdjacentCode.ReplaceAllString(xml, "") return xml } // ANSI escape codes for diff output. const ( ansiReset = "\033[0m" ansiRed = "\033[31m" ansiGreen = "\033[32m" ansiCyan = "\033[36m" ansiBold = "\033[1m" ansiRedBg = "\033[41;37m" // red background, white text ansiGrnBg = "\033[42;30m" // green background, black text ) // diffOp represents a line-level diff operation. type diffOp int const ( opEqual diffOp = iota opRemove // line only in A opAdd // line only in B ) // diffLine is a single line in the diff with its operation and source positions. type diffLine struct { op diffOp text string lineA int // 1-based line number in A (-1 if not applicable) lineB int // 1-based line number in B (-1 if not applicable) } // hunk is a group of diff lines with surrounding context. type hunk struct { startA, countA int // 1-based start and count for A startB, countB int // 1-based start and count for B lines []diffLine } // computeDiffOps produces a sequence of diff operations from two line slices // using LCS-based algorithm. func computeDiffOps(a, b []string) []diffLine { m, n := len(a), len(b) dp := make([][]int, m+1) for i := range dp { dp[i] = make([]int, n+1) } for i := 1; i <= m; i++ { for j := 1; j <= n; j++ { if a[i-1] == b[j-1] { dp[i][j] = dp[i-1][j-1] + 1 } else if dp[i-1][j] >= dp[i][j-1] { dp[i][j] = dp[i-1][j] } else { dp[i][j] = dp[i][j-1] } } } // Backtrack to produce operations var ops []diffLine i, j := m, n for i > 0 || j > 0 { if i > 0 && j > 0 && a[i-1] == b[j-1] { ops = append(ops, diffLine{op: opEqual, text: a[i-1], lineA: i, lineB: j}) i-- j-- } else if j > 0 && (i == 0 || dp[i][j-1] >= dp[i-1][j]) { ops = append(ops, diffLine{op: opAdd, text: b[j-1], lineA: -1, lineB: j}) j-- } else { ops = append(ops, diffLine{op: opRemove, text: a[i-1], lineA: i, lineB: -1}) i-- } } // Reverse — we built it backwards for l, r := 0, len(ops)-1; l < r; l, r = l+1, r-1 { ops[l], ops[r] = ops[r], ops[l] } return ops } // buildHunks groups diff operations into unified-diff hunks with `ctx` context lines. func buildHunks(ops []diffLine, ctx int) []hunk { // Find ranges of changed lines, expanded by context type span struct{ start, end int } // indices into ops var changed []span for i, op := range ops { if op.op != opEqual { if len(changed) > 0 && i-changed[len(changed)-1].end <= 2*ctx { // Merge with previous span changed[len(changed)-1].end = i + 1 } else { changed = append(changed, span{i, i + 1}) } } } var hunks []hunk for _, ch := range changed { lo := ch.start - ctx if lo < 0 { lo = 0 } hi := ch.end + ctx if hi > len(ops) { hi = len(ops) } h := hunk{lines: ops[lo:hi]} // Compute start lines and counts h.startA, h.startB = 1, 1 if len(h.lines) > 0 { // Find first valid line numbers for _, dl := range h.lines { if dl.lineA > 0 { h.startA = dl.lineA break } if dl.lineB > 0 { h.startB = dl.lineB break } } if h.lines[0].lineA > 0 { h.startA = h.lines[0].lineA } if h.lines[0].lineB > 0 { h.startB = h.lines[0].lineB } } for _, dl := range h.lines { if dl.op == opEqual || dl.op == opRemove { h.countA++ } if dl.op == opEqual || dl.op == opAdd { h.countB++ } } hunks = append(hunks, h) } return hunks } // printHunk outputs a single unified diff hunk with ANSI colors and inline highlights. func printHunk(h hunk) { // @@ header fmt.Fprintf(os.Stdout, "%s@@ -%d,%d +%d,%d @@%s\n", ansiCyan, h.startA, h.countA, h.startB, h.countB, ansiReset) lines := h.lines for i := 0; i < len(lines); i++ { dl := lines[i] switch dl.op { case opEqual: fmt.Printf(" %s\n", dl.text) case opRemove: // Try to pair with subsequent add(s) for inline highlighting remStart := i for i+1 < len(lines) && lines[i+1].op == opRemove { i++ } remEnd := i + 1 addStart := remEnd j := addStart for j < len(lines) && lines[j].op == opAdd { j++ } addEnd := j removed := lines[remStart:remEnd] added := lines[addStart:addEnd] // Pair up removed/added lines for inline diff pairs := min(len(removed), len(added)) for p := range pairs { hl, hr := inlineHighlight(removed[p].text, added[p].text) fmt.Printf("%s-%s%s\n", ansiRed, hl, ansiReset) fmt.Printf("%s+%s%s\n", ansiGreen, hr, ansiReset) } // Remaining unpaired lines for p := pairs; p < len(removed); p++ { fmt.Printf("%s-%s%s\n", ansiRed, removed[p].text, ansiReset) } for p := pairs; p < len(added); p++ { fmt.Printf("%s+%s%s\n", ansiGreen, added[p].text, ansiReset) } i = addEnd - 1 // -1 because loop increments case opAdd: // Unpaired add (not preceded by remove) fmt.Printf("%s+%s%s\n", ansiGreen, dl.text, ansiReset) } } } // inlineHighlight returns two strings (for removed and added lines) with ANSI // bold marking on the parts that actually differ. func inlineHighlight(a, b string) (string, string) { ra := []rune(a) rb := []rune(b) // Common prefix pfx := 0 for pfx < len(ra) && pfx < len(rb) && ra[pfx] == rb[pfx] { pfx++ } // Common suffix (from the end, but don't overlap with prefix) sfx := 0 for sfx < len(ra)-pfx && sfx < len(rb)-pfx && ra[len(ra)-1-sfx] == rb[len(rb)-1-sfx] { sfx++ } midA := ra[pfx : len(ra)-sfx] midB := rb[pfx : len(rb)-sfx] if len(midA) == 0 && len(midB) == 0 { // Lines are identical — no highlighting needed return a, b } prefix := string(ra[:pfx]) suffix := string(ra[len(ra)-sfx:]) hlA := prefix + ansiBold + ansiRedBg + string(midA) + ansiReset + ansiRed + suffix hlB := prefix + ansiBold + ansiGrnBg + string(midB) + ansiReset + ansiGreen + suffix return hlA, hlB }