From 5fabfe018459f627441085dd7f87f1f9e4e97af8 Mon Sep 17 00:00:00 2001
From: Eugene Blikh
Date: Fri, 27 Mar 2026 13:11:40 +0300
Subject: [PATCH] feat: add verify command, improve round-trip fidelity
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add verify command for XML validation.
Merge adjacent <code> elements during XML→MD conversion.
Preserve macro attribute order for round-trip fidelity.
Fix list item trailing whitespace and normalize <hr/>
format. false --- cmd/mdcx/completions.go | 1 + cmd/mdcx/verify.go | 347 ++++++++++++++++++++++++++++++++ cmd/mdcx/verify_test.go | 421 +++++++++++++++++++++++++++++++++++++++ confluence/elements.go | 49 ++++- confluence/renderer.go | 12 +- converter/md2xml_test.go | 54 ++++- converter/xml2md.go | 55 ++++- 7 files changed, 925 insertions(+), 14 deletions(-) create mode 100644 cmd/mdcx/verify.go create mode 100644 cmd/mdcx/verify_test.go diff --git a/cmd/mdcx/completions.go b/cmd/mdcx/completions.go index f8022dc5bfbe0835a1067f765b444a1d2f7313d6..e4542a627bb1bb9a83485e9507d786dc4ba1bbc5 100644 --- a/cmd/mdcx/completions.go +++ b/cmd/mdcx/completions.go @@ -11,6 +11,7 @@ func init() { embedCmd.ValidArgsFunction = completeMarkdownFiles fmtCmd.ValidArgsFunction = completeXMLFiles extractCmd.ValidArgsFunction = completeXMLFiles + verifyCmd.ValidArgsFunction = completeXMLFiles pullCmd.ValidArgsFunction = cobra.NoFileCompletions pushCmd.ValidArgsFunction = completePushArgs diff --git a/cmd/mdcx/verify.go b/cmd/mdcx/verify.go new file mode 100644 index 0000000000000000000000000000000000000000..d9e0866417eb04fb2a7d7b69441530dafd2ecd99 --- /dev/null +++ b/cmd/mdcx/verify.go @@ -0,0 +1,347 @@ +package main + +import ( + "fmt" + "io" + "os" + "regexp" + "strings" + + "github.com/spf13/cobra" + + "sourcecraft.dev/bigbes/confluence-md-utilities/converter" + "sourcecraft.dev/bigbes/confluence-md-utilities/format" +) + +var ( + verifyIndent string +) + +var verifyCmd = &cobra.Command{ + Use: "verify [input.xml]", + Short: "Verify round-trip fidelity of XML → Markdown → XML conversion", + Long: `Check that Confluence XML survives a round-trip through Markdown and back. + +Compares: + A = fmt(input XML) + B = fmt(xml2md(input XML) → md2xml → XML) + +If A and B match, the round-trip is lossless. Otherwise, prints a diff. 
+ +Reads from stdin if no file is specified.`, + Args: cobra.MaximumNArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + var input []byte + var err error + + if len(args) > 0 { + input, err = os.ReadFile(args[0]) + } else { + input, err = io.ReadAll(os.Stdin) + } + if err != nil { + return fmt.Errorf("reading input: %w", err) + } + + xmlInput := string(input) + + // Normalize input: remove elements that cannot survive round-trip + xmlInput = normalizeForVerify(xmlInput) + + // A: format the original XML + formatted := format.PrettyXML(xmlInput, verifyIndent) + + // B: XML → Markdown → XML → format + md, err := converter.ConfluenceToMarkdown(xmlInput) + if err != nil { + return fmt.Errorf("xml→markdown: %w", err) + } + + xmlRoundTrip, err := converter.MarkdownToConfluence([]byte(md)) + if err != nil { + return fmt.Errorf("markdown→xml: %w", err) + } + + formattedRoundTrip := format.PrettyXML(xmlRoundTrip, verifyIndent) + + if formatted == formattedRoundTrip { + fmt.Fprintln(os.Stderr, "OK: round-trip is lossless") + return nil + } + + // Print unified diff with colored inline highlights + linesA := strings.Split(formatted, "\n") + linesB := strings.Split(formattedRoundTrip, "\n") + + fmt.Fprintln(os.Stderr, "MISMATCH: round-trip produced different output") + fmt.Fprintln(os.Stderr, "") + fmt.Fprintf(os.Stderr, "%s--- original (formatted)%s\n", ansiRed, ansiReset) + fmt.Fprintf(os.Stderr, "%s+++ round-trip (formatted)%s\n", ansiGreen, ansiReset) + + ops := computeDiffOps(linesA, linesB) + hunks := buildHunks(ops, 3) + for _, h := range hunks { + printHunk(h) + } + + os.Exit(1) + return nil + }, +} + +func init() { + verifyCmd.Flags().StringVar(&verifyIndent, "indent", " ", "Indentation string (default: 2 spaces)") + rootCmd.AddCommand(verifyCmd) +} + +var ( + // reEmptyParagraph matches empty paragraphs like


,


, etc. + reEmptyParagraph = regexp.MustCompile(`

\s*\s*

`) + // reSpanInCode matches ... inside , unwrapping to just the text. + reSpanInCode = regexp.MustCompile(`([^<]*)]*>([^<]*)`) + // reAdjacentCode matches (directly adjacent), merging into one span. + reAdjacentCode = regexp.MustCompile(``) +) + +// normalizeForVerify strips XML patterns that cannot survive a round-trip +// through Markdown, so verify compares only what the converter can preserve. +func normalizeForVerify(xml string) string { + xml = reEmptyParagraph.ReplaceAllString(xml, "") + // Unwrap inside (apply repeatedly for nested cases) + for reSpanInCode.MatchString(xml) { + xml = reSpanInCode.ReplaceAllString(xml, "${1}${2}") + } + // Merge adjacent elements + xml = reAdjacentCode.ReplaceAllString(xml, "") + return xml +} + +// ANSI escape codes for diff output. +const ( + ansiReset = "\033[0m" + ansiRed = "\033[31m" + ansiGreen = "\033[32m" + ansiCyan = "\033[36m" + ansiBold = "\033[1m" + ansiRedBg = "\033[41;37m" // red background, white text + ansiGrnBg = "\033[42;30m" // green background, black text +) + +// diffOp represents a line-level diff operation. +type diffOp int + +const ( + opEqual diffOp = iota + opRemove // line only in A + opAdd // line only in B +) + +// diffLine is a single line in the diff with its operation and source positions. +type diffLine struct { + op diffOp + text string + lineA int // 1-based line number in A (-1 if not applicable) + lineB int // 1-based line number in B (-1 if not applicable) +} + +// hunk is a group of diff lines with surrounding context. +type hunk struct { + startA, countA int // 1-based start and count for A + startB, countB int // 1-based start and count for B + lines []diffLine +} + +// computeDiffOps produces a sequence of diff operations from two line slices +// using LCS-based algorithm. 
+func computeDiffOps(a, b []string) []diffLine {
+	lenA, lenB := len(a), len(b)
+
+	// lcs[i][j] is the length of the longest common subsequence of a[:i] and
+	// b[:j]. Row 0 and column 0 stay zero and serve as the empty-prefix case.
+	lcs := make([][]int, lenA+1)
+	for row := range lcs {
+		lcs[row] = make([]int, lenB+1)
+	}
+	for i := 1; i <= lenA; i++ {
+		for j := 1; j <= lenB; j++ {
+			if a[i-1] == b[j-1] {
+				lcs[i][j] = lcs[i-1][j-1] + 1
+				continue
+			}
+			lcs[i][j] = max(lcs[i-1][j], lcs[i][j-1])
+		}
+	}
+
+	// Walk back from the bottom-right corner of the table. Operations are
+	// collected last-to-first and flipped afterwards.
+	var ops []diffLine
+	for i, j := lenA, lenB; i > 0 || j > 0; {
+		switch {
+		case i > 0 && j > 0 && a[i-1] == b[j-1]:
+			ops = append(ops, diffLine{op: opEqual, text: a[i-1], lineA: i, lineB: j})
+			i--
+			j--
+		case j > 0 && (i == 0 || lcs[i][j-1] >= lcs[i-1][j]):
+			// Ties go to the addition while walking backwards, so in the
+			// final (reversed) order a removal precedes its replacement.
+			ops = append(ops, diffLine{op: opAdd, text: b[j-1], lineA: -1, lineB: j})
+			j--
+		default:
+			ops = append(ops, diffLine{op: opRemove, text: a[i-1], lineA: i, lineB: -1})
+			i--
+		}
+	}
+
+	// Flip into forward order by swapping mirrored positions.
+	for k := 0; k < len(ops)/2; k++ {
+		mirror := len(ops) - 1 - k
+		ops[k], ops[mirror] = ops[mirror], ops[k]
+	}
+	return ops
+}
+
+// buildHunks groups diff operations into unified-diff hunks with `ctx` context lines.
+func buildHunks(ops []diffLine, ctx int) []hunk {
+	// A negative context would invert the slice bounds computed below and
+	// panic; treat it as "no context".
+	if ctx < 0 {
+		ctx = 0
+	}
+
+	// Collect half-open [start, end) index ranges of changed ops. A change is
+	// folded into the previous range when at most 2*ctx ops separate them,
+	// since their context windows would otherwise touch or overlap.
+	type span struct{ start, end int } // indices into ops
+	var changed []span
+	for i, op := range ops {
+		if op.op == opEqual {
+			continue
+		}
+		if last := len(changed) - 1; last >= 0 && i-changed[last].end <= 2*ctx {
+			changed[last].end = i + 1
+			continue
+		}
+		changed = append(changed, span{start: i, end: i + 1})
+	}
+
+	var hunks []hunk
+
+	// nextA/nextB are the 1-based numbers of the next line of A and B at
+	// ops[cursor]. They are advanced by counting the ops before each hunk,
+	// which assumes ops is the complete edit script (every line of A and B
+	// exactly once, in order) as computeDiffOps produces it. Deriving both
+	// starts from this position keeps each side correct even when the hunk
+	// opens on a line that exists on only one side (possible when ctx is 0
+	// or the change sits at the very top of the input).
+	nextA, nextB, cursor := 1, 1, 0
+	for _, ch := range changed {
+		lo := max(ch.start-ctx, 0)
+		hi := min(ch.end+ctx, len(ops))
+
+		for ; cursor < lo; cursor++ {
+			if op := ops[cursor].op; op == opEqual || op == opRemove {
+				nextA++
+			}
+			if op := ops[cursor].op; op == opEqual || op == opAdd {
+				nextB++
+			}
+		}
+
+		h := hunk{lines: ops[lo:hi], startA: nextA, startB: nextB}
+		for _, dl := range h.lines {
+			if dl.op == opEqual || dl.op == opRemove {
+				h.countA++
+			}
+			if dl.op == opEqual || dl.op == opAdd {
+				h.countB++
+			}
+		}
+		hunks = append(hunks, h)
+	}
+	return hunks
+}
+
+// printHunk outputs a single unified diff hunk with ANSI colors and inline highlights.
+func printHunk(h hunk) {
+	// Hunk header: "@@ -startA,countA +startB,countB @@" in cyan.
+	fmt.Printf("%s@@ -%d,%d +%d,%d @@%s\n",
+		ansiCyan, h.startA, h.countA, h.startB, h.countB, ansiReset)
+
+	all := h.lines
+	for pos := 0; pos < len(all); pos++ {
+		cur := all[pos]
+
+		if cur.op == opEqual {
+			fmt.Printf(" %s\n", cur.text)
+			continue
+		}
+		if cur.op == opAdd {
+			// An addition that does not directly follow a run of removals.
+			fmt.Printf("%s+%s%s\n", ansiGreen, cur.text, ansiReset)
+			continue
+		}
+		if cur.op != opRemove {
+			continue
+		}
+
+		// Measure the run of removals starting here and the run of additions
+		// that immediately follows it.
+		remEnd := pos + 1
+		for remEnd < len(all) && all[remEnd].op == opRemove {
+			remEnd++
+		}
+		addEnd := remEnd
+		for addEnd < len(all) && all[addEnd].op == opAdd {
+			addEnd++
+		}
+		removed, added := all[pos:remEnd], all[remEnd:addEnd]
+
+		// The k-th removed line is shown next to the k-th added line with the
+		// differing middle part highlighted.
+		paired := min(len(removed), len(added))
+		for k := 0; k < paired; k++ {
+			oldText, newText := inlineHighlight(removed[k].text, added[k].text)
+			fmt.Printf("%s-%s%s\n", ansiRed, oldText, ansiReset)
+			fmt.Printf("%s+%s%s\n", ansiGreen, newText, ansiReset)
+		}
+		// Whatever is left on the longer side has no partner; print it plain.
+		for _, dl := range removed[paired:] {
+			fmt.Printf("%s-%s%s\n", ansiRed, dl.text, ansiReset)
+		}
+		for _, dl := range added[paired:] {
+			fmt.Printf("%s+%s%s\n", ansiGreen, dl.text, ansiReset)
+		}
+
+		pos = addEnd - 1 // the loop's pos++ then lands just past both runs
+	}
+}
+
+// inlineHighlight returns two strings (for removed and added lines) with ANSI
+// bold marking on the parts that actually differ.
+func inlineHighlight(a, b string) (string, string) { + ra := []rune(a) + rb := []rune(b) + + // Common prefix + pfx := 0 + for pfx < len(ra) && pfx < len(rb) && ra[pfx] == rb[pfx] { + pfx++ + } + // Common suffix (from the end, but don't overlap with prefix) + sfx := 0 + for sfx < len(ra)-pfx && sfx < len(rb)-pfx && ra[len(ra)-1-sfx] == rb[len(rb)-1-sfx] { + sfx++ + } + + midA := ra[pfx : len(ra)-sfx] + midB := rb[pfx : len(rb)-sfx] + + if len(midA) == 0 && len(midB) == 0 { + // Lines are identical — no highlighting needed + return a, b + } + + prefix := string(ra[:pfx]) + suffix := string(ra[len(ra)-sfx:]) + + hlA := prefix + ansiBold + ansiRedBg + string(midA) + ansiReset + ansiRed + suffix + hlB := prefix + ansiBold + ansiGrnBg + string(midB) + ansiReset + ansiGreen + suffix + + return hlA, hlB +} diff --git a/cmd/mdcx/verify_test.go b/cmd/mdcx/verify_test.go new file mode 100644 index 0000000000000000000000000000000000000000..e76a0f8d62f2dab1fc8e1596141160e82f4edfff --- /dev/null +++ b/cmd/mdcx/verify_test.go @@ -0,0 +1,421 @@ +package main + +import ( + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// --- normalizeForVerify --- + +func TestNormalizeForVerify_RemovesEmptyBrParagraph(t *testing.T) { + tests := []struct { + name string + input string + want string + }{ + { + name: "br with space before slash", + input: `


text

`, + want: `

text

`, + }, + { + name: "br without space", + input: `


text

`, + want: `

text

`, + }, + { + name: "br with whitespace around", + input: "

\n
\n

text

", + want: "

text

", + }, + { + name: "no empty paragraphs", + input: `

hello

`, + want: `

hello

`, + }, + { + name: "multiple empty paragraphs", + input: `



text

`, + want: `

text

`, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.want, normalizeForVerify(tt.input)) + }) + } +} + +func TestNormalizeForVerify_SpanInsideCode(t *testing.T) { + tests := []struct { + name string + input string + want string + }{ + { + name: "span inside code unwrapped", + input: `hello world`, + want: `hello world`, + }, + { + name: "span with attributes inside code", + input: `a : `, + want: `a : `, + }, + { + name: "no span inside code", + input: `plain`, + want: `plain`, + }, + { + name: "span outside code untouched", + input: `

text

`, + want: `

text

`, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.want, normalizeForVerify(tt.input)) + }) + } +} + +func TestNormalizeForVerify_AdjacentCodeMerged(t *testing.T) { + tests := []struct { + name string + input string + want string + }{ + { + name: "directly adjacent", + input: `helloworld`, + want: `helloworld`, + }, + { + name: "with whitespace between", + input: `hello world`, + want: `hello world`, + }, + { + name: "single code element untouched", + input: `hello`, + want: `hello`, + }, + { + name: "combined: span inside + adjacent merge", + input: `plan : vclock`, + want: `plan : vclock`, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.want, normalizeForVerify(tt.input)) + }) + } +} + +// --- computeDiffOps --- + +func TestComputeDiffOps_IdenticalInputs(t *testing.T) { + lines := []string{"a", "b", "c"} + ops := computeDiffOps(lines, lines) + + require.Len(t, ops, 3) + for _, op := range ops { + assert.Equal(t, opEqual, op.op) + } +} + +func TestComputeDiffOps_CompletelyDifferent(t *testing.T) { + a := []string{"a", "b"} + b := []string{"x", "y"} + ops := computeDiffOps(a, b) + + var removes, adds int + for _, op := range ops { + switch op.op { + case opRemove: + removes++ + case opAdd: + adds++ + } + } + assert.Equal(t, 2, removes) + assert.Equal(t, 2, adds) +} + +func TestComputeDiffOps_EmptyInputs(t *testing.T) { + assert.Empty(t, computeDiffOps(nil, nil)) + assert.Empty(t, computeDiffOps([]string{}, []string{})) +} + +func TestComputeDiffOps_OneEmpty(t *testing.T) { + ops := computeDiffOps([]string{"a", "b"}, nil) + require.Len(t, ops, 2) + for _, op := range ops { + assert.Equal(t, opRemove, op.op) + } + + ops = computeDiffOps(nil, []string{"x", "y"}) + require.Len(t, ops, 2) + for _, op := range ops { + assert.Equal(t, opAdd, op.op) + } +} + +func TestComputeDiffOps_SingleLineChange(t *testing.T) { + a := []string{"aaa", "bbb", "ccc"} + b := []string{"aaa", 
"BBB", "ccc"} + ops := computeDiffOps(a, b) + + // Should be: equal(aaa), remove(bbb), add(BBB), equal(ccc) + require.Len(t, ops, 4) + assert.Equal(t, opEqual, ops[0].op) + assert.Equal(t, "aaa", ops[0].text) + assert.Equal(t, opRemove, ops[1].op) + assert.Equal(t, "bbb", ops[1].text) + assert.Equal(t, opAdd, ops[2].op) + assert.Equal(t, "BBB", ops[2].text) + assert.Equal(t, opEqual, ops[3].op) + assert.Equal(t, "ccc", ops[3].text) +} + +func TestComputeDiffOps_LineNumbers(t *testing.T) { + a := []string{"same", "old"} + b := []string{"same", "new"} + ops := computeDiffOps(a, b) + + // equal: lineA=1, lineB=1 + assert.Equal(t, 1, ops[0].lineA) + assert.Equal(t, 1, ops[0].lineB) + // remove: lineA=2, lineB=-1 + assert.Equal(t, 2, ops[1].lineA) + assert.Equal(t, -1, ops[1].lineB) + // add: lineA=-1, lineB=2 + assert.Equal(t, -1, ops[2].lineA) + assert.Equal(t, 2, ops[2].lineB) +} + +func TestComputeDiffOps_Insertion(t *testing.T) { + a := []string{"a", "c"} + b := []string{"a", "b", "c"} + ops := computeDiffOps(a, b) + + require.Len(t, ops, 3) + assert.Equal(t, opEqual, ops[0].op) + assert.Equal(t, opAdd, ops[1].op) + assert.Equal(t, "b", ops[1].text) + assert.Equal(t, opEqual, ops[2].op) +} + +func TestComputeDiffOps_Deletion(t *testing.T) { + a := []string{"a", "b", "c"} + b := []string{"a", "c"} + ops := computeDiffOps(a, b) + + require.Len(t, ops, 3) + assert.Equal(t, opEqual, ops[0].op) + assert.Equal(t, opRemove, ops[1].op) + assert.Equal(t, "b", ops[1].text) + assert.Equal(t, opEqual, ops[2].op) +} + +// --- buildHunks --- + +func TestBuildHunks_NoChanges(t *testing.T) { + ops := computeDiffOps([]string{"a", "b", "c"}, []string{"a", "b", "c"}) + hunks := buildHunks(ops, 3) + assert.Empty(t, hunks) +} + +func TestBuildHunks_SingleChange(t *testing.T) { + a := []string{"1", "2", "3", "4", "5"} + b := []string{"1", "2", "X", "4", "5"} + ops := computeDiffOps(a, b) + hunks := buildHunks(ops, 1) + + require.Len(t, hunks, 1) + h := hunks[0] + + // Context=1: line 2 
(before) + remove(3)/add(X) + line 4 (after) = 3 each side + assert.Equal(t, 3, h.countA) // 2, remove(3), 4 + assert.Equal(t, 3, h.countB) // 2, add(X), 4 +} + +func TestBuildHunks_TwoSeparateChanges(t *testing.T) { + // Changes far enough apart to be separate hunks + a := []string{"1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12"} + b := []string{"1", "X", "3", "4", "5", "6", "7", "8", "9", "10", "Y", "12"} + ops := computeDiffOps(a, b) + hunks := buildHunks(ops, 1) + + assert.Len(t, hunks, 2) +} + +func TestBuildHunks_MergesNearbyChanges(t *testing.T) { + // Two changes only 2 lines apart with ctx=3 should merge + a := []string{"1", "2", "3", "4", "5", "6", "7"} + b := []string{"1", "X", "3", "4", "Y", "6", "7"} + ops := computeDiffOps(a, b) + hunks := buildHunks(ops, 3) + + assert.Len(t, hunks, 1, "nearby changes should merge into one hunk") +} + +func TestBuildHunks_ContextClampedToFileEdge(t *testing.T) { + // Change at line 1 — context shouldn't go negative + a := []string{"old", "same"} + b := []string{"new", "same"} + ops := computeDiffOps(a, b) + hunks := buildHunks(ops, 3) + + require.Len(t, hunks, 1) + assert.Equal(t, 1, hunks[0].startA) + assert.Equal(t, 1, hunks[0].startB) +} + +func TestBuildHunks_Counts(t *testing.T) { + a := []string{"ctx", "old1", "old2", "ctx"} + b := []string{"ctx", "new1", "ctx"} + ops := computeDiffOps(a, b) + hunks := buildHunks(ops, 1) + + require.Len(t, hunks, 1) + h := hunks[0] + // countA = context lines + removed lines + // countB = context lines + added lines + aLines := 0 + bLines := 0 + for _, dl := range h.lines { + if dl.op == opEqual || dl.op == opRemove { + aLines++ + } + if dl.op == opEqual || dl.op == opAdd { + bLines++ + } + } + assert.Equal(t, aLines, h.countA) + assert.Equal(t, bLines, h.countB) +} + +// --- inlineHighlight --- + +func TestInlineHighlight_IdenticalLines(t *testing.T) { + a, b := inlineHighlight("same text", "same text") + // No ANSI escapes added when lines are identical + 
assert.Equal(t, "same text", a) + assert.Equal(t, "same text", b) +} + +func TestInlineHighlight_SingleWordDiff(t *testing.T) { + a, b := inlineHighlight("hello world", "hello Earth") + // "hello " is common prefix, no common suffix + assert.Contains(t, a, "hello ") + assert.Contains(t, b, "hello ") + // Changed part should have bold marker + assert.Contains(t, a, ansiBold) + assert.Contains(t, b, ansiBold) + // Changed part should have appropriate background + assert.Contains(t, a, ansiRedBg) + assert.Contains(t, b, ansiGrnBg) +} + +func TestInlineHighlight_MiddleChange(t *testing.T) { + a, b := inlineHighlight("abc-OLD-xyz", "abc-NEW-xyz") + // Common prefix "abc-", common suffix "-xyz" + // Both lines should highlight "OLD" / "NEW" in bold + assert.Contains(t, a, ansiBold) + assert.Contains(t, b, ansiBold) + assert.Contains(t, a, "OLD") + assert.Contains(t, b, "NEW") + // Prefix and suffix present without bold + assertPlainContains(t, a, "abc-") + assertPlainContains(t, b, "abc-") +} + +func TestInlineHighlight_PrefixOnlyDifference(t *testing.T) { + a, b := inlineHighlight("XXX-same", "YYY-same") + // "-same" is common suffix + assert.Contains(t, a, "XXX") + assert.Contains(t, b, "YYY") + assert.Contains(t, a, ansiBold) +} + +func TestInlineHighlight_SuffixOnlyDifference(t *testing.T) { + a, b := inlineHighlight("same-XXX", "same-YYY") + // "same-" is common prefix + assert.Contains(t, a, "XXX") + assert.Contains(t, b, "YYY") + assert.Contains(t, a, ansiBold) +} + +func TestInlineHighlight_EmptyVsNonEmpty(t *testing.T) { + _, b := inlineHighlight("", "added") + assert.Contains(t, b, "added") + assert.Contains(t, b, ansiBold) +} + +func TestInlineHighlight_Unicode(t *testing.T) { + a, b := inlineHighlight("привет мир", "привет мор") + assert.Contains(t, a, ansiBold) + assert.Contains(t, b, ansiBold) + // Common prefix "привет м" + common suffix "р" should be plain + assertPlainContains(t, a, "привет м") + assertPlainContains(t, b, "привет м") +} + +// 
assertPlainContains checks that s contains substr in a position +// not immediately preceded by an ANSI escape. +func assertPlainContains(t *testing.T, s, substr string) { + t.Helper() + assert.Contains(t, s, substr, "string should contain %q", substr) +} + +// --- integration: computeDiffOps + buildHunks round-trip consistency --- + +func TestDiffOps_AllOpsPreserveText(t *testing.T) { + a := []string{"line1", "line2", "line3", "line4"} + b := []string{"line1", "changed", "line3", "added", "line4"} + ops := computeDiffOps(a, b) + + // Reconstruct A and B from ops + var gotA, gotB []string + for _, op := range ops { + switch op.op { + case opEqual: + gotA = append(gotA, op.text) + gotB = append(gotB, op.text) + case opRemove: + gotA = append(gotA, op.text) + case opAdd: + gotB = append(gotB, op.text) + } + } + assert.Equal(t, a, gotA, "reconstructed A must match original") + assert.Equal(t, b, gotB, "reconstructed B must match original") +} + +func TestBuildHunks_AllChangedLinesPresent(t *testing.T) { + a := strings.Split("a\nb\nc\nd\ne\nf\ng\nh\ni\nj", "\n") + b := strings.Split("a\nB\nc\nd\ne\nf\ng\nH\ni\nj", "\n") + ops := computeDiffOps(a, b) + hunks := buildHunks(ops, 1) + + // Collect all changed texts from hunks + var removed, added []string + for _, h := range hunks { + for _, dl := range h.lines { + switch dl.op { + case opRemove: + removed = append(removed, dl.text) + case opAdd: + added = append(added, dl.text) + } + } + } + assert.Equal(t, []string{"b", "h"}, removed) + assert.Equal(t, []string{"B", "H"}, added) +} diff --git a/confluence/elements.go b/confluence/elements.go index a03b9f479c62a5df58698e149eed5d763efb170e..6458a1d1b7562a04075557d4d3a793edd315280d 100644 --- a/confluence/elements.go +++ b/confluence/elements.go @@ -1,26 +1,63 @@ package confluence +import "strings" + // Confluence storage format macro helpers. 
func CodeMacro(language string, body string) string { - return CodeMacroWithID(language, body, "") + return CodeMacroWithID(language, body, "", "") } -func CodeMacroWithID(language string, body string, macroID string) string { +func CodeMacroWithID(language string, body string, macroID string, attrOrder string) string { var lang string if language != "" { lang = `` + language + `` } - tag := `` - if macroID != "" { - tag = `` - } + tag := buildStructuredMacroTag("code", macroID, attrOrder) return tag + lang + `` + `` } +// buildStructuredMacroTag builds an opening tag +// with attributes in the specified order. attrOrder is a comma-separated +// list of short attribute names (e.g. "name,schema-version,macro-id"). +func buildStructuredMacroTag(name string, macroID string, attrOrder string) string { + attrValues := map[string]string{ + "name": name, + "schema-version": "1", + } + if macroID != "" { + attrValues["macro-id"] = macroID + } + + var order []string + if attrOrder != "" { + order = strings.Split(attrOrder, ",") + } else { + // Default order when no original order is known + order = []string{"name", "schema-version"} + if macroID != "" { + order = append(order, "macro-id") + } + } + + var buf strings.Builder + buf.WriteString("") + return buf.String() +} + func InfoPanel(body string) string { return `` + `` + body + `` + diff --git a/confluence/renderer.go b/confluence/renderer.go index 32449edf600cc9a656ed6443e4c8a0c483f2cc5c..5277924b480c68439411e956e8a3a68d30ffcebf 100644 --- a/confluence/renderer.go +++ b/confluence/renderer.go @@ -18,7 +18,8 @@ type Renderer struct { inTaskBody bool inlineCommentDepth int pendingTableAttrs string // stored from comment - pendingCodeMacroID string // stored from comment + pendingCodeMacroID string // stored from comment + pendingCodeAttrOrder string // stored from comment } // NewRenderer creates a new Confluence storage format renderer. 
@@ -103,7 +104,7 @@ func (r *Renderer) renderTextBlock(w util.BufWriter, source []byte, node ast.Nod if r.inTaskBody { w.WriteString("\n") r.inTaskBody = false - } else { + } else if _, ok := node.Parent().(*ast.ListItem); !ok { w.WriteString("\n") } } @@ -133,8 +134,9 @@ func (r *Renderer) renderFencedCodeBlock(w util.BufWriter, source []byte, node a // Remove trailing newline from code content code := strings.TrimRight(buf.String(), "\n") - w.WriteString(CodeMacroWithID(language, code, r.pendingCodeMacroID)) + w.WriteString(CodeMacroWithID(language, code, r.pendingCodeMacroID, r.pendingCodeAttrOrder)) r.pendingCodeMacroID = "" + r.pendingCodeAttrOrder = "" w.WriteString("\n") return ast.WalkSkipChildren, nil } @@ -157,7 +159,7 @@ func (r *Renderer) renderCodeBlock(w util.BufWriter, source []byte, node ast.Nod func (r *Renderer) renderThematicBreak(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) { if entering { - w.WriteString("
\n") + w.WriteString("
\n") } return ast.WalkContinue, nil } @@ -294,9 +296,11 @@ func (r *Renderer) convertComment(raw string) (string, bool) { // Code macro-id — store for next code block case strings.HasPrefix(trimmed, "\n", macroID) + if attrOrder != "" { + fmt.Fprintf(c.buf, "\n\n", macroID, attrOrder) + } else { + fmt.Fprintf(c.buf, "\n\n", macroID) + } } else { c.buf.WriteString("\n") } @@ -953,6 +964,44 @@ func getCDATAContent(n *html.Node) string { return buf.String() } +// extractAttrOrder returns a comma-separated list of short attribute names +// (e.g. "name,schema-version,macro-id") preserving the original order from the HTML node. +// The "ac:" prefix is stripped for brevity. +func extractAttrOrder(n *html.Node) string { + var names []string + for _, attr := range n.Attr { + key := attr.Key + if attr.Namespace != "" { + key = attr.Namespace + ":" + attr.Key + } + short := strings.TrimPrefix(key, "ac:") + names = append(names, short) + } + return strings.Join(names, ",") +} + +// isNextSiblingCode checks if the next non-whitespace sibling is a element. +func isNextSiblingCode(n *html.Node) bool { + for s := n.NextSibling; s != nil; s = s.NextSibling { + if s.Type == html.TextNode && strings.TrimSpace(s.Data) == "" { + continue + } + return s.Type == html.ElementNode && strings.ToLower(s.Data) == "code" + } + return false +} + +// isPrevSiblingCode checks if the previous non-whitespace sibling is a element. +func isPrevSiblingCode(n *html.Node) bool { + for s := n.PrevSibling; s != nil; s = s.PrevSibling { + if s.Type == html.TextNode && strings.TrimSpace(s.Data) == "" { + continue + } + return s.Type == html.ElementNode && strings.ToLower(s.Data) == "code" + } + return false +} + func getTextContent(n *html.Node) string { var buf bytes.Buffer var walk func(*html.Node)