pretty.go

v0.1.0

package format

import (
	"strings"
	"unicode/utf8"
)

// defaultMaxLineWidth is the width, measured in runes, beyond which PrettyXML
// wraps an output line and tryInlineBlock declines to keep a block on one line.
const defaultMaxLineWidth = 120

// blockTags is the set of elements PrettyXML starts on a line of their own.
// Children of an opening block tag are normally indented one level deeper.
var blockTags = map[string]bool{
	// Page layout containers.
	"ac:layout":         true,
	"ac:layout-section": true,
	"ac:layout-cell":    true,

	// Paragraphs, generic containers and headings.
	"p":   true,
	"div": true,
	"h1":  true,
	"h2":  true,
	"h3":  true,
	"h4":  true,
	"h5":  true,
	"h6":  true,

	// Lists.
	"ul": true,
	"ol": true,
	"li": true,

	// Tables.
	"table":    true,
	"thead":    true,
	"tbody":    true,
	"colgroup": true,
	"tr":       true,
	"th":       true,
	"td":       true,

	// Macros and their bodies.
	"ac:structured-macro": true,
	"ac:rich-text-body":   true,
	"ac:plain-text-body":  true,

	// Task lists.
	"ac:task-list": true,
	"ac:task":      true,
	"ac:task-body": true,
}

// inlineableBlocks is the subset of block tags that are kept on a single
// line (open tag, content, close tag) when the result is short enough.
//
// NOTE(review): "ac:task-id" and "ac:task-status" are not in blockTags, and
// this table is only consulted for tags that are, so those two entries
// currently have no effect. Confirm whether they were meant to be block tags.
var inlineableBlocks = map[string]bool{
	// List items and table cells.
	"li": true,
	"th": true,
	"td": true,

	// Headings.
	"h1": true,
	"h2": true,
	"h3": true,
	"h4": true,
	"h5": true,
	"h6": true,

	// Task metadata.
	"ac:task-id":     true,
	"ac:task-status": true,
}

// preTags names the elements whose bodies PrettyXML writes out token for
// token, without collapsing whitespace or re-indenting.
var preTags = map[string]bool{"ac:plain-text-body": true}

// preSpan is the byte range [start, end) of the formatted buffer that holds
// the verbatim body of a preformatted element (see preTags).
type preSpan struct {
	start, end int
}

// PrettyXML formats Confluence storage XML with sensible indentation.
//
// input is the markup to format and indent is the string written once per
// nesting level. Block tags (blockTags) start their own line and indent their
// children, short inlineable blocks stay on one line, and everything else
// flows inline with runs of whitespace collapsed to a single space. The body
// of a preTags element is copied through unchanged: it is neither collapsed,
// trimmed nor wrapped. Other lines wider than defaultMaxLineWidth are wrapped.
// The result always ends with exactly one newline.
func PrettyXML(input string, indent string) string {
	tokens := tokenize(input)
	var buf strings.Builder
	level := 0
	inPre := 0
	atLineStart := true

	// spans collects the verbatim regions so that post-processing can leave
	// them alone; preStart is where the region currently being written began.
	var spans []preSpan
	preStart := 0

	i := 0
	for i < len(tokens) {
		tok := tokens[i]

		// Inside a preformatted element every token is copied as-is; only the
		// nesting depth of pre tags is tracked so we know when it ends.
		if inPre > 0 {
			switch tok.kind {
			case tokenOpen:
				if preTags[tok.tagName()] {
					inPre++
				}
			case tokenClose:
				if preTags[tok.tagName()] {
					inPre--
					if inPre == 0 && buf.Len() > preStart {
						spans = append(spans, preSpan{preStart, buf.Len()})
					}
				}
			}
			buf.WriteString(tok.raw)
			i++
			continue
		}

		switch tok.kind {
		case tokenOpen:
			tagName := tok.tagName()
			switch {
			case preTags[tagName]:
				inPre++
				ensureIndentedLine(&buf, level, indent, &atLineStart)
				buf.WriteString(tok.raw)
				preStart = buf.Len()
			case blockTags[tagName]:
				// Try to inline short blocks like <li>text</li>, <h1>Title</h1>.
				if inlineableBlocks[tagName] {
					if inlined, skip := tryInlineBlock(tokens[i:], tagName); skip > 0 {
						ensureIndentedLine(&buf, level, indent, &atLineStart)
						buf.WriteString(inlined)
						buf.WriteString("\n")
						atLineStart = true
						i += skip
						continue
					}
				}
				ensureIndentedLine(&buf, level, indent, &atLineStart)
				buf.WriteString(tok.raw)
				buf.WriteString("\n")
				level++
				atLineStart = true
			default:
				if atLineStart {
					writeIndentPrefix(&buf, level, indent)
					atLineStart = false
				}
				buf.WriteString(tok.raw)
			}

		case tokenClose:
			if blockTags[tok.tagName()] {
				// Clamp so that stray close tags cannot drive the level negative.
				level--
				if level < 0 {
					level = 0
				}
				if !atLineStart {
					buf.WriteString("\n")
				}
				writeIndentPrefix(&buf, level, indent)
				buf.WriteString(tok.raw)
				buf.WriteString("\n")
				atLineStart = true
			} else {
				buf.WriteString(tok.raw)
			}

		case tokenSelfClose:
			tagName := tok.tagName()
			if blockTags[tagName] || tagName == "hr" || tagName == "col" {
				ensureIndentedLine(&buf, level, indent, &atLineStart)
				buf.WriteString(tok.raw)
				buf.WriteString("\n")
				atLineStart = true
			} else {
				if atLineStart {
					writeIndentPrefix(&buf, level, indent)
					atLineStart = false
				}
				buf.WriteString(tok.raw)
			}

		case tokenText:
			text := collapseWS(tok.raw)
			if atLineStart {
				// Leading whitespace on a fresh line is only source layout.
				text = strings.TrimLeft(text, " ")
			}
			// Mid-line, a whitespace-only token is kept: it is the space
			// between two inline elements, and dropping it would join words.
			if text != "" {
				if atLineStart {
					writeIndentPrefix(&buf, level, indent)
					atLineStart = false
				}
				buf.WriteString(text)
			}

		case tokenCDATA, tokenComment:
			if atLineStart {
				writeIndentPrefix(&buf, level, indent)
				atLineStart = false
			}
			buf.WriteString(tok.raw)
		}

		i++
	}

	// An unclosed pre element stays verbatim up to the end of the output.
	if inPre > 0 && buf.Len() > preStart {
		spans = append(spans, preSpan{preStart, buf.Len()})
	}

	return finishLines(buf.String(), spans)
}

// finishLines trims trailing blanks from each line of formatted and wraps
// lines wider than defaultMaxLineWidth. Lines that overlap one of spans
// (sorted, non-overlapping byte ranges of formatted) hold preformatted
// content: they are never wrapped, and only the part after the last
// preformatted byte is trimmed.
func finishLines(formatted string, spans []preSpan) string {
	lines := strings.Split(formatted, "\n")
	final := make([]string, 0, len(lines))
	pos := 0  // byte offset of the current line within formatted
	next := 0 // first span that does not end before the current line
	for _, line := range lines {
		lineEnd := pos + len(line)
		for next < len(spans) && spans[next].end <= pos {
			next++
		}
		// verbatimUpTo is the offset within line just past its last
		// preformatted byte, or -1 when the line holds none.
		verbatimUpTo := -1
		for k := next; k < len(spans) && spans[k].start < lineEnd; k++ {
			verbatimUpTo = min(spans[k].end, lineEnd) - pos
		}
		if verbatimUpTo >= 0 {
			final = append(final, line[:verbatimUpTo]+strings.TrimRight(line[verbatimUpTo:], " \t"))
		} else {
			line = strings.TrimRight(line, " \t")
			if runeWidth(line) > defaultMaxLineWidth {
				final = append(final, wrapLine(line, defaultMaxLineWidth)...)
			} else {
				final = append(final, line)
			}
		}
		pos = lineEnd + 1 // step over the "\n" that Split removed
	}
	return strings.TrimSpace(strings.Join(final, "\n")) + "\n"
}

// tryInlineBlock checks if the block starting at tokens[0] (an open tag named
// tagName) has only inline/text children and a matching close tag, and the
// total is short enough to fit on one line.
//
// It returns the single-line rendering and the number of tokens it covers.
// It returns ("", 0) when the block cannot be inlined: it contains a
// non-inlineable block child, it contains a multi-line CDATA section or
// comment, it has no matching close tag, or it is wider than
// defaultMaxLineWidth runes. The width check does not include indentation.
func tryInlineBlock(tokens []token, tagName string) (string, int) {
	if len(tokens) < 2 {
		return "", 0
	}

	// A rune is at most utf8.UTFMax bytes, so anything longer than this many
	// bytes is certainly wider than defaultMaxLineWidth runes. Giving up at
	// that point bounds the scan; otherwise an unclosed tag would make every
	// call walk to the end of the document.
	const maxBytes = utf8.UTFMax * defaultMaxLineWidth

	var inner strings.Builder
	inner.WriteString(tokens[0].raw)
	depth := 1 // nesting depth of tagName itself

	for j := 1; j < len(tokens); j++ {
		if inner.Len() > maxBytes {
			return "", 0
		}
		tok := tokens[j]
		switch tok.kind {
		case tokenOpen:
			tn := tok.tagName()
			if blockTags[tn] && !inlineableBlocks[tn] {
				// Contains a non-inlineable block child, so it can't be inlined.
				return "", 0
			}
			if tn == tagName {
				depth++
			}
			inner.WriteString(tok.raw)
		case tokenClose:
			inner.WriteString(tok.raw)
			if tok.tagName() != tagName {
				continue
			}
			depth--
			if depth == 0 {
				result := inner.String()
				if runeWidth(result) <= defaultMaxLineWidth {
					return result, j + 1
				}
				return "", 0
			}
		case tokenText:
			text := collapseWS(tok.raw)
			// Trim leading space only for the first text token after the open tag.
			if j == 1 {
				text = strings.TrimLeft(text, " ")
			}
			inner.WriteString(text)
		case tokenCDATA, tokenComment:
			// Embedded newlines would defeat the purpose of inlining.
			if strings.Contains(tok.raw, "\n") {
				return "", 0
			}
			inner.WriteString(tok.raw)
		default:
			inner.WriteString(tok.raw)
		}
	}
	// Ran out of tokens without finding the matching close tag.
	return "", 0
}

// wrapLine splits a long line at word boundaries, preserving leading indentation.
//
// line is a single output line (no "\n") and maxWidth is the target width in
// runes. The first returned line keeps the original indentation and each
// continuation line gets two extra spaces. Markup segments (tags, comments,
// CDATA sections) are never broken internally, so a piece wider than maxWidth
// is left overlong on a line of its own. Whitespace between pieces is kept as
// a single space or replaced by a line break; it is never silently dropped.
//
// NOTE(review): as before, a break may also be placed directly before a tag,
// or before the first word after a tag, where the source had no whitespace.
func wrapLine(line string, maxWidth int) []string {
	// Extract leading indentation.
	trimmed := strings.TrimLeft(line, " \t")
	indentStr := line[:len(line)-len(trimmed)]
	contIndent := indentStr + "  " // continuation lines get extra indent

	var (
		lines        []string
		cur          strings.Builder
		hasContent   bool // cur holds more than indentation
		pendingSpace bool // whitespace separates the last piece from the next
	)
	cur.WriteString(indentStr)
	curWidth := runeWidth(indentStr)

	// emit appends one unbreakable piece to the current line, starting a
	// continuation line first when the piece would not fit. A pending space
	// is written before the piece unless a line break takes its place.
	emit := func(piece string) {
		pieceW := runeWidth(piece)
		spaceW := 0
		if pendingSpace && hasContent {
			spaceW = 1
		}
		if hasContent && curWidth+spaceW+pieceW > maxWidth {
			lines = append(lines, cur.String())
			cur.Reset()
			cur.WriteString(contIndent)
			curWidth = runeWidth(contIndent)
			spaceW = 0
		}
		if spaceW == 1 {
			cur.WriteByte(' ')
			curWidth++
		}
		cur.WriteString(piece)
		curWidth += pieceW
		hasContent = true
		pendingSpace = false
	}

	for _, seg := range splitSegments(trimmed) {
		if seg == "" {
			continue
		}
		// Markup: never break inside.
		if strings.HasPrefix(seg, "<") {
			emit(seg)
			continue
		}

		// Text: break between words only.
		words := strings.Fields(seg)
		if len(words) == 0 {
			// Whitespace-only text between two tags still separates them.
			pendingSpace = true
			continue
		}
		// Fields strips surrounding whitespace, so the segment starts (ends)
		// with its first (last) word exactly when it has no leading
		// (trailing) whitespace.
		if !strings.HasPrefix(seg, words[0]) {
			pendingSpace = true
		}
		for k, word := range words {
			if k > 0 {
				pendingSpace = true
			}
			emit(word)
		}
		if !strings.HasSuffix(seg, words[len(words)-1]) {
			pendingSpace = true
		}
	}

	if hasContent {
		lines = append(lines, cur.String())
	}
	if len(lines) == 0 {
		return []string{line}
	}
	return lines
}

// splitSegments breaks text into alternating markup and text segments.
// E.g. "Hello <strong>world</strong> end" -> ["Hello ", "<strong>", "world", "</strong>", " end"]
//
// A comment, a CDATA section, or a tag whose quoted attribute values contain
// '>' is returned as one segment. Unterminated markup is returned as a single
// final segment.
func splitSegments(s string) []string {
	var segs []string
	for len(s) > 0 {
		lt := strings.IndexByte(s, '<')
		if lt == -1 {
			segs = append(segs, s)
			break
		}
		if lt > 0 {
			segs = append(segs, s[:lt])
			s = s[lt:]
		}
		n := markupLen(s)
		if n == -1 {
			segs = append(segs, s)
			break
		}
		segs = append(segs, s[:n])
		s = s[n:]
	}
	return segs
}

// markupLen returns the byte length of the markup construct at the start of
// s, which must begin with '<', or -1 when the construct is not terminated.
func markupLen(s string) int {
	switch {
	case strings.HasPrefix(s, "<![CDATA["):
		if end := strings.Index(s, "]]>"); end != -1 {
			return end + len("]]>")
		}
		return -1
	case strings.HasPrefix(s, "<!--"):
		if end := strings.Index(s, "-->"); end != -1 {
			return end + len("-->")
		}
		return -1
	}

	// Ordinary tag: the closing '>' is the first one outside a quoted value.
	var quote byte
	for i := 1; i < len(s); i++ {
		c := s[i]
		switch {
		case quote != 0:
			if c == quote {
				quote = 0
			}
		case c == '"' || c == '\'':
			quote = c
		case c == '>':
			return i + 1
		}
	}
	// Unbalanced quotes: fall back to the first '>' so that malformed input
	// is split the simple way instead of swallowing the rest of the line.
	if gt := strings.IndexByte(s, '>'); gt != -1 {
		return gt + 1
	}
	return -1
}

// runeWidth returns the display width of s, approximated as its rune count.
func runeWidth(s string) int {
	return utf8.RuneCountInString(s)
}

// ensureIndentedLine positions buf at the start of an indented line: if the
// current line already has content it is terminated first, then the
// indentation for level is written. *atLineStart is cleared afterwards
// because callers write their content straight after the indentation.
func ensureIndentedLine(buf *strings.Builder, level int, indent string, atLineStart *bool) {
	if midLine := !*atLineStart; midLine {
		buf.WriteByte('\n')
	}
	writeIndentPrefix(buf, level, indent)
	*atLineStart = false
}

func writeIndentPrefix(buf *strings.Builder, level int, indent string) {
	for range level {
		buf.WriteString(indent)
	}
}

// collapseWS replaces every run of spaces, tabs, carriage returns and line
// feeds in s with a single space. Leading and trailing runs are collapsed to
// one space as well, not removed.
func collapseWS(s string) string {
	var out strings.Builder
	prevWasSpace := false
	for _, ch := range s {
		switch ch {
		case ' ', '\t', '\n', '\r':
			if prevWasSpace {
				continue
			}
			out.WriteByte(' ')
			prevWasSpace = true
		default:
			out.WriteRune(ch)
			prevWasSpace = false
		}
	}
	return out.String()
}

// tokenKind classifies the pieces the XML tokenizer produces.
type tokenKind int

const (
	tokenOpen      tokenKind = iota // <tag ...>
	tokenClose                      // </tag>
	tokenSelfClose                  // <tag .../>
	tokenText                       // plain text
	tokenCDATA                      // <![CDATA[...]]>
	tokenComment                    // <!-- ... -->
)

// token is one lexical piece of the input; raw is its exact source text.
type token struct {
	kind tokenKind
	raw  string
}

// tagName returns the lower-cased element name of an open, close or
// self-closing tag token, and "" for every other kind of token.
func (t token) tagName() string {
	switch t.kind {
	case tokenOpen, tokenSelfClose:
		s := strings.TrimPrefix(t.raw, "<")
		if strings.HasSuffix(s, "/>") {
			s = s[:len(s)-2]
		} else {
			s = strings.TrimSuffix(s, ">")
		}
		// The name ends at the first whitespace; "\r" is included so that
		// tags broken across CRLF lines yield a clean name.
		if idx := strings.IndexAny(s, " \t\r\n"); idx > 0 {
			s = s[:idx]
		}
		return strings.ToLower(s)
	case tokenClose:
		s := strings.TrimPrefix(t.raw, "</")
		s = strings.TrimSuffix(s, ">")
		return strings.ToLower(strings.TrimSpace(s))
	}
	return ""
}

// tagEnd returns the index of the '>' that closes the tag at the start of s,
// which must begin with '<', or -1 if there is none. A '>' inside a quoted
// attribute value does not end the tag. If the quotes are unbalanced the
// first '>' is used, so malformed input is tokenized the simple way.
func tagEnd(s string) int {
	var quote byte
	for i := 1; i < len(s); i++ {
		c := s[i]
		switch {
		case quote != 0:
			if c == quote {
				quote = 0
			}
		case c == '"' || c == '\'':
			quote = c
		case c == '>':
			return i
		}
	}
	return strings.IndexByte(s, '>')
}

// tokenize splits input into tags, text, CDATA sections and comments.
// Concatenating the raw field of every returned token reproduces input
// exactly. An unterminated CDATA section or comment becomes a single final
// token of that kind, and an unterminated tag becomes a final text token.
func tokenize(input string) []token {
	var tokens []token
	i := 0
	for i < len(input) {
		rest := input[i:]

		// Text runs up to the next '<'.
		if rest[0] != '<' {
			end := strings.IndexByte(rest, '<')
			if end == -1 {
				tokens = append(tokens, token{tokenText, rest})
				break
			}
			tokens = append(tokens, token{tokenText, rest[:end]})
			i += end
			continue
		}

		if strings.HasPrefix(rest, "<![CDATA[") {
			end := strings.Index(rest, "]]>")
			if end == -1 {
				tokens = append(tokens, token{tokenCDATA, rest})
				break
			}
			tokens = append(tokens, token{tokenCDATA, rest[:end+3]})
			i += end + 3
			continue
		}

		if strings.HasPrefix(rest, "<!--") {
			end := strings.Index(rest, "-->")
			if end == -1 {
				tokens = append(tokens, token{tokenComment, rest})
				break
			}
			tokens = append(tokens, token{tokenComment, rest[:end+3]})
			i += end + 3
			continue
		}

		end := tagEnd(rest)
		if end == -1 {
			tokens = append(tokens, token{tokenText, rest})
			break
		}
		tagStr := rest[:end+1]
		switch {
		case strings.HasPrefix(tagStr, "</"):
			tokens = append(tokens, token{tokenClose, tagStr})
		case strings.HasSuffix(tagStr, "/>"):
			tokens = append(tokens, token{tokenSelfClose, tagStr})
		default:
			tokens = append(tokens, token{tokenOpen, tagStr})
		}
		i += end + 1
	}
	return tokens
}


1	package format
2
3	import (
4	"strings"
5	"unicode/utf8"
6	)
7
8	const defaultMaxLineWidth = 120
9
10	// Block elements get their own line and increase indentation for children.
11	var blockTags = map[string]bool{
12	// Layout
13	"ac:layout": true,
14	"ac:layout-section": true,
15	"ac:layout-cell": true,
16	// Block content
17	"p": true,
18	"h1": true, "h2": true, "h3": true, "h4": true, "h5": true, "h6": true,
19	"div": true,
20	// Lists
21	"ul": true, "ol": true, "li": true,
22	// Tables
23	"table": true, "thead": true, "tbody": true, "colgroup": true,
24	"tr": true, "th": true, "td": true,
25	// Macros
26	"ac:structured-macro": true,
27	"ac:rich-text-body": true,
28	"ac:plain-text-body": true,
29	// Task lists
30	"ac:task-list": true,
31	"ac:task": true,
32	"ac:task-body": true,
33	}
34
35	// inlineableBlocks: block tags that prefer to stay on one line if short enough.
36	var inlineableBlocks = map[string]bool{
37	"li": true, "th": true, "td": true,
38	"h1": true, "h2": true, "h3": true, "h4": true, "h5": true, "h6": true,
39	"ac:task-id": true, "ac:task-status": true,
40	}
41
42	// Pre elements: content inside is not reformatted.
43	var preTags = map[string]bool{
44	"ac:plain-text-body": true,
45	}
46
47	// PrettyXML formats Confluence storage XML with sensible indentation.
48	func PrettyXML(input string, indent string) string {
49	tokens := tokenize(input)
50	var buf strings.Builder
51	level := 0
52	inPre := 0
53	atLineStart := true
54
55	i := 0
56	for i < len(tokens) {
57	tok := tokens[i]
58
59	switch tok.kind {
60	case tokenOpen:
61	tagName := tok.tagName()
62	if inPre > 0 {
63	buf.WriteString(tok.raw)
64	if preTags[tagName] {
65	inPre++
66	}
67	i++
68	continue
69	}
70	if preTags[tagName] {
71	inPre++
72	ensureIndentedLine(&buf, level, indent, &atLineStart)
73	buf.WriteString(tok.raw)
74	i++
75	continue
76	}
77	if blockTags[tagName] {
78	// Try to inline short blocks like <li>text</li>, <h1>Title</h1>
79	if inlineableBlocks[tagName] {
80	if inlined, skip := tryInlineBlock(tokens[i:], tagName); skip > 0 {
81	ensureIndentedLine(&buf, level, indent, &atLineStart)
82	buf.WriteString(inlined)
83	buf.WriteString("\n")
84	atLineStart = true
85	i += skip
86	continue
87	}
88	}
89	ensureIndentedLine(&buf, level, indent, &atLineStart)
90	buf.WriteString(tok.raw)
91	buf.WriteString("\n")
92	level++
93	atLineStart = true
94	} else {
95	if atLineStart {
96	writeIndentPrefix(&buf, level, indent)
97	atLineStart = false
98	}
99	buf.WriteString(tok.raw)
100	}
101
102	case tokenClose:
103	tagName := tok.tagName()
104	if inPre > 0 {
105	buf.WriteString(tok.raw)
106	if preTags[tagName] {
107	inPre--
108	}
109	i++
110	continue
111	}
112	if blockTags[tagName] {
113	level--
114	if level < 0 {
115	level = 0
116	}
117	if !atLineStart {
118	buf.WriteString("\n")
119	}
120	writeIndentPrefix(&buf, level, indent)
121	buf.WriteString(tok.raw)
122	buf.WriteString("\n")
123	atLineStart = true
124	} else {
125	buf.WriteString(tok.raw)
126	}
127
128	case tokenSelfClose:
129	tagName := tok.tagName()
130	if inPre > 0 {
131	buf.WriteString(tok.raw)
132	i++
133	continue
134	}
135	if blockTags[tagName] || tagName == "hr" || tagName == "col" {
136	ensureIndentedLine(&buf, level, indent, &atLineStart)
137	buf.WriteString(tok.raw)
138	buf.WriteString("\n")
139	atLineStart = true
140	} else {
141	if atLineStart {
142	writeIndentPrefix(&buf, level, indent)
143	atLineStart = false
144	}
145	buf.WriteString(tok.raw)
146	}
147
148	case tokenText:
149	if inPre > 0 {
150	buf.WriteString(tok.raw)
151	i++
152	continue
153	}
154	text := collapseWS(tok.raw)
155	if text == "" || text == " " {
156	i++
157	continue
158	}
159	if atLineStart {
160	text = strings.TrimLeft(text, " ")
161	if text == "" {
162	i++
163	continue
164	}
165	writeIndentPrefix(&buf, level, indent)
166	atLineStart = false
167	}
168	buf.WriteString(text)
169
170	case tokenCDATA, tokenComment:
171	if inPre > 0 {
172	buf.WriteString(tok.raw)
173	i++
174	continue
175	}
176	if atLineStart {
177	writeIndentPrefix(&buf, level, indent)
178	atLineStart = false
179	}
180	buf.WriteString(tok.raw)
181	}
182
183	i++
184	}
185
186	result := buf.String()
187	// Post-process: clean up lines and wrap long ones
188	lines := strings.Split(result, "\n")
189	var final []string
190	for _, line := range lines {
191	line = strings.TrimRight(line, " \t")
192	if runeWidth(line) > defaultMaxLineWidth {
193	final = append(final, wrapLine(line, defaultMaxLineWidth)...)
194	} else {
195	final = append(final, line)
196	}
197	}
198	return strings.TrimSpace(strings.Join(final, "\n")) + "\n"
199	}
200
201	// tryInlineBlock checks if the block starting at tokens[0] (an open tag) has
202	// only inline/text children and a matching close tag, and the total is short
203	// enough to fit on one line. Returns the inlined string and number of tokens consumed.
204	func tryInlineBlock(tokens []token, tagName string) (string, int) {
205	if len(tokens) < 2 {
206	return "", 0
207	}
208	// Scan forward to find matching close tag
209	depth := 0
210	var inner strings.Builder
211	for j, tok := range tokens {
212	if j == 0 {
213	inner.WriteString(tok.raw)
214	depth = 1
215	continue
216	}
217	switch tok.kind {
218	case tokenOpen:
219	tn := tok.tagName()
220	if blockTags[tn] && !inlineableBlocks[tn] {
221	// Contains a non-inlineable block child — can't inline
222	return "", 0
223	}
224	if tn == tagName {
225	depth++
226	}
227	inner.WriteString(tok.raw)
228	case tokenClose:
229	tn := tok.tagName()
230	if tn == tagName {
231	depth--
232	if depth == 0 {
233	inner.WriteString(tok.raw)
234	result := inner.String()
235	if runeWidth(result) <= defaultMaxLineWidth {
236	return result, j + 1
237	}
238	return "", 0
239	}
240	}
241	inner.WriteString(tok.raw)
242	case tokenText:
243	text := collapseWS(tok.raw)
244	if text == "" {
245	continue
246	}
247	// Trim leading space only for the first text token after open tag
248	if j == 1 {
249	text = strings.TrimLeft(text, " ")
250	}
251	inner.WriteString(text)
252	case tokenCDATA:
253	// CDATA in an inlineable block — don't inline if multiline
254	if strings.Contains(tok.raw, "\n") {
255	return "", 0
256	}
257	inner.WriteString(tok.raw)
258	default:
259	inner.WriteString(tok.raw)
260	}
261	}
262	return "", 0
263	}
264
265	// wrapLine splits a long line at word boundaries, preserving leading indentation.
266	// It is XML-aware: it won't break inside tags (< ... >).
267	func wrapLine(line string, maxWidth int) []string {
268	// Extract leading indentation
269	trimmed := strings.TrimLeft(line, " \t")
270	indentStr := line[:len(line)-len(trimmed)]
271	contIndent := indentStr + "  " // continuation lines get extra indent
272
273	// Split into segments: tags (unsplittable) and text (splittable at spaces)
274	segments := splitSegments(trimmed)
275
276	var lines []string
277	var cur strings.Builder
278	cur.WriteString(indentStr)
279	curWidth := runeWidth(indentStr)
280
281	for _, seg := range segments {
282	segW := runeWidth(seg)
283
284	if seg == "" {
285	continue
286	}
287
288	// Tags and non-space text: never break inside
289	if strings.HasPrefix(seg, "<") {
290	// If adding this tag exceeds limit and we have content, wrap
291	if curWidth+segW > maxWidth && curWidth > runeWidth(indentStr) {
292	lines = append(lines, strings.TrimRight(cur.String(), " "))
293	cur.Reset()
294	cur.WriteString(contIndent)
295	curWidth = runeWidth(contIndent)
296	}
297	cur.WriteString(seg)
298	curWidth += segW
299	continue
300	}
301
302	// Text segment: split at word boundaries
303	words := strings.Fields(seg)
304	// Preserve leading space if original had one
305	needSpace := len(seg) > 0 && seg[0] == ' '
306
307	for _, word := range words {
308	wordW := runeWidth(word)
309	spaceW := 0
310	if needSpace {
311	spaceW = 1
312	}
313
314	if curWidth+spaceW+wordW > maxWidth && curWidth > runeWidth(contIndent) {
315	lines = append(lines, strings.TrimRight(cur.String(), " "))
316	cur.Reset()
317	cur.WriteString(contIndent)
318	curWidth = runeWidth(contIndent)
319	needSpace = false
320	}
321
322	if needSpace {
323	cur.WriteByte(' ')
324	curWidth++
325	}
326	cur.WriteString(word)
327	curWidth += wordW
328	needSpace = true
329	}
330	}
331
332	if cur.Len() > 0 {
333	final := strings.TrimRight(cur.String(), " ")
334	if final != "" {
335	lines = append(lines, final)
336	}
337	}
338
339	if len(lines) == 0 {
340	return []string{line}
341	}
342	return lines
343	}
344
345	// splitSegments breaks text into alternating tag and text segments.
346	// E.g. "Hello <strong>world</strong> end" -> ["Hello ", "<strong>", "world", "</strong>", " end"]
347	func splitSegments(s string) []string {
348	var segs []string
349	for len(s) > 0 {
350	lt := strings.Index(s, "<")
351	if lt == -1 {
352	segs = append(segs, s)
353	break
354	}
355	if lt > 0 {
356	segs = append(segs, s[:lt])
357	}
358	gt := strings.Index(s[lt:], ">")
359	if gt == -1 {
360	segs = append(segs, s[lt:])
361	break
362	}
363	segs = append(segs, s[lt:lt+gt+1])
364	s = s[lt+gt+1:]
365	}
366	return segs
367	}
368
369	func runeWidth(s string) int {
370	return utf8.RuneCountInString(s)
371	}
372
373	func ensureIndentedLine(buf *strings.Builder, level int, indent string, atLineStart *bool) {
374	if !*atLineStart {
375	buf.WriteString("\n")
376	}
377	writeIndentPrefix(buf, level, indent)
378	*atLineStart = false
379	}
380
381	func writeIndentPrefix(buf *strings.Builder, level int, indent string) {
382	for range level {
383	buf.WriteString(indent)
384	}
385	}
386
387	func collapseWS(s string) string {
388	var buf strings.Builder
389	inWS := false
390	for _, r := range s {
391	if r == ' ' || r == '\t' || r == '\n' || r == '\r' {
392	if !inWS {
393	buf.WriteByte(' ')
394	inWS = true
395	}
396	} else {
397	buf.WriteRune(r)
398	inWS = false
399	}
400	}
401	return buf.String()
402	}
403
404	// Token types for the XML tokenizer.
405	type tokenKind int
406
407	const (
408	tokenOpen tokenKind = iota // <tag ...>
409	tokenClose // </tag>
410	tokenSelfClose // <tag .../>
411	tokenText // plain text
412	tokenCDATA // <![CDATA[...]]>
413	tokenComment // <!-- ... -->
414	)
415
416	type token struct {
417	kind tokenKind
418	raw string
419	}
420
421	func (t token) tagName() string {
422	s := t.raw
423	switch t.kind {
424	case tokenOpen, tokenSelfClose:
425	s = s[1:]
426	if strings.HasSuffix(s, "/>") {
427	s = s[:len(s)-2]
428	} else {
429	s = strings.TrimSuffix(s, ">")
430	}
431	if idx := strings.IndexAny(s, " \t\n"); idx > 0 {
432	s = s[:idx]
433	}
434	return strings.ToLower(s)
435	case tokenClose:
436	s = s[2:]
437	s = strings.TrimSuffix(s, ">")
438	return strings.ToLower(strings.TrimSpace(s))
439	}
440	return ""
441	}
442
443	func tokenize(input string) []token {
444	var tokens []token
445	i := 0
446	for i < len(input) {
447	if input[i] == '<' {
448	if strings.HasPrefix(input[i:], "<![CDATA[") {
449	end := strings.Index(input[i:], "]]>")
450	if end == -1 {
451	tokens = append(tokens, token{tokenCDATA, input[i:]})
452	break
453	}
454	tokens = append(tokens, token{tokenCDATA, input[i : i+end+3]})
455	i += end + 3
456	continue
457	}
458	if strings.HasPrefix(input[i:], "<!--") {
459	end := strings.Index(input[i:], "-->")
460	if end == -1 {
461	tokens = append(tokens, token{tokenComment, input[i:]})
462	break
463	}
464	tokens = append(tokens, token{tokenComment, input[i : i+end+3]})
465	i += end + 3
466	continue
467	}
468	end := strings.Index(input[i:], ">")
469	if end == -1 {
470	tokens = append(tokens, token{tokenText, input[i:]})
471	break
472	}
473	tagStr := input[i : i+end+1]
474	if strings.HasPrefix(tagStr, "</") {
475	tokens = append(tokens, token{tokenClose, tagStr})
476	} else if strings.HasSuffix(tagStr, "/>") {
477	tokens = append(tokens, token{tokenSelfClose, tagStr})
478	} else {
479	tokens = append(tokens, token{tokenOpen, tagStr})
480	}
481	i += end + 1
482	} else {
483	end := strings.Index(input[i:], "<")
484	if end == -1 {
485	tokens = append(tokens, token{tokenText, input[i:]})
486	break
487	}
488	tokens = append(tokens, token{tokenText, input[i : i+end]})
489	i += end
490	}
491	}
492	return tokens
493	}
494

pretty.go

Source Files