internal/span/utf16.go


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95

// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package span

import (
	"fmt"
	"unicode/utf8"
)

// ToUTF16Column calculates the utf16 column expressed by the point given the
// supplied file contents.
// This is used to convert from the native (always in bytes) column
// representation and the utf16 counts used by some editors.
func ToUTF16Column(p Point, content []byte) (int, error) {
	if !p.HasPosition() {
		return -1, fmt.Errorf("ToUTF16Column: point is missing position")
	}
	if !p.HasOffset() {
		return -1, fmt.Errorf("ToUTF16Column: point is missing offset")
	}
	offset := p.Offset()      // 0-based
	colZero := p.Column() - 1 // 0-based
	if colZero == 0 {
		// 0-based column 0, so it must be chr 1
		return 1, nil
	} else if colZero < 0 {
		return -1, fmt.Errorf("ToUTF16Column: column is invalid (%v)", colZero)
	}
	// work out the offset at the start of the line using the column
	lineOffset := offset - colZero
	if lineOffset < 0 || offset > len(content) {
		return -1, fmt.Errorf("ToUTF16Column: offsets %v-%v outside file contents (%v)", lineOffset, offset, len(content))
	}
	// Use the offset to pick out the line start.
	// This cannot panic: offset > len(content) and lineOffset < offset.
	start := content[lineOffset:]

	// Now, truncate down to the supplied column.
	start = start[:colZero]

	cnt := 0
	for _, r := range string(start) {
		cnt++
		if r > 0xffff {
			cnt++
		}
	}
	return cnt + 1, nil // the +1 is for 1-based columns
}

// FromUTF16Column advances the point by the utf16 character offset given the
// supplied line contents.
// This is used to convert from the utf16 counts used by some editors to the
// native (always in bytes) column representation.
func FromUTF16Column(p Point, chr int, content []byte) (Point, error) {
	if !p.HasOffset() {
		return Point{}, fmt.Errorf("FromUTF16Column: point is missing offset")
	}
	// if chr is 1 then no adjustment needed
	if chr <= 1 {
		return p, nil
	}
	if p.Offset() >= len(content) {
		return p, fmt.Errorf("FromUTF16Column: offset (%v) greater than length of content (%v)", p.Offset(), len(content))
	}
	remains := content[p.Offset():]
	// scan forward the specified number of characters
	for count := 1; count < chr; count++ {
		if len(remains) <= 0 {
			return Point{}, fmt.Errorf("FromUTF16Column: chr goes beyond the content")
		}
		r, w := utf8.DecodeRune(remains)
		if r == '\n' {
			// Per the LSP spec:
			//
			// > If the character value is greater than the line length it
			// > defaults back to the line length.
			break
		}
		remains = remains[w:]
		if r >= 0x10000 {
			// a two point rune
			count++
			// if we finished in a two point rune, do not advance past the first
			if count >= chr {
				break
			}
		}
		p.v.Column += w
		p.v.Offset += w
	}
	return p, nil
}