aboutsummaryrefslogtreecommitdiff
path: root/commentparser/language/language.go
blob: 457e9363038fa1f5c30569b0f82afbbb3c46f85b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package language contains methods and information about the different
// programming languages the comment parser supports.
package language

import (
	"path/filepath"
	"strings"
)

// Language is the programming language we're grabbing the comments from.
type Language int

// Languages we can retrieve comments from. Unknown is the zero value and is
// what ClassifyLanguage returns when it does not recognize a file extension.
const (
	Unknown Language = iota
	AppleScript
	Assembly
	BLIF // Berkeley Logic Interchange Format
	Batch
	C
	Clif
	Clojure
	CMake
	CSharp
	Dart
	EDIF // Electronic Design Interchange Format
	Elixir
	Flex
	Fortran
	GLSLF // OpenGL Shading Language
	Go
	HTML
	Haskell
	Java
	JavaScript
	Kotlin
	LEF // Library Exchange Format
	Lisp
	Markdown
	Matlab
	MySQL
	NinjaBuild
	ObjectiveC
	Perl
	Python
	R
	Ruby
	Rust
	SDC  // Synopsys Design Constraints
	SDF  // Standard Delay Format
	SPEF // Standard Parasitics Exchange Format
	SQL
	SWIG
	Shader
	Shell
	Swift
	SystemVerilog
	TCL
	TypeScript
	Verilog
	XDC // Xilinx Design Constraint files
	Yacc
	Yaml
)

// style identifies the family of comment delimiters a language uses. Several
// languages can share one style; Language.commentStyle performs the mapping.
type style int

// Comment styles. The trailing comment on each constant shows the single-line
// and, where present, multiline delimiters associated with that style.
// unknown is the zero value and stands for "no known comment syntax".
const (
	unknown     style = iota
	applescript       // -- ... and (* ... *)
	batch             // @REM
	bcpl              // // ... and /* ... */
	cmake             // # ... and #[[ ... ]]
	fortran           // ! ...
	hash              // # ...
	haskell           // -- ... and {- ... -}
	html              // <!-- ... -->
	lisp              // ; ...
	matlab            // % ... and %{ ... %}
	mysql             // # ... and /* ... */
	ruby              // # ... and =begin ... =end
	shell             // # ...
	sql               // -- ... and /* ... */
)

// ClassifyLanguage determines what language the source code was written in,
// based solely on the file's extension; the file contents are never read.
//
// The extension is the text after the final dot in the last element of
// filename and is matched case-insensitively, so "foo.CPP" and "foo.cpp" both
// classify as C. Unknown is returned when filename has no extension or when
// the extension is not one of those listed below.
func ClassifyLanguage(filename string) Language {
	// filepath.Ext returns either "" or a string that starts with '.'.
	ext := strings.ToLower(filepath.Ext(filename))
	if ext == "" {
		return Unknown
	}

	switch ext[1:] { // Skip the '.'.
	case "applescript":
		return AppleScript
	case "bat":
		return Batch
	case "blif", "eblif":
		return BLIF
	case "c", "cc", "cpp", "c++", "h", "hh", "hpp":
		return C
	case "clif":
		return Clif
	case "cmake":
		return CMake
	case "cs":
		return CSharp
	case "dart":
		return Dart
	case "ex", "exs":
		return Elixir
	case "f", "f90", "f95":
		return Fortran
	case "glslf":
		return GLSLF
	case "go":
		return Go
	case "hs":
		return Haskell
	case "html", "htm", "ng", "sgml":
		return HTML
	case "java":
		return Java
	case "js":
		return JavaScript
	case "kt":
		return Kotlin
	case "l":
		return Flex
	case "lef":
		return LEF
	case "lisp", "el", "clj":
		return Lisp
	case "m", "mm":
		// ".m" is ambiguous (Matlab uses it too); it is treated as Objective-C.
		return ObjectiveC
	case "md":
		return Markdown
	case "gn":
		return NinjaBuild
	case "pl", "pm":
		return Perl
	case "py", "pi":
		return Python
	case "r":
		return R
	case "rb":
		return Ruby
	case "rs":
		return Rust
	case "s":
		return Assembly
	case "sdf":
		return SDF
	case "sh":
		return Shell
	case "shader":
		return Shader
	case "spef":
		return SPEF
	case "sql":
		return SQL
	case "swift":
		return Swift
	case "swig":
		return SWIG
	case "sv", "svh":
		return SystemVerilog
	case "tcl", "sdc", "xdc":
		// SDC and XDC constraint files are classified as TCL.
		return TCL
	case "ts", "tsx":
		return TypeScript
	case "v", "vh":
		return Verilog
	case "y":
		return Yacc
	case "yaml", "yml":
		return Yaml
	}
	return Unknown
}

// commentStyle returns the language's comment style. Languages that have no
// case below (Unknown among them) map to the unknown style, for which the
// delimiter methods return empty strings.
func (lang Language) commentStyle() style {
	switch lang {
	case Assembly, C, CSharp, Dart, Flex, GLSLF, Go, Java, JavaScript, Kotlin, ObjectiveC, Rust, Shader, Swift, SWIG, TypeScript, Yacc, Verilog, SystemVerilog, SDF, SPEF:
		return bcpl
	case AppleScript:
		return applescript
	case Batch:
		return batch
	case BLIF, TCL, SDC, XDC:
		// SDC and XDC share TCL's style; ClassifyLanguage already reports
		// their file extensions as TCL.
		return hash
	case CMake:
		return cmake
	case Fortran:
		return fortran
	case Haskell:
		return haskell
	case HTML, Markdown:
		return html
	case Clojure, Lisp:
		return lisp
	case Ruby:
		return ruby
	case Clif, Elixir, NinjaBuild, Perl, Python, R, Shell, Yaml:
		return shell
	case Matlab:
		return matlab
	case MySQL:
		return mysql
	case SQL:
		return sql
	}
	return unknown
}

// SingleLineCommentStart returns the token that opens a comment running to the
// end of the line in the given language, or "" when the language's comment
// style has no such form. No matching "End" method exists because the newline
// terminates the comment.
func (lang Language) SingleLineCommentStart() string {
	var start string
	switch lang.commentStyle() {
	case applescript, haskell, sql:
		start = "--"
	case batch:
		start = "@REM"
	case bcpl:
		start = "//"
	case fortran:
		start = "!"
	case lisp:
		start = ";"
	case matlab:
		start = "%"
	case shell, ruby, cmake, mysql, hash:
		start = "#"
	}
	return start
}

// MultilineCommentStart returns the starting string of a multiline comment for
// the given language, or "" if the language has no multiline comment form
// recognized here. It always pairs with MultilineCommentEnd: both return a
// non-empty string, or both return "".
func (lang Language) MultilineCommentStart() string {
	switch lang.commentStyle() {
	case applescript:
		return "(*"
	case bcpl, mysql:
		// Rust is explicitly excluded: it falls through to "" even though it
		// shares the bcpl style for single-line comments.
		if lang != Rust {
			return "/*"
		}
	case cmake:
		return "#[["
	case haskell:
		return "{-"
	case html:
		return "<!--"
	case matlab:
		return "%{"
	case ruby:
		return "=begin"
	case sql:
		// SQL block comments use the C-style delimiters.
		return "/*"
	}
	return ""
}

// MultilineCommentEnd returns the ending string of a multiline comment for the
// given language, or "" if the language has no multiline comment form
// recognized here. It mirrors MultilineCommentStart case for case.
func (lang Language) MultilineCommentEnd() string {
	switch lang.commentStyle() {
	case applescript:
		return "*)"
	case bcpl, mysql:
		// Rust is explicitly excluded, matching MultilineCommentStart.
		if lang != Rust {
			return "*/"
		}
	case cmake:
		return "]]"
	case haskell:
		return "-}"
	case html:
		return "-->"
	case matlab:
		return "%}"
	case ruby:
		return "=end"
	case sql:
		return "*/"
	}
	return ""
}

// QuoteCharacter reports whether quote opens a string literal in the given
// language. ok is true when it does; escape is true when the resulting string
// allows escaping. Double and single quotes are accepted for every language
// and allow escaping; a backquote is accepted only for Go and does not.
func (lang Language) QuoteCharacter(quote rune) (ok bool, escape bool) {
	if quote == '"' || quote == '\'' {
		return true, true
	}
	if quote == '`' && lang == Go {
		return true, false
	}
	return false, false
}

// NestedComments reports whether the language allows one multiline comment to
// be nested inside another. Swift is the only language for which it is true.
func (lang Language) NestedComments() bool {
	switch lang {
	case Swift:
		return true
	default:
		return false
	}
}