1;; Copyright (c) 2007,2008 Paulo Cesar Pereira de Andrade
2;;
3;; Permission is hereby granted, free of charge, to any person obtaining a
4;; copy of this software and associated documentation files (the "Software"),
5;; to deal in the Software without restriction, including without limitation
6;; the rights to use, copy, modify, merge, publish, distribute, sublicense,
7;; and/or sell copies of the Software, and to permit persons to whom the
8;; Software is furnished to do so, subject to the following conditions:
9;;
10;; The above copyright notice and this permission notice (including the next
11;; paragraph) shall be included in all copies or substantial portions of the
12;; Software.
13;;
14;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15;; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16;; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17;; THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18;; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19;; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20;; DEALINGS IN THE SOFTWARE.
21;;
22;; Author: Paulo Cesar Pereira de Andrade
23;;
24
25;;   Perl syntax and indentation mode
26;;   Based on the C/C++ and Lisp modes. Attempting to make simple
27;; syntax/indentation rules, that should work correctly with most
28;; perl code.
29
30;;  *cont-indent* is somewhat buggy, that if pressing C-A,Tab, will
31;; not generate the same output as when normally typing the expression.
32;;  This is because the parser doesn't search for a matching ';', '{',
33;; '[' or '(' to know where the expression starts. The C mode has the
34;; same problem. Example:
35;;	a +
36;;	    b;	<-- if pressing C-A,Tab will align "b;" with "a +"
37
38;;  Maybe most of the code here, and some code in the C mode could be
39;; merged to have a single "default mode" parser for languages that
40;; basically only depend on { and } for indentation.
41
42(require "syntax")
43(require "indent")
44(in-package "XEDIT")
45
46;=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
47(defsynprop *prop-string-escape*
48  "string-escape"
49  :font		"*lucidatypewriter-bold-r*-12-*"
50  :foreground	"RoyalBlue2"
51  :underline	t)
52
53(defsynprop *prop-string-keyword-bold*
54  "string-variable-bold"
55  :font		"*lucidatypewriter-bold-r*-12-*"
56  :foreground	"RoyalBlue4")
57
58(defsynprop *prop-string-keyword*
59  "string-variable"
60  :font		"*lucidatypewriter-medium-r*-12-*"
61  :foreground	"RoyalBlue4")
62
63(defsynprop *prop-constant-escape*
64  "constant-escape"
65  :font		"*lucidatypewriter-medium-r*-12-*"
66  :foreground	"VioletRed3"
67  :underline	t)
68
69(defsynprop *prop-regex*
70  "regex"
71  :font		"*courier-medium-o*-12-*"
72  :foreground	"black")
73
74(defsynprop *prop-shell*
75  "shell"
76  :font		"*lucidatypewriter-medium-r*-12-*"
77  :foreground	"red3")
78
79(defsynprop *prop-shell-escape*
80  "shell-escape"
81  :font		"*lucidatypewriter-bold-r*-12-*"
82  :foreground	"red3"
83  :underline	t)
84
85(defsynprop *prop-documentation*
86  "documentation"
87  :font		"fixed"
88  :foreground	"black"
89  :background	"rgb:e/e/e"
90)
91
92
93;=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
94(defsynoptions *perl-DEFAULT-style*
95  ;; Positive number. Basic indentation
96  (:indentation		.	4)
97
98  ;; Boolean. Add one indentation level to continuations?
99  (:cont-indent		.	t)
100
101  ;; Boolean. Move cursor to the indent column after pressing <Enter>?
102  (:newline-indent	.	t)
103
104  ;; Boolean. Set to T if tabs shouldn't be used to fill indentation.
105  (:emulate-tabs	.	nil)
106
107  ;; Boolean. Only calculate indentation after pressing <Enter>?
108  ;;		This may be useful if the parser does not always
109  ;;		do what the user expects...
110  (:only-newline-indent	.	nil)
111
112  ;; Boolean. Remove extra spaces from previous line.
113  ;;		This should default to T when newline-indent is not NIL.
114  (:trim-blank-lines	.	t)
115
116  ;; Boolean. If this hash-table entry is set, no indentation is done.
117  ;;		Useful to temporarily disable indentation.
118  (:disable-indent	.	nil))
119
120
121
122;=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
123(defvar *perl-mode-options* *perl-DEFAULT-style*)
124
125;=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
126;; Parenthesis are usually not required, just distinguish as:
127;; expression:	code without an ending ';'
128;; statement:	code ending in a ';'
129;; block:	code enclosed in '{' and '}'
130;; In Perl a simpler logic can be used, unlikely the C mode, as in
131;; perl braces are mandatory
132(defindent *perl-mode-indent* :main
133  ;; this must be the first token
134  (indtoken "^\\s*" :indent
135    :code (or *offset* (setq *offset* (+ *ind-offset* *ind-length*))))
136  ;; this may cause some other patterns to fail, due to matching single \'
137  (indtoken "(&?(\\w+)|&(\\w+)?)'\\w+" :expression)
138  ;; special variables
139  (indtoken "\\$(\\d|^\\u|[][0-9!#$*()_@<>?/|,\"'])" :expression)
140  ;; ignore comments
141  (indtoken "#.*$" nil)
142  ;; treat regex as expressions to avoid confusing parser
143  (indtoken "m?/([^/]|\\\\/)+/\\w*" :expression)
144  (indtoken "m\\{[^}]+\\}\\w*" :expression)
145  (indtoken "m<[^>]+>\\w*" :expression)
146  (indtoken "(s|tr)/[^/]+/([^/]|\\\\/)*/\\w*" :expression)
147  (indtoken "//" :expression :nospec t)
148  ;; fast resolve deferences to expressions
149  (indtoken "[$@%&*]?\\{\\$?\\S+\\}" :expression)
150
151  (indtoken "($%@*)?\\w+" :expression)
152  (indtoken ";" :semi :nospec t)
153  (indinit (braces 0))
154  (indtoken "{" :obrace :nospec t
155    :code (decf braces))
156  (indtoken "}" :cbrace :nospec t
157    :code (incf braces))
158  (indinit (parens&bracks 0))
159  (indtoken ")" :cparen :nospec t :code (incf parens&bracks))
160  (indtoken "(" :oparen	:nospec t :code (decf parens&bracks))
161  (indtoken "]" :cbrack	:nospec t :code (incf parens&bracks))
162  (indtoken "[" :obrack	:nospec t :code (decf parens&bracks))
163  ;; if in the same line, reduce now, this must be done because the
164  ;; delimiters are identical
165  (indtoken "'([^\\']|\\\\.)*'" :expression)
166  (indtoken "\"([^\\\"]|\\\\.)*\"" :expression)
167  (indtoken "\"" :cstring1 :nospec t :begin :string1)
168  (indtoken "'" :cstring2 :nospec t :begin :string2)
169  ;; This must be the last rule
170  (indtoken "\\s*$"		:eol)
171
172  (indtable :string1
173    ;; Ignore escaped characters
174    (indtoken "\\." nil)
175    ;; Return to the toplevel when the start of the string is found
176    (indtoken "\"" :ostring1 :nospec t :switch -1))
177  (indtable :string2
178    (indtoken "\\." nil)
179    (indtoken "'" :ostring2 :nospec t :switch -1))
180
181  ;; This avoids some problems with *cont-indent* adding an indentation
182  ;; level to an expression after an empty line
183  (indreduce nil
184    t
185    ((:indent :eol)))
186
187  ;; Reduce to a single expression token
188  (indreduce :expression
189    t
190    ((:indent :expression)
191      (:expression :eol)
192      (:expression :parens)
193      (:expression :bracks)
194      (:expression :expression)
195      ;; multiline strings
196      (:ostring1 (not :ostring1) :cstring1)
197      (:ostring2 (not :ostring2) :cstring2)
198      ;; parenthesis and brackets
199      (:oparen (not :oparen) :cparen)
200      (:obrack (not :obrack) :cbrack)))
201
202  ;; Statements end in a semicollon
203  (indreduce :statement
204    t
205    ((:semi)
206      (:indent :semi)
207      (:expression :statement)
208      (:statement :eol)
209      ;; Doesn't necessarily end in a semicollon
210      (:expression :block)))
211
212  (indreduce :block
213    t
214    ((:obrace (not :obrace) :cbrace)
215      (:block :eol)))
216  (indreduce :obrace
217    (< *ind-offset* *ind-start*)
218    ((:indent :obrace))
219    (setq *indent* (offset-indentation (+ *ind-offset* *ind-length*) :resolve t))
220    (indent-macro-reject-left))
221
222  ;; Try to do an smart indentation on open parenthesis and brackets
223  (indreduce :parens
224    t
225    ((:oparen (not :oparen) :cparen))
226    (when (and
227	    (< *ind-offset* *ind-start*)
228	    (> (+ *ind-offset* *ind-length*) *ind-start*))
229      (setq *indent* (1+ (offset-indentation *ind-offset* :align t)))
230      (indent-macro-reject-left)))
231  (indreduce :bracks
232    t
233    ((:obrack (not :obrack) :cbrack))
234    (when (and
235	    (< *ind-offset* *ind-start*)
236	    (> (+ *ind-offset* *ind-length*) *ind-start*))
237      (setq *indent* (1+ (offset-indentation *ind-offset* :align t)))
238      (indent-macro-reject-left)))
239
240  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
241  ;; Assuming previous lines have correct indentation, try to
242  ;; fast resolve brace indentation
243  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
244  ;; Line ended with an open brace
245  (indreduce :obrace
246    (< *ind-offset* *ind-start*)
247    ((:expression :obrace))
248    (setq *indent* (offset-indentation *ind-offset* :resolve t))
249    (indent-macro-reject-left))
250  ;; Line starts with an open brace
251  (indreduce nil
252    (< *ind-offset* *ind-start* (+ *ind-offset* *ind-length*))
253    ;; Just set initial indentation
254    ((:indent :obrace))
255    (setq
256      *indent* (- (offset-indentation *ind-offset* :resolve t) *base-indent*))
257    (indent-macro-reject-left))
258
259  (indresolve :statement
260    (when (< *ind-offset* *ind-start*)
261      (while (> braces 0)
262	(setq
263	  *indent*	(- *indent* *base-indent*)
264	  braces	(1- braces)))))
265
266  (indresolve :obrace
267    (and (< *ind-offset* *ind-start*)
268      (incf *indent* *base-indent*)))
269  (indresolve :cbrace
270    (decf *indent* *base-indent*))
271  (indresolve :expression
272    (and
273      *cont-indent*
274      (> *indent* 0)
275      (zerop parens&bracks)
276      (< *ind-offset* *ind-start*)
277      (> (+ *ind-offset* *ind-length*) *ind-start*)
278      (incf *indent* *base-indent*)))
279
280  (indresolve (:oparen :obrack)
281    (and (< *ind-offset* *ind-start*)
282      (setq *indent* (1+ (offset-indentation *ind-offset* :align t)))))
283)
284
285;=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
286(defun perl-offset-indent (&aux char (point (point)))
287  ;; Skip spaces forward
288  (while (member (setq char (char-after point)) indent-spaces)
289    (incf point))
290  (if (member char '(#\})) (1+ point) point))
291
292(compile 'perl-offset-indent)
293
294;=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
295(defun perl-should-indent (options &aux char point start offset)
296  (when (hash-table-p options)
297    ;; check if previous line has extra spaces
298    (and (gethash :trim-blank-lines options)
299      (indent-clear-empty-line))
300
301    ;; indentation disabled?
302    (and (gethash :disable-indent options)
303      (return-from perl-should-indent))
304
305    (setq
306      point	(point)
307      char	(char-before point)
308      start	(scan point :eol :left))
309
310    ;; if at bol and should indent only when starting a line
311    (and (gethash :only-newline-indent options)
312      (return-from perl-should-indent (= point start)))
313
314    ;; at the start of a line
315    (and (= point start)
316      (return-from perl-should-indent (gethash :newline-indent options)))
317
318    ;; if first character
319    (and (= point (1+ start))
320      (return-from perl-should-indent t))
321
322    ;; check if is the first non-blank character in a new line
323    (when (and
324	    (gethash :cont-indent options)
325	    (= point (scan point :eol :right))
326	    (alphanumericp char))
327      (setq offset (1- point))
328      (while (and
329	       (> offset start)
330	       (member (char-before offset) indent-spaces))
331	(decf offset))
332      ;; line has only one character with possible spaces before it
333      (and (<= offset start)
334	(return-from perl-should-indent t)))
335
336    ;; if one of these was typed, should check indentation
337    (if (member char '(#\})) (return-from perl-should-indent t))
338  )
339  ;; Should not indent
340  nil)
341
342(compile 'perl-should-indent)
343
344;=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
345(defun perl-indent (syntax syntable)
346  (let*
347    ((options (syntax-options syntax))
348      *base-indent*
349      *cont-indent*)
350
351    (or (perl-should-indent options) (return-from perl-indent))
352    (setq
353      *base-indent* (gethash :indentation options 4)
354      *cont-indent* (gethash :cont-indent options t))
355
356    (indent-macro
357      *perl-mode-indent*
358      (perl-offset-indent)
359      (gethash :emulate-tabs options))))
360
361(compile 'perl-indent)
362
363;=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
364;; some example macros to easily add new patterns for strings and possibly
365;; regex or other patterns
366(defmacro perl-q-string-token (token)
367  `(syntoken (string-concat "\\<q(q|w)?\\s*\\" ,token)
368     :icase t :contained t :begin
369     (intern (string-concat "string" ,token) 'keyword)))
370(defmacro perl-q-string-table (start end)
371  `(syntable (intern (string-concat "string" ,start) 'keyword)
372     *prop-string* #'default-indent
373     (syntoken ,end :nospec t :switch -1)
374     (synaugment :inside-string)))
375
376;=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
377(defsyntax *perl-mode* :main nil #'perl-indent *perl-mode-options*
378  ;; keywords
379  (syntoken
380    (string-concat
381      "\\<("
382      "and|for|foreach|gt|if|else|elsif|eq|goto|le|lt|last|ne|"
383      "neg|next|not|or|return|shift|sub|unless|unshift|until|while"
384      ")\\>")
385    :property *prop-keyword*)
386
387  ;; pseudo keywords
388  (syntoken
389    (string-concat
390      "\\<("
391      "BEGIN|END|bless|blessed|defined|delete|eval|local|my|our|"
392      "package|require|undef|use"
393      ")\\>")
394    :property *prop-preprocessor*)
395  ;; this may cause some other patterns to fail, due to matching single \'
396  (syntoken "(&?(\\w+)|&(\\w+)?)'\\w+" :property *prop-preprocessor*)
397
398  ;; numbers
399  (syntoken
400    (string-concat
401      "\\<("
402      ;; Integers
403      "(\\d+|0x\\x+)|"
404      ;; Floats
405      "\\d+\\.?\\d*(e[+-]?\\d+)?"
406      ")\\>")
407    :icase t
408    :property *prop-number*)
409
410  ;; special variables
411  (syntoken "\\$(\\d|^\\u|[][0-9!#$*()_@<>?/|,\"'])" :property *prop-keyword*)
412
413  ;; also match variables
414  (syntable :inside-string nil nil
415    ;; escaped characters
416
417    ;; XXX This pattern was matching the empty string and entering an
418    ;; infinite loop in code like:
419#|
420---%<---
421"		<-- *** if an backslash is added it fails. Inverting
422a";                 *** the pattern fixed the problem, but was the wrong
423---%<---	    *** solution. Note that C-G stops the interpreter, and
424		    *** special care must be taken with patterns matching
425		    *** empty strings.
426|#
427
428    (syntoken "\\\\\\d{3}|\\\\." :property *prop-string-escape*)
429    (syntoken "(\\{\\$|\\$\\{)" :property *prop-string-keyword-bold* :begin :string-varbrace)
430    (syntoken "[$@]" :property *prop-string-keyword-bold* :begin :string-variable)
431    (syntoken "\\$(\\d|^\\u|[][0-9!#$*()_@<>?/|,\"'])" :property *prop-string-keyword-bold*))
432
433  ;; variables insided strings
434  (syntable :string-variable *prop-string-keyword* nil
435    (syntoken "\\w+" :switch -1))
436  (syntable :string-varbrace *prop-string-keyword* nil
437    (syntoken		"}"
438      :nospec		t
439      :property	*prop-string-keyword-bold*
440      :switch		-1)
441    (synaugment :inside-string))
442
443  ;; comments
444  (syntoken "#.*$" :property *prop-comment*)
445
446  ;; regex
447  (syntoken "(\\<m)?/([^/]|\\\\/)+/\\w*"	:property *prop-regex*)
448  (syntoken "\\<m\\{[^}]+\\}\\w*"		:property *prop-regex*)
449  (syntoken "\\<m<[^>]+>\\w*"			:property *prop-regex*)
450  (syntoken "\\<(s|tr)/[^/]+/([^/]|\\\\/)*/\\w*":property *prop-regex*)
451  ;; just to avoid confusing the parser on something like split //, ...
452  (syntoken "//" :nospec t			:property *prop-regex*)
453
454  ;; strings
455  (syntoken "\"" :nospec t :contained t :begin :string)
456  (syntable :string *prop-string* #'default-indent
457    (syntoken "\"" :nospec t :switch -1)
458    (synaugment :inside-string))
459
460  ;; more strings
461  (perl-q-string-token "{")
462  (perl-q-string-table "{" "}")
463  (perl-q-string-token "[")
464  (perl-q-string-table "[" "]")
465  (perl-q-string-token "(")
466  (perl-q-string-table "(" ")")
467  (perl-q-string-token "/")
468  (perl-q-string-table "/" "/")
469
470  ;; yet more strings
471  (syntoken "'" :nospec t :contained t :begin :constant)
472  (syntable :constant *prop-constant* #'default-indent
473    (syntoken "'"	:nospec t :switch -1)
474    (syntoken "\\\\." :property *prop-string-escape*))
475
476  ;; shell commands
477  (syntoken "`" :nospec t :contained t :begin :shell)
478  (syntable :shell *prop-shell* #'default-indent
479    (syntoken "`"	:nospec t :switch -1)
480    (synaugment :inside-string))
481
482  ;; punctuation
483  (syntoken "[][$@%(){}/*+:;=<>,&!|^~\\.?-]" :property *prop-punctuation*)
484  (syntoken "\\<x\\>" :property *prop-punctuation*)
485
486  ;; primitive faked heredoc support, doesn't match the proper string, just
487  ;; expects an uppercase identifier in a single line
488  (syntoken "<<\"[A-Z][A-Z0-9_]+\"" :property *prop-string* :begin :heredoc)
489  (syntoken "<<'[A-Z][A-Z0-9_]+'" :property *prop-constant* :begin :heredoc)
490  (syntoken "<<[A-Z][A-Z0-9_]+" :property *prop-preprocessor* :begin :heredoc)
491  (syntable :heredoc *prop-documentation* #'default-indent
492    (syntoken "^[A-Z][A-Z0-9_]+$" :switch -1))
493
494  (syntoken "^=(pod|item|over|head\\d)\\>.*$" :property *prop-documentation* :begin :info)
495  (syntable :info *prop-documentation* nil
496    (syntoken "^=cut\\>.*$"	:switch -1)
497    (syntoken "^.*$"))
498
499  (syntoken "^(__END__|__DATA__)$" :property *prop-documentation*
500    :begin :documentation)
501
502  (syntoken "__\\u+__" :property *prop-preprocessor*)
503
504  (syntable :documentation *prop-documentation* nil
505    (syntoken "^.*$"))
506
507)
508