syntax.lsp revision 5dfecf96
1;; 2;; Copyright (c) 2002 by The XFree86 Project, Inc. 3;; 4;; Permission is hereby granted, free of charge, to any person obtaining a 5;; copy of this software and associated documentation files (the "Software"), 6;; to deal in the Software without restriction, including without limitation 7;; the rights to use, copy, modify, merge, publish, distribute, sublicense, 8;; and/or sell copies of the Software, and to permit persons to whom the 9;; Software is furnished to do so, subject to the following conditions: 10;; 11;; The above copyright notice and this permission notice shall be included in 12;; all copies or substantial portions of the Software. 13;; 14;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15;; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16;; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17;; THE XFREE86 PROJECT BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 18;; WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF 19;; OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 20;; SOFTWARE. 21;; 22;; Except as contained in this notice, the name of the XFree86 Project shall 23;; not be used in advertising or otherwise to promote the sale, use or other 24;; dealings in this Software without prior written authorization from the 25;; XFree86 Project. 26;; 27;; Author: Paulo César Pereira de Andrade 28;; 29;; 30;; $XFree86: xc/programs/xedit/lisp/modules/syntax.lsp,v 1.11 2003/01/16 03:50:46 paulo Exp $ 31;; 32 33(provide "syntax") 34(require "xedit") 35(in-package "XEDIT") 36 37(defvar *syntax-symbols* '( 38 syntax-highlight defsyntax defsynprop synprop-p syntax-p 39 syntable syntoken synaugment 40 *prop-default* *prop-keyword* *prop-number* *prop-string* 41 *prop-constant* *prop-comment* *prop-preprocessor* 42 *prop-punctuation* *prop-error* *prop-annotation* 43)) 44(export *syntax-symbols*) 45(in-package "USER") 46(dolist (symbol xedit::*syntax-symbols*) 47 (import symbol) 48) 49(in-package "XEDIT") 50(makunbound '*syntax-symbols*) 51 52#| 53TODO: 54o Add a command to match without increment the offset in the input, this 55 may be useful for example in a case like: 56 some-table 57 match "<" 58 switch -1 59 match "<" <- the table already eated this, so it won't be matched. 60 This must be carefully checked at compile time, such instruction should 61 be in a token that returns or starts a new one, and even then, may need 62 runtime check to make sure it won't enter an infinite loop. 63o Allow combining properties, this is supported in Xaw, and could allow some 64 very interesting effects for complex documents. 65o Maybe have an separated function/loop for tables that don't have tokens 66 that start/switch to another table, and/or have the contained attribute set. 67 This could allow running considerably faster. 68o Do a better handling of interactive edition for tokens that start and end 69 with the same pattern, as an example strings, if the user types '"', it 70 will parse up to the end of the file, "inverting" all strings. 71o Allow generic code to be run once a match is found, such code could handle 72 some defined variables and take decisions based on the parser state. This 73 should be detected at compile time, to maybe run a different parser for 74 such syntax tables, due to the extra time building the environment to 75 call the code. This would be useful to "really" parse documents with 76 complex syntax, for example, a man page source file. 77o Add command to change current default property without initializing a new 78 state. 79o Fix problems matching EOL. Since EOL is an empty string match, if there 80 is a rule to match only EOL, but some other rule matches up to the end 81 of the input, the match to EOL will not be recognized. Currently the only 82 way to handle this is to have a nested table that always returns once a 83 match is found, so that it will restart the match loop code even if the 84 input is at EOL. 85 One possible solution would be to add the ending newline to the input, 86 and then instead of matching "$", should match "\\n". 87o XXX Usage of the variable newline-property must be reviewed in function 88 syntax-highlight, if the text property has a background attribute, 89 visual effect will look "strange", will paint a square with the 90 background attribute at the end of every line in the matched text. 91|# 92 93;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 94;; Some annotations to later write documentation for the module... 95;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 96#| 97 The current interface logic should be easy to understand for people 98that have written lex scanners before. It has some extended semantics, 99that could be translated to stacked BEGIN() statements in lex, but 100currently does not have rules for matches in the format RE/TRAILING, as 101well as code attached to rules (the biggest difference) and/or things 102like REJECT and unput(). Also, at least currently, it is *really* quite 103slower than lex. 104 105 MATCHING RULES 106 -------------- 107 When two tokens are matched at the same input offset, the longest 108token is used, if the length is the same, the first definition is 109used. For example: 110 token1 => int 111 token2 => [A-Za-z]+ 112 input => integer 113 Token1 matches "int" and token2 matches "integer", but since token2 is 114longer, it is used. But in the case: 115 token1 => int 116 token2 => [A-Za-z]+ 117 input => int 118 Both, token1 and token2 match "int", since token1 is defined first, it 119is used. 120|# 121 122 123;; Initialize some default properties that may be shared in syntax 124;; highlight definitions. Use of these default properties is encouraged, 125;; so that "tokens" will be shown identically when editing program 126;; sources in different programming languages. 127(defsynprop *prop-default* 128 "default" 129 :font "*courier-medium-r*-12-*" 130 :foreground "black") 131 132(defsynprop *prop-keyword* 133 "keyword" 134 :font "*courier-bold-r*-12-*" 135 :foreground "gray12") 136 137(defsynprop *prop-number* 138 "number" 139 :font "*courier-bold-r*-12-*" 140 :foreground "OrangeRed3") 141 142(defsynprop *prop-string* 143 "string" 144 :font "*lucidatypewriter-medium-r*-12-*" 145 :foreground "RoyalBlue2") 146 147(defsynprop *prop-constant* 148 "constant" 149 :font "*lucidatypewriter-medium-r*-12-*" 150 :foreground "VioletRed3") 151 152(defsynprop *prop-comment* 153 "comment" 154 :font "*courier-medium-o*-12-*" 155 :foreground "SlateBlue3") 156 157(defsynprop *prop-preprocessor* 158 "preprocessor" 159 :font "*courier-medium-r*-12-*" 160 :foreground "green4") 161 162(defsynprop *prop-punctuation* 163 "punctuation" 164 :font "*courier-bold-r*-12-*" 165 :foreground "gray12") 166 167;; Control characters, not always errors... 168(defsynprop *prop-control* 169 "control" 170 :font "*courier-bold-r*-12-*" 171 :foreground "yellow2" 172 :background "red3") 173 174(defsynprop *prop-error* 175 "error" 176 :font "*new century schoolbook-bold*-24-*" 177 :foreground "yellow" 178 :background "red") 179 180(defsynprop *prop-annotation* 181 "annotation" 182 :font "*courier-medium-r*-12-*" 183 :foreground "black" 184 :background "PaleGreen") 185 186 187;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 188;; The "main" definition of the syntax highlight coding interface. 189;; Creates a "special" variable with the given name, associating to 190;; it an already compiled syntax table. 191;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 192(defmacro defsyntax (variable label property indent options &rest lists) 193 `(if (boundp ',variable) 194 ,variable 195 (progn 196 (proclaim '(special ,variable)) 197 (setq ,variable 198 (compile-syntax-table 199 (string ',variable) ,options 200 (syntable ,label ,property ,indent ,@lists) 201 ) 202 ) 203 ) 204 ) 205) 206 207;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 208;; Just a wrapper to create a hash-table and bound it to a symbol. 209;; Example of call: 210;; (defsynoptions *my-syntax-options* 211;; (:indent . 8) 212;; (:indent-option-1 . 1) 213;; (:indent-option-2 . 2) 214;; ) 215;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 216(defmacro defsynoptions (variable &rest options) 217 `(if (boundp ',variable) 218 ,variable 219 (progn 220 (proclaim '(special ,variable)) 221 (setq ,variable (make-hash-table :initial-contents ',options)) 222 ) 223 ) 224) 225 226;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 227;; These definitions should be "private". 228;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 229(defstruct syntoken 230 regex ;; A compiled regexp. 231 property ;; NIL for default, or a synprop structure. 232 contained ;; Only used when switch/begin is not NIL. Values: 233 ;; NIL -> just switch to or begin new 234 ;; syntax table. 235 ;; (not NIL) -> apply syntoken property 236 ;; (or default one) to matched 237 ;; text *after* switching to or 238 ;; beginning a new syntax table. 239 switch ;; Values for switch are: 240 ;; NIL -> do nothing 241 ;; A keyword -> switch to the syntax table 242 ;; identified by the keyword. 243 ;; A negative integer -> Pop the stack 244 ;; -<swich-value> times. 245 ;; A common value is -1, 246 ;; to switch to the previous 247 ;; state, but some times 248 ;; it is desired to return 249 ;; two or more times in 250 ;; in the stack. 251 ;; NOTE: This is actually a jump, the stack is 252 ;; popped until the named syntax table is found, 253 ;; if the stack becomes empty, a new state is 254 ;; implicitly created. 255 begin ;; NIL or a keyword (like switch), but instead of 256 ;; popping the stack, it pushes the current syntax 257 ;; table to the stack and sets a new current one. 258) 259 260 261;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 262;; Just a wrapper to make-syntoken. 263;; TODO: Add support for structure constructors. 264;; XXX: Note that the NOSUB only works with the xedit regex, it 265;; will still return the match offsets, but will ignore subexpressions, 266;; that is, parenthesis are used only for grouping. 267;; TODO: Create a new version of the re-exec call that returns 268;; offsets in the format (<from> . <to>) and not 269;; ((<from0> . <to0>) ... (<fromN> . <toN>)). Only the global result 270;; is expected/used, so there is no reason to allocate more than one 271;; cons cell per call. 272;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 273(defun syntoken (pattern 274 &key icase nospec property contained switch begin (nosub t) 275 &aux 276 (regex 277 (re-comp pattern :icase icase :nospec nospec :nosub nosub) 278 ) 279 check) 280 281 ;; Don't allow a regex that matches the null string enter the 282 ;; syntax table list. 283 (if (consp (setq check (re-exec regex "" :noteol t :notbol t))) 284#+xedit (error "SYNTOKEN: regex matches empty string ~S" regex) 285#-xedit () 286 ) 287 288 (make-syntoken 289 :regex regex 290 :property property 291 :contained contained 292 :switch switch 293 :begin begin 294 ) 295) 296 297 298;; This structure is defined only to do some type checking, it just 299;; holds a list of keywords. 300(defstruct synaugment 301 labels ;; List of keywords labeling syntax tables. 302) 303 304(defstruct syntable 305 label ;; A keyword naming this syntax table. 306 property ;; NIL or a default synprop structure. 307 indent ;; Indentation function for the syntax table. 308 tokens ;; A list of syntoken structures. 309 tables ;; A list of syntable structures. 310 augments ;; A list of synaugment structures, used only 311 ;; at "compile time", so that a table can be 312 ;; used before it's definition. 313 bol ;; One of the tokens match the empty string at 314 ;; the start of a line (loop optimization hint). 315 ;; Field filled at "link" time. 316 eol ;; Same comments as bol, but in this case, for 317 ;; the empty string at the end of a line. 318) 319 320 321;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 322;; Just call make-syntable, but sorts the elements by type, allowing 323;; a cleaner code when defining the syntax highlight rules. 324;; XXX Same comments as for syntoken about the use of a constructor for 325;; structures. TODO: when/if clos is implemented in the interpreter. 326;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 327(defun syntable (label default-property indent &rest definitions) 328 329 ;; Check for possible errors in the arguments. 330 (unless (keywordp label) 331 (error "SYNTABLE: ~A is not a keyword" label) 332 ) 333 (unless 334 (or 335 (null default-property) 336 (synprop-p default-property) 337 ) 338 (error "SYNTABLE: ~A is an invalid text property" 339 default-property 340 ) 341 ) 342 343 ;; Don't allow unknown data in the definition list. 344 ;; XXX typecase should be added to the interpreter, and since 345 ;; the code is traversing the entire list, it could build 346 ;; now the arguments to make-syntable. 347 (dolist (item definitions) 348 (unless 349 (or 350 351 ;; Allow NIL in the definition list, so that one 352 ;; can put conditionals in the syntax definition, 353 ;; and if the conditional is false, fill the slot 354 ;; with a NIL value. 355 (atom item) 356 (syntoken-p item) 357 (syntable-p item) 358 (synaugment-p item) 359 ) 360 (error "SYNTABLE: invalid syntax table argument ~A" item) 361 ) 362 ) 363 364 ;; Build the syntax table. 365 (make-syntable 366 :label label 367 :property default-property 368 :indent indent 369 :tokens (remove-if-not #'syntoken-p definitions) 370 :tables (remove-if-not #'syntable-p definitions) 371 :augments (remove-if-not #'synaugment-p definitions) 372 ) 373) 374 375 376;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 377;; Just to do a "preliminary" error checking, every element must be a 378;; a keyword, and also check for reserved names. 379;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 380(defun synaugment (&rest keywords) 381 (dolist (keyword keywords) 382 (unless (keywordp keyword) 383 (error "SYNAUGMENT: bad syntax table label ~A" keyword) 384 ) 385 ) 386 (make-synaugment :labels keywords) 387) 388 389 390;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 391;; Recursive compile utility function. 392;; Returns a cons in the format: 393;; car => List of all syntoken structures 394;; (including child tables). 395;; cdr => List of all child syntable structures. 396;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 397(defun list-syntable-elements (table &aux result sub-result) 398 (setq 399 result 400 (cons 401 (syntable-tokens table) 402 (syntable-tables table)) 403 ) 404 405 ;; For every child syntax table. 406 (dolist (child (syntable-tables table)) 407 408 ;; Recursively call list-syntable-elements. 409 (setq sub-result (list-syntable-elements child)) 410 411 (rplaca result (append (car result) (car sub-result))) 412 (rplacd result (append (cdr result) (cdr sub-result))) 413 ) 414 415 ;; Return the pair of nested tokens and tables. 416 result 417) 418 419 420;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 421;; Append tokens of the augment list to the tokens of the specified 422;; syntax table. 423;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 424(defun compile-syntax-augment-list (table table-list 425 &aux labels augment tokens) 426 427 ;; Create a list of all augment tables. 428 (dolist (augment (syntable-augments table)) 429 (setq labels (append labels (synaugment-labels augment))) 430 ) 431 432 ;; Remove duplicates and references to "itself", 433 ;; without warnings? 434 (setq 435 labels 436 (remove 437 (syntable-label table) 438 (remove-duplicates labels :from-end t) 439 ) 440 ) 441 442 ;; Check if the specified syntax tables exists! 443 (dolist (label labels) 444 (unless 445 (setq 446 augment 447 (car (member label table-list :key #'syntable-label)) 448 ) 449 (error "COMPILE-SYNTAX-AUGMENT-LIST: Cannot augment ~A in ~A" 450 label 451 (syntable-label table) 452 ) 453 ) 454 455 ;; Increase list of tokens. 456 (setq tokens (append tokens (syntable-tokens augment))) 457 ) 458 459 ;; Store the tokens in the augment list. They will be added 460 ;; to the syntax table in the second pass. 461 (setf (syntable-augments table) tokens) 462 463 ;; Recurse on every child table. 464 (dolist (child (syntable-tables table)) 465 (compile-syntax-augment-list child table-list) 466 ) 467) 468 469 470;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 471;; Just add the augmented tokens to the token list, recursing on 472;; every child syntax table. 473;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 474(defun link-syntax-augment-table (table) 475 (setf 476 (syntable-tokens table) 477 ;; When augmenting a table, duplicated tokens or different tokens 478 ;; that use the same regex pattern should be common. 479 (remove-duplicates 480 (nconc (syntable-tokens table) (syntable-augments table)) 481 :key #'syntoken-regex 482 :test #'equal 483 :from-end t 484 ) 485 486 ;; Don't need to keep this list anymore. 487 (syntable-augments table) 488 () 489 ) 490 491 ;; Check if one of the tokens match the empty string at the 492 ;; start or end of a text line. XXX The fields bol and eol 493 ;; are expected to be initialized to NIL. 494 (dolist (token (syntable-tokens table)) 495 (when (consp (re-exec (syntoken-regex token) "" :noteol t)) 496 (setf (syntable-bol table) t) 497 (return) 498 ) 499 ) 500 (dolist (token (syntable-tokens table)) 501 (when (consp (re-exec (syntoken-regex token) "" :notbol t)) 502 (setf (syntable-eol table) t) 503 (return) 504 ) 505 ) 506 507 (dolist (child (syntable-tables table)) 508 (link-syntax-augment-table child) 509 ) 510) 511 512 513;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 514;; "Compile" the main structure of the syntax highlight code. 515;; Variables "switches" and "begins" are used only for error checking. 516;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 517(defun compile-syntax-table (name options main-table &aux syntax elements 518 switches begins tables properties) 519 (unless (stringp name) 520 (error "COMPILE-SYNTAX-TABLE: ~A is not a string" name) 521 ) 522 523 (setq 524 elements 525 (list-syntable-elements main-table) 526 527 switches 528 (remove-if 529 #'null 530 (car elements) 531 :key #'syntoken-switch 532 ) 533 534 begins 535 (remove-if-not 536 #'keywordp 537 (car elements) 538 :key #'syntoken-begin 539 ) 540 541 ;; The "main-table" isn't in the list, because 542 ;; list-syntable-elements includes only the child tables; 543 ;; this is done to avoid the need of removing duplicates here. 544 tables 545 (cons main-table (cdr elements)) 546 ) 547 548 ;; Check for typos in the keywords, or for not defined syntax tables. 549 (dolist (item (mapcar #'syntoken-switch switches)) 550 (unless 551 (or 552 (and 553 (integerp item) 554 (minusp item) 555 ) 556 (member item tables :key #'syntable-label) 557 ) 558 (error "COMPILE-SYNTAX-TABLE: SWITCH ~A cannot be matched" 559 item 560 ) 561 ) 562 ) 563 (dolist (item (mapcar #'syntoken-begin begins)) 564 (unless (member item tables :key #'syntable-label) 565 (error "COMPILE-SYNTAX-TABLE: BEGIN ~A cannot be matched" 566 item 567 ) 568 ) 569 ) 570 571 ;; Create a list of all properties used by the syntax. 572 (setq 573 properties 574 (delete-duplicates 575 576 ;; Remove explicitly set to "default" properties. 577 (remove nil 578 579 (append 580 581 ;; List all properties in the syntoken list. 582 (mapcar 583 #'syntoken-property 584 (car elements) 585 ) 586 587 ;; List all properties in the syntable list. 588 (mapcar 589 #'syntable-property 590 tables 591 ) 592 ) 593 ) 594 :test #'string= 595 :key #'synprop-name 596 ) 597 ) 598 599 ;; Provide a default property if none specified. 600 (unless 601 (member 602 "default" 603 properties 604 :test #'string= 605 :key #'synprop-name 606 ) 607 (setq properties (append (list *prop-default*) properties)) 608 ) 609 610 611 ;; Now that a list of all nested syntax tables is known, compile the 612 ;; augment list. Note that even the main-table can be augmented to 613 ;; include tokens of one of it's children. 614 615 ;; Adding the tokens of the augment tables must be done in 616 ;; two passes, or it may cause surprises due to "inherited" 617 ;; tokens, as the augment table was processed first, and 618 ;; increased it's token list. 619 (compile-syntax-augment-list main-table tables) 620 621 ;; Now just append the augmented tokens to the table's token list. 622 (link-syntax-augment-table main-table) 623 624 ;; Change all syntoken switch and begin fields to point to the 625 ;; syntable. 626 (dolist (item switches) 627 (if (keywordp (syntoken-switch item)) 628 ;; A switch may be relative, check if a keyword 629 ;; was specified. 630 (setf 631 (syntoken-switch item) 632 (car 633 (member 634 (syntoken-switch item) 635 tables 636 :key #'syntable-label 637 ) 638 ) 639 ) 640 ) 641 ) 642 (dolist (item begins) 643 (setf 644 (syntoken-begin item) 645 (car 646 (member 647 (syntoken-begin item) 648 tables 649 :key #'syntable-label 650 ) 651 ) 652 ) 653 ) 654 655 ;; Don't need to add a entity for default properties 656 (dolist (item (car elements)) 657 (and 658 (syntoken-property item) 659 (string= (synprop-name (syntoken-property item)) "default") 660 (setf (syntoken-property item) ()) 661 ) 662 ) 663 (dolist (item tables) 664 (and 665 (syntable-property item) 666 (string= (synprop-name (syntable-property item)) "default") 667 (setf (syntable-property item) ()) 668 ) 669 ) 670 671 (setq syntax 672 (make-syntax 673 :name name 674 :options options 675 :labels tables 676 :quark 677 (compile-syntax-property-list 678 name 679 properties 680 ) 681 :token-count 682 (length (car elements)) 683 ) 684 ) 685 686 ;; Ready to run! 687) 688 689 690;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 691;; Loop applying the specifed syntax table to the text. 692;; XXX This function needs a review. Should compile the regex patterns 693;; with newline sensitive match (and scan the entire file), and keep a 694;; cache of matched tokens (that may be at a very longer offset), and, 695;; when the match is removed from the cache, readd the token to the 696;; token-list; if the token does not match, it will not be in the cache, 697;; but should be removed from the token-list. If properly implemented, it 698;; should be somewhat like 4 times faster, but I would not be surprised 699;; if it becames even faster. 700;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 701(defun syntax-highlight (*syntax* 702 &optional 703 (*from* (point-min)) 704 (*to* (point-max)) 705 interactive 706 &aux 707#+debug (*line-number* 0) 708 stream 709 indent-table 710 ) 711 712 ;; Make sure the property list is in use. 713 ;; The interactive flag is only set after loading the file. 714 (or interactive 715 (property-list (syntax-quark *syntax*)) 716 ) 717 718#+debug 719 (setq *from* 0 *to* 0) 720 721#-debug 722 (and (>= *from* *to*) (return-from syntax-highlight (values *from* nil))) 723 724 ;; Remove any existing properties from the text. 725 (clear-entities *from* (1+ *to*)) 726 727 (setq stream 728#-debug (make-string-input-stream (read-text *from* (- *to* *from*))) 729#+debug *standard-input* 730 ) 731 732 (prog* 733 ( 734 ;; Used to check if end of file found but syntax stack did 735 ;; not finish. 736 (point-max (point-max)) 737 738 ;; Used in interactive mode, to return the syntax table 739 ;; where the cursor is located. 740 (point (point)) 741 742 ;; The current stack of states. 743 stack 744 745 ;; The current syntable. 746 (syntax-table (car (syntax-labels *syntax*))) 747 748 ;; The current syntable's default property. 749 (default-property (syntable-property syntax-table)) 750 751 ;; Add this property to newlines as a hint to the interactive 752 ;; callback, so that it knows from where to restart parsing. 753 newline-property 754 755 ;; The tokens in the current syntax table that may match, 756 ;; i.e. the items in this list are not in nomatch. 757 token-list 758 759 ;; A pointer to the syntable token list, if token-list is 760 ;; eq to this value, cannot change it inplace. 761 current-token-list 762 763 ;; Help to avoid allocating too many new object cells, and 764 ;; optmizes a bit time in [n]?set-difference. 765 ;; This optimizes only the processing of one line of text 766 ;; as nomatch must be rebuilt when reading a new line of text. 767 token-list-stack 768 769 ;; Matches for the current list of tokens. 770 matches 771 772 ;; Line of text. 773 line 774 775 ;; Length of the text line. 776 length 777 778 ;; A inverse cache, don't call re-exec when the regex is 779 ;; already known to not match. 780 nomatch 781 782 ;; Use cache as a list of matches to avoid repetitive 783 ;; unnecessary calls to re-exec. 784 ;; cache is a list in which every element has the format: 785 ;; (token . (start . end)) 786 ;; Line of text. 787 cache 788 789 ;; Used just to avoid a function call at every re-exec call. 790 notbol 791 792 match 793 794 start 795 left 796 right 797 result 798 property 799 800 ;; Beginig a new syntax table? 801 begin 802 803 ;; Switching to another syntax table? 804 switch 805 806 ;; Property flag when changing the current syntax table. 807 contained 808 809 ;; Flag to know if syntax table has changed. 810 change 811 812 ;; Variables used when removing invalid elements from the 813 ;; the cache. 814 item 815 from 816 to 817 ) 818 819;----------------------------------------------------------------------- 820:read 821#+debug-verbose 822 (format t "** Entering :READ stack length is ~D~%" (length stack)) 823#+debug (format t "~%[~D]> " (incf *line-number*)) 824 825 ;; If input has finished, return. 826 (unless (setq line (read-line stream nil nil)) 827 (when 828 (and 829 ;; If a nested syntax table wasn't finished 830 (consp stack) 831 (< 832 (setq *to* (scan *from* :eol :right)) 833 point-max 834 ) 835 ) 836 (setq line (read-text *from* (- *to* *from*))) 837 (clear-entities *from* (1+ *to*)) 838 (go :again) 839 ) 840#-debug (close stream) 841 (return) 842 ) 843 844;------------------------------------------------------------------------ 845:again 846 (setq 847 start 0 848 length (length line) 849 token-list (syntable-tokens syntax-table) 850 current-token-list token-list 851 token-list-stack () 852 nomatch () 853 cache () 854 ) 855 856 857 ;; If empty line, and current table does not have matches for 858 ;; the empty string at start or end of a text line. 859 (when 860 (and 861 (= length 0) 862 (not (syntable-eol syntax-table)) 863 (not (syntable-bol syntax-table))) 864#+debug-verbose 865 (format t "Empty line and table has no match to bol or eol~%") 866 867 (and newline-property 868 (add-entity *from* 1 (synprop-quark newline-property))) 869 (go :update) 870 ) 871 872;------------------------------------------------------------------------ 873:loop 874#+debug-verbose 875 (format t "** Entering :LOOP at offset ~D in table ~A, cache has ~D items~%" 876 start 877 (syntable-label syntax-table) 878 (length cache)) 879 880 (setq notbol (> start 0)) 881 882 ;; For every token that may match. 883 (dolist 884 (token 885 (setq 886 token-list 887 (if (eq token-list current-token-list) 888 (set-difference token-list nomatch :test #'eq) 889 (nset-difference token-list nomatch :test #'eq) 890 ) 891 ) 892 ) 893 894 ;; Try to fetch match from cache. 895 (if (setq match (member token cache :test #'eq :key #'car)) 896 ;; Match is in the cache. 897 898 (progn 899 ;; Match must be moved to the beginning of the 900 ;; matches list, as a match from another syntax 901 ;; table may be also in the cache, but before 902 ;; the match for the current token. 903#+debug-verbose (format t "cached: {~A:~S} ~A~%" 904 (cdar match) 905 (subseq line (cadar match) (cddar match)) 906 (syntoken-regex token)) 907 908 ;; Remove the match from the cache. 909 (if (eq match cache) 910 911 ;; This could be changed to only set "matches" 912 ;; if it is not the first element of cache, 913 ;; but is unsafe, because other tokens may 914 ;; be added to "matches", and will end up 915 ;; before when joining "matches" and "cache". 916 (progn 917 (setq cache (cdr cache)) 918 (rplacd match matches) 919 (setq matches match)) 920 921 (progn 922 (if (= (length match) 1) 923 (progn 924 (rplacd (last cache 2) nil) 925 (rplacd match matches) 926 (setq matches match)) 927 (progn 928 (setq matches (cons (car match) matches)) 929 (rplaca match (cadr match)) 930 (rplacd match (cddr match))) 931 ) 932 ) 933 ) 934 935 ;; Exit loop if the all the remaining 936 ;; input was matched. 937 (when 938 (and 939 (= start (cadar match)) 940 (= length (cddar match)) 941 ) 942#+debug-verbose (format t "Rest of line match~%") 943 (return) 944 ) 945 ) 946 947 ;; Not in the cache, call re-exec. 948 (if 949 (consp 950 (setq 951 match 952 (re-exec 953 (syntoken-regex token) 954 line 955 :start start 956 :notbol notbol))) 957 958 ;; Match found. 959 (progn 960#+debug-verbose (format t "Adding to cache: {~A:~S} ~A~%" 961 (car match) 962 (subseq line (caar match) (cdar match)) 963 (syntoken-regex token)) 964 965 ;; Only the first pair is used. 966 (setq match (car match)) 967 968 (cond 969 ( 970 (or 971 (null matches) 972 ;; No overlap and after most 973 ;; recent match. 974 (>= (car match) (cddar matches)) 975 ;; No overlap and before most 976 ;; recent match. 977 (<= (cdr match) (cadar matches)) 978 ) 979 (setq 980 matches 981 (cons (cons token match) matches) 982 ) 983 ) 984 ( 985 (or 986 ;; Overlap, but start before most 987 ;; recent match. 988 (< (car match) (cadar matches)) 989 (and 990 ;; Same offset as most recent 991 ;; match, but is longer. 992 (= (car match) (cadar matches)) 993 (> (cdr match) (cddar matches)) 994 ) 995 ) 996 (rplaca (car matches) token) 997 (rplacd (car matches) match) 998#+debug-verbose (format t "Replaced most recent match~%") 999 ) 1000 (t 1001#+debug-verbose (format t "Ignored~%") 1002 ;; XXX The interpreter does not yet implement 1003 ;; implicit tagbody in dolist, just comment 1004 ;; the go call in that case. (Will just do 1005 ;; an unecessary test...) 1006 (go :ignored) 1007 ) 1008 ) 1009 1010 ;; Exit loop if the all the remaining 1011 ;; input was matched. 1012 (when 1013 (and 1014 (= start (car match)) 1015 (= length (cdr match))) 1016#+debug-verbose (format t "Rest of line match~%") 1017 (return)) 1018 ) 1019 1020 ;; Match not found. 1021 (progn 1022#+debug-verbose (format t "Adding to nomatch: ~A~%" 1023 (syntoken-regex token)) 1024 (setq nomatch (cons token nomatch))) 1025 ) 1026 ) 1027:ignored 1028 ) 1029 1030 ;; Add matches to the beginning of the cache list. 1031 (setq 1032 ;; Put matches with smaller offset first. 1033 cache 1034 (stable-sort (nconc (nreverse matches) cache) #'< :key #'cadr) 1035 1036 ;; Make sure that when the match loop is reentered, this 1037 ;; variable is NIL. 1038 matches 1039 () 1040 ) 1041 1042 ;; While the first entry in the cache is not from the current table. 1043 (until (or (null cache) (member (caar cache) token-list :test #'eq)) 1044 1045#+debug-verbose 1046 (format t "Not in the current table, removing {~A:~S} ~A~%" 1047 (cdar cache) 1048 (subseq line (cadar cache) (cddar cache)) 1049 (syntoken-regex (caar cache))) 1050 1051 (setq cache (cdr cache)) 1052 ) 1053 1054 1055 ;; If nothing was matched in the entire/remaining line. 1056 (unless cache 1057 (when default-property 1058 (if 1059 (or 1060 (null result) 1061 (> start (cadar result)) 1062 (not (eq (cddar result) default-property))) 1063 (setq 1064 result 1065 (cons 1066 (cons start (cons length default-property)) 1067 result 1068 ) 1069 ) 1070 (rplaca (cdar result) length) 1071 ) 1072 ) 1073 1074#+debug-verbose 1075 (format t "No match until end of line~%") 1076 1077 ;; Result already known, and there is no syntax table 1078 ;; change, bypass :PARSE. 1079 (and interactive 1080 (null indent-table) 1081 (<= 0 (- point *from*) length) 1082 (setq indent-table syntax-table)) 1083 (go :process) 1084 ) 1085 1086#+debug-verbose 1087 (format t "Removing first candidate from cache {~A:~S} ~A~%" 1088 (cdar cache) 1089 (subseq line (cadar cache) (cddar cache)) 1090 (syntoken-regex (caar cache)) 1091 ) 1092 1093 ;; Prepare to choose best match. 1094 (setq 1095 match (car cache) 1096 left (cadr match) 1097 right (cddr match) 1098 cache (cdr cache) 1099 ) 1100 1101 ;; First element can be safely removed now. 1102 ;; If there is only one, skip loop below. 1103 (or cache (go :parse)) 1104 1105 ;; Remove elements of cache that must be discarded. 1106 (setq 1107 item (car cache) 1108 from (cadr item) 1109 to (cddr item) 1110 ) 1111 1112 (loop 1113 (if 1114 (or 1115 1116 ;; If everything removed from the cache. 1117 (null item) 1118 1119 ;; Or next item is at a longer offset than the 1120 ;; end of current match. 1121 (>= from right) 1122 ) 1123 (return) 1124 ) 1125 1126 (and 1127 ;; If another match at the same offset. 1128 (= left from) 1129 1130 ;; And if this match is longer than the current one. 1131 (> to right) 1132 1133 (member (car item) token-list :test #'eq) 1134 1135 (setq 1136 match item 1137 right to 1138 ) 1139 ) 1140 1141#+debug-verbose 1142 (format t "Removing from cache {~A:~S} ~A~%" 1143 (cdar cache) 1144 (subseq line from to) 1145 (syntoken-regex (caar cache))) 1146 1147 (setq 1148 cache (cdr cache) 1149 item (car cache) 1150 from (cadr item) 1151 to (cddr item) 1152 ) 1153 ) 1154 1155 1156;----------------------------------------------------------------------- 1157:parse 1158#+debug-verbose 1159 (format t "** Entering :PARSE~%") 1160 1161 (setq 1162 1163 ;; Change match value to the syntoken. 1164 match (car match) 1165 1166 begin (syntoken-begin match) 1167 switch (syntoken-switch match) 1168 contained (syntoken-contained match) 1169 change (or begin switch) 1170 ) 1171 1172 ;; Check for unmatched leading text. 1173 (when (and default-property (> left start)) 1174#+debug-verbose (format t "No match in {(~D . ~D):~S}~%" 1175 start 1176 left 1177 (subseq line start left) 1178 ) 1179 (if 1180 (or 1181 (null result) 1182 (> start (cadar result)) 1183 (not (eq (cddar result) default-property))) 1184 (setq 1185 result 1186 (cons 1187 (cons start (cons left default-property)) 1188 result 1189 ) 1190 ) 1191 (rplaca (cdar result) left) 1192 ) 1193 ) 1194 1195 ;; If the syntax table is not changed, 1196 ;; or if the new table requires that the 1197 ;; current default property be used. 1198 (unless (and change contained) 1199 1200 (and 1201 (> right left) 1202 (setq 1203 property 1204 (or 1205 ;; If token specifies the property. 1206 (syntoken-property match) 1207 default-property 1208 ) 1209 ) 1210 1211 ;; Add matched text. 1212 (if 1213 (or 1214 (null result) 1215 (> left (cadar result)) 1216 (not (eq (cddar result) property)) 1217 ) 1218 (setq 1219 result 1220 (cons 1221 (cons left (cons right property)) 1222 result 1223 ) 1224 ) 1225 (rplaca (cdar result) right) 1226 ) 1227 ) 1228 1229#+debug-verbose 1230 (format t "(0)Match found for {(~D . ~D):~S}~%" 1231 left 1232 right 1233 (subseq line left right) 1234 ) 1235 ) 1236 1237 1238 ;; Update start offset in the input now! 1239 (and interactive 1240 (null indent-table) 1241 (<= start (- point *from*) right) 1242 (setq indent-table syntax-table)) 1243 (setq start right) 1244 1245 1246 ;; When changing the current syntax table. 1247 (when change 1248 (when switch 1249 (if (numberp switch) 1250 1251 ;; If returning to a previous state. 1252 ;; Don't generate an error if the stack 1253 ;; becomes empty? 1254 (while 1255 (< switch 0) 1256 1257 (setq 1258 syntax-table (pop stack) 1259 token-list (pop token-list-stack) 1260 switch (1+ switch) 1261 ) 1262 ) 1263 1264 ;; Else, not to a previous state, but 1265 ;; returning to a named syntax table, 1266 ;; search for it in the stack. 1267 (while 1268 (and 1269 1270 (setq 1271 token-list (pop token-list-stack) 1272 syntax-table (pop stack) 1273 ) 1274 1275 (not (eq switch syntax-table)) 1276 ) 1277 ;; Empty loop. 1278 ) 1279 ) 1280 1281 ;; If no match found while popping 1282 ;; the stack. 1283 (unless syntax-table 1284 1285 ;; Return to the topmost syntax table. 1286 (setq 1287 syntax-table 1288 (car (syntax-labels *syntax*)) 1289 ) 1290 ) 1291 1292#+debug-verbose (format t "switching to ~A offset: ~D~%" 1293 (syntable-label syntax-table) 1294 start 1295 ) 1296 1297 (if (null token-list) 1298 (setq token-list (syntable-tokens syntax-table)) 1299 ) 1300 ) 1301 1302 (when begin 1303 ;; Save state for a possible 1304 ;; :SWITCH later. 1305 (setq 1306 stack (cons syntax-table stack) 1307 token-list-stack (cons token-list token-list-stack) 1308 token-list (syntable-tokens begin) 1309 syntax-table begin 1310 ) 1311#+debug-verbose (format t "begining ~A offset: ~D~%" 1312 (syntable-label syntax-table) 1313 start 1314 ) 1315 ) 1316 1317 ;; Change current syntax table. 1318 (setq 1319 default-property (syntable-property syntax-table) 1320 current-token-list (syntable-tokens syntax-table) 1321 ) 1322 1323 ;; Set newline property, to help interactive callback 1324 ;; Only need to have a defined value, for now don't care 1325 ;; about wich value is being used, neither if there is 1326 ;; a value to be set. 1327 (if (null stack) 1328 (setq newline-property nil) 1329 (or newline-property 1330 (setq newline-property default-property) 1331 (setq newline-property (syntoken-property match)) 1332 ) 1333 ) 1334 1335 ;; If processing of text was deferred. 1336 (when contained 1337 1338 (and 1339 (> right left) 1340 (setq 1341 property 1342 (or 1343 (syntoken-property match) 1344 default-property 1345 ) 1346 ) 1347 ;; Add matched text with the updated property. 1348 (if 1349 (or 1350 (null result) 1351 (> left (cadar result)) 1352 (not (eq (cddar result) property)) 1353 ) 1354 (setq 1355 result 1356 (cons 1357 (cons left (cons right property)) 1358 result 1359 ) 1360 ) 1361 (rplaca (cdar result) right) 1362 ) 1363 ) 1364 1365#+debug-verbose (format t "(1)Match found for {(~D . ~D):~S}~%" 1366 left 1367 right 1368 (subseq line left right) 1369 ) 1370 ) 1371 1372 (go :loop) 1373 ) 1374 1375 1376;----------------------------------------------------------------------- 1377 ;; Wait for the end of the line to process, so that 1378 ;; it is possible to join sequential matches with the 1379 ;; same text property. 1380 (and (or cache (< start length)) (go :loop)) 1381:process 1382 1383#+debug-verbose 1384 (format t "** Entering :PROCESS~%") 1385 1386 (if result 1387 (progn 1388 ;; If the last property was at the end of the line, 1389 ;; there are nested syntax tables, and there is a 1390 ;; default property, include the newline in the property, 1391 ;; as a hint to the interactive callback. 1392 (and 1393 newline-property 1394 (if 1395 (and 1396 (eq (cddar result) newline-property) 1397 (= length (cadar result)) 1398 ) 1399 (rplaca (cdar result) (1+ length)) 1400 (setq 1401 result 1402 (cons 1403 (cons length (cons (1+ length) newline-property)) 1404 result 1405 ) 1406 ) 1407 ) 1408 ) 1409 1410 ;; Result was created in reversed order. 1411 (nreverse result) 1412 (dolist (item result) 1413 (setq 1414 left (car item) 1415 right (cadr item) 1416 property (cddr item)) 1417 1418 ;; Use the information. 1419 (add-entity 1420 (+ *from* left) 1421 (- right left) 1422 (synprop-quark property)) 1423 ) 1424 ) 1425 1426 (and newline-property 1427 (add-entity 1428 (+ *from* length) 1429 1 1430 (synprop-quark newline-property)) 1431 ) 1432 ) 1433 1434;------------------------------------------------------------------------ 1435:update 1436 ;; Prepare for new matches. 1437 (setq 1438 result nil 1439 1440 ;; Update offset to read text. 1441 ;; Add 1 for the skipped newline. 1442 *from* (+ *from* length 1) 1443 ) 1444 1445 (go :read) 1446 ) 1447 1448#+debug (terpri) 1449 (values *to* indent-table) 1450) 1451 1452(compile 'syntax-highlight) 1453