glob: Support square brackets in patterns.
* guix/glob.scm (wildcard-indices): Remove. (parse-bracket): New procedure. (compile-glob-pattern): Rewrite. Support square brackets for sets and ranges. (glob-match?): Support sets and ranges. * tests/glob.scm (test-compile-glob-pattern) (test-glob-match): New macros. Use them to rewrite the existing tests, and add new tests.
This commit is contained in:
		
							parent
							
								
									675e81a082
								
							
						
					
					
						commit
						e914b398af
					
				
					 2 changed files with 99 additions and 63 deletions
				
			
		|  | @ -25,20 +25,17 @@ | |||
| ;;; | ||||
| ;;; This is a minimal implementation of "glob patterns" (info "(libc) | ||||
| ;;; Globbbing").  It is currently limited to simple patterns and does not | ||||
| ;;; support braces and square brackets, for instance. | ||||
| ;;; support braces, for instance. | ||||
| ;;; | ||||
| ;;; Code: | ||||
| 
 | ||||
| (define (wildcard-indices str) | ||||
|   "Return the list of indices in STR where wildcards can be found." | ||||
|   (let loop ((index 0) | ||||
|              (result '())) | ||||
|     (if (= index (string-length str)) | ||||
|         (reverse result) | ||||
|         (loop (+ 1 index) | ||||
|               (case (string-ref str index) | ||||
|                 ((#\? #\*) (cons index result)) | ||||
|                 (else      result)))))) | ||||
| (define (parse-bracket chars) | ||||
|   "Parse CHARS, a list of characters that extracted from a '[...]' sequence." | ||||
|   (match chars | ||||
|     ((start #\- end) | ||||
|      `(range ,start ,end)) | ||||
|     (lst | ||||
|      `(set ,@lst)))) | ||||
| 
 | ||||
| (define (compile-glob-pattern str) | ||||
|   "Return an sexp that represents the compiled form of STR, a glob pattern | ||||
|  | @ -48,29 +45,43 @@ such as \"foo*\" or \"foo??bar\"." | |||
|       (((? string? str)) str) | ||||
|       (x x))) | ||||
| 
 | ||||
|   (let loop ((index   0) | ||||
|              (indices (wildcard-indices str)) | ||||
|   (define (cons-string chars lst) | ||||
|     (match chars | ||||
|       (() lst) | ||||
|       (_ (cons (list->string (reverse chars)) lst)))) | ||||
| 
 | ||||
|   (let loop ((chars   (string->list str)) | ||||
|              (pending '()) | ||||
|              (brackets 0) | ||||
|              (result '())) | ||||
|     (match indices | ||||
|     (match chars | ||||
|       (() | ||||
|        (flatten (cond ((zero? index) | ||||
|                        (list str)) | ||||
|                       ((= index (string-length str)) | ||||
|                        (reverse result)) | ||||
|                       (else | ||||
|                        (reverse (cons (string-drop str index) | ||||
|                                       result)))))) | ||||
|       ((wildcard-index . rest) | ||||
|        (let ((wildcard (match (string-ref str wildcard-index) | ||||
|        (flatten (reverse (if (null? pending) | ||||
|                              result | ||||
|                              (cons-string pending result))))) | ||||
|       (((and chr (or #\? #\*)) . rest) | ||||
|        (let ((wildcard (match chr | ||||
|                          (#\? '?) | ||||
|                          (#\* '*)))) | ||||
|          (match (substring str index wildcard-index) | ||||
|            (""  (loop (+ 1 wildcard-index) | ||||
|                       rest | ||||
|                       (cons wildcard result))) | ||||
|            (str (loop (+ 1 wildcard-index) | ||||
|                       rest | ||||
|                       (cons* wildcard str result))))))))) | ||||
|          (if (zero? brackets) | ||||
|              (loop rest '() 0 | ||||
|                    (cons* wildcard (cons-string pending result))) | ||||
|              (loop rest (cons chr pending) brackets result)))) | ||||
|       ((#\[ . rest) | ||||
|        (if (zero? brackets) | ||||
|            (loop rest '() (+ 1 brackets) | ||||
|                  (cons-string pending result)) | ||||
|            (loop rest (cons #\[ pending) (+ 1 brackets) result))) | ||||
|       ((#\] . rest) | ||||
|        (cond ((zero? brackets) | ||||
|               (error "unexpected closing bracket" str)) | ||||
|              ((= 1 brackets) | ||||
|               (loop rest '() 0 | ||||
|                     (cons (parse-bracket (reverse pending)) result))) | ||||
|              (else | ||||
|               (loop rest (cons #\] pending) (- brackets 1) result)))) | ||||
|       ((chr . rest) | ||||
|        (loop rest (cons chr pending) brackets result))))) | ||||
| 
 | ||||
| (define (glob-match? pattern str) | ||||
|   "Return true if STR matches PATTERN, a compiled glob pattern as returned by | ||||
|  | @ -78,11 +89,12 @@ such as \"foo*\" or \"foo??bar\"." | |||
|   (let loop ((pattern pattern) | ||||
|              (str str)) | ||||
|    (match pattern | ||||
|      ((? string? literal) (string=? literal str)) | ||||
|      (((? string? one))   (string=? one str)) | ||||
|      (('*)  #t) | ||||
|      (('?) (= 1 (string-length str))) | ||||
|      (()    #t) | ||||
|      ((? string? literal) | ||||
|       (string=? literal str)) | ||||
|      (() | ||||
|       (string-null? str)) | ||||
|      (('*) | ||||
|       #t) | ||||
|      (('* suffix . rest) | ||||
|       (match (string-contains str suffix) | ||||
|         (#f    #f) | ||||
|  | @ -92,6 +104,19 @@ such as \"foo*\" or \"foo??bar\"." | |||
|      (('? . rest) | ||||
|       (and (>= (string-length str) 1) | ||||
|            (loop rest (string-drop str 1)))) | ||||
|      ((('range start end) . rest) | ||||
|       (and (>= (string-length str) 1) | ||||
|            (let ((chr (string-ref str 0))) | ||||
|              (and (char-set-contains? (ucs-range->char-set | ||||
|                                        (char->integer start) | ||||
|                                        (+ 1 (char->integer end))) | ||||
|                                       chr) | ||||
|                   (loop rest (string-drop str 1)))))) | ||||
|      ((('set . chars) . rest) | ||||
|       (and (>= (string-length str) 1) | ||||
|            (let ((chr (string-ref str 0))) | ||||
|              (and (char-set-contains? (list->char-set chars) chr) | ||||
|                   (loop rest (string-drop str 1)))))) | ||||
|      ((prefix . rest) | ||||
|       (and (string-prefix? prefix str) | ||||
|            (loop rest (string-drop str (string-length prefix)))))))) | ||||
|  |  | |||
|  | @ -23,36 +23,47 @@ | |||
|  | ||||
| (test-begin "glob") | ||||
| 
 | ||||
| (test-equal "compile-glob-pattern, no wildcards" | ||||
|   "foo" | ||||
|   (compile-glob-pattern "foo")) | ||||
| (define-syntax test-compile-glob-pattern | ||||
|   (syntax-rules (=>) | ||||
|     ((_ pattern => result rest ...) | ||||
|      (begin | ||||
|        (test-equal (format #f "compile-glob-pattern, ~s" pattern) | ||||
|          result | ||||
|          (compile-glob-pattern pattern)) | ||||
|        (test-compile-glob-pattern rest ...))) | ||||
|     ((_) | ||||
|      #t))) | ||||
| 
 | ||||
| (test-equal "compile-glob-pattern, Kleene star" | ||||
|   '("foo" * "bar") | ||||
|   (compile-glob-pattern "foo*bar")) | ||||
| (define-syntax test-glob-match | ||||
|   (syntax-rules (matches and not) | ||||
|     ((_ (pattern-string matches strings ... (and not others ...)) rest ...) | ||||
|      (begin | ||||
|        (test-assert (format #f "glob-match? ~s" pattern-string) | ||||
|          (let ((pattern (compile-glob-pattern pattern-string))) | ||||
|            (and (glob-match? pattern strings) ... | ||||
|                 (not (glob-match? pattern others)) ...))) | ||||
|        (test-glob-match rest ...))) | ||||
|     ((_) | ||||
|      #t))) | ||||
| 
 | ||||
| (test-equal "compile-glob-pattern, question mark" | ||||
|   '(? "foo" *) | ||||
|   (compile-glob-pattern "?foo*")) | ||||
| (test-compile-glob-pattern | ||||
|  "foo" => "foo" | ||||
|  "?foo*" => '(? "foo" *) | ||||
|  "foo[1-5]" => '("foo" (range #\1 #\5)) | ||||
|  "foo[abc]bar" => '("foo" (set #\a #\b #\c) "bar") | ||||
|  "foo[a[b]c]bar" => '("foo" (set #\a #\[ #\b #\] #\c) "bar") | ||||
|  "[123]x" => '((set #\1 #\2 #\3) "x") | ||||
|  "[a-z]" => '((range #\a #\z))) | ||||
| 
 | ||||
| (test-assert "literal match" | ||||
|   (let ((pattern (compile-glob-pattern "foo"))) | ||||
|     (and (glob-match? pattern "foo") | ||||
|          (not (glob-match? pattern "foobar")) | ||||
|          (not (glob-match? pattern "barfoo"))))) | ||||
| 
 | ||||
| (test-assert "trailing star" | ||||
|   (let ((pattern (compile-glob-pattern "foo*"))) | ||||
|     (and (glob-match? pattern "foo") | ||||
|          (glob-match? pattern "foobar") | ||||
|          (not (glob-match? pattern "xfoo"))))) | ||||
| 
 | ||||
| (test-assert "question marks" | ||||
|   (let ((pattern (compile-glob-pattern "foo??bar"))) | ||||
|     (and (glob-match? pattern "fooxxbar") | ||||
|          (glob-match? pattern "fooZZbar") | ||||
|          (not (glob-match? pattern "foobar")) | ||||
|          (not (glob-match? pattern "fooxxxbar")) | ||||
|          (not (glob-match? pattern "fooxxbarzz"))))) | ||||
| (test-glob-match | ||||
|  ("foo" matches "foo" (and not "foobar" "barfoo")) | ||||
|  ("foo*" matches "foo" "foobar" (and not "xfoo")) | ||||
|  ("foo??bar" matches "fooxxbar" "fooZZbar" | ||||
|   (and not "foobar" "fooxxxbar" "fooxxbarzz")) | ||||
|  ("foo?" matches "foox" (and not "fooxx")) | ||||
|  ("ab[0-9]c" matches "ab0c" "ab7c" "ab9c" | ||||
|   (and not "ab-c" "ab00c" "ab3")) | ||||
|  ("ab[cdefg]" matches "abc" "abd" "abg" | ||||
|   (and not "abh" "abcd" "ab["))) | ||||
| 
 | ||||
| (test-end "glob") | ||||
|  |  | |||
		Reference in a new issue