[racket-dev] strange performance on regexp-match: proposed patch
I've isolated a performance issue on form-urlencoded->alist. On
strings with very long key/values, the code appears to consume an
unusual amount of memory. Does the following look ok?
diff --git a/collects/net/uri-codec-unit.rkt b/collects/net/uri-codec-unit.rkt
index c992d62..01d4c05 100644
--- a/collects/net/uri-codec-unit.rkt
+++ b/collects/net/uri-codec-unit.rkt
@@ -269,7 +269,17 @@ See more in PR8831.
;; string -> listof (cons string string)
;; http://www.w3.org/TR/html401/appendix/notes.html#ampersands-in-uris
(define (form-urlencoded->alist str)
- (define keyval-regexp #rx"^([^=]*)(?:=(.*))?$")
+ (define (split-key-value s)
+ (let ([n (string-length s)])
+ (let loop ([i 0])
+ (cond [(= i n)
+ (values s #f)]
+ [(char=? (string-ref s i) #\=)
+ (values (substring s 0 i)
+ (substring s (add1 i)))]
+ [else
+ (loop (add1 i))]))))
+
(define value-regexp
(case (current-alist-separator-mode)
[(semi) #rx"[;]"]
@@ -278,10 +288,10 @@ See more in PR8831.
(if (equal? "" str)
'()
(map (lambda (keyval)
- (let ([m (regexp-match keyval-regexp keyval)]) ; cannot fail
- (cons (string->symbol (form-urlencoded-decode (cadr m)))
+ (let-values ([(key val) (split-key-value keyval)])
+ (cons (string->symbol (form-urlencoded-decode key))
;; can be #f for no "=..." part
- (and (caddr m) (form-urlencoded-decode (caddr m))))))
+ (and val (form-urlencoded-decode val)))))
(regexp-split value-regexp str))))
(define current-alist-separator-mode