[racket] string-trim : an implementation & a question
On Sat, Apr 2, 2011 at 8:38 PM, Robby Findler
<robby at eecs.northwestern.edu> wrote:
>
> Well, you're probably well past caring at this point, esp. since your
> interest was in the speed, but I wrote the below for fun and maybe
> you'll enjoy it too. Random testing is great stuff. :)
I do enjoy it -- thanks!
Here's what I have now, incorporating your random testing:
#lang racket
(require rackunit)
(require racket/unsafe/ops)
(provide string-trim
string-trim-left
string-trim-right)
(define-syntax (first-non-whitespace-index stx)
(syntax-case stx ()
((_ s start end direction)
(let*-values (((cmp next) (if (eq? (string->keyword "forward")
(syntax->datum #'direction))
(values #'unsafe-fx< #'unsafe-fx+)
(values #'unsafe-fx>= #'unsafe-fx-))))
#`(let loop ((i start))
(if (#,cmp i end)
(if (not (char-whitespace? (unsafe-string-ref s i)))
i
(loop (#,next i 1)))
#f))))))
(define (string-trim s)
(let* ((len (string-length s))
(last-index (unsafe-fx- len 1))
(start (first-non-whitespace-index s 0 len #:forward))
(end (if start
(unsafe-fx+ (first-non-whitespace-index s
last-index 0 #:backward) 1)
#f)))
(cond ((not start) "")
(else (substring s start end)))))
(define (string-trim-left s)
(let* ((len (string-length s))
(start (first-non-whitespace-index s 0 len #:forward)))
(cond ((not start) "")
(else (substring s start)))))
(define (string-trim-right s)
(let* ((len (string-length s))
(last-index (unsafe-fx- len 1))
(end (first-non-whitespace-index s last-index 0 #:backward)))
(cond ((not end) "")
(else (substring s 0 (unsafe-fx+ end 1))))))
;; Test cases
;; ----------
;; left-trimming
(check-equal? (string-trim-left "") "")
(check-equal? (string-trim-left " ") "")
(check-equal? (string-trim-left "a") "a")
(check-equal? (string-trim-left "ab") "ab")
(check-equal? (string-trim-left " a") "a")
(check-equal? (string-trim-left " \na") "a")
(check-equal? (string-trim-left " \nab") "ab")
(check-equal? (string-trim-left " \na ") "a ")
(check-equal? (string-trim-left " \nab \n") "ab \n")
(check-equal? (string-trim-left " \na foo b \n") "a foo b \n")
;; right trimming
(check-equal? (string-trim-right "") "")
(check-equal? (string-trim-right " ") "")
(check-equal? (string-trim-right "a") "a")
(check-equal? (string-trim-right "ab") "ab")
(check-equal? (string-trim-right "a ") "a")
(check-equal? (string-trim-right "a\n ") "a")
(check-equal? (string-trim-right "ab\n ") "ab")
(check-equal? (string-trim-right " a\n ") " a")
(check-equal? (string-trim-right " \nab \n") " \nab")
(check-equal? (string-trim-right " \na foo b \n") " \na foo b")
;; left & right trimming
(check-equal? (string-trim "") "")
(check-equal? (string-trim " ") "")
(check-equal? (string-trim "a") "a")
(check-equal? (string-trim "ab") "ab")
(check-equal? (string-trim "a ") "a")
(check-equal? (string-trim " a") "a")
(check-equal? (string-trim " a ") "a")
(check-equal? (string-trim " ab ") "ab")
(check-equal? (string-trim "\t ab\n ") "ab")
(check-equal? (string-trim " a foo b ") "a foo b")
;; Time tests
(define (random-str)
(define whitespace "\r\n\t ")
(list->string
(for/list ([x (in-range 0 (random 100))])
(case (random 3)
[(0) (string-ref whitespace (random (string-length whitespace)))]
[else (integer->char (+ (char->integer #\a) (random 26)))]))))
(define (string-trim/alt s)
(regexp-replace #px"^\\s*(.*?)\\s*$" s "\\1"))
(define (time-test)
(define N 100000)
(define test-cases
(for/list ((i (in-range N)))
(random-str)))
(collect-garbage)
(collect-garbage)
(time (for ((s (in-list test-cases)))
(string-trim s)))
(collect-garbage)
(collect-garbage)
(time (for ((s (in-list test-cases)))
(string-trim/alt s)))
;; compare
(for ((str (in-list test-cases)))
(let ((s1 (string-trim str))
(s2 (string-trim/alt str)))
(unless (equal? s1 s2)
(error 'test "found a disagreement for ~s 1: ~s, 2: ~s"
str s1 s2)))))