[racket] string-trim : an implementation & a question
Very nice.
One other thought I had: testing this one actually seems pretty
amenable to enumeration, since there really are only a few characters
that are treated differently; one could set up a function that mapped
a natural number to a string by treating the different characters as
digits and then translating the number into a base
however-many-characters-you-have number and reading off the string
from there.
How do the timing turn out now? Still the same, roughly?
Robby
On Sat, Apr 2, 2011 at 8:43 PM, Jon Zeppieri <zeppieri at gmail.com> wrote:
> On Sat, Apr 2, 2011 at 8:38 PM, Robby Findler
> <robby at eecs.northwestern.edu> wrote:
>>
>> Well, you're probably well past caring at this point, esp. since your
>> interest was in the speed, but I wrote the below for fun and maybe
>> you'll enjoy it too. Random testing is great stuff. :)
>
>
> I do enjoy it -- thanks!
> Here's what I have now, incorporating your random testing:
>
>
> #lang racket
>
> (require rackunit)
> (require racket/unsafe/ops)
> (provide string-trim
> string-trim-left
> string-trim-right)
>
> (define-syntax (first-non-whitespace-index stx)
> (syntax-case stx ()
> ((_ s start end direction)
> (let*-values (((cmp next) (if (eq? (string->keyword "forward")
> (syntax->datum #'direction))
> (values #'unsafe-fx< #'unsafe-fx+)
> (values #'unsafe-fx>= #'unsafe-fx-))))
> #`(let loop ((i start))
> (if (#,cmp i end)
> (if (not (char-whitespace? (unsafe-string-ref s i)))
> i
> (loop (#,next i 1)))
> #f))))))
>
> (define (string-trim s)
> (let* ((len (string-length s))
> (last-index (unsafe-fx- len 1))
> (start (first-non-whitespace-index s 0 len #:forward))
> (end (if start
> (unsafe-fx+ (first-non-whitespace-index s
> last-index 0 #:backward) 1)
> #f)))
> (cond ((not start) "")
> (else (substring s start end)))))
>
> (define (string-trim-left s)
> (let* ((len (string-length s))
> (start (first-non-whitespace-index s 0 len #:forward)))
> (cond ((not start) "")
> (else (substring s start)))))
>
> (define (string-trim-right s)
> (let* ((len (string-length s))
> (last-index (unsafe-fx- len 1))
> (end (first-non-whitespace-index s last-index 0 #:backward)))
> (cond ((not end) "")
> (else (substring s 0 (unsafe-fx+ end 1))))))
>
>
>
>
>
> ;; Test cases
> ;; ----------
> ;; left-trimming
> (check-equal? (string-trim-left "") "")
> (check-equal? (string-trim-left " ") "")
> (check-equal? (string-trim-left "a") "a")
> (check-equal? (string-trim-left "ab") "ab")
> (check-equal? (string-trim-left " a") "a")
> (check-equal? (string-trim-left " \na") "a")
> (check-equal? (string-trim-left " \nab") "ab")
> (check-equal? (string-trim-left " \na ") "a ")
> (check-equal? (string-trim-left " \nab \n") "ab \n")
> (check-equal? (string-trim-left " \na foo b \n") "a foo b \n")
>
> ;; right trimming
> (check-equal? (string-trim-right "") "")
> (check-equal? (string-trim-right " ") "")
> (check-equal? (string-trim-right "a") "a")
> (check-equal? (string-trim-right "ab") "ab")
> (check-equal? (string-trim-right "a ") "a")
> (check-equal? (string-trim-right "a\n ") "a")
> (check-equal? (string-trim-right "ab\n ") "ab")
> (check-equal? (string-trim-right " a\n ") " a")
> (check-equal? (string-trim-right " \nab \n") " \nab")
> (check-equal? (string-trim-right " \na foo b \n") " \na foo b")
>
> ;; left & right trimming
> (check-equal? (string-trim "") "")
> (check-equal? (string-trim " ") "")
> (check-equal? (string-trim "a") "a")
> (check-equal? (string-trim "ab") "ab")
> (check-equal? (string-trim "a ") "a")
> (check-equal? (string-trim " a") "a")
> (check-equal? (string-trim " a ") "a")
> (check-equal? (string-trim " ab ") "ab")
> (check-equal? (string-trim "\t ab\n ") "ab")
> (check-equal? (string-trim " a foo b ") "a foo b")
>
>
> ;; Time tests
> (define (random-str)
> (define whitespace "\r\n\t ")
> (list->string
> (for/list ([x (in-range 0 (random 100))])
> (case (random 3)
> [(0) (string-ref whitespace (random (string-length whitespace)))]
> [else (integer->char (+ (char->integer #\a) (random 26)))]))))
>
> (define (string-trim/alt s)
> (regexp-replace #px"^\\s*(.*?)\\s*$" s "\\1"))
>
> (define (time-test)
> (define N 100000)
>
> (define test-cases
> (for/list ((i (in-range N)))
> (random-str)))
>
> (collect-garbage)
> (collect-garbage)
>
> (time (for ((s (in-list test-cases)))
> (string-trim s)))
>
> (collect-garbage)
> (collect-garbage)
>
> (time (for ((s (in-list test-cases)))
> (string-trim/alt s)))
>
> ;; compare
> (for ((str (in-list test-cases)))
> (let ((s1 (string-trim str))
> (s2 (string-trim/alt str)))
> (unless (equal? s1 s2)
> (error 'test "found a disagreement for ~s 1: ~s, 2: ~s"
> str s1 s2)))))
>