[racket] string-trim : an implementation & a question

From: Robby Findler (robby at eecs.northwestern.edu)
Date: Sun Apr 3 08:53:09 EDT 2011

Very nice.

One other thought I had: testing this one actually seems pretty
amenable to enumeration, since there really are only a few characters
that are treated differently; one could set up a function that mapped
a natural number to a string by treating the different characters as
digits and then translating the number into a base
however-many-characters-you-have number and reading off the string
from there.

How do the timing turn out now? Still the same, roughly?

Robby

On Sat, Apr 2, 2011 at 8:43 PM, Jon Zeppieri <zeppieri at gmail.com> wrote:
> On Sat, Apr 2, 2011 at 8:38 PM, Robby Findler
> <robby at eecs.northwestern.edu> wrote:
>>
>> Well, you're probably well past caring at this point, esp. since your
>> interest was in the speed, but I wrote the below for fun and maybe
>> you'll enjoy it too. Random testing is great stuff. :)
>
>
> I do enjoy it -- thanks!
> Here's what I have now, incorporating your random testing:
>
>
> #lang racket
>
> (require rackunit)
> (require racket/unsafe/ops)
> (provide string-trim
>         string-trim-left
>         string-trim-right)
>
> (define-syntax (first-non-whitespace-index stx)
>  (syntax-case stx ()
>    ((_ s start end direction)
>     (let*-values (((cmp next) (if (eq? (string->keyword "forward")
> (syntax->datum #'direction))
>                                   (values #'unsafe-fx< #'unsafe-fx+)
>                                   (values #'unsafe-fx>= #'unsafe-fx-))))
>       #`(let loop ((i start))
>           (if (#,cmp i end)
>               (if (not (char-whitespace? (unsafe-string-ref s i)))
>                   i
>                   (loop (#,next i 1)))
>               #f))))))
>
> (define (string-trim s)
>  (let* ((len        (string-length s))
>         (last-index (unsafe-fx- len 1))
>         (start      (first-non-whitespace-index s 0 len #:forward))
>         (end        (if start
>                         (unsafe-fx+ (first-non-whitespace-index s
> last-index 0 #:backward) 1)
>                         #f)))
>    (cond ((not start) "")
>          (else        (substring s start end)))))
>
> (define (string-trim-left s)
>  (let* ((len   (string-length s))
>         (start (first-non-whitespace-index s 0 len #:forward)))
>    (cond ((not start) "")
>          (else        (substring s start)))))
>
> (define (string-trim-right s)
>  (let* ((len        (string-length s))
>         (last-index (unsafe-fx- len 1))
>         (end        (first-non-whitespace-index s last-index 0 #:backward)))
>    (cond ((not end) "")
>          (else      (substring s 0 (unsafe-fx+ end 1))))))
>
>
>
>
>
> ;; Test cases
> ;; ----------
> ;; left-trimming
> (check-equal? (string-trim-left "") "")
> (check-equal? (string-trim-left " ") "")
> (check-equal? (string-trim-left "a") "a")
> (check-equal? (string-trim-left "ab") "ab")
> (check-equal? (string-trim-left " a") "a")
> (check-equal? (string-trim-left " \na") "a")
> (check-equal? (string-trim-left " \nab") "ab")
> (check-equal? (string-trim-left " \na ") "a ")
> (check-equal? (string-trim-left " \nab \n") "ab \n")
> (check-equal? (string-trim-left " \na foo b \n") "a foo b \n")
>
> ;; right trimming
> (check-equal? (string-trim-right "") "")
> (check-equal? (string-trim-right " ") "")
> (check-equal? (string-trim-right "a") "a")
> (check-equal? (string-trim-right "ab") "ab")
> (check-equal? (string-trim-right "a ") "a")
> (check-equal? (string-trim-right "a\n ") "a")
> (check-equal? (string-trim-right "ab\n ") "ab")
> (check-equal? (string-trim-right " a\n ") " a")
> (check-equal? (string-trim-right " \nab \n") " \nab")
> (check-equal? (string-trim-right " \na foo b \n") " \na foo b")
>
> ;; left & right trimming
> (check-equal? (string-trim "") "")
> (check-equal? (string-trim " ") "")
> (check-equal? (string-trim "a") "a")
> (check-equal? (string-trim "ab") "ab")
> (check-equal? (string-trim "a ") "a")
> (check-equal? (string-trim " a") "a")
> (check-equal? (string-trim " a ") "a")
> (check-equal? (string-trim " ab ") "ab")
> (check-equal? (string-trim "\t ab\n ") "ab")
> (check-equal? (string-trim " a foo b ") "a foo b")
>
>
> ;; Time tests
> (define (random-str)
>  (define whitespace "\r\n\t ")
>  (list->string
>  (for/list ([x (in-range 0 (random 100))])
>    (case (random 3)
>      [(0) (string-ref whitespace (random (string-length whitespace)))]
>      [else (integer->char (+ (char->integer #\a) (random 26)))]))))
>
> (define (string-trim/alt s)
>  (regexp-replace #px"^\\s*(.*?)\\s*$" s "\\1"))
>
> (define (time-test)
>  (define N 100000)
>
>  (define test-cases
>    (for/list ((i (in-range N)))
>      (random-str)))
>
>  (collect-garbage)
>  (collect-garbage)
>
>  (time (for ((s (in-list test-cases)))
>          (string-trim s)))
>
>  (collect-garbage)
>  (collect-garbage)
>
>  (time (for ((s (in-list test-cases)))
>          (string-trim/alt s)))
>
>  ;; compare
>  (for ((str (in-list test-cases)))
>    (let ((s1 (string-trim str))
>          (s2 (string-trim/alt str)))
>      (unless (equal? s1 s2)
>        (error 'test "found a disagreement for ~s 1: ~s, 2: ~s"
>               str s1 s2)))))
>



Posted on the users mailing list.