[racket] string-trim : an implementation & a question

From: Robby Findler (robby at eecs.northwestern.edu)
Date: Sat Apr 2 20:38:39 EDT 2011

Well, you're probably well past caring at this point, especially since
your interest was in the speed, but I wrote the code below for fun, and
maybe you'll enjoy it too. Random testing is great stuff. :)

Robby

#lang racket

;; string-trim.1 : string -> string
;; Returns a fresh string holding s without its leading and trailing
;; whitespace, where "whitespace" is whatever char-whitespace? accepts.
;; An empty or all-whitespace input yields the empty string.
(define (string-trim.1 s)
  ;; (scan s start end step) walks the indices produced by
  ;; (in-range start end step) and evaluates to the first index whose
  ;; character is not whitespace, or #f when there is no such index.
  (define-syntax scan
    (syntax-rules ()
      ((_ s start end step)
       (for/first ((i (in-range start end step))
                   #:when (not (char-whitespace? (string-ref s i))))
         i))))

  (let* ((len (string-length s))
         (start (scan s 0 len 1)))
    (if start
        ;; The backward scan visits indices len-1 down to start inclusive.
        ;; Index start is known to be non-whitespace, so the scan always
        ;; succeeds and never needs a fallback value.
        (let ((end (scan s (sub1 len) (sub1 start) -1)))
          (substring s start (add1 end)))
        ;; No non-whitespace character at all: s is empty or consists only
        ;; of whitespace.  Falling back to the full index range here would
        ;; return the input untrimmed, so return an empty string instead.
        (substring s 0 0))))

;; string-trim.2 : string -> string
;; Regexp-based trimming: the pattern matches the whole string and the
;; replacement keeps only the first group, i.e. the text between the
;; leading and trailing runs of \s characters.
(define (string-trim.2 s)
  (let ([whole-string-rx #px"^\\s*(.*?)\\s*$"]
        [keep-middle "\\1"])
    (regexp-replace whole-string-rx s keep-middle)))


;; test : string string -> void
;; Applies string-trim.1 and then string-trim.2 to `in`; raises an error
;; naming the offending procedure as soon as one of them produces a
;; result that is not equal? to `expected`.
(define (test in expected)
  (for ([trim (in-list (list string-trim.1 string-trim.2))])
    (let ([actual (trim in)])
      (when (not (equal? actual expected))
        (error 'test "expected ~s applied to ~s to produce ~s, but got ~s"
               trim in expected actual)))))

;; Hand-written cases, run in order.  Each entry is (input expected) and
;; is passed to `test`, which checks both implementations.
(for ([example (in-list
                '((""           "")
                  ;; (" " "")  -- disabled; marked as a broken test case
                  ("a"          "a")
                  (" a"         "a")
                  ("a "         "a")
                  ("ab"         "ab")
                  (" ab"        "ab")
                  ("ab "        "ab")
                  ("   ab"      "ab")
                  ("   ab   "   "ab")
                  ("ab   "      "ab")
                  (" s sdf d  " "s sdf d")))])
  (test (first example) (second example)))

;; try : string -> void
;; Differential check: trims `str` with both implementations and raises
;; an error reporting the input and both outputs when they are not equal?.
(define (try str)
  (let ([via-scan (string-trim.1 str)]
        [via-regexp (string-trim.2 str)])
    (when (not (equal? via-scan via-regexp))
      (error 'try "found a disagreement for ~s 1: ~s, 2: ~s"
             str via-scan via-regexp))))

;; random-str : -> string
;; Builds a random string of 0 to 99 characters.  Each character is, with
;; probability 1/3, drawn from return/newline/tab/space, and otherwise is
;; a random lowercase letter a-z.
(define (random-str)
  (define blanks "\r\n\t ")
  ;; The index argument is ignored; only the random draws matter.
  (define (pick-char _index)
    (if (zero? (random 3))
        (string-ref blanks (random (string-length blanks)))
        (integer->char (+ (char->integer #\a) (random 26)))))
  (build-string (random 100) pick-char))


;; time-one : (string -> any) -> void
;; Calls `string-trim` 100000 times on a fixed sample that has long runs
;; of whitespace on both sides of "foo bar baz", wrapped in `time` so the
;; elapsed time is printed.  The sample literal spans two source lines, so
;; it also contains the literal line break between them.
(define (time-one string-trim)
  (let ([sample "                                                      \n
 \t foo bar baz\n                                    \r   "])
    (time
     (let loop ([remaining 100000])
       (unless (zero? remaining)
         (string-trim sample)
         (loop (sub1 remaining)))))))

;; Time the scanning implementation first, then the regexp one.
(for-each time-one (list string-trim.1 string-trim.2))

;; Random testing: compare the two implementations on 1000 random strings.
(let next-trial ([trial 0])
  (when (< trial 1000)
    (try (random-str))
    (next-trial (add1 trial))))


Posted on the users mailing list.