[racket] string-trim : an implementation & a question

From: Jon Zeppieri (zeppieri at gmail.com)
Date: Sat Apr 2 21:43:28 EDT 2011

On Sat, Apr 2, 2011 at 8:38 PM, Robby Findler
<robby at eecs.northwestern.edu> wrote:
>
> Well, you're probably well past caring at this point, esp. since your
> interest was in the speed, but I wrote the below for fun and maybe
> you'll enjoy it too. Random testing is great stuff. :)


I do enjoy it -- thanks!
Here's what I have now, incorporating your random testing:


#lang racket

(require rackunit)
(require racket/unsafe/ops)
(provide string-trim
         string-trim-left
         string-trim-right)

(define-syntax (first-non-whitespace-index stx)
  (syntax-case stx ()
    ((_ s start end direction)
     (let*-values (((cmp next) (if (eq? (string->keyword "forward")
(syntax->datum #'direction))
                                   (values #'unsafe-fx< #'unsafe-fx+)
                                   (values #'unsafe-fx>= #'unsafe-fx-))))
       #`(let loop ((i start))
           (if (#,cmp i end)
               (if (not (char-whitespace? (unsafe-string-ref s i)))
                   i
                   (loop (#,next i 1)))
               #f))))))

(define (string-trim s)
  (let* ((len        (string-length s))
         (last-index (unsafe-fx- len 1))
         (start      (first-non-whitespace-index s 0 len #:forward))
         (end        (if start
                         (unsafe-fx+ (first-non-whitespace-index s
last-index 0 #:backward) 1)
                         #f)))
    (cond ((not start) "")
          (else        (substring s start end)))))

(define (string-trim-left s)
  (let* ((len   (string-length s))
         (start (first-non-whitespace-index s 0 len #:forward)))
    (cond ((not start) "")
          (else        (substring s start)))))

(define (string-trim-right s)
  (let* ((len        (string-length s))
         (last-index (unsafe-fx- len 1))
         (end        (first-non-whitespace-index s last-index 0 #:backward)))
    (cond ((not end) "")
          (else      (substring s 0 (unsafe-fx+ end 1))))))





;; Test cases
;; ----------
;; left-trimming
(check-equal? (string-trim-left "") "")
(check-equal? (string-trim-left " ") "")
(check-equal? (string-trim-left "a") "a")
(check-equal? (string-trim-left "ab") "ab")
(check-equal? (string-trim-left " a") "a")
(check-equal? (string-trim-left " \na") "a")
(check-equal? (string-trim-left " \nab") "ab")
(check-equal? (string-trim-left " \na ") "a ")
(check-equal? (string-trim-left " \nab \n") "ab \n")
(check-equal? (string-trim-left " \na foo b \n") "a foo b \n")

;; right trimming
(check-equal? (string-trim-right "") "")
(check-equal? (string-trim-right " ") "")
(check-equal? (string-trim-right "a") "a")
(check-equal? (string-trim-right "ab") "ab")
(check-equal? (string-trim-right "a ") "a")
(check-equal? (string-trim-right "a\n ") "a")
(check-equal? (string-trim-right "ab\n ") "ab")
(check-equal? (string-trim-right " a\n ") " a")
(check-equal? (string-trim-right " \nab \n") " \nab")
(check-equal? (string-trim-right " \na foo b \n") " \na foo b")

;; left & right trimming
(check-equal? (string-trim "") "")
(check-equal? (string-trim " ") "")
(check-equal? (string-trim "a") "a")
(check-equal? (string-trim "ab") "ab")
(check-equal? (string-trim "a ") "a")
(check-equal? (string-trim " a") "a")
(check-equal? (string-trim " a ") "a")
(check-equal? (string-trim " ab ") "ab")
(check-equal? (string-trim "\t ab\n ") "ab")
(check-equal? (string-trim " a foo b ") "a foo b")


;; Time tests
(define (random-str)
 (define whitespace "\r\n\t ")
 (list->string
  (for/list ([x (in-range 0 (random 100))])
    (case (random 3)
      [(0) (string-ref whitespace (random (string-length whitespace)))]
      [else (integer->char (+ (char->integer #\a) (random 26)))]))))

(define (string-trim/alt s)
  (regexp-replace #px"^\\s*(.*?)\\s*$" s "\\1"))

(define (time-test)
  (define N 100000)

  (define test-cases
    (for/list ((i (in-range N)))
      (random-str)))

  (collect-garbage)
  (collect-garbage)

  (time (for ((s (in-list test-cases)))
          (string-trim s)))

  (collect-garbage)
  (collect-garbage)

  (time (for ((s (in-list test-cases)))
          (string-trim/alt s)))

  ;; compare
  (for ((str (in-list test-cases)))
    (let ((s1 (string-trim str))
          (s2 (string-trim/alt str)))
      (unless (equal? s1 s2)
        (error 'test "found a disagreement for ~s 1: ~s, 2: ~s"
               str s1 s2)))))


Posted on the users mailing list.