[racket] Performance help

From: Bradley Lucier (lucier at math.purdue.edu)
Date: Sat Jan 17 16:47:05 EST 2015

I learned some things by studying the Racket version of this program, 
thanks.

I tried to rewrite it without using the Racket built-ins.  In the end I 
wanted a version where edits1 and edits2 wouldn't allocate the lists of 
intermediate results, but rather process the items as they were 
generated.  This is what I came up with (it uses irregex rather than 
pregexp).  I ran it on Gambit, of course, and it turns out to be quite a 
bit slower than the Racket version.

Brad

#!/usr/bin/env gsi
(declare (standard-bindings)
      (extended-bindings)
      (block)
      (fixnum)
     (not safe)
     )

;;; Norvig's spelling corrector
;; -----------------------------------

;; Authors:
;; Jyotirmoy Bhattacharya (jyotirmoy at jyotirmoy.net)
;; Matthias Felleisen

(include "irregex.scm")

(define (fold-left operator initial-value list)
   (if (null? list)
       initial-value
       (fold-left operator
                  (operator initial-value (car list))
          (cdr list))))

(define (string-downcase s)
   (let* ((n (string-length s))
      (result (make-string n)))
     (do ((i (- n 1) (- i 1)))
     ((< i 0) result)
       (string-set! result i (char-downcase (string-ref s i))))))

(define ALPHABET (map string (string->list "abcdefghijklmnopqrstuvwxyz")))

(define (train fname)
   (freqs (words (read-line (open-input-file fname) #f))))

;; String -> [Listof String]
;; Extracts words from a string and convert them to lowercase

(define (words buf)
   (map irregex-match-substring
        (irregex-fold (irregex "[a-z]+")
              (lambda (i m s) (cons m s))
              '()
              (string-downcase buf))))


;; [Listof String] -> HFrequency
;; Take a list of words, return a hash table with words as keys and 
frequencies as values

(define (freqs xs)
   (let ((m (make-table test: string=?)))
     (for-each (lambda (x)
         (table-set! m x (+ 1 (table-ref m x 0))))
           xs)
     m))

;; HFrequency String -> String
;; Returns the correction for a word.
;; Returns the word itself if no correction is found.

(define (correct m s)
   (let ((better-known
      (lambda (init s)
        (cond ((table-ref m s #f)
           => (lambda (v)
                (if (and init
                 (>= (cdr init) v))
                init
                (cons s v))))
          (else
           init)))))
     (cond ((table-ref m s #f)
        s)
       ((process-edits s better-known #f)
        => car)
       ((process-edits s (lambda (init s) (process-edits s better-known 
init)) #f)
        => car)
       (else
        s))))

(define (process-edits s combine init)

   (define (splits s)
     (let ((n (string-length s)))
       (do ((i n (- i 1))
        (result '() (cons (cons (substring s 0 i) (substring s i n))
                  result)))
       ((< i 0) result))))

   (define (deletes ss init)
     (fold-left (lambda (result s)
          (let ((right-part (cdr s)))
            (if (zero? (string-length right-part))
                result
                (combine result (string-append (car s) (substring 
right-part 1 (string-length right-part)))))))
            init
            ss))

   (define (inserts ss init)
     (fold-left (lambda (result c)
          (fold-left (lambda (result s)
                   (combine result (string-append (car s) c (cdr s))))
                 result
                 ss))
            init
            ALPHABET))

   (define (replaces ss init)
     (fold-left (lambda (result s)
          (let ((right-part (cdr s)))
            (if (zero? (string-length right-part))
                result
                (fold-left (lambda (result c)
                     (combine result (string-append (car s) c (substring 
right-part 1 (string-length right-part)))))
                   result
                   ALPHABET))))
            init
            ss))

   (define (transposes ss init)
     (fold-left (lambda (result s)
          (let ((r (cdr s)))
            (if (>= (string-length r) 2)
                (combine result (string-append (car s) (string 
(string-ref r 1)) (string (string-ref r 0)) (substring r 2 
(string-length r)))
                 )
                result)))
            init
            ss))

   (let ((ss (splits s)))
     (deletes ss (inserts ss (replaces ss (transposes ss init))))))

(define (main training-file file)
   (let ((m (time (train training-file))))
     (let ((tests (read-all (open-input-file file) read-line)))
       (for-each (lambda (l)
           (if (not (string=? l ""))
               (for-each display (list l ", " (correct m l) #\newline))))
         tests))))


Posted on the users mailing list.