[racket] Performance help
I learned some things by studying the Racket version of this program,
thanks.
I tried to rewrite it without using the Racket built-ins. In the end I
wanted a version where edits1 and edits2 wouldn't allocate the lists of
intermediate results, but rather process the items as they were
generated. This is what I came up with (it uses irregex rather than
pregexp). I ran it on Gambit, of course, and it turns out to be quite a
bit slower than the Racket version.
Brad
#!/usr/bin/env gsi
(declare (standard-bindings)
(extended-bindings)
(block)
(fixnum)
(not safe)
)
;;; Norvig's spelling corrector
;; -----------------------------------
;; Authors:
;; Jyotirmoy Bhattacharya (jyotirmoy at jyotirmoy.net)
;; Matthias Felleisen
(include "irregex.scm")
(define (fold-left operator initial-value list)
(if (null? list)
initial-value
(fold-left operator
(operator initial-value (car list))
(cdr list))))
(define (string-downcase s)
(let* ((n (string-length s))
(result (make-string n)))
(do ((i (- n 1) (- i 1)))
((< i 0) result)
(string-set! result i (char-downcase (string-ref s i))))))
(define ALPHABET (map string (string->list "abcdefghijklmnopqrstuvwxyz")))
(define (train fname)
(freqs (words (read-line (open-input-file fname) #f))))
;; String -> [Listof String]
;; Extracts words from a string and convert them to lowercase
(define (words buf)
(map irregex-match-substring
(irregex-fold (irregex "[a-z]+")
(lambda (i m s) (cons m s))
'()
(string-downcase buf))))
;; [Listof String] -> HFrequency
;; Take a list of words, return a hash table with words as keys and
frequencies as values
(define (freqs xs)
(let ((m (make-table test: string=?)))
(for-each (lambda (x)
(table-set! m x (+ 1 (table-ref m x 0))))
xs)
m))
;; HFrequency String -> String
;; Returns the correction for a word.
;; Returns the word itself if no correction is found.
(define (correct m s)
(let ((better-known
(lambda (init s)
(cond ((table-ref m s #f)
=> (lambda (v)
(if (and init
(>= (cdr init) v))
init
(cons s v))))
(else
init)))))
(cond ((table-ref m s #f)
s)
((process-edits s better-known #f)
=> car)
((process-edits s (lambda (init s) (process-edits s better-known
init)) #f)
=> car)
(else
s))))
(define (process-edits s combine init)
(define (splits s)
(let ((n (string-length s)))
(do ((i n (- i 1))
(result '() (cons (cons (substring s 0 i) (substring s i n))
result)))
((< i 0) result))))
(define (deletes ss init)
(fold-left (lambda (result s)
(let ((right-part (cdr s)))
(if (zero? (string-length right-part))
result
(combine result (string-append (car s) (substring
right-part 1 (string-length right-part)))))))
init
ss))
(define (inserts ss init)
(fold-left (lambda (result c)
(fold-left (lambda (result s)
(combine result (string-append (car s) c (cdr s))))
result
ss))
init
ALPHABET))
(define (replaces ss init)
(fold-left (lambda (result s)
(let ((right-part (cdr s)))
(if (zero? (string-length right-part))
result
(fold-left (lambda (result c)
(combine result (string-append (car s) c (substring
right-part 1 (string-length right-part)))))
result
ALPHABET))))
init
ss))
(define (transposes ss init)
(fold-left (lambda (result s)
(let ((r (cdr s)))
(if (>= (string-length r) 2)
(combine result (string-append (car s) (string
(string-ref r 1)) (string (string-ref r 0)) (substring r 2
(string-length r)))
)
result)))
init
ss))
(let ((ss (splits s)))
(deletes ss (inserts ss (replaces ss (transposes ss init))))))
(define (main training-file file)
(let ((m (time (train training-file))))
(let ((tests (read-all (open-input-file file) read-line)))
(for-each (lambda (l)
(if (not (string=? l ""))
(for-each display (list l ", " (correct m l) #\newline))))
tests))))