#!/bin/sh #| if [ -x "$PLTHOME/bin/mzscheme" ]; then exec "$PLTHOME/bin/mzscheme" -qgr "$0" "$@" else exec "mzscheme" -qgr "$0" "$@" fi |# ;; See also gettitle.ss, which contains regexps that are sensitive to ;; the precise output of tex2page. (require (lib "cmdline.ss") (lib "list.ss") (lib "string.ss") (lib "xml.ss" "xml") (lib "html.ss" "html") (lib "pretty.ss")) (define dir-to-read #f) (define doc-to-read #f) (define split-out-keywords? #f) (command-line "makekeywords.ss" argv (once-each [("--keywords") "Extract R5RS keywords" (set! split-out-keywords? #t)] [("--docname") name "Document name (when not the same as the directory)" (set! doc-to-read name)]) (args (dir) (set! dir-to-read dir))) (unless doc-to-read (set! doc-to-read dir-to-read)) (define re:iname (regexp "index")) (define re:ientry (regexp "^(( )*)()
$")) (define re:ilink (regexp "(.*)
(.*)")) (define re:isubentry-start (regexp "^
$")) (define re:ientry-top (regexp "
(.*)
(.*)")) (define re:isubentry-end (regexp "^
")) (define re:isee (regexp "^
(.*)
see (.*)")) (use-html-spec #f) (define index-file (with-input-from-file (build-path dir-to-read (format "~a.html" doc-to-read)) (lambda () (let loop () (let ([r (read-line)]) (cond [(eof-object? r) #f] [(regexp-match re:iname r) => (lambda (m) (cadr m))] [else (loop)])))))) (unless index-file (error 'makehdindex "Could not discover the HTML index file for ~a by reading ~a.html" dir-to-read doc-to-read)) (define doc (map xml->xexpr (with-input-from-file (build-path dir-to-read index-file) (lambda () (read-html-as-xml))))) ;; dive into 'html, then 'body: (define (go-in tag) (set! doc (ormap (lambda (i) (and (pair? i) (eq? (car i) tag) (cddr i))) doc))) (go-in 'html) (go-in 'body) ;; flatten paragraphs: (set! doc (let loop ([d doc]) (if (null? d) null (if (and (pair? (car d)) (eq? 'p (caar d))) (loop (append (cddar d) (cdr d))) (cons (car d) (loop (cdr d))))))) ;; lift
(set! doc (let loop ([d doc]) (if (null? d) null (if (pair? (car d)) (if (eq? 'br (caar d)) (cons '(br ()) (loop (append (cddar d) (cdr d)))) (cons (list* (caar d) (cadar d) (loop (cddar d))) (loop (cdr d)))) (cons (car d) (loop (cdr d))))))) ;; Skip to "node_index_start" anchor: (set! doc (let loop ([d doc]) (if (null? d) (error 'makehdindex "index start not found on index page ~s" index-file) (if (and (pair? (car d)) (eq? 'a (caar d)) (equal? '(name "node_index_start") (assq 'name (cadar d)))) (cdr d) (loop (cdr d)))))) (define spaces (string #\space #\tab #\newline #\return)) (define whitespace? (let ([re (regexp (format "^[~a]*$" spaces))]) (lambda (s) (regexp-match re s)))) (define re:pre-space (format "^[~a]*" spaces)) (define re:post-space (format "[~a]*$" spaces)) (define re:multi-space (format "[~a][~a]+" spaces spaces)) (define (extract-string n) (let ([n (let loop ([n n]) (if (string? n) n (if (eq? n 'nbsp) " " (apply string-append (map loop (cddr n))))))]) (regexp-replace re:multi-space (regexp-replace re:post-space (regexp-replace re:pre-space n "") "") " "))) (define (build-prefix nesting) (string-append (if (null? nesting) "" (string-append (let loop ([l nesting]) (if (null? (cdr l)) (car l) (string-append (loop (cdr l)) ", " (car l)))) ", ")))) (define (nb-whitespace? x) (or (eq? x 'nbsp) (and (string? x) (whitespace? x)))) ;; Parse each line in the index. An index entry is of the form ;; (br ()) [whitespace|nbsp]* name ["," name]* ;; Every four nbsps indicate a level o nesting ;; Each name is a link when its of the form (a ((href ...)) ...) ;; If there's more then one name, the extras are links, ;; and they refer to the same name as earlier items: ;; rough-index is a list of (cons name link), where the ;; link is "page%anchor" (define rough-index null) ;; xrefs is a list of (cons name name) (define xrefs null) (let loop ([nestings (list "???")][d doc]) (if (null? d) null (let ([l (car d)]) (if (not (pair? l)) (loop nestings (cdr d)) (cond [(eq? 'br (car l)) ;; Start of an entry (let ([d (cdr d)]) (let-values ([(d depth) (let loop ([d d][n 0]) (if (nb-whitespace? (car d)) (loop (cdr d) (add1 n)) (if (and (string? (car d)) (whitespace? (car d))) (loop (cdr d) n) (values d n))))]) (let* ([id (car d)] [anchor? (and (pair? id) (eq? 'a (car id)))] [name (extract-string id)] [nested (list-tail nestings (- (length nestings) (quotient depth 4)))]) ;; Output links until we find a br: (let xloop ([d d]) (unless (null? d) (let ([l (car d)]) (cond [(and (pair? l) (eq? 'br (car l))) ;; Done with this line ;; Extend/replace nestings (loop (cons name nested) d)] [(and (pair? l) (eq? 'a (car l))) ;; Found a link (set! rough-index (cons (cons (format "~a~a" (build-prefix nested) name) (cadr (assq 'href (cadr l)))) rough-index)) (xloop (cdr d))] [(and (pair? l) (eq? 'em (car l)) (equal? "see" (caddr l))) ;; Found an X-ref (let* ([d (let loop ([d (cdr d)]) (if (and (string? (car d)) (whitespace? (car d))) (loop (cdr d)) d))] [target (extract-string (car d))]) (set! xrefs (cons (cons (format "~a~a" (build-prefix nested) ;; Drop trailing comma: (substring name 0 (sub1 (string-length name)))) target) xrefs)) (xloop (cdr d)))] [else (xloop (cdr d))])))))))] [else (loop nestings (cdr d))]))))) (fprintf (current-error-port) " Copying `see' entries~n") ; Copy entries for each "see" xref (define full-rough-index (apply append rough-index (map (lambda (see) (let* ([dest (car see)] [src (cdr see)] [re (regexp (string-append "^" (regexp-quote src) "(|,.*)$"))]) (let loop ([l rough-index]) (cond [(null? l) null] [(regexp-match re (caar l)) => (lambda (m) (cons (cons (string-append dest (cadr m)) (cdar l)) (loop (cdr l))))] [else (loop (cdr l))])))) xrefs))) (define re:split (regexp "^([^#]*)#(.*)$")) ;; Split HTML page and anchor name: (define full-index-without-titles (map (lambda (i) (let ([m (regexp-match re:split (cdr i))]) (list (car i) (cadr m) (caddr m)))) full-rough-index)) (load-relative "gettitle.ss") (fprintf (current-error-port) " Getting titles~n") ;; Get title in each case: (define full-index (map (lambda (i) (list (car i) (cadr i) (caddr i) (clean-up (get-title (cadr i) (caddr i))))) full-index-without-titles)) ;; If split-out-keywords?, then figure out which index items ;; should actually be keyword entries: (when split-out-keywords? (fprintf (current-error-port) " Finding keywords~n") (with-output-to-file (build-path dir-to-read "keywords") (lambda () (printf "(~n") (for-each (lambda (i) (let ([file (cadr i)] [anchor (caddr i)]) (let ([re (regexp (format "(procedure:).*([(][^)]*[)])" (regexp-quote anchor)))]) (with-input-from-file (build-path dir-to-read file) (lambda () (let loop () (let ([l (read-line)]) (unless (eof-object? l) (let ([m (regexp-match re l)]) (if m ;; keyword entry: (printf "(~s ~s ~s ~s ~s)~n" (car i) (clean-up (caddr m)) (cadr i) (caddr i) (cadddr i)) (loop))))))))))) full-index) (printf ")~n")) 'truncate/replace)) (fprintf (current-error-port) " Filtering keywords~n") ; Remove anything already covered as a keyword (i.e., ref to same section) (define keywords (with-handlers ([void (lambda (x) null)]) (with-input-from-file (build-path dir-to-read "keywords") read))) (define ht (make-hash-table)) (for-each (lambda (k) (let* ([s (string->symbol (car k))] [l (hash-table-get ht s (lambda () null))]) (hash-table-put! ht s (cons (list-ref k 4) l)))) keywords) (define filtered-index (filter (lambda (i) (not (member (list-ref i 3) (hash-table-get ht (string->symbol (car i)) (lambda () null))))) full-index)) (with-output-to-file (build-path dir-to-read "hdindex") (lambda () (printf "(~n") (for-each (lambda (s) (printf "~s~n" s)) ; Resort because we may have added extra entries (quicksort filtered-index (lambda (a b) (string-ci