[racket] read WebPage

From: Matthias Felleisen (matthias at ccs.neu.edu)
Date: Wed Jun 12 21:52:49 EDT 2013

Here is an old script that reads stock prices, but it still runs. -- Matthias

#lang racket 

(require net/url net/uri-codec)

(require (lib "url.ss" "net")
         (lib "uri-codec.ss" "net")
         (lib "list.ss")
         (lib "contract.ss"))

;; Switch the alist separator mode to 'amp for the whole module.
;; NOTE(review): presumably this makes form-encoded queries use "&" only as
;; the separator -- confirm against the net/uri-codec documentation.
(current-alist-separator-mode 'amp)

;; Value raised (via raise) when a quote cannot be obtained: company is the
;; ticker symbol that was queried, reason is a human-readable explanation.
;; stock-quote catches it with no-quote? and returns no-quote-reason.
(define-struct no-quote (company reason))
;; no-quote = (make-no-quote String String)

 ;; Export stock-quote under a contract: it consumes a ticker symbol and
 ;; produces either the quote (a number) or a failure message (a string).
 (provide/contract
  (stock-quote     (string?   . -> . (or/c number? string?)))
  ;; stock-quote handles no-quote itself and returns the reason string, so
  ;; the no-quote accessors remain unexported:
  ; (no-quote?       (any?      . -> . boolean?))
  ; (no-quote-reason (no-quote? . -> . string?))
  )

#| ----------------------------------------------------------------------------
  Stock and Fund Quotes
  This module implements an extremely simple stock quote server. The quote 
  server sends a query to SOURCE and then looks for the textual pattern 
  PATTERN in the resulting page. This process is brittle, and it has broken
  over the years, but it's easy to fix. -- If someone has a more reliable
  method for obtaining stock quotes, by all means replace the implementation
  of the module with something better. 
---------------------------------------------------------------------------- |#

;; Format template for the query URL; form-query fills the single ~a slot
;; with the ticker symbol.
(define SOURCE "http://finance.yahoo.com/q?s=~a&d=v1")

;; Regular expression that find matches against each line of the response.
;; Its one capture group is the text extract-quote returns as the quote.
(define PATTERN "([0-9\\.]+)</span></span>")

;; String -> (or/c number? string?)
;; look up the current quote for the ticker symbol company
;; produces the quote as a number; if anything fails -- the server cannot be
;; reached, no line of the page matches PATTERN, or the matched text is not a
;; valid number -- produces a string describing the problem instead
(define (stock-quote company)
  (with-handlers ([no-quote? no-quote-reason])
    (let* ([PG (send-query company (form-query company))]
           [LN (filter find PG)]
           [QT (extract-quote company LN)])
      ;; string->number yields #f for matched text such as "1.2.3" or ".";
      ;; fall back to the failure message so the result is always a number
      ;; or a string, never #f
      (or (string->number QT)
          (format "couldn't find a quote for ~a" company)))))

;; String -> URL 
;; create a URL for the stock query to the server 
;; the ticker symbol is percent-encoded first, so characters such as spaces,
;; "&" or "=" in company cannot corrupt the query string built from SOURCE
(define (form-query company) 
  (string->url (format SOURCE (uri-encode company))))

;; String URL -> (listof String)
;; contact the page at URL with a query; produce list of lines from response
;; raises a no-quote (carrying company and the underlying error message) if
;; it can't find the server or something else goes wrong during the request
(define (send-query company URL)
  ;; NOTE(review): the handler predicate was lost from the archived source;
  ;; exn:fail? is used so every ordinary failure becomes a no-quote.
  (with-handlers ([exn:fail?
                   (lambda (e) (raise (make-no-quote company (exn-message e))))])
    (call/input-url URL get-pure-port read-a-page)))

;; Iport -> (listof String)
;; read everything available on ip and produce it as a list of lines, in
;; order; produces '() when ip is already at end-of-file
(define (read-a-page ip)
  (let reader ()
    (let ([next (read-line ip)])
      (cond
        ;; read-line signals the end of the port with the eof object
        [(eof-object? next) '()]
        [else (cons next (reader))]))))

;; String (listof String) -> String
;; LN holds the lines of the page that matched PATTERN
;; produce the quote text captured from the first matching line;
;; raises a no-quote for company when LN is empty
(define (extract-quote company LN)
  (if (pair? LN)
      ;; find returns (list whole-match captured-group); take the group
      (cadr (find (car LN)))
      (raise (make-no-quote company
                            (format "couldn't find a quote for ~a" company)))))

;; String -> (union #f (list String String))
;; match PATTERN against the line x: the result is false when nothing
;; matches, otherwise the whole match followed by the captured quote text
(define find
  (lambda (x)
    (regexp-match PATTERN x)))

  ;; tests:
  ;; NOTE(review): testing.scm is not part of this file; test-p, test-e and
  ;; test== are assumed to take (expression predicate-or-expected message).
  ;; The closing arguments of the two extract-quote tests were lost from the
  ;; archived source and have been filled in on that assumption -- confirm.
  (require (lib "testing.scm" "testing"))

  (define (good-number? x)
    (test-p x number? "not a positive number"))
  ; (lambda (x) (and (number? x) (> x 0)))

  ;; sample response line shaped like the markup PATTERN looks for
  (define merck   "<span><span>29.99</span></span>")
  (test-p (find merck) pair? "pattern not found")
  (test-p (filter find (list "" merck "" merck "")) pair? "filter find")
  (test-p (extract-quote "mrk" (filter find (list "" merck ""))) string?
          "extract-quote: quote not extracted")
  (test-e (extract-quote "mrk" (filter find (list "" "xxx" ""))) no-quote
          "extract-quote: no-quote not raised")

  (printf "make sure you're connected when you run these test~n")
  (good-number? (stock-quote "PRSCX")) ; a fund: T Rowe Price Science  
  (good-number? (stock-quote "MRK")) ; a stock: Merck 
  (test== (string? (stock-quote "XXXXXXX")) #t "bad stock found")
  (printf "make sure you are *not* connected when you run these test~n")
  (test-p (stock-quote "MRK") string? "number found despite lack of connection")

On Jun 12, 2013, at 3:10 PM, Frank Weytjens wrote:

> Hi Racket users,
> I'm trying to extract some information from a webpage.
> First attempt is to read the complete page
> Later I will try to filter the stock price and volume at a certain time.
> The goal is to calculate what direction the stock price is moving in,
> multiplied by the number of stocks that changed owner during vast amounts of time.
> Like if it was a moving/accelerating mass in physics and you want to know its momentum.
> The value of this program is void.
> It's just for fun.
> The first problem I encounter is that reading the webpage stops at the first curly bracket
> #lang racket
> (require net/url)
> (define GLE (get-pure-port (string->url "http://www.tijd.be/beurzen/Societe_Generale.360017048")))
> (define readPage
>   (lambda (ticker)
>     (read/recursive ticker)))
> (define readMore
>     (lambda ()
>       (let ([length  (pipe-content-length GLE)]) 
>         (if (< (file-position* GLE) length) ((display (readPage GLE)) (readMore)) (print 'Finished)))))
> (readMore)
> error --> .....ascript>djConfig=. read: unexpected `}'
> Sorry for the naive code, I'm just a beginner.
> Thanks in advance
> Frank
> ____________________
>  Racket Users list:
>  http://lists.racket-lang.org/users

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.racket-lang.org/users/archive/attachments/20130612/2295fa46/attachment.html>

Posted on the users mailing list.