[racket] read WebPage
Here is an old script that reads stock prices, but it still runs. -- Matthias
#lang racket
(require net/url net/uri-codec)
#;
(require (lib "url.ss" "net")
(lib "uri-codec.ss" "net")
(lib "list.ss")
(lib "contract.ss"))
;; Select the 'amp separator mode for net/uri-codec's association-list
;; (query-string) handling for the rest of this module.
;; NOTE(review): presumably this makes `&` the only separator, so that the
;; "s=...&d=v1" query in SOURCE is parsed and re-emitted as written -- confirm
;; against the net/uri-codec documentation.
(current-alist-separator-mode 'amp)
;; no-quote = (make-no-quote String String)
;; Value raised inside this module when a quote cannot be produced:
;;   company -- the symbol that was being looked up
;;   reason  -- a string describing what went wrong
(struct no-quote (company reason)
  #:extra-constructor-name make-no-quote)
(provide
 (contract-out
  ;; stock-quote takes a company symbol as a string and produces either the
  ;; quote as a number or a string explaining why no quote is available
  ;; (no-quote values are caught inside stock-quote and turned into their
  ;; reason string).
  [stock-quote (-> string? (or/c number? string?))]))
;; The no-quote structure is currently not exported; the contracts it would
;; need are kept here for reference:
;   (no-quote? (any? . -> . boolean?))
;   (no-quote-reason (no-quote? . -> . string?))
#| ----------------------------------------------------------------------------
Stock and Fund Quotes
---------------------
This module implements an extremely simple stock quote server. The quote
server sends a query to SOURCE and then looks for the textual pattern
PATTERN in the resulting page. This process is brittle, and it has broken
over the years but it's easy to fix. -- If someone has a more reliable
method for obtaining stock quotes, by all means replace the implementation
of the module with something better.
|#
;; Format string for the quote-page URL; form-query fills the single ~a slot
;; with the company symbol.
(define SOURCE "http://finance.yahoo.com/q?s=~a&d=v1")
;; Regular-expression source used by find: one capture group holding a run of
;; characters from the class [0-9\.], which must be immediately followed by
;; "</span></span>".  extract-quote returns the text of that capture group.
;; NOTE(review): this is tied to the markup of the page served by SOURCE --
;; verify it still matches before relying on it.
(define PATTERN "([0-9\\.]+)</span></span>")
;; String -> (union Number String)
;; Look up the quote for `company`.
;; Produces the quote as a number.  If the server cannot be reached, no line
;; of the page matches PATTERN, or the matched text is not a valid number,
;; produces a string describing the problem instead.
(define (stock-quote company)
  ;; Every failure below is signalled by raising a no-quote value; the handler
  ;; converts it into its reason string so callers never see the raise.
  (with-handlers ([no-quote? no-quote-reason])
    (let* ([page  (send-query company (form-query company))]
           [hits  (filter find page)]
           [text  (extract-quote company hits)]
           [price (string->number text)])
      ;; PATTERN accepts any run of digits and dots, so text such as "..." or
      ;; "1.2.3" can match; string->number then yields #f, which would violate
      ;; the exported (or/c number? string?) contract.  Report it as a failed
      ;; lookup instead.
      (unless price
        (raise
         (make-no-quote
          company
          (format "couldn't parse quote ~s for ~a" text company))))
      price)))
;; String -> URL
;; Create the URL for the stock query by splicing `company` into SOURCE.
;; The symbol is percent-encoded first (uri-encode from net/uri-codec) so that
;; characters with special meaning in a query string -- `&`, `=`, `#`, `%`,
;; `+` -- stay part of the symbol instead of changing how the URL is parsed.
;; Plain alphanumeric symbols are unaffected by the encoding.
(define (form-query company)
  (string->url (format SOURCE (uri-encode company))))
;; String URL -> (listof String)
;; Contact the server at URL and produce the response as a list of lines.
;; A network failure is re-raised as a no-quote value for `company` whose
;; reason is the message of the original exception.
(define (send-query company URL)
  ;; exn -> (raises no-quote)
  (define (network-failure->no-quote err)
    (raise (make-no-quote company (exn-message err))))
  (with-handlers ([exn:fail:network? network-failure->no-quote])
    (call/input-url URL get-pure-port read-a-page)))
;; Iport -> (listof String)
;; Collect every remaining line of `ip`, in order, until end-of-file.
;; Lines are split on linefeed only (the default mode of read-line), so a
;; carriage return that precedes the linefeed stays at the end of the line.
(define (read-a-page ip)
  (for/list ([line (in-lines ip 'linefeed)])
    line))
;; String (listof String) -> String
;; LN holds the page lines that matched PATTERN.  Produce the text captured by
;; PATTERN's group in the first of those lines; if LN has no first element,
;; raise a no-quote value for `company`.
(define (extract-quote company LN)
  (unless (pair? LN)
    (raise
     (make-no-quote
      company
      (format "couldn't find a quote for ~a" company))))
  ;; re-run the match on the first hit: (list whole-match captured-text)
  (let ([hit (find (car LN))])
    (cadr hit)))
;; String -> (union #f (list String String))
;; Match PATTERN against `line`.  On success produce the list
;; (whole-match captured-text); otherwise produce #f.  Because the failure
;; value is #f, find can be used directly as a predicate with filter.
(define (find line)
  (regexp-match PATTERN line))
#|
;; tests:
(require (lib "testing.scm" "testing"))
(define (good-number? x)
(test-p x number? "not a positive number"))
; (lambda (x) (and (number? x) (> x 0)))
(define merck "<b>29.99</b></big></td>")
(test-p (find merck) pair? "pattern not found")
(test-p (filter find (list "" merck "" merck "")) pair? "filter find")
(test-p (extract-quote "mrk" (filter find (list "" merck ""))) string?
"extract-quote")
(test-e (extract-quote "mrk" (filter find (list "" "xxx" ""))) no-quote
"extract-quote")
(printf "make sure you're connected when you run these test~n")
(good-number? (stock-quote "PRSCX")) ; a fund: T Rowe Price Science
(good-number? (stock-quote "MRK")) ; a stock: Merck
(test== (string? (stock-quote "XXXXXXX")) #t "bad stock found")
(printf "make sure you are *not* connected when you run these test~n")
(test-p (stock-quote "MRK") string? "number found despite lack of connection")
|#
On Jun 12, 2013, at 3:10 PM, Frank Weytjens wrote:
>
> Hi Racket users,
>
> I'm trying to extract some information from a webpage.
> First attempt is to read the complete page
> Later i will try to filter the StockPrice and Volume at a certain time
> The goal is to calculate what direction the StocPrice is moving in
> multiplied by the number of Stocks that changed owner during vast amounts of time.
> Like if it was a moving/accelerating mass in fysics and you want to know it's momentum
> The value of this program is void.
> It's just for fun.
>
> The first problem I encounter is that reading the webpage stops at the first curly bracket
>
> #lang racket
> (require net/url)
> (define GLE (get-pure-port (string->url "http://www.tijd.be/beurzen/Societe_Generale.360017048")))
> (define readPage
> (lambda (ticker)
> (read/recursive ticker)))
> (define readMore
> (lambda ()
> (let ([length (pipe-content-length GLE)])
> (if (< (file-position* GLE) length) ((display (readPage GLE)) (readMore)) (print 'Finished)))))
>
> (readMore)
>
>
> error --> .....ascript>djConfig=. read: unexpected `}'
>
>
> Sorry for the naive code, i'm just a beginner.
>
> Thanks in advance
>
> Frank
> ____________________
> Racket Users list:
> http://lists.racket-lang.org/users
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.racket-lang.org/users/archive/attachments/20130612/2295fa46/attachment.html>