;;; slashdot.lisp --- Slashdot headline access tools. ;; Copyright 2001,2002 by Dave Pearson ;; $Revision: 1.7 $ ;; slashdot.lisp is free software distributed under the terms of the GNU ;; General Public Licence, version 2. For details see the file COPYING. ;;; Commentary: ;; ;; This code was written as an exercise in playing with clocc's XML handling ;; code. I also use it as the core of a utility that grabs slashdot ;; headlines and stores them "offline" for me (see slashcache.lisp ;; ). ;; ;; Although the code is called "slashdot" and is aimed at slashdot.org it ;; should work for most "backslash" systems. For example, I've used it to ;; read NewsForge headlines without any problems. ;;; TODO: ;; ;; o Make the code more generic. I guess this means I should turn it into a ;; "backslash" client rather than a "slashdot" client. This way it could ;; be the core of a utility for reading headlines from many "backslash" ;; servers (it can be now but it wouldn't work as well as I'd like). ;; Uses clocc . (eval-when (compile load eval) (require :cllib-date #p"clocc:src;cllib;date") (require :cllib-url #p"clocc:src;cllib;url") (require :cllib-html #p"clocc:src;cllib;html") (require :cllib-xml #p"clocc:src;cllib;xml")) ;; Place everything in it's own package (defpackage org.davep.slashdot (:nicknames slashdot) (:use common-lisp) (:documentation "slashdot.xml parsing package") (:export "*TIMESTAMP-FILE*" "*DEFAULT-SLASHDOT-XML-URL*" "*URL-READER-MAX-RETRY*" "*URL-READER-TIMEOUT*" "*URL-READER-FUNCTION*" "*FILE-READER-FUNCTION*" "*MINIMUM-REFRESH-SECONDS*" "STORY" "TITLE" "URL" "POSTING-TIME" "AUTHOR" "DEPARTMENT" "TOPIC" "COMMENTS" "SECTION" "DESCRIPTION" "IMAGE" "LATEST-STORIES" "STORIES")) (in-package :org.davep.slashdot) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Configuration variables. (defvar *timestamp-file* (merge-pathnames (user-homedir-pathname) (make-pathname :name ".slashdot-timestamp")) "Name of the file used to remember the last time we spoke to slashdot.org.") (defvar *default-slashdot-xml-url* (cllib:url "http://slashdot.org/slashdot.xml") "Default URL for the slashdot XML file.") (defvar *last-stories* nil "The last list of stories retrieved from slashdot.") (defvar *url-reader-max-retry* 0 "Max number of retries for the URL reader.") (defvar *url-reader-timeout* 5 "Timeout, in seconds, for the URL reader.") (defvar *url-reader-function* (lambda (url) (if (too-soon-p) *last-stories* (prog1 (cllib:xml-read-from-url url :out nil :max-retry *url-reader-max-retry* :timeout *url-reader-timeout*) (remember-timestamp)))) "Function for reading the slashdot XML data from an URL.") (defvar *file-reader-function* (lambda (file) (cllib:xml-read-from-file file :out nil)) "Function for reading the slashdot XML data from a file.") (defvar *minimum-refresh-seconds* (* 60 30) "Minimum number of seconds to wait before subsequent connections to slashdot.org Note that slashdot.org asks that you don't pull down the XML file more than once every 30 minutes. Please observe this request.") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Support functions. (defun get-xml-item-data (xml item-name) "Get the data for the element of XML whose name is ITEM-NAME." (let ((item (find-if (lambda (item) (when (cllib:xml-obj-p item) (string= (cllib:xmln-ln (cllib:xmlo-name item)) item-name))) xml))) (when item (cllib:xmlo-data item)))) (defun story-attribute (story attribute) "Get the data for ATTRIBUTE of STORY." (let ((attr (get-xml-item-data (cllib:xmlo-data story) attribute))) (when attr (car attr)))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Class for holding a slashdot story. (defclass story () ((title :accessor title :initarg :title :initform "") (url :accessor url :initarg :url :initform "") (posting-time :accessor posting-time :initarg :posting-time :initform 0) (author :accessor author :initarg :author :initform "") (department :accessor department :initarg :department :initform "") (topic :accessor topic :initarg :topic :initform "") (comments :accessor comments :initarg :comments :initform "") (section :accessor section :initarg :section :initform "") (description :accessor description :initarg :description :initform "") (image :accessor image :initarg :image :initform "")) (:documentation "A slashdot story.")) (defmethod print-object ((story story) (stream stream)) "Print STORY on STREAM." (print-unreadable-object (story stream :type t) (format stream "~S ~S" (title story) (cllib:dttm->string (posting-time story))))) (defmethod story-id ((story story)) "Return the unique id for STORY." (cllib:substitute-subseq (cllib:url-path-args (url story)) "?sid=" "")) (defun story-from-xml (xml) "Create an instance of class STORY from the contents of XML." (make-instance 'story :title (story-attribute xml "title") :url (cllib:url (story-attribute xml "url")) :posting-time (cllib:string->dttm (story-attribute xml "time")) :author (story-attribute xml "author") :department (story-attribute xml "department") :topic (story-attribute xml "topic") :comments (story-attribute xml "comments") :section (story-attribute xml "section") :description (story-attribute xml "description") :image (story-attribute xml "image"))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Class for holding latest slashdot stories. (defclass latest-stories () ((xml-source :accessor xml-source :initarg :xml-source :initform nil) (stories :accessor stories :initform nil)) (:documentation "The latest stories available from slashdot.")) (defmethod initialize-instance :after ((stories latest-stories) &rest rest) "Initialise an instance of the latest-stories class." (declare (ignore rest)) (when (null (xml-source stories)) (setf (xml-source stories) *default-slashdot-xml-url*)) (setf (stories stories) (mapcar #'story-from-xml (remove-if-not #'cllib:xml-obj-p (get-xml-item-data (let ((cllib:*print-log*)) (funcall (if (cllib:url-p (xml-source stories)) *url-reader-function* *file-reader-function*) (xml-source stories))) "backslash"))))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Timestamp functions. (defun remember-timestamp () "Update the slashdot timestamp file." (with-open-file (timestamp *timestamp-file* :direction :output :if-exists :supersede) (format timestamp "~A~%" (get-universal-time)))) (defun load-timestamp () "Load the slashdot timestamp. Return NIL if not available." (with-open-file (timestamp *timestamp-file* :direction :input :if-does-not-exist nil) (when timestamp (with-standard-io-syntax (let ((*read-eval*)) (read timestamp)))))) (defun seconds-since-last-stamp () "Get the number of seconds elapsed since the last timestamp." (let ((timestamp (load-timestamp))) (if timestamp (- (get-universal-time) timestamp) (1+ *minimum-refresh-seconds*)))) (defun too-soon-p () "Is it too soon to be grabbing the XML file from slashdot.org?" (<= (seconds-since-last-stamp) *minimum-refresh-seconds*)) (provide 'slashdot) ;;; slashdot.lisp ends here.