efined in [1], the ; parser is necessarily single-threaded -- obviously as elements ; in a text XML document are laid down sequentially. The parser ; therefore is a tree fold that has been transformed to accept an ; accumulating parameter [1,2]. ; Formally, the denotational semantics of the parser can be expressed ; as ;@smallexample ; parser:: (Start-tag -> Seed -> Seed) -> ; (Start-tag -> Seed -> Seed -> Seed) -> ; (Char-Data -> Seed -> Seed) -> ; XML-text-fragment -> Seed -> Seed ; parser fdown fup fchar " content " seed ; = fup "" seed ; (parser fdown fup fchar "content" (fdown "" seed)) ; ; parser fdown fup fchar "char-data content" seed ; = parser fdown fup fchar "content" (fchar "char-data" seed) ; ; parser fdown fup fchar "elem-content content" seed ; = parser fdown fup fchar "content" ( ; parser fdown fup fchar "elem-content" seed) ;@end smallexample ; Compare the last two equations with the left fold ;@smallexample ; fold-left kons elem:list seed = fold-left kons list (kons elem seed) ;@end smallexample ; The real parser created by @code{SSAX:make-parser} is slightly more ; complicated, to account for processing instructions, entity ; references, namespaces, processing of document type declaration, etc. ; The XML standard document referred to in this module is ; @uref{http://www.w3.org/TR/1998/REC-xml-19980210.html} ; ; The present file also defines a procedure that parses the text of an ; XML document or of a separate element into SXML, an S-expression-based ; model of an XML Information Set. SXML is also an Abstract Syntax Tree ; of an XML document. SXML is similar but not identical to DOM; SXML is ; particularly suitable for Scheme-based XML/HTML authoring, SXPath ; queries, and tree transformations. See SXML.html for more details. ; SXML is a term implementation of evaluation of the XML document [3]. ; The other implementation is context-passing. ; The present frameworks fully supports the XML Namespaces Recommendation: ; @uref{http://www.w3.org/TR/REC-xml-names/} ; Other links: ;@table @asis ;@item [1] ; Jeremy Gibbons, Geraint Jones, "The Under-appreciated Unfold," ; Proc. ICFP'98, 1998, pp. 273-279. ;@item [2] ; Richard S. Bird, The promotion and accumulation strategies in ; transformational programming, ACM Trans. Progr. Lang. Systems, ; 6(4):487-504, October 1984. ;@item [3] ; Ralf Hinze, "Deriving Backtracking Monad Transformers," ; Functional Pearl. Proc ICFP'00, pp. 186-197. ;@end table ;; ;;; Code: (define-module (sxml ssax) #:use-module (sxml ssax input-parse) #:use-module (srfi srfi-1) #:use-module (srfi srfi-13) #:export (current-ssax-error-port with-ssax-error-to-port xml-token? xml-token-kind xml-token-head make-empty-attlist attlist-add attlist-null? attlist-remove-top attlist->alist attlist-fold define-parsed-entity! reset-parsed-entity-definitions! ssax:uri-string->symbol ssax:skip-internal-dtd ssax:read-pi-body-as-string ssax:reverse-collect-str-drop-ws ssax:read-markup-token ssax:read-cdata-body ssax:read-char-ref ssax:read-attributes ssax:complete-start-tag ssax:read-external-id ssax:read-char-data ssax:xml->sxml ssax:make-parser ssax:make-pi-parser ssax:make-elem-parser)) (define (parser-error port message . rest) (apply throw 'parser-error port message rest)) (define ascii->char integer->char) (define char->ascii char->integer) (define current-ssax-error-port (make-parameter (current-error-port))) (define *current-ssax-error-port* (parameter-fluid current-ssax-error-port)) (define (with-ssax-error-to-port port thunk) (parameterize ((current-ssax-error-port port)) (thunk))) (define (ssax:warn port . args) (with-output-to-port (current-ssax-error-port) (lambda () (display ";;; SSAX warning: ") (for-each display args) (newline)))) (define (ucscode->string codepoint) (string (integer->char codepoint))) (define char-newline #\newline) (define char-return #\return) (define char-tab #\tab) (define nl "\n") ;; This isn't a great API, but a more proper fix will involve hacking ;; SSAX. (define (reset-parsed-entity-definitions!) "Restore the set of parsed entity definitions to its initial state." (set! ssax:predefined-parsed-entities '((amp . "&") (lt . "<") (gt . ">") (apos . "'") (quot . "\"")))) (define (define-parsed-entity! entity str) "Define a new parsed entity. @var{entity} should be a symbol. Instances of &@var{entity}; in XML text will be replaced with the string @var{str}, which will then be parsed." (set! ssax:predefined-parsed-entities (acons entity str ssax:predefined-parsed-entities))) ;; Execute a sequence of forms and return the result of the _first_ one. ;; Like PROG1 in Lisp. Typically used to evaluate one or more forms with ;; side effects and return a value that must be computed before some or ;; all of the side effects happen. (define-syntax begin0 (syntax-rules () ((begin0 form form1 ... ) (let ((val form)) form1 ... val)))) ; Like let* but allowing for multiple-value bindings (define-syntax let*-values (syntax-rules () ((let*-values () . bodies) (begin . bodies)) ((let*-values (((var) initializer) . rest) . bodies) (let ((var initializer)) ; a single var optimization (let*-values rest . bodies))) ((let*-values ((vars initializer) . rest) . bodies) (call-with-values (lambda () initializer) ; the most generic case (lambda vars (let*-values rest . bodies)))))) ;; needed for some dumb reason (define inc 1+) (define dec 1-) (define-syntax include-from-path/filtered (lambda (x) (define (read-filtered accept-list file) (with-input-from-file (%search-load-path file) (lambda () (let loop ((sexp (read)) (out '())) (cond ((eof-object? sexp) (reverse out)) ((and (pair? sexp) (memq (car sexp) accept-list)) (loop (read) (cons sexp out))) (else (loop (read) out))))))) (syntax-case x () ((_ accept-list file) (with-syntax (((exp ...) (datum->syntax x (read-filtered (syntax->datum #'accept-list) (syntax->datum #'file))))) #'(begin exp ...)))))) (include-from-path "sxml/upstream/assert.scm") (include-from-path/filtered (define define-syntax ssax:define-labeled-arg-macro) "sxml/upstream/SSAX.scm")