;;; xml2mmdl.xom
;;; Converts XML markup declarations into MMDL instances (pass 1)
;;; Version: 1999-02-15 Rick Jelliffe (ricko@gate.sinica.edu.tw)
;;; Copyright (C) 1999 Rick Jelliffe ricko@gate.sinica.edu.tw
;;; Permission to use granted under MPL or GPL
;;;
;;; To use this program, you need the OmniMark programming language.
;;; Download "OmniMark LE" (light edition) from www.omnimark.com
;;; The command line is
;;;     omle -x xml2mmdl.xom infil.dtd > xxx
;;;     omle -x xml2mmdl2.xom xxx > outfile.xml
;;;
;;; For XML (eXtensible Markup Language) markup declaration syntax, refer to:
;;;     http://www.w3.org/TR/REC-xml
;;;
;;; For DDML (Document Description Markup Language) specification, refer to:
;;;     http://www.w3.org/TR/NOTE-ddml
;;;
;;; Some SGML-isms are also supported, for slightly wider reach & robustness:
;;;   * data attributes on entities
;;;   * PIs in the text
;;;   * Case insensitivity
;;;   * Public ID but no System Id (a warning comment is put out)
;;;   * omitability indicators
;;;   * inclusions and exclusions (a warning is put into a More element)
;;;   * groups on the LHS of declarations (they get stuffed into the name attribute)
;;;
;;; Before running this program, you will have to figure out how to handle
;;; external parameter entity references and marked sections. This program
;;; does not handle either, and will probably halt with an error message.
;;; Cut and paste the external entity's resource at the point of reference.
;;;
;;; This program does not handle models like the following: a | b , c.
;;; You must parenthesize the content models first: (a | b), c.
;;;
;;; After running this program, you may want to clean up the comments.
;;; For example, you might move the most important ones over into
;;; More or Doc elements. Comments relating to parameter entities
;;; should be removed. Attribute declarations are moved into their
;;; relative element definition, but comments about attributes will
;;; remain where they were.
;;;
;;; This version does not handle namespaces yet.
;; ---------------------------------------------------------------------
;; Program body: a cross-translate program made of FIND rules that scan
;; the input markup declarations and write a description of each one.
;;
;; NOTE(review): in this copy several FIND patterns and OUTPUT strings
;; look truncated (unbalanced parentheses, empty "" literals, pattern
;; variables such as theGI / theOmitability / theName / theValue /
;; theSysIdA.. that are read but never captured in a visible pattern).
;; Markup-like text appears to have been lost from the literals. The
;; tokens below are kept exactly as found; restore the missing text from
;; a pristine copy before running.
;; ---------------------------------------------------------------------
CROSS-TRANSLATE

;; Set while a content model is being written out, if it contains #PCDATA.
GLOBAL SWITCH MIXED-CONTENT
;; Final, PE-expanded and space-padded content model of the current element.
GLOBAL STREAM theExpandedContentModel
;; Scratch buffers used alternately while expanding parameter entity references.
GLOBAL STREAM theTmpExpandedContentModelA
GLOBAL STREAM theTmpExpandedContentModelB
;; Keyed shelf: parameter entity name -> replacement text (filled by the
;; "Parameter Entity" rule, read by the element and attribute rules).
GLOBAL STREAM PE VARIABLE
;; Stream attached to the referent "A<element name>" by the attribute rule.
GLOBAL STREAM r

;; Runs before any input is scanned: reset state and emit the leading output.
FIND-START
   DEACTIVATE MIXED-CONTENT
   CLEAR PE
   SET theExpandedContentModel TO "ANY" ; robust default
   SET theTmpExpandedContentModelA TO "ANY" ; robust default
   SET theTmpExpandedContentModelB TO "ANY" ; robust default
   ;; NOTE(review): only a newline survives in this literal; it presumably
   ;; carried the opening markup of the output document -- confirm.
   OUTPUT "%n"

;; Runs after all input has been scanned.
;; NOTE(review): the literal is empty here; presumably it held the closing
;; markup of the output document -- confirm.
FIND-END
   OUTPUT ""

;; Lining Comments we get rid of
;; (comments made up only of the characters = - # @ are matched and
;; produce no output)
;; NOTE(review): pattern start appears truncated (unbalanced parentheses).
FIND UL "") ["=-#@"])+ WHITE-SPACE* "-->" WHITE-SPACE*

;; NOTE(review): the two rules below have empty patterns in this copy; the
;; text they were meant to strip is not visible.
FIND "" ;; strip
FIND "" ;; strip

;; Comments we output as comments, willy nilly
;; NOTE(review): pattern start and the output literal appear truncated;
;; theComment is captured but not referenced in the visible OUTPUT string.
FIND UL "") ANY)+=theComment "-->" WHITE-SPACE*
   OUTPUT "%sn%n"

;; Element types
;; Captures the declared content model as theContentModel, expands parameter
;; entity references in it, and writes a description of the element followed
;; by the referent that will receive the element's attribute definitions.
;; NOTE(review): pattern start appears truncated; theGI and theOmitability
;; are used below but their captures are not visible.
FIND UL "")) ANY)+=theContentModel WHITE-SPACE* ">" WHITE-SPACE*
   ;; "content" is not referenced anywhere in the visible body of this rule.
   LOCAL STREAM content
   ;; Set whenever a %name; reference was replaced during an expansion pass.
   LOCAL SWITCH PE-FOUND

   ;; expand PE references
   ;; First pass: copy the content model into buffer A, replacing each
   ;; %name; by the stored PE value (padded with spaces) and dropping
   ;; SGML-style "-- ... --" comments.
   DEACTIVATE PE-FOUND
   OPEN theTmpExpandedContentModelA AS BUFFER
   REPEAT SCAN PATTERN theContentModel
   MATCH "%%" [ANY EXCEPT ";"]+=thePEName ";"
      ACTIVATE PE-FOUND
      USING PE KEY "%x(thePEName)"
         PUT theTmpExpandedContentModelA " %g(PE) "
   MATCH "--" ((LOOKAHEAD NOT "--") ANY)+=comment "--" ; strip SGML comments
   MATCH ANY=theChar
      PUT theTmpExpandedContentModelA PATTERN theChar
   AGAIN
   CLOSE theTmpExpandedContentModelA

   ;; expand PE references: loops until no more PEs
   ;; Each iteration rewrites buffer A into buffer B, expanding any
   ;; references introduced by the previous expansion, then copies B back
   ;; to A. The loop exits after the first pass that expands nothing.
   REPEAT
      DEACTIVATE PE-FOUND
      OPEN theTmpExpandedContentModelB AS BUFFER
      REPEAT SCAN theTmpExpandedContentModelA
      MATCH "%%" [ANY EXCEPT ";"]+=thePEName ";"
         ACTIVATE PE-FOUND
         USING PE KEY "%x(thePEName)"
            PUT theTmpExpandedContentModelB " %g(PE) "
      MATCH ANY=theChar
         PUT theTmpExpandedContentModelB PATTERN theChar
      AGAIN
      CLOSE theTmpExpandedContentModelB
      SET theTmpExpandedContentModelA TO "%g(theTmpExpandedContentModelB)"
      EXIT WHEN NOT ACTIVE PE-FOUND
   AGAIN
   ;; NOTE(review): a condition trailing AGAIN looks unusual; kept as found.
   WHEN ACTIVE PE-FOUND

   ;; add spaces to simplify pattern matching later
   ;; "(" becomes " ( " and ")" becomes " )"; every other character is
   ;; copied through unchanged.
   OPEN theExpandedContentModel AS BUFFER
   REPEAT SCAN theTmpExpandedContentModelA
   MATCH "("
      PUT theExpandedContentModel " ( "
   MATCH ")"
      PUT theExpandedContentModel " )"
   MATCH
      ANY=theChar
      PUT theExpandedContentModel PATTERN theChar
   AGAIN
   CLOSE theExpandedContentModel

   ;; output the data
   ;; NOTE(review): the first literal presumably opened the element's
   ;; definition in the output; only newlines survive -- confirm.
   OUTPUT "%n%n"
   ;; Echo the content model exactly as declared (before PE expansion).
   OUTPUT "%tThe XML content model was %n%t%t%x(theContentModel) "
   ;; Warn about SGML inclusions "+(" and exclusions "-(" in the declaration.
   REPEAT SCAN PATTERN theContentModel
   MATCH "+("
      OUTPUT "%n%t%tWarning: a global inclusion was detected.%n"
   MATCH "-("
      OUTPUT "%n%t%tWarning: a global exclusion was detected.%n"
   MATCH ANY ;; ignore
   AGAIN

   ;; some nice message
   ;; Report SGML omitted-tag indicators only when they were captured and
   ;; are non-empty.
   OUTPUT "%n%t%tThe original SGML markup declarations had omissibility %x(theOmitability) "
      WHEN PATTERN theOmitability IS SPECIFIED
       AND LENGTH OF PATTERN theOmitability IS GREATER-THAN 0
   OUTPUT "%n%t%n"
      WHEN PATTERN theOmitability IS SPECIFIED

   ;; Classify the expanded content model: EMPTY, ANY, text-only (#PCDATA
   ;; not followed by "|" or ","), or a general model.
   ;; NOTE(review): the OUTPUT literals in each branch appear truncated.
   DO WHEN "%g(theExpandedContentModel)" MATCHES UL "EMPTY"
      OUTPUT "%t%n"
   ELSE WHEN "%g(theExpandedContentModel)" MATCHES UL "ANY"
      OUTPUT "%t%n"
   ELSE WHEN theExpandedContentModel MATCHES
         ( WHITE-SPACE* ("(" WHITE-SPACE*)* UL "#PCDATA"
           (WHITE-SPACE | "*" | "+" | ")" )* (LOOKAHEAD NOT ["|,"]) )
      OUTPUT "%t%n"
   ELSE
      ;; General model: first find out whether it mentions #PCDATA anywhere,
      ;; so the model can be wrapped in extra output when content is mixed.
      REPEAT SCAN theExpandedContentModel
      MATCH UL "#PCDATA"
         ACTIVATE MIXED-CONTENT
      MATCH ANY ; swallow
      AGAIN
      OUTPUT "%t"
      OUTPUT "" WHEN ACTIVE MIXED-CONTENT
      OUTPUT "%g(theExpandedContentModel)"
      OUTPUT "" WHEN ACTIVE MIXED-CONTENT
      DEACTIVATE MIXED-CONTENT
      OUTPUT "%n"
   DONE

   ;; This is the action which puts out attributes
   ;; The referent "A<theGI>" is written here and filled in by the
   ;; "Attributes" rule.
   OUTPUT "%n"
   OUTPUT REFERENT "A%x(theGI)"
   OUTPUT "%n"
   ;; presumably gives the referent an empty default value for elements
   ;; that never get an attribute list -- TODO confirm
   SET REFERENT "A%x(theGI)" TO ""
   OUTPUT "%n"

;; PIs -- probably shouldn't happen except for XML header
;; NOTE(review): pattern start and output literal appear truncated; theData
;; is captured but not referenced in the visible OUTPUT string.
FIND UL "")) ANY)+=theData WHITE-SPACE* "?>" WHITE-SPACE*
   OUTPUT "%n"

;; Parameter Entity
;; An internal PE (literal value captured as theValue or theValueB) is
;; stored on the PE shelf under its name; the first declaration of a name
;; wins because later ones are skipped by the HAS KEY test. Declarations
;; with a system identifier, and anything else, only produce output.
;; NOTE(review): the pattern is almost entirely missing in this copy.
FIND UL "" WHITE-SPACE*
   DO WHEN PATTERN theValue IS SPECIFIED OR PATTERN theValueB IS SPECIFIED
      DO UNLESS PE HAS KEY PATTERN theName
         NEW PE KEY PATTERN theName
         SET PE TO PATTERN theValue WHEN PATTERN theValue IS SPECIFIED
         SET PE TO PATTERN theValueB WHEN PATTERN theValueB IS SPECIFIED
      DONE
   ELSE WHEN PATTERN theSysIdA IS SPECIFIED OR PATTERN theSysIdB IS SPECIFIED
          OR PATTERN theSysIdC IS SPECIFIED OR PATTERN theSysIdD IS SPECIFIED
      OUTPUT "%n%n"
   ELSE
      OUTPUT "%n%n"
      OUTPUT
         "%t%n"
   DONE

;; General Entity
;; Internal entities (a literal value was captured) produce no output;
;; entities with a system identifier, and anything else, produce output.
;; NOTE(review): the pattern is almost entirely missing in this copy. This
;; rule tests theValueA where the parameter-entity rule tests theValueB --
;; cannot tell from here which captures the pattern defined.
FIND UL "" WHITE-SPACE*
   DO WHEN PATTERN theValue IS SPECIFIED OR PATTERN theValueA IS SPECIFIED
      ;; load it
   ELSE WHEN PATTERN theSysIdA IS SPECIFIED OR PATTERN theSysIdB IS SPECIFIED
          OR PATTERN theSysIdC IS SPECIFIED OR PATTERN theSysIdD IS SPECIFIED
      OUTPUT "%n"
   ELSE
      OUTPUT "%n%n"
      OUTPUT "%t%n"
   DONE

;; Notation
;; Output differs depending on whether any system identifier was captured.
;; NOTE(review): the pattern is almost entirely missing in this copy.
FIND UL "" WHITE-SPACE*
   DO WHEN PATTERN theSysIdA IS SPECIFIED OR PATTERN theSysIdB IS SPECIFIED
        OR PATTERN theSysIdC IS SPECIFIED OR PATTERN theSysIdD IS SPECIFIED
      OUTPUT "%n%n"
   ELSE
      OUTPUT "%n%n"
      OUTPUT "%t%n"
   DONE

;; Attributes
;; Captures the body of an attribute-list declaration as theAttlist, expands
;; parameter entity references in it, and writes one entry per attribute
;; definition into the referent "A<theGI>" that the element rule outputs.
;; NOTE(review): pattern start appears truncated; the capture of theGI is
;; not visible.
FIND UL ""]+=theAttlist ">" WHITE-SPACE*
   ;; Only ever set to "" in the visible code, then interpolated below.
   LOCAL STREAM theUnknownAtt
   ;; Scratch buffers used alternately while expanding PE references.
   LOCAL STREAM AttlistBufferA
   LOCAL STREAM AttlistBufferB
   ;; Set whenever a %name; reference was replaced during an expansion pass.
   LOCAL SWITCH PE-FOUND
   SET theUnknownAtt TO ""

   ;; expand PE references
   ;; First pass: same scheme as in the element rule -- replace %name; by the
   ;; stored PE value and drop SGML-style "-- ... --" comments.
   DEACTIVATE PE-FOUND
   OPEN AttlistBufferA AS BUFFER
   REPEAT SCAN PATTERN theAttlist
   MATCH "%%" [ANY EXCEPT ";"]+=thePEName ";"
      ACTIVATE PE-FOUND
      USING PE KEY "%x(thePEName)"
         PUT AttlistBufferA " %g(PE) "
   MATCH "--" ((LOOKAHEAD NOT "--") ANY)+=comment "--" ; strip SGML comments
   MATCH ANY=theChar
      PUT AttlistBufferA PATTERN theChar
   AGAIN
   CLOSE AttlistBufferA

   ;; expand PE references: loops until no more PEs
   ;; Repeatedly rewrite buffer A via buffer B until a pass expands nothing.
   REPEAT
      DEACTIVATE PE-FOUND
      OPEN AttlistBufferB AS BUFFER
      REPEAT SCAN AttlistBufferA
      MATCH "%%" [ANY EXCEPT ";"]+=thePEName ";"
         ACTIVATE PE-FOUND
         USING PE KEY "%x(thePEName)"
            PUT AttlistBufferB " %g(PE) "
      MATCH ANY=theChar
         PUT AttlistBufferB PATTERN theChar
      AGAIN
      CLOSE AttlistBufferB
      SET AttlistBufferA TO "%g(AttlistBufferB)"
      EXIT WHEN NOT ACTIVE PE-FOUND
   AGAIN
   ;; NOTE(review): a condition trailing AGAIN looks unusual; kept as found.
   WHEN ACTIVE PE-FOUND

   ;; Everything written to r from here on becomes the value of the
   ;; referent "A<theGI>".
   OPEN r AS REFERENT "A%x(theGI)"
   ;; Echo the attribute list exactly as declared (before PE expansion).
   PUT r "%tThe XML attribute list was %n%t%t%x(theAttlist)%n%t%n "

   ;; Walk the expanded attribute list one definition at a time.
   REPEAT SCAN AttlistBufferA
   ;; NOTE(review): this alternative can match zero characters; confirm that
   ;; it cannot stall the scan on non-blank input.
   MATCH WHITE-SPACE* ;; swallow
   ;; A PE reference that is still present after expansion.
   MATCH "%%" [ANY EXCEPT ";"]+=thePERef ";" WHITE-SPACE*
      PUT r "%t%n"
   ;; One attribute definition: name, then either a parenthesised enumeration
   ;; (theEnum) or a type keyword (theType), then the optional default parts.
   MATCH [ANY EXCEPT WHITE-SPACE]+=theAttName WHITE-SPACE+
         ( ( "(" [ANY EXCEPT ")"]+=theEnum ")" WHITE-SPACE+)
         | ([ANY EXCEPT WHITE-SPACE]+=theType WHITE-SPACE+) )
         (UL "#FIXED" WHITE-SPACE*)?
         ;; default value in single quotes
         (UL "'" [ANY EXCEPT "'"]*=theTokenA "'" WHITE-SPACE* )?
         ;; default value in double quotes
         (UL '"' [ANY EXCEPT '"']*=theTokenB '"' WHITE-SPACE* )?
         ;; a further parenthesised group, captured as theNotation
         ( "(" [ANY EXCEPT ")"]+=theNotation ")" WHITE-SPACE*)?
         ;; "#" keyword such as #REQUIRED / #IMPLIED, captured as theImplication
         (UL "#" [ANY EXCEPT WHITE-SPACE]+=theImplication WHITE-SPACE*)?
      ;; NOTE(review): this literal appears truncated; none of the captures
      ;; above are referenced in the visible text.
      PUT r "%t%g(theUnknownAtt)%n"
      ;; For enumerated types, write one entry per enumeration token.
      DO WHEN PATTERN theEnum IS SPECIFIED
         REPEAT SCAN PATTERN theEnum
         MATCH ["(),|"] ; strip
         MATCH WHITE-SPACE+ ; strip
         MATCH [ANY EXCEPT WHITE-SPACE OR "(),|" ]+=theEnumToken
            PUT r "%t%t%t%n"
         MATCH ANY ;; never happen
         AGAIN
      DONE
      PUT r "%t%n"
      SET theUnknownAtt TO ""
   AGAIN
   CLOSE r