;;; xml2mmdl.xom
;;; Converts XML markup declarations into MMDL instances (pass1)
;;; Version: 1999-02-15 Rick Jelliffe (ricko@gate.sinica.edu.tw)
;;; Copyright (C) 1999 Rick Jelliffe ricko@gate.sinica.edu.tw
;;; Permission to use granted under MPL or GPL
;;; To use this program, you need the OmniMark programming language.
;;; Download "OmniMark LE" light edition from www.omnimark.com
;;; The command line is
;;; omle -x xml2mmdl.xom infil.dtd > xxx
;;; omle -x xml2mmdl2.xom xxx > outfile.xml
;;; For XML (eXtensible Markup Language) markup declaration syntax, refer to:
;;; http://www.w3.org/TR/REC-xml
;;; For DDML (Document Description Markup Language) specification, refer to:
;;; http://www.w3.org/TR/NOTE-ddml
;;; Some SGML-isms are also supported, for slightly wider reach & robustness:
;;; * data attributes on entities
;;; * PIs in the text
;;; * Case insensitivity
;;; * Public ID but no System Id (a warning comment is put out)
;;; * omitability indicators
;;; * inclusions and exclusions (a warning is put into a More element)
;;; * groups on the LHS of declarations (they get stuffed into the name attribute)
;;; Before running this program, you will have to figure out how to handle
;;; external parameter entity references and marked sections. This program
;;; does not handle either, and will probably halt with an error message.
;;; Cut and paste the external entity's resource at the point of reference.
;;; This program does not handle models like the following: a | b , c.
;;; You must parenthesize the content models first: (a | b), c.
;;; After running this program, you may want to clean up the comments.
;;; For example, you might move the most important ones over into
;;; More or Doc elements. Comments relating to parameter entities
;;; should be removed. Attribute declarations are moved into their
;;; relative element definition, but comments about attributes will
;;; remain where they were.
;;; This version does not handle namespaces yet.
;; Program type declaration: the input is processed by the FIND rules below.
CROSS-TRANSLATE
;; Switch raised by the element rule while it emits a content model that
;; contains #PCDATA alongside other content; lowered again afterwards.
GLOBAL SWITCH MIXED-CONTENT
;; Final content model of the current element: parameter entities expanded
;; and parentheses padded with spaces.
GLOBAL STREAM theExpandedContentModel
;; Two scratch buffers used alternately by the element rule while it
;; repeatedly expands parameter entity references.
GLOBAL STREAM theTmpExpandedContentModelA
GLOBAL STREAM theTmpExpandedContentModelB
;; Keyed, variable-size shelf: one item per parameter entity, keyed by the
;; entity name and holding its literal replacement text.
GLOBAL STREAM PE VARIABLE
;; Stream the attribute-list rule opens onto a referent named "A" + the
;; element name, so attribute output lands inside the element's output.
GLOBAL STREAM r
;; Start-of-input rule: reset global state, then emit the output prologue.
FIND-START
DEACTIVATE MIXED-CONTENT
;; Forget any parameter entities recorded so far.
CLEAR PE
SET theExpandedContentModel TO "ANY" ; robust default
SET theTmpExpandedContentModelA TO "ANY" ; robust default
SET theTmpExpandedContentModelB TO "ANY" ; robust default
;; NOTE(review): as it stands this only writes a newline. The literal looks
;; like it lost markup-like text (e.g. an XML header or opening tag) when the
;; file was extracted -- confirm against the original source.
OUTPUT "%n"
;; End-of-input rule: emit the output epilogue.
;; NOTE(review): the literal below is empty, so the rule writes nothing.
;; It presumably held a closing tag that was lost in extraction -- confirm
;; against the original source.
FIND-END
OUTPUT ""
;; Lining Comments we get rid of
;; The rule has no actions, so whatever it matches is consumed and dropped.
;; The visible part of the pattern matches a run of the characters = - # @,
;; optional white space, the comment close "-->" and trailing white space.
;; NOTE(review): the start of the pattern on the second line of this rule is
;; damaged (unbalanced parentheses, empty literal); the comment-open
;; delimiter and a LOOKAHEAD group appear to have been lost in extraction.
;; Restore from the original source before running.
FIND
UL "") ["=-#@"])+ WHITE-SPACE*
"-->" WHITE-SPACE*
;; Rule with no actions: whatever the pattern matches is dropped.
;; NOTE(review): the pattern is an empty literal as shown; the text it was
;; meant to match was presumably markup-like and lost in extraction --
;; confirm against the original source.
FIND ""
;; strip
;; Rule with no actions: whatever the pattern matches is dropped.
;; NOTE(review): the pattern is an empty literal as shown and is textually
;; identical to the preceding rule; the two presumably matched different
;; strings before the text was lost in extraction -- confirm.
FIND ""
;; strip
;; Comments we output as comments, willy nilly
;; Captures the comment body as theComment, up to the closing "-->", and
;; swallows trailing white space.
;; NOTE(review): the start of the pattern is damaged (empty literal,
;; unbalanced parentheses); the comment-open delimiter and LOOKAHEAD group
;; appear lost in extraction.
;; NOTE(review): the OUTPUT literal never references theComment and contains
;; a stray "%sn"; it presumably wrapped %x(theComment) in comment delimiters
;; originally -- confirm against the original source.
FIND
UL "") ANY)+=theComment "-->" WHITE-SPACE*
OUTPUT "%sn%n"
;; Element types
;; Handles an element type declaration. Steps, in order:
;;   1. expand parameter entity references in the content model until none
;;      remain, stripping SGML "--" comments on the first pass;
;;   2. pad parentheses with spaces;
;;   3. emit the original model, plus inclusion/exclusion and omissibility
;;      notes;
;;   4. emit the model classified as EMPTY, ANY, #PCDATA-only, or other;
;;   5. emit the referent "A" + element name, which the attribute-list rule
;;      fills in.
;; NOTE(review): the head of the pattern is damaged. The body uses pattern
;; variables theGI and theOmitability that are not bound in the visible
;; text, so the declaration-open literal and name/omissibility captures
;; were lost in extraction. Restore from the original source.
FIND UL "")) ANY)+=theContentModel
WHITE-SPACE* ">" WHITE-SPACE*
;; NOTE(review): "content" is declared but never used in the visible body.
LOCAL STREAM content
LOCAL SWITCH PE-FOUND
;; expand PE references
;; First pass: copy the captured model into buffer A one character at a time.
DEACTIVATE PE-FOUND
OPEN theTmpExpandedContentModelA AS BUFFER
REPEAT SCAN PATTERN theContentModel
;; "%name;" -> the stored value of that parameter entity, space-padded.
;; NOTE(review): no HAS KEY guard is visible; behaviour for a reference to
;; an undeclared parameter entity should be confirmed.
MATCH "%%" [ANY EXCEPT ";"]+=thePEName ";"
ACTIVATE PE-FOUND
USING PE KEY "%x(thePEName)"
PUT theTmpExpandedContentModelA " %g(PE) "
MATCH "--" ((LOOKAHEAD NOT "--") ANY)+=comment "--" ; strip SGML comments
;; Anything else is copied through unchanged.
MATCH ANY=theChar PUT theTmpExpandedContentModelA PATTERN theChar
AGAIN
CLOSE theTmpExpandedContentModelA
;; expand PE references: loops until no more PEs
;; Each iteration expands A into B and copies B back over A, which resolves
;; references introduced by the previous expansion. Unlike the first pass,
;; this loop does not strip "--" comments.
REPEAT
DEACTIVATE PE-FOUND
OPEN theTmpExpandedContentModelB AS BUFFER
REPEAT SCAN theTmpExpandedContentModelA
MATCH "%%" [ANY EXCEPT ";"]+=thePEName ";"
ACTIVATE PE-FOUND
USING PE KEY "%x(thePEName)"
PUT theTmpExpandedContentModelB " %g(PE) "
MATCH ANY=theChar PUT theTmpExpandedContentModelB PATTERN theChar
AGAIN
CLOSE theTmpExpandedContentModelB
SET theTmpExpandedContentModelA TO "%g(theTmpExpandedContentModelB)"
;; Leave once a whole pass found no reference. PE-FOUND is therefore
;; necessarily active whenever control reaches the AGAIN below.
EXIT WHEN NOT ACTIVE PE-FOUND
AGAIN WHEN ACTIVE PE-FOUND
;; add spaces to simplify pattern matching later
;; "(" becomes " ( "; ")" becomes " )" with no trailing space.
OPEN theExpandedContentModel AS BUFFER
REPEAT SCAN theTmpExpandedContentModelA
MATCH "(" PUT theExpandedContentModel " ( "
MATCH ")" PUT theExpandedContentModel " )"
MATCH ANY=theChar PUT theExpandedContentModel PATTERN theChar
AGAIN
CLOSE theExpandedContentModel
;; output the data
;; NOTE(review): several OUTPUT literals in the rest of this rule contain
;; only "%n"/"%t" or are empty; the markup they emitted (element tags)
;; appears to have been lost in extraction.
OUTPUT "%n%n"
;; Echo the unexpanded model exactly as declared.
OUTPUT "%tThe XML content model was %n%t%t%x(theContentModel) "
;; Flag SGML inclusions "+(" and exclusions "-(" found in the original model.
REPEAT SCAN PATTERN theContentModel
MATCH "+("
OUTPUT "%n%t%tWarning: a global inclusion was detected.%n"
MATCH "-("
OUTPUT "%n%t%tWarning: a global exclusion was detected.%n"
MATCH ANY
;; ignore
AGAIN
;; some nice message
;; Emitted only when omissibility indicators were captured and are
;; non-empty (the two condition lines after the OUTPUT).
OUTPUT "%n%t%tThe original SGML markup declarations had omissibility %x(theOmitability) "
WHEN PATTERN theOmitability IS SPECIFIED
AND LENGTH OF PATTERN theOmitability IS GREATER-THAN 0
;; Emitted whenever theOmitability was captured, even if empty.
OUTPUT "%n%t%n"
WHEN PATTERN theOmitability IS SPECIFIED
;; Classify the expanded model; the first test that succeeds wins.
DO WHEN "%g(theExpandedContentModel)" MATCHES UL "EMPTY"
OUTPUT "%t%n"
ELSE WHEN "%g(theExpandedContentModel)" MATCHES UL "ANY"
OUTPUT "%t%n"
;; #PCDATA only: optional open parentheses, #PCDATA, then only white space,
;; "*", "+" or ")", and not followed by "|" or ",".
ELSE WHEN theExpandedContentModel
MATCHES ( WHITE-SPACE* ("(" WHITE-SPACE*)*
UL "#PCDATA" (WHITE-SPACE | "*" | "+" | ")" )* (LOOKAHEAD NOT ["|,"]) )
OUTPUT "%t%n"
ELSE
;; Any other model: it is mixed content if #PCDATA occurs anywhere in it.
REPEAT SCAN theExpandedContentModel
MATCH UL "#PCDATA"
ACTIVATE MIXED-CONTENT
MATCH ANY
; swallow
AGAIN
OUTPUT "%t"
;; The two conditional OUTPUTs bracket the model only for mixed content;
;; their literals are empty as shown (see the review note above).
OUTPUT "" WHEN ACTIVE MIXED-CONTENT
OUTPUT "%g(theExpandedContentModel)"
OUTPUT "" WHEN ACTIVE MIXED-CONTENT
DEACTIVATE MIXED-CONTENT
OUTPUT "%n"
DONE
;; This is the action which puts out attributes
;; Emit a placeholder (referent) named "A" + the element name, then give it
;; an empty value. The attribute-list rule opens the same referent name for
;; writing when it meets an attribute list for this element.
OUTPUT "%n"
OUTPUT REFERENT "A%x(theGI)"
OUTPUT "%n"
SET REFERENT "A%x(theGI)" TO ""
OUTPUT "%n"
;; PIs -- probably shouldn't happen except for XML header
;; Captures the PI target (a run of non-white-space characters) as theTarget
;; and everything up to optional white space before "?>" as theData, then
;; re-emits both followed by "?>".
;; NOTE(review): the opening literal in the pattern is empty and the OUTPUT
;; literal has no PI-open delimiter before the target; both presumably held
;; the PI-open delimiter, lost in extraction -- confirm against the original.
FIND UL "" [ANY EXCEPT WHITE-SPACE]+=theTarget
WHITE-SPACE+ ((LOOKAHEAD NOT ( WHITE-SPACE* "?>")) ANY)+=theData
WHITE-SPACE* "?>" WHITE-SPACE*
OUTPUT "%n%x(theTarget) %x(theData)?>"
;; Parameter Entity
;; Records internal parameter entities (those with a literal value) in the
;; PE shelf. The other two branches only write output.
;; NOTE(review): the pattern is almost entirely lost in extraction. The body
;; relies on pattern variables theName, theValue, theValueB and
;; theSysIdA..theSysIdD that are not bound in the visible text. Restore from
;; the original source.
FIND UL "" WHITE-SPACE*
;; Literal value present (theValue / theValueB presumably correspond to the
;; two quoting styles -- confirm).
DO WHEN PATTERN theValue IS SPECIFIED
OR PATTERN theValueB IS SPECIFIED
;; Only the first declaration of a given name is stored; later
;; redeclarations are ignored.
DO UNLESS PE HAS KEY PATTERN theName
NEW PE KEY PATTERN theName
SET PE TO PATTERN theValue WHEN PATTERN theValue IS SPECIFIED
SET PE TO PATTERN theValueB WHEN PATTERN theValueB IS SPECIFIED
DONE
;; A system identifier was captured: the value is not loaded.
;; NOTE(review): the OUTPUT literals in this branch and the next contain
;; only "%n"/"%t"; the comment or warning markup they emitted appears lost
;; in extraction.
ELSE WHEN PATTERN theSysIdA IS SPECIFIED
OR PATTERN theSysIdB IS SPECIFIED
OR PATTERN theSysIdC IS SPECIFIED
OR PATTERN theSysIdD IS SPECIFIED
OUTPUT "%n%n"
;; Neither a literal value nor a system identifier was captured.
ELSE
OUTPUT "%n%n"
OUTPUT "%t%n"
DONE
;; General Entity
;; NOTE(review): the pattern is almost entirely lost in extraction. The body
;; relies on pattern variables theValue, theValueA and theSysIdA..theSysIdD
;; that are not bound in the visible text. Restore from the original source.
;; NOTE(review): this rule tests theValueA where the parameter-entity rule
;; tests theValueB; which is right cannot be checked without the pattern.
FIND UL "" WHITE-SPACE*
;; Literal value present: this branch has no actions, so an internal
;; general entity produces no output and is not stored.
DO WHEN PATTERN theValue IS SPECIFIED
OR PATTERN theValueA IS SPECIFIED
;; load it
;; A system identifier was captured.
;; NOTE(review): the OUTPUT literals below contain only "%n"/"%t"; the
;; markup they emitted appears lost in extraction.
ELSE WHEN PATTERN theSysIdA IS SPECIFIED
OR PATTERN theSysIdB IS SPECIFIED
OR PATTERN theSysIdC IS SPECIFIED
OR PATTERN theSysIdD IS SPECIFIED
OUTPUT "%n"
;; Neither a literal value nor a system identifier was captured.
ELSE
OUTPUT "%n%n"
OUTPUT "%t%n"
DONE
;; Notation
;; Writes one form of output when a system identifier was captured and
;; another when none was.
;; NOTE(review): the pattern is almost entirely lost in extraction. The
;; pattern variables theSysIdA..theSysIdD tested below are not bound in the
;; visible text, and the OUTPUT literals contain only "%n"/"%t". Restore
;; from the original source.
FIND UL "" WHITE-SPACE*
DO WHEN PATTERN theSysIdA IS SPECIFIED
OR PATTERN theSysIdB IS SPECIFIED
OR PATTERN theSysIdC IS SPECIFIED
OR PATTERN theSysIdD IS SPECIFIED
OUTPUT "%n%n"
;; No system identifier was captured.
ELSE
OUTPUT "%n%n"
OUTPUT "%t%n"
DONE
;; Attributes
;; Handles an attribute list declaration. Steps, in order:
;;   1. expand parameter entity references in the captured list until none
;;      remain, stripping SGML "--" comments on the first pass;
;;   2. open stream r on the referent "A" + element name, the placeholder
;;      emitted by the element rule;
;;   3. scan the expanded list one attribute definition at a time and write
;;      a description of each to r.
;; NOTE(review): the head of the pattern is damaged (empty literal followed
;; by "]+"). The declaration-open literal, the capture of the element name
;; (theGI, used below) and the start of the character class were lost in
;; extraction. Restore from the original source.
FIND UL ""]+=theAttlist
">" WHITE-SPACE*
LOCAL STREAM theUnknownAtt
LOCAL STREAM AttlistBufferA
LOCAL STREAM AttlistBufferB
LOCAL SWITCH PE-FOUND
SET theUnknownAtt TO ""
;; expand PE references
;; First pass: copy the captured list into buffer A one character at a time.
DEACTIVATE PE-FOUND
OPEN AttlistBufferA AS BUFFER
REPEAT SCAN PATTERN theAttlist
;; "%name;" -> the stored value of that parameter entity, space-padded.
;; NOTE(review): no HAS KEY guard is visible; behaviour for a reference to
;; an undeclared parameter entity should be confirmed.
MATCH "%%" [ANY EXCEPT ";"]+=thePEName ";"
ACTIVATE PE-FOUND
USING PE KEY "%x(thePEName)"
PUT AttlistBufferA " %g(PE) "
MATCH "--" ((LOOKAHEAD NOT "--") ANY)+=comment "--" ; strip SGML comments
MATCH ANY=theChar PUT AttlistBufferA PATTERN theChar
AGAIN
CLOSE AttlistBufferA
;; expand PE references: loops until no more PEs
;; Each iteration expands A into B and copies B back over A. Unlike the
;; first pass, this loop does not strip "--" comments.
REPEAT
DEACTIVATE PE-FOUND
OPEN AttlistBufferB AS BUFFER
REPEAT SCAN AttlistBufferA
MATCH "%%" [ANY EXCEPT ";"]+=thePEName ";"
ACTIVATE PE-FOUND
USING PE KEY "%x(thePEName)"
PUT AttlistBufferB " %g(PE) "
MATCH ANY=theChar PUT AttlistBufferB PATTERN theChar
AGAIN
CLOSE AttlistBufferB
SET AttlistBufferA TO "%g(AttlistBufferB)"
;; Leave once a whole pass found no reference. PE-FOUND is therefore
;; necessarily active whenever control reaches the AGAIN below.
EXIT WHEN NOT ACTIVE PE-FOUND
AGAIN WHEN ACTIVE PE-FOUND
;; From here on, everything PUT to r becomes the value of the referent
;; "A" + element name.
OPEN r AS REFERENT "A%x(theGI)"
;; Echo the unexpanded attribute list exactly as declared.
PUT r "%tThe XML attribute list was %n%t%t%x(theAttlist)%n%t%n "
REPEAT SCAN AttlistBufferA
;; NOTE(review): WHITE-SPACE* can match the empty string; confirm this
;; alternative cannot pre-empt the ones below at a non-space position.
MATCH WHITE-SPACE*
;; swallow
;; NOTE(review): the expansion loop above only exits once no "%name;"
;; remains in the buffer, so this alternative is presumably a safety net.
;; Its PUT literal contains only "%t%n" as shown.
MATCH "%%" [ANY EXCEPT ";"]+=thePERef ";" WHITE-SPACE*
PUT r "%t%n"
;; One attribute definition:
;;   name, then either a parenthesised enumeration (theEnum) or a type
;;   keyword (theType), then optionally "#FIXED", a single-quoted default
;;   (theTokenA), a double-quoted default (theTokenB), a parenthesised
;;   group (theNotation), and a "#..." keyword (theImplication).
MATCH [ANY EXCEPT WHITE-SPACE]+=theAttName WHITE-SPACE+
(
( "(" [ANY EXCEPT ")"]+=theEnum ")" WHITE-SPACE+)
|
([ANY EXCEPT WHITE-SPACE]+=theType WHITE-SPACE+)
)
(UL "#FIXED" WHITE-SPACE*)?
(UL "'" [ANY EXCEPT "'"]*=theTokenA "'" WHITE-SPACE* )?
(UL '"' [ANY EXCEPT '"']*=theTokenB '"' WHITE-SPACE* )?
( "(" [ANY EXCEPT ")"]+=theNotation ")" WHITE-SPACE*)?
(UL "#" [ANY EXCEPT WHITE-SPACE]+=theImplication WHITE-SPACE*)?
;; NOTE(review): the next six lines are damaged. DONE / ELSE / DONE appear
;; with no opening DO, and the first PUT literal is only "%t". The code that
;; opened the attribute element and mapped theType / theEnum / theNotation
;; to a Type='...' value appears lost in extraction. Only the fallback
;; Type='???' branch survives. Restore from the original source.
PUT r "%t"
DONE
PUT r "' "
ELSE
PUT r "%n%t%tType='???' "
DONE
;; Required='Yes' only when the "#..." keyword is REQUIRED (any case).
PUT r "%n%t%tRequired='Yes' "
WHEN PATTERN theImplication IS SPECIFIED
AND PATTERN theImplication MATCHES UL "REQUIRED"
;; Default value, quoted in the output the same way it was quoted in the
;; declaration.
PUT r "%n%t%tAttValue='%x(theTokenA)' "
WHEN PATTERN theTokenA IS SPECIFIED
PUT r '%n%t%tAttValue="%x(theTokenB)" '
WHEN PATTERN theTokenB IS SPECIFIED
;; Close the start tag and append theUnknownAtt.
;; NOTE(review): in the visible code theUnknownAtt is only ever set to "";
;; the lost section above presumably filled it for unrecognised types.
PUT r ">%g(theUnknownAtt)%n"
;; For an enumerated attribute, write one line per token, skipping the
;; delimiters ( ) , | and white space.
;; NOTE(review): the PUT literal for each token contains only tabs and a
;; newline and never references theEnumToken; its markup appears lost.
DO WHEN PATTERN theEnum IS SPECIFIED
REPEAT SCAN PATTERN theEnum
MATCH ["(),|"]
; strip
MATCH WHITE-SPACE+
; strip
MATCH [ANY EXCEPT WHITE-SPACE OR "(),|" ]+=theEnumToken
PUT r "%t%t%t%n"
MATCH ANY
;; never happen
AGAIN
DONE
;; End of this attribute's output; reset the per-attribute scratch stream.
PUT r "%t%n"
SET theUnknownAtt TO ""
AGAIN
CLOSE r