<!--                                                              -->
<!-- *******************      PTEXT.DTD       ******************* -->
<!--                                                              -->
<!-- A formal specification of the "parsed text" format designed  -->
<!-- for interchange of parsed texts between natural language     -->
<!-- processing operations (such as CARLA).                       -->
<!--                                                              -->
<!-- This file is maintained by Gary Simons.  Send comments or    -->
<!-- suggested refinements by email to: gary_simons@sil.org, or   -->
<!-- by post to: 7500 W. Camp Wisdom Rd., Dallas, TX 75236, USA   -->
<!--                                                              -->
<!-- The most recent version of this file may be obtained at      -->
<!-- the following URL:                                           -->
<!--                                                              -->
<!--         http://www.sil.org/silewp/1997/008/ptext.dtd         -->
<!--                                                              -->
<!--                                                              -->
<!-- *******************   REVISION HISTORY   ******************* -->
<!--                                                              -->
<!-- Version 8, November 4, 1997 -->
<!-- An error in the content model for <f> was fixed (by changing
        + to *) to support the new fVal attribute; reported by
        Andy Black.
     -->
<!-- Version 7, June 14, 1997 -->
<!-- The following changes were made in response to feedback
        from Andy Black:
     Content model of wsAlt changed from (ws+) to (ws, ws+)
        which now matches psAlt and fAlt
     Added fVal attribute to <f> and ID to <fs> and all basic values
        to support feature analyses that share feature values.
     -->
<!-- Version 6, May 13, 1996 -->
<!-- The following changes were made in response to problems
        encountered by Stephen McConnel and Verna Stutzman in
        attempting to implement PTEXT-aware software:
     Changed name from CARLA4.DTD to PTEXT.DTD to follow convention
        in SGML community of naming DTD after the document type.
     Removed NIL attribute from <ptext> and added a new <nil>
        element to the <declarations> section.
     Changed attribute CAT of <lex> from #REQUIRED to #IMPLIED.
     Changed attribute LEX of <m> from #REQUIRED to #IMPLIED.
     Changed RCDATA in content of <markup>, <ignore>, and <str>
        to #PCDATA.
     Added standard set of 5 basic entities: amp (&), lt (<),
        rsqb (]), sol (/), and quot (").
     Changed content model of <wsAlt> from 2 or more <ws>s to
        1 or more.
     Fixed bug in ATTLIST of <function>: id and ID were swapped.

     -->
<!-- Version 5, Nov 24, 1995 -->
<!-- The following changes were made in response to a review over
	email by the CARLA4 design team:
     Fixed some typos.
     -->
<!-- Version 4, Sept 11, 1995 -->
<!-- The following changes were made in response to a review by
	the CARLA4 design team at a meeting in Dallas during August
	and to a review by LinguaLinks programmers to see if the
	model was rich enough to encode glossed texts:
     Added NIL attribute to <ptext> for declaring ID for nil value.
     Added "unix" to values of SYSTEM for <process>.
     Added ID, ANA attributes and <failure> contents to <target>.
     Added SYN attribute to <w>.
     Added ID to <gloss> and <gloss> to content model for <ws>.
     Added <annoTypes> and <anno> to encode annotations in <ps>.
     -->
<!-- Version 3, June 20, 1995 -->
<!-- The following changes were made in response to a review by
	the CARLA4 design team at a meeting in Grand Forks, ND:
     Replaced <allcaps> and <initcap> with CAPITALIZE attr on <w>.
     Content model of <markup> changed to RCDATA so that data may
	contain < and >.
     Added <ignore> tag.
     Added <pedigree> and removed PROCESS attr for formal record 
       of processing history.
     Changed <langinfo> to <languages> and <langUsage>, adding
	case mappings to <langDefn>s
     Added <declarations> to group language items, <categories>, and
       two new elements: <lexTypes> and <glossTypes>.
     Added TYPE attr to <lex> and <gloss>.
     Added <wsAlt>, <psAlt>, and <fAlt> to encode alternatives
	in analyses.
     Changed <lform> and <aform> in <lex> to <form> and <adapt>.
     Added <form> (as distinct from <orth>) and <target> to <wf>.
     Added <puncforms> inventory and changed <punc> to point to 
	punctuation forms.
     Changed SURF attr of <m> to tag content in order to allow
	special characters (as entities) in string.
     -->
<!-- Version 2, May 31, 1995 -->
<!-- Added nbr and sym as possible text.tokens.  Added cf for 
     "compound form" to wordform inventory.
     -->
<!-- Version 1, April 26, 1995 -->
<!-- First draft by Gary Simons, working from 23 Jan 95 draft of 
     Framework paper by Black, Mann, and Simons -->

<!--                                                              -->
<!-- *******************     PARSED TEXT      ******************* -->
<!--                                                              -->

<!ELEMENT ptext    - - (header?, pedigree, declarations, lexicon?,
			wordforms?, puncforms?, (text | list)* ) >
<!-- The top-level element is named "ptext" for "parsed text".
     Its parts occur in a fixed order: an optional <header>, then
     the obligatory <pedigree> and <declarations>, then the optional
     <lexicon>, <wordforms>, and <puncforms> inventories, and
     finally any number of <text> and <list> elements in any
     mixture.  Both the start tag and the end tag are required. -->

<!ELEMENT header   - - (#PCDATA) >
<!-- This gives information like what the text is, who is responsible
     for it, and when it was parsed.  Eventually this should have
     internal structure, but for now it is just a character string
     with no embedded elements. -->

<!--                                                               -->
<!-- *******************       PEDIGREE       ******************** -->
<!--                                                               -->

<!ELEMENT pedigree - - (process+) >
<!-- This element documents the history of processes that produced
     this file.  The value is a sequence of one or more descriptions
     of the processes that have been run to parse the file to its
     current state; at least one <process> must be given.  The
     latest process is at the end of the list.  -->

<!ELEMENT process  - - (program, input*, output*) >
<!ATTLIST process      system  (dos | win | mac | cellar | unix)  
							  #REQUIRED >
<!-- Documents a single process that was run on the ptext.  The
     content is exactly one <program>, followed by any number of
     <input>s, followed by any number of <output>s.  SYSTEM is
     obligatory and identifies the host system, which in turn
     allows one to interpret the information which is embedded.
     The list of possible systems will grow over time. -->

<!ELEMENT (program | input | output)  - - 
		     (name, description*, host?, path?, date, size?) >
<!ATTLIST (input | output)      function  CDATA  #REQUIRED >
<!-- Each of these documents a single file or object that is
     involved in a process; all three share the same content model.
     FUNCTION is an obligatory phrase describing the role of an
     input or output of a process; it is not declared for <program>.
     The ptext which is the input to a process should not be listed
     as it is implicit in the output of the preceding process. -->

<!ELEMENT (name | description | host | path | date | size) - - 
							  (#PCDATA)  >
<!ATTLIST (name | description)   lang  IDREF  #IMPLIED >
<!-- All six elements hold plain character data.
     <name> is the file name or the unique id of an object.
     <description> is an optional comment-like description of the item.
     <host> identifies the host system, such as volume id of the hard
        disk or unique name of a CELLAR system.
     <path> may be used to give the full path from root to file.
     <date> is the latest modification date for the file or object.
     <size> is the size of the file.
     Within <program>, <input>, and <output>, only <name> and <date>
        are required; the others may be useful if available.
     <description> may have a LANG attr and occur multiple times for
     different languages.  So may <name> when used in the declarations
     defined below.  LANG is an IDREF (presumably to the XXX id of a
     <langDefn>; the DTD itself cannot enforce which element is the
     target).  When LANG is not given, it is assumed to be the
     language declared in <langUsage> as the gloss language. -->

<!--                                                               -->
<!-- *******************     DECLARATIONS     ******************** -->
<!--                                                               -->

<!ELEMENT declarations - - (languages, langUsage, nil?, lexTypes?, 
			    glossTypes?, annoTypes?, categories?) >
<!-- A parsed text is meant to be self-documenting and self-contained.
     That is, the recipient of a parsed text should need no other
     files in order to make use of the parse.  Thus each parsed text
     declares all the identifiers it uses for languages, constrained
     types, and lexical and syntactic categories.  <languages> and
     <langUsage> are obligatory; <nil>, the three type inventories,
     and <categories> are optional.  The parts must occur in the
     order listed. -->

<!ELEMENT languages - - (langDefn+) >
<!-- This element contains definitions for all the languages that
     are used throughout this parsed text file.  At least one
     <langDefn> must be present. -->

<!ELEMENT langDefn - -  (name+, description*, caseMappings?)  >
<!ATTLIST langDefn      xxx  ID  #REQUIRED >
<!-- Every language used in the parsed text file must be declared in
     a langDefn.  XXX gives the standard three-letter language id
     code from the Ethnologue; it typically includes a fourth letter
     to designate dialect and/or a particular language encoding in
     the CELLAR sense.  Because XXX is declared as an SGML ID, it
     serves as the target of IDREF attributes that refer to this
     language.  <name> is the full name of the language and must
     occur at least once; <description> is any further
     identificational information the encoder may wish to include.
     The optional <caseMappings> lists the lower case to upper case
     correspondences for the language. -->

<!ELEMENT caseMappings    - -  (map+) >
<!ELEMENT map             - -  (lower, upper)  >
<!ELEMENT (lower | upper) - O  (#PCDATA)  >
<!-- Provides a list of mappings from lower case characters to their
     upper case equivalents.  Each <map> pairs exactly one <lower>
     with one <upper>, in that order; the end tags of <lower> and
     <upper> may be omitted.  Inclusion of this information in the
     <ptext> makes it possible to map a <ptext> back onto a
     conventional text without needing an input file to describe
     the orthography. -->

<!ELEMENT langUsage - O  EMPTY >
<!ATTLIST langUsage      text   IDREF  #REQUIRED
			 gloss  IDREF  #IMPLIED
			 target IDREF  #IMPLIED  >
<!-- Describes how the various defined languages are used in the ptext.
     The element is empty; all of its information is carried by
     attributes, of which only TEXT is obligatory.
     TEXT gives the XXX code of the language of the base text.
     GLOSS gives the XXX code of the default language used for glossing.
        Multiple glosses can be given in multiple languages.  This
        declaration says which language it is if a gloss specifies
        no language.
     TARGET gives the XXX code of the target language for an adapted
        text. -->

<!ELEMENT nil - O  EMPTY >
<!ATTLIST nil      id   ID  #REQUIRED  >
<!-- The value of the ID attribute declares the symbol that is
     used in the analysis to represent a nil analysis.  Typically,
     the string "nil" is used.  Many processors will never use
     a nil value.  Others will want to distinguish an unspecified
     attribute value (meaning the analysis has an unknown result
     or has not even been performed) from a nil value (meaning
     that the result of the analysis is known to be no value).
      -->

<!ENTITY  % declaredTypes  "lexTypes | glossTypes | annoTypes" >
<!ELEMENT (%declaredTypes;) - - (typeDefn+)  >
<!ATTLIST (%declaredTypes;)     default  IDREF  #REQUIRED >
<!ELEMENT typeDefn  - - (name*, description*)  >
<!ATTLIST typeDefn      id   ID   #REQUIRED    >
<!-- Lexical entries are typed (e.g. suffix versus root).  So are
     glosses (e.g. formal versus informal) and <ps>-level annotations
     (e.g. free translation versus grammatical note).  These elements
     are used to declare the allowed type identifiers and what they
     represent.  The parameter entity declaredTypes names the three
     inventories so that they can share one declaration; each must
     contain at least one <typeDefn>.  DEFAULT is obligatory on each
     inventory and designates the type that is assumed when an item
     gives no TYPE.  Within a <typeDefn>, note that the ID is all
     that is obligatory.  Name and description are optional. -->

<!ELEMENT categories - - (cat+) >
<!ELEMENT cat        - - (name*, description*, fs?) >
<!ATTLIST cat            id   ID   #REQUIRED >
<!-- <categories> is the inventory of category definitions and
     must contain at least one <cat>.
     The only required component of a category definition
     is its ID.  This is used as the target of references from
     lexical items and phrase structures.  Name and description
     can be used for documentation purposes to give glosses and
     longer descriptions in as many languages as desired for
     what the abbreviation used as the ID stands for.
     The optional fs element is for a feature structure that
     gives a formal definition of the category.  All lexical items
     and phrase structures that point to a <cat> have all
     the features defined in its feature structure unless they
     specifically override a feature value. -->


<!--                                                               -->
<!-- *******************       LEXICON        ******************** -->
<!--                                                               -->

<!ELEMENT lexicon  - - (lex+) >
<!-- Every parsed text is meant to be self contained.  That is, the
     recipient of a parsed text should need no other files in order to
     make use of the parse.  Thus each parsed text carries a lexicon
     of all the lexical items it uses.  When a <lexicon> is present
     it must contain at least one <lex>. -->

<!ELEMENT lex      - - (form, adapt?, fs?, gloss*) >
<!ATTLIST lex          id    ID     #REQUIRED
		       lang  IDREF  #IMPLIED
		       type  IDREF  #IMPLIED
		       cat   IDREF  #IMPLIED >
<!-- The only required components of a lexical item are its lexical
     form and the attribute ID.  ID is an SGML id that is used by
     analyses to point to this definition of the lexical item.
     CAT is an optional idref that points to one of the categories
     that is sanctioned for use in this lexicon.  Similarly,
     TYPE is an outbound pointer to one of the lexical entry types
     declared in <lexTypes>; if missing, the type is assumed to be
     the type designated by the DEFAULT attr of <lexTypes>.  The LANG
     attribute is used only if the item is in a language other than
     the one for the lexicon as a whole.  Multiple glosses are
     allowed for multiple languages.  An optional feature structure
     may specify feature values for this lexical item. -->

<!ELEMENT form     - - (#PCDATA) >
<!-- Within a <lex>, the spelling of the lexical (underlying) form
     for the lexical item.  The same element is also used as the
     first component of <wf> and <pf>. -->

<!ELEMENT adapt    - - (#PCDATA) >
<!-- The adaptation form for the lexical item, given as a character
     string.  This string may be used in transfer to retrieve the
     corresponding item out of the target language dictionary. -->

<!ELEMENT gloss     - - (#PCDATA) >
<!ATTLIST gloss         lang  IDREF  #IMPLIED
			type  IDREF  #IMPLIED
			id    ID     #IMPLIED >
<!-- A gloss may indicate both a LANGuage and a TYPE (e.g. formal
     versus informal).  If LANG is missing, it is assumed to be the
     gloss language declared in <langUsage>.  If TYPE is missing,
     it is assumed to be the DEFAULT specified in <glossTypes>.  If
     <glossTypes> is absent, then all glosses are of the same type
     and the TYPE attr is meaningless.  Glosses used in <ws> may
     also have an optional ID; this may be used by the ANA attribute
     of <w> to select the word gloss for the context. -->

<!--                                                             -->
<!-- *******************  WORDFORM INVENTORY  ****************** -->
<!--                                                             -->
<!ELEMENT wordforms - - (wf | cf)+  >
<!-- The inventory of wordforms (<wf>) and compound forms (<cf>).
     When present it must contain at least one entry; <wf> and <cf>
     entries may be intermixed in any order. -->

<!ELEMENT wf        - - (form, orth?, target*, (ws | wsAlt)? ) >
<!ATTLIST wf            id    ID     #REQUIRED 
			lang  IDREF  #IMPLIED >
<!-- Encodes a single wordform and (optionally) all of its
     analyses.  Only <form> is obligatory in the content; the
     analysis, when given, is either a single <ws> or a <wsAlt>
     of alternatives.  ID is the means by which a word token in
     the text (in <w>) points to its wordform.  If LANG is not
     specified, it is assumed to be the language declared in
     <langUsage> as the text language. -->

<!ELEMENT cf        - - ( (ws | wsAlt)? ) >
<!ATTLIST cf            id    ID      #REQUIRED
			wfs   IDREFS  #REQUIRED
			lang  IDREF   #IMPLIED >
<!-- Encodes a single compound (or idiom) form and (optionally)
     all of its analyses; the content may be empty when no analysis
     is recorded.  ID is the means by which a word token
     in the text (in <w>) points to this compound or idiom for
     which it is a place holder.  WFS is obligatory and points to
     the wordforms (elsewhere in the inventory) which make up the
     compound or idiom; the idrefs are listed in order of
     occurrence in the compound.  If LANG is not specified, it is
     assumed to be the language of the wordforms that make up the
     compound. -->

<!-- <form> is defined above under <lex>.  In a <wf> it is the
     spelling of the (surface) wordform in the technical 
     orthography used for the analysis. -->

<!ELEMENT orth      - - (#PCDATA) >
<!-- The original orthographic form of the wordform, given as a
     character string.  If a <wf> has no <orth>, then the <form> is
     assumed to be the original orthographic form.  Both <orth> and
     <form> are used when orthographic changes have been performed
     to transform the original orthography into a more convenient
     technical orthography.  <orth> is also used for the
     orthographic (i.e. input) form of a text segment (<s>), where
     it is obligatory. -->

<!ELEMENT target    - - (#PCDATA | failure) >
<!ATTLIST target        lang  IDREF   #IMPLIED 
			id    ID      #REQUIRED
			ana   IDREFS  #IMPLIED >
<!-- A corresponding target wordform produced by adaptation for
     the indicated language.  The content is either the character
     string of the target form or a <failure> element.  If LANG is
     missing, it is assumed to be the target language declared in
     <langUsage>.  ID is obligatory and is used as the target of a
     pointer from <w> (its SYN attribute) to select the adapted
     target form for a word in context.  ANA points to the <ws>s
     for this <wf> for which this is the adapted target form.  If
     ANA is missing, this is a target form for every <ws>.
     NOTE(review): #PCDATA stands in a non-repeating OR group here,
     so the content is one or the other and never a mixture; confirm
     that the parser in use treats white space around <failure> as
     intended. -->

<!ELEMENT failure  - O  EMPTY  >
<!-- Represents the fact that an attempt to synthesize a target
     form failed to produce any form.  The element is empty and
     has no attributes; it occurs only as the content of a
     <target>. -->

<!ELEMENT ws        - - ( (ws | wsAlt | m)*, fs?, gloss*) >
<!ATTLIST ws            id    ID     #IMPLIED
			cat   IDREF  #IMPLIED >
<!-- <ws> encodes a "word structure" analysis.  ID is the means
     by which a word in context selects the analysis in context.
     All top-level ws nodes should have an ID for this purpose
     (the DTD itself leaves ID optional so that embedded nodes
     need not carry one).
     CAT optionally points to the lexical <cat> for this analysis.
     When <ws> has no CAT, it simply represents a sequence.
     The <ws> may contain recursively embedded <ws>s and <wsAlt>s
     to form a tree structure.  The <m>s are the leaf nodes of the
     structure tree.  After its constituents, the <ws> may also
     optionally contain a feature structure giving its feature
     analysis and then glosses in one or more languages or of one
     or more types. -->

<!ELEMENT wsAlt     - - (ws, ws+) >
<!-- A disjunctive alternation of word structures; that is, the
     analysis at this point could be any one of the embedded word
     structures.  At least two <ws> alternatives are required.
     In this context, <ws> may be used without a CAT to represent
     just a sequence of constituents. -->

<!ELEMENT m         - O (#PCDATA)? >
<!ATTLIST m             lex   IDREF  #IMPLIED >
<!-- <m> is mnemonic for morph or morpheme.  It is the terminal
     node of a word structure.  The LEX attribute is a pointer
     to the lexical item (<lex>) for the morph; when it is missing,
     it means that the analyst has not yet assigned this morph
     to a particular morpheme in the lexicon.  The optional tag
     content is a string giving the surface form of the morph.
     If no content is given, then the end tag can be omitted. -->

<!--                                                             -->
<!-- **************** PUNCTUATION FORM INVENTORY *************** -->
<!--                                                             -->

<!ELEMENT puncforms - - (pf)+  >
<!-- The inventory of punctuation forms.  When present it must
     contain at least one <pf>. -->

<!ELEMENT pf  - - (form, orth?, target*, function* ) >
<!ATTLIST pf      id        ID     #REQUIRED 
		  position  (initial | internal | 
			     final   | isolated )  #REQUIRED >
<!-- Encodes a single punctuation form and (optionally) all of its
     functions.  ID is the means by which a punctuation token in the
     text (in <punc>) points to its punctuation form.  POSITION is
     obligatory and indicates to a text output process how the
     punctuation is placed relative to space: 'initial' means place
     the mark tight to the beginning of the next wordform, 'final'
     means place the mark tight to the preceding wordform,
     'internal' means put no space on either side of it, and
     'isolated' means put a space on both sides of it.  <form>
     holds the punctuation mark itself.  <orth> and <target> are
     available as for <wf> but would seldom be used with
     punctuation. -->

<!ELEMENT function  - -  (name, description*)  >
<!ATTLIST function       id  ID  #REQUIRED  >
<!-- Describes one possible function of a punctuation form.  <name>
     is obligatory and is a symbol that can be matched on in rules
     and patterns.  <description> is optional documentation
     (potentially in multiple languages) on what this function is.
     ID is obligatory; it is the target of the ANA attribute of
     <punc>, which selects the function that applies in context.
     For instance, the ASCII apostrophe mark can have at least the
     following functions in English:  possession, elision, open
     quote, end quote. -->

<!--                                                             -->
<!-- *******************   TEXTS AND LISTS   ******************* -->
<!--                                                             -->

<!ELEMENT text     - - ( markup | ignore | s )+ >
<!ELEMENT list     - - ( markup | ignore | s )+ >
<!-- <text> and <list> share the same content model: one or more
     <markup>, <ignore>, and <s> elements in any order.
     <s> stands for "segment".   In a text, the segments form a
     continuous sequence so that adjacency is defined over <s>
     boundaries. Also, the segments in a text are typically sentences.
     In a list, the segments could be sentences or phrases or single
     words, and adjacency is not defined over neighboring
     segments. -->

<!ELEMENT markup   - - (#PCDATA)  >
<!-- Markup occurring between the text segments in the input text is
     copied into these elements as character data.  Markup occurring
     within text segments is also treated this way; there <markup>
     appears as one of the text tokens inside a <ps>.  -->

<!ELEMENT ignore   - - (#PCDATA)  >
<!-- This element may occur both between text segments and within
     text segments (as a text token inside a <ps>) to hold material
     from the original input file that is to be ignored completely
     during processing.
     <ignore> differs from <markup> in that the latter
     may be matched and operated upon by processes, while
     the former is always ignored (except by a final TextOut
     process that reconstitutes the text). -->

<!--                                                             -->
<!-- *******************    TEXT SEGMENTS    ******************* -->
<!--                                                             -->

<!ELEMENT s         - - (orth, (ps | psAlt)? ) >
<!ATTLIST s             n     CDATA  #IMPLIED 
			lang  IDREF  #IMPLIED >
<!-- Encodes a single text segment and (optionally) all of its
     analyses.  <orth> is obligatory and is for the orthographic
     form of the whole segment; the analysis, when given, is either
     a single <ps> or a <psAlt> of alternatives.  N (following the
     TEI's N attribute) is a string which names the segment for
     application software reference purposes.  It is not
     necessarily an SGML ID; for instance, one could use
     "eccl.10.10" for a book, chapter, verse reference.  If LANG is
     not specified, it is assumed to be the language declared in
     <langUsage> as the text language. -->

<!ENTITY  % text.tokens  "( markup | ignore | punc | w | nbr | sym )" >

<!ELEMENT ps        - - ( (ps | psAlt | %text.tokens;)*, fs?, anno*) >
<!ATTLIST ps            id    ID     #IMPLIED
			cat   IDREF  #IMPLIED >
<!-- The parameter entity text.tokens lists the elements that may
     occur as leaf tokens within a phrase structure.
     <ps> encodes a "phrase structure" analysis.  No use for ID
     has yet been designed, but it is likely to be useful as a
     means for allowing multiple analyses to share structure.
     CAT optionally points to the syntactic <cat> for this analysis.
     When a ps has no CAT it simply represents a sequence.
     The ps may contain recursively embedded <ps>s and <psAlt>s or
     text tokens, which are the leaf nodes of the structure tree.
     The main type of text token is <w> for word.  After its
     constituents, the <ps> may also optionally contain a feature
     structure giving its feature analysis and then annotations of
     more than one language or type. -->

<!ELEMENT psAlt     - - (ps, ps+) >
<!-- A disjunctive alternation of phrase structures; that is, the
     analysis at this point could be any one of the embedded phrase
     structures.  At least two <ps> alternatives are required.
     In this context, ps may be used without a CAT to represent
     just a sequence of constituents. -->

<!ELEMENT punc     - O EMPTY >
<!ATTLIST punc         form  IDREF  #REQUIRED 
		       ana   IDREFS #IMPLIED  >
<!-- The punc element represents a punctuation mark.  It is an
     empty element.  The obligatory FORM attribute points to a
     punctuation form (<pf>) in the <puncforms> inventory.  The ANA
     attribute gives the analysis in context.  If missing, it is
     taken to be the list of all possible analyses for the given
     punctuation form.  When present, it is one or more pointers to
     <function> elements of the corresponding punctuation form.   -->

<!ELEMENT w        - O EMPTY >
<!ATTLIST w            form  IDREF  #REQUIRED 
		       ana   IDREFS #IMPLIED 
		       syn   IDREFS #IMPLIED
		       capitalize  ( all | init | no )  no  >
<!-- The w element represents a word token.  It is an empty
     element.  The obligatory FORM attribute points to the wordform
     of which this is a token.  The ANA attribute gives the analysis
     in context.  If missing, it is taken to be the list of all
     possible analyses for the given wordform.  When present, it is
     one or more pointers to <ws> elements of the corresponding
     wordform, or it is the nil symbol (the ID declared by the <nil>
     element, typically 'nil') to indicate that none of the
     analyses work in this context.
     Rather than giving the ID of a <ws>, ANA may give the ID of
     a gloss within a <ws>, in which case the <ws> is the analysis
     and that gloss is selected as the one for this context.
     The SYN attribute selects the synthesized target form for
     this context.  If missing, it is taken to be the list of all
     possible targets for the given wordform.  When present, it
     is one or more pointers to <target> elements of the
     corresponding wordform.  The CAPITALIZE attribute records
     orthographic (as opposed to lexical) capitalization
     information; when it is not specified its value defaults to
     "no".  "all" tells a TextOut process that all letters
     should be capitalized. "init" says that the first
     capitalizable letter should be capitalized (in addition
     to any internal lexical capitalization that may be present).
     "no" means that no orthographic capitalization is to be added
     to the lexical capitalization that may be there already.  -->

<!-- The nbr and sym elements are defined below as possible
     feature values.  As a text token, nbr is used for a string
     of characters that form a number; sym is for symbols that
     mix letters and digits and are neither words nor numbers.
     -->

<!ELEMENT anno      - - (#PCDATA)  >
<!ATTLIST anno          lang  IDREF  #IMPLIED
			type  IDREF  #IMPLIED >
<!-- This element is used for annotations on <ps> nodes.  The most
     common use would be on the top-level <ps> of an <s> to provide
     "freeform annotations" as are common in IT and SHOEBOX files.
     An annotation may indicate both a LANGuage and a TYPE (e.g.
     free translation or grammatical note).  If LANG is missing,
     it is assumed to be the gloss language declared in <langUsage>.
     If TYPE is missing, it is assumed to be the DEFAULT specified
     in <annoTypes>.  If <annoTypes> is absent, then all annotations
     are of the same type and the TYPE attr is meaningless. -->
     

<!--                                                             -->
<!-- *******************  FEATURE STRUCTURES  ****************** -->
<!--                                                             -->

<!-- To get things rolling, this is a simplified version of the  -->
<!-- TEI system for feature structure markup.                    -->

<!ELEMENT  fs      - - (f | fAlt)*                            >
<!ATTLIST  fs          type  CDATA  #IMPLIED
		       rel   (eq | ne | sb | ns)    eq 
                       id    ID     #IMPLIED                  >
<!-- A feature structure has any number of features (<f>) and
     feature alternations (<fAlt>), and an optional type.
     The RELation tells whether the specified fs equals (eq),
     doesn't equal (ne), subsumes (sb), or doesn't subsume (ns)
     the actual feature structure that it represents; when REL is
     not specified it defaults to eq.
     ID is used to provide a target for pointing (via the
     fVal attribute) when the same <fs> is the value of
     more than one <f>. -->

<!ENTITY % simple.value   " plus | minus | any | none "       >
<!ENTITY % basic.value    " %simple.value; | sym | nbr | str ">

<!ELEMENT  f       - O (fs | %basic.value;)*                  >
<!ATTLIST  f           name   CDATA    #REQUIRED
		       org   (single | set | list)  #IMPLIED
		       rel   (eq | ne | sb | ns)    eq 
                       fVal   IDREFS   #IMPLIED               >
<!-- The parameter entity simple.value lists the four empty value
     elements; basic.value adds <sym>, <nbr>, and <str> to them.
     A feature has an obligatory name, and an optional organization
     and relation.  The feature value is the content of the <f>
     element: any number of embedded <fs>s and basic values.
     ORGanization declares whether the feature value is a
     single value, a set of values, or an ordered list.
     The RELation tells whether the specified fs equals,
     doesn't equal, subsumes, doesn't subsume the actual
     feature structure that it represents; when not specified
     it defaults to eq.
     FVAL points to the ID's of the feature values; when
     this attribute is used, the <f> element should have
     no content, which is why the content model permits an
     empty <f>.  The end tag of <f> may be omitted.  -->

<!ELEMENT  fAlt    - - ( (f | fs), (f | fs)+ )                >
<!-- A disjunctive alternation of features; that is, the feature
     structure has just one of the alternatives listed.  At least
     two alternatives are required, and each is either an <f> or
     an <fs>.  In this context, an embedded <fs> without a TYPE is
     used to specify a set of features. -->

<!ELEMENT  (%simple.value;)  - O  EMPTY                       >
<!-- This defines the possible feature values : <plus>,
     <minus>, <any>, and <none>.  Each is an empty element.  -->

<!ATTLIST  (%simple.value;)     id    ID     #IMPLIED         >
<!-- All kinds of simple values, as well as the basic values
     below, can have an optional ID (as can an <fs>) to serve as
     the target for pointing (via the fVal attr of <f>) when
     the same value is shared by more than one <f>. -->


<!ELEMENT  sym     - O EMPTY                                  >
<!ATTLIST  sym         id     ID      #IMPLIED
                       value  CDATA   #REQUIRED
		       rel    (eq | ne)    eq                 >
<!-- A feature value which is a symbol from a closed set.  The
     symbol itself is given by the obligatory VALUE attribute; the
     element is empty.  REL is either eq or ne and defaults to eq.
     ID is an optional target for the fVal attribute of <f>.  -->

<!ELEMENT  nbr     - O EMPTY                                  >
<!ATTLIST  nbr         id     ID      #IMPLIED
                       value  CDATA   #REQUIRED
		       rel   (eq | ne |gt | lt | ge | le)  eq >
<!-- A feature value which is a number.  The number itself is
     given by the obligatory VALUE attribute; the element is empty.
     REL is one of eq, ne, gt, lt, ge, or le and defaults to eq.
     ID is an optional target for the fVal attribute of <f>.  -->

<!ELEMENT  str     - - (#PCDATA)                              >
<!ATTLIST  str         id     ID      #IMPLIED
                       rel   (eq | ne |gt | lt | ge | le)  eq >
<!-- A feature value which is an arbitrary string.  Unlike <sym>
     and <nbr>, the value is given as the content of the element
     and both tags are required.  REL is one of eq, ne, gt, lt,
     ge, or le and defaults to eq.  ID is an optional target for
     the fVal attribute of <f>.  -->

