|
libxml2
|
Internal routines and limits exported by the parser. More...
Macros | |
| #define | inputPush xmlCtxtPushInput |
| Push an input on the stack. | |
| #define | inputPop xmlCtxtPopInput |
| Pop an input from the stack. | |
| #define | xmlParserMaxDepth 256 |
| Maximum element nesting depth (without XML_PARSE_HUGE). | |
| #define | XML_MAX_TEXT_LENGTH 10000000 |
| Maximum size allowed for a single text node when building a tree. | |
| #define | XML_MAX_HUGE_LENGTH 1000000000 |
| Maximum size allowed when XML_PARSE_HUGE is set. | |
| #define | XML_MAX_NAME_LENGTH 50000 |
| Maximum size allowed for a markup identifier. | |
| #define | XML_MAX_DICTIONARY_LIMIT 100000000 |
| Maximum size allowed by the parser for a dictionary by default. This is not a limitation of the parser but a safety boundary feature; use the XML_PARSE_HUGE option to override it. | |
| #define | XML_MAX_LOOKUP_LIMIT 10000000 |
| Maximum size allowed by the parser for ahead lookup. This is an upper boundary enforced by the parser to avoid bad behaviour on "unfriendly" content. Introduced in 2.9.0. | |
| #define | XML_MAX_NAMELEN 100 |
| Identifiers can be longer, but this will be more costly at runtime. | |
| #define | IS_BYTE_CHAR(c) |
| Macro to check the following production in the XML spec: | |
| #define | IS_CHAR(c) |
| Macro to check the following production in the XML spec: | |
| #define | IS_CHAR_CH(c) |
| Behaves like IS_CHAR on single-byte value. | |
| #define | IS_BLANK(c) |
| Macro to check the following production in the XML spec: | |
| #define | IS_BLANK_CH(c) |
| Behaviour same as IS_BLANK. | |
| #define | IS_BASECHAR(c) |
| Macro to check the following production in the XML spec: | |
| #define | IS_DIGIT(c) |
| Macro to check the following production in the XML spec: | |
| #define | IS_DIGIT_CH(c) |
| Behaves like IS_DIGIT but with a single byte argument. | |
| #define | IS_COMBINING(c) |
| Macro to check the following production in the XML spec: | |
| #define | IS_COMBINING_CH(c) |
| Always false (all combining chars > 0xff) | |
| #define | IS_EXTENDER(c) |
| Macro to check the following production in the XML spec: | |
| #define | IS_EXTENDER_CH(c) |
| Behaves like IS_EXTENDER but with a single-byte argument. | |
| #define | IS_IDEOGRAPHIC(c) |
| Macro to check the following production in the XML spec: | |
| #define | IS_LETTER(c) |
| Macro to check the following production in the XML spec: | |
| #define | IS_LETTER_CH(c) |
| Macro behaves like IS_LETTER, but only checks base chars. | |
| #define | IS_ASCII_LETTER(c) |
| Macro to check [a-zA-Z]. | |
| #define | IS_ASCII_DIGIT(c) |
| Macro to check [0-9]. | |
| #define | IS_PUBIDCHAR(c) |
| Macro to check the following production in the XML spec: | |
| #define | IS_PUBIDCHAR_CH(c) |
| Same as IS_PUBIDCHAR but for single-byte value. | |
Functions | |
| int | xmlIsLetter (int c) |
| Check whether the character is allowed by the production. | |
| xmlParserCtxt * | xmlCreateFileParserCtxt (const char *filename) |
| Create a parser context for a file content. | |
| xmlParserCtxt * | xmlCreateURLParserCtxt (const char *filename, int options) |
| Create a parser context for a file or URL content. | |
| xmlParserCtxt * | xmlCreateMemoryParserCtxt (const char *buffer, int size) |
| Create a parser context for an XML in-memory document. | |
| xmlParserCtxt * | xmlCreateEntityParserCtxt (const xmlChar *URL, const xmlChar *ID, const xmlChar *base) |
| Create a parser context for an external entity. Automatic support for ZLIB/Compress compressed document is provided by default if found at compile-time. | |
| void | xmlCtxtErrMemory (xmlParserCtxt *ctxt) |
| Handle an out-of-memory error. | |
| int | xmlSwitchEncoding (xmlParserCtxt *ctxt, xmlCharEncoding enc) |
| Use encoding specified by enum to decode input data. | |
| int | xmlSwitchEncodingName (xmlParserCtxt *ctxt, const char *encoding) |
| Use specified encoding to decode input data. | |
| int | xmlSwitchToEncoding (xmlParserCtxt *ctxt, xmlCharEncodingHandler *handler) |
| Use encoding handler to decode input data. | |
| int | xmlSwitchInputEncoding (xmlParserCtxt *ctxt, xmlParserInput *input, xmlCharEncodingHandler *handler) |
| Use encoding handler to decode input data. | |
| xmlParserInput * | xmlNewStringInputStream (xmlParserCtxt *ctxt, const xmlChar *buffer) |
| Create a new input stream based on a memory buffer. | |
| xmlParserInput * | xmlNewEntityInputStream (xmlParserCtxt *ctxt, xmlEntity *entity) |
| Create a new input stream based on an xmlEntity. | |
| int | xmlCtxtPushInput (xmlParserCtxt *ctxt, xmlParserInput *input) |
| Pushes a new parser input on top of the input stack. | |
| xmlParserInput * | xmlCtxtPopInput (xmlParserCtxt *ctxt) |
| Pops the top parser input from the input stack. | |
| int | xmlPushInput (xmlParserCtxt *ctxt, xmlParserInput *input) |
| Push an input stream onto the stack. | |
| xmlChar | xmlPopInput (xmlParserCtxt *ctxt) |
| void | xmlFreeInputStream (xmlParserInput *input) |
| Free up an input stream. | |
| xmlParserInput * | xmlNewInputFromFile (xmlParserCtxt *ctxt, const char *filename) |
| Create a new input stream based on a file or an URL. | |
| xmlParserInput * | xmlNewInputStream (xmlParserCtxt *ctxt) |
| Create a new input stream structure. | |
| xmlChar * | xmlSplitQName (xmlParserCtxt *ctxt, const xmlChar *name, xmlChar **prefix) |
| Parse a UTF-8 encoded XML qualified name string. | |
| const xmlChar * | xmlParseName (xmlParserCtxt *ctxt) |
| Parse an XML name. | |
| xmlChar * | xmlParseNmtoken (xmlParserCtxt *ctxt) |
| Parse an XML Nmtoken. | |
| xmlChar * | xmlParseEntityValue (xmlParserCtxt *ctxt, xmlChar **orig) |
| Parse a value for ENTITY declarations. | |
| xmlChar * | xmlParseAttValue (xmlParserCtxt *ctxt) |
| Parse a value for an attribute. Note: the parser won't do substitution of entities here, this will be handled later in xmlStringGetNodeList. | |
| xmlChar * | xmlParseSystemLiteral (xmlParserCtxt *ctxt) |
| Parse an XML Literal. | |
| xmlChar * | xmlParsePubidLiteral (xmlParserCtxt *ctxt) |
| Parse an XML public literal. | |
| void | xmlParseCharData (xmlParserCtxt *ctxt, int cdata) |
| xmlChar * | xmlParseExternalID (xmlParserCtxt *ctxt, xmlChar **publicId, int strict) |
| Parse an External ID or a Public ID. | |
| void | xmlParseComment (xmlParserCtxt *ctxt) |
| Parse an XML (SGML) comment. | |
| const xmlChar * | xmlParsePITarget (xmlParserCtxt *ctxt) |
| Parse the name of a PI. | |
| void | xmlParsePI (xmlParserCtxt *ctxt) |
| Parse an XML Processing Instruction. | |
| void | xmlParseNotationDecl (xmlParserCtxt *ctxt) |
| Parse a notation declaration. | |
| void | xmlParseEntityDecl (xmlParserCtxt *ctxt) |
| Parse an entity declaration. | |
| int | xmlParseDefaultDecl (xmlParserCtxt *ctxt, xmlChar **value) |
| Parse an attribute default declaration. | |
| xmlEnumeration * | xmlParseNotationType (xmlParserCtxt *ctxt) |
| Parse a Notation attribute type. | |
| xmlEnumeration * | xmlParseEnumerationType (xmlParserCtxt *ctxt) |
| Parse an Enumeration attribute type. | |
| int | xmlParseEnumeratedType (xmlParserCtxt *ctxt, xmlEnumeration **tree) |
| Parse an Enumerated attribute type. | |
| int | xmlParseAttributeType (xmlParserCtxt *ctxt, xmlEnumeration **tree) |
| Parse the Attribute list def for an element. | |
| void | xmlParseAttributeListDecl (xmlParserCtxt *ctxt) |
| Parse an attribute list declaration for an element. | |
| xmlElementContent * | xmlParseElementMixedContentDecl (xmlParserCtxt *ctxt, int inputchk) |
| Parse the declaration for a Mixed Element content. The leading '(' and spaces have been skipped in xmlParseElementContentDecl. | |
| xmlElementContent * | xmlParseElementChildrenContentDecl (xmlParserCtxt *ctxt, int inputchk) |
| Parse the declaration for element children content. The leading '(' and spaces have been skipped in xmlParseElementContentDecl. | |
| int | xmlParseElementContentDecl (xmlParserCtxt *ctxt, const xmlChar *name, xmlElementContent **result) |
| Parse the declaration for an Element content either Mixed or Children, the cases EMPTY and ANY are handled directly in xmlParseElementDecl. | |
| int | xmlParseElementDecl (xmlParserCtxt *ctxt) |
| Parse an element declaration. | |
| void | xmlParseMarkupDecl (xmlParserCtxt *ctxt) |
| Parse markup declarations. | |
| int | xmlParseCharRef (xmlParserCtxt *ctxt) |
| Parse a numeric character reference. | |
| xmlEntity * | xmlParseEntityRef (xmlParserCtxt *ctxt) |
| void | xmlParseReference (xmlParserCtxt *ctxt) |
| Parse and handle entity references in content, depending on the SAX interface, this may end-up in a call to character() if this is a CharRef, a predefined entity, if there is no reference() callback. | |
| void | xmlParsePEReference (xmlParserCtxt *ctxt) |
| Parse a parameter entity reference. | |
| void | xmlParseDocTypeDecl (xmlParserCtxt *ctxt) |
| Parse a DOCTYPE declaration. | |
| const xmlChar * | xmlParseAttribute (xmlParserCtxt *ctxt, xmlChar **value) |
| Parse an attribute. | |
| const xmlChar * | xmlParseStartTag (xmlParserCtxt *ctxt) |
| Parse a start tag. | |
| void | xmlParseEndTag (xmlParserCtxt *ctxt) |
| Parse an end of tag. | |
| void | xmlParseCDSect (xmlParserCtxt *ctxt) |
| Parse escaped pure raw content. | |
| void | xmlParseContent (xmlParserCtxt *ctxt) |
| Parse XML element content. | |
| void | xmlParseElement (xmlParserCtxt *ctxt) |
| Parse an XML element. | |
| xmlChar * | xmlParseVersionNum (xmlParserCtxt *ctxt) |
| Parse the XML version value. | |
| xmlChar * | xmlParseVersionInfo (xmlParserCtxt *ctxt) |
| Parse the XML version. | |
| xmlChar * | xmlParseEncName (xmlParserCtxt *ctxt) |
| Parse the XML encoding name. | |
| const xmlChar * | xmlParseEncodingDecl (xmlParserCtxt *ctxt) |
| Parse the XML encoding declaration. | |
| int | xmlParseSDDecl (xmlParserCtxt *ctxt) |
| Parse the XML standalone declaration. | |
| void | xmlParseXMLDecl (xmlParserCtxt *ctxt) |
| Parse an XML declaration header. | |
| void | xmlParseTextDecl (xmlParserCtxt *ctxt) |
| Parse an XML declaration header for external entities. | |
| void | xmlParseMisc (xmlParserCtxt *ctxt) |
| Parse an XML Misc* optional field. | |
| void | xmlParseExternalSubset (xmlParserCtxt *ctxt, const xmlChar *publicId, const xmlChar *systemId) |
| Parse Markup declarations from an external subset. | |
| xmlChar * | xmlStringDecodeEntities (xmlParserCtxt *ctxt, const xmlChar *str, int what, xmlChar end, xmlChar end2, xmlChar end3) |
| xmlChar * | xmlStringLenDecodeEntities (xmlParserCtxt *ctxt, const xmlChar *str, int len, int what, xmlChar end, xmlChar end2, xmlChar end3) |
| int | xmlSkipBlankChars (xmlParserCtxt *ctxt) |
| Skip whitespace in the input stream. | |
| int | xmlStringCurrentChar (xmlParserCtxt *ctxt, const xmlChar *cur, int *len) |
| The current char value, if using UTF-8 this may actually span multiple bytes in the input buffer. | |
| void | xmlParserHandlePEReference (xmlParserCtxt *ctxt) |
| int | xmlCheckLanguageID (const xmlChar *lang) |
| Checks that the value conforms to the LanguageID production: | |
| int | xmlCurrentChar (xmlParserCtxt *ctxt, int *len) |
| The current char value, if using UTF-8 this may actually span multiple bytes in the input buffer. | |
| int | xmlCopyCharMultiByte (xmlChar *out, int val) |
| append the char value in the array | |
| int | xmlCopyChar (int len, xmlChar *out, int val) |
| append the char value in the array | |
| void | xmlNextChar (xmlParserCtxt *ctxt) |
| Skip to the next input char. | |
| void | xmlParserInputShrink (xmlParserInput *in) |
| This function removes used input for the parser. | |
Internal routines and limits exported by the parser.
Except for some I/O-related functions, most of these macros and functions are deprecated.
| #define inputPop xmlCtxtPopInput |
Pop an input from the stack.
| #define inputPush xmlCtxtPushInput |
Push an input on the stack.
| #define IS_ASCII_DIGIT | ( | c | ) |
| #define IS_ASCII_LETTER | ( | c | ) |
Macro to check [a-zA-Z].
| c | an xmlChar value |
| #define IS_BASECHAR | ( | c | ) |
Macro to check the following production in the XML spec:
[85] BaseChar ::= ... long list see REC ...
| c | an UNICODE value (int) |
| #define IS_BLANK | ( | c | ) |
Macro to check the following production in the XML spec:
[3] S ::= (#x20 | #x9 | #xD | #xA)+
| c | an UNICODE value (int) |
| #define IS_BLANK_CH | ( | c | ) |
Behaviour same as IS_BLANK.
| c | an xmlChar value (normally unsigned char) |
| #define IS_BYTE_CHAR | ( | c | ) |
Macro to check the following production in the XML spec:
[2] Char ::= #x9 | #xA | #xD | [#x20...]
any byte character in the accepted range
| c | a byte value (int) |
| #define IS_CHAR | ( | c | ) |
Macro to check the following production in the XML spec:
[2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
| [#x10000-#x10FFFF]
any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
| c | an UNICODE value (int) |
| #define IS_CHAR_CH | ( | c | ) |
Behaves like IS_CHAR on single-byte value.
| c | an xmlChar (usually an unsigned char) |
| #define IS_COMBINING | ( | c | ) |
Macro to check the following production in the XML spec:
[87] CombiningChar ::= ... long list see REC ...
| c | an UNICODE value (int) |
| #define IS_COMBINING_CH | ( | c | ) |
| #define IS_DIGIT | ( | c | ) |
Macro to check the following production in the XML spec:
[88] Digit ::= ... long list see REC ...
| c | an UNICODE value (int) |
| #define IS_DIGIT_CH | ( | c | ) |
Behaves like IS_DIGIT but with a single byte argument.
| c | an xmlChar value (usually an unsigned char) |
| #define IS_EXTENDER | ( | c | ) |
Macro to check the following production in the XML spec:
[89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
#x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
[#x309D-#x309E] | [#x30FC-#x30FE]
| c | an UNICODE value (int) |
| #define IS_EXTENDER_CH | ( | c | ) |
Behaves like IS_EXTENDER but with a single-byte argument.
| c | an xmlChar value (usually an unsigned char) |
| #define IS_IDEOGRAPHIC | ( | c | ) |
Macro to check the following production in the XML spec:
[86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
| c | an UNICODE value (int) |
| #define IS_LETTER | ( | c | ) |
Macro to check the following production in the XML spec:
[84] Letter ::= BaseChar | Ideographic
| c | an UNICODE value (int) |
| #define IS_LETTER_CH | ( | c | ) |
Macro behaves like IS_LETTER, but only checks base chars.
| c | an xmlChar value (normally unsigned char) |
| #define IS_PUBIDCHAR | ( | c | ) |
Macro to check the following production in the XML spec:
[13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] |
[-'()+,./:=?;!*#@$_%]
| c | an UNICODE value (int) |
| #define IS_PUBIDCHAR_CH | ( | c | ) |
Same as IS_PUBIDCHAR but for single-byte value.
| c | an xmlChar value (normally unsigned char) |
| #define XML_MAX_DICTIONARY_LIMIT 100000000 |
Maximum size allowed by the parser for a dictionary by default. This is not a limitation of the parser but a safety boundary feature; use the XML_PARSE_HUGE option to override it.
Introduced in 2.9.0
| #define XML_MAX_NAME_LENGTH 50000 |
Maximum size allowed for a markup identifier.
This is not a limitation of the parser but a safety boundary feature; use the XML_PARSE_HUGE option to override it. Note that with the use of parsing dictionaries, overriding the limit may result in more runtime memory usage in the face of "unfriendly" content. Introduced in 2.9.0
| #define XML_MAX_TEXT_LENGTH 10000000 |
Maximum size allowed for a single text node when building a tree.
This is not a limitation of the parser but a safety boundary feature, use XML_PARSE_HUGE option to override it. Introduced in 2.9.0
| int xmlCheckLanguageID | ( | const xmlChar * | lang | ) |
Checks that the value conforms to the LanguageID production:
NOTE: this is somewhat deprecated, those productions were removed from the XML Second edition.
[33] LanguageID ::= Langcode ('-' Subcode)*
[34] Langcode ::= ISO639Code | IanaCode | UserCode
[35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
[36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
[37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
[38] Subcode ::= ([a-z] | [A-Z])+
The current REC references the successors of RFC 1766, currently 5646
http://www.rfc-editor.org/rfc/rfc5646.txt
langtag = language
["-" script]
["-" region]
*("-" variant)
*("-" extension)
["-" privateuse]
language = 2*3ALPHA ; shortest ISO 639 code
["-" extlang] ; sometimes followed by
; extended language subtags
/ 4ALPHA ; or reserved for future use
/ 5*8ALPHA ; or registered language subtag
extlang = 3ALPHA ; selected ISO 639 codes
*2("-" 3ALPHA) ; permanently reserved
script = 4ALPHA ; ISO 15924 code
region = 2ALPHA ; ISO 3166-1 code
/ 3DIGIT ; UN M.49 code
variant = 5*8alphanum ; registered variants
/ (DIGIT 3alphanum)
extension = singleton 1*("-" (2*8alphanum))
; Single alphanumerics
; "x" reserved for private use
singleton = DIGIT ; 0 - 9
/ %x41-57 ; A - W
/ %x59-5A ; Y - Z
/ %x61-77 ; a - w
/ %x79-7A ; y - z
It sounds right to still allow irregular i-xxx IANA and user codes too. The parser below doesn't try to cope with extension or privateuse that could be added, but that's not interoperable anyway.
| lang | pointer to the string value |
| int xmlCopyChar | ( | int | len, |
| xmlChar * | out, | ||
| int | val ) |
append the char value in the array
| len | Ignored, compatibility |
| out | pointer to an array of xmlChar |
| val | the char value |
| int xmlCopyCharMultiByte | ( | xmlChar * | out, |
| int | val ) |
append the char value in the array
| out | pointer to an array of xmlChar |
| val | the char value |
| xmlParserCtxt * xmlCreateEntityParserCtxt | ( | const xmlChar * | URL, |
| const xmlChar * | ID, | ||
| const xmlChar * | base ) |
Create a parser context for an external entity. Automatic support for ZLIB/Compress compressed document is provided by default if found at compile-time.
| URL | the entity URL |
| ID | the entity PUBLIC ID |
| base | a possible base for the target URI |
| xmlParserCtxt * xmlCreateFileParserCtxt | ( | const char * | filename | ) |
Create a parser context for a file content.
Automatic support for ZLIB/Compress compressed document is provided by default if found at compile-time.
| filename | the filename |
| xmlParserCtxt * xmlCreateMemoryParserCtxt | ( | const char * | buffer, |
| int | size ) |
Create a parser context for an XML in-memory document.
The input buffer must not contain a terminating null byte.
| buffer | a pointer to a char array |
| size | the size of the array |
| xmlParserCtxt * xmlCreateURLParserCtxt | ( | const char * | filename, |
| int | options ) |
Create a parser context for a file or URL content.
Automatic support for ZLIB/Compress compressed document is provided by default if found at compile-time and for file accesses.
| filename | the filename or URL |
| options | a combination of xmlParserOption |
| void xmlCtxtErrMemory | ( | xmlParserCtxt * | ctxt | ) |
Handle an out-of-memory error.
| ctxt | an XML parser context |
| xmlParserInput * xmlCtxtPopInput | ( | xmlParserCtxt * | ctxt | ) |
Pops the top parser input from the input stack.
| ctxt | an XML parser context |
| int xmlCtxtPushInput | ( | xmlParserCtxt * | ctxt, |
| xmlParserInput * | value ) |
Pushes a new parser input on top of the input stack.
| ctxt | an XML parser context |
| value | the parser input |
| int xmlCurrentChar | ( | xmlParserCtxt * | ctxt, |
| int * | len ) |
The current char value, if using UTF-8 this may actually span multiple bytes in the input buffer.
Implement the end of line normalization:
2.11 End-of-Line Handling
Wherever an external parsed entity or the literal entity value of an internal parsed entity contains either the literal two-character sequence "#xD#xA" or a standalone literal #xD, an XML processor must pass to the application the single character #xA. (This behavior can conveniently be produced by normalizing all line breaks to #xA on input, before parsing.)
| ctxt | the XML parser context |
| len | pointer to the length of the char read |
| void xmlFreeInputStream | ( | xmlParserInput * | input | ) |
Free up an input stream.
| input | an xmlParserInput |
| int xmlIsLetter | ( | int | c | ) |
Check whether the character is allowed by the production.
| c | a Unicode character (int) |
| xmlParserInput * xmlNewEntityInputStream | ( | xmlParserCtxt * | ctxt, |
| xmlEntity * | ent ) |
Create a new input stream based on an xmlEntity.
| ctxt | an XML parser context |
| ent | an Entity pointer |
| xmlParserInput * xmlNewInputFromFile | ( | xmlParserCtxt * | ctxt, |
| const char * | filename ) |
Create a new input stream based on a file or an URL.
Unlike the default external entity loader, this function doesn't use XML catalogs.
| ctxt | an XML parser context |
| filename | the filename to use as entity |
| xmlParserInput * xmlNewInputStream | ( | xmlParserCtxt * | ctxt | ) |
Create a new input stream structure.
| ctxt | an XML parser context |
| xmlParserInput * xmlNewStringInputStream | ( | xmlParserCtxt * | ctxt, |
| const xmlChar * | buffer ) |
Create a new input stream based on a memory buffer.
| ctxt | an XML parser context |
| buffer | a memory buffer |
| void xmlNextChar | ( | xmlParserCtxt * | ctxt | ) |
Skip to the next input char.
| ctxt | the XML parser context |
| const xmlChar * xmlParseAttribute | ( | xmlParserCtxt * | ctxt, |
| xmlChar ** | value ) |
Parse an attribute.
[41] Attribute ::= Name Eq AttValue
[ WFC: No External Entity References ] Attribute values cannot contain direct or indirect entity references to external entities.
[ WFC: No < in Attribute Values ] The replacement text of any entity referred to directly or indirectly in an attribute value (other than "&lt;") must not contain a <.
[ VC: Attribute Value Type ] The attribute must have been declared; the value must be of the type declared for it.
[25] Eq ::= S? '=' S?
With namespace:
[NS 11] Attribute ::= QName Eq AttValue
Also the case QName == xmlns:??? is handled independently as a namespace definition.
| ctxt | an XML parser context |
| value | a xmlChar ** used to store the value of the attribute |
| void xmlParseAttributeListDecl | ( | xmlParserCtxt * | ctxt | ) |
Parse an attribute list declaration for an element.
Always consumes '<!'.
[52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>' [53] AttDef ::= S Name S AttType S DefaultDecl
| ctxt | an XML parser context |
| int xmlParseAttributeType | ( | xmlParserCtxt * | ctxt, |
| xmlEnumeration ** | tree ) |
Parse the Attribute list def for an element.
[54] AttType ::= StringType | TokenizedType | EnumeratedType
[55] StringType ::= 'CDATA'
[56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
Validity constraints for attribute values syntax are checked in xmlValidateAttributeValue
[ VC: ID ] Values of type ID must match the Name production. A name must not appear more than once in an XML document as a value of this type; i.e., ID values must uniquely identify the elements which bear them.
[ VC: One ID per Element Type ] No element type may have more than one ID attribute specified.
[ VC: ID Attribute Default ] An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
[ VC: IDREF ] Values of type IDREF must match the Name production, and values of type IDREFS must match Names; each IDREF Name must match the value of an ID attribute on some element in the XML document; i.e. IDREF values must match the value of some ID attribute.
[ VC: Entity Name ] Values of type ENTITY must match the Name production, values of type ENTITIES must match Names; each Entity Name must match the name of an unparsed entity declared in the DTD.
[ VC: Name Token ] Values of type NMTOKEN must match the Nmtoken production; values of type NMTOKENS must match Nmtokens.
| ctxt | an XML parser context |
| tree | the enumeration tree built while parsing |
| xmlChar * xmlParseAttValue | ( | xmlParserCtxt * | ctxt | ) |
Parse a value for an attribute. Note: the parser won't do substitution of entities here, this will be handled later in xmlStringGetNodeList.
[10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
"'" ([^<&'] | Reference)* "'"
3.3.3 Attribute-Value Normalization:
Before the value of an attribute is passed to the application or checked for validity, the XML processor must normalize it as follows:
If the declared value is not CDATA, then the XML processor must further process the normalized attribute value by discarding any leading and trailing space (#x20) characters, and by replacing sequences of space (#x20) characters by a single space (#x20) character. All attributes for which no declaration has been read should be treated by a non-validating parser as if declared CDATA.
| ctxt | an XML parser context |
| void xmlParseCDSect | ( | xmlParserCtxt * | ctxt | ) |
Parse escaped pure raw content.
Always consumes '<!['.
[18] CDSect ::= CDStart CData CDEnd [19] CDStart ::= '<![CDATA[' [20] CData ::= (Char* - (Char* ']]>' Char*)) [21] CDEnd ::= ']]>'
| ctxt | an XML parser context |
| void xmlParseCharData | ( | xmlParserCtxt * | ctxt, |
| int | cdata ) |
| ctxt | an XML parser context |
| cdata | unused |
| int xmlParseCharRef | ( | xmlParserCtxt * | ctxt | ) |
Parse a numeric character reference.
Always consumes '&'.
[66] CharRef ::= '&#' [0-9]+ ';' |
'&#x' [0-9a-fA-F]+ ';'
[ WFC: Legal Character ] Characters referred to using character references must match the production for Char.
| ctxt | an XML parser context |
| void xmlParseComment | ( | xmlParserCtxt * | ctxt | ) |
Parse an XML (SGML) comment.
Always consumes '<!'.
The spec says that "For compatibility, the string "--" (double-hyphen) must not occur within comments."
[15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
| ctxt | an XML parser context |
| void xmlParseContent | ( | xmlParserCtxt * | ctxt | ) |
Parse XML element content.
This is useful if you're only interested in custom SAX callbacks. If you want a node list, use xmlCtxtParseContent.
| ctxt | an XML parser context |
| int xmlParseDefaultDecl | ( | xmlParserCtxt * | ctxt, |
| xmlChar ** | value ) |
Parse an attribute default declaration.
[60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
[ VC: Required Attribute ] if the default declaration is the keyword #REQUIRED, then the attribute must be specified for all elements of the type in the attribute-list declaration.
[ VC: Attribute Default Legal ] The declared default value must meet the lexical constraints of the declared attribute type c.f. xmlValidateAttributeDecl
[ VC: Fixed Attribute Default ] if an attribute has a default value declared with the #FIXED keyword, instances of that attribute must match the default value.
[ WFC: No < in Attribute Values ] handled in xmlParseAttValue
| ctxt | an XML parser context |
| value | Receive a possible fixed default value for the attribute |
| void xmlParseDocTypeDecl | ( | xmlParserCtxt * | ctxt | ) |
Parse a DOCTYPE declaration.
[28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
('[' (markupdecl | PEReference | S)* ']' S?)? '>'
[ VC: Root Element Type ] The Name in the document type declaration must match the element type of the root element.
| ctxt | an XML parser context |
| void xmlParseElement | ( | xmlParserCtxt * | ctxt | ) |
Parse an XML element.
[39] element ::= EmptyElemTag | STag content ETag
[ WFC: Element Type Match ] The Name in an element's end-tag must match the element type in the start-tag.
| ctxt | an XML parser context |
| xmlElementContent * xmlParseElementChildrenContentDecl | ( | xmlParserCtxt * | ctxt, |
| int | inputchk ) |
Parse the declaration for element children content. The leading '(' and spaces have been skipped in xmlParseElementContentDecl.
[47] children ::= (choice | seq) ('?' | '*' | '+')?
[48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
[49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
[50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
[ VC: Proper Group/PE Nesting ] applies to [49] and [50] TODO Parameter-entity replacement text must be properly nested with parenthesized groups. That is to say, if either of the opening or closing parentheses in a choice, seq, or Mixed construct is contained in the replacement text for a parameter entity, both must be contained in the same replacement text. For interoperability, if a parameter-entity reference appears in a choice, seq, or Mixed construct, its replacement text should not be empty, and neither the first nor last non-blank character of the replacement text should be a connector (| or ,).
| ctxt | an XML parser context |
| inputchk | the input used for the current entity, needed for boundary checks |
| int xmlParseElementContentDecl | ( | xmlParserCtxt * | ctxt, |
| const xmlChar * | name, | ||
| xmlElementContent ** | result ) |
Parse the declaration for an Element content either Mixed or Children, the cases EMPTY and ANY are handled directly in xmlParseElementDecl.
[46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
| ctxt | an XML parser context |
| name | the name of the element being defined. |
| result | the Element Content pointer will be stored here if any |
| int xmlParseElementDecl | ( | xmlParserCtxt * | ctxt | ) |
Parse an element declaration.
Always consumes '<!'.
[45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
[ VC: Unique Element Type Declaration ] No element type may be declared more than once
| ctxt | an XML parser context |
| xmlElementContent * xmlParseElementMixedContentDecl | ( | xmlParserCtxt * | ctxt, |
| int | openInputNr ) |
Parse the declaration for a Mixed Element content The leading '(' and spaces have been skipped in xmlParseElementContentDecl.
[51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
'(' S? '#PCDATA' S? ')'
[ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
[ VC: No Duplicate Types ] The same name must not appear more than once in a single mixed-content declaration.
| ctxt | an XML parser context |
| openInputNr | the input used for the current entity, needed for boundary checks |
| xmlChar * xmlParseEncName | ( | xmlParserCtxt * | ctxt | ) |
Parse the XML encoding name.
[81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
| ctxt | an XML parser context |
| const xmlChar * xmlParseEncodingDecl | ( | xmlParserCtxt * | ctxt | ) |
Parse the XML encoding declaration.
[80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |
"'" EncName "'")
This sets up the conversion filters.
| ctxt | an XML parser context |
| void xmlParseEndTag | ( | xmlParserCtxt * | ctxt | ) |
Parse an end of tag.
[42] ETag ::= '</' Name S? '>'
With namespace
[NS 9] ETag ::= '</' QName S? '>'
| ctxt | an XML parser context |
| void xmlParseEntityDecl | ( | xmlParserCtxt * | ctxt | ) |
Parse an entity declaration.
Always consumes '<!'.
[70] EntityDecl ::= GEDecl | PEDecl [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>' [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) [74] PEDef ::= EntityValue | ExternalID [76] NDataDecl ::= S 'NDATA' S Name
[ VC: Notation Declared ] The Name must match the declared name of a notation.
| ctxt | an XML parser context |
| xmlEntity * xmlParseEntityRef | ( | xmlParserCtxt * | ctxt | ) |
| ctxt | an XML parser context |
| xmlChar * xmlParseEntityValue | ( | xmlParserCtxt * | ctxt, |
| xmlChar ** | orig ) |
Parse a value for ENTITY declarations.
[9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
"'" ([^%&'] | PEReference | Reference)* "'"
| ctxt | an XML parser context |
| orig | if non-NULL store a copy of the original entity value |
| int xmlParseEnumeratedType | ( | xmlParserCtxt * | ctxt, |
| xmlEnumeration ** | tree ) |
Parse an Enumerated attribute type.
[57] EnumeratedType ::= NotationType | Enumeration
[58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
| ctxt | an XML parser context |
| tree | the enumeration tree built while parsing |
| xmlEnumeration * xmlParseEnumerationType | ( | xmlParserCtxt * | ctxt | ) |
Parse an Enumeration attribute type.
[59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
[ VC: Enumeration ] Values of this type must match one of the Nmtoken tokens in the declaration
| ctxt | an XML parser context |
| xmlChar * xmlParseExternalID | ( | xmlParserCtxt * | ctxt, |
| xmlChar ** | publicId, | ||
| int | strict ) |
Parse an External ID or a Public ID.
NOTE: Productions [75] and [83] interact badly since [75] can generate 'PUBLIC' S PubidLiteral S SystemLiteral
[75] ExternalID ::= 'SYSTEM' S SystemLiteral
| 'PUBLIC' S PubidLiteral S SystemLiteral
[83] PublicID ::= 'PUBLIC' S PubidLiteral
| ctxt | an XML parser context |
| publicId | a xmlChar** receiving PubidLiteral |
| strict | indicate whether we should restrict parsing to only production [75], see the NOTE above |
| void xmlParseExternalSubset | ( | xmlParserCtxt * | ctxt, |
| const xmlChar * | publicId, | ||
| const xmlChar * | systemId ) |
Parse Markup declarations from an external subset.
[30] extSubset ::= textDecl? extSubsetDecl
[31] extSubsetDecl ::= (markupdecl | conditionalSect |
PEReference | S) *
| ctxt | an XML parser context |
| publicId | the public identifier |
| systemId | the system identifier (URL) |
| void xmlParseMarkupDecl | ( | xmlParserCtxt * | ctxt | ) |
Parse markup declarations.
Always consumes '<!' or '<?'.
[29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
NotationDecl | PI | Comment
[ VC: Proper Declaration/PE Nesting ] Parameter-entity replacement text must be properly nested with markup declarations. That is to say, if either the first character or the last character of a markup declaration (markupdecl above) is contained in the replacement text for a parameter-entity reference, both must be contained in the same replacement text.
[ WFC: PEs in Internal Subset ] In the internal DTD subset, parameter-entity references can occur only where markup declarations can occur, not within markup declarations. (This does not apply to references that occur in external parameter entities or to the external subset.)
| ctxt | an XML parser context |
| void xmlParseMisc | ( | xmlParserCtxt * | ctxt | ) |
Parse an XML Misc* optional field.
[27] Misc ::= Comment | PI | S
| ctxt | an XML parser context |
| const xmlChar * xmlParseName | ( | xmlParserCtxt * | ctxt | ) |
Parse an XML name.
[4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
CombiningChar | Extender
[5] Name ::= (Letter | '_' | ':') (NameChar)*
[6] Names ::= Name (#x20 Name)*
| ctxt | an XML parser context |
| xmlChar * xmlParseNmtoken | ( | xmlParserCtxt * | ctxt | ) |
Parse an XML Nmtoken.
[7] Nmtoken ::= (NameChar)+
[8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
| ctxt | an XML parser context |
| void xmlParseNotationDecl | ( | xmlParserCtxt * | ctxt | ) |
Parse a notation declaration.
Always consumes '<!'.
[82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID)
S? '>'
Hence there are actually 3 choices:
'PUBLIC' S PubidLiteral
'PUBLIC' S PubidLiteral S SystemLiteral
'SYSTEM' S SystemLiteral
See the NOTE on xmlParseExternalID.
| ctxt | an XML parser context |
| xmlEnumeration * xmlParseNotationType | ( | xmlParserCtxt * | ctxt | ) |
Parse a Notation attribute type.
Note: the leading 'NOTATION' S part has already been parsed.
[58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
[ VC: Notation Attributes ] Values of this type must match one of the notation names included in the declaration; all notation names in the declaration must be declared.
| ctxt | an XML parser context |
| void xmlParsePEReference | ( | xmlParserCtxt * | ctxt | ) |
Parse a parameter entity reference.
| ctxt | an XML parser context |
| void xmlParsePI | ( | xmlParserCtxt * | ctxt | ) |
Parse an XML Processing Instruction.
[16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
The processing is transferred to SAX once parsed.
| ctxt | an XML parser context |
| const xmlChar * xmlParsePITarget | ( | xmlParserCtxt * | ctxt | ) |
Parse the name of a PI.
[17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
| ctxt | an XML parser context |
| xmlChar * xmlParsePubidLiteral | ( | xmlParserCtxt * | ctxt | ) |
Parse an XML public literal.
[12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
| ctxt | an XML parser context |
| void xmlParseReference | ( | xmlParserCtxt * | ctxt | ) |
Parse and handle entity references in content. Depending on the SAX interface, this may end up in a call to character() if this is a CharRef or a predefined entity, if there is no reference() callback, or if the parser was asked to switch to that mode.
Always consumes '&'.
[67] Reference ::= EntityRef | CharRef
| ctxt | an XML parser context |
| void xmlParserHandlePEReference | ( | xmlParserCtxt * | ctxt | ) |
[69] PEReference ::= '%' Name ';'
[ WFC: No Recursion ] A parsed entity must not contain a recursive reference to itself, either directly or indirectly.
[ WFC: Entity Declared ] In a document without any DTD, a document with only an internal DTD subset which contains no parameter entity references, or a document with "standalone='yes'", ... ... The declaration of a parameter entity must precede any reference to it...
[ VC: Entity Declared ] In a document with an external subset or external parameter entities with "standalone='no'", ... ... The declaration of a parameter entity must precede any reference to it...
[ WFC: In DTD ] Parameter-entity references may only appear in the DTD. NOTE: misleading but this is handled.
A PEReference may have been detected in the current input stream; the handling is done according to http://www.w3.org/TR/REC-xml#entproc.
| ctxt | the parser context |
| void xmlParserInputShrink | ( | xmlParserInput * | in | ) |
This function removes used input for the parser.
| in | an XML parser input |
| int xmlParseSDDecl | ( | xmlParserCtxt * | ctxt | ) |
Parse the XML standalone declaration.
[32] SDDecl ::= S 'standalone' Eq
(("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
[ VC: Standalone Document Declaration ] TODO The standalone document declaration must have the value "no" if any external markup declarations contain declarations of:
| ctxt | an XML parser context |
| const xmlChar * xmlParseStartTag | ( | xmlParserCtxt * | ctxt | ) |
Parse a start tag.
Always consumes '<'.
[40] STag ::= '<' Name (S Attribute)* S? '>'
[ WFC: Unique Att Spec ] No attribute name may appear more than once in the same start-tag or empty-element tag.
[44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
[ WFC: Unique Att Spec ] No attribute name may appear more than once in the same start-tag or empty-element tag.
With namespace:
[NS 8] STag ::= '<' QName (S Attribute)* S? '>'
[NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
| ctxt | an XML parser context |
| xmlChar * xmlParseSystemLiteral | ( | xmlParserCtxt * | ctxt | ) |
Parse an XML Literal.
[11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
| ctxt | an XML parser context |
| void xmlParseTextDecl | ( | xmlParserCtxt * | ctxt | ) |
Parse an XML declaration header for external entities.
[77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
| ctxt | an XML parser context |
| xmlChar * xmlParseVersionInfo | ( | xmlParserCtxt * | ctxt | ) |
Parse the XML version.
[24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"')
[25] Eq ::= S? '=' S?
| ctxt | an XML parser context |
| xmlChar * xmlParseVersionNum | ( | xmlParserCtxt * | ctxt | ) |
Parse the XML version value.
[26] VersionNum ::= '1.' [0-9]+
In practice allow [0-9].[0-9]+ at that level
| ctxt | an XML parser context |
| void xmlParseXMLDecl | ( | xmlParserCtxt * | ctxt | ) |
Parse an XML declaration header.
[23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
| ctxt | an XML parser context |
| xmlChar xmlPopInput | ( | xmlParserCtxt * | ctxt | ) |
| ctxt | an XML parser context |
| int xmlPushInput | ( | xmlParserCtxt * | ctxt, |
| xmlParserInput * | input ) |
Push an input stream onto the stack.
| ctxt | an XML parser context |
| input | an XML parser input fragment (entity, XML fragment ...). |
| int xmlSkipBlankChars | ( | xmlParserCtxt * | ctxt | ) |
Skip whitespace in the input stream.
| ctxt | the XML parser context |
| xmlChar * xmlSplitQName | ( | xmlParserCtxt * | ctxt, |
| const xmlChar * | name, | ||
| xmlChar ** | prefixOut ) |
Parse a UTF-8 encoded XML qualified name string.
| ctxt | an XML parser context |
| name | the UTF-8 encoded XML qualified name string to parse |
| prefixOut | a xmlChar ** |
| int xmlStringCurrentChar | ( | xmlParserCtxt * | ctxt, |
| const xmlChar * | cur, | ||
| int * | len ) |
The current char value; if using UTF-8, this may actually span multiple bytes in the input buffer.
| ctxt | the XML parser context |
| cur | pointer to the beginning of the char |
| len | pointer to the length of the char read |
| xmlChar * xmlStringDecodeEntities | ( | xmlParserCtxt * | ctxt, |
| const xmlChar * | str, | ||
| int | what, | ||
| xmlChar | end, | ||
| xmlChar | end2, | ||
| xmlChar | end3 ) |
| ctxt | the parser context |
| str | the input string |
| what | combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF |
| end | an end marker xmlChar, 0 if none |
| end2 | an end marker xmlChar, 0 if none |
| end3 | an end marker xmlChar, 0 if none |
| xmlChar * xmlStringLenDecodeEntities | ( | xmlParserCtxt * | ctxt, |
| const xmlChar * | str, | ||
| int | len, | ||
| int | what, | ||
| xmlChar | end, | ||
| xmlChar | end2, | ||
| xmlChar | end3 ) |
| ctxt | the parser context |
| str | the input string |
| len | the string length |
| what | combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF |
| end | an end marker xmlChar, 0 if none |
| end2 | an end marker xmlChar, 0 if none |
| end3 | an end marker xmlChar, 0 if none |
| int xmlSwitchEncoding | ( | xmlParserCtxt * | ctxt, |
| xmlCharEncoding | enc ) |
Use encoding specified by enum to decode input data.
This overrides the encoding found in the XML declaration.
This function can also be used to override the encoding of chunks passed to xmlParseChunk.
| ctxt | the parser context |
| enc | the encoding value (number) |
| int xmlSwitchEncodingName | ( | xmlParserCtxt * | ctxt, |
| const char * | encoding ) |
Use specified encoding to decode input data.
This overrides the encoding found in the XML declaration.
This function can also be used to override the encoding of chunks passed to xmlParseChunk.
| ctxt | the parser context |
| encoding | the encoding name |
| int xmlSwitchInputEncoding | ( | xmlParserCtxt * | ctxt, |
| xmlParserInput * | input, | ||
| xmlCharEncodingHandler * | handler ) |
Use encoding handler to decode input data.
| ctxt | the parser context, only for error reporting |
| input | the input stream |
| handler | the encoding handler |
| int xmlSwitchToEncoding | ( | xmlParserCtxt * | ctxt, |
| xmlCharEncodingHandler * | handler ) |
Use encoding handler to decode input data.
This function can be used to enforce the encoding of chunks passed to xmlParseChunk.
| ctxt | the parser context |
| handler | the encoding handler |