xmltok.h

00001 /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
00002    See the file COPYING for copying permission.
00003 */
00004 
00005 #ifndef XmlTok_INCLUDED
00006 #define XmlTok_INCLUDED 1
00007 
00008 #ifdef __cplusplus
00009 extern "C" {
00010 #endif
00011 
00012 /* The following token may be returned by XmlContentTok */
00013 #define XML_TOK_TRAILING_RSQB -5 /* ] or ]] at the end of the scan; might be
00014                                     start of illegal ]]> sequence */
00015 /* The following tokens may be returned by both XmlPrologTok and
00016    XmlContentTok.  Every code in this group is <= 0, whereas the
00017    ordinary token types defined later in this header are positive. */
00018 #define XML_TOK_NONE -4          /* The string to be scanned is empty */
00019 #define XML_TOK_TRAILING_CR -3   /* A CR at the end of the scan;
00020                                     might be part of CRLF sequence */
00021 #define XML_TOK_PARTIAL_CHAR -2  /* only part of a multibyte sequence */
00022 #define XML_TOK_PARTIAL -1       /* only part of a token */
00023 #define XML_TOK_INVALID 0        /* the string does not start a valid token */
00024 
00025 /* The following tokens are returned by XmlContentTok; some are also
00026    returned by XmlAttributeValueTok, XmlEntityValueTok, XmlCdataSectionTok.
00027 */
00028 #define XML_TOK_START_TAG_WITH_ATTS 1
00029 #define XML_TOK_START_TAG_NO_ATTS 2
00030 #define XML_TOK_EMPTY_ELEMENT_WITH_ATTS 3 /* empty element tag <e/> */
00031 #define XML_TOK_EMPTY_ELEMENT_NO_ATTS 4
00032 #define XML_TOK_END_TAG 5
00033 #define XML_TOK_DATA_CHARS 6
00034 #define XML_TOK_DATA_NEWLINE 7
00035 #define XML_TOK_CDATA_SECT_OPEN 8
00036 #define XML_TOK_ENTITY_REF 9
00037 #define XML_TOK_CHAR_REF 10               /* numeric character reference */
00038 
00039 /* The following tokens may be returned by both XmlPrologTok and
00040    XmlContentTok.
00041 */
00042 #define XML_TOK_PI 11                     /* processing instruction */
00043 #define XML_TOK_XML_DECL 12               /* XML decl or text decl */
00044 #define XML_TOK_COMMENT 13
00045 #define XML_TOK_BOM 14                    /* Byte order mark */
00046 
00047 /* The following tokens are returned only by XmlPrologTok */
00048 #define XML_TOK_PROLOG_S 15
00049 #define XML_TOK_DECL_OPEN 16              /* <!foo */
00050 #define XML_TOK_DECL_CLOSE 17             /* > */
00051 #define XML_TOK_NAME 18
00052 #define XML_TOK_NMTOKEN 19
00053 #define XML_TOK_POUND_NAME 20             /* #name */
00054 #define XML_TOK_OR 21                     /* | */
00055 #define XML_TOK_PERCENT 22
00056 #define XML_TOK_OPEN_PAREN 23
00057 #define XML_TOK_CLOSE_PAREN 24
00058 #define XML_TOK_OPEN_BRACKET 25
00059 #define XML_TOK_CLOSE_BRACKET 26
00060 #define XML_TOK_LITERAL 27
00061 #define XML_TOK_PARAM_ENTITY_REF 28
00062 #define XML_TOK_INSTANCE_START 29
00063 /* NOTE(review): the COND_SECT_* codes in the next group are annotated <![ and ]]>, which does not look specific to element type declarations - confirm the grouping. */
00064 /* The following occur only in element type declarations */
00065 #define XML_TOK_NAME_QUESTION 30          /* name? */
00066 #define XML_TOK_NAME_ASTERISK 31          /* name* */
00067 #define XML_TOK_NAME_PLUS 32              /* name+ */
00068 #define XML_TOK_COND_SECT_OPEN 33         /* <![ */
00069 #define XML_TOK_COND_SECT_CLOSE 34        /* ]]> */
00070 #define XML_TOK_CLOSE_PAREN_QUESTION 35   /* )? */
00071 #define XML_TOK_CLOSE_PAREN_ASTERISK 36   /* )* */
00072 #define XML_TOK_CLOSE_PAREN_PLUS 37       /* )+ */
00073 #define XML_TOK_COMMA 38
00074 
00075 /* The following token is returned only by XmlAttributeValueTok */
00076 #define XML_TOK_ATTRIBUTE_VALUE_S 39
00077 
00078 /* The following token is returned only by XmlCdataSectionTok */
00079 #define XML_TOK_CDATA_SECT_CLOSE 40
00080 
00081 /* With namespace processing this is returned by XmlPrologTok for a
00082    name with a colon.
00083 */
00084 #define XML_TOK_PREFIXED_NAME 41
00085 
00086 #ifdef XML_DTD
00087 #define XML_TOK_IGNORE_SECT 42            /* exists only when XML_DTD is defined */
00088 #endif /* XML_DTD */
00089 /* Number of tokenizer states; this is the length of the scanners[] array in struct encoding. */
00090 #ifdef XML_DTD
00091 #define XML_N_STATES 4
00092 #else /* not XML_DTD */
00093 #define XML_N_STATES 3
00094 #endif /* not XML_DTD */
00095 /* State values; XmlTok uses the state as the index into scanners[]. */
00096 #define XML_PROLOG_STATE 0
00097 #define XML_CONTENT_STATE 1
00098 #define XML_CDATA_SECTION_STATE 2
00099 #ifdef XML_DTD
00100 #define XML_IGNORE_SECTION_STATE 3        /* the fourth state, present only with XML_DTD */
00101 #endif /* XML_DTD */
00102 /* Literal kinds; XmlLiteralTok uses the kind as the index into literalScanners[], whose length is XML_N_LITERAL_TYPES. */
00103 #define XML_N_LITERAL_TYPES 2
00104 #define XML_ATTRIBUTE_VALUE_LITERAL 0
00105 #define XML_ENTITY_VALUE_LITERAL 1
00106 
00107 /* The size of the buffer passed to XmlUtf8Encode must be at least this; that buffer is a char *, so presumably counted in chars (TODO confirm). */
00108 #define XML_UTF8_ENCODE_MAX 4
00109 /* The size of the buffer passed to XmlUtf16Encode must be at least this; that buffer is an unsigned short *, so presumably counted in unsigned shorts (TODO confirm). */
00110 #define XML_UTF16_ENCODE_MAX 2
00111 /* Line/column pair.  A POSITION * is the last argument of the updatePosition callback (see XmlUpdatePosition). */
00112 typedef struct position {
00113   /* first line and first column are 0 not 1 */
00114   unsigned long lineNumber;     /* zero-based line index */
00115   unsigned long columnNumber;   /* zero-based column index */
00116 } POSITION;
00117 /* Attribute record.  The getAtts callback (see XmlGetAttributes) receives a non-const ATTRIBUTE *atts together with an int attsMax. */
00118 typedef struct {
00119   const char *name;       /* presumably points at the attribute name inside the scanned text - confirm */
00120   const char *valuePtr;   /* presumably the start of the attribute value - confirm */
00121   const char *valueEnd;   /* presumably the end of the attribute value - confirm */
00122   char normalized;        /* flag stored in a char; its exact meaning is not visible in this header */
00123 } ATTRIBUTE;
00124 /* ENCODING is declared before its definition because the SCANNER type below takes a const ENCODING *. */
00125 struct encoding;
00126 typedef struct encoding ENCODING;
00127 /* SCANNER: tokenizer function type; XmlTok and XmlLiteralTok call it as (enc, ptr, end, nextTokPtr).  NOTE(review): FASTCALL is not defined in this header - presumably the including file defines it first; confirm. */
00128 typedef int (FASTCALL *SCANNER)(const ENCODING *,   /* enc */
00129                                 const char *,       /* ptr: where scanning starts */
00130                                 const char *,       /* end: scanning does not go past this */
00131                                 const char **);     /* nextTokPtr */
00132 /* Per-encoding dispatch table.  Each function pointer is called through the corresponding Xml* macro in this header, which passes the ENCODING itself as the first argument. */
00133 struct encoding {
00134   SCANNER scanners[XML_N_STATES];                /* indexed by an XML_*_STATE value; called via XmlTok */
00135   SCANNER literalScanners[XML_N_LITERAL_TYPES];  /* indexed by an XML_*_LITERAL value; called via XmlLiteralTok */
00136   int (FASTCALL *sameName)(const ENCODING *,     /* called via XmlSameName(enc, ptr1, ptr2) */
00137                            const char *,
00138                            const char *);
00139   int (FASTCALL *nameMatchesAscii)(const ENCODING *,   /* called via XmlNameMatchesAscii(enc, ptr1, end1, ptr2) */
00140                                    const char *,
00141                                    const char *,
00142                                    const char *);
00143   int (FASTCALL *nameLength)(const ENCODING *, const char *);      /* called via XmlNameLength(enc, ptr) */
00144   const char *(FASTCALL *skipS)(const ENCODING *, const char *);   /* called via XmlSkipS(enc, ptr) */
00145   int (FASTCALL *getAtts)(const ENCODING *enc,   /* called via XmlGetAttributes(enc, ptr, attsMax, atts) */
00146                           const char *ptr,
00147                           int attsMax,
00148                           ATTRIBUTE *atts);
00149   int (FASTCALL *charRefNumber)(const ENCODING *enc, const char *ptr);   /* called via XmlCharRefNumber(enc, ptr) */
00150   int (FASTCALL *predefinedEntityName)(const ENCODING *,   /* called via XmlPredefinedEntityName(enc, ptr, end) */
00151                                        const char *,
00152                                        const char *);
00153   void (FASTCALL *updatePosition)(const ENCODING *,   /* called via XmlUpdatePosition(enc, ptr, end, pos) */
00154                                   const char *ptr,
00155                                   const char *end,
00156                                   POSITION *);
00157   int (FASTCALL *isPublicId)(const ENCODING *enc,   /* called via XmlIsPublicId(enc, ptr, end, badPtr) */
00158                              const char *ptr,
00159                              const char *end,
00160                              const char **badPtr);
00161   void (FASTCALL *utf8Convert)(const ENCODING *enc,   /* called via XmlUtf8Convert; output goes through char **toP */
00162                                const char **fromP,
00163                                const char *fromLim,
00164                                char **toP,
00165                                const char *toLim);
00166   void (FASTCALL *utf16Convert)(const ENCODING *enc,   /* called via XmlUtf16Convert; output goes through unsigned short **toP */
00167                                 const char **fromP,
00168                                 const char *fromLim,
00169                                 unsigned short **toP,
00170                                 const unsigned short *toLim);
00171   int minBytesPerChar;   /* data member, no accessor macro here; presumably the smallest byte count of one character - confirm */
00172   char isUtf8;           /* data member, no accessor macro here; flag stored in a char */
00173   char isUtf16;          /* data member, no accessor macro here; flag stored in a char */
00174 };
00175 
00176 /* Scan the string starting at ptr until the end of the next complete
00177    token, but do not scan past end.  Return an integer giving the
00178    type of token.
00179 
00180    Return XML_TOK_NONE when ptr == end; nextTokPtr will not be set.
00181 
00182    Return XML_TOK_PARTIAL when the string does not contain a complete
00183    token; nextTokPtr will not be set.
00184 
00185    Return XML_TOK_INVALID when the string does not start a valid
00186    token; nextTokPtr will be set to point to the character which made
00187    the token invalid.
00188 
00189    Otherwise the string starts with a valid token; nextTokPtr will be
00190    set to point to the character following the end of that token.
00191 
00192    Each data character counts as a single token, but adjacent data
00193    characters may be returned together.  Similarly for characters in
00194    the prolog outside literals, comments and processing instructions.
00195 */
00196 /* XmlTok calls enc->scanners[state]; the wrappers after it only fix the state argument. */
00197 /* NOTE: XmlTok expands enc twice (to pick the function and as its first argument), so enc should have no side effects. */
00198 #define XmlTok(enc, state, ptr, end, nextTokPtr) \
00199   (((enc)->scanners[state])(enc, ptr, end, nextTokPtr))
00200 
00201 #define XmlPrologTok(enc, ptr, end, nextTokPtr) \
00202    XmlTok(enc, XML_PROLOG_STATE, ptr, end, nextTokPtr)
00203 
00204 #define XmlContentTok(enc, ptr, end, nextTokPtr) \
00205    XmlTok(enc, XML_CONTENT_STATE, ptr, end, nextTokPtr)
00206 
00207 #define XmlCdataSectionTok(enc, ptr, end, nextTokPtr) \
00208    XmlTok(enc, XML_CDATA_SECTION_STATE, ptr, end, nextTokPtr)
00209 
00210 #ifdef XML_DTD
00211 /* Available only with XML_DTD, like the XML_IGNORE_SECTION_STATE value it passes. */
00212 #define XmlIgnoreSectionTok(enc, ptr, end, nextTokPtr) \
00213    XmlTok(enc, XML_IGNORE_SECTION_STATE, ptr, end, nextTokPtr)
00214 
00215 #endif /* XML_DTD */
00216 
00217 /* This is used for performing a 2nd-level tokenization on the content
00218    of a literal that has already been returned by XmlTok.  It calls
00219    enc->literalScanners[literalType]; enc is expanded twice. */
00220 #define XmlLiteralTok(enc, literalType, ptr, end, nextTokPtr) \
00221   (((enc)->literalScanners[literalType])(enc, ptr, end, nextTokPtr))
00222 /* The two wrappers that follow only fix the literalType argument. */
00223 #define XmlAttributeValueTok(enc, ptr, end, nextTokPtr) \
00224    XmlLiteralTok(enc, XML_ATTRIBUTE_VALUE_LITERAL, ptr, end, nextTokPtr)
00225 
00226 #define XmlEntityValueTok(enc, ptr, end, nextTokPtr) \
00227    XmlLiteralTok(enc, XML_ENTITY_VALUE_LITERAL, ptr, end, nextTokPtr)
00228 /* Thin call wrappers: each macro below calls one function pointer of *enc and passes enc back as the first argument, so enc is expanded twice and should have no side effects. */
00229 #define XmlSameName(enc, ptr1, ptr2) (((enc)->sameName)(enc, ptr1, ptr2))
00230 
00231 #define XmlNameMatchesAscii(enc, ptr1, end1, ptr2) \
00232   (((enc)->nameMatchesAscii)(enc, ptr1, end1, ptr2))
00233 
00234 #define XmlNameLength(enc, ptr) \
00235   (((enc)->nameLength)(enc, ptr))
00236 
00237 #define XmlSkipS(enc, ptr) \
00238   (((enc)->skipS)(enc, ptr))
00239 /* Calls the member named getAtts (the only wrapper whose name differs from its member). */
00240 #define XmlGetAttributes(enc, ptr, attsMax, atts) \
00241   (((enc)->getAtts)(enc, ptr, attsMax, atts))
00242 
00243 #define XmlCharRefNumber(enc, ptr) \
00244   (((enc)->charRefNumber)(enc, ptr))
00245 
00246 #define XmlPredefinedEntityName(enc, ptr, end) \
00247   (((enc)->predefinedEntityName)(enc, ptr, end))
00248 /* The wrapped function returns void, so this expression has no value. */
00249 #define XmlUpdatePosition(enc, ptr, end, pos) \
00250   (((enc)->updatePosition)(enc, ptr, end, pos))
00251 
00252 #define XmlIsPublicId(enc, ptr, end, badPtr) \
00253   (((enc)->isPublicId)(enc, ptr, end, badPtr))
00254 /* The two converters below wrap void functions; fromP and toP are pointer-to-pointer arguments. */
00255 #define XmlUtf8Convert(enc, fromP, fromLim, toP, toLim) \
00256   (((enc)->utf8Convert)(enc, fromP, fromLim, toP, toLim))
00257 
00258 #define XmlUtf16Convert(enc, fromP, fromLim, toP, toLim) \
00259   (((enc)->utf16Convert)(enc, fromP, fromLim, toP, toLim))
00260 /* Argument type of XmlInitEncoding / XmlInitEncodingNS: an ENCODING held by value plus a pointer to a const ENCODING * slot. */
00261 typedef struct {
00262   ENCODING initEnc;          /* a complete ENCODING embedded by value */
00263   const ENCODING **encPtr;   /* NOTE(review): presumably the caller's encoding pointer to be updated later - confirm in the implementation */
00264 } INIT_ENCODING;
00265 /* Function declarations.  Every function below with an NS suffix has the same parameter list and return type as the like-named function without the suffix. */
00266 int  XmlParseXmlDecl(int isGeneralTextEntity,
00267                               const ENCODING *enc,
00268                               const char *ptr,
00269                               const char *end,
00270                               const char **badPtr,
00271                               const char **versionPtr,
00272                               const char **versionEndPtr,
00273                               const char **encodingNamePtr,
00274                               const ENCODING **namedEncodingPtr,
00275                               int *standalonePtr);
00276 
00277 int  XmlInitEncoding(INIT_ENCODING *, const ENCODING **, const char *name);
00278 const ENCODING  *XmlGetUtf8InternalEncoding(void);
00279 const ENCODING  *XmlGetUtf16InternalEncoding(void);
00280 int  XmlUtf8Encode(int charNumber, char *buf);              /* buf must hold at least XML_UTF8_ENCODE_MAX */
00281 int  XmlUtf16Encode(int charNumber, unsigned short *buf);   /* buf must hold at least XML_UTF16_ENCODE_MAX */
00282 /* NOTE(review): presumably XmlSizeOfUnknownEncoding gives the number of bytes that mem must provide to XmlInitUnknownEncoding - confirm in the implementation. */
00283 int  XmlSizeOfUnknownEncoding(void);
00284 ENCODING  *
00285 XmlInitUnknownEncoding(void *mem,
00286                        int *table,
00287                        int (*conv)(void *userData, const char *p),
00288                        void *userData);
00289 /* NS-suffixed counterparts of the declarations above (presumably the namespace-processing variants - confirm). */
00290 int  XmlParseXmlDeclNS(int isGeneralTextEntity,
00291                                 const ENCODING *enc,
00292                                 const char *ptr,
00293                                 const char *end,
00294                                 const char **badPtr,
00295                                 const char **versionPtr,
00296                                 const char **versionEndPtr,
00297                                 const char **encodingNamePtr,
00298                                 const ENCODING **namedEncodingPtr,
00299                                 int *standalonePtr);
00300 int  XmlInitEncodingNS(INIT_ENCODING *, const ENCODING **, const char *name);
00301 const ENCODING  *XmlGetUtf8InternalEncodingNS(void);
00302 const ENCODING  *XmlGetUtf16InternalEncodingNS(void);
00303 ENCODING  *
00304 XmlInitUnknownEncodingNS(void *mem,
00305                          int *table,
00306                          int (*conv)(void *userData, const char *p),
00307                          void *userData);
00308 #ifdef __cplusplus
00309 }
00310 #endif
00311 
00312 #endif /* not XmlTok_INCLUDED */

Generated on Mon Jun 5 10:20:42 2006 for Intelligence.kdevelop by  doxygen 1.4.6