XML是一种当前很受欢迎的数据格式，它的优点在于：人性化、自述性以及使用的方便性。但是，不幸的是，基于Java的XML解析器往往太大了，比如Sun的jaxp.jar和parser.jar，每个都达到了1.4MB。如果你要在内存容量有限的运行环境里运行你的程序（比如J2ME环境），或者在带宽很有限的运行环境里运行（比如applet），这些大的package不应该成为你的选择对象。
注意：本篇所需要的所有代码都可以通过此链接下载：
http://www.matrix.org.cn/down_view.asp?id=67
下面是qdparser的代码:
package qdxml;
import java.io.*;
import java.util.*;
/** Quick and dirty XML parser. This parser is, like the SAX parser,
an event-based parser, but with much less functionality. */
public class qdparser {
/**
 * Restores the parser mode that was saved most recently on the mode stack.
 *
 * @param st stack of saved modes; each entry is an Integer holding one of
 *           the mode constants declared in this class
 * @return the mode popped from the top of the stack, or pre (the
 *         "outside the root element" mode) when the stack is empty
 */
private static int popmode(Stack st) {
// An empty stack means there is no saved mode to return to, so fall back
// to the initial mode instead of letting pop() throw.
if(st.empty()) {
return pre;
}
// The stack is used as a raw type throughout this file, hence the cast.
return ((Integer)st.pop()).intValue();
}
// Parser modes. Exactly one is current at any time; the parse loop
// dispatches on it for every character read.
private static final int text = 1;              // between tags, collecting character data
private static final int entity = 2;            // inside an entity reference, after '&' and before ';'
private static final int open_tag = 3;          // inside <foo ... ; may still turn out to be a comment, CDATA or doctype
private static final int close_tag = 4;         // inside a closing tag such as </foo>
private static final int start_tag = 5;         // just saw '<', not yet known what kind of markup follows
private static final int attribute_lvalue = 6;
private static final int quote = 7;             // inside the quoted right-hand side of an attribute
private static final int in_tag = 8;            // entered after the tag name or after an attribute value has been read
private static final int attribute_equal = 9;
private static final int attribute_rvalue = 10; // waiting for the opening quote of an attribute value
private static final int done = 11;             // root element closed; the document end is reported on the next character
private static final int single_tag = 12;       // saw the '/' of <foo ... />, expecting the final '>'
private static final int comment = 13;          // inside <!-- ... -->, looking for the closing -->
private static final int doctype = 14;          // inside <? ... ?> or <!doctype ... >
private static final int pre = 15;              // outside the root element
private static final int cdata = 16;            // inside a CDATA section, looking for ]]>
public static void parse(dochandler doc,reader r) throws exception {
stack st = new stack();
int depth = 0;
int mode = pre;
int c = 0;
int quotec = '"';
depth = 0;
stringbuffer sb = new stringbuffer();
stringbuffer etag = new stringbuffer();
string tagname = null;
string lvalue = null;
string rvalue = null;
hashtable attrs = null;
st = new stack();
doc.startdocument.);
int line=1, col=0;
boolean eol = false;
while((c = r.read()) != -1) {
// we need to map /r, /r/n, and /n to /n
// see xml spec section 2.11
if(c == '/n' && eol) {
eol = false;
continue;
} else if(eol) {
eol = false;
} else if(c == '/n') {
line++;
col=0;
} else if(c == '/r') {
eol = true;
c = '/n';
line++;
col=0;
} else {
col++;
}
if(mode == done) {
doc.enddocument.);
return;
// we are between tags collecting text.
} else if(mode == text) {
if(c == '<') {
st.push(new integer(mode));
mode = start_tag;
if(sb.length() > 0) {
doc.text(sb.tostring());
sb.setlength(0);
}
} else if(c == '&') {
st.push(new integer(mode));
mode = entity;
etag.setlength(0);
} else
sb.append((char)c);
// we are processing a closing tag: e.g. </foo>
} else if(mode == close_tag) {
if(c == '>') {
mode = popmode(st);
tagname = sb.tostring();
sb.setlength(0);
depth--;
if(depth==0)
mode = done;
doc.endelement(tagname);
} else {
sb.append((char)c);
}
// we are processing cdata
} else if(mode == cdata) {
if(c == '>'
&& sb.tostring().endswith("]]")) {
sb.setlength(sb.length()-2);
doc.text(sb.tostring());
sb.setlength(0);
mode = popmode(st);
} else
sb.append((char)c);
// we are processing a comment. we are inside
// the <!-- .... --> looking for the -->.
} else if(mode == comment) {
if(c == '>'
&& sb.tostring().endswith("--")) {
sb.setlength(0);
mode = popmode(st);
} else
sb.append((char)c);
// we are outside the root tag element
} else if(mode == pre) {
if(c == '<') {
mode = text;
st.push(new integer(mode));
mode = start_tag;
}
// we are inside one of these <? ... ?>
// or one of these <!doctype ... >
} else if(mode == doctype) {
if(c == '>') {
mode = popmode(st);
if(mode == text) mode = pre;
}
// we have just seen a < and
// are wondering what we are looking at
// <foo>, </foo>, <!-- ... --->, etc.
} else if(mode == start_tag) {
mode = popmode(st);
if(c == '/') {
st.push(new integer(mode));
mode = close_tag;
} else if (c == '?') {
mode = doctype;
} else {
st.push(new integer(mode));
mode = open_tag;
tagname = null;
attrs = new hashtable();
sb.append((char)c);
}
// we are processing an entity, e.g. <, », etc.
} else if(mode == entity) {
if(c == ';') {
mode = popmode(st);
string cent = etag.tostring();
etag.setlength(0);
if(cent.equals("lt"))
sb.append('<');
else if(cent.equals("gt"))
sb.append('>');
else if(cent.equals("amp"))
sb.append('&');
else if(cent.equals("quot"))
sb.append('"');
else if(cent.equals("apos"))
sb.append('/'');
// could parse hex entities if we wanted to
//else if(cent.startswith("#x"))
//sb.append((char)integer.parseint(cent.substring(2),16));
else if(cent.startswith("#"))
sb.append((char)integer.parseint(cent.substring(1)));
// insert custom entity definitions here
else
exc("unknown entity: &"+cent+";",line,col);
} else {
etag.append((char)c);
}
// we have just seen something like this:
// <foo a="b"/
// and are looking for the final >.
} else if(mode == single_tag) {
if(tagname == null)
tagname = sb.tostring();
if(c != '>')
exc("expected > for tag: <"+tagname+"/>",line,col);
doc.startelement(tagname,attrs);
doc.endelement(tagname);
if(depth==0) {
doc.enddocument.);
return;
}
sb.setlength(0);
attrs = new hashtable();
tagname = null;
mode = popmode(st);
// we are processing something
// like this <foo ... >. it could
// still be a <!-- ... --> or something.
} else if(mode == open_tag) {
if(c == '>') {
if(tagname == null)
tagname = sb.tostring();
sb.setlength(0);
depth++;
doc.startelement(tagname,attrs);
tagname = null;
attrs = new hashtable();
mode = popmode(st);
} else if(c == '/') {
mode = single_tag;
} else if(c == '-' && sb.tostring().equals("!-")) {
mode = comment;
} else if(c == '[' && sb.tostring().equals("![cdata")) {
mode = cdata;
sb.setlength(0);
} else if(c == 'e' && sb.tostring().equals("!doctyp")) {
sb.setlength(0);
mode = doctype;
} else if(character.iswhitespace((char)c)) {
tagname = sb.tostring();
sb.setlength(0);
mode = in_tag;
} else {
sb.append((char)c);
}
// we are processing the quoted right-hand side
// of an element's attribute.
} else if(mode == quote) {
if(c == quotec) {
rvalue = sb.tostring();
sb.setlength(0);
attrs.put(lvalue,rvalue);
mode = in_tag;
// see section the xml spec, section 3.3.3
// on normalization processing.
} else if(" /r/n/u0009".indexof(c)>=0) {
sb.append(' ');
} else if(c == '&') {
st.push(new integer(mode));
mode = entity;
etag.setlength(0);
} else {
sb.append((char)c);
}
} else if(mode == attribute_rvalue) {
if(c == '"' || c == '/'') {
quotec = c;
mode = quote;
} else if(character.iswhitespace((char)c)) {
} else {
exc("error in attribute processing",line,col);
}
} else if(mode == attribute_lvalue) {
if(character.iswhit
闽公网安备 35060202000074号