服务热线:13616026886

技术文档 欢迎使用技术文档,我们为你提供从新手到专业开发者的所有资源,你也可以通过它日益精进

位置:首页 > 技术文档 > JAVA > 新手入门 > 基础入门 > 查看文档

创造一种迅速而又随性的xml解释器


  XML是一种当前很受欢迎的数据格式,它的优点在于:人性化、自述性以及使用的方便性。但不幸的是,基于Java的XML解释器往往太大了,比如Sun的jaxp.jar和parser.jar每个都达到了1.4MB。如果你要在内存容量有限的环境(比如J2ME)或者带宽有限的环境(比如applet)里运行你的程序,这些庞大的package就不应该成为你的选择。
  注意:本文所需的全部代码可以通过以下链接下载:
  http://www.matrix.org.cn/down_view.asp?id=67
  下面是qdparser的代码:
  package qdxml;
  import java.io.*;
  import java.util.*;
  
  /** Quick and dirty XML parser. This parser is, like the SAX parser,
  an event-based parser, but with much less functionality. */
  public class qdparser {
  /**
   * Pops the most recently saved parser mode off the mode stack.
   *
   * @param st stack of saved modes; each entry is cast to Integer when popped
   * @return the popped mode, or pre when the stack is empty
   */
  private static int popmode(Stack st) {
    // Nothing saved: fall back to the pre mode.
    if(st.empty())
      return pre;
    return ((Integer)st.pop()).intValue();
  }
  // Parser modes for the state machine in parse(), listed in numeric order.
  // The values are only compared for equality and pushed on / popped off the
  // mode stack.

  // Between tags, collecting character data.
  private static final int text = 1;
  // Inside an entity reference, collecting its name up to the ';'.
  private static final int entity = 2;
  // Inside something that began like <foo ...; may still turn out to be a
  // comment, a cdata section or a doctype.
  private static final int open_tag = 3;
  // Inside a closing tag such as </foo>.
  private static final int close_tag = 4;
  // Just saw '<' and have not yet decided what kind of markup follows.
  private static final int start_tag = 5;
  // NOTE(review): presumably collecting an attribute name; confirm against
  // the handling code in parse().
  private static final int attribute_lvalue = 6;
  // Inside the quoted right-hand side of an attribute.
  private static final int quote = 7;
  // Entered after the tag name is terminated by whitespace and after an
  // attribute value's closing quote.
  private static final int in_tag = 8;
  // NOTE(review): presumably waiting for the '=' between an attribute name
  // and its value; confirm against the handling code in parse().
  private static final int attribute_equal = 9;
  // Waiting for the opening quote of an attribute value.
  private static final int attribute_rvalue = 10;
  // The root element has been closed; parse() ends the document and returns.
  private static final int done = 11;
  // Saw the '/' of an empty-element tag and expect the final '>'.
  private static final int single_tag = 12;
  // Inside a comment, looking for the terminating "-->".
  private static final int comment = 13;
  // Inside <? ... ?> or <!doctype ... >, skipping to the closing '>'.
  private static final int doctype = 14;
  // Outside the root element; the initial mode and popmode's fallback.
  private static final int pre = 15;
  // Inside a cdata section, looking for the terminating "]]>".
  private static final int cdata = 16;
  public static void parse(dochandler doc,reader r) throws exception {
  stack st = new stack();
  int depth = 0;
  int mode = pre;
  int c = 0;
  int quotec = '"';
  depth = 0;
  stringbuffer sb = new stringbuffer();
  stringbuffer etag = new stringbuffer();
  string tagname = null;
  string lvalue = null;
  string rvalue = null;
  hashtable attrs = null;
  st = new stack();
  doc.startdocument.);
  int line=1, col=0;
  boolean eol = false;
  while((c = r.read()) != -1) {
  
  // we need to map /r, /r/n, and /n to /n
  // see xml spec section 2.11
  if(c == '/n' && eol) {
  eol = false;
  continue;
  } else if(eol) {
  eol = false;
  } else if(c == '/n') {
  line++;
  col=0;
  } else if(c == '/r') {
  eol = true;
  c = '/n';
  line++;
  col=0;
  } else {
  col++;
  }
  
  if(mode == done) {
  doc.enddocument.);
  return;
  
  // we are between tags collecting text.
  } else if(mode == text) {
  if(c == '<') {
  st.push(new integer(mode));
  mode = start_tag;
  if(sb.length() > 0) {
  doc.text(sb.tostring());
  sb.setlength(0);
  }
  } else if(c == '&') {
  st.push(new integer(mode));
  mode = entity;
  etag.setlength(0);
  } else
  sb.append((char)c);
  
  // we are processing a closing tag: e.g. </foo>
  } else if(mode == close_tag) {
  if(c == '>') {
  mode = popmode(st);
  tagname = sb.tostring();
  sb.setlength(0);
  depth--;
  if(depth==0)
  mode = done;
  doc.endelement(tagname);
  } else {
  sb.append((char)c);
  }
  
  // we are processing cdata
  } else if(mode == cdata) {
  if(c == '>'
  && sb.tostring().endswith("]]")) {
  sb.setlength(sb.length()-2);
  doc.text(sb.tostring());
  sb.setlength(0);
  mode = popmode(st);
  } else
  sb.append((char)c);
  
  // we are processing a comment. we are inside
  // the <!-- .... --> looking for the -->.
  } else if(mode == comment) {
  if(c == '>'
  && sb.tostring().endswith("--")) {
  sb.setlength(0);
  mode = popmode(st);
  } else
  sb.append((char)c);
  
  // we are outside the root tag element
  } else if(mode == pre) {
  if(c == '<') {
  mode = text;
  st.push(new integer(mode));
  mode = start_tag;
  }
  
  // we are inside one of these <? ... ?>
  // or one of these <!doctype ... >
  } else if(mode == doctype) {
  if(c == '>') {
  mode = popmode(st);
  if(mode == text) mode = pre;
  }
  
  // we have just seen a < and
  // are wondering what we are looking at
  // <foo>, </foo>, <!-- ... --->, etc.
  } else if(mode == start_tag) {
  mode = popmode(st);
  if(c == '/') {
  st.push(new integer(mode));
  mode = close_tag;
  } else if (c == '?') {
  mode = doctype;
  } else {
  st.push(new integer(mode));
  mode = open_tag;
  tagname = null;
  attrs = new hashtable();
  sb.append((char)c);
  }
  
  // we are processing an entity, e.g. <, », etc.
  } else if(mode == entity) {
  if(c == ';') {
  mode = popmode(st);
  string cent = etag.tostring();
  etag.setlength(0);
  if(cent.equals("lt"))
  sb.append('<');
  else if(cent.equals("gt"))
  sb.append('>');
  else if(cent.equals("amp"))
  sb.append('&');
  else if(cent.equals("quot"))
  sb.append('"');
  else if(cent.equals("apos"))
  sb.append('/'');
  // could parse hex entities if we wanted to
  //else if(cent.startswith("#x"))
  //sb.append((char)integer.parseint(cent.substring(2),16));
  else if(cent.startswith("#"))
  sb.append((char)integer.parseint(cent.substring(1)));
  // insert custom entity definitions here
  else
  exc("unknown entity: &"+cent+";",line,col);
  } else {
  etag.append((char)c);
  }
  
  // we have just seen something like this:
  // <foo a="b"/
  // and are looking for the final >.
  } else if(mode == single_tag) {
  if(tagname == null)
  tagname = sb.tostring();
  if(c != '>')
  exc("expected > for tag: <"+tagname+"/>",line,col);
  doc.startelement(tagname,attrs);
  doc.endelement(tagname);
  if(depth==0) {
  doc.enddocument.);
  return;
  }
  sb.setlength(0);
  attrs = new hashtable();
  tagname = null;
  mode = popmode(st);
  
  // we are processing something
  // like this <foo ... >. it could
  // still be a <!-- ... --> or something.
  } else if(mode == open_tag) {
  if(c == '>') {
  if(tagname == null)
  tagname = sb.tostring();
  sb.setlength(0);
  depth++;
  doc.startelement(tagname,attrs);
  tagname = null;
  attrs = new hashtable();
  mode = popmode(st);
  } else if(c == '/') {
  mode = single_tag;
  } else if(c == '-' && sb.tostring().equals("!-")) {
  mode = comment;
  } else if(c == '[' && sb.tostring().equals("![cdata")) {
  mode = cdata;
  sb.setlength(0);
  } else if(c == 'e' && sb.tostring().equals("!doctyp")) {
  sb.setlength(0);
  mode = doctype;
  } else if(character.iswhitespace((char)c)) {
  tagname = sb.tostring();
  sb.setlength(0);
  mode = in_tag;
  } else {
  sb.append((char)c);
  }
  
  // we are processing the quoted right-hand side
  // of an element's attribute.
  } else if(mode == quote) {
  if(c == quotec) {
  rvalue = sb.tostring();
  sb.setlength(0);
  attrs.put(lvalue,rvalue);
  mode = in_tag;
  // see section the xml spec, section 3.3.3
  // on normalization processing.
  } else if(" /r/n/u0009".indexof(c)>=0) {
  sb.append(' ');
  } else if(c == '&') {
  st.push(new integer(mode));
  mode = entity;
  etag.setlength(0);
  } else {
  sb.append((char)c);
  }
  
  } else if(mode == attribute_rvalue) {
  if(c == '"' || c == '/'') {
  quotec = c;
  mode = quote;
  } else if(character.iswhitespace((char)c)) {
  
  } else {
  exc("error in attribute processing",line,col);
  }
  
  } else if(mode == attribute_lvalue) {
  if(character.iswhit

扫描关注微信公众号