//package xmlDOMParse; import java.awt.*; import java.awt.event.*; import java.applet.*; import java.io.Serializable; import java.lang.*; import java.util.*; import org.w3c.dom.*; import org.apache.xerces.parsers.DOMParser; import ui.DOMParserSaveEncoding; import org.apache.regexp.*; public class DOMParse { private Document xmlDocument; private static ArrayList authors = new ArrayList(); private static ArrayList fullTextSrc = new ArrayList(); private static String dateModified = new String(); private static String dateCreated = new String(); private static String titleDE = new String(); private static String titleEN = new String(); private static String cnote = new String(); public DOMParse (Document pXMLDocument) { xmlDocument = pXMLDocument; authors = new ArrayList(); fullTextSrc = new ArrayList(); dateModified = new String(); dateCreated = new String(); titleDE = new String(); titleEN = new String(); cnote = new String(); } private void parseDoc() throws RESyntaxException { NodeList nodes = xmlDocument.getChildNodes(); extract(nodes); } private static void extract(NodeList nodes) throws RESyntaxException { int len = (nodes != null) ? nodes.getLength() : 0; for (int i=0; i < len; i++) { Node node = nodes.item(i); switch (node.getNodeType()) { case Node.DOCUMENT_NODE: { // doesn't work at the moment String docNode = node.getNodeName(); // #FIXME# break; } case Node.ELEMENT_NODE: { if (node.getNodeName().equals("rdf:Description")) { /* Extracting the copyright note from the xml file if availabel */ if (node.getParentNode().getNodeName().equals("dc:rights")) { Element element = (Element)node; RE regexp = new RE(" +"); // some text needs regexp to try { // be shown correctly String cnote = regexp.subst(element.getAttribute("rdfs:label"), " "); System.out.println(cnote); // for testing if (cnote.length() == 0) // I think this is stupid throw new RESyntaxException("The copyright note is empty!"); } catch (RESyntaxException reex) { System.err.println(reex); } } /* Extracting the authors from the xml file, there has to be at * least one author! */ if (node.getParentNode().getNodeName().equals("rdf:li") && node.getParentNode().getParentNode().getNodeName().equals("rdf:Bag")) { Element element = (Element)node; System.out.println(element.getAttribute("vCard:FN")); // for testing authors.add(element.getAttribute("vCard:FN")); } /* Extracting the creation date of the xml file */ if (node.getParentNode().getNodeName().equals("dcq:created")) { Element element = (Element)node; System.out.println(element.getAttribute("rdf:value")); // for testing dateCreated = element.getAttribute("rdf:value"); } /* Extracting the modification date of the xml file. */ if (node.getParentNode().getNodeName().equals("dcq:modified")) { Element element = (Element)node; System.out.println(element.getAttribute("rdf:value")); // for testing dateModified = element.getAttribute("rdf:value"); } } if (node.getNodeName().equals("rdf:li")) { Element element = (Element)node; /* Extracting the resource of the publication. Should be at least * one item, at most three. */ if (node.getParentNode().getNodeName().equals("rdf:Alt") && node.getParentNode().getParentNode().getNodeName().equals("dc:identifier")) { System.out.println(element.getAttribute("rdf:resource")); // for testing fullTextSrc.add(element.getAttribute("rdf:resource")); } /* Extracting title of the publication. */ if (element.hasAttribute("xml:lang")) { NodeList nl = node.getChildNodes(); for (int k=0; k < nl.getLength(); k++) { if ((nl.item(k)).getNodeType() == Node.TEXT_NODE) { /* Looking for the German title */ if (element.getAttribute("xml:lang").equals("de")) { RE regexp = new RE("\n"); try { titleDE = regexp.subst((nl.item(k)).getNodeValue(), " "); System.out.println("Deutscher Titel: " + titleDE); if (titleDE.length() == 0) { throw new RESyntaxException("No German title."); } } catch (RESyntaxException reex) { System.err.println(reex); } } /* Looking for the English titel */ else { RE regexp = new RE("\n"); try { titleEN = regexp.subst((nl.item(k)).getNodeValue(), " "); System.out.println("Englischer Titel: " + titleEN); // for testing if (titleEN.length() == 0) { throw new RESyntaxException("No English title."); } } catch (RESyntaxException reex) { System.err.println(reex); } } } } } } } /* Recursively calling the method. Could be done better. */ extract(node.getChildNodes()); break; } } } /* getMethods() could be useful later, i.e. for returning the nodes in the * applet. */ public ArrayList getAuthors() { return authors; } public ArrayList getFullTextSrc() { return fullTextSrc; } public String getCNote() { return cnote; } public String getDateModified() { return dateModified; } public String getDateCreated() { return dateCreated; } public String getTitleDE() { return titleDE; } public String getTitleEN() { return titleEN; } /* Now fill in all extracted nodes in the Applet */ private void fillInElements() { TextField cNoteF = new TextField(); cNoteF.setText(getCNote()); TextField dateCF = new TextField(); dateCF.setText(getDateCreated()); TextField dateMF = new TextField(); dateMF.setText(getDateModified()); TextField titleDEF = new TextField(); titleDEF.setText(getTitleDE()); TextField titleENF = new TextField(); titleENF.setText(getTitleEN()); TextField author1F = new TextField(); String author1 = (String)getAuthors().get(0); author1F.setText(author1); /* is there a 2nd author? */ if (getAuthors().get(1) != null) { TextField author2F = new TextField(); String author2 = (String)getAuthors().get(1); author2F.setText(author2); } /* is there a 3rd author? */ if (getAuthors().get(2) != null) { TextField author3F = new TextField(); String author3 = (String)getAuthors().get(2); author3F.setText(author3); } /* is there a 4th author? */ if (getAuthors().get(3) != null) { TextField author4F = new TextField(); String author4 = (String)getAuthors().get(3); author4F.setText(author4); } /* just filling in the first resource for now */ TextField fullTextSrcF = new TextField(); String fullTextSrc = (String)getAuthors().get(0); fullTextSrcF.setText(fullTextSrc); } public static void main(String[] args) { String DEFAULT_PARSER_NAME = "org.apache.xerces.parsers.DOMParser"; /* For now, there's the xercesSamples.jar necessary; it works but needs to * be done differently. */ DOMParserSaveEncoding parser; Document xmlDocument; String filename=args[0]; try { parser = new DOMParserSaveEncoding(); /* setting some features, don't know if this is really necessary */ parser.setFeature("http://apache.org/xml/features/dom/defer-node-expansion", false); parser.setFeature("http://apache.org/xml/features/continue-after-fatal-error", true); parser.setFeature("http://apache.org/xml/features/allow-java-encodings", true); // xmlDocument = parser.getDocument(); // DOMParse doc = new DOMParse(xmlDocument); // doc.parseDoc(); parser.parse(filename); Document document = parser.getDocument(); NodeList nodes = document.getChildNodes(); extract(nodes); } catch (Exception ex) { System.err.println(ex.getMessage()); ex.printStackTrace(); } } }