I have an Element object, its HTMLDocument object, and I want it to be the string value of this element.
I need this result
Christina Toth, Farm. D.
=========================
plz see below code.
public static void main(String args[]) throws Exception { InputStream is = Nullsoft.getInputStream(); InputStreamReader isr = new InputStreamReader(is); BufferedReader br = new BufferedReader(isr); HTMLEditorKit htmlKit = new HTMLEditorKit(); HTMLDocument htmlDoc = (HTMLDocument) htmlKit.createDefaultDocument(); HTMLEditorKit.Parser parser = new ParserDelegator(); HTMLEditorKit.ParserCallback callback = htmlDoc.getReader(0); parser.parse(br, callback, true); // Parse ElementIterator iterator = new ElementIterator(htmlDoc); Element element; while ((element = iterator.next()) != null) { AttributeSet attributes = element.getAttributes(); Object name = attributes.getAttribute(StyleConstants.NameAttribute); if ((name instanceof HTML.Tag) && ((name == HTML.Tag.DIV) || (name == HTML.Tag.H2) || (name == HTML.Tag.H3))) { StringBuffer text = new StringBuffer(); int count = element.getElementCount(); for (int i = 0; i < count; i++) { Element child = element.getElement(i); AttributeSet childAttributes = child.getAttributes(); // if (childAttributes.getAttribute(StyleConstants.NameAttribute) == HTML.Tag.CONTENT) { int startOffset = child.getStartOffset(); int endOffset = child.getEndOffset(); int length = endOffset - startOffset; text.append(htmlDoc.getText(startOffset, length)); } } System.out.println(name + ": " + text.toString()); } } System.exit(0); } public static InputStream getInputStream() { String text = "<html>\n" + "<head>\n" + "<title>pg_0001</title>\n" + "\n" + "<style type=\"text/css\">\n" + ".ft3{font-style:normal;font-weight:bold;font-size:11px;font-family:Helvetica;color:#000000;}\n" + "</style>\n" + "</head>\n" + "<body vlink=\"#FFFFFF\" link=\"#FFFFFF\" bgcolor=\"#ffffff\">\n" + "\n" + "\n" + "<div style=\"position:absolute;top:597;left:252\"><nobr><span class=\"ft3\">Christina Toth, Pharm. D.</span></nobr></div>\n" + "\n" + "\n" + "</body>\n" + "</html>"; InputStream is = null; try { is = new ByteArrayInputStream(text.getBytes("UTF-8")); } catch (UnsupportedEncodingException e) { e.printStackTrace(); } return is; }
source share