Imam kod koji parsira html datoteku, dobro radi za primjer datoteke:
–test.html–
Hello world.
Ali kad META tag završava sa />, onda ne radi dobro. Npr.
–test.html–
Hello world.
Zna li itko zašto to ne radi i kako to riješiti?
Evo koda:
import java.io.*;
import javax.swing.text.*;
import javax.swing.text.html.*;
import javax.swing.text.html.parser.*;
/**
 * Parser callback that prints the events it receives while an HTML document
 * is parsed: text runs, start tags, end tags, and the {@code name} and
 * {@code content} attributes of {@code META} tags.
 *
 * <p>{@link #main(String[])} runs the callback over the file {@code test.html}
 * in the current working directory.
 */
public class ParseTest extends HTMLEditorKit.ParserCallback {

    /**
     * Prints the received text on its own line.
     *
     * @param data characters of the text run
     * @param pos position reported by the parser (unused)
     */
    @Override
    public void handleText(char[] data, int pos) {
        System.out.println(new String(data));
    }

    /**
     * Prints {@code "start: "} followed by the tag.
     *
     * @param t the tag that was opened
     * @param a attributes of the tag (unused)
     * @param pos position reported by the parser (unused)
     */
    @Override
    public void handleStartTag(HTML.Tag t, MutableAttributeSet a, int pos) {
        System.out.println("start: " + t);
    }

    /**
     * Prints {@code "end: "} followed by the tag.
     *
     * @param t the tag that was closed
     * @param pos position reported by the parser (unused)
     */
    @Override
    public void handleEndTag(HTML.Tag t, int pos) {
        System.out.println("end: " + t);
    }

    /**
     * For {@code META} tags, prints the {@code name} and {@code content}
     * attributes when they are present; every other simple tag is ignored.
     *
     * @param t the simple tag that was encountered
     * @param a attributes of the tag
     * @param pos position reported by the parser (unused)
     */
    @Override
    public void handleSimpleTag(HTML.Tag t, MutableAttributeSet a, int pos) {
        if (t != HTML.Tag.META) {
            return;
        }
        printIfPresent("META name1: ", a.getAttribute(HTML.Attribute.NAME));
        printIfPresent("META content1: ", a.getAttribute(HTML.Attribute.CONTENT));
    }

    /**
     * Prints {@code label + value} unless {@code value} is {@code null}.
     *
     * <p>The value is concatenated as an {@code Object} instead of being cast
     * to {@code String}, so an attribute value of another type is printed
     * rather than causing a {@code ClassCastException}.
     */
    private static void printIfPresent(String label, Object value) {
        if (value != null) {
            System.out.println(label + value);
        }
    }

    /**
     * Parses {@code test.html} and prints the parser events to standard output.
     *
     * @param argv command-line arguments (unused)
     */
    public static void main(String[] argv) {
        // try-with-resources guarantees the reader is closed even if parsing fails.
        try (Reader r = new FileReader("test.html")) {
            ParserDelegator parser = new ParserDelegator();
            HTMLEditorKit.ParserCallback callback = new ParseTest();
            // The third argument is ParserDelegator's ignoreCharSet flag.
            parser.parse(r, callback, false);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}