Java通过XPath解析带默认命名空间的XML时遇到的问题
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import javax.xml.namespace.NamespaceContext;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.*;
import java.io.IOException;
import java.io.StringReader;
import java.util.Iterator;
/**
 * Parses an inline SPS certificate document and prints the text content of
 * every node selected by the XPath query, one per line.
 *
 * <p>NOTE: the last step of the query is the unprefixed name test
 * {@code Name}. In XPath 1.0 an unprefixed name test only selects elements
 * that are in no namespace, so the {@code ""} binding below is never used
 * for it. The {@code <Name>} element in the document belongs to the default
 * namespace, hence this version selects nothing and prints nothing.
 *
 * @param args ignored
 */
public static void main(String[] args) throws IOException, SAXException, ParserConfigurationException, XPathExpressionException {
    final String certificateNs = "urn:un:unece:uncefact:data:standard:SPSCertificate:5";
    final String entityNs = "urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:9";

    String xml = "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n" +
            "<ns2:SPSCertificate xmlns:ns2=\"urn:un:unece:uncefact:data:standard:SPSCertificate:5\"\n" +
            "xmlns=\"urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:9\">\n" +
            " <ns2:SPSExchangedDocument>\n" +
            " <Name languageID=\"en\">Certificate of Origin and Health for Pasteurised Dairy Products for Human Consumption exported to the People's Republic of China</Name>\n" +
            "\t</ns2:SPSExchangedDocument>\n" +
            "</ns2:SPSCertificate>";

    // Build a namespace-aware DOM from the in-memory string.
    DocumentBuilderFactory parserFactory = DocumentBuilderFactory.newInstance();
    parserFactory.setNamespaceAware(true);
    Document document = parserFactory.newDocumentBuilder()
            .parse(new InputSource(new StringReader(xml)));

    // Prefix bindings offered to the XPath engine; reverse lookups are not implemented.
    NamespaceContext bindings = new NamespaceContext() {
        @Override
        public String getNamespaceURI(String prefix) {
            if ("ns2".equals(prefix)) {
                return certificateNs;
            }
            if ("".equals(prefix)) {
                return entityNs;
            }
            return null;
        }

        @Override
        public String getPrefix(String namespaceURI) {
            return null;
        }

        @Override
        public Iterator getPrefixes(String namespaceURI) {
            return null;
        }
    };

    XPath engine = XPathFactory.newInstance().newXPath();
    engine.setNamespaceContext(bindings);
    XPathExpression query = engine
            .compile("/ns2:SPSCertificate/ns2:SPSExchangedDocument/Name[@languageID=\"en\"]");

    NodeList matches = (NodeList) query.evaluate(document, XPathConstants.NODESET);
    for (int index = 0, count = matches.getLength(); index < count; index++) {
        System.out.println(matches.item(index).getTextContent());
    }
}
这是修复后的代码(为默认命名空间绑定一个非空前缀default,并在XPath表达式中写成default:Name):
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import javax.xml.namespace.NamespaceContext;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.*;
import java.io.IOException;
import java.io.StringReader;
import java.util.Iterator;
/**
 * Prints the English {@code Name} of an inline SPS certificate document.
 *
 * <p>The {@code <Name>} element carries no prefix in the XML and therefore
 * lives in the document's default namespace. XPath 1.0 has no notion of a
 * default namespace, so the query addresses that namespace through an
 * explicit, arbitrarily chosen prefix ({@code default}) that the
 * {@link NamespaceContext} below resolves to the same URI.
 */
public class Main {
    /** Prefix used for the certificate namespace, both in the XML and in the XPath query. */
    private static final String CERTIFICATE_PREFIX = "ns2";
    /** Namespace of the {@code SPSCertificate} and {@code SPSExchangedDocument} elements. */
    private static final String CERTIFICATE_NS = "urn:un:unece:uncefact:data:standard:SPSCertificate:5";
    /** XPath-only prefix standing in for the document's default namespace. */
    private static final String ENTITY_PREFIX = "default";
    /** The document's default namespace, which the {@code Name} element belongs to. */
    private static final String ENTITY_NS = "urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:9";

    /**
     * Parses the sample document and prints the text of every matching
     * {@code Name} element, one per line.
     *
     * @param args ignored
     * @throws IOException if reading the in-memory document fails
     * @throws SAXException if the document is not well-formed
     * @throws ParserConfigurationException if no DOM parser can be created
     * @throws XPathExpressionException if the query cannot be compiled or evaluated
     */
    public static void main(String[] args) throws IOException, SAXException, ParserConfigurationException, XPathExpressionException {
        String xml = "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n" +
                "<ns2:SPSCertificate xmlns:ns2=\"urn:un:unece:uncefact:data:standard:SPSCertificate:5\"\n" +
                "xmlns=\"urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:9\">\n" +
                " <ns2:SPSExchangedDocument>\n" +
                " <Name languageID=\"en\">Certificate of Origin and Health for Pasteurised Dairy Products for Human Consumption exported to the People's Republic of China</Name>\n" +
                "\t</ns2:SPSExchangedDocument>\n" +
                "</ns2:SPSCertificate>";

        DocumentBuilderFactory domFactory = DocumentBuilderFactory.newInstance();
        domFactory.setNamespaceAware(true);
        DocumentBuilder builder = domFactory.newDocumentBuilder();
        Document doc = builder.parse(new InputSource(new StringReader(xml)));

        XPath xpath = XPathFactory.newInstance().newXPath();
        xpath.setNamespaceContext(new NamespaceContext() {
            @Override
            public String getNamespaceURI(String prefix) {
                if (prefix == null) {
                    throw new IllegalArgumentException("prefix must not be null");
                }
                if (CERTIFICATE_PREFIX.equals(prefix)) {
                    return CERTIFICATE_NS;
                }
                if (ENTITY_PREFIX.equals(prefix)) {
                    return ENTITY_NS;
                }
                // The NamespaceContext contract asks for the empty URI, not null, for unbound prefixes.
                return javax.xml.XMLConstants.NULL_NS_URI;
            }

            @Override
            public String getPrefix(String namespaceURI) {
                if (namespaceURI == null) {
                    throw new IllegalArgumentException("namespaceURI must not be null");
                }
                if (CERTIFICATE_NS.equals(namespaceURI)) {
                    return CERTIFICATE_PREFIX;
                }
                if (ENTITY_NS.equals(namespaceURI)) {
                    return ENTITY_PREFIX;
                }
                return null;
            }

            @Override
            public Iterator<String> getPrefixes(String namespaceURI) {
                // Each URI has at most one prefix here, so delegate to getPrefix.
                String prefix = getPrefix(namespaceURI);
                if (prefix == null) {
                    return java.util.Collections.<String>emptyList().iterator();
                }
                return java.util.Collections.singletonList(prefix).iterator();
            }
        });

        XPathExpression expr = xpath
                .compile("/ns2:SPSCertificate/ns2:SPSExchangedDocument/default:Name[@languageID=\"en\"]");
        NodeList nodes = (NodeList) expr.evaluate(doc, XPathConstants.NODESET);
        for (int i = 0; i < nodes.getLength(); i++) {
            Node node = nodes.item(i);
            System.out.println(node.getTextContent());
        }
    }
}
-
尝试使用通配符*来匹配任何命名空间的元素。修改XPath表达式如下:
"/*[local-name()='SPSCertificate']/*[local-name()='SPSExchangedDocument']/*[local-name()='Name'][@languageID='en']"
这样可以忽略命名空间并匹配对应的元素。
修改代码如下:
/**
 * Parses an inline SPS certificate document and prints the text of the
 * English {@code Name} element, matching elements by local name only.
 *
 * <p>Every step of the query is {@code *[local-name()='...']}, so the
 * expression contains no namespace prefixes. No {@code NamespaceContext}
 * is installed because there is nothing for it to resolve.
 *
 * @param args ignored
 * @throws IOException if reading the in-memory document fails
 * @throws SAXException if the document is not well-formed
 * @throws ParserConfigurationException if no DOM parser can be created
 * @throws XPathExpressionException if the query cannot be compiled or evaluated
 */
public static void main(String[] args) throws IOException, SAXException, ParserConfigurationException, XPathExpressionException {
    String xml = "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n" +
            "<ns2:SPSCertificate xmlns:ns2=\"urn:un:unece:uncefact:data:standard:SPSCertificate:5\"\n" +
            "xmlns=\"urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:9\">\n" +
            " <ns2:SPSExchangedDocument>\n" +
            " <Name languageID=\"en\">Certificate of Origin and Health for Pasteurised Dairy Products for Human Consumption exported to the People's Republic of China</Name>\n" +
            "\t</ns2:SPSExchangedDocument>\n" +
            "</ns2:SPSCertificate>";

    DocumentBuilderFactory domFactory = DocumentBuilderFactory.newInstance();
    domFactory.setNamespaceAware(true);
    DocumentBuilder builder = domFactory.newDocumentBuilder();
    Document doc = builder.parse(new InputSource(new StringReader(xml)));

    XPath xpath = XPathFactory.newInstance().newXPath();
    // Matching on local-name() ignores which namespace each element is in.
    XPathExpression expr = xpath
            .compile("/*[local-name()='SPSCertificate']/*[local-name()='SPSExchangedDocument']/*[local-name()='Name'][@languageID='en']");
    NodeList nodes = (NodeList) expr.evaluate(doc, XPathConstants.NODESET);
    for (int i = 0; i < nodes.getLength(); i++) {
        Node node = nodes.item(i);
        System.out.println(node.getTextContent());
    }
}
运行结果,如下:
Certificate of Origin and Health for Pasteurised Dairy Products for Human Consumption exported to the People's Republic of China
<?xml version="1.0" encoding="UTF-8" ?>
<!--这是一个注释-->
<person>
<name id="1">张三</name>
<age>16</age>
<address>广东</address>
<name id="2">张三</name>
<age>28</age>
<![CDATA[
这是一个字符数据区,在这里怎么写文本数据都行> <
]]>
<address>广东</address>
</person>
回答部分参考、引用ChatGPT以便为您提供更准确的答案:
根据提供的代码和问题描述,问题似乎是在使用Java中的XPath解析XML时遇到了命名空间的问题。通过分析代码和给定的XML片段,我可以给出以下解答:
在给定的XML中,命名空间的声明如下: xmlns:ns2="urn:un:unece:uncefact:data:standard:SPSCertificate:5" xmlns="urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:9"
代码中的NamespaceContext实现定义了前缀到命名空间URI的映射关系。在这种情况下,"ns2"前缀对应的URI是"urn:un:unece:uncefact:data:standard:SPSCertificate:5",而空前缀("")被映射到"urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:9"。需要注意的是,XPath 1.0没有"默认命名空间"的概念:表达式中不带前缀的名称始终表示不属于任何命名空间的元素,求值时不会为它查询NamespaceContext,因此空前缀的这条映射实际上不起作用。
XPath表达式的编写需要根据命名空间的定义进行相应的处理。在给定的代码中,XPath表达式是"/ns2:SPSCertificate/ns2:SPSExchangedDocument/Name[@languageID="en"]",其中使用了"ns2"前缀来指定命名空间。
在保留默认命名空间声明(xmlns="...")的情况下,程序的执行结果为空;只有把该声明从XML中删除,才能得到预期的输出。这是因为XML中的<Name>元素没有前缀,属于默认命名空间,而XPath表达式中不带前缀的Name只能匹配不属于任何命名空间的元素,二者无法匹配。
如果不能修改XML,但仍然想要解析出正确的结果,可以在NamespaceContext的getNamespaceURI方法中为默认命名空间的URI"urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:9"绑定一个非空前缀(例如"default"),并在XPath表达式中使用该前缀,即把Name写成default:Name;"ns2"仍然对应"urn:un:unece:uncefact:data:standard:SPSCertificate:5"。然后重新运行代码,就能够正确解析节点并输出结果。另一种做法是使用local-name()按本地名称匹配,从而忽略命名空间。
需要注意的是,代码中的XPath表达式是针对带有命名空间的XML进行编写的,如果在其他不带命名空间的XML上使用相同的表达式,可能会导致解析失败或输出错误的结果。