JAVA解析多个命名空间问题

JAVA通过xpath解析遇到的问题

img


如果把XML中的默认命名空间声明(xmlns="…")删除,程序就可以正常解析出结果;但这份XML不允许修改。在保留xmlns声明的情况下,程序执行结果为空。
附上代码:

import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

import javax.xml.namespace.NamespaceContext;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.*;
import java.io.IOException;
import java.io.StringReader;
import java.util.Iterator;
/**
 * Reproducer from the question: parses an in-memory XML document that declares both a prefixed
 * namespace ({@code ns2}) and a default namespace, evaluates an XPath expression against it and
 * prints the text content of every selected node.
 *
 * <p>Reported behavior: nothing is printed. The last step of the expression, {@code Name}, is
 * unprefixed, and XPath 1.0 matches an unprefixed name only against elements in no namespace,
 * whereas the {@code Name} element in the document belongs to the default namespace declared by
 * {@code xmlns="..."}.
 *
 * @param args command-line arguments; not read by this method
 * @throws IOException if reading the XML text fails
 * @throws SAXException if the XML text cannot be parsed
 * @throws ParserConfigurationException if a {@link DocumentBuilder} cannot be created
 * @throws XPathExpressionException if the XPath expression cannot be compiled or evaluated
 */
public static void main(String[] args) throws IOException, SAXException, ParserConfigurationException, XPathExpressionException {
        // Sample document: the root and SPSExchangedDocument use the ns2 prefix, while Name has no
        // prefix and therefore falls into the default namespace declared on the root element.
        String xml = "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n" +
                "<ns2:SPSCertificate xmlns:ns2=\"urn:un:unece:uncefact:data:standard:SPSCertificate:5\"\n" +
                "xmlns=\"urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:9\">\n" +
                "    <ns2:SPSExchangedDocument>\n" +
                "        <Name languageID=\"en\">Certificate of Origin and Health for Pasteurised Dairy Products for Human Consumption exported to the People's Republic of China</Name>\n" +
                "\t</ns2:SPSExchangedDocument>\n" +
                "</ns2:SPSCertificate>";
//        System.out.println(xml);
        DocumentBuilderFactory domFactory = DocumentBuilderFactory.newInstance();
        // Ask the parser to record namespace URIs on the DOM nodes so prefixed XPath steps can match.
        domFactory.setNamespaceAware(true);
        DocumentBuilder builder = domFactory.newDocumentBuilder();
        Document doc = builder.parse(new InputSource(new StringReader(xml)));
        XPath xpath = XPathFactory.newInstance().newXPath();
        // Supplies the prefix -> namespace URI bindings used to resolve prefixes in the expression.
        xpath.setNamespaceContext(new NamespaceContext() {
            // Reverse lookup (URI -> prefixes) is not implemented; always yields null.
            @Override
            public Iterator getPrefixes(String arg0) {
                return null;
            }

            // Reverse lookup (URI -> prefix) is not implemented; always yields null.
            @Override
            public String getPrefix(String arg0) {
                return null;
            }

            // Forward lookup: arg0 is the prefix to resolve.
            @Override
            public String getNamespaceURI(String arg0) {
                if ("ns2".equals(arg0)) {
                    return "urn:un:unece:uncefact:data:standard:SPSCertificate:5";
                } else if ("".equals(arg0)) {
                    // NOTE(review): this maps the empty prefix to the document's default namespace,
                    // but XPath 1.0 does not apply a default namespace to unprefixed name tests, so
                    // this branch presumably has no effect on the expression below.
                    return "urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:9";
                }
                return null;
            }
        });
        // The final step "Name" carries no prefix; this is the step that fails to match.
        XPathExpression expr = xpath
                .compile("/ns2:SPSCertificate/ns2:SPSExchangedDocument/Name[@languageID=\"en\"]");
        Object result = expr.evaluate(doc, XPathConstants.NODESET);
        NodeList nodes = (NodeList) result;
        // Print the text of each selected node, one per line.
        for (int i = 0; i < nodes.getLength(); i++) {
            Node node = nodes.item(i);
            System.out.println(node.getTextContent());
        }
    }

TechWhizKid参考GPT回答:

  • 在你的XPath表达式中,你没有指定Name元素的命名空间。在你的XML中,Name元素使用的是默认命名空间,即urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:9。所以你要在XPath表达式中指定命名空间。给默认命名空间赋一个前缀(例如"default"),然后在XPath表达式中使用这个前缀。

这是修复后的代码:

import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

import javax.xml.XMLConstants;
import javax.xml.namespace.NamespaceContext;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.*;
import java.io.IOException;
import java.io.StringReader;
import java.util.Collections;
import java.util.Iterator;

/**
 * Runnable demonstration of evaluating an XPath 1.0 expression against a document that mixes a
 * prefixed namespace ({@code ns2}) with a default namespace.
 *
 * <p>XPath 1.0 has no default element namespace: an unprefixed name test only matches elements
 * that are in no namespace. The {@code Name} element of the sample document lives in the
 * document's default namespace, so the expression has to address it through a prefix that the
 * {@link NamespaceContext} binds to that namespace URI. The prefix name itself is arbitrary; this
 * class uses {@code default}.
 */
public class Main {

    /** Prefix used in the XPath expression for {@link #SPS_CERTIFICATE_NS}. */
    private static final String SPS_CERTIFICATE_PREFIX = "ns2";

    /** Namespace URI the sample document binds to its {@code ns2} prefix. */
    private static final String SPS_CERTIFICATE_NS =
            "urn:un:unece:uncefact:data:standard:SPSCertificate:5";

    /** Prefix used in the XPath expression for {@link #DEFAULT_NS}. */
    private static final String DEFAULT_NS_ALIAS = "default";

    /** URI of the sample document's default namespace, which contains the {@code Name} element. */
    private static final String DEFAULT_NS =
            "urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:9";

    /**
     * Parses the embedded sample document, selects the English {@code Name} element and prints the
     * text content of every match on its own line.
     *
     * @param args command-line arguments; not read by this method
     * @throws IOException if reading the XML text fails
     * @throws SAXException if the XML text cannot be parsed
     * @throws ParserConfigurationException if a {@link DocumentBuilder} cannot be created
     * @throws XPathExpressionException if the XPath expression cannot be compiled or evaluated
     */
    public static void main(String[] args) throws IOException, SAXException, ParserConfigurationException, XPathExpressionException {
        String xml = "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n" +
                "<ns2:SPSCertificate xmlns:ns2=\"urn:un:unece:uncefact:data:standard:SPSCertificate:5\"\n" +
                "xmlns=\"urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:9\">\n" +
                "    <ns2:SPSExchangedDocument>\n" +
                "        <Name languageID=\"en\">Certificate of Origin and Health for Pasteurised Dairy Products for Human Consumption exported to the People's Republic of China</Name>\n" +
                "\t</ns2:SPSExchangedDocument>\n" +
                "</ns2:SPSCertificate>";

        DocumentBuilderFactory domFactory = DocumentBuilderFactory.newInstance();
        // Without namespace awareness the DOM carries no namespace URIs and prefixed steps cannot match.
        domFactory.setNamespaceAware(true);
        DocumentBuilder builder = domFactory.newDocumentBuilder();
        Document doc = builder.parse(new InputSource(new StringReader(xml)));

        XPath xpath = XPathFactory.newInstance().newXPath();
        xpath.setNamespaceContext(new SampleNamespaceContext());

        // Every step is prefixed; "default:Name" is what reaches the element in the default namespace.
        XPathExpression expr = xpath
                .compile("/ns2:SPSCertificate/ns2:SPSExchangedDocument/default:Name[@languageID=\"en\"]");
        NodeList nodes = (NodeList) expr.evaluate(doc, XPathConstants.NODESET);
        for (int i = 0; i < nodes.getLength(); i++) {
            Node node = nodes.item(i);
            System.out.println(node.getTextContent());
        }
    }

    /**
     * Fixed prefix/URI bindings for the sample document, implemented according to the
     * {@link NamespaceContext} contract: {@code null} arguments are rejected, the reserved
     * {@code xml} and {@code xmlns} bindings are honoured, unbound prefixes resolve to
     * {@link XMLConstants#NULL_NS_URI}, and reverse lookups are supported.
     */
    private static final class SampleNamespaceContext implements NamespaceContext {

        /**
         * Resolves a prefix to its namespace URI.
         *
         * @param prefix the prefix to look up
         * @return the bound URI, or {@link XMLConstants#NULL_NS_URI} when the prefix is unbound
         * @throws IllegalArgumentException if {@code prefix} is {@code null}
         */
        @Override
        public String getNamespaceURI(String prefix) {
            if (prefix == null) {
                throw new IllegalArgumentException("prefix must not be null");
            }
            if (SPS_CERTIFICATE_PREFIX.equals(prefix)) {
                return SPS_CERTIFICATE_NS;
            }
            if (DEFAULT_NS_ALIAS.equals(prefix)) {
                return DEFAULT_NS;
            }
            if (XMLConstants.XML_NS_PREFIX.equals(prefix)) {
                return XMLConstants.XML_NS_URI;
            }
            if (XMLConstants.XMLNS_ATTRIBUTE.equals(prefix)) {
                return XMLConstants.XMLNS_ATTRIBUTE_NS_URI;
            }
            return XMLConstants.NULL_NS_URI;
        }

        /**
         * Resolves a namespace URI back to the prefix bound to it.
         *
         * @param namespaceURI the URI to look up
         * @return the bound prefix, or {@code null} when no prefix is bound to the URI
         * @throws IllegalArgumentException if {@code namespaceURI} is {@code null}
         */
        @Override
        public String getPrefix(String namespaceURI) {
            if (namespaceURI == null) {
                throw new IllegalArgumentException("namespaceURI must not be null");
            }
            if (SPS_CERTIFICATE_NS.equals(namespaceURI)) {
                return SPS_CERTIFICATE_PREFIX;
            }
            if (DEFAULT_NS.equals(namespaceURI)) {
                return DEFAULT_NS_ALIAS;
            }
            if (XMLConstants.XML_NS_URI.equals(namespaceURI)) {
                return XMLConstants.XML_NS_PREFIX;
            }
            if (XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(namespaceURI)) {
                return XMLConstants.XMLNS_ATTRIBUTE;
            }
            return null;
        }

        /**
         * Lists every prefix bound to a namespace URI; each URI here has at most one.
         *
         * @param namespaceURI the URI to look up
         * @return a read-only iterator over the bound prefixes, empty when there are none
         * @throws IllegalArgumentException if {@code namespaceURI} is {@code null}
         */
        @Override
        public Iterator<String> getPrefixes(String namespaceURI) {
            String prefix = getPrefix(namespaceURI);
            if (prefix == null) {
                return Collections.<String>emptyIterator();
            }
            return Collections.singletonList(prefix).iterator();
        }
    }
}


-

尝试使用通配符*来匹配任何命名空间的元素。修改XPath表达式如下:

"/*[local-name()='SPSCertificate']/*[local-name()='SPSExchangedDocument']/*[local-name()='Name'][@languageID='en']"

这样可以忽略命名空间并匹配对应的元素。
修改代码如下:

/**
 * Namespace-agnostic variant: parses the embedded sample document and selects the English
 * {@code Name} element by comparing {@code local-name()} at every step, then prints the text
 * content of each match on its own line.
 *
 * <p>The expression contains no prefixed names, so it does not rely on the prefix bindings
 * registered below; they are kept as they were in the original snippet.
 *
 * @param args command-line arguments; not read by this method
 * @throws IOException if reading the XML text fails
 * @throws SAXException if the XML text cannot be parsed
 * @throws ParserConfigurationException if a {@link DocumentBuilder} cannot be created
 * @throws XPathExpressionException if the XPath expression cannot be compiled or evaluated
 */
public static void main(String[] args) throws IOException, SAXException, ParserConfigurationException, XPathExpressionException {
    String certificateXml = "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n" +
            "<ns2:SPSCertificate xmlns:ns2=\"urn:un:unece:uncefact:data:standard:SPSCertificate:5\"\n" +
            "xmlns=\"urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:9\">\n" +
            "    <ns2:SPSExchangedDocument>\n" +
            "        <Name languageID=\"en\">Certificate of Origin and Health for Pasteurised Dairy Products for Human Consumption exported to the People's Republic of China</Name>\n" +
            "\t</ns2:SPSExchangedDocument>\n" +
            "</ns2:SPSCertificate>";

    // Build a namespace-aware DOM from the in-memory text.
    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    factory.setNamespaceAware(true);
    Document document = factory.newDocumentBuilder()
            .parse(new InputSource(new StringReader(certificateXml)));

    NamespaceContext bindings = new NamespaceContext() {
        @Override
        public String getNamespaceURI(String prefix) {
            if ("ns2".equals(prefix)) {
                return "urn:un:unece:uncefact:data:standard:SPSCertificate:5";
            }
            if ("".equals(prefix)) {
                return "urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:9";
            }
            return null;
        }

        @Override
        public String getPrefix(String namespaceUri) {
            return null;
        }

        @Override
        public Iterator getPrefixes(String namespaceUri) {
            return null;
        }
    };

    XPath evaluator = XPathFactory.newInstance().newXPath();
    evaluator.setNamespaceContext(bindings);

    // Each step matches on the local name only, whatever namespace the element is in.
    XPathExpression englishName = evaluator.compile(
            "/*[local-name()='SPSCertificate']/*[local-name()='SPSExchangedDocument']/*[local-name()='Name'][@languageID='en']");
    NodeList matches = (NodeList) englishName.evaluate(document, XPathConstants.NODESET);

    int index = 0;
    while (index < matches.getLength()) {
        System.out.println(matches.item(index).getTextContent());
        index++;
    }
}

运行结果,如下:

Certificate of Origin and Health for Pasteurised Dairy Products for Human Consumption exported to the People's Republic of China


回答部分参考、引用ChatGPT,以便为您提供更准确的答案:

根据提供的代码和问题描述,问题似乎是在使用Java中的XPath解析XML时遇到了命名空间的问题。通过分析代码和给定的XML片段,我可以给出以下解答:

在给定的XML中,命名空间的声明如下: xmlns:ns2="urn:un:unece:uncefact:data:standard:SPSCertificate:5" xmlns="urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:9"

代码中的NamespaceContext实现定义了命名空间的URI与前缀之间的映射关系。在这种情况下,"ns2"前缀对应的URI是"urn:un:unece:uncefact:data:standard:SPSCertificate:5",而空前缀对应的URI是"urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:9"。

XPath表达式的编写需要根据命名空间的定义进行相应的处理。在给定的代码中,XPath表达式是"/ns2:SPSCertificate/ns2:SPSExchangedDocument/Name[@languageID="en"]",其中使用了"ns2"前缀来指定命名空间。

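问题中提到:保留默认命名空间声明xmlns时,程序执行结果为空;把xmlns声明删除后,反而可以得到结果。这是因为在XPath 1.0中,不带前缀的名称(例如Name)只匹配不属于任何命名空间的元素,既不会套用XML文档的默认命名空间,也不会使用NamespaceContext中为空前缀配置的URI;而XML中的Name元素位于默认命名空间之下,所以XPath无法匹配到该节点。删除xmlns声明后,Name元素不再属于任何命名空间,表达式中不带前缀的Name才能匹配成功。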

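如果不能修改XML,但仍然想要解析出正确的结果,除了保证NamespaceContext实现中getNamespaceURI方法返回的URI与给定XML片段中的命名空间URI完全一致(即"urn:un:unece:uncefact:data:standard:SPSCertificate:5"和"urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:9")之外,还需要为默认命名空间的URI绑定一个非空前缀(例如"default"),并在XPath表达式中用该前缀限定Name元素,即写成"/ns2:SPSCertificate/ns2:SPSExchangedDocument/default:Name[@languageID="en"]"。然后重新运行代码,就能够正确解析节点并输出结果。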

需要注意的是,代码中的XPath表达式是针对带有命名空间的XML进行编写的,如果在其他不带命名空间的XML上使用相同的表达式,可能会导致解析失败或输出错误的结果。