java爬虫webMagic用正则表达式匹配a标签的onclick里面的值

Java 爬虫 WebMagic:如何用正则表达式匹配 a 标签 onclick 属性中 queryArticleByCondition 方法的第二个参数(即 this 后面的值)?这个值是一个 URL 地址。

<a style="cursor:pointer" onclick="queryArticleByCondition(this,'/liuyanggov/dwzt/ggzyjyzx/jyxx96/fjsz34/zbgg97/2a7bc3f8-3.html')" tagname="/liuyanggov/dwzt/ggzyjyzx/jyxx96/fjsz34/zbgg97/2a7bc3f8-3.html">下一页</a>

可以定义一个 Selector 类来筛选 a 标签,参考下面的 demo(注意:该 demo 读取的是 href 属性;要取 onclick 里的 URL,需要改为读取 onclick 属性,再用正则表达式截取其中的地址):

/**
 * Element selector that collects the {@code href} values of the elements matched
 * beneath a given element.
 *
 * <p>Only {@link #selectList(Element)} is implemented; the single-value and
 * element-returning variants throw {@link UnsupportedOperationException}.
 */
public class LinksSelector extends BaseElementSelector {
    public LinksSelector() {
    }

    /**
     * Not supported by this selector.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public String select(Element element) {
        throw new UnsupportedOperationException();
    }

    /**
     * Returns the link target of every element matched under {@code element}.
     *
     * <p>The query is the concatenation of {@code LinkTag.HREF} and {@code LinkTag.NOTNA};
     * NOTE(review): their string values are defined elsewhere — presumably a CSS query
     * for elements carrying an {@code href}; confirm against {@code LinkTag}.
     *
     * @param element root element to search in
     * @return one entry per matched element, in match order
     */
    @Override
    public List<String> selectList(Element element) {
        Elements matched = element.select(LinkTag.HREF.toString() + LinkTag.NOTNA.toString());
        // Parameterised list and for-each instead of a raw ArrayList/Iterator with a cast.
        List<String> links = new ArrayList<String>(matched.size());

        for (Element link : matched) {
            if (!StringUtil.isBlank(link.baseUri())) {
                // A base URI is available, so request the "abs:"-prefixed attribute.
                links.add(link.attr("abs:href"));
            } else {
                // No base URI: fall back to the attribute value as written.
                links.add(link.attr("href"));
            }
        }

        return links;
    }

    /**
     * Not supported by this selector.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public Element selectElement(Element element) {
        throw new UnsupportedOperationException();
    }

    /**
     * Not supported by this selector.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public List<Element> selectElements(Element element) {
        throw new UnsupportedOperationException();
    }

    /**
     * @return always {@code true}
     */
    @Override
    public boolean hasAttribute() {
        return true;
    }
}