使用itextpdf将字符串格式HTML转成pdf文件,生成的文件内容不全

使用itextpdf将字符串格式HTML转成pdf文件,生成的文件内容不全(html中有数张base64格式的图片)
package com.transfar.utils;

import com.alibaba.fastjson.JSONObject;
import com.itextpdf.text.Document;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.PageSize;
import com.itextpdf.text.pdf.PdfWriter;
import com.itextpdf.tool.xml.Pipeline;
import com.itextpdf.tool.xml.XMLWorker;
import com.itextpdf.tool.xml.XMLWorkerHelper;
import com.itextpdf.tool.xml.css.CssFilesImpl;
import com.itextpdf.tool.xml.css.StyleAttrCSSResolver;
import com.itextpdf.tool.xml.html.CssAppliersImpl;
import com.itextpdf.tool.xml.html.HTML;
import com.itextpdf.tool.xml.html.TagProcessorFactory;
import com.itextpdf.tool.xml.html.Tags;
import com.itextpdf.tool.xml.parser.XMLParser;
import com.itextpdf.tool.xml.pipeline.css.CssResolverPipeline;
import com.itextpdf.tool.xml.pipeline.end.PdfWriterPipeline;
import com.itextpdf.tool.xml.pipeline.html.HtmlPipeline;
import com.itextpdf.tool.xml.pipeline.html.HtmlPipelineContext;
import lombok.extern.slf4j.Slf4j;

import java.io.*;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;

@Slf4j
public class PdfUtil {
/**
 * Converts an HTML string into a PDF document and writes it to the given file.
 *
 * <p>The HTML is encoded and parsed as UTF-8 and rendered onto A4 pages with
 * 36-point margins. The default {@code <img>} tag processor is replaced by
 * {@link ImageTagProcessor} (base64 image handling), and fonts are resolved
 * through {@link CustomXMLWorkerFontProvider}.
 *
 * @param htmlContent the HTML markup to render
 * @param filePath    the file the PDF is written to
 * @throws DocumentException if iText reports a document-level failure
 * @throws IOException       if the target file cannot be written or the HTML cannot be parsed
 */
public static void writeToOutputStreamAsPDF(String htmlContent,File filePath) throws DocumentException, IOException {
    log.info("开始生成pdf");
    // try-with-resources closes both streams independently, so a failure while
    // closing one of them can no longer leave the other one open.
    try (InputStream html = new ByteArrayInputStream(htmlContent.getBytes(StandardCharsets.UTF_8));
         OutputStream os = new FileOutputStream(filePath)) {
        final Document document = new Document(PageSize.A4, 36, 36, 36, 36);
        final PdfWriter pdfWriter = PdfWriter.getInstance(document, os);
        try {
            document.open();

            // Base64 image handling: swap the stock <img> processor for the custom one.
            final TagProcessorFactory tagProcessorFactory = Tags.getHtmlTagProcessorFactory();
            tagProcessorFactory.removeProcessor(HTML.Tag.IMG);
            tagProcessorFactory.addProcessor(new ImageTagProcessor(), HTML.Tag.IMG);

            // CSS resolution: default XMLWorker stylesheet plus inline style attributes.
            final CssFilesImpl cssFiles = new CssFilesImpl();
            cssFiles.add(XMLWorkerHelper.getInstance().getDefaultCSS());
            final StyleAttrCSSResolver cssResolver = new StyleAttrCSSResolver(cssFiles);

            // Pipeline order: CSS resolver -> HTML tag processing -> PDF writer.
            final HtmlPipelineContext hpc =
                    new HtmlPipelineContext(new CssAppliersImpl(new CustomXMLWorkerFontProvider()));
            hpc.setAcceptUnknown(true).autoBookmark(true).setTagFactory(tagProcessorFactory);
            final HtmlPipeline htmlPipeline =
                    new HtmlPipeline(hpc, new PdfWriterPipeline(document, pdfWriter));
            final Pipeline pipeline = new CssResolverPipeline(cssResolver, htmlPipeline);

            final XMLWorker worker = new XMLWorker(pipeline, true);
            final Charset charset = StandardCharsets.UTF_8;
            final XMLParser xmlParser = new XMLParser(true, worker, charset);

            xmlParser.parse(html, charset);

            document.close();

            log.info("pdf生成成功 文件路径为{}", filePath.getPath());
        } finally {
            // Kept from the original cleanup: the writer is closed on every path.
            pdfWriter.close();
        }
    } catch (Exception ex) {
        // Report through the class logger instead of printStackTrace(), then rethrow unchanged.
        log.error("pdf生成失败 文件路径为{}", filePath, ex);
        throw ex;
    }
}
生成的pdf内容只有3页不到,实际我传的html远不止这么大
经过调试发现是com.itextpdf.text.pdf.PdfWriter内有一个GENERATION_MAX属性

img


img

可以看到toPdf方法,当generation等于GENERATION_MAX时,输出就被打上了停止标记,导致pdf内容不够

有人能提供此问题的解决方案吗?(感恩感恩)大伙用itextpdf开发html转pdf时有遇到类似的问题吗,欢迎评论区找我交流