import nl.siegmann.epublib.domain.Book;
import nl.siegmann.epublib.domain.Resource;
import nl.siegmann.epublib.domain.TOCReference;
import nl.siegmann.epublib.domain.TableOfContents;
import nl.siegmann.epublib.epub.EpubReader;
import nl.siegmann.epublib.epub.EpubWriter;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Entities;
import org.jsoup.select.Elements;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.nio.charset.StandardCharsets;
import java.util.*;
/**
 * Splits an EPUB into one stand-alone EPUB per table-of-contents entry.
 *
 * <p>Every TOC entry (at any nesting depth) becomes its own book. When several
 * entries share one XHTML file and are separated by anchors, the content in
 * front of the entry's own anchor and the content starting at the following
 * entry's anchor are removed from that entry's copy of the file.
 *
 * @author hyx
 * @version 1.0
 */
public class Test {
    /** Path of the e-book that is going to be split. */
    private static final String BOOK_PATH = "C:\\Users\\hyx\\Desktop\\ebook_b38f0e50140b4deba06ba464d17176a5.epub";
    /** Output folder for the generated e-books; used as a plain prefix, so it must end with a separator. */
    private static final String OUT_PATH = "C:\\Users\\hyx\\Desktop\\aaa\\";

    /**
     * Loads the e-book at {@code BOOK_PATH}, splits it by chapter and writes every part to {@code OUT_PATH}.
     *
     * @param args unused
     * @throws Exception if the book cannot be read, split or written
     */
    public static void main(String[] args) throws Exception {
        Book book;
        // Close the input stream even when reading fails.
        try (FileInputStream in = new FileInputStream(BOOK_PATH)) {
            book = new EpubReader().readEpub(in);
        }
        // Split by chapter, then write every resulting book (children are written recursively).
        for (MyBook myBook : splitEbook(book)) {
            saveEbookFile(myBook);
        }
    }

    /**
     * Writes a split e-book, followed recursively by all of its child books, into the output folder.
     * The file is named after the book title.
     *
     * @param myBook the split e-book; {@code null} is ignored
     * @throws Exception if a file cannot be created or written
     */
    public static void saveEbookFile(MyBook myBook) throws Exception {
        if (myBook == null) {
            return;
        }
        File file = new File(OUT_PATH + toSafeFileName(myBook.getTitle()) + ".epub");
        // try-with-resources: the stream is released even when writing throws.
        try (FileOutputStream out = new FileOutputStream(file)) {
            new EpubWriter().write(myBook, out);
        }
        // Recursively save the sub-chapters.
        List<MyBook> childBooks = myBook.getChildBookList();
        if (childBooks != null) {
            for (MyBook child : childBooks) {
                saveEbookFile(child);
            }
        }
    }

    /**
     * Splits a book into one {@link MyBook} per top-level TOC entry; nested entries are
     * attached to their parent as child books.
     *
     * @param book the e-book to split
     * @return the list of top-level chapter books, empty when the book has no TOC entries
     * @throws Exception if a chapter resource cannot be read
     */
    public static List<MyBook> splitEbook(Book book) throws Exception {
        List<MyBook> result = new ArrayList<>();
        TableOfContents tableOfContents = book.getTableOfContents();
        List<TOCReference> refs = tableOfContents == null ? null : tableOfContents.getTocReferences();
        if (refs == null) {
            return result;
        }
        for (int i = 0; i < refs.size(); i++) {
            TOCReference next = (i + 1) < refs.size() ? refs.get(i + 1) : null;
            // The following top-level chapter is the boundary for this chapter and all its descendants.
            List<Fragment> boundaries = new ArrayList<>();
            addBoundary(boundaries, next);
            MyBook newBook = new MyBook();
            // Build the chapter book and recursively split its sub-chapters.
            setTOCReference(book, newBook, refs.get(i), next, boundaries);
            result.add(newBook);
        }
        return result;
    }

    /**
     * Fills {@code newBook} with the content of one TOC entry and recursively creates the child
     * books for its sub-entries (stored through {@code newBook.setChildBookList}).
     *
     * @param resourceBook           the source e-book
     * @param newBook                the book being generated for this entry
     * @param tocReference           the source TOC entry
     * @param nextTocReference       the following entry on the same level, or {@code null}
     * @param parentNextFragmentList anchors at which enclosing chapters end (the entries following
     *                               this chapter's ancestors); may be {@code null}; it is not modified
     * @return the section that was added to {@code newBook}
     * @throws Exception if the chapter resource cannot be read
     */
    public static TOCReference setTOCReference(Book resourceBook, MyBook newBook, TOCReference tocReference,
            TOCReference nextTocReference, List<Fragment> parentNextFragmentList) throws Exception {
        List<Fragment> boundaries = parentNextFragmentList;
        if (boundaries == null) {
            boundaries = new ArrayList<>();
        }
        // Work on a copy of the resource so the source book keeps its full content.
        Resource source = Objects.requireNonNull(tocReference.getResource(),
                "TOC entry '" + tocReference.getTitle() + "' has no resource");
        Resource section = new Resource(source.getId(), source.getData(), source.getHref(), source.getMediaType());
        // Title
        newBook.getMetadata().addTitle(tocReference.getTitle());
        // Cover image
        newBook.setCoverImage(resourceBook.getCoverImage());
        // Decode explicitly as UTF-8 instead of the platform default charset.
        org.jsoup.nodes.Document doc = Jsoup.parse(new String(section.getData(), StandardCharsets.UTF_8));
        // Serialize as XHTML.
        doc.outputSettings().syntax(org.jsoup.nodes.Document.OutputSettings.Syntax.xml).escapeMode(Entities.EscapeMode.xhtml);
        // Anchor and file of the current chapter.
        String fragmentId = tocReference.getFragmentId();
        String resourceId = tocReference.getResourceId();

        // 1. The chapter starts at an anchor: drop what comes before it.
        if (StringUtils.isNotBlank(fragmentId)) {
            removeContentBefore(doc, fragmentId);
        }
        // 2. The next chapter starts at an anchor in the same file: drop it and what follows.
        if (hasAnchorInResource(nextTocReference, resourceId)) {
            removeFromAnchor(doc, nextTocReference.getFragmentId());
        }
        // 3. The first sub-chapter starts at an anchor in the same file: drop it and what follows.
        List<TOCReference> children = tocReference.getChildren();
        if (children != null && !children.isEmpty() && hasAnchorInResource(children.get(0), resourceId)) {
            removeFromAnchor(doc, children.get(0).getFragmentId());
        }
        // 4. A chapter following one of the ancestors starts in the same file: drop it and what follows.
        // NOTE(review): nothing is cut here when this chapter has no anchor of its own; confirm that is intended.
        for (Fragment boundary : boundaries) {
            String boundaryResourceId = boundary.getResourceId();
            String boundaryFragmentId = boundary.getFragmentId();
            if (StringUtils.isBlank(boundaryResourceId) || StringUtils.isBlank(boundaryFragmentId)) {
                continue;
            }
            if (resourceId == null || !resourceId.equals(boundaryResourceId)) {
                continue;
            }
            Element boundaryAnchor = doc.getElementById(boundaryFragmentId);
            if (boundaryAnchor == null) {
                continue;
            }
            // Looked up on every pass because an earlier cut may have removed it.
            Element ownAnchor = StringUtils.isNotBlank(fragmentId) ? doc.getElementById(fragmentId) : null;
            // Only cut at boundaries positioned after this chapter's own anchor among their siblings.
            if (ownAnchor != null && boundaryAnchor.elementSiblingIndex() > ownAnchor.elementSiblingIndex()) {
                removeWithFollowingSiblings(boundaryAnchor);
            }
        }

        // Store the trimmed content, encoded as UTF-8 to match the decoding above.
        section.setData(doc.outerHtml().getBytes(StandardCharsets.UTF_8));
        // Add the chapter and keep its anchor.
        TOCReference addedSection = newBook.addSection(tocReference.getTitle(), section);
        addedSection.setFragmentId(fragmentId);
        // Copy the style sheets, images and link targets the chapter refers to.
        setCssAndImg(resourceBook, newBook);

        // Recursively split the sub-chapters.
        List<MyBook> childBooks = new ArrayList<>();
        if (children != null) {
            for (int i = 0; i < children.size(); i++) {
                TOCReference next = (i + 1) < children.size() ? children.get(i + 1) : null;
                // Each child gets its own list: the inherited boundaries plus its next sibling.
                // Sharing one list would grow it with stale entries and modify the caller's argument.
                List<Fragment> childBoundaries = new ArrayList<>(boundaries);
                addBoundary(childBoundaries, next);
                MyBook childBook = new MyBook();
                setTOCReference(resourceBook, childBook, children.get(i), next, childBoundaries);
                childBooks.add(childBook);
            }
        }
        newBook.setChildBookList(childBooks);
        return addedSection;
    }

    /**
     * Adds to {@code newBook} the style sheets, images and link targets referenced by its first section.
     * Failures are reported on standard error and do not abort the split.
     *
     * @param sourceBook the source e-book the resources are taken from
     * @param newBook    the generated e-book that receives the resources
     */
    public static void setCssAndImg(Book sourceBook, Book newBook) {
        if (sourceBook == null || newBook == null) {
            return;
        }
        try {
            List<TOCReference> sections = newBook.getTableOfContents().getTocReferences();
            if (sections == null || sections.isEmpty()) {
                return;
            }
            Resource section = sections.get(0).getResource();
            if (section == null) {
                return;
            }
            org.jsoup.nodes.Document doc = Jsoup.parse(new String(section.getData(), StandardCharsets.UTF_8));
            // One ordered set: a resource referenced several times is looked up once.
            Set<String> hrefs = new LinkedHashSet<>();
            collectHrefs(doc.getElementsByTag("link"), "href", hrefs);
            collectHrefs(doc.getElementsByTag("img"), "src", hrefs);
            collectHrefs(doc.getElementsByTag("a"), "href", hrefs);
            for (String href : hrefs) {
                Resource res = sourceBook.getResources().getByHref(href);
                // Skip hrefs the new book already has, e.g. a link back to the chapter's own file:
                // re-adding would register the untrimmed source file under the same href.
                if (res != null && !newBook.getResources().containsByHref(res.getHref())) {
                    newBook.addResource(res);
                }
            }
        } catch (Exception e) {
            // Best effort: the chapter is still usable without its linked resources.
            System.err.println("Could not copy linked resources: " + e);
            e.printStackTrace();
        }
    }

    /** Appends the start of {@code ref} as a boundary when it has both a resource id and an anchor. */
    private static void addBoundary(List<Fragment> boundaries, TOCReference ref) {
        if (ref == null) {
            return;
        }
        String resourceId = ref.getResourceId();
        String fragmentId = ref.getFragmentId();
        if (StringUtils.isNotBlank(fragmentId) && StringUtils.isNotBlank(resourceId)) {
            boundaries.add(new Fragment(resourceId, fragmentId));
        }
    }

    /** Tells whether {@code ref} starts at an anchor inside the resource identified by {@code resourceId}. */
    private static boolean hasAnchorInResource(TOCReference ref, String resourceId) {
        return ref != null
                && resourceId != null
                && StringUtils.isNotBlank(ref.getFragmentId())
                && resourceId.equals(ref.getResourceId());
    }

    /**
     * Removes the elements in front of the anchor, on the anchor's own level and on every enclosing
     * level up to the body. Nothing is removed when the anchor is missing or outside the body.
     */
    private static void removeContentBefore(org.jsoup.nodes.Document doc, String fragmentId) {
        Element body = doc.body();
        Element anchor = doc.getElementById(fragmentId);
        if (body == null || anchor == null || !isInside(anchor, body)) {
            return;
        }
        for (Element level = anchor; level != body; level = level.parent()) {
            removePrecedingSiblings(level);
        }
    }

    /** Tells whether {@code ancestor} is a proper ancestor of {@code element}. */
    private static boolean isInside(Element element, Element ancestor) {
        for (Element parent = element.parent(); parent != null; parent = parent.parent()) {
            if (parent == ancestor) {
                return true;
            }
        }
        return false;
    }

    /** Removes every element sibling located in front of {@code element}. */
    private static void removePrecedingSiblings(Element element) {
        Element parent = element.parent();
        if (parent == null) {
            return;
        }
        for (Element sibling : parent.children()) {
            if (sibling == element) {
                break;
            }
            sibling.remove();
        }
    }

    /** Removes the element with the given id together with its following element siblings, if it exists. */
    private static void removeFromAnchor(org.jsoup.nodes.Document doc, String anchorId) {
        Element anchor = doc.getElementById(anchorId);
        if (anchor != null) {
            removeWithFollowingSiblings(anchor);
        }
    }

    /**
     * Removes {@code anchor} and every element sibling after it.
     * A plain walk over the siblings is used because an earlier CSS-selector version
     * hung on large documents.
     */
    private static void removeWithFollowingSiblings(Element anchor) {
        Element parent = anchor.parent();
        if (parent == null) {
            return;
        }
        boolean reached = false;
        for (Element sibling : parent.children()) {
            if (sibling == anchor) {
                reached = true;
            }
            if (reached) {
                sibling.remove();
            }
        }
    }

    /** Collects the usable resource hrefs found in {@code attribute} of the given elements. */
    private static void collectHrefs(Elements elements, String attribute, Set<String> target) {
        for (Element element : elements) {
            String href = toResourceHref(element.attr(attribute));
            if (href != null) {
                target.add(href);
            }
        }
    }

    /**
     * Turns a reference found in a chapter into the href used for the lookup in the source book by
     * stripping leading {@code ../} segments.
     *
     * @return the href to look up, or {@code null} when nothing usable is left
     */
    private static String toResourceHref(String rawHref) {
        if (rawHref == null) {
            return null;
        }
        String href = rawHref;
        while (href.startsWith("../")) {
            href = href.substring(3);
        }
        return href.isEmpty() ? null : href;
    }

    /** Replaces the characters that are not allowed in Windows file names with an underscore. */
    private static String toSafeFileName(String title) {
        return String.valueOf(title).replaceAll("[\\\\/:*?\"<>|]", "_");
    }
}