You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
333 lines
9.1 KiB
333 lines
9.1 KiB
package converter
|
|
|
|
import (
|
|
"bytes"
|
|
"compress/gzip"
|
|
"fmt"
|
|
"moredoc/util"
|
|
"os"
|
|
"os/exec"
|
|
"path/filepath"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
|
|
"go.uber.org/zap"
|
|
)
|
|
|
|
// Names of the external command-line programs this package runs, plus the
// layout of the time-bucketed cache sub-directory.
const (
	// dirDteFmt is the time.Format layout (year/month/day/hour) used to
	// build the cache sub-directory that each output file is placed in.
	dirDteFmt = "2006/01/02/15"

	// soffice is the command used for office-style documents.
	soffice = "soffice"
	// ebookConvert is the Calibre command used for e-book formats.
	ebookConvert = "ebook-convert"
	// mutool is the MuPDF command used for PDF inspection and rendering.
	mutool = "mutool"
	// svgo is the command used to optimise generated SVG files.
	svgo = "svgo"
)
|
|
|
|
// ConvertCallback is the signature of a per-page callback: it receives a page
// number, the path of that page's file, and an error value.
// NOTE(review): nothing in the visible part of this file invokes it.
type ConvertCallback func(page int, pagePath string, err error)
|
|
|
|
// Converter converts documents by running external command-line tools and
// writes the results under a cache directory.
type Converter struct {
	// cachePath is the root directory under which output files are written.
	cachePath string
	// timeout is handed to util.ExecCommand for each external command.
	timeout time.Duration
	// logger receives the debug and error messages of every conversion.
	logger *zap.Logger
}
|
|
|
|
// Page describes one page file produced from a PDF.
type Page struct {
	// PageNum is the page's number within the source PDF.
	PageNum int
	// PagePath is the path of the file generated for that page.
	PagePath string
}
|
|
|
|
func NewConverter(logger *zap.Logger, timeout ...time.Duration) *Converter {
|
|
expire := 1 * time.Hour
|
|
if len(timeout) > 0 {
|
|
expire = timeout[0]
|
|
}
|
|
defaultCachePath := "cache/convert"
|
|
os.MkdirAll(defaultCachePath, os.ModePerm)
|
|
return &Converter{
|
|
cachePath: defaultCachePath,
|
|
timeout: expire,
|
|
logger: logger.Named("converter"),
|
|
}
|
|
}
|
|
|
|
func (c *Converter) SetCachePath(cachePath string) {
|
|
os.MkdirAll(cachePath, os.ModePerm)
|
|
c.cachePath = cachePath
|
|
}
|
|
|
|
// ConvertToPDF 将文件转为PDF。
|
|
// 自动根据文件类型调用相应的转换函数。
|
|
func (c *Converter) ConvertToPDF(src string) (dst string, err error) {
|
|
ext := strings.ToLower(filepath.Ext(src))
|
|
switch ext {
|
|
case ".epub":
|
|
return c.ConvertEPUBToPDF(src)
|
|
case ".umd":
|
|
return c.ConvertUMDToPDF(src)
|
|
case ".txt":
|
|
return c.ConvertTXTToPDF(src)
|
|
case ".mobi":
|
|
return c.ConvertMOBIToPDF(src)
|
|
case ".chm":
|
|
return c.ConvertCHMToPDF(src)
|
|
case ".pdf":
|
|
return c.PDFToPDF(src)
|
|
// case ".doc", ".docx", ".rtf", ".wps", ".odt",
|
|
// ".xls", ".xlsx", ".et", ".ods",
|
|
// ".ppt", ".pptx", ".dps", ".odp", ".pps", ".ppsx", ".pot", ".potx":
|
|
// return c.ConvertOfficeToPDF(src)
|
|
default:
|
|
return c.ConvertOfficeToPDF(src)
|
|
// return "", fmt.Errorf("不支持的文件类型:%s", ext)
|
|
}
|
|
}
|
|
|
|
// ConvertOfficeToPDF 通过soffice将office文档转换为pdf
|
|
func (c *Converter) ConvertOfficeToPDF(src string) (dst string, err error) {
|
|
return c.convertToPDFBySoffice(src)
|
|
}
|
|
|
|
// ConvertEPUBToPDF 将 epub 转为PDF
|
|
func (c *Converter) ConvertEPUBToPDF(src string) (dst string, err error) {
|
|
return c.convertToPDFByCalibre(src)
|
|
}
|
|
|
|
// ConvertUMDToPDF 将umd转为PDF
|
|
func (c *Converter) ConvertUMDToPDF(src string) (dst string, err error) {
|
|
return c.convertToPDFBySoffice(src)
|
|
}
|
|
|
|
// ConvertTXTToPDF 将 txt 转为PDF
|
|
func (c *Converter) ConvertTXTToPDF(src string) (dst string, err error) {
|
|
return c.convertToPDFBySoffice(src)
|
|
}
|
|
|
|
// ConvertMOBIToPDF 将 mobi 转为PDF
|
|
func (c *Converter) ConvertMOBIToPDF(src string) (dst string, err error) {
|
|
return c.convertToPDFByCalibre(src)
|
|
}
|
|
|
|
// ConvertPDFToTxt 将PDF转为TXT
|
|
func (c *Converter) ConvertPDFToTxt(src string) (dst string, err error) {
|
|
dst = strings.ReplaceAll(filepath.Join(c.cachePath, time.Now().Format(dirDteFmt), filepath.Base(src)+".txt"), "\\", "/")
|
|
args := []string{
|
|
"convert",
|
|
"-o",
|
|
dst,
|
|
src,
|
|
}
|
|
os.MkdirAll(filepath.Dir(dst), os.ModePerm)
|
|
c.logger.Debug("convert pdf to txt", zap.String("cmd", mutool), zap.Strings("args", args))
|
|
_, err = util.ExecCommand(mutool, args, c.timeout)
|
|
if err != nil {
|
|
c.logger.Error("convert pdf to txt", zap.String("cmd", mutool), zap.Strings("args", args), zap.Error(err))
|
|
return
|
|
}
|
|
return dst, nil
|
|
}
|
|
|
|
// ConvertCHMToPDF 将CHM转为PDF
|
|
func (c *Converter) ConvertCHMToPDF(src string) (dst string, err error) {
|
|
return c.convertToPDFByCalibre(src)
|
|
}
|
|
|
|
// ConvertPDFToSVG 将PDF转为SVG
|
|
func (c *Converter) ConvertPDFToSVG(src string, fromPage, toPage int, enableSVGO, enableGZIP bool) (pages []Page, err error) {
|
|
pages, err = c.convertPDFToPage(src, fromPage, toPage, ".svg")
|
|
if err != nil {
|
|
return
|
|
}
|
|
|
|
if len(pages) == 0 {
|
|
return
|
|
}
|
|
|
|
if enableSVGO { // 压缩svg
|
|
c.CompressSVGBySVGO(filepath.Dir(pages[0].PagePath))
|
|
}
|
|
|
|
if enableGZIP { // gzip 压缩
|
|
for idx, page := range pages {
|
|
if dst, errCompress := c.CompressSVGByGZIP(page.PagePath); errCompress == nil {
|
|
os.Remove(page.PagePath)
|
|
page.PagePath = dst
|
|
pages[idx] = page
|
|
}
|
|
}
|
|
}
|
|
return
|
|
}
|
|
|
|
// ConvertPDFToPNG 将PDF转为PNG
|
|
func (c *Converter) ConvertPDFToPNG(src string, fromPage, toPage int) (pages []Page, err error) {
|
|
return c.convertPDFToPage(src, fromPage, toPage, ".png")
|
|
}
|
|
|
|
func (c *Converter) PDFToPDF(src string) (dst string, err error) {
|
|
dst = strings.ReplaceAll(filepath.Join(c.cachePath, time.Now().Format(dirDteFmt), filepath.Base(src)), "\\", "/")
|
|
err = util.CopyFile(src, dst)
|
|
if err != nil {
|
|
c.logger.Error("copy file error", zap.Error(err))
|
|
}
|
|
return
|
|
}
|
|
|
|
// ext 可选值: .png, .svg
|
|
func (c *Converter) convertPDFToPage(src string, fromPage, toPage int, ext string) (pages []Page, err error) {
|
|
pageRange := fmt.Sprintf("%d-%d", fromPage, toPage)
|
|
cacheFile := strings.ReplaceAll(filepath.Join(c.cachePath, time.Now().Format(dirDteFmt), strings.TrimSuffix(filepath.Base(src), filepath.Ext(src))+"/%d"+ext), "\\", "/")
|
|
args := []string{
|
|
"convert",
|
|
"-o",
|
|
cacheFile,
|
|
src,
|
|
pageRange,
|
|
}
|
|
os.MkdirAll(filepath.Dir(cacheFile), os.ModePerm)
|
|
c.logger.Debug("convert pdf to page", zap.String("cmd", mutool), zap.Strings("args", args))
|
|
_, err = util.ExecCommand(mutool, args, c.timeout)
|
|
if err != nil {
|
|
return
|
|
}
|
|
|
|
for i := 0; i <= toPage-fromPage; i++ {
|
|
pagePath := fmt.Sprintf(cacheFile, i+1)
|
|
if _, errPage := os.Stat(pagePath); errPage != nil {
|
|
break
|
|
}
|
|
pages = append(pages, Page{
|
|
PageNum: fromPage + i,
|
|
PagePath: pagePath,
|
|
})
|
|
}
|
|
return
|
|
}
|
|
|
|
func (c *Converter) convertToPDFBySoffice(src string) (dst string, err error) {
|
|
basename := filepath.Base(src)
|
|
dst = strings.ReplaceAll(filepath.Join(c.cachePath, time.Now().Format(dirDteFmt), basename, strings.TrimSuffix(basename, filepath.Ext(src))+".pdf"), "\\", "/")
|
|
args := []string{
|
|
"--headless",
|
|
"--convert-to",
|
|
"pdf",
|
|
"--outdir",
|
|
filepath.Dir(dst),
|
|
}
|
|
args = append(args, src)
|
|
os.MkdirAll(filepath.Dir(dst), os.ModePerm)
|
|
c.logger.Debug("convert to pdf by soffice", zap.String("cmd", soffice), zap.Strings("args", args))
|
|
_, err = util.ExecCommand(soffice, args, c.timeout)
|
|
if err != nil {
|
|
c.logger.Error("convert to pdf by soffice", zap.String("cmd", soffice), zap.Strings("args", args), zap.Error(err))
|
|
}
|
|
return
|
|
}
|
|
|
|
func (c *Converter) convertToPDFByCalibre(src string) (dst string, err error) {
|
|
basename := filepath.Base(src)
|
|
dst = strings.ReplaceAll(filepath.Join(c.cachePath, time.Now().Format(dirDteFmt), basename, basename+".pdf"), "\\", "/")
|
|
args := []string{
|
|
src,
|
|
dst,
|
|
"--paper-size", "a4",
|
|
// "--pdf-default-font-size", "14",
|
|
"--pdf-page-margin-bottom", "36",
|
|
"--pdf-page-margin-left", "36",
|
|
"--pdf-page-margin-right", "36",
|
|
"--pdf-page-margin-top", "36",
|
|
}
|
|
os.MkdirAll(filepath.Dir(dst), os.ModePerm)
|
|
c.logger.Debug("convert to pdf by calibre", zap.String("cmd", ebookConvert), zap.Strings("args", args))
|
|
_, err = util.ExecCommand(ebookConvert, args, c.timeout)
|
|
if err != nil {
|
|
c.logger.Error("convert to pdf by calibre", zap.String("cmd", ebookConvert), zap.Strings("args", args), zap.Error(err))
|
|
}
|
|
return
|
|
}
|
|
|
|
func (c *Converter) CountPDFPages(file string) (pages int, err error) {
|
|
args := []string{
|
|
"show",
|
|
file,
|
|
"pages",
|
|
}
|
|
c.logger.Debug("count pdf pages", zap.String("cmd", mutool), zap.Strings("args", args))
|
|
var out string
|
|
out, err = util.ExecCommand(mutool, args, c.timeout)
|
|
if err != nil {
|
|
c.logger.Error("count pdf pages", zap.String("cmd", mutool), zap.Strings("args", args), zap.Error(err))
|
|
return
|
|
}
|
|
|
|
lines := strings.Split(out, "\n")
|
|
length := len(lines)
|
|
for i := length - 1; i >= 0; i-- {
|
|
line := strings.TrimSpace(strings.ToLower(lines[i]))
|
|
c.logger.Debug("count pdf pages", zap.String("line", line))
|
|
if strings.HasPrefix(line, "page") {
|
|
pages, _ = strconv.Atoi(strings.TrimSpace(strings.TrimLeft(strings.Split(line, "=")[0], "page")))
|
|
if pages > 0 {
|
|
return
|
|
}
|
|
}
|
|
}
|
|
return
|
|
}
|
|
|
|
func (c *Converter) ExistMupdf() (err error) {
|
|
_, err = exec.LookPath(mutool)
|
|
return
|
|
}
|
|
|
|
func (c *Converter) ExistSoffice() (err error) {
|
|
_, err = exec.LookPath(soffice)
|
|
return
|
|
}
|
|
|
|
func (c *Converter) ExistCalibre() (err error) {
|
|
_, err = exec.LookPath(ebookConvert)
|
|
return
|
|
}
|
|
|
|
func (c *Converter) ExistSVGO() (err error) {
|
|
_, err = exec.LookPath(svgo)
|
|
return
|
|
}
|
|
|
|
func (c *Converter) CompressSVGBySVGO(svgFolder string) (err error) {
|
|
args := []string{
|
|
"-f",
|
|
svgFolder,
|
|
}
|
|
c.logger.Debug("compress svg by svgo", zap.String("cmd", svgo), zap.Strings("args", args))
|
|
var out string
|
|
out, err = util.ExecCommand(svgo, args, c.timeout*10)
|
|
if err != nil {
|
|
c.logger.Error("compress svg by svgo", zap.String("cmd", svgo), zap.Strings("args", args), zap.Error(err))
|
|
}
|
|
c.logger.Debug("compress svg by svgo", zap.String("out", out))
|
|
return
|
|
}
|
|
|
|
// CompressSVGByGZIP 将SVG文件压缩为GZIP格式
|
|
func (c Converter) CompressSVGByGZIP(svgFile string) (dst string, err error) {
|
|
var svgBytes []byte
|
|
ext := filepath.Ext(svgFile)
|
|
dst = strings.TrimSuffix(svgFile, ext) + ".gzip.svg"
|
|
svgBytes, err = os.ReadFile(svgFile)
|
|
if err != nil {
|
|
c.logger.Error("read svg file", zap.String("svgFile", svgFile), zap.Error(err))
|
|
return
|
|
}
|
|
var buf bytes.Buffer
|
|
w := gzip.NewWriter(&buf)
|
|
defer w.Close()
|
|
w.Write(svgBytes)
|
|
w.Flush()
|
|
err = os.WriteFile(dst, buf.Bytes(), os.ModePerm)
|
|
if err != nil {
|
|
c.logger.Error("write svgz file", zap.String("svgzFile", dst), zap.Error(err))
|
|
}
|
|
return
|
|
}
|