Compare commits

...

6 Commits

Author SHA1 Message Date
truthhun 1220eb55f2 文档转换
6 months ago
truthhun 04eafd7f16 inkscape required
6 months ago
truthhun 3a1bec6f42 svg处理
6 months ago
truthhun 785b5ac5a9 文档转换处理
6 months ago
truthhun 1dc153e5df reconvert
6 months ago
truthhun dd8dd11043 支持webp
6 months ago

@ -250,7 +250,7 @@ func (s *ConfigAPIService) GetEnvs(ctx context.Context, req *emptypb.Empty) (res
Name: "Inkscape", Name: "Inkscape",
Description: "Inkscape是一个自由开源的矢量图形编辑器。在mupdf处理PDF出现兼容问题失败时自动切换inkscape来处理。", Description: "Inkscape是一个自由开源的矢量图形编辑器。在mupdf处理PDF出现兼容问题失败时自动切换inkscape来处理。",
Cmd: "inkscape", Cmd: "inkscape",
IsRequired: false, IsRequired: true,
}, },
{ {
Name: "SVGO", Name: "SVGO",

@ -0,0 +1,66 @@
/*
Copyright © 2023 NAME HERE <EMAIL ADDRESS>
*/
package cmd
import (
"fmt"
"moredoc/service"
"strings"
"github.com/spf13/cobra"
)
var (
ext string // target preview file extension: png, jpg or webp
documentId int64 // document ID to reconvert: 0 means all documents, a value greater than 0 selects a single document
)
// reconvertCmd represents the reconvert command.
//
// It normalizes the requested preview extension, requires an explicit
// document ID, asks the operator to confirm on stdin and then hands the work
// over to service.Reconvert.
var reconvertCmd = &cobra.Command{
	Use:   "reconvert",
	Short: "文档重转",
	Long: `pngjpgwebp便
pngjpgwebp
`,
	Run: func(cmd *cobra.Command, args []string) {
		// Only png, jpg and webp are accepted; anything else falls back to webp.
		ext = strings.ToLower(ext)
		switch ext {
		case "png", "jpg", "webp":
		default:
			ext = "webp"
		}

		// A negative ID (the flag default is -1) means --id was not supplied.
		if documentId < 0 {
			fmt.Println("\n请用--id指定的文档ID, 0表示全部大于0表示指定文档。如需重转全部文档建议先指定一个文档进行测试查验效果是否符合需求。")
			fmt.Println("\n按回车键退出...")
			fmt.Scanln()
			return
		}

		// Show what is about to happen and wait for an explicit confirmation.
		fmt.Println("\n请确认是否重转文档")
		if documentId > 0 {
			fmt.Print("转换文档ID", documentId)
		} else {
			fmt.Print("转换文档:全部文档")
		}
		fmt.Println(";文档预览格式:", ext)
		fmt.Println("按 Y 确认,按其他键取消和退出...")

		var answer string
		fmt.Scanln(&answer)
		switch answer {
		case "Y", "y":
			service.Reconvert(cfg, logger, ext, documentId)
		default:
			fmt.Println("\n已取消重转文档。")
		}
	},
}
func init() {
rootCmd.AddCommand(reconvertCmd)
reconvertCmd.Flags().StringVarP(&ext, "ext", "e", "webp", "指定的文档预览格式png、jpg、webp")
reconvertCmd.Flags().Int64VarP(&documentId, "id", "d", -1, "指定的文档ID, 0表示全部大于0表示指定文档。如需重转全部文档建议先指定一个文档进行测试查验效果是否符合需求。")
}

@ -39,13 +39,8 @@ var (
// rootCmd represents the base command when called without any subcommands // rootCmd represents the base command when called without any subcommands
var rootCmd = &cobra.Command{ var rootCmd = &cobra.Command{
Use: "moredoc", Use: "moredoc",
Short: "A brief description of your application", Short: "魔豆文库,文库系统解决方案",
Long: `A longer description that spans multiple lines and likely contains Long: `魔豆文库使用Go语言开发的类似百度文库、新浪爱问文库的文库系统解决方案支持 TXT、PDF、EPUB、MOBI、Office 等格式文档的在线预览与管理,为 dochub文库的重构版本。`,
examples and usage of using your application. For example:
Cobra is a CLI library for Go that empowers applications.
This application is a tool to generate the needed files
to quickly create a Cobra application.`,
} }
// Execute adds all child commands to the root command and sets flags appropriately. // Execute adds all child commands to the root command and sets flags appropriately.

@ -30,8 +30,8 @@ import (
// serveCmd represents the serve command // serveCmd represents the serve command
var serveCmd = &cobra.Command{ var serveCmd = &cobra.Command{
Use: "serve", Use: "serve",
Short: "start a server", Short: "启动服务",
Long: `start a server`, Long: `启动魔豆文库程序服务,提供文档管理与预览。`,
Run: func(cmd *cobra.Command, args []string) { Run: func(cmd *cobra.Command, args []string) {
util.Version = Version util.Version = Version
util.Hash = GitHash util.Hash = GitHash

@ -5,7 +5,7 @@ Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License. you may not use this file except in compliance with the License.
You may obtain a copy of the License at You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0 http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS, distributed under the License is distributed on an "AS IS" BASIS,
@ -24,8 +24,8 @@ import (
// syncdbCmd represents the syncdb command // syncdbCmd represents the syncdb command
var syncdbCmd = &cobra.Command{ var syncdbCmd = &cobra.Command{
Use: "syncdb", Use: "syncdb",
Short: "sync database scheme", Short: "同步数据库",
Long: `sync database scheme.`, Long: `同步数据库表结构以及初始化数据。`,
Run: func(cmd *cobra.Command, args []string) { Run: func(cmd *cobra.Command, args []string) {
service.SyncDB(cfg, logger) service.SyncDB(cfg, logger)
}, },

@ -5,7 +5,7 @@ Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License. you may not use this file except in compliance with the License.
You may obtain a copy of the License at You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0 http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS, distributed under the License is distributed on an "AS IS" BASIS,
@ -27,11 +27,11 @@ var (
BuildAt = "unknown" BuildAt = "unknown"
) )
//versionCmd represents the version command // versionCmd represents the version command
var versionCmd = &cobra.Command{ var versionCmd = &cobra.Command{
Use: "version", Use: "version",
Short: "show current version information.", Short: "查看版本信息",
Long: `show current version information.`, Long: `查看魔豆文库版本信息如版本号、构建时间、Git提交哈希值等。`,
Run: func(cmd *cobra.Command, args []string) { Run: func(cmd *cobra.Command, args []string) {
fmt.Println("") fmt.Println("")
fmt.Println("AppName: ", "moredoc") fmt.Println("AppName: ", "moredoc")

@ -704,7 +704,7 @@ func (m *DBModel) initConfig() (err error) {
// 转换配置项 // 转换配置项
{Category: ConfigCategoryConverter, Name: ConfigConverterMaxPreview, Label: "最大预览页数", Value: "12", Placeholder: "文档允许的最大预览页数0表示不限制", InputType: InputTypeNumber, Sort: 0, Options: ""}, {Category: ConfigCategoryConverter, Name: ConfigConverterMaxPreview, Label: "最大预览页数", Value: "12", Placeholder: "文档允许的最大预览页数0表示不限制", InputType: InputTypeNumber, Sort: 0, Options: ""},
{Category: ConfigCategoryConverter, Name: ConfigConverterTimeout, Label: "转换超时(分钟)", Value: "30", Placeholder: "文档转换超时时间默认为30分钟", InputType: InputTypeNumber, Sort: 10, Options: ""}, {Category: ConfigCategoryConverter, Name: ConfigConverterTimeout, Label: "转换超时(分钟)", Value: "30", Placeholder: "文档转换超时时间默认为30分钟", InputType: InputTypeNumber, Sort: 10, Options: ""},
{Category: ConfigCategoryConverter, Name: ConfigConverterExtension, Label: "预览格式", Value: "svg", Placeholder: "将文档转为特定格式以供预览", InputType: InputTypeSelect, Sort: 20, Options: "svg:SVG\njpg:JPEG\npng:PNG"}, {Category: ConfigCategoryConverter, Name: ConfigConverterExtension, Label: "预览格式", Value: "svg", Placeholder: "将文档转为特定格式以供预览", InputType: InputTypeSelect, Sort: 20, Options: "svg:SVG\njpg:JPEG\npng:PNG\nwebp:WEBP"},
{Category: ConfigCategoryConverter, Name: ConfigConverterEnableGZIP, Label: "是否启用GZIP压缩", Value: "true", Placeholder: "是否对文档SVG预览文件启用GZIP压缩启用后转换效率会【稍微】下降但相对直接的SVG文件减少75%的存储空间", InputType: InputTypeSwitch, Sort: 30, Options: ""}, {Category: ConfigCategoryConverter, Name: ConfigConverterEnableGZIP, Label: "是否启用GZIP压缩", Value: "true", Placeholder: "是否对文档SVG预览文件启用GZIP压缩启用后转换效率会【稍微】下降但相对直接的SVG文件减少75%的存储空间", InputType: InputTypeSwitch, Sort: 30, Options: ""},
{Category: ConfigCategoryConverter, Name: ConfigConverterEnableSVGO, Label: "是否启用SVGO", Value: "false", Placeholder: "是否对文档SVG预览文件启用SVGO压缩启用后转换效率会【明显】下降但相对直接的SVG文件减少50%左右的存储空间", InputType: InputTypeSwitch, Sort: 40, Options: ""}, {Category: ConfigCategoryConverter, Name: ConfigConverterEnableSVGO, Label: "是否启用SVGO", Value: "false", Placeholder: "是否对文档SVG预览文件启用SVGO压缩启用后转换效率会【明显】下降但相对直接的SVG文件减少50%左右的存储空间", InputType: InputTypeSwitch, Sort: 40, Options: ""},
{Category: ConfigCategoryConverter, Name: ConfigConverterEnableConvertRepeatedDocument, Label: "是否转换重复文档", Value: "false", Placeholder: "对于已转换过的文档,再次被上传时是否再转换一次", InputType: InputTypeSwitch, Sort: 50, Options: ""}, {Category: ConfigCategoryConverter, Name: ConfigConverterEnableConvertRepeatedDocument, Label: "是否转换重复文档", Value: "false", Placeholder: "对于已转换过的文档,再次被上传时是否再转换一次", InputType: InputTypeSwitch, Sort: 50, Options: ""},

@ -740,7 +740,7 @@ func (m *DBModel) ConvertDocument() (err error) {
document.Status = DocumentStatusConverted document.Status = DocumentStatusConverted
document.EnableGZIP = cfg.EnableGZIP document.EnableGZIP = cfg.EnableGZIP
document.PreviewExt = ext document.PreviewExt = strings.TrimPrefix(ext, ".gzip")
err = m.db.Select("description", "cover", "width", "height", "preview", "pages", "status", "enable_gzip", "preview_ext").Where("id = ?", document.Id).Updates(document).Error err = m.db.Select("description", "cover", "width", "height", "preview", "pages", "status", "enable_gzip", "preview_ext").Where("id = ?", document.Id).Updates(document).Error
if err != nil { if err != nil {
m.SetDocumentStatus(document.Id, DocumentStatusFailed) m.SetDocumentStatus(document.Id, DocumentStatusFailed)

@ -2,7 +2,12 @@ package model
import ( import (
"bytes" "bytes"
"compress/gzip"
"errors"
"fmt" "fmt"
"io"
"moredoc/util"
"moredoc/util/converter"
"moredoc/util/sitemap" "moredoc/util/sitemap"
"os" "os"
"path/filepath" "path/filepath"
@ -15,8 +20,13 @@ import (
"gorm.io/gorm" "gorm.io/gorm"
) )
// reconvertDocument is the progress record that reconvertAllDocument reads
// from and writes to cache/reconvert.json as JSON.
type reconvertDocument struct {
Id int64 `json:"id"` // ID of the last document handed to reconvertDocument
}
var ( var (
isCreatingSitemap bool isCreatingSitemap bool
cacheReconvert = "cache/reconvert"
) )
// UpdateSitemap 更新站点地图 // UpdateSitemap 更新站点地图
@ -325,3 +335,182 @@ func (m *DBModel) loopCovertDocument() {
} }
} }
} }
// ReconvertDocoument re-renders already converted preview pages into the
// preview format given by ext, which may be passed with or without a leading
// dot.
//
// The cacheReconvert working directory is wiped and recreated first. When
// documentId is less than or equal to 0 every converted document is
// processed; otherwise only the given document is processed, and only if its
// status is DocumentStatusConverted. Failures are logged rather than returned.
func (m *DBModel) ReconvertDocoument(documentId int64, ext string) {
	ext = "." + strings.TrimLeft(ext, ".")

	// Start from an empty working directory. Failing to clean it is only
	// worth a warning: per-document sub-directories are recreated anyway.
	if err := os.RemoveAll(cacheReconvert); err != nil {
		m.logger.Warn("ReconvertDocoument", zap.String("dir", cacheReconvert), zap.Error(err))
	}
	// Without the working directory nothing below can succeed, so stop here.
	if err := os.MkdirAll(cacheReconvert, os.ModePerm); err != nil {
		m.logger.Error("ReconvertDocoument", zap.String("dir", cacheReconvert), zap.Error(err))
		return
	}

	if documentId <= 0 {
		m.reconvertAllDocument(ext)
		return
	}

	doc, err := m.GetDocument(documentId)
	if err != nil {
		m.logger.Error("ReconvertDocoument", zap.Error(err))
		return
	}

	// Only documents that already have preview pages can be reconverted.
	if doc.Status != DocumentStatusConverted {
		m.logger.Error("ReconvertDocoument", zap.Error(errors.New("文档不是已转换的文档,不能重转")))
		return
	}

	m.reconvertDocument(&doc, ext)
}
// reconvertDocument converts the stored preview pages of doc into the format
// given by ext, which must include the leading dot (for example ".webp").
//
// Steps:
//  1. copy every existing preview page into a per-document cache directory,
//     gunzipping gzip-compressed SVG pages;
//  2. convert each page: SVG pages are first rendered to PNG with Inkscape and
//     then, unless PNG is the target, converted with ImageMagick; all other
//     pages go straight through ImageMagick;
//  3. copy the converted pages next to the original preview pages;
//  4. update preview_ext and enable_gzip for every document that shares the
//     attachment hash;
//  5. delete the old preview pages.
//
// Any failure is logged and aborts the remaining steps. The per-document
// cache directory is removed when the function returns.
func (m *DBModel) reconvertDocument(doc *Document, ext string) {
	m.logger.Debug("reconvertDocument", zap.Any("doc", doc), zap.String("ext", ext))
	if doc.PreviewExt == ext {
		m.logger.Info("reconvertDocument", zap.String("msg", "文档预览文件格式与指定格式一致,无需重转"), zap.String("document", doc.Title+doc.Ext))
		return
	}

	// 1. Fetch the existing preview pages.
	attachment := m.GetAttachmentByTypeAndTypeId(AttachmentTypeDocument, doc.Id, "id", "hash")
	if attachment.Id == 0 {
		m.logger.Error("reconvertDocument", zap.String("msg", "文档预览文件不存在"), zap.String("document", doc.Title+doc.Ext))
		return
	}

	cacheDir := filepath.Join(cacheReconvert, strconv.FormatInt(doc.Id, 10))
	os.MkdirAll(cacheDir, os.ModePerm)
	defer os.RemoveAll(cacheDir)

	totalPreview := doc.Preview
	if totalPreview == 0 {
		totalPreview = doc.Pages
	}

	// Preview pages live under documents/<h0>/<h1>/<h2>/<h3>/<h4>/<hash>/.
	// The directory is only needed when there is at least one page; refuse a
	// hash that is too short instead of panicking on the slice expression.
	var previewDir string
	if totalPreview > 0 {
		hashChars := strings.Split(attachment.Hash, "")
		if len(hashChars) < 5 {
			m.logger.Error("reconvertDocument", zap.String("msg", "文档附件hash不合法"), zap.String("document", doc.Title+doc.Ext), zap.String("hash", attachment.Hash))
			return
		}
		previewDir = fmt.Sprintf("documents/%s/%s", strings.Join(hashChars[:5], "/"), attachment.Hash)
	}

	// The on-disk extension and gzip state are identical for every page, so
	// work them out once instead of once per page.
	oldExt := doc.PreviewExt
	isGZIP := false
	if doc.EnableGZIP && strings.HasSuffix(oldExt, ".svg") {
		oldExt = ".gzip.svg"
		isGZIP = true
	}
	isSVG := strings.HasSuffix(oldExt, ".svg")

	var (
		convertedTargets []string
		oldSrcFiles      []string
	)
	for i := 1; i <= totalPreview; i++ {
		// Working copy inside the cache directory.
		dstFile := filepath.Join(cacheDir, fmt.Sprintf("%d%s", i, oldExt))
		// Existing preview page.
		srcFile := fmt.Sprintf("%s/%d%s", previewDir, i, oldExt)
		oldSrcFiles = append(oldSrcFiles, srcFile)

		if err := util.CopyFile(srcFile, dstFile); err != nil {
			m.logger.Error("reconvertDocument", zap.String("msg", "下载文档预览文件失败"), zap.String("document", doc.Title+doc.Ext), zap.Error(err))
			return
		}
		m.logger.Debug("reconvertDocument", zap.Bool("isGZIP", isGZIP), zap.String("msg", "下载文档预览文件成功"), zap.String("document", doc.Title+doc.Ext), zap.String("srcFile", srcFile), zap.String("dstFile", dstFile))

		if isGZIP {
			// Decompress in place so the converters see plain SVG.
			m.ungzipSVG(dstFile)
		}

		// 2. Convert the page.
		convertedTargetFile := filepath.Join(cacheDir, fmt.Sprintf("%d%s", i, ext))
		var err error
		if isSVG {
			// SVG pages are rendered to PNG by Inkscape first.
			tmpFile := filepath.Join(cacheDir, fmt.Sprintf("tmp-%d.png", i))
			err = converter.ConvertByInkscape(dstFile, tmpFile)
			if err == nil {
				if strings.HasSuffix(convertedTargetFile, ".png") {
					// PNG is the target: the Inkscape output is the result.
					convertedTargetFile = tmpFile
				} else {
					// Any other target goes through ImageMagick.
					err = converter.ConvertByImageMagick(tmpFile, convertedTargetFile)
					os.RemoveAll(tmpFile)
				}
			}
		} else {
			err = converter.ConvertByImageMagick(dstFile, convertedTargetFile)
		}
		if err != nil {
			m.logger.Error("reconvertDocument", zap.String("msg", "转换文档预览文件失败"), zap.String("document", doc.Title+doc.Ext), zap.Error(err))
			return
		}
		convertedTargets = append(convertedTargets, convertedTargetFile)
	}

	// 3. Publish the converted pages next to the originals.
	for i, srcFile := range convertedTargets {
		dstFile := fmt.Sprintf("%s/%d%s", previewDir, i+1, ext)
		if err := util.CopyFile(srcFile, dstFile); err != nil {
			m.logger.Error("reconvertDocument", zap.String("msg", "上传文档预览文件失败"), zap.String("document", doc.Title+doc.Ext), zap.Error(err))
			return
		}
	}

	// 4. Update the preview extension of every document that shares the hash.
	var (
		attachments []Attachment
		err         error
		data        = map[string]interface{}{
			"preview_ext": ext,
			"enable_gzip": false,
		}
	)
	err = m.db.Select("id", "type_id").Where("hash = ? and `type` = ?", attachment.Hash, AttachmentTypeDocument).Find(&attachments).Error
	if err != nil {
		// Stop before deleting anything: documents sharing this hash would
		// otherwise keep pointing at preview files that no longer exist.
		m.logger.Error("reconvertDocument", zap.String("msg", "查询相同hash的文档附件失败"), zap.String("document", doc.Title+doc.Ext), zap.Error(err))
		return
	}
	if len(attachments) > 0 {
		var ids []int64
		for _, item := range attachments {
			ids = append(ids, item.TypeId)
		}
		err = m.db.Model(&Document{}).Where("id IN (?)", ids).Updates(data).Error
	} else {
		err = m.db.Model(doc).Updates(data).Error
	}
	if err != nil {
		m.logger.Error("reconvertDocument", zap.String("msg", "更新文档预览文件后缀失败"), zap.String("document", doc.Title+doc.Ext), zap.Error(err))
		return
	}

	// 5. Remove the old preview pages. This is best effort: a leftover file
	// only wastes space, so removal errors are deliberately ignored.
	for _, file := range oldSrcFiles {
		os.Remove(file)
	}
}
// reconvertAllDocument reconverts, in ascending ID order, every document whose
// status is DocumentStatusConverted into the preview format ext.
//
// After each document the last processed ID is written to
// cache/reconvert.json, and the loop starts after the ID found in that file,
// so an interrupted run continues where it stopped.
func (m *DBModel) reconvertAllDocument(ext string) {
	const progressFile = "cache/reconvert.json"

	var cfg reconvertDocument
	data, err := os.ReadFile(progressFile)
	switch {
	case err == nil:
		if err = json.Unmarshal(data, &cfg); err != nil {
			m.logger.Warn("reconvertAllDocument", zap.String("file", progressFile), zap.Error(err))
		}
	case !errors.Is(err, os.ErrNotExist):
		// A missing file simply means a fresh run; anything else is worth noting.
		m.logger.Warn("reconvertAllDocument", zap.String("file", progressFile), zap.Error(err))
	}

	for {
		var doc Document
		// Only the next document is needed, so cap the query at one row
		// instead of selecting every remaining document on each iteration.
		err = m.db.Where("id > ?", cfg.Id).Where("status = ?", DocumentStatusConverted).Order("id asc").Limit(1).Find(&doc).Error
		if err != nil {
			m.logger.Error("reconvertAllDocument", zap.Error(err))
			return
		}
		if doc.Id == 0 {
			break
		}

		m.reconvertDocument(&doc, ext)

		// Checkpoint the progress. A failure here must not stop the run; it
		// only means a restart would redo some documents.
		cfg.Id = doc.Id
		data, err = json.Marshal(cfg)
		if err != nil {
			m.logger.Warn("reconvertAllDocument", zap.Error(err))
			continue
		}
		if err = os.WriteFile(progressFile, data, os.ModePerm); err != nil {
			m.logger.Warn("reconvertAllDocument", zap.String("file", progressFile), zap.Error(err))
		}
	}
}
// ungzipSVG replaces the gzip-compressed file at path svg with its
// decompressed content, in place. Errors are logged, not returned.
//
// The payload is fully decompressed in memory before the file is rewritten,
// so a corrupt archive leaves the original file untouched instead of a
// truncated one.
func (m *DBModel) ungzipSVG(svg string) {
	m.logger.Info("ungzipSVG", zap.String("svg", svg))

	compressed, err := os.ReadFile(svg)
	if err != nil {
		m.logger.Error("ungzipSVG", zap.Error(err))
		return
	}

	gz, err := gzip.NewReader(bytes.NewReader(compressed))
	if err != nil {
		m.logger.Error("ungzipSVG", zap.Error(err))
		return
	}
	defer gz.Close()

	plain, err := io.ReadAll(gz)
	if err != nil {
		m.logger.Error("ungzipSVG", zap.Error(err))
		return
	}

	// The file already exists, so the mode is only a fallback; 0666 matches
	// what os.Create would have used.
	if err = os.WriteFile(svg, plain, 0666); err != nil {
		m.logger.Error("ungzipSVG", zap.Error(err))
	}
}

@ -0,0 +1,19 @@
package service
import (
"moredoc/conf"
"moredoc/model"
"go.uber.org/zap"
)
// Reconvert opens the database described by cfg.Database and reconverts the
// preview pages of documentId into the format ext via
// DBModel.ReconvertDocoument, logging before and after the run.
func Reconvert(cfg *conf.Config, logger *zap.Logger, ext string, documentId int64) {
	dbModel, err := model.NewDBModel(&cfg.Database, logger)
	if err != nil {
		logger.Fatal("NewDBModel", zap.Error(err))
		return
	}

	idField := zap.Int64("documentId", documentId)
	extField := zap.String("ext", ext)

	logger.Info("Reconvert", idField, extField)
	dbModel.ReconvertDocoument(documentId, ext)
	logger.Info("Reconvert", idField, extField, zap.String("status", "done!"))
}

@ -176,15 +176,25 @@ type OptionConvertPages struct {
// ConvertPDFToPages 将PDF转为预览页 // ConvertPDFToPages 将PDF转为预览页
func (c *Converter) ConvertPDFToPages(src string, fromPage, toPage int, option *OptionConvertPages) (pages []Page, err error) { func (c *Converter) ConvertPDFToPages(src string, fromPage, toPage int, option *OptionConvertPages) (pages []Page, err error) {
switch strings.TrimLeft(option.Extension, ".") { ext := strings.TrimLeft(option.Extension, ".")
switch ext {
case "png": case "png":
return c.ConvertPDFToPNG(src, fromPage, toPage) return c.ConvertPDFToPNG(src, fromPage, toPage)
case "jpg": case "jpg", "webp":
// 见将pdf转为png然后png再转为jpg // 见将pdf转为png然后png再转为jpg
pages, err = c.ConvertPDFToPNG(src, fromPage, toPage) pages, err = c.ConvertPDFToPNG(src, fromPage, toPage)
// 通过imagemagick将图片转为jpg // 通过imagemagick将图片转为jpg
var (
dst string
errConvert error
)
for idx, page := range pages { for idx, page := range pages {
if dst, errConvert := c.ConvertPNGToJPG(page.PagePath); errConvert == nil { if ext != "webp" {
dst, errConvert = c.ConvertPNGToJPG(page.PagePath)
} else {
dst, errConvert = c.ConvertPNGToWEBP(page.PagePath)
}
if errConvert == nil {
os.Remove(page.PagePath) os.Remove(page.PagePath)
page.PagePath = dst page.PagePath = dst
pages[idx] = page pages[idx] = page
@ -212,6 +222,22 @@ func (c *Converter) ConvertPNGToJPG(src string) (dst string, err error) {
return return
} }
// ConvertPNGToWEBP converts the PNG image at src to WebP by running the
// imageMagick command with src and dst as its arguments. The output path is
// src with its extension replaced by ".webp"; it is returned as dst together
// with any error reported by the command.
func (c *Converter) ConvertPNGToWEBP(src string) (dst string, err error) {
	dst = strings.TrimSuffix(src, filepath.Ext(src)) + ".webp"

	// Input path first, output path second.
	cmdArgs := []string{src, dst}

	c.logger.Debug("convert png to webp", zap.String("cmd", imageMagick), zap.Strings("args", cmdArgs))
	if _, err = command.ExecCommand(imageMagick, cmdArgs, c.timeout); err != nil {
		c.logger.Error("convert png to webp", zap.String("cmd", imageMagick), zap.Strings("args", cmdArgs), zap.Error(err))
	}
	return dst, err
}
func (c *Converter) ConvertPDFToJPG(src string, fromPage, toPage int) (pages []Page, err error) { func (c *Converter) ConvertPDFToJPG(src string, fromPage, toPage int) (pages []Page, err error) {
return c.convertPDFToPage(src, fromPage, toPage, ".jpg") return c.convertPDFToPage(src, fromPage, toPage, ".jpg")
} }
@ -512,3 +538,22 @@ func (c *Converter) Clean() (err error) {
} }
return return
} }
// ConvertByImageMagick runs the imageMagick command with the arguments
// "<src> [moreArgs...] <dst>" and returns the error reported by the command.
func ConvertByImageMagick(src, dst string, moreArgs ...string) (err error) {
	cmdArgs := make([]string, 0, len(moreArgs)+2)
	cmdArgs = append(cmdArgs, src)
	cmdArgs = append(cmdArgs, moreArgs...)
	cmdArgs = append(cmdArgs, dst)

	_, err = command.ExecCommand(imageMagick, cmdArgs)
	return err
}
// ConvertByInkscape converts src to dst with Inkscape, for example an SVG to
// a PNG. The command is run with the arguments "-o <dst> [moreArgs...] <src>"
// and its error is returned.
func ConvertByInkscape(src, dst string, moreArgs ...string) (err error) {
	cmdArgs := append([]string{"-o", dst}, moreArgs...)
	cmdArgs = append(cmdArgs, src)

	_, err = command.ExecCommand(inkscape, cmdArgs)
	return err
}

Loading…
Cancel
Save