文档处理调整

dev
truthhun 2 years ago
parent c1620307a9
commit b86a15cd44

3
.gitignore vendored

@ -12,4 +12,5 @@ ginrpc*
dist
output
cache
uploads
uploads
documents

@ -5,4 +5,5 @@
- [ ] 每天上传的文件数量限制,用于避免用户恶意上传文档
- [ ] 根据用户组来进行额度授权
- [ ] 定时清除cache/convert目录下的文件
- [ ] 去除document表中的封面。因为封面这些都是按约定的方式存储的路径不需要存储在数据库中
- [ ] 去除document表中的封面。因为封面这些都是按约定的方式存储的路径不需要存储在数据库中
- [ ] 增加一个阅读放大器,类似购物商城那种鼠标经过,可以放大某一块

@ -203,7 +203,7 @@ func (s *AttachmentAPIService) UploadDocument(ctx *gin.Context) {
return
}
attachment, err := s.saveFile(ctx, fileheader)
attachment, err := s.saveFile(ctx, fileheader, true)
if err != nil {
os.Remove("." + attachment.Path)
ctx.JSON(http.StatusInternalServerError, ginResponse{Code: http.StatusInternalServerError, Message: err.Error(), Error: err.Error()})
@ -232,6 +232,30 @@ func (s *AttachmentAPIService) UploadConfig(ctx *gin.Context) {
s.uploadImage(ctx, model.AttachmentTypeConfig)
}
// ViewDocumentPages serves one converted page of a document.
//
// The route parameters "hash" and "page" select the file
// documents/<c1>/<c2>/<c3>/<c4>/<c5>/<hash>/<page>, where c1..c5 are the
// first five characters of hash. The response is always labelled as
// image/svg+xml; pages whose name ends in ".gzip.svg" are additionally
// labelled with "Content-Encoding: gzip" so the client decompresses them.
//
// A hash that is not exactly 32 bytes long, or a page name that is empty
// after leading '.' and '/' characters are stripped, is answered with 404.
func (s *AttachmentAPIService) ViewDocumentPages(ctx *gin.Context) {
	hash := ctx.Param("hash")
	if len(hash) != 32 {
		ctx.JSON(http.StatusNotFound, nil)
		return
	}

	// Strip leading dots and slashes so the page name cannot start with a
	// relative-path prefix.
	page := strings.TrimLeft(ctx.Param("page"), "./")
	if page == "" {
		// A page such as "." or "./" collapses to the empty string; without
		// this guard the path below would name the document directory itself
		// rather than a page file.
		ctx.JSON(http.StatusNotFound, nil)
		return
	}

	if strings.HasSuffix(page, ".gzip.svg") {
		ctx.Header("Content-Encoding", "gzip")
	}
	ctx.Header("Content-Type", "image/svg+xml")

	shardDir := strings.Join(strings.Split(hash, "")[:5], "/")
	ctx.File(fmt.Sprintf("documents/%s/%s/%s", shardDir, hash, page))
}
// ViewDocumentCover serves the cover image of a document.
//
// The route parameter "hash" selects the file
// documents/<c1>/<c2>/<c3>/<c4>/<c5>/<hash>/cover.png, where c1..c5 are the
// first five characters of hash. A hash that is not exactly 32 bytes long is
// answered with 404.
func (s *AttachmentAPIService) ViewDocumentCover(ctx *gin.Context) {
	hash := ctx.Param("hash")
	if len(hash) == 32 {
		// The first five characters of the hash each become one directory level.
		shards := strings.Split(hash, "")[:5]
		coverPath := fmt.Sprintf("documents/%s/%s/cover.png", strings.Join(shards, "/"), hash)
		ctx.File(coverPath)
		return
	}
	ctx.JSON(http.StatusNotFound, nil)
}
// UploadArticle 上传文章相关图片和视频。这里不验证文件格式。
// 注意当前适配了wangeditor的接口规范如果需要适配其他编辑器需要修改此接口或者增加其他接口
func (s *AttachmentAPIService) UploadArticle(ctx *gin.Context) {
@ -346,7 +370,7 @@ func (s *AttachmentAPIService) uploadImage(ctx *gin.Context, attachmentType int)
// saveFile 保存文件。文件以md5值命名以及存储
// 同时,返回附件信息
func (s *AttachmentAPIService) saveFile(ctx *gin.Context, fileHeader *multipart.FileHeader) (attachment *model.Attachment, err error) {
func (s *AttachmentAPIService) saveFile(ctx *gin.Context, fileHeader *multipart.FileHeader, isDocument ...bool) (attachment *model.Attachment, err error) {
cacheDir := fmt.Sprintf("cache/uploads/%s", time.Now().Format("2006/01/02"))
os.MkdirAll(cacheDir, os.ModePerm)
ext := strings.ToLower(filepath.Ext(fileHeader.Filename))
@ -369,7 +393,11 @@ func (s *AttachmentAPIService) saveFile(ctx *gin.Context, fileHeader *multipart.
return
}
savePath := fmt.Sprintf("uploads/%s/%s%s", strings.Join(strings.Split(md5hash, "")[0:5], "/"), md5hash, ext)
savePathFormat := "uploads/%s/%s%s"
if len(isDocument) > 0 && isDocument[0] {
savePathFormat = "documents/%s/%s%s"
}
savePath := fmt.Sprintf(savePathFormat, strings.Join(strings.Split(md5hash, "")[0:5], "/"), md5hash, ext)
os.MkdirAll(filepath.Dir(savePath), os.ModePerm)
err = util.CopyFile(cachePath, savePath)
if err != nil {

@ -9,7 +9,6 @@ import (
"moredoc/model"
"moredoc/util"
"github.com/gofrs/uuid"
"go.uber.org/zap"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
@ -71,11 +70,11 @@ func (s *DocumentAPIService) CreateDocument(ctx context.Context, req *pb.CreateD
}
var (
documents []model.Document
uuidAttachmentIdMap = make(map[string]int64)
jieba = util.NewJieba()
documents []model.Document
docMapAttachment = make(map[int]int64)
jieba = util.NewJieba()
)
for _, doc := range req.Document {
for idx, doc := range req.Document {
attachment, ok := attachmentMap[doc.AttachmentId]
if !ok {
continue
@ -85,14 +84,14 @@ func (s *DocumentAPIService) CreateDocument(ctx context.Context, req *pb.CreateD
Title: doc.Title,
Keywords: strings.Join(jieba.SegWords(doc.Title, 10), ","),
UserId: userCliams.UserId,
UUID: uuid.Must(uuid.NewV4()).String(),
Score: 300,
Price: int(doc.Price),
Size: attachment.Size,
Ext: attachment.Ext,
Status: model.DocumentStatusPending,
// UUID: uuid.Must(uuid.NewV4()).String(),
Score: 300,
Price: int(doc.Price),
Size: attachment.Size,
Ext: attachment.Ext,
Status: model.DocumentStatusPending,
}
uuidAttachmentIdMap[doc.UUID] = attachment.Id
docMapAttachment[idx] = attachment.Id
documents = append(documents, doc)
}
@ -102,8 +101,8 @@ func (s *DocumentAPIService) CreateDocument(ctx context.Context, req *pb.CreateD
}
attachIdTypeIdMap := make(map[int64]int64)
for _, doc := range docs {
if attachmentId, ok := uuidAttachmentIdMap[doc.UUID]; ok {
for idx, doc := range docs {
if attachmentId, ok := docMapAttachment[idx]; ok {
attachIdTypeIdMap[attachmentId] = doc.Id
}
}

@ -177,7 +177,7 @@ func (m *DBModel) DeleteAttachment(ids []int64) (err error) {
}
func (m *DBModel) GetAttachmentByTypeAndTypeId(typ int, typeId int64) (attachment Attachment) {
err := m.db.Where("type = ? and type_id = ?", typ, typeId).First(&attachment).Error
err := m.db.Where("type = ? and type_id = ?", typ, typeId).Last(&attachment).Error
if err != nil && err != gorm.ErrRecordNotFound {
m.logger.Error("GetAttachmentByTypeAndTypeId", zap.Error(err))
}

@ -38,15 +38,13 @@ var DocumentStatusMap = map[int]struct{}{
type Document struct {
Id int64 `form:"id" json:"id,omitempty" gorm:"primaryKey;autoIncrement;column:id;comment:;"`
Title string `form:"title" json:"title,omitempty" gorm:"column:title;type:varchar(255);size:255;comment:文档名称;"`
Keywords string `form:"keywords" json:"keywords,omitempty" gorm:"column:keywords;type:varchar(255);size:255;comment:文档关键字;"`
Description string `form:"description" json:"description,omitempty" gorm:"column:description;type:varchar(512);size:512;comment:文档描述;"`
Keywords string `form:"keywords" json:"keywords,omitempty" gorm:"column:keywords;type:varchar(128);size:128;comment:文档关键字;"`
Description string `form:"description" json:"description,omitempty" gorm:"column:description;type:varchar(255);size:255;comment:文档描述;"`
UserId int64 `form:"user_id" json:"user_id,omitempty" gorm:"column:user_id;type:bigint(20);size:20;default:0;index:user_id;comment:文档所属用户ID;"`
Cover string `form:"cover" json:"cover,omitempty" gorm:"column:cover;type:varchar(255);size:255;comment:文档封面;"`
Width int `form:"width" json:"width,omitempty" gorm:"column:width;type:int(11);size:11;default:0;comment:宽;"`
Height int `form:"height" json:"height,omitempty" gorm:"column:height;type:int(11);size:11;default:0;comment:高;"`
Preview int `form:"preview" json:"preview,omitempty" gorm:"column:preview;type:int(11);size:11;default:0;comment:允许预览页数;"`
Pages int `form:"pages" json:"pages,omitempty" gorm:"column:pages;type:int(11);size:11;default:0;comment:文档页数;"`
UUID string `form:"uuid" json:"uuid,omitempty" gorm:"column:uuid;type:varchar(36);size:36;index:idx_uuid,unique;comment:文档UUID用于隐藏文档真实路径;"`
DownloadCount int `form:"download_count" json:"download_count,omitempty" gorm:"column:download_count;type:int(11);size:11;default:0;comment:下载人次;"`
ViewCount int `form:"view_count" json:"view_count,omitempty" gorm:"column:view_count;type:int(11);size:11;default:0;comment:浏览人次;"`
FavoriteCount int `form:"favorite_count" json:"favorite_count,omitempty" gorm:"column:favorite_count;type:int(11);size:11;default:0;comment:收藏人次;"`
@ -61,6 +59,7 @@ type Document struct {
UpdatedAt *time.Time `form:"updated_at" json:"updated_at,omitempty" gorm:"column:updated_at;type:datetime;comment:更新时间;"`
DeletedAt *gorm.DeletedAt `form:"deleted_at" json:"deleted_at,omitempty" gorm:"column:deleted_at;type:datetime;index:idx_deleted_at;comment:删除时间;"`
DeletedUserId int64 `form:"deleted_user_id" json:"deleted_user_id,omitempty" gorm:"column:deleted_user_id;type:bigint(20);size:20;default:0;comment:删除用户ID;"`
EnableGZIP bool `form:"enable_gzip" json:"enable_gzip,omitempty" gorm:"column:enable_gzip;type:tinyint(1);size:1;default:0;comment:是否启用GZIP压缩;"`
}
func (Document) TableName() string {
@ -516,11 +515,11 @@ func (m *DBModel) ConvertDocument() (err error) {
hashMapDocs := m.GetDocumentStatusConvertedByHash([]string{attachment.Hash})
if len(hashMapDocs) > 0 {
// 已有文档转换成功将hash相同的文档相关数据迁移到当前文档
sql := " UPDATE `%s` SET `description`= ?, `cover` = ?, `width` = ?, `height`= ?, `preview`= ?, `pages` = ?, `status` = ? WHERE status in ? and id in (select type_id from `%s` where `hash` = ? and `type` = ?)"
sql := " UPDATE `%s` SET `description`= ? , `enable_gzip`, `width` = ?, `height`= ?, `preview`= ?, `pages` = ?, `status` = ? WHERE status in ? and id in (select type_id from `%s` where `hash` = ? and `type` = ?)"
sql = fmt.Sprintf(sql, Document{}.TableName(), Attachment{}.TableName())
for hash, doc := range hashMapDocs {
err = m.db.Exec(sql,
doc.Description, doc.Cover, doc.Width, doc.Height, doc.Preview, doc.Pages, DocumentStatusConverted, []int{DocumentStatusPending, DocumentStatusConverting, DocumentStatusFailed}, hash, AttachmentTypeDocument,
doc.Description, doc.EnableGZIP, doc.Width, doc.Height, doc.Preview, doc.Pages, DocumentStatusConverted, []int{DocumentStatusPending, DocumentStatusConverting, DocumentStatusFailed}, hash, AttachmentTypeDocument,
).Error
if err != nil {
m.logger.Error("ConvertDocument", zap.Error(err))
@ -564,7 +563,6 @@ func (m *DBModel) ConvertDocument() (err error) {
util.CopyFile(pages[0].PagePath, cover)
util.CropImage(cover, DocumentCoverWidth, DocumentCoverHeight)
document.Width, document.Height, _ = util.GetImageSize(coverBig) // 页面宽高
document.Cover = "/" + cover
}
// PDF转为SVG
@ -580,7 +578,16 @@ func (m *DBModel) ConvertDocument() (err error) {
}
for _, page := range pages {
util.CopyFile(page.PagePath, fmt.Sprintf(baseDir+"/%d%s", page.PageNum, filepath.Ext(page.PagePath)))
ext := ".svg"
if strings.HasSuffix(page.PagePath, ".gzip.svg") {
ext = ".gzip.svg"
}
dst := fmt.Sprintf(baseDir+"/%d%s", page.PageNum, ext)
m.logger.Debug("ConvertDocument CopyFile", zap.String("src", page.PagePath), zap.String("dst", dst))
errCopy := util.CopyFile(page.PagePath, dst)
if errCopy != nil {
m.logger.Error("ConvertDocument CopyFile", zap.Error(errCopy))
}
os.Remove(page.PagePath)
}
@ -592,12 +599,13 @@ func (m *DBModel) ConvertDocument() (err error) {
if content, errRead := os.ReadFile(textFile); errRead == nil {
contentStr := string(content)
replacer := strings.NewReplacer("\r", " ", "\n", " ", "\t", " ")
document.Description = replacer.Replace(util.Substr(contentStr, 500))
document.Description = strings.TrimSpace(replacer.Replace(util.Substr(contentStr, 255)))
}
os.Remove(textFile)
document.Status = DocumentStatusConverted
err = m.db.Select("description", "cover", "width", "height", "preview", "pages", "status").Where("id = ?", document.Id).Updates(document).Error
document.EnableGZIP = cfg.EnableGZIP
err = m.db.Select("description", "cover", "width", "height", "preview", "pages", "status", "enable_gzip").Where("id = ?", document.Id).Updates(document).Error
if err != nil {
m.SetDocumentStatus(document.Id, DocumentStatusFailed)
m.logger.Error("ConvertDocument", zap.Error(err))

@ -61,9 +61,9 @@ func Run(cfg *conf.Config, logger *zap.Logger) {
}
app := gin.New()
app.Use(
gzip.Gzip(gzip.DefaultCompression), // gzip
gin.Recovery(), // recovery
cors.Default(), // allows all origins
gzip.Gzip(gzip.BestCompression, gzip.WithExcludedExtensions([]string{".svg", ".png", ".gif", ".jpeg", ".jpg", ".ico"})), // gzip
gin.Recovery(), // recovery
cors.Default(), // allows all origins
)
endpoint := fmt.Sprintf("localhost:%v", cfg.Port)

@ -17,6 +17,8 @@ func RegisterGinRouter(app *gin.Engine, dbModel *model.DBModel, logger *zap.Logg
app.GET("/helloworld", func(ctx *gin.Context) {
ctx.JSON(http.StatusOK, "hello world")
})
app.GET("/document/page/:hash/:page", attachmentAPIService.ViewDocumentPages)
app.GET("/document/cover/:hash", attachmentAPIService.ViewDocumentCover)
checkPermissionGroup := app.Group("/api/v1/upload")
checkPermissionGroup.Use(auth.AuthGin())

@ -315,18 +315,21 @@ func (c *Converter) CompressSVGBySVGO(svgFolder string) (err error) {
// CompressSVGByGZIP 将SVG文件压缩为GZIP格式
func (c *Converter) CompressSVGByGZIP(svgFile string) (dst string, err error) {
var svgBytes []byte
ext := filepath.Ext(svgFile)
dst = strings.TrimSuffix(svgFile, ext) + ".gzip.svg"
dst = strings.TrimSuffix(svgFile, filepath.Ext(svgFile)) + ".gzip.svg"
svgBytes, err = os.ReadFile(svgFile)
if err != nil {
c.logger.Error("read svg file", zap.String("svgFile", svgFile), zap.Error(err))
return
}
var buf bytes.Buffer
w := gzip.NewWriter(&buf)
defer w.Close()
w.Write(svgBytes)
w.Flush()
gzw, err := gzip.NewWriterLevel(&buf, gzip.BestCompression)
if err != nil {
return "", err
}
defer gzw.Close()
gzw.Write(svgBytes)
gzw.Flush()
err = os.WriteFile(dst, buf.Bytes(), os.ModePerm)
if err != nil {
c.logger.Error("write svgz file", zap.String("svgzFile", dst), zap.Error(err))

Loading…
Cancel
Save