You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

517 lines
16 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

package model
import (
"bytes"
"compress/gzip"
"errors"
"fmt"
"io"
"moredoc/util"
"moredoc/util/converter"
"moredoc/util/sitemap"
"os"
"path/filepath"
"strconv"
"strings"
"time"
"github.com/PuerkitoBio/goquery"
"go.uber.org/zap"
"gorm.io/gorm"
)
// reconvertDocument is the JSON-encoded progress marker that
// reconvertAllDocument reads from and writes to "cache/reconvert.json".
type reconvertDocument struct {
// Id is the id of the last document passed to reconvertDocument; the bulk
// run continues with documents whose id is greater than this value.
Id int64 `json:"id"`
}
var (
// isCreatingSitemap is true while UpdateSitemap is running; a call made
// during that time returns immediately.
// NOTE(review): this is a plain bool with no synchronisation — confirm that
// UpdateSitemap is never invoked from several goroutines at once.
isCreatingSitemap bool
// cacheReconvert is the scratch directory that ReconvertDocoument wipes and
// recreates before previews are re-converted.
cacheReconvert = "cache/reconvert"
)
// UpdateSitemap regenerates the XML sitemaps in the "sitemap" directory.
//
// Documents and articles are read in pages of 10000 rows ordered by id. Each
// page is written to sitemap/documents-N.xml or sitemap/articles-N.xml, and
// every generated file is then listed in sitemap/sitemap.xml (the index is
// only written when at least one page file was produced). URLs are prefixed
// with the configured system domain, stripped of trailing slashes.
//
// If another run is already in progress the call returns nil immediately.
// The first error encountered is logged and returned.
func (m *DBModel) UpdateSitemap() (err error) {
	if isCreatingSitemap {
		return nil
	}
	isCreatingSitemap = true
	defer func() {
		isCreatingSitemap = false
	}()

	// Fail early with the real cause instead of letting every later file
	// write fail because the directory is missing.
	if err = os.MkdirAll("sitemap", os.ModePerm); err != nil {
		m.logger.Error("execUpdateSitemap", zap.Error(err))
		return err
	}

	var (
		limit          = 10000
		page           = 1
		documents      []Document
		articles       []Article
		modelDocument  = &Document{}
		modelArticle   = &Article{}
		sitemapIndexes []sitemap.SitemapIndex
		sm             = sitemap.NewSitemap()
		domain         = strings.TrimRight(m.GetConfigOfSystem(ConfigSystemDomain).Domain, "/")
	)

	// Document pages.
	for {
		if err = m.db.Model(modelDocument).Select("id", "updated_at").Limit(limit).Offset((page - 1) * limit).Order("id asc").Find(&documents).Error; err != nil && err != gorm.ErrRecordNotFound {
			m.logger.Error("execUpdateSitemap", zap.Error(err))
			return err
		}
		if len(documents) == 0 {
			break
		}

		file := fmt.Sprintf("sitemap/documents-%d.xml", page)
		su := make([]sitemap.SitemapUrl, 0, len(documents))
		for _, doc := range documents {
			su = append(su, sitemap.SitemapUrl{
				Loc:        fmt.Sprintf("%s/document/%d", domain, doc.Id),
				Lastmod:    doc.UpdatedAt.Format(time.RFC3339),
				ChangeFreq: sitemap.DAILY,
				Priority:   1.0,
			})
		}
		if err = sm.CreateSitemapContent(su, file); err != nil {
			m.logger.Error("execUpdateSitemap", zap.Error(err))
			return err
		}
		sitemapIndexes = append(sitemapIndexes, sitemap.SitemapIndex{
			Loc:     domain + "/" + file,
			Lastmod: time.Now().Format(time.RFC3339),
		})
		page++
	}

	// Article pages; articles are addressed by identifier rather than id.
	page = 1
	for {
		if err = m.db.Model(modelArticle).Select("id", "updated_at", "identifier").Limit(limit).Offset((page - 1) * limit).Order("id asc").Find(&articles).Error; err != nil && err != gorm.ErrRecordNotFound {
			m.logger.Error("execUpdateSitemap", zap.Error(err))
			return err
		}
		if len(articles) == 0 {
			break
		}

		file := fmt.Sprintf("sitemap/articles-%d.xml", page)
		su := make([]sitemap.SitemapUrl, 0, len(articles))
		for _, article := range articles {
			su = append(su, sitemap.SitemapUrl{
				Loc:        fmt.Sprintf("%s/article/%s", domain, article.Identifier),
				Lastmod:    article.UpdatedAt.Format(time.RFC3339),
				ChangeFreq: sitemap.DAILY,
				Priority:   1.0,
			})
		}
		if err = sm.CreateSitemapContent(su, file); err != nil {
			m.logger.Error("execUpdateSitemap", zap.Error(err))
			return err
		}
		sitemapIndexes = append(sitemapIndexes, sitemap.SitemapIndex{
			Loc:     domain + "/" + file,
			Lastmod: time.Now().Format(time.RFC3339),
		})
		page++
	}

	if len(sitemapIndexes) > 0 {
		if err = sm.CreateSitemapIndex(sitemapIndexes, "sitemap/sitemap.xml"); err != nil {
			m.logger.Error("execUpdateSitemap", zap.Error(err))
			return err
		}
	}
	return nil
}
// InitSEO rewrites the SEO tags of every .html file under the "dist"
// directory from the current system configuration.
//
// For each page the <title> becomes a page-specific prefix followed by the
// site name, the keywords and description meta tags receive the configured
// values, and the meta[content='moredoc'] and meta[name='og:type'] elements
// are removed. Files are rewritten in place. A file that cannot be read,
// parsed or rendered is logged and skipped; a failed write and a failed
// directory walk are logged as well.
func (m *DBModel) InitSEO() {
	cfg := m.GetConfigOfSystem()
	dist := "dist"
	// Title prefixes for known pages, keyed by the path relative to dist.
	pages := map[string]string{
		"200.html":                "",
		"404.html":                "404 - 页面未找到 - ",
		"findpassword/index.html": "找回密码 - ",
		"index.html":              "",
		"login/index.html":        "用户登录 - ",
		"register/index.html":     "用户注册 - ",
		"search/index.html":       "文档搜索 - ",
		"upload/index.html":       "文档上传 - ",
	}

	walkErr := filepath.Walk(dist, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			return err
		}
		if info.IsDir() {
			return nil
		}
		path = filepath.ToSlash(path)
		if filepath.Ext(path) != ".html" {
			return nil
		}

		name := strings.TrimPrefix(path, dist+"/")
		defaultTitle, ok := pages[name]
		if !ok && strings.HasPrefix(path, dist+"/admin") {
			defaultTitle = "管理后台 - "
		}

		bs, errRead := os.ReadFile(path)
		if errRead != nil {
			// Parsing an empty buffer succeeds, so continuing here would
			// overwrite the page with an empty document.
			m.logger.Error("initSEO", zap.Error(errRead), zap.String("file", path))
			return nil
		}
		doc, errDoc := goquery.NewDocumentFromReader(bytes.NewReader(bs))
		if errDoc != nil {
			m.logger.Error("initSEO", zap.Error(errDoc), zap.String("file", path))
			return nil
		}

		m.logger.Debug("initSEO", zap.String("file", path), zap.String("title", defaultTitle+cfg.Sitename))
		doc.Find("title").SetText(defaultTitle + cfg.Sitename)
		doc.Find("meta[name='keywords']").SetAttr("content", cfg.Keywords)
		doc.Find("meta[name='description']").SetAttr("content", cfg.Description)
		doc.Find("meta[content='moredoc']").Remove()
		doc.Find("meta[name='og:type']").Remove()

		htmlStr, errHtml := doc.Html()
		if errHtml != nil {
			m.logger.Error("initSEO", zap.Error(errHtml), zap.String("file", path))
			return nil
		}
		if errWrite := os.WriteFile(path, []byte(htmlStr), os.ModePerm); errWrite != nil {
			m.logger.Error("initSEO", zap.Error(errWrite), zap.String("file", path))
		}
		return nil
	})
	if walkErr != nil {
		m.logger.Error("initSEO", zap.Error(walkErr), zap.String("dir", dist))
	}
}
// cronUpdateSitemap runs forever, waking once a minute and regenerating the
// sitemap at most once per calendar day, during the configured hour.
//
// The hour comes from the MOREDOC_UPDATE_SITEMAP_HOUR environment variable.
// An unset or non-numeric value means 0 (midnight); values outside 0-23 are
// wrapped into that range. The day on which the loop starts is treated as
// already updated, so the first run happens on a later day.
func (m *DBModel) cronUpdateSitemap() {
	layout := "2006-01-02"
	lastUpdated := time.Now().Format(layout)
	for {
		// A parse failure leaves hour at 0, which is the intended default.
		hour, _ := strconv.Atoi(os.Getenv("MOREDOC_UPDATE_SITEMAP_HOUR"))
		// Go's % keeps the sign of the dividend, so a negative setting would
		// never equal now.Hour() and the sitemap would never be refreshed.
		hour = ((hour % 24) + 24) % 24
		m.logger.Debug("cronUpdateSitemap", zap.Int("hour", hour), zap.String("lastUpdated", lastUpdated))

		now := time.Now()
		if now.Hour() == hour && now.Format(layout) != lastUpdated {
			m.logger.Debug("cronUpdateSitemapstart...")
			// UpdateSitemap logs its own failures at error level; report the
			// end of the run exactly once either way.
			if err := m.UpdateSitemap(); err != nil {
				m.logger.Debug("cronUpdateSitemapend...", zap.Error(err))
			} else {
				m.logger.Debug("cronUpdateSitemapend...")
			}
			lastUpdated = now.Format(layout)
		}
		time.Sleep(1 * time.Minute)
	}
}
// cronCleanInvalidAttachment runs forever and permanently removes attachments
// that were marked as deleted longer ago than the configured retention period.
//
// Each round:
//  1. Load up to 100 records marked as deleted before the retention cut-off.
//  2. Find which of their hashes are still used by a record that is not
//     marked as deleted; for those only the record is removed, not the file.
//  3. Permanently delete the loaded records.
//  4. Remove the file of every other record. For document attachments the
//     path named after the file without its extension is removed too (the
//     derived files, e.g. thumbnails and PDF).
//
// When nothing is pending, or one of the two queries fails, the loop pauses
// for a minute and no files are touched.
func (m *DBModel) cronCleanInvalidAttachment() {
	sleepDuration := 1 * time.Minute
	for {
		time.Sleep(1 * time.Second)
		m.logger.Debug("cronCleanInvalidAttachmentstart...")
		var (
			deletedAttachments, liveAttachments []Attachment
			hashes                              []string
			hashMap                             = make(map[string]struct{})
			ids                                 []int64
			retentionMinute                     = m.GetConfigOfSecurity(ConfigSecurityAttachmentRetentionMinute).AttachmentRetentionMinute
		)
		if retentionMinute < 0 {
			retentionMinute = 0
		}

		// 1. Records marked as deleted before the retention cut-off.
		expiredBefore := time.Now().Add(-time.Duration(retentionMinute) * time.Minute)
		if err := m.db.Unscoped().Where("deleted_at IS NOT NULL").Where("deleted_at < ?", expiredBefore).Limit(100).Find(&deletedAttachments).Error; err != nil {
			m.logger.Error("cronCleanInvalidAttachment", zap.Error(err))
			m.logger.Debug("cronCleanInvalidAttachmentend...")
			time.Sleep(sleepDuration)
			continue
		}
		if len(deletedAttachments) == 0 {
			m.logger.Debug("cronCleanInvalidAttachmentend...")
			time.Sleep(sleepDuration)
			continue
		}
		for _, attachment := range deletedAttachments {
			hashes = append(hashes, attachment.Hash)
			ids = append(ids, attachment.Id)
		}

		// 2. Hashes still referenced by a record that is not marked as deleted.
		// A failed query must not be read as "no hash in use": that would
		// delete files that live attachments still point at.
		if err := m.db.Select("hash").Where("hash IN (?)", hashes).Group("hash").Limit(len(hashes)).Find(&liveAttachments).Error; err != nil {
			m.logger.Error("cronCleanInvalidAttachment", zap.Error(err))
			m.logger.Debug("cronCleanInvalidAttachmentend...")
			time.Sleep(sleepDuration)
			continue
		}
		for _, attachment := range liveAttachments {
			hashMap[attachment.Hash] = struct{}{}
		}

		// 3. Permanently delete the records.
		if err := m.db.Unscoped().Where("id IN (?)", ids).Delete(&Attachment{}).Error; err != nil {
			m.logger.Error("cronCleanInvalidAttachment", zap.Error(err))
			m.logger.Debug("cronCleanInvalidAttachmentend...")
			continue
		}
		m.logger.Debug("cronCleanInvalidAttachment", zap.Any("ids", ids), zap.Any("Attachemnts", deletedAttachments))

		// 4. Remove files whose hash is no longer referenced.
		for _, attachment := range deletedAttachments {
			if _, inUse := hashMap[attachment.Hash]; inUse {
				continue
			}
			m.logger.Debug("cronCleanInvalidAttachment", zap.String("path", attachment.Path), zap.Any("attachemnt", attachment))
			// Strip leading '.' and '/' characters so the stored path is
			// resolved relative to the working directory.
			file := strings.TrimLeft(attachment.Path, "./")
			m.logger.Debug("cronCleanInvalidAttachment", zap.String("file", file))
			if err := os.Remove(file); err != nil {
				m.logger.Error("cronCleanInvalidAttachment", zap.Error(err), zap.String("file", file))
			}
			if attachment.Type == AttachmentTypeDocument {
				// Derived files live at the file's path without its extension.
				folder := strings.TrimSuffix(file, filepath.Ext(file))
				m.logger.Debug("cronCleanInvalidAttachment", zap.String("folder", folder))
				if err := os.RemoveAll(folder); err != nil {
					m.logger.Error("cronCleanInvalidAttachment", zap.Error(err), zap.String("folder", folder))
				}
			}
		}
		m.logger.Debug("cronCleanInvalidAttachmentend...")
	}
}
// cronMarkAttachmentDeleted runs forever and, once an hour, marks attachments
// that are no longer referenced as deleted.
//
// Config and banner attachments count as referenced when their hash equals
// the base name (without extension) of an image-type config value or of a
// banner path. Attachments of every other type are treated as unused when
// their type_id is 0 and they were created more than 24 hours ago.
//
// NOTE(review): Delete is issued without Unscoped, so this presumably relies
// on Attachment being a soft-delete model — confirm against its definition.
func (m *DBModel) cronMarkAttachmentDeleted() {
	for {
		time.Sleep(1 * time.Hour)
		var (
			configs []Config
			banners []Banner
			hashes  []string
		)

		// 1. Hashes referenced by image-type configuration values.
		errConfigs := m.db.Select("value").Where("input_type = ?", "image").Find(&configs).Error
		if errConfigs != nil {
			m.logger.Error("cronMarkAttachmentDeleted", zap.Error(errConfigs))
		}
		for _, config := range configs {
			hash := strings.TrimSpace(strings.TrimSuffix(filepath.Base(config.Value), filepath.Ext(config.Value)))
			if hash != "" {
				hashes = append(hashes, hash)
			}
		}

		// 2. Hashes referenced by banners.
		errBanners := m.db.Select("path").Find(&banners).Error
		if errBanners != nil {
			m.logger.Error("cronMarkAttachmentDeleted", zap.Error(errBanners))
		}
		for _, banner := range banners {
			hash := strings.TrimSpace(strings.TrimSuffix(filepath.Base(banner.Path), filepath.Ext(banner.Path)))
			if hash != "" {
				hashes = append(hashes, hash)
			}
		}

		// Only purge when both lookups succeeded: an incomplete hash list
		// would make the NOT IN filter mark attachments that are still used.
		if errConfigs == nil && errBanners == nil && len(hashes) > 0 {
			err := m.db.Where("`hash` NOT IN (?) and `type` in (?)", hashes, []int{AttachmentTypeConfig, AttachmentTypeBanner}).Delete(&Attachment{}).Error
			if err != nil {
				m.logger.Error("cronMarkAttachmentDeleted", zap.Error(err))
			}
		}

		// Other attachment types: type_id 0 means never attached to anything;
		// mark them once they are older than 24 hours.
		m.logger.Debug("cronMarkAttachmentDeleted start...")
		err := m.db.Where("`type` not in (?) and type_id = ?", []int{AttachmentTypeConfig, AttachmentTypeBanner}, 0).Where("created_at < ?", time.Now().Add(-time.Duration(24)*time.Hour)).Delete(&Attachment{}).Error
		if err != nil {
			m.logger.Error("cronMarkAttachmentDeleted", zap.Error(err))
		}
		m.logger.Debug("cronMarkAttachmentDeleted end...")
	}
}
// loopCovertDocument is the endless document conversion loop.
//
// It returns immediately when convertDocumentRunning is already set.
// Otherwise it clears the "cache/convert" directory, sets the flag, moves
// every document in the converting state back to pending, and then calls
// ConvertDocument over and over. The loop waits ten seconds only when
// ConvertDocument reports gorm.ErrRecordNotFound; any other error is logged
// and the next call follows immediately.
func (m *DBModel) loopCovertDocument() {
	if convertDocumentRunning {
		return
	}

	// Start with an empty conversion cache.
	os.RemoveAll("cache/convert")
	convertDocumentRunning = true

	const idleWait = 10 * time.Second

	// Documents still flagged as converting are queued again.
	m.db.Model(&Document{}).
		Where("status = ?", DocumentStatusConverting).
		Update("status", DocumentStatusPending)

	for {
		startedAt := time.Now()
		m.logger.Debug("loopCovertDocumentstart...")

		err := m.ConvertDocument()
		queueEmpty := err == gorm.ErrRecordNotFound
		if err != nil && !queueEmpty {
			m.logger.Error("loopCovertDocument", zap.Error(err))
		}

		elapsed := time.Since(startedAt).String()
		m.logger.Debug("loopCovertDocumentend...", zap.String("cost", elapsed))

		if queueEmpty {
			time.Sleep(idleWait)
		}
	}
}
// ReconvertDocoument converts existing document previews to the format ext.
//
// ext is normalised to carry exactly one leading dot, and the reconvert
// scratch directory is wiped and recreated first. A documentId of zero or
// less re-converts all documents via reconvertAllDocument. Otherwise only
// that document is processed, and only if it can be loaded and its status is
// DocumentStatusConverted; failures are logged, nothing is returned.
func (m *DBModel) ReconvertDocoument(documentId int64, ext string) {
	ext = "." + strings.TrimLeft(ext, ".")

	// Begin with an empty scratch directory.
	os.RemoveAll(cacheReconvert)
	os.MkdirAll(cacheReconvert, os.ModePerm)

	if documentId > 0 {
		doc, err := m.GetDocument(documentId)
		switch {
		case err != nil:
			m.logger.Error("ReconvertDocoument", zap.Error(err))
		case doc.Status != DocumentStatusConverted:
			m.logger.Error("ReconvertDocoument", zap.Error(errors.New("文档不是已转换的文档,不能重转")))
		default:
			m.reconvertDocument(&doc, ext)
		}
		return
	}

	m.reconvertAllDocument(ext)
}
// reconvertDocument converts the preview pages of doc from their current
// format to ext (ext includes the leading dot).
//
// Steps:
//  1. Copy every existing preview page into a per-document scratch directory,
//     gunzipping it when the document stores gzip-compressed SVG.
//  2. Convert each page: SVG sources go through Inkscape to PNG and then,
//     unless PNG is the target, through ImageMagick; other sources go
//     straight through ImageMagick.
//  3. Copy the converted pages next to the original previews.
//  4. Set preview_ext to ext and enable_gzip to false on every document whose
//     document attachment shares this hash (or on doc alone when none is found).
//  5. Remove the old preview files.
//
// Nothing happens when the document already uses ext. Any failure is logged
// and stops the run; old previews are only removed after the database update
// succeeded. The scratch directory is removed on return.
func (m *DBModel) reconvertDocument(doc *Document, ext string) {
	m.logger.Debug("reconvertDocument", zap.Any("doc", doc), zap.String("ext", ext))
	if doc.PreviewExt == ext {
		m.logger.Info("reconvertDocument", zap.String("msg", "文档预览文件格式与指定格式一致,无需重转"), zap.String("document", doc.Title+doc.Ext))
		return
	}

	attachment := m.GetAttachmentByTypeAndTypeId(AttachmentTypeDocument, doc.Id, "id", "hash")
	if attachment.Id == 0 {
		m.logger.Error("reconvertDocument", zap.String("msg", "文档预览文件不存在"), zap.String("document", doc.Title+doc.Ext))
		return
	}

	// Previews are stored under documents/<c1>/<c2>/<c3>/<c4>/<c5>/<hash>/,
	// where c1..c5 are the first five characters of the hash. Report a
	// too-short hash instead of panicking on the slice expression.
	hashChars := strings.Split(attachment.Hash, "")
	if len(hashChars) < 5 {
		m.logger.Error("reconvertDocument", zap.String("msg", "invalid attachment hash"), zap.String("document", doc.Title+doc.Ext), zap.String("hash", attachment.Hash))
		return
	}
	previewDir := fmt.Sprintf("documents/%s/%s", strings.Join(hashChars[:5], "/"), attachment.Hash)

	cacheDir := filepath.Join(cacheReconvert, strconv.FormatInt(doc.Id, 10))
	os.MkdirAll(cacheDir, os.ModePerm)
	defer os.RemoveAll(cacheDir)

	totalPreview := doc.Preview
	if totalPreview == 0 {
		totalPreview = doc.Pages
	}

	// Source extension and compression are identical for every page.
	oldExt := doc.PreviewExt
	isGZIP := false
	if doc.EnableGZIP && strings.HasSuffix(oldExt, ".svg") {
		oldExt = ".gzip.svg"
		isGZIP = true
	}
	fromSVG := strings.HasSuffix(oldExt, ".svg")

	var (
		convertedTargets []string
		oldSrcFiles      []string
	)
	for i := 1; i <= totalPreview; i++ {
		// 1. Fetch the existing preview page into the scratch directory.
		dstFile := filepath.Join(cacheDir, fmt.Sprintf("%d%s", i, oldExt))
		srcFile := fmt.Sprintf("%s/%d%s", previewDir, i, oldExt)
		oldSrcFiles = append(oldSrcFiles, srcFile)
		err := util.CopyFile(srcFile, dstFile)
		if err != nil {
			m.logger.Error("reconvertDocument", zap.String("msg", "下载文档预览文件失败"), zap.String("document", doc.Title+doc.Ext), zap.Error(err))
			return
		}
		m.logger.Debug("reconvertDocument", zap.Bool("isGZIP", isGZIP), zap.String("msg", "下载文档预览文件成功"), zap.String("document", doc.Title+doc.Ext), zap.String("srcFile", srcFile), zap.String("dstFile", dstFile))
		if isGZIP {
			m.ungzipSVG(dstFile)
		}

		// 2. Convert the page.
		convertedTargetFile := filepath.Join(cacheDir, fmt.Sprintf("%d%s", i, ext))
		if fromSVG {
			// SVG is rendered to PNG by Inkscape first.
			tmpFile := filepath.Join(cacheDir, fmt.Sprintf("tmp-%d.png", i))
			err = converter.ConvertByInkscape(dstFile, tmpFile)
			if err == nil {
				if strings.HasSuffix(convertedTargetFile, ".png") {
					// PNG is the target: the Inkscape output is the result.
					convertedTargetFile = tmpFile
				} else {
					// Any other target goes on through ImageMagick.
					err = converter.ConvertByImageMagick(tmpFile, convertedTargetFile)
					os.RemoveAll(tmpFile)
				}
			}
		} else {
			err = converter.ConvertByImageMagick(dstFile, convertedTargetFile)
		}
		if err != nil {
			m.logger.Error("reconvertDocument", zap.String("msg", "转换文档预览文件失败"), zap.String("document", doc.Title+doc.Ext), zap.Error(err))
			return
		}
		convertedTargets = append(convertedTargets, convertedTargetFile)
	}

	// 3. Store the converted pages beside the original previews.
	for i, srcFile := range convertedTargets {
		dstFile := fmt.Sprintf("%s/%d%s", previewDir, i+1, ext)
		if err := util.CopyFile(srcFile, dstFile); err != nil {
			m.logger.Error("reconvertDocument", zap.String("msg", "上传文档预览文件失败"), zap.String("document", doc.Title+doc.Ext), zap.Error(err))
			return
		}
	}

	// 4. Update the preview extension of every document sharing this hash.
	var (
		attachments []Attachment
		err         error
		data        = map[string]interface{}{
			"preview_ext": ext,
			"enable_gzip": false,
		}
	)
	// A failed lookup must stop the run: continuing would update only this
	// document and then delete previews that the other documents still use.
	if err = m.db.Select("id", "type_id").Where("hash = ? and `type` = ?", attachment.Hash, AttachmentTypeDocument).Find(&attachments).Error; err != nil {
		m.logger.Error("reconvertDocument", zap.String("msg", "更新文档预览文件后缀失败"), zap.String("document", doc.Title+doc.Ext), zap.Error(err))
		return
	}
	if len(attachments) > 0 {
		ids := make([]int64, 0, len(attachments))
		for _, a := range attachments {
			ids = append(ids, a.TypeId)
		}
		err = m.db.Model(&Document{}).Where("id IN (?)", ids).Updates(data).Error
	} else {
		err = m.db.Model(doc).Updates(data).Error
	}
	if err != nil {
		m.logger.Error("reconvertDocument", zap.String("msg", "更新文档预览文件后缀失败"), zap.String("document", doc.Title+doc.Ext), zap.Error(err))
		return
	}

	// 5. Remove the old preview files (best effort; failures are only logged).
	for _, file := range oldSrcFiles {
		if errRemove := os.Remove(file); errRemove != nil {
			m.logger.Error("reconvertDocument", zap.String("file", file), zap.Error(errRemove))
		}
	}
}
// reconvertAllDocument re-converts the previews of every document with status
// DocumentStatusConverted to ext, in ascending id order.
//
// Progress is checkpointed in cache/reconvert.json after each document, so a
// later run continues after the last recorded id. The loop ends when no
// further document is found or the lookup fails.
func (m *DBModel) reconvertAllDocument(ext string) {
	const progressFile = "cache/reconvert.json"

	var cfg reconvertDocument
	// A missing checkpoint just means starting from id 0.
	if raw, err := os.ReadFile(progressFile); err == nil {
		if err = json.Unmarshal(raw, &cfg); err != nil {
			m.logger.Error("reconvertAllDocument", zap.Error(err), zap.String("file", progressFile))
		}
	}

	for {
		var doc Document
		// Limit(1): Find into a single struct keeps only the first row, so
		// without it the database returns every remaining row each iteration.
		err := m.db.Where("id > ?", cfg.Id).Where("status = ?", DocumentStatusConverted).Order("id asc").Limit(1).Find(&doc).Error
		if err != nil {
			m.logger.Error("reconvertAllDocument", zap.Error(err))
			return
		}
		if doc.Id == 0 {
			return
		}

		m.reconvertDocument(&doc, ext)

		// Advance the checkpoint whatever the outcome, so one bad document
		// cannot block the rest of the run.
		cfg.Id = doc.Id
		raw, err := json.Marshal(cfg)
		if err != nil {
			m.logger.Error("reconvertAllDocument", zap.Error(err))
			continue
		}
		if err = os.WriteFile(progressFile, raw, os.ModePerm); err != nil {
			m.logger.Error("reconvertAllDocument", zap.Error(err), zap.String("file", progressFile))
		}
	}
}
// ungzipSVG replaces the gzip-compressed file at path svg with its
// decompressed content.
//
// The data is decompressed completely in memory before the file is rewritten,
// so a corrupt or truncated archive leaves the file as it was. Every failure
// is logged; nothing is returned.
func (m *DBModel) ungzipSVG(svg string) {
	m.logger.Info("ungzipSVG", zap.String("svg", svg))

	bs, err := os.ReadFile(svg)
	if err != nil {
		m.logger.Error("ungzipSVG", zap.Error(err))
		return
	}

	gz, err := gzip.NewReader(bytes.NewReader(bs))
	if err != nil {
		m.logger.Error("ungzipSVG", zap.Error(err))
		return
	}
	defer gz.Close()

	plain, err := io.ReadAll(gz)
	if err != nil {
		m.logger.Error("ungzipSVG", zap.Error(err))
		return
	}

	// 0666 matches what os.Create would request; the file already exists, so
	// it is truncated and keeps its current mode.
	if err = os.WriteFile(svg, plain, 0666); err != nil {
		m.logger.Error("ungzipSVG", zap.Error(err))
	}
}