// Background maintenance jobs of the model package: sitemap generation, SEO initialization, attachment cleanup and document preview (re)conversion.

package model
import (
	"bytes"
	"compress/gzip"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"os"
	"path/filepath"
	"strconv"
	"strings"
	"time"

	"moredoc/util"
	"moredoc/util/converter"
	"moredoc/util/sitemap"

	"github.com/PuerkitoBio/goquery"
	"go.uber.org/zap"
	"gorm.io/gorm"
)
// reconvertDocument is the JSON-serialized progress marker used when
// re-converting all documents: Id is the id of the last document that has
// been processed.
type reconvertDocument struct {
	Id int64 `json:"id"`
}
var (
	// isCreatingSitemap is true while UpdateSitemap is running; a call made
	// in the meantime returns immediately.
	// NOTE(review): the flag is a plain bool without synchronization, so it
	// is only a best-effort guard against overlapping runs.
	isCreatingSitemap bool

	// cacheReconvert is the working directory used while re-converting
	// document previews; it is wiped at the start of every re-conversion.
	cacheReconvert = "cache/reconvert"
)
// UpdateSitemap regenerates the site map.
//
// Documents and articles are read in pages of 10000 rows ordered by id. Every
// page is written to its own file ("sitemap/documents-N.xml" or
// "sitemap/articles-N.xml") and, when at least one page was written, an index
// referencing all of them is written to "sitemap/sitemap.xml".
//
// If another run is already in progress the call returns nil immediately.
// The first error encountered is logged and returned.
func (m *DBModel) UpdateSitemap() (err error) {
	if isCreatingSitemap {
		return nil
	}
	isCreatingSitemap = true
	defer func() {
		isCreatingSitemap = false
	}()

	// Without the output directory every later write would fail, so report
	// the problem right away instead of ignoring it.
	if err = os.MkdirAll("sitemap", os.ModePerm); err != nil {
		m.logger.Error("execUpdateSitemap", zap.Error(err))
		return err
	}

	const limit = 10000
	var (
		sitemapIndexes []sitemap.SitemapIndex
		sm             = sitemap.NewSitemap()
		domain         = strings.TrimRight(m.GetConfigOfSystem(ConfigSystemDomain).Domain, "/")
	)

	// writePage stores one page of URLs in file and registers that file in
	// the sitemap index.
	writePage := func(file string, urls []sitemap.SitemapUrl) error {
		if errWrite := sm.CreateSitemapContent(urls, file); errWrite != nil {
			m.logger.Error("execUpdateSitemap", zap.Error(errWrite))
			return errWrite
		}
		sitemapIndexes = append(sitemapIndexes, sitemap.SitemapIndex{
			Loc:     domain + "/" + file,
			Lastmod: time.Now().Format(time.RFC3339),
		})
		return nil
	}

	// Documents.
	for page := 1; ; page++ {
		var documents []Document
		err = m.db.Model(&Document{}).Select("id", "updated_at").Limit(limit).Offset((page - 1) * limit).Order("id asc").Find(&documents).Error
		if err != nil && !errors.Is(err, gorm.ErrRecordNotFound) {
			m.logger.Error("execUpdateSitemap", zap.Error(err))
			return err
		}
		if len(documents) == 0 {
			break
		}

		urls := make([]sitemap.SitemapUrl, 0, len(documents))
		for i := range documents {
			urls = append(urls, sitemap.SitemapUrl{
				Loc:        fmt.Sprintf("%s/document/%d", domain, documents[i].Id),
				Lastmod:    documents[i].UpdatedAt.Format(time.RFC3339),
				ChangeFreq: sitemap.DAILY,
				Priority:   1.0,
			})
		}
		if err = writePage(fmt.Sprintf("sitemap/documents-%d.xml", page), urls); err != nil {
			return err
		}
	}

	// Articles.
	for page := 1; ; page++ {
		var articles []Article
		err = m.db.Model(&Article{}).Select("id", "updated_at", "identifier").Limit(limit).Offset((page - 1) * limit).Order("id asc").Find(&articles).Error
		if err != nil && !errors.Is(err, gorm.ErrRecordNotFound) {
			m.logger.Error("execUpdateSitemap", zap.Error(err))
			return err
		}
		if len(articles) == 0 {
			break
		}

		urls := make([]sitemap.SitemapUrl, 0, len(articles))
		for i := range articles {
			urls = append(urls, sitemap.SitemapUrl{
				Loc:        fmt.Sprintf("%s/article/%s", domain, articles[i].Identifier),
				Lastmod:    articles[i].UpdatedAt.Format(time.RFC3339),
				ChangeFreq: sitemap.DAILY,
				Priority:   1.0,
			})
		}
		if err = writePage(fmt.Sprintf("sitemap/articles-%d.xml", page), urls); err != nil {
			return err
		}
	}

	if len(sitemapIndexes) > 0 {
		if err = sm.CreateSitemapIndex(sitemapIndexes, "sitemap/sitemap.xml"); err != nil {
			m.logger.Error("execUpdateSitemap", zap.Error(err))
			return err
		}
	}
	return nil
}
1 year ago
// SEO
func (m *DBModel) InitSEO() {
// 扫描dist目录下的所有HTML文件将文件名作为SEO的关键字
cfg := m.GetConfigOfSystem()
dist := "dist"
pages := map[string]string{
"200.html": "",
"404.html": "404 - 页面未找到 - ",
"findpassword/index.html": "找回密码 - ",
"index.html": "",
"login/index.html": "用户登录 - ",
"register/index.html": "用户注册 - ",
"search/index.html": "文档搜索 - ",
"upload/index.html": "文档上传 - ",
}
filepath.Walk(dist, func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}
if info.IsDir() {
return nil
}
1 year ago
path = filepath.ToSlash(path)
if filepath.Ext(path) == ".html" {
1 year ago
name := strings.TrimPrefix(path, dist+"/")
defaultTitle, ok := pages[name]
if !ok && strings.HasPrefix(path, dist+"/admin") {
defaultTitle = "管理后台 - "
}
bs, _ := os.ReadFile(path)
if doc, errDoc := goquery.NewDocumentFromReader(bytes.NewReader(bs)); errDoc != nil {
m.logger.Error("initSEO", zap.Error(errDoc), zap.String("file", path))
} else {
7 months ago
m.logger.Debug("initSEO", zap.String("file", path), zap.String("title", defaultTitle+cfg.Sitename))
doc.Find("title").SetText(defaultTitle + cfg.Sitename)
doc.Find("meta[name='keywords']").SetAttr("content", cfg.Keywords)
doc.Find("meta[name='description']").SetAttr("content", cfg.Description)
doc.Find("meta[content='moredoc']").Remove()
doc.Find("meta[name='og:type']").Remove()
if htmlStr, errHtml := doc.Html(); errHtml == nil {
os.WriteFile(path, []byte(htmlStr), os.ModePerm)
1 year ago
}
}
}
return nil
})
}
func (m *DBModel) cronUpdateSitemap() {
layout := "2006-01-02"
lastUpdated := time.Now().Format(layout)
for {
hour, _ := strconv.Atoi(os.Getenv("MOREDOC_UPDATE_SITEMAP_HOUR")) // 默认为每天凌晨0点更新站点地图
hour = hour % 24
7 months ago
m.logger.Debug("cronUpdateSitemap", zap.Int("hour", hour), zap.String("lastUpdated", lastUpdated))
now := time.Now()
if now.Hour() == hour && now.Format(layout) != lastUpdated {
7 months ago
m.logger.Debug("cronUpdateSitemapstart...")
err := m.UpdateSitemap()
if err != nil {
7 months ago
m.logger.Debug("cronUpdateSitemapend...", zap.Error(err))
}
7 months ago
m.logger.Debug("cronUpdateSitemapend...")
lastUpdated = now.Format(layout)
}
time.Sleep(1 * time.Minute)
}
}
// 清理无效附件
// 1. 找出已被标记删除的附件
// 2. 查询是否存在相同hash的未被标记删除的附件对于此类附件则只删除附件记录而不删除附件文件。
// 3. 删除已被标记删除的附件
// 4. 对于文档类附件要注意衍生的附件如缩略图、PDF等也要一并删除。
func (m *DBModel) cronCleanInvalidAttachment() {
sleepDuration := 1 * time.Minute
for {
time.Sleep(1 * time.Second)
7 months ago
m.logger.Debug("cronCleanInvalidAttachmentstart...")
var (
deletedAttachemnts, attachemnts []Attachment
hashes []string
hashMap = make(map[string]struct{})
ids []int64
1 year ago
retentionMinute = m.GetConfigOfSecurity(ConfigSecurityAttachmentRetentionMinute).AttachmentRetentionMinute
)
1 year ago
if retentionMinute < 0 {
retentionMinute = 0
}
// 1. 找出已被标记删除的附件
1 year ago
m.db.Unscoped().Where("deleted_at IS NOT NULL").Where("deleted_at < ?", time.Now().Add(-time.Duration(retentionMinute)*time.Minute)).Limit(100).Find(&deletedAttachemnts)
if len(deletedAttachemnts) == 0 {
7 months ago
m.logger.Debug("cronCleanInvalidAttachmentend...")
time.Sleep(sleepDuration)
continue
}
for _, attachemnt := range deletedAttachemnts {
hashes = append(hashes, attachemnt.Hash)
ids = append(ids, attachemnt.Id)
}
// 2. 查询是否存在相同hash的未被标记删除的附件
m.db.Select("hash").Where("hash IN (?)", hashes).Group("hash").Limit(len(hashes)).Find(&attachemnts)
for _, attachemnt := range attachemnts {
hashMap[attachemnt.Hash] = struct{}{}
}
// 3. 删除已被标记删除的附件
err := m.db.Unscoped().Where("id IN (?)", ids).Delete(&Attachment{}).Error
if err != nil {
m.logger.Error("cronCleanInvalidAttachment", zap.Error(err))
7 months ago
m.logger.Debug("cronCleanInvalidAttachmentend...")
continue
}
7 months ago
m.logger.Debug("cronCleanInvalidAttachment", zap.Any("ids", ids), zap.Any("Attachemnts", deletedAttachemnts))
for _, attachemnt := range deletedAttachemnts {
if _, ok := hashMap[attachemnt.Hash]; !ok { // 删除附件文件
m.logger.Debug("cronCleanInvalidAttachment", zap.String("path", attachemnt.Path), zap.Any("attachemnt", attachemnt))
file := strings.TrimLeft(attachemnt.Path, "./")
m.logger.Debug("cronCleanInvalidAttachment", zap.String("file", file))
if err := os.Remove(file); err != nil {
m.logger.Error("cronCleanInvalidAttachment", zap.Error(err), zap.String("file", file))
}
if attachemnt.Type == AttachmentTypeDocument { // 删除文档的衍生文件
folder := strings.TrimSuffix(file, filepath.Ext(file))
m.logger.Debug("cronCleanInvalidAttachment", zap.String("folder", folder))
if err := os.RemoveAll(folder); err != nil {
m.logger.Error("cronCleanInvalidAttachment", zap.Error(err), zap.String("folder", folder))
}
}
}
}
7 months ago
m.logger.Debug("cronCleanInvalidAttachmentend...")
}
}
func (m *DBModel) cronMarkAttachmentDeleted() {
// 定时标记删除24小时前上传的但是未被使用的附件
for {
time.Sleep(1 * time.Hour)
var (
configs []Config
banners []Banner
hashes []string
)
// 1. 查找图片类配置
m.db.Select("value").Where("input_type = ?", "image").Find(&configs)
if len(configs) > 0 {
for _, config := range configs {
// 文件hash
hash := strings.TrimSpace(strings.TrimSuffix(filepath.Base(config.Value), filepath.Ext(config.Value)))
if hash != "" {
hashes = append(hashes, hash)
}
}
}
// 2. 查找横幅类配置
m.db.Select("path").Find(&banners)
if len(banners) > 0 {
for _, banner := range banners {
// 文件hash
hash := strings.TrimSpace(strings.TrimSuffix(filepath.Base(banner.Path), filepath.Ext(banner.Path)))
if hash != "" {
hashes = append(hashes, hash)
}
}
}
if len(hashes) > 0 {
err := m.db.Where("`hash` NOT IN (?) and `type` in (?)", hashes, []int{AttachmentTypeConfig, AttachmentTypeBanner}).Delete(&Attachment{}).Error
if err != nil {
m.logger.Error("cronMarkAttachmentDeleted", zap.Error(err))
}
}
// 非配置类和横幅类附件如果type_id为0则表示未被使用超过24小时则标记删除
7 months ago
m.logger.Debug("cronMarkAttachmentDeleted start...")
err := m.db.Where("`type` not in (?) and type_id = ?", []int{AttachmentTypeConfig, AttachmentTypeBanner}, 0).Where("created_at < ?", time.Now().Add(-time.Duration(24)*time.Hour)).Delete(&Attachment{}).Error
if err != nil {
m.logger.Error("cronMarkAttachmentDeleted", zap.Error(err))
}
7 months ago
m.logger.Debug("cronMarkAttachmentDeleted end...")
}
}
1 year ago
func (m *DBModel) loopCovertDocument() {
if convertDocumentRunning {
return
}
// 清空缓存目录
os.RemoveAll("cache/convert")
1 year ago
convertDocumentRunning = true
sleep := 10 * time.Second
m.db.Model(&Document{}).Where("status = ?", DocumentStatusConverting).Update("status", DocumentStatusPending)
for {
now := time.Now()
7 months ago
m.logger.Debug("loopCovertDocumentstart...")
1 year ago
err := m.ConvertDocument()
if err != nil && err != gorm.ErrRecordNotFound {
7 months ago
m.logger.Error("loopCovertDocument", zap.Error(err))
1 year ago
}
7 months ago
m.logger.Debug("loopCovertDocumentend...", zap.String("cost", time.Since(now).String()))
1 year ago
if err == gorm.ErrRecordNotFound {
time.Sleep(sleep)
}
}
}
6 months ago
func (m *DBModel) ReconvertDocoument(documentId int64, ext string) {
ext = "." + strings.TrimLeft(ext, ".")
6 months ago
os.RemoveAll(cacheReconvert)
os.MkdirAll(cacheReconvert, os.ModePerm)
if documentId <= 0 {
6 months ago
m.reconvertAllDocument(ext)
return
}
doc, err := m.GetDocument(documentId)
if err != nil {
m.logger.Error("ReconvertDocoument", zap.Error(err))
return
}
if doc.Status != DocumentStatusConverted {
m.logger.Error("ReconvertDocoument", zap.Error(errors.New("文档不是已转换的文档,不能重转")))
return
6 months ago
}
m.reconvertDocument(&doc, ext)
6 months ago
}
func (m *DBModel) reconvertDocument(doc *Document, ext string) {
m.logger.Debug("reconvertDocument", zap.Any("doc", doc), zap.String("ext", ext))
6 months ago
if doc.PreviewExt == ext {
m.logger.Info("reconvertDocument", zap.String("msg", "文档预览文件格式与指定格式一致,无需重转"), zap.String("document", doc.Title+doc.Ext))
6 months ago
return
}
6 months ago
// 1. 下载文档预览文件
attachment := m.GetAttachmentByTypeAndTypeId(AttachmentTypeDocument, doc.Id, "id", "hash")
6 months ago
if attachment.Id == 0 {
m.logger.Error("reconvertDocument", zap.String("msg", "文档预览文件不存在"), zap.String("document", doc.Title+doc.Ext))
6 months ago
return
}
cacheDir := filepath.Join(cacheReconvert, strconv.FormatInt(doc.Id, 10))
os.MkdirAll(cacheDir, os.ModePerm)
defer os.RemoveAll(cacheDir)
totalPreview := doc.Preview
if totalPreview == 0 {
totalPreview = doc.Pages
}
var (
convertedTargets []string
oldSrcFiles []string
)
6 months ago
for i := 1; i <= totalPreview; i++ {
// 已存在的预览文件
isGZIP := false
oldExt := doc.PreviewExt
if doc.EnableGZIP && strings.HasSuffix(oldExt, ".svg") {
oldExt = ".gzip.svg"
isGZIP = true
}
// 目标文件
dstFile := filepath.Join(cacheDir, fmt.Sprintf("%d%s", i, oldExt))
// 源文件
srcFile := fmt.Sprintf("documents/%s/%s/%d%s", strings.Join(strings.Split(attachment.Hash, "")[:5], "/"), attachment.Hash, i, oldExt)
oldSrcFiles = append(oldSrcFiles, srcFile)
err := util.CopyFile(srcFile, dstFile)
if err != nil {
m.logger.Error("reconvertDocument", zap.String("msg", "下载文档预览文件失败"), zap.String("document", doc.Title+doc.Ext), zap.Error(err))
return
}
m.logger.Debug("reconvertDocument", zap.Bool("isGZIP", isGZIP), zap.String("msg", "下载文档预览文件成功"), zap.String("document", doc.Title+doc.Ext), zap.String("srcFile", srcFile), zap.String("dstFile", dstFile))
if isGZIP { // 解压缩
m.ungzipSVG(dstFile)
}
// 2. 转换文档预览文件
convertedTargetFile := filepath.Join(cacheDir, fmt.Sprintf("%d%s", i, ext))
6 months ago
if strings.HasSuffix(oldExt, ".svg") {
// 如果是svg文件则需要使用inkscape预先转为png
tmpFile := filepath.Join(cacheDir, fmt.Sprintf("tmp-%d.png", i))
err = converter.ConvertByInkscape(dstFile, tmpFile)
if err == nil {
if strings.HasSuffix(convertedTargetFile, ".png") {
// 如果目标文件是png则直接使用inkscape转换后的文件
convertedTargetFile = tmpFile
} else {
// 如果目标文件不是png则需要使用ImageMagick转换
err = converter.ConvertByImageMagick(tmpFile, convertedTargetFile)
os.RemoveAll(tmpFile)
}
}
} else {
err = converter.ConvertByImageMagick(dstFile, convertedTargetFile)
}
if err != nil {
m.logger.Error("reconvertDocument", zap.String("msg", "转换文档预览文件失败"), zap.String("document", doc.Title+doc.Ext), zap.Error(err))
return
}
convertedTargets = append(convertedTargets, convertedTargetFile)
}
6 months ago
// 3. 上传文档预览文件
for i, srcFile := range convertedTargets {
dstFile := fmt.Sprintf("documents/%s/%s/%d%s", strings.Join(strings.Split(attachment.Hash, "")[:5], "/"), attachment.Hash, i+1, ext)
err := util.CopyFile(srcFile, dstFile)
if err != nil {
m.logger.Error("reconvertDocument", zap.String("msg", "上传文档预览文件失败"), zap.String("document", doc.Title+doc.Ext), zap.Error(err))
return
}
}
6 months ago
// 4. 更新数据库表的预览后缀
6 months ago
// 查询同一hash的文档
var (
attachemnts []Attachment
err error
data = map[string]interface{}{
"preview_ext": ext,
"enable_gzip": false,
}
)
m.db.Select("id", "type_id").Where("hash = ? and `type` = ?", attachment.Hash, AttachmentTypeDocument).Find(&attachemnts)
if len(attachemnts) > 0 {
var ids []int64
for _, attachemnt := range attachemnts {
ids = append(ids, attachemnt.TypeId)
}
err = m.db.Model(&Document{}).Where("id IN (?)", ids).Updates(data).Error
} else {
err = m.db.Model(doc).Updates(data).Error
}
if err != nil {
m.logger.Error("reconvertDocument", zap.String("msg", "更新文档预览文件后缀失败"), zap.String("document", doc.Title+doc.Ext), zap.Error(err))
return
}
6 months ago
// 5. 删除缓存文件,删除原预览文件
for _, file := range oldSrcFiles {
os.Remove(file)
}
6 months ago
}
func (m *DBModel) reconvertAllDocument(ext string) {
var cfg reconvertDocument
6 months ago
bytes, _ := os.ReadFile("cache/reconvert.json")
json.Unmarshal(bytes, &cfg)
for {
var doc Document
6 months ago
m.db.Where("id > ?", cfg.Id).Where("status = ?", DocumentStatusConverted).Order("id asc").Find(&doc)
if doc.Id == 0 {
break
}
m.reconvertDocument(&doc, ext)
cfg.Id = doc.Id
bytes, _ = json.Marshal(cfg)
os.WriteFile("cache/reconvert.json", bytes, os.ModePerm)
}
}
// ungzipSVG replaces the gzip-compressed file at path svg with its
// decompressed content.
//
// The content is decompressed completely in memory before the file is
// rewritten, so a file that is not valid gzip data is left untouched. Errors
// are logged; nothing is returned.
func (m *DBModel) ungzipSVG(svg string) {
	m.logger.Info("ungzipSVG", zap.String("svg", svg))

	compressed, err := os.ReadFile(svg)
	if err != nil {
		m.logger.Error("ungzipSVG", zap.Error(err))
		return
	}

	gz, err := gzip.NewReader(bytes.NewReader(compressed))
	if err != nil {
		m.logger.Error("ungzipSVG", zap.Error(err))
		return
	}
	defer gz.Close()

	plain, err := io.ReadAll(gz)
	if err != nil {
		m.logger.Error("ungzipSVG", zap.Error(err))
		return
	}

	// Same mode os.Create would use; an existing file keeps its permissions.
	if err = os.WriteFile(svg, plain, 0666); err != nil {
		m.logger.Error("ungzipSVG", zap.Error(err))
	}
}