package model

import (
	"bytes"
	"compress/gzip"
	"errors"
	"fmt"
	"io"
	"moredoc/util"
	"moredoc/util/converter"
	"moredoc/util/sitemap"
	"os"
	"path/filepath"
	"strconv"
	"strings"
	"time"

	// NOTE(review): these three import paths were empty strings in the
	// reviewed source. They are restored from the identifiers used below
	// (goquery, zap, gorm) — confirm against go.mod.
	"github.com/PuerkitoBio/goquery"
	"go.uber.org/zap"
	"gorm.io/gorm"
)

// reconvertDocument is the progress record persisted to
// cache/reconvert.json by reconvertAllDocument: the id of the last document
// that was handed to reconvertDocument.
type reconvertDocument struct {
	Id int64 `json:"id"`
}

var (
	// isCreatingSitemap marks an UpdateSitemap run as in progress.
	// NOTE(review): plain bool, read and written without synchronization.
	isCreatingSitemap bool
	// cacheReconvert is the scratch directory used while reconverting previews.
	cacheReconvert = "cache/reconvert"
)

// UpdateSitemap regenerates the sitemap files under the "sitemap" directory.
//
// Documents and articles are read in pages of 10000 rows ordered by id; each
// page is written to sitemap/documents-N.xml or sitemap/articles-N.xml and
// recorded in sitemap/sitemap.xml. If another run is already flagged as in
// progress the call returns nil immediately without doing anything.
func (m *DBModel) UpdateSitemap() (err error) {
	if isCreatingSitemap {
		return
	}
	isCreatingSitemap = true
	defer func() {
		isCreatingSitemap = false
	}()

	if err = os.MkdirAll("sitemap", os.ModePerm); err != nil {
		m.logger.Error("execUpdateSitemap", zap.Error(err))
		return err
	}

	var (
		limit          = 10000
		page           = 1
		documents      []Document
		articles       []Article
		modelDocument  = &Document{}
		modelArticle   = &Article{}
		sitemapIndexes []sitemap.SitemapIndex
		sm             = sitemap.NewSitemap()
		domain         = strings.TrimRight(m.GetConfigOfSystem(ConfigSystemDomain).Domain, "/")
	)

	// writePage writes one sitemap file and, on success, records it in the index.
	writePage := func(urls []sitemap.SitemapUrl, file string) error {
		if errWrite := sm.CreateSitemapContent(urls, file); errWrite != nil {
			m.logger.Error("execUpdateSitemap", zap.Error(errWrite))
			return errWrite
		}
		sitemapIndexes = append(sitemapIndexes, sitemap.SitemapIndex{
			Loc:     domain + "/" + file,
			Lastmod: time.Now().Format(time.RFC3339),
		})
		return nil
	}

	for {
		err = m.db.Model(modelDocument).Select("id", "updated_at").Limit(limit).Offset((page - 1) * limit).Order("id asc").Find(&documents).Error
		if err != nil && !errors.Is(err, gorm.ErrRecordNotFound) {
			m.logger.Error("execUpdateSitemap", zap.Error(err))
			return err
		}
		if len(documents) == 0 {
			break
		}

		file := fmt.Sprintf("sitemap/documents-%d.xml", page)
		su := make([]sitemap.SitemapUrl, 0, len(documents))
		for _, doc := range documents {
			su = append(su, sitemap.SitemapUrl{
				Loc:        fmt.Sprintf("%s/document/%d", domain, doc.Id),
				Lastmod:    doc.UpdatedAt.Format(time.RFC3339),
				ChangeFreq: sitemap.DAILY,
				Priority:   1.0,
			})
		}
		if err = writePage(su, file); err != nil {
			return err
		}
		page++
	}

	page = 1
	for {
		err = m.db.Model(modelArticle).Select("id", "updated_at", "identifier").Limit(limit).Offset((page - 1) * limit).Order("id asc").Find(&articles).Error
		if err != nil && !errors.Is(err, gorm.ErrRecordNotFound) {
			m.logger.Error("execUpdateSitemap", zap.Error(err))
			return err
		}
		if len(articles) == 0 {
			break
		}

		file := fmt.Sprintf("sitemap/articles-%d.xml", page)
		su := make([]sitemap.SitemapUrl, 0, len(articles))
		for _, article := range articles {
			su = append(su, sitemap.SitemapUrl{
				Loc:        fmt.Sprintf("%s/article/%s", domain, article.Identifier),
				Lastmod:    article.UpdatedAt.Format(time.RFC3339),
				ChangeFreq: sitemap.DAILY,
				Priority:   1.0,
			})
		}
		if err = writePage(su, file); err != nil {
			return err
		}
		page++
	}

	if len(sitemapIndexes) > 0 {
		if err = sm.CreateSitemapIndex(sitemapIndexes, "sitemap/sitemap.xml"); err != nil {
			m.logger.Error("execUpdateSitemap", zap.Error(err))
			return err
		}
	}
	return nil
}

// InitSEO walks the "dist" directory and rewrites every .html file in place:
// the <title> becomes a per-page prefix plus the configured site name, the
// keywords/description meta tags take the configured values, and the
// meta[content='moredoc'] and meta[name='og:type'] tags are removed.
//
// A file that cannot be read, parsed, rendered or written is logged and
// skipped; it is never overwritten with partial content.
func (m *DBModel) InitSEO() {
	cfg := m.GetConfigOfSystem()
	dist := "dist"
	// Title prefix for known pages, keyed by path relative to dist.
	pages := map[string]string{
		"200.html":                "",
		"404.html":                "404 - 页面未找到 - ",
		"findpassword/index.html": "找回密码 - ",
		"index.html":              "",
		"login/index.html":        "用户登录 - ",
		"register/index.html":     "用户注册 - ",
		"search/index.html":       "文档搜索 - ",
		"upload/index.html":       "文档上传 - ",
	}

	errWalk := filepath.Walk(dist, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			return err
		}
		if info.IsDir() {
			return nil
		}

		path = filepath.ToSlash(path)
		if filepath.Ext(path) != ".html" {
			return nil
		}

		name := strings.TrimPrefix(path, dist+"/")
		defaultTitle, ok := pages[name]
		if !ok && strings.HasPrefix(path, dist+"/admin") {
			defaultTitle = "管理后台 - "
		}

		// Skip unreadable files: parsing empty input would produce an empty
		// skeleton that would then replace the real page.
		bs, errRead := os.ReadFile(path)
		if errRead != nil {
			m.logger.Error("initSEO", zap.Error(errRead), zap.String("file", path))
			return nil
		}

		doc, errDoc := goquery.NewDocumentFromReader(bytes.NewReader(bs))
		if errDoc != nil {
			m.logger.Error("initSEO", zap.Error(errDoc), zap.String("file", path))
			return nil
		}

		m.logger.Debug("initSEO", zap.String("file", path), zap.String("title", defaultTitle+cfg.Sitename))
		doc.Find("title").SetText(defaultTitle + cfg.Sitename)
		doc.Find("meta[name='keywords']").SetAttr("content", cfg.Keywords)
		doc.Find("meta[name='description']").SetAttr("content", cfg.Description)
		doc.Find("meta[content='moredoc']").Remove()
		doc.Find("meta[name='og:type']").Remove()

		htmlStr, errHtml := doc.Html()
		if errHtml != nil {
			m.logger.Error("initSEO", zap.Error(errHtml), zap.String("file", path))
			return nil
		}
		if errWrite := os.WriteFile(path, []byte(htmlStr), os.ModePerm); errWrite != nil {
			m.logger.Error("initSEO", zap.Error(errWrite), zap.String("file", path))
		}
		return nil
	})
	if errWalk != nil {
		m.logger.Error("initSEO", zap.Error(errWalk), zap.String("dir", dist))
	}
}

// cronUpdateSitemap loops forever, waking once a minute, and calls
// UpdateSitemap at most once per calendar day during the hour given by the
// MOREDOC_UPDATE_SITEMAP_HOUR environment variable.
//
// lastUpdated starts as today's date, so the first run happens no earlier
// than the day after the process started.
func (m *DBModel) cronUpdateSitemap() {
	layout := "2006-01-02"
	lastUpdated := time.Now().Format(layout)
	for {
		// An unset or non-numeric value parses as 0, i.e. the midnight hour.
		hour, _ := strconv.Atoi(os.Getenv("MOREDOC_UPDATE_SITEMAP_HOUR"))
		hour = hour % 24
		m.logger.Debug("cronUpdateSitemap", zap.Int("hour", hour), zap.String("lastUpdated", lastUpdated))

		now := time.Now()
		if now.Hour() == hour && now.Format(layout) != lastUpdated {
			m.logger.Debug("cronUpdateSitemap,start...")
			if err := m.UpdateSitemap(); err != nil {
				m.logger.Debug("cronUpdateSitemap,end...", zap.Error(err))
			} else {
				m.logger.Debug("cronUpdateSitemap,end...")
			}
			// Recorded even on failure, so the next attempt is the next day.
			lastUpdated = now.Format(layout)
		}
		time.Sleep(1 * time.Minute)
	}
}

// cronCleanInvalidAttachment loops forever purging soft-deleted attachments.
//
// Each round:
//  1. Load up to 100 attachments whose deleted_at is older than the
//     configured retention period.
//  2. Look up which of their hashes are still used by attachments that are
//     not marked deleted; for those only the record is removed, not the file.
//  3. Permanently delete the records.
//  4. Remove the files of hashes no longer in use; for document attachments
//     the directory of derived files (thumbnails, PDF, ...) named after the
//     file without its extension is removed as well.
//
// A round is skipped when step 1 or step 2 fails, so that files are never
// removed on the basis of an incomplete "still in use" set.
func (m *DBModel) cronCleanInvalidAttachment() {
	sleepDuration := 1 * time.Minute
	for {
		time.Sleep(1 * time.Second)
		m.logger.Debug("cronCleanInvalidAttachment,start...")
		var (
			deleted, alive  []Attachment
			hashes          []string
			hashMap         = make(map[string]struct{})
			ids             []int64
			retentionMinute = m.GetConfigOfSecurity(ConfigSecurityAttachmentRetentionMinute).AttachmentRetentionMinute
		)
		if retentionMinute < 0 {
			retentionMinute = 0
		}

		// 1. Find attachments that have been marked deleted long enough ago.
		err := m.db.Unscoped().Where("deleted_at IS NOT NULL").Where("deleted_at < ?", time.Now().Add(-time.Duration(retentionMinute)*time.Minute)).Limit(100).Find(&deleted).Error
		if err != nil {
			m.logger.Error("cronCleanInvalidAttachment", zap.Error(err))
		}
		if err != nil || len(deleted) == 0 {
			m.logger.Debug("cronCleanInvalidAttachment,end...")
			time.Sleep(sleepDuration)
			continue
		}

		for i := range deleted {
			hashes = append(hashes, deleted[i].Hash)
			ids = append(ids, deleted[i].Id)
		}

		// 2. Find hashes still referenced by attachments not marked deleted.
		err = m.db.Select("hash").Where("hash IN (?)", hashes).Group("hash").Limit(len(hashes)).Find(&alive).Error
		if err != nil {
			// Without this set every file would look unused; skip the round.
			m.logger.Error("cronCleanInvalidAttachment", zap.Error(err))
			m.logger.Debug("cronCleanInvalidAttachment,end...")
			time.Sleep(sleepDuration)
			continue
		}
		for i := range alive {
			hashMap[alive[i].Hash] = struct{}{}
		}

		// 3. Permanently delete the records that were marked deleted.
		err = m.db.Unscoped().Where("id IN (?)", ids).Delete(&Attachment{}).Error
		if err != nil {
			m.logger.Error("cronCleanInvalidAttachment", zap.Error(err))
			m.logger.Debug("cronCleanInvalidAttachment,end...")
			continue
		}

		m.logger.Debug("cronCleanInvalidAttachment", zap.Any("ids", ids), zap.Any("Attachemnts", deleted))
		for _, attachment := range deleted {
			if _, inUse := hashMap[attachment.Hash]; inUse {
				continue
			}

			// 4. Remove the attachment file. TrimLeft takes a cutset, so all
			// leading '.' and '/' characters are stripped, giving a path
			// relative to the working directory.
			m.logger.Debug("cronCleanInvalidAttachment", zap.String("path", attachment.Path), zap.Any("attachemnt", attachment))
			file := strings.TrimLeft(attachment.Path, "./")
			m.logger.Debug("cronCleanInvalidAttachment", zap.String("file", file))
			if err := os.Remove(file); err != nil {
				m.logger.Error("cronCleanInvalidAttachment", zap.Error(err), zap.String("file", file))
			}

			if attachment.Type == AttachmentTypeDocument {
				// Remove the files derived from the document.
				folder := strings.TrimSuffix(file, filepath.Ext(file))
				m.logger.Debug("cronCleanInvalidAttachment", zap.String("folder", folder))
				if err := os.RemoveAll(folder); err != nil {
					m.logger.Error("cronCleanInvalidAttachment", zap.Error(err), zap.String("folder", folder))
				}
			}
		}
		m.logger.Debug("cronCleanInvalidAttachment,end...")
	}
}

// cronMarkAttachmentDeleted loops forever and, once an hour, marks unused
// attachments as deleted:
//
//   - config and banner attachments whose hash is not referenced by any
//     image-type config value or banner path;
//   - all other attachments with type_id 0 that were created more than
//     24 hours ago.
//
// The first rule is skipped when either lookup fails or no referenced hash
// was found, because an incomplete hash list would mark attachments that are
// still in use.
func (m *DBModel) cronMarkAttachmentDeleted() {
	for {
		time.Sleep(1 * time.Hour)
		var (
			configs []Config
			banners []Banner
			hashes  []string
			// lookupsOK stays true only if both reference lookups succeeded.
			lookupsOK = true
		)

		// 1. Image-type configs: the file name without extension is the hash.
		if err := m.db.Select("value").Where("input_type = ?", "image").Find(&configs).Error; err != nil {
			m.logger.Error("cronMarkAttachmentDeleted", zap.Error(err))
			lookupsOK = false
		}
		for _, config := range configs {
			hash := strings.TrimSpace(strings.TrimSuffix(filepath.Base(config.Value), filepath.Ext(config.Value)))
			if hash != "" {
				hashes = append(hashes, hash)
			}
		}

		// 2. Banners: the file name without extension is the hash.
		if err := m.db.Select("path").Find(&banners).Error; err != nil {
			m.logger.Error("cronMarkAttachmentDeleted", zap.Error(err))
			lookupsOK = false
		}
		for _, banner := range banners {
			hash := strings.TrimSpace(strings.TrimSuffix(filepath.Base(banner.Path), filepath.Ext(banner.Path)))
			if hash != "" {
				hashes = append(hashes, hash)
			}
		}

		if lookupsOK && len(hashes) > 0 {
			err := m.db.Where("`hash` NOT IN (?) and `type` in (?)", hashes, []int{AttachmentTypeConfig, AttachmentTypeBanner}).Delete(&Attachment{}).Error
			if err != nil {
				m.logger.Error("cronMarkAttachmentDeleted", zap.Error(err))
			}
		}

		// Attachments that are neither config nor banner: type_id 0 means
		// unused; mark them deleted once they are older than 24 hours.
		m.logger.Debug("cronMarkAttachmentDeleted start...")
		err := m.db.Where("`type` not in (?) and type_id = ?", []int{AttachmentTypeConfig, AttachmentTypeBanner}, 0).Where("created_at < ?", time.Now().Add(-time.Duration(24)*time.Hour)).Delete(&Attachment{}).Error
		if err != nil {
			m.logger.Error("cronMarkAttachmentDeleted", zap.Error(err))
		}
		m.logger.Debug("cronMarkAttachmentDeleted end...")
	}
}

// loopCovertDocument runs ConvertDocument in an endless loop.
//
// On entry it clears the cache/convert directory and resets documents left in
// the converting status back to pending. When ConvertDocument reports
// gorm.ErrRecordNotFound the loop sleeps 10 seconds before trying again; any
// other error is logged and the loop continues immediately. The call returns
// at once if the loop is already flagged as running.
func (m *DBModel) loopCovertDocument() {
	if convertDocumentRunning {
		return
	}

	// Clear the conversion cache directory.
	os.RemoveAll("cache/convert")
	convertDocumentRunning = true
	sleep := 10 * time.Second

	if err := m.db.Model(&Document{}).Where("status = ?", DocumentStatusConverting).Update("status", DocumentStatusPending).Error; err != nil {
		m.logger.Error("loopCovertDocument", zap.Error(err))
	}

	for {
		now := time.Now()
		m.logger.Debug("loopCovertDocument,start...")
		err := m.ConvertDocument()
		notFound := errors.Is(err, gorm.ErrRecordNotFound)
		if err != nil && !notFound {
			m.logger.Error("loopCovertDocument", zap.Error(err))
		}
		m.logger.Debug("loopCovertDocument,end...", zap.String("cost", time.Since(now).String()))
		if notFound {
			time.Sleep(sleep)
		}
	}
}

// ReconvertDocoument regenerates preview files in the format given by ext
// (with or without a leading dot).
//
// With documentId <= 0 every converted document is processed; otherwise only
// the given document, which must have the converted status. The reconvert
// cache directory is recreated empty before any work starts.
func (m *DBModel) ReconvertDocoument(documentId int64, ext string) {
	ext = "." + strings.TrimLeft(ext, ".")

	if err := os.RemoveAll(cacheReconvert); err != nil {
		m.logger.Error("ReconvertDocoument", zap.Error(err))
	}
	if err := os.MkdirAll(cacheReconvert, os.ModePerm); err != nil {
		m.logger.Error("ReconvertDocoument", zap.Error(err))
		return
	}

	if documentId <= 0 {
		m.reconvertAllDocument(ext)
		return
	}

	doc, err := m.GetDocument(documentId)
	if err != nil {
		m.logger.Error("ReconvertDocoument", zap.Error(err))
		return
	}

	if doc.Status != DocumentStatusConverted {
		m.logger.Error("ReconvertDocoument", zap.Error(errors.New("文档不是已转换的文档,不能重转")))
		return
	}

	m.reconvertDocument(&doc, ext)
}

// reconvertDocument converts the preview pages of doc to the format ext.
//
// Steps:
//  1. Copy each existing preview page into a per-document cache directory,
//     decompressing gzipped SVG pages.
//  2. Convert each page: SVG goes through Inkscape to PNG first and then, if
//     the target is not PNG, through ImageMagick; other formats go straight
//     through ImageMagick.
//  3. Copy the converted pages next to the original previews.
//  4. Set preview_ext and clear enable_gzip on every document whose
//     attachment shares this hash (or on doc alone if none is found).
//  5. Remove the old preview files.
//
// Failures are logged and abort the function; nothing is returned.
func (m *DBModel) reconvertDocument(doc *Document, ext string) {
	m.logger.Debug("reconvertDocument", zap.Any("doc", doc), zap.String("ext", ext))
	if doc.PreviewExt == ext {
		m.logger.Info("reconvertDocument", zap.String("msg", "文档预览文件格式与指定格式一致,无需重转"), zap.String("document", doc.Title+doc.Ext))
		return
	}

	// 1. Fetch the document's preview files.
	attachment := m.GetAttachmentByTypeAndTypeId(AttachmentTypeDocument, doc.Id, "id", "hash")
	if attachment.Id == 0 {
		m.logger.Error("reconvertDocument", zap.String("msg", "文档预览文件不存在"), zap.String("document", doc.Title+doc.Ext))
		return
	}

	// The preview directory is built from the first five characters of the
	// hash; a shorter hash would make the slice expression below panic.
	hashChars := strings.Split(attachment.Hash, "")
	if len(hashChars) < 5 {
		m.logger.Error("reconvertDocument", zap.String("msg", "附件hash无效"), zap.String("document", doc.Title+doc.Ext), zap.String("hash", attachment.Hash))
		return
	}
	previewDir := fmt.Sprintf("documents/%s/%s", strings.Join(hashChars[:5], "/"), attachment.Hash)

	cacheDir := filepath.Join(cacheReconvert, strconv.FormatInt(doc.Id, 10))
	if err := os.MkdirAll(cacheDir, os.ModePerm); err != nil {
		m.logger.Error("reconvertDocument", zap.String("document", doc.Title+doc.Ext), zap.Error(err))
		return
	}
	defer os.RemoveAll(cacheDir)

	totalPreview := doc.Preview
	if totalPreview == 0 {
		totalPreview = doc.Pages
	}

	// Extension of the existing preview files; gzipped SVG pages are stored
	// with the ".gzip.svg" suffix.
	isGZIP := false
	oldExt := doc.PreviewExt
	if doc.EnableGZIP && strings.HasSuffix(oldExt, ".svg") {
		oldExt = ".gzip.svg"
		isGZIP = true
	}

	var (
		convertedTargets []string
		oldSrcFiles      []string
	)
	for i := 1; i <= totalPreview; i++ {
		// Local working copy and the stored preview it is copied from.
		dstFile := filepath.Join(cacheDir, fmt.Sprintf("%d%s", i, oldExt))
		srcFile := fmt.Sprintf("%s/%d%s", previewDir, i, oldExt)
		oldSrcFiles = append(oldSrcFiles, srcFile)

		err := util.CopyFile(srcFile, dstFile)
		if err != nil {
			m.logger.Error("reconvertDocument", zap.String("msg", "下载文档预览文件失败"), zap.String("document", doc.Title+doc.Ext), zap.Error(err))
			return
		}
		m.logger.Debug("reconvertDocument", zap.Bool("isGZIP", isGZIP), zap.String("msg", "下载文档预览文件成功"), zap.String("document", doc.Title+doc.Ext), zap.String("srcFile", srcFile), zap.String("dstFile", dstFile))

		if isGZIP {
			// Decompress in place.
			m.ungzipSVG(dstFile)
		}

		// 2. Convert the preview page.
		convertedTargetFile := filepath.Join(cacheDir, fmt.Sprintf("%d%s", i, ext))
		if strings.HasSuffix(oldExt, ".svg") {
			// SVG is first rendered to PNG with Inkscape.
			tmpFile := filepath.Join(cacheDir, fmt.Sprintf("tmp-%d.png", i))
			err = converter.ConvertByInkscape(dstFile, tmpFile)
			if err == nil {
				if strings.HasSuffix(convertedTargetFile, ".png") {
					// Target is PNG: use the Inkscape output directly.
					convertedTargetFile = tmpFile
				} else {
					// Any other target: convert the PNG with ImageMagick.
					err = converter.ConvertByImageMagick(tmpFile, convertedTargetFile)
					os.RemoveAll(tmpFile)
				}
			}
		} else {
			err = converter.ConvertByImageMagick(dstFile, convertedTargetFile)
		}

		if err != nil {
			m.logger.Error("reconvertDocument", zap.String("msg", "转换文档预览文件失败"), zap.String("document", doc.Title+doc.Ext), zap.Error(err))
			return
		}
		convertedTargets = append(convertedTargets, convertedTargetFile)
	}

	// 3. Store the converted preview pages (pages are numbered from 1).
	for i, srcFile := range convertedTargets {
		dstFile := fmt.Sprintf("%s/%d%s", previewDir, i+1, ext)
		if err := util.CopyFile(srcFile, dstFile); err != nil {
			m.logger.Error("reconvertDocument", zap.String("msg", "上传文档预览文件失败"), zap.String("document", doc.Title+doc.Ext), zap.Error(err))
			return
		}
	}

	// 4. Update the preview extension of every document sharing this hash.
	var (
		attachments []Attachment
		err         error
		data        = map[string]interface{}{
			"preview_ext": ext,
			"enable_gzip": false,
		}
	)
	if errFind := m.db.Select("id", "type_id").Where("hash = ? and `type` = ?", attachment.Hash, AttachmentTypeDocument).Find(&attachments).Error; errFind != nil {
		// Not fatal: fall through and update doc itself.
		m.logger.Error("reconvertDocument", zap.String("document", doc.Title+doc.Ext), zap.Error(errFind))
	}
	if len(attachments) > 0 {
		ids := make([]int64, 0, len(attachments))
		for i := range attachments {
			ids = append(ids, attachments[i].TypeId)
		}
		err = m.db.Model(&Document{}).Where("id IN (?)", ids).Updates(data).Error
	} else {
		err = m.db.Model(doc).Updates(data).Error
	}

	if err != nil {
		m.logger.Error("reconvertDocument", zap.String("msg", "更新文档预览文件后缀失败"), zap.String("document", doc.Title+doc.Ext), zap.Error(err))
		return
	}

	// 5. Remove the old preview files (the cache directory is removed by the
	// deferred call above).
	for _, file := range oldSrcFiles {
		if errRemove := os.Remove(file); errRemove != nil {
			m.logger.Error("reconvertDocument", zap.String("file", file), zap.Error(errRemove))
		}
	}
}

// reconvertAllDocument runs reconvertDocument over every document with the
// converted status, in ascending id order.
//
// The id of the last processed document is saved to cache/reconvert.json
// after each document and read back on start, so a later call continues after
// that id instead of starting from the beginning.
func (m *DBModel) reconvertAllDocument(ext string) {
	const progressFile = "cache/reconvert.json"

	var cfg reconvertDocument
	data, err := os.ReadFile(progressFile)
	switch {
	case err == nil:
		if errJSON := json.Unmarshal(data, &cfg); errJSON != nil {
			m.logger.Error("reconvertAllDocument", zap.Error(errJSON))
		}
	case !errors.Is(err, os.ErrNotExist):
		// A missing progress file just means starting from id 0.
		m.logger.Error("reconvertAllDocument", zap.Error(err))
	}

	for {
		var doc Document
		// Limit(1): only the next document is needed.
		if errFind := m.db.Where("id > ?", cfg.Id).Where("status = ?", DocumentStatusConverted).Order("id asc").Limit(1).Find(&doc).Error; errFind != nil {
			m.logger.Error("reconvertAllDocument", zap.Error(errFind))
		}
		if doc.Id == 0 {
			break
		}

		m.reconvertDocument(&doc, ext)

		cfg.Id = doc.Id
		progress, errJSON := json.Marshal(cfg)
		if errJSON != nil {
			m.logger.Error("reconvertAllDocument", zap.Error(errJSON))
			continue
		}
		if errWrite := os.WriteFile(progressFile, progress, os.ModePerm); errWrite != nil {
			m.logger.Error("reconvertAllDocument", zap.Error(errWrite))
		}
	}
}

// ungzipSVG replaces the gzip-compressed file at path svg with its
// decompressed content.
//
// The data is fully decompressed in memory before the file is rewritten, so
// a read or decompression failure leaves the original file untouched. Errors
// are logged; nothing is returned.
func (m *DBModel) ungzipSVG(svg string) {
	m.logger.Info("ungzipSVG", zap.String("svg", svg))
	bs, err := os.ReadFile(svg)
	if err != nil {
		m.logger.Error("ungzipSVG", zap.Error(err))
		return
	}

	gz, err := gzip.NewReader(bytes.NewReader(bs))
	if err != nil {
		m.logger.Error("ungzipSVG", zap.Error(err))
		return
	}
	defer gz.Close()

	content, err := io.ReadAll(gz)
	if err != nil {
		m.logger.Error("ungzipSVG", zap.Error(err))
		return
	}

	if err = os.WriteFile(svg, content, 0666); err != nil {
		m.logger.Error("ungzipSVG", zap.Error(err))
	}
}