Просмотр исходного кода

1.修改部门代码
2.新增爬取网站页面,文件路径

GongZheng 4 месяцев назад
Родитель
Коммит
2c455af40b
41 измененных файлов с 3933 добавлено и 197 удалено
  1. 770 0
      sql/db_mcyc-202511101019.sql
  2. 13 0
      xzl-admin/pom.xml
  3. 30 120
      xzl-admin/src/main/java/com/xzl/XzlApplication.java
  4. 2 1
      xzl-admin/src/main/java/com/xzl/web/controller/KnowledgeFileController.java
  5. 0 3
      xzl-admin/src/main/java/com/xzl/web/controller/MonitorAreaController.java
  6. 3 8
      xzl-admin/src/main/java/com/xzl/web/controller/SysFileFolderController.java
  7. 105 0
      xzl-admin/src/main/java/com/xzl/web/controller/SysSpiderAttachmentsController.java
  8. 143 0
      xzl-admin/src/main/java/com/xzl/web/controller/SysSpiderSourceDataController.java
  9. 105 0
      xzl-admin/src/main/java/com/xzl/web/controller/SysSpiderStructuredDataController.java
  10. 63 0
      xzl-admin/src/main/java/com/xzl/web/mapper/SysSpiderAttachmentsMapper.java
  11. 61 0
      xzl-admin/src/main/java/com/xzl/web/mapper/SysSpiderSourceDataMapper.java
  12. 62 0
      xzl-admin/src/main/java/com/xzl/web/mapper/SysSpiderStructuredDataMapper.java
  13. 61 0
      xzl-admin/src/main/java/com/xzl/web/service/ISysSpiderAttachmentsService.java
  14. 63 0
      xzl-admin/src/main/java/com/xzl/web/service/ISysSpiderSourceDataService.java
  15. 62 0
      xzl-admin/src/main/java/com/xzl/web/service/ISysSpiderStructuredDataService.java
  16. 0 1
      xzl-admin/src/main/java/com/xzl/web/service/impl/SysFileFolderServiceImpl.java
  17. 95 0
      xzl-admin/src/main/java/com/xzl/web/service/impl/SysSpiderAttachmentsServiceImpl.java
  18. 195 0
      xzl-admin/src/main/java/com/xzl/web/service/impl/SysSpiderSourceDataServiceImpl.java
  19. 95 0
      xzl-admin/src/main/java/com/xzl/web/service/impl/SysSpiderStructuredDataServiceImpl.java
  20. 6 6
      xzl-admin/src/main/java/com/xzl/web/service/impl/UserPortraitServiceImpl.java
  21. 84 0
      xzl-admin/src/main/java/com/xzl/web/utils/SeleniumUtils.java
  22. 5 1
      xzl-admin/src/main/resources/application-dev.yml
  23. 91 0
      xzl-admin/src/main/resources/mapper/SysSpiderAttachmentsMapper.xml
  24. 76 0
      xzl-admin/src/main/resources/mapper/SysSpiderSourceDataMapper.xml
  25. 93 0
      xzl-admin/src/main/resources/mapper/SysSpiderStructuredDataMapper.xml
  26. 6 7
      xzl-admin/src/main/resources/mapper/UserPortraitMapper.xml
  27. 17 17
      xzl-common/src/main/java/com/xzl/common/core/domain/entity/SysUser.java
  28. 5 4
      xzl-framework/src/main/java/com/xzl/framework/web/service/SysLoginService.java
  29. 166 0
      xzl-system/src/main/java/com/xzl/system/domain/SysSpiderAttachments.java
  30. 112 0
      xzl-system/src/main/java/com/xzl/system/domain/SysSpiderSourceData.java
  31. 167 0
      xzl-system/src/main/java/com/xzl/system/domain/SysSpiderStructuredData.java
  32. 17 18
      xzl-system/src/main/resources/mapper/system/SysDeptMapper.xml
  33. 9 9
      xzl-system/src/main/resources/mapper/system/SysUserMapper.xml
  34. 44 0
      xzl-ui/src/api/spiderData/attachments.js
  35. 55 0
      xzl-ui/src/api/spiderData/spiderData.js
  36. 44 0
      xzl-ui/src/api/spiderData/structured.js
  37. 1 1
      xzl-ui/src/router/index.js
  38. 1 1
      xzl-ui/src/views/logistics4.vue
  39. 313 0
      xzl-ui/src/views/spiderData/attachments/index.vue
  40. 336 0
      xzl-ui/src/views/spiderData/sourceData/index.vue
  41. 357 0
      xzl-ui/src/views/spiderData/structured/index.vue

Разница между файлами не показана из-за своего большого размера
+ 770 - 0
sql/db_mcyc-202511101019.sql


+ 13 - 0
xzl-admin/pom.xml

@@ -135,6 +135,19 @@
             <artifactId>pinyin4j</artifactId>
             <version>2.5.1</version>
         </dependency>
+
+<!--        网页处理-->
+        <dependency>
+            <groupId>org.jsoup</groupId>
+            <artifactId>jsoup</artifactId>
+            <version>1.17.2</version> <!-- 推荐使用最新版本 -->
+        </dependency>
+
+        <dependency>
+            <groupId>org.seleniumhq.selenium</groupId>
+            <artifactId>selenium-java</artifactId>
+            <version>3.141.59</version>
+        </dependency>
     </dependencies>
 
     <build>

+ 30 - 120
xzl-admin/src/main/java/com/xzl/XzlApplication.java

@@ -1,6 +1,5 @@
 package com.xzl;
 
-import com.alibaba.fastjson.JSONObject;
 import com.xzl.common.core.domain.entity.MindMapNode;
 import com.xzl.common.utils.SecurityUtils;
 import org.apache.commons.lang3.ObjectUtils;
@@ -25,95 +24,6 @@ public class XzlApplication {
     SpringApplication.run(XzlApplication.class, args);
     System.out.println(SecurityUtils.encryptPassword("123456"));
     System.out.println("系统启动成功 ...");
-    String data ="{  \n" +
-            "  \"\": \"层级1节点\",  \n" +
-            "  \"\": \"层级1备注\",  \n" +
-            "  \"sonNode\": [  \n" +
-            "    {  \n" +
-            "      \"contentTxt\": \"层级2节点\",  \n" +
-            "      \"remark\": \"层级2备注\",  \n" +
-            "      \"sonNode\": [  \n" +
-            "        {  \n" +
-            "          \"contentTxt\": \"层级3节点\",  \n" +
-            "          \"remark\": \"层级3备注\",  \n" +
-            "          \"sonNode\": []  \n" +
-            "        },  \n" +
-            "        {  \n" +
-            "          \"contentTxt\": \"层级3节点\",  \n" +
-            "          \"remark\": \"层级3备注\",  \n" +
-            "          \"sonNode\": []  \n" +
-            "        }  \n" +
-            "      ]  \n" +
-            "    },  \n" +
-            "    {  \n" +
-            "      \"contentTxt\": \"层级2节点\",  \n" +
-            "      \"remark\": \"层级2备注\",  \n" +
-            "      \"sonNode\": []  \n" +
-            "    }  \n" +
-            "  ],\n" +
-            "  \"contentTxt\": \"层级1节点2\",  \n" +
-            "  \"remark\": \"层级1备注2\",\n" +
-            "  \"sonNode\": [\n" +
-            "        {\n" +
-            "            \"contentTxt\": \"层级2节点2\",\n" +
-            "            \"remark\": \"层级2备注2\",\n" +
-            "            \"sonNode\": [\n" +
-            "                {\n" +
-            "                    \"contentTxt\": \"层级3节点2\",\n" +
-            "                    \"remark\": \"层级3备注2\",\n" +
-            "                    \"sonNode\": []\n" +
-            "                },\n" +
-            "                {\n" +
-            "                    \"contentTxt\": \"层级3节点2\",\n" +
-            "                    \"remark\": \"层级3备注2\",\n" +
-            "                    \"sonNode\": []\n" +
-            "                }\n" +
-            "            ]\n" +
-            "        },\n" +
-            "        {\n" +
-            "            \"contentTxt\": \"层级2节点2\",\n" +
-            "            \"remark\": \"层级2备注2\",\n" +
-            "            \"sonNode\": []\n" +
-            "        }\n" +
-            "    ]\n" +
-            "}\n";
-    List<MindMapNode> mindMapNodes = JSONObject.parseArray(data, MindMapNode.class);
-      try {
-          createMindMap(mindMapNodes,"测试!", new OutputStream() {
-            @Override
-            public void write(int b) throws IOException {
-
-            }
-          });
-      } catch (IOException e) {
-          throw new RuntimeException(e);
-      } catch (CoreException e) {
-          throw new RuntimeException(e);
-      }
-//创建思维导图的工作空间
-    IWorkbookBuilder workbookBuilder = Core.getWorkbookBuilder();
-    IWorkbook workbook = workbookBuilder.createWorkbook();
-//获得默认sheet
-    ISheet primarySheet = workbook.getPrimarySheet();
-
-// 创建根节点
-    ITopic rootTopic = primarySheet.getRootTopic();
-    ITopic topicSon = workbook.createTopic();
-
-    //设置根主题的标题
-    rootTopic.setTitleText("测试!");
-// 根主题对象
-    rootTopic.setStructureClass("org.xmind.ui.logic.left");
-    topicSon.setTitleText(mindMapNodes.get(1).getContentTxt());
-    // 备注
-    IPlainNotesContent plainContent = (IPlainNotesContent) workbook.createNotesContent(INotes.PLAIN);
-    plainContent.setTextContent(mindMapNodes.get(1).getRemark());
-    INotes notes = topicSon.getNotes();
-    notes.setContent(INotes.PLAIN, plainContent);
-    buildNode(mindMapNodes.get(1).getSonNode(),topicSon,workbook);
-    topicSon.add(topicSon);
-//组装节点到根节点
-    rootTopic.add(topicSon);
 
   }
 
@@ -125,19 +35,19 @@ public class XzlApplication {
    * @throws IOException
    * @throws CoreException
    */
-  public static void createMindMap(List<MindMapNode> nodes, String root,OutputStream os) throws IOException, CoreException {
-    //创建脑图工作空间
-    IWorkbookBuilder workbookBuilder = Core.getWorkbookBuilder();
-    IWorkbook workbook = workbookBuilder.createWorkbook();
-    //获得默认sheet
-    ISheet primarySheet = workbook.getPrimarySheet();
-    ITopic rootTopic = primarySheet.getRootTopic();
-    //根节点的标题
-    rootTopic.setTitleText(root);
-    rootTopic.setStructureClass("org.xmind.ui.logic.right");
-    buildNode(nodes,rootTopic,workbook);
-    workbook.save(os);
-  }
+//  public static void createMindMap(List<MindMapNode> nodes, String root,OutputStream os) throws IOException, CoreException {
+//    //创建脑图工作空间
+//    IWorkbookBuilder workbookBuilder = Core.getWorkbookBuilder();
+//    IWorkbook workbook = workbookBuilder.createWorkbook();
+//    //获得默认sheet
+//    ISheet primarySheet = workbook.getPrimarySheet();
+//    ITopic rootTopic = primarySheet.getRootTopic();
+//    //根节点的标题
+//    rootTopic.setTitleText(root);
+//    rootTopic.setStructureClass("org.xmind.ui.logic.right");
+//    buildNode(nodes,rootTopic,workbook);
+//    workbook.save(os);
+//  }
 
   /**
    * 创建脑图节点
@@ -145,22 +55,22 @@ public class XzlApplication {
    * @param topic 根节点
    * @param workbook 工作空间
    */
-  private static void buildNode(List<MindMapNode> mindMapNodes, ITopic topic, IWorkbook workbook) {
-    if (ObjectUtils.isEmpty(mindMapNodes)) {
-      return;
-    } else {
-      for (MindMapNode mindMapNode : mindMapNodes) {
-        ITopic topicSon = workbook.createTopic();
-        topicSon.setTitleText(mindMapNode.getContentTxt());
-        // 备注
-        IPlainNotesContent plainContent = (IPlainNotesContent) workbook.createNotesContent(INotes.PLAIN);
-        plainContent.setTextContent(mindMapNode.getRemark());
-        INotes notes = topicSon.getNotes();
-        notes.setContent(INotes.PLAIN, plainContent);
-        buildNode(mindMapNode.getSonNode(),topicSon,workbook);
-        topic.add(topicSon);
-      }
-    }
-  }
+//  private static void buildNode(List<MindMapNode> mindMapNodes, ITopic topic, IWorkbook workbook) {
+//    if (ObjectUtils.isEmpty(mindMapNodes)) {
+//      return;
+//    } else {
+//      for (MindMapNode mindMapNode : mindMapNodes) {
+//        ITopic topicSon = workbook.createTopic();
+//        topicSon.setTitleText(mindMapNode.getContentTxt());
+//        // 备注
+//        IPlainNotesContent plainContent = (IPlainNotesContent) workbook.createNotesContent(INotes.PLAIN);
+//        plainContent.setTextContent(mindMapNode.getRemark());
+//        INotes notes = topicSon.getNotes();
+//        notes.setContent(INotes.PLAIN, plainContent);
+//        buildNode(mindMapNode.getSonNode(),topicSon,workbook);
+//        topic.add(topicSon);
+//      }
+//    }
+//  }
 
 }

+ 2 - 1
xzl-admin/src/main/java/com/xzl/web/controller/KnowledgeFileController.java

@@ -30,6 +30,7 @@ import org.springframework.web.multipart.MultipartFile;
 @RestController
 @RequestMapping("/system/file")
 public class KnowledgeFileController extends BaseController {
+
     @Autowired
     private KnowledgeFileService knowledgeFileService;
 
@@ -41,7 +42,7 @@ public class KnowledgeFileController extends BaseController {
     /**
      * 查询文件列表
      */
-    @PreAuthorize("@ss.hasPermi('system:user:list')")
+    @PreAuthorize("@ss.hasPermi('system:file:list')")
     @GetMapping("/list")
     public TableDataInfo list(
             KnowledgeFile knowledge,

+ 0 - 3
xzl-admin/src/main/java/com/xzl/web/controller/MonitorAreaController.java

@@ -20,20 +20,17 @@ public class MonitorAreaController {
     @Autowired
     private MonitorAreaService monitorAreaService;
 
-
     @RequestMapping(value = "/infoList",method = RequestMethod.GET)
     public InfoListVO infoList(@RequestParam(value = "id",required = false)String id,
                                @RequestParam(value = "areaType",required = false)String areaType){
         return monitorAreaService.infoList(id,areaType);
     }
 
-
     @RequestMapping(value = "/saveArea",method = RequestMethod.POST)
     public Map<String,String> saveArea(@RequestBody MonitorArea monitorArea){
         return monitorAreaService.saveArea(monitorArea);
     }
 
-
     @RequestMapping(value = "/deleteArea",method = RequestMethod.DELETE)
     public Map<String,String> deleteArea(@RequestBody List<String> ids){
         return monitorAreaService.deleteArea(ids);

+ 3 - 8
xzl-admin/src/main/java/com/xzl/web/controller/SysFileFolderController.java

@@ -1,9 +1,5 @@
 package com.xzl.web.controller;
 
-
-
-
-
 import java.util.List;
 import javax.servlet.http.HttpServletResponse;
 import com.xzl.common.core.domain.entity.SysFileFolder;
@@ -31,11 +27,10 @@ import com.xzl.common.core.page.TableDataInfo;
 @RestController
 @RequestMapping("/system/folder")
 public class SysFileFolderController extends BaseController {
+
     @Autowired
     private ISysFileFolderService sysFileFolderService;
 
-
-
     /**
      * 获取知识库目录结构(Markdown格式)
      */
@@ -66,7 +61,8 @@ public class SysFileFolderController extends BaseController {
     /**
      * 查询文件文件夹(麻城知识库四级结构)列表
      */
-    @PreAuthorize("@ss.hasPermi('system:user:list')")
+//    @PreAuthorize("@ss.hasPermi('folder')")
+    @PreAuthorize("@ss.hasPermi('system:folder:list')")
     @GetMapping("/list")
     public TableDataInfo list(SysFileFolder sysFileFolder) {
         startPage();
@@ -131,7 +127,6 @@ public class SysFileFolderController extends BaseController {
         return toAjax(sysFileFolderService.deleteSysFileFolderByFolderIds(folderIds));
     }
 
-
     /**
      * 获取部门树列表
      */

+ 105 - 0
xzl-admin/src/main/java/com/xzl/web/controller/SysSpiderAttachmentsController.java

@@ -0,0 +1,105 @@
+package com.xzl.web.controller;
+
+import java.util.List;
+import javax.servlet.http.HttpServletResponse;
+
+import com.xzl.system.domain.SysSpiderAttachments;
+import com.xzl.web.service.ISysSpiderAttachmentsService;
+import org.springframework.security.access.prepost.PreAuthorize;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.web.bind.annotation.GetMapping;
+import org.springframework.web.bind.annotation.PostMapping;
+import org.springframework.web.bind.annotation.PutMapping;
+import org.springframework.web.bind.annotation.DeleteMapping;
+import org.springframework.web.bind.annotation.PathVariable;
+import org.springframework.web.bind.annotation.RequestBody;
+import org.springframework.web.bind.annotation.RequestMapping;
+import org.springframework.web.bind.annotation.RestController;
+import com.xzl.common.annotation.Log;
+import com.xzl.common.core.controller.BaseController;
+import com.xzl.common.core.domain.AjaxResult;
+import com.xzl.common.enums.BusinessType;
+import com.xzl.common.utils.poi.ExcelUtil;
+import com.xzl.common.core.page.TableDataInfo;
+
+/**
+ * REST controller for attachment records, mapped under /spiderData/attachments.
+ * Every handler delegates to {@link ISysSpiderAttachmentsService}.
+ *
+ * @author xzl
+ * @date 2025-11-13
+ */
+@RestController
+@RequestMapping("/spiderData/attachments")
+public class SysSpiderAttachmentsController extends BaseController
+{
+    @Autowired
+    private ISysSpiderAttachmentsService sysSpiderAttachmentsService;
+
+    /**
+     * Queries the attachment list.
+     * Calls startPage() before the query (presumably enables paging in BaseController; confirm)
+     * and wraps the rows with getDataTable().
+     *
+     * @param sysSpiderAttachments filter criteria bound from the request parameters
+     * @return table payload built from the matching attachments
+     */
+    @PreAuthorize("@ss.hasPermi('spiderData:attachments:list')")
+    @GetMapping("/list")
+    public TableDataInfo list(SysSpiderAttachments sysSpiderAttachments)
+    {
+        startPage();
+        List<SysSpiderAttachments> list = sysSpiderAttachmentsService.selectSysSpiderAttachmentsList(sysSpiderAttachments);
+        return getDataTable(list);
+    }
+
+    /**
+     * Exports the attachment list as an Excel sheet written to the HTTP response.
+     * Unlike list(), startPage() is not called here.
+     *
+     * @param response HTTP response the sheet is written to
+     * @param sysSpiderAttachments filter criteria bound from the request parameters
+     */
+    @PreAuthorize("@ss.hasPermi('spiderData:attachments:export')")
+    @Log(title = "附件", businessType = BusinessType.EXPORT)
+    @PostMapping("/export")
+    public void export(HttpServletResponse response, SysSpiderAttachments sysSpiderAttachments)
+    {
+        List<SysSpiderAttachments> list = sysSpiderAttachmentsService.selectSysSpiderAttachmentsList(sysSpiderAttachments);
+        ExcelUtil<SysSpiderAttachments> util = new ExcelUtil<SysSpiderAttachments>(SysSpiderAttachments.class);
+        util.exportExcel(response, list, "附件数据");
+    }
+
+    /**
+     * Gets the details of a single attachment.
+     *
+     * @param id attachment primary key taken from the URL path
+     * @return result wrapping whatever the service returns for that id
+     */
+    @PreAuthorize("@ss.hasPermi('spiderData:attachments:query')")
+    @GetMapping(value = "/{id}")
+    public AjaxResult getInfo(@PathVariable("id") Long id)
+    {
+        return success(sysSpiderAttachmentsService.selectSysSpiderAttachmentsById(id));
+    }
+
+    /**
+     * Adds an attachment.
+     *
+     * @param sysSpiderAttachments attachment parsed from the request body
+     * @return result derived from the service's int return value via toAjax()
+     */
+    @PreAuthorize("@ss.hasPermi('spiderData:attachments:add')")
+    @Log(title = "附件", businessType = BusinessType.INSERT)
+    @PostMapping
+    public AjaxResult add(@RequestBody SysSpiderAttachments sysSpiderAttachments)
+    {
+        return toAjax(sysSpiderAttachmentsService.insertSysSpiderAttachments(sysSpiderAttachments));
+    }
+
+    /**
+     * Updates an attachment.
+     *
+     * @param sysSpiderAttachments attachment parsed from the request body
+     * @return result derived from the service's int return value via toAjax()
+     */
+    @PreAuthorize("@ss.hasPermi('spiderData:attachments:edit')")
+    @Log(title = "附件", businessType = BusinessType.UPDATE)
+    @PutMapping
+    public AjaxResult edit(@RequestBody SysSpiderAttachments sysSpiderAttachments)
+    {
+        return toAjax(sysSpiderAttachmentsService.updateSysSpiderAttachments(sysSpiderAttachments));
+    }
+
+    /**
+     * Deletes one or more attachments.
+     *
+     * @param ids attachment primary keys taken from the URL path
+     * @return result derived from the service's int return value via toAjax()
+     */
+    @PreAuthorize("@ss.hasPermi('spiderData:attachments:remove')")
+    @Log(title = "附件", businessType = BusinessType.DELETE)
+	@DeleteMapping("/{ids}")
+    public AjaxResult remove(@PathVariable Long[] ids)
+    {
+        return toAjax(sysSpiderAttachmentsService.deleteSysSpiderAttachmentsByIds(ids));
+    }
+}

+ 143 - 0
xzl-admin/src/main/java/com/xzl/web/controller/SysSpiderSourceDataController.java

@@ -0,0 +1,143 @@
+package com.xzl.web.controller;
+
+import java.util.List;
+import javax.servlet.http.HttpServletResponse;
+
+
+import com.xzl.common.exception.ServiceException;
+import com.xzl.common.utils.StringUtils;
+import com.xzl.common.utils.poi.ExcelUtil;
+import com.xzl.system.domain.SysSpiderSourceData;
+import com.xzl.web.service.ISysSpiderSourceDataService;
+import io.swagger.annotations.ApiOperation;
+import org.springframework.security.access.prepost.PreAuthorize;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.web.bind.annotation.GetMapping;
+import org.springframework.web.bind.annotation.PostMapping;
+import org.springframework.web.bind.annotation.PutMapping;
+import org.springframework.web.bind.annotation.DeleteMapping;
+import org.springframework.web.bind.annotation.PathVariable;
+import org.springframework.web.bind.annotation.RequestBody;
+import org.springframework.web.bind.annotation.RequestMapping;
+import org.springframework.web.bind.annotation.RestController;
+import com.xzl.common.annotation.Log;
+import com.xzl.common.core.controller.BaseController;
+import com.xzl.common.core.domain.AjaxResult;
+import com.xzl.common.enums.BusinessType;
+
+
+import com.xzl.common.core.page.TableDataInfo;
+
+/**
+ * 原始采集数据Controller
+ *
+ * @author xzl
+ * @date 2025-11-13
+ */
+@RestController
+@RequestMapping("/spiderData/sourceData")
+public class SysSpiderSourceDataController extends BaseController
+{
+    @Autowired
+    private ISysSpiderSourceDataService sysSpiderSourceDataService;
+
+    /**
+     * 触发页面采集
+     * @param  前端传入的采集参数(包含 pageUrl)
+     * @return 采集结果(成功/失败信息)
+     */
+    @PostMapping("/collect") // 接口路径与前端 spiderData.js 中配置的一致
+    @ApiOperation("触发页面采集") // Swagger 接口说明(可选)
+    public AjaxResult collectPage(@RequestBody SysSpiderSourceData  sysSpiderSourceData) {
+        // 调用业务层执行采集逻辑(核心)
+        String pageUrl = sysSpiderSourceData.getPageUrl();
+        boolean empty = StringUtils.isEmpty(pageUrl);
+        if (empty){
+            return AjaxResult.error("url不能为空");
+        }
+        // 正则规则:支持 http/https,域名含字母/数字/短横线,允许端口和路径
+//        String urlRegex = "^(https?:\\/\\/)?([\\da-z.-]+)\\.([a-z.]{2,6})(:[0-9]{1,5})?(\\/[\\w.-]*)*$";
+        String urlRegex = "^(https?:\\/\\/)?([\\da-z.-]+)\\.([a-z.]{2,6})(:[0-9]{1,5})?(/[\\w\\-./?%&=]*)?$";
+
+        if (!pageUrl.matches(urlRegex)) {
+            throw new ServiceException("URL格式非法,请输入类似 https://www.xxx.com 的地址");
+        }
+        try {
+            sysSpiderSourceDataService.collectPage(sysSpiderSourceData.getPageUrl());
+        } catch (Exception e) {
+            e.printStackTrace();
+        }
+//        spiderCollectService.collectPage(collectDTO.getPageUrl());
+        // 返回成功结果(Result 是全局统一响应类)
+        return AjaxResult.success("采集任务已启动,正在处理");
+    }
+
+    /**
+     * 查询原始采集数据列表
+     */
+    @PreAuthorize("@ss.hasPermi('spiderData:sourceData:list')")
+    @GetMapping("/list")
+    public TableDataInfo list(SysSpiderSourceData sysSpiderSourceData)
+    {
+        startPage();
+        List<SysSpiderSourceData> list = sysSpiderSourceDataService.selectSysSpiderSourceDataList(sysSpiderSourceData);
+        return getDataTable(list);
+    }
+
+    /**
+     * 导出原始采集数据列表
+     */
+    @PreAuthorize("@ss.hasPermi('spiderData:sourceData:export')")
+    @Log(title = "原始采集数据", businessType = BusinessType.EXPORT)
+    @PostMapping("/export")
+    public void export(HttpServletResponse response, SysSpiderSourceData sysSpiderSourceData)
+    {
+        List<SysSpiderSourceData> list = sysSpiderSourceDataService.selectSysSpiderSourceDataList(sysSpiderSourceData);
+        ExcelUtil<SysSpiderSourceData> util = new ExcelUtil<SysSpiderSourceData>(SysSpiderSourceData.class);
+        util.exportExcel(response, list, "原始采集数据数据");
+    }
+
+    /**
+     * 获取原始采集数据详细信息
+     */
+    @PreAuthorize("@ss.hasPermi('spiderData:sourceData:query')")
+    @GetMapping(value = "/{id}")
+    public AjaxResult getInfo(@PathVariable("id") Long id)
+    {
+        return success(sysSpiderSourceDataService.selectSysSpiderSourceDataById(id));
+    }
+
+    /**
+     * 新增原始采集数据
+     */
+    @PreAuthorize("@ss.hasPermi('spiderData:sourceData:add')")
+    @Log(title = "原始采集数据", businessType = BusinessType.INSERT)
+    @PostMapping
+    public AjaxResult add(@RequestBody SysSpiderSourceData sysSpiderSourceData)
+    {
+        return toAjax(sysSpiderSourceDataService.insertSysSpiderSourceData(sysSpiderSourceData));
+    }
+
+    /**
+     * 修改原始采集数据
+     */
+    @PreAuthorize("@ss.hasPermi('spiderData:sourceData:edit')")
+    @Log(title = "原始采集数据", businessType = BusinessType.UPDATE)
+    @PutMapping
+    public AjaxResult edit(@RequestBody SysSpiderSourceData sysSpiderSourceData)
+    {
+        return toAjax(sysSpiderSourceDataService.updateSysSpiderSourceData(sysSpiderSourceData));
+    }
+
+    /**
+     * 删除原始采集数据
+     */
+    @PreAuthorize("@ss.hasPermi('spiderData:sourceData:remove')")
+    @Log(title = "原始采集数据", businessType = BusinessType.DELETE)
+	@DeleteMapping("/{ids}")
+    public AjaxResult remove(@PathVariable Long[] ids)
+    {
+        return toAjax(sysSpiderSourceDataService.deleteSysSpiderSourceDataByIds(ids));
+    }
+
+}

+ 105 - 0
xzl-admin/src/main/java/com/xzl/web/controller/SysSpiderStructuredDataController.java

@@ -0,0 +1,105 @@
+package com.xzl.web.controller;
+
+import java.util.List;
+import javax.servlet.http.HttpServletResponse;
+
+import com.xzl.system.domain.SysSpiderStructuredData;
+import com.xzl.web.service.ISysSpiderStructuredDataService;
+import org.springframework.security.access.prepost.PreAuthorize;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.web.bind.annotation.GetMapping;
+import org.springframework.web.bind.annotation.PostMapping;
+import org.springframework.web.bind.annotation.PutMapping;
+import org.springframework.web.bind.annotation.DeleteMapping;
+import org.springframework.web.bind.annotation.PathVariable;
+import org.springframework.web.bind.annotation.RequestBody;
+import org.springframework.web.bind.annotation.RequestMapping;
+import org.springframework.web.bind.annotation.RestController;
+import com.xzl.common.annotation.Log;
+import com.xzl.common.core.controller.BaseController;
+import com.xzl.common.core.domain.AjaxResult;
+import com.xzl.common.enums.BusinessType;
+import com.xzl.common.utils.poi.ExcelUtil;
+import com.xzl.common.core.page.TableDataInfo;
+
+/**
+ * REST controller for structured data records, mapped under /spiderData/structured.
+ * Every handler delegates to {@link ISysSpiderStructuredDataService}.
+ *
+ * @author xzl
+ * @date 2025-11-13
+ */
+@RestController
+@RequestMapping("/spiderData/structured")
+public class SysSpiderStructuredDataController extends BaseController
+{
+    @Autowired
+    private ISysSpiderStructuredDataService sysSpiderStructuredDataService;
+
+    /**
+     * Queries the structured data list.
+     * Calls startPage() before the query (presumably enables paging in BaseController; confirm)
+     * and wraps the rows with getDataTable().
+     *
+     * @param sysSpiderStructuredData filter criteria bound from the request parameters
+     * @return table payload built from the matching records
+     */
+    @PreAuthorize("@ss.hasPermi('spiderData:structured:list')")
+    @GetMapping("/list")
+    public TableDataInfo list(SysSpiderStructuredData sysSpiderStructuredData)
+    {
+        startPage();
+        List<SysSpiderStructuredData> list = sysSpiderStructuredDataService.selectSysSpiderStructuredDataList(sysSpiderStructuredData);
+        return getDataTable(list);
+    }
+
+    /**
+     * Exports the structured data list as an Excel sheet written to the HTTP response.
+     * Unlike list(), startPage() is not called here.
+     *
+     * @param response HTTP response the sheet is written to
+     * @param sysSpiderStructuredData filter criteria bound from the request parameters
+     */
+    @PreAuthorize("@ss.hasPermi('spiderData:structured:export')")
+    @Log(title = "结构化数据", businessType = BusinessType.EXPORT)
+    @PostMapping("/export")
+    public void export(HttpServletResponse response, SysSpiderStructuredData sysSpiderStructuredData)
+    {
+        List<SysSpiderStructuredData> list = sysSpiderStructuredDataService.selectSysSpiderStructuredDataList(sysSpiderStructuredData);
+        ExcelUtil<SysSpiderStructuredData> util = new ExcelUtil<SysSpiderStructuredData>(SysSpiderStructuredData.class);
+        util.exportExcel(response, list, "结构化数据数据");
+    }
+
+    /**
+     * Gets the details of a single structured data record.
+     *
+     * @param id record primary key taken from the URL path
+     * @return result wrapping whatever the service returns for that id
+     */
+    @PreAuthorize("@ss.hasPermi('spiderData:structured:query')")
+    @GetMapping(value = "/{id}")
+    public AjaxResult getInfo(@PathVariable("id") Long id)
+    {
+        return success(sysSpiderStructuredDataService.selectSysSpiderStructuredDataById(id));
+    }
+
+    /**
+     * Adds a structured data record.
+     *
+     * @param sysSpiderStructuredData record parsed from the request body
+     * @return result derived from the service's int return value via toAjax()
+     */
+    @PreAuthorize("@ss.hasPermi('spiderData:structured:add')")
+    @Log(title = "结构化数据", businessType = BusinessType.INSERT)
+    @PostMapping
+    public AjaxResult add(@RequestBody SysSpiderStructuredData sysSpiderStructuredData)
+    {
+        return toAjax(sysSpiderStructuredDataService.insertSysSpiderStructuredData(sysSpiderStructuredData));
+    }
+
+    /**
+     * Updates a structured data record.
+     *
+     * @param sysSpiderStructuredData record parsed from the request body
+     * @return result derived from the service's int return value via toAjax()
+     */
+    @PreAuthorize("@ss.hasPermi('spiderData:structured:edit')")
+    @Log(title = "结构化数据", businessType = BusinessType.UPDATE)
+    @PutMapping
+    public AjaxResult edit(@RequestBody SysSpiderStructuredData sysSpiderStructuredData)
+    {
+        return toAjax(sysSpiderStructuredDataService.updateSysSpiderStructuredData(sysSpiderStructuredData));
+    }
+
+    /**
+     * Deletes one or more structured data records.
+     *
+     * @param ids record primary keys taken from the URL path
+     * @return result derived from the service's int return value via toAjax()
+     */
+    @PreAuthorize("@ss.hasPermi('spiderData:structured:remove')")
+    @Log(title = "结构化数据", businessType = BusinessType.DELETE)
+	@DeleteMapping("/{ids}")
+    public AjaxResult remove(@PathVariable Long[] ids)
+    {
+        return toAjax(sysSpiderStructuredDataService.deleteSysSpiderStructuredDataByIds(ids));
+    }
+}

+ 63 - 0
xzl-admin/src/main/java/com/xzl/web/mapper/SysSpiderAttachmentsMapper.java

@@ -0,0 +1,63 @@
+package com.xzl.web.mapper;
+
+
+import com.xzl.system.domain.SysSpiderAttachments;
+
+import java.util.List;
+
+/**
+ * Mapper interface for attachment records.
+ *
+ * @author xzl
+ * @date 2025-11-13
+ */
+public interface SysSpiderAttachmentsMapper
+{
+    /**
+     * Queries a single attachment.
+     *
+     * @param id attachment primary key
+     * @return the attachment
+     */
+    public SysSpiderAttachments selectSysSpiderAttachmentsById(Long id);
+
+    /**
+     * Queries the attachment list.
+     *
+     * @param sysSpiderAttachments attachment object carrying the query criteria
+     * @return list of attachments
+     */
+    public List<SysSpiderAttachments> selectSysSpiderAttachmentsList(SysSpiderAttachments sysSpiderAttachments);
+
+    /**
+     * Inserts an attachment.
+     *
+     * @param sysSpiderAttachments attachment to insert
+     * @return result (presumably the affected row count; confirm against the mapper XML)
+     */
+    public int insertSysSpiderAttachments(SysSpiderAttachments sysSpiderAttachments);
+
+    /**
+     * Updates an attachment.
+     *
+     * @param sysSpiderAttachments attachment to update
+     * @return result (presumably the affected row count; confirm against the mapper XML)
+     */
+    public int updateSysSpiderAttachments(SysSpiderAttachments sysSpiderAttachments);
+
+    /**
+     * Deletes a single attachment.
+     *
+     * @param id attachment primary key
+     * @return result (presumably the affected row count; confirm against the mapper XML)
+     */
+    public int deleteSysSpiderAttachmentsById(Long id);
+
+    /**
+     * Deletes attachments in batch.
+     *
+     * @param ids primary keys of the records to delete
+     * @return result (presumably the affected row count; confirm against the mapper XML)
+     */
+    public int deleteSysSpiderAttachmentsByIds(Long[] ids);
+}

+ 61 - 0
xzl-admin/src/main/java/com/xzl/web/mapper/SysSpiderSourceDataMapper.java

@@ -0,0 +1,61 @@
+package com.xzl.web.mapper;
+
+import java.util.List;
+import com.xzl.system.domain.SysSpiderSourceData;
+
+/**
+ * Mapper interface for raw collected (spider) data records.
+ *
+ * @author xzl
+ * @date 2025-11-13
+ */
+public interface SysSpiderSourceDataMapper
+{
+    /**
+     * Queries a single raw collected data record.
+     *
+     * @param id record primary key
+     * @return the raw collected data record
+     */
+    public SysSpiderSourceData selectSysSpiderSourceDataById(Long id);
+
+    /**
+     * Queries the raw collected data list.
+     *
+     * @param sysSpiderSourceData object carrying the query criteria
+     * @return list of raw collected data records
+     */
+    public List<SysSpiderSourceData> selectSysSpiderSourceDataList(SysSpiderSourceData sysSpiderSourceData);
+
+    /**
+     * Inserts a raw collected data record.
+     *
+     * @param sysSpiderSourceData record to insert
+     * @return result (presumably the affected row count; confirm against the mapper XML)
+     */
+    public int insertSysSpiderSourceData(SysSpiderSourceData sysSpiderSourceData);
+
+    /**
+     * Updates a raw collected data record.
+     *
+     * @param sysSpiderSourceData record to update
+     * @return result (presumably the affected row count; confirm against the mapper XML)
+     */
+    public int updateSysSpiderSourceData(SysSpiderSourceData sysSpiderSourceData);
+
+    /**
+     * Deletes a single raw collected data record.
+     *
+     * @param id record primary key
+     * @return result (presumably the affected row count; confirm against the mapper XML)
+     */
+    public int deleteSysSpiderSourceDataById(Long id);
+
+    /**
+     * Deletes raw collected data records in batch.
+     *
+     * @param ids primary keys of the records to delete
+     * @return result (presumably the affected row count; confirm against the mapper XML)
+     */
+    public int deleteSysSpiderSourceDataByIds(Long[] ids);
+}

+ 62 - 0
xzl-admin/src/main/java/com/xzl/web/mapper/SysSpiderStructuredDataMapper.java

@@ -0,0 +1,62 @@
+package com.xzl.web.mapper;
+
+import java.util.List;
+
+import com.xzl.system.domain.SysSpiderStructuredData;
+
+/**
+ * Mapper interface for structured data records.
+ *
+ * @author xzl
+ * @date 2025-11-13
+ */
+public interface SysSpiderStructuredDataMapper
+{
+    /**
+     * Queries a single structured data record.
+     *
+     * @param id record primary key
+     * @return the structured data record
+     */
+    public SysSpiderStructuredData selectSysSpiderStructuredDataById(Long id);
+
+    /**
+     * Queries the structured data list.
+     *
+     * @param sysSpiderStructuredData object carrying the query criteria
+     * @return list of structured data records
+     */
+    public List<SysSpiderStructuredData> selectSysSpiderStructuredDataList(SysSpiderStructuredData sysSpiderStructuredData);
+
+    /**
+     * Inserts a structured data record.
+     *
+     * @param sysSpiderStructuredData record to insert
+     * @return result (presumably the affected row count; confirm against the mapper XML)
+     */
+    public int insertSysSpiderStructuredData(SysSpiderStructuredData sysSpiderStructuredData);
+
+    /**
+     * Updates a structured data record.
+     *
+     * @param sysSpiderStructuredData record to update
+     * @return result (presumably the affected row count; confirm against the mapper XML)
+     */
+    public int updateSysSpiderStructuredData(SysSpiderStructuredData sysSpiderStructuredData);
+
+    /**
+     * Deletes a single structured data record.
+     *
+     * @param id record primary key
+     * @return result (presumably the affected row count; confirm against the mapper XML)
+     */
+    public int deleteSysSpiderStructuredDataById(Long id);
+
+    /**
+     * Deletes structured data records in batch.
+     *
+     * @param ids primary keys of the records to delete
+     * @return result (presumably the affected row count; confirm against the mapper XML)
+     */
+    public int deleteSysSpiderStructuredDataByIds(Long[] ids);
+}

+ 61 - 0
xzl-admin/src/main/java/com/xzl/web/service/ISysSpiderAttachmentsService.java

@@ -0,0 +1,61 @@
+package com.xzl.web.service;
+
+import java.util.List;
+import com.xzl.system.domain.SysSpiderAttachments;
+
+/**
+ * Service contract for spider attachments.
+ *
+ * @author xzl
+ * @date 2025-11-13
+ */
+public interface ISysSpiderAttachmentsService {
+
+    /**
+     * Looks up one attachment by primary key.
+     *
+     * @param id primary key of the attachment
+     * @return the matching attachment
+     */
+    SysSpiderAttachments selectSysSpiderAttachmentsById(Long id);
+
+    /**
+     * Lists attachments matching the given query object.
+     *
+     * @param sysSpiderAttachments query object whose populated fields are used as filters
+     * @return the matching attachments
+     */
+    List<SysSpiderAttachments> selectSysSpiderAttachmentsList(SysSpiderAttachments sysSpiderAttachments);
+
+    /**
+     * Stores a new attachment.
+     *
+     * @param sysSpiderAttachments attachment to insert
+     * @return the int result of the insert (affected-row count)
+     */
+    int insertSysSpiderAttachments(SysSpiderAttachments sysSpiderAttachments);
+
+    /**
+     * Updates an existing attachment.
+     *
+     * @param sysSpiderAttachments attachment carrying the new values
+     * @return the int result of the update (affected-row count)
+     */
+    int updateSysSpiderAttachments(SysSpiderAttachments sysSpiderAttachments);
+
+    /**
+     * Deletes several attachments at once.
+     *
+     * @param ids primary keys of the attachments to delete
+     * @return the int result of the delete (affected-row count)
+     */
+    int deleteSysSpiderAttachmentsByIds(Long[] ids);
+
+    /**
+     * Deletes a single attachment.
+     *
+     * @param id primary key of the attachment to delete
+     * @return the int result of the delete (affected-row count)
+     */
+    int deleteSysSpiderAttachmentsById(Long id);
+}

+ 63 - 0
xzl-admin/src/main/java/com/xzl/web/service/ISysSpiderSourceDataService.java

@@ -0,0 +1,63 @@
+package com.xzl.web.service;
+
+import java.util.List;
+import com.xzl.system.domain.SysSpiderSourceData;
+
+/**
+ * Service contract for raw collected (spider source) data.
+ *
+ * @author xzl
+ * @date 2025-11-13
+ */
+public interface ISysSpiderSourceDataService {
+
+    /**
+     * Looks up one raw collected-data record by primary key.
+     *
+     * @param id primary key of the record
+     * @return the matching record
+     */
+    SysSpiderSourceData selectSysSpiderSourceDataById(Long id);
+
+    /**
+     * Lists raw collected-data records matching the given query object.
+     *
+     * @param sysSpiderSourceData query object whose populated fields are used as filters
+     * @return the matching records
+     */
+    List<SysSpiderSourceData> selectSysSpiderSourceDataList(SysSpiderSourceData sysSpiderSourceData);
+
+    /**
+     * Stores a new raw collected-data record.
+     *
+     * @param sysSpiderSourceData record to insert
+     * @return the int result of the insert (affected-row count)
+     */
+    int insertSysSpiderSourceData(SysSpiderSourceData sysSpiderSourceData);
+
+    /**
+     * Updates an existing raw collected-data record.
+     *
+     * @param sysSpiderSourceData record carrying the new values
+     * @return the int result of the update (affected-row count)
+     */
+    int updateSysSpiderSourceData(SysSpiderSourceData sysSpiderSourceData);
+
+    /**
+     * Deletes several raw collected-data records at once.
+     *
+     * @param ids primary keys of the records to delete
+     * @return the int result of the delete (affected-row count)
+     */
+    int deleteSysSpiderSourceDataByIds(Long[] ids);
+
+    /**
+     * Deletes a single raw collected-data record.
+     *
+     * @param id primary key of the record to delete
+     * @return the int result of the delete (affected-row count)
+     */
+    int deleteSysSpiderSourceDataById(Long id);
+
+    /**
+     * Collects the web page at the given URL.
+     *
+     * <p>The implementation in {@code SysSpiderSourceDataServiceImpl} fetches the page source
+     * through {@code SeleniumUtils} and inserts one raw collected-data record for it; failures
+     * surface as a {@link RuntimeException}.
+     *
+     * @param pageUrl URL of the page to collect
+     */
+    void collectPage(String pageUrl);
+}

+ 62 - 0
xzl-admin/src/main/java/com/xzl/web/service/ISysSpiderStructuredDataService.java

@@ -0,0 +1,62 @@
+package com.xzl.web.service;
+
+import java.util.List;
+
+import com.xzl.system.domain.SysSpiderStructuredData;
+
+/**
+ * Service contract for structured spider data.
+ *
+ * @author xzl
+ * @date 2025-11-13
+ */
+public interface ISysSpiderStructuredDataService {
+
+    /**
+     * Looks up one structured-data record by primary key.
+     *
+     * @param id primary key of the record
+     * @return the matching record
+     */
+    SysSpiderStructuredData selectSysSpiderStructuredDataById(Long id);
+
+    /**
+     * Lists structured-data records matching the given query object.
+     *
+     * @param sysSpiderStructuredData query object whose populated fields are used as filters
+     * @return the matching records
+     */
+    List<SysSpiderStructuredData> selectSysSpiderStructuredDataList(SysSpiderStructuredData sysSpiderStructuredData);
+
+    /**
+     * Stores a new structured-data record.
+     *
+     * @param sysSpiderStructuredData record to insert
+     * @return the int result of the insert (affected-row count)
+     */
+    int insertSysSpiderStructuredData(SysSpiderStructuredData sysSpiderStructuredData);
+
+    /**
+     * Updates an existing structured-data record.
+     *
+     * @param sysSpiderStructuredData record carrying the new values
+     * @return the int result of the update (affected-row count)
+     */
+    int updateSysSpiderStructuredData(SysSpiderStructuredData sysSpiderStructuredData);
+
+    /**
+     * Deletes several structured-data records at once.
+     *
+     * @param ids primary keys of the records to delete
+     * @return the int result of the delete (affected-row count)
+     */
+    int deleteSysSpiderStructuredDataByIds(Long[] ids);
+
+    /**
+     * Deletes a single structured-data record.
+     *
+     * @param id primary key of the record to delete
+     * @return the int result of the delete (affected-row count)
+     */
+    int deleteSysSpiderStructuredDataById(Long id);
+}

+ 0 - 1
xzl-admin/src/main/java/com/xzl/web/service/impl/SysFileFolderServiceImpl.java

@@ -20,7 +20,6 @@ import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.stereotype.Service;
 import com.xzl.web.mapper.SysFileFolderMapper;
 
-import javax.xml.ws.Response;
 
 
 /**

+ 95 - 0
xzl-admin/src/main/java/com/xzl/web/service/impl/SysSpiderAttachmentsServiceImpl.java

@@ -0,0 +1,95 @@
+package com.xzl.web.service.impl;
+
+import java.util.List;
+
+import com.xzl.system.domain.SysSpiderAttachments;
+import com.xzl.web.mapper.SysSpiderAttachmentsMapper;
+import com.xzl.web.service.ISysSpiderAttachmentsService;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.stereotype.Service;
+
+
+/**
+ * Attachment service implementation; every operation is forwarded unchanged to
+ * {@link SysSpiderAttachmentsMapper}.
+ *
+ * @author xzl
+ * @date 2025-11-13
+ */
+@Service
+public class SysSpiderAttachmentsServiceImpl implements ISysSpiderAttachmentsService {
+
+    @Autowired
+    private SysSpiderAttachmentsMapper sysSpiderAttachmentsMapper;
+
+    /**
+     * Looks up one attachment by primary key.
+     *
+     * @param id primary key of the attachment
+     * @return the attachment returned by the mapper
+     */
+    @Override
+    public SysSpiderAttachments selectSysSpiderAttachmentsById(Long id) {
+        final SysSpiderAttachments attachment = sysSpiderAttachmentsMapper.selectSysSpiderAttachmentsById(id);
+        return attachment;
+    }
+
+    /**
+     * Lists attachments matching the given query object.
+     *
+     * @param sysSpiderAttachments query object whose populated fields are used as filters
+     * @return the attachments returned by the mapper
+     */
+    @Override
+    public List<SysSpiderAttachments> selectSysSpiderAttachmentsList(SysSpiderAttachments sysSpiderAttachments) {
+        final List<SysSpiderAttachments> attachments =
+                sysSpiderAttachmentsMapper.selectSysSpiderAttachmentsList(sysSpiderAttachments);
+        return attachments;
+    }
+
+    /**
+     * Stores a new attachment.
+     *
+     * @param sysSpiderAttachments attachment to insert
+     * @return the int result reported by the mapper
+     */
+    @Override
+    public int insertSysSpiderAttachments(SysSpiderAttachments sysSpiderAttachments) {
+        final int inserted = sysSpiderAttachmentsMapper.insertSysSpiderAttachments(sysSpiderAttachments);
+        return inserted;
+    }
+
+    /**
+     * Updates an existing attachment.
+     *
+     * @param sysSpiderAttachments attachment carrying the new values
+     * @return the int result reported by the mapper
+     */
+    @Override
+    public int updateSysSpiderAttachments(SysSpiderAttachments sysSpiderAttachments) {
+        final int updated = sysSpiderAttachmentsMapper.updateSysSpiderAttachments(sysSpiderAttachments);
+        return updated;
+    }
+
+    /**
+     * Deletes several attachments at once.
+     *
+     * @param ids primary keys of the attachments to delete
+     * @return the int result reported by the mapper
+     */
+    @Override
+    public int deleteSysSpiderAttachmentsByIds(Long[] ids) {
+        final int deleted = sysSpiderAttachmentsMapper.deleteSysSpiderAttachmentsByIds(ids);
+        return deleted;
+    }
+
+    /**
+     * Deletes a single attachment.
+     *
+     * @param id primary key of the attachment to delete
+     * @return the int result reported by the mapper
+     */
+    @Override
+    public int deleteSysSpiderAttachmentsById(Long id) {
+        final int deleted = sysSpiderAttachmentsMapper.deleteSysSpiderAttachmentsById(id);
+        return deleted;
+    }
+}

+ 195 - 0
xzl-admin/src/main/java/com/xzl/web/service/impl/SysSpiderSourceDataServiceImpl.java

@@ -0,0 +1,195 @@
+package com.xzl.web.service.impl;
+
+import java.util.*;
+
+
+import com.xzl.web.mapper.SysSpiderSourceDataMapper;
+import com.xzl.web.service.ISysSpiderSourceDataService;
+import com.xzl.web.utils.SeleniumUtils;
+import lombok.extern.slf4j.Slf4j;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+import org.openqa.selenium.chrome.ChromeOptions;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.stereotype.Service;
+
+import com.xzl.system.domain.SysSpiderSourceData;
+
+
+/**
+ * 原始采集数据Service业务层处理
+ *
+ * @author xzl
+ * @date 2025-11-13
+ */
+@Slf4j
+@Service
+public class SysSpiderSourceDataServiceImpl implements ISysSpiderSourceDataService {
+    @Autowired
+    private SysSpiderSourceDataMapper sysSpiderSourceDataMapper;
+
+    /**
+     * 查询原始采集数据
+     *
+     * @param id 原始采集数据主键
+     * @return 原始采集数据
+     */
+    @Override
+    public SysSpiderSourceData selectSysSpiderSourceDataById(Long id) {
+        return sysSpiderSourceDataMapper.selectSysSpiderSourceDataById(id);
+    }
+
+    /**
+     * 查询原始采集数据列表
+     *
+     * @param sysSpiderSourceData 原始采集数据
+     * @return 原始采集数据
+     */
+    @Override
+    public List<SysSpiderSourceData> selectSysSpiderSourceDataList(SysSpiderSourceData sysSpiderSourceData) {
+        return sysSpiderSourceDataMapper.selectSysSpiderSourceDataList(sysSpiderSourceData);
+    }
+
+    /**
+     * 新增原始采集数据
+     *
+     * @param sysSpiderSourceData 原始采集数据
+     * @return 结果
+     */
+    @Override
+    public int insertSysSpiderSourceData(SysSpiderSourceData sysSpiderSourceData) {
+        return sysSpiderSourceDataMapper.insertSysSpiderSourceData(sysSpiderSourceData);
+    }
+
+    /**
+     * 修改原始采集数据
+     *
+     * @param sysSpiderSourceData 原始采集数据
+     * @return 结果
+     */
+    @Override
+    public int updateSysSpiderSourceData(SysSpiderSourceData sysSpiderSourceData) {
+        return sysSpiderSourceDataMapper.updateSysSpiderSourceData(sysSpiderSourceData);
+    }
+
+    /**
+     * 批量删除原始采集数据
+     *
+     * @param ids 需要删除的原始采集数据主键
+     * @return 结果
+     */
+    @Override
+    public int deleteSysSpiderSourceDataByIds(Long[] ids) {
+        return sysSpiderSourceDataMapper.deleteSysSpiderSourceDataByIds(ids);
+    }
+
+    /**
+     * 删除原始采集数据信息
+     *
+     * @param id 原始采集数据主键
+     * @return 结果
+     */
+    @Override
+    public int deleteSysSpiderSourceDataById(Long id) {
+        return sysSpiderSourceDataMapper.deleteSysSpiderSourceDataById(id);
+    }
+
+    @Override
+    public void collectPage(String pageUrl) {
+        try {
+            // 1. 打印采集日志(便于调试)
+            log.info("开始采集页面:{}", pageUrl);
+
+            // 2. 核心采集逻辑(根据需求实现)
+            // 示例:调用 HTTP 工具获取页面内容(可使用 HttpClient、OkHttp 等)
+            // String pageContent = httpClient.get(pageUrl);
+
+            // 3. 数据解析(如 HTML 解析、提取需要的内容)
+            // 示例:使用 Jsoup 解析 HTML
+            // Document doc = Jsoup.parse(pageContent);
+            // String title = doc.title(); // 提取页面标题
+            // String content = doc.select("body").text(); // 提取页面正文
+
+            // 4. 数据入库(保存到数据库,需注入 Mapper/Repository)
+            // 示例:spiderDataMapper.insert(new SpiderData(pageUrl, title, content));
+
+
+            System.setProperty("webdriver.chrome.driver", "D:\\chromedriver.exe");
+
+            // 浏览器配置(无头模式、禁用图片、设置超时)
+            ChromeOptions options = new ChromeOptions();
+            options.addArguments("--headless=new");
+            options.addArguments("--disable-gpu");
+            options.addArguments("--window-size=1920,1080");
+//            options.addArguments("--disable-images");
+//            options.addArguments("--disable-javascript"); // 尝试禁用JS
+            options.addArguments("--no-sandbox");
+            options.addArguments("--disable-dev-shm-usage");
+            String htmlContent = SeleniumUtils.getPageSource(pageUrl);
+            if (htmlContent == null || htmlContent.isEmpty()) {
+                throw new IllegalArgumentException("网页内容为空");
+            }
+            SeleniumUtils.closeDriver();
+            SysSpiderSourceData sysSpiderSourceData = new SysSpiderSourceData();
+            sysSpiderSourceData.setPageUrl(pageUrl);
+            sysSpiderSourceData.setRawContent(htmlContent);
+            sysSpiderSourceData.setRawAttachments(getHtmlFileLinkAndName(htmlContent));
+            sysSpiderSourceData.setTaskId(UUID.randomUUID().toString());
+            sysSpiderSourceDataMapper.insertSysSpiderSourceData(sysSpiderSourceData);
+            // 5. 采集完成日志
+            log.info("页面采集完成:{}", pageUrl);
+
+        } catch (Exception e) {
+            // 捕获采集异常,打印日志(避免程序崩溃)
+            log.error("页面采集失败:{},错误信息:{}", pageUrl, e.getMessage(), e);
+            // 抛出业务异常(由全局异常处理器捕获并返回给前端)
+            throw new RuntimeException("采集失败:" + e.getMessage());
+        }
+    }
+
+    public String getHtmlFileLinkAndName(String htmlContent) {
+        ArrayList<String> attrCollections = new ArrayList<>();
+        attrCollections.add(".pdf");
+        attrCollections.add(".xlsx");
+        attrCollections.add(".xls");
+        attrCollections.add(".doc");
+        attrCollections.add(".docx");
+
+        Map<String, String> pdfMap = new HashMap<>();
+
+        Document parse = Jsoup.parse(htmlContent);
+        Elements aTags = parse.select("a");
+        for (Element aTag : aTags) {
+            String href = aTag.attr("href");
+            String text = aTag.text().trim();
+
+            // 检查 href 和文本是否有效
+            if (href != null && !href.isEmpty() && !text.isEmpty()) {
+                // 将 href 转换为小写,以便进行不区分大小写的后缀匹配
+                String lowerHref = href.toLowerCase();
+
+                // --- 核心修改点 ---
+                // 检查 href 是否以 attrCollections 中的任何一个后缀结尾
+                boolean isTargetFile = false;
+                for (String suffix : attrCollections) {
+                    if (lowerHref.endsWith(suffix)) {
+                        isTargetFile = true;
+                        break; // 找到一个匹配项即可,无需继续循环
+                    }
+                }
+
+                // 如果是目标文件类型,则存入 Map
+                if (isTargetFile) {
+                    // 处理相对路径,转换为绝对路径
+                    String absoluteUrl = aTag.absUrl("href");
+                    // 存入 Map。如果文件名(text)重复,后面的会覆盖前面的
+                    pdfMap.put(text, absoluteUrl);
+                }
+            }
+        }
+        return pdfMap.toString();
+    }
+
+}

+ 95 - 0
xzl-admin/src/main/java/com/xzl/web/service/impl/SysSpiderStructuredDataServiceImpl.java

@@ -0,0 +1,95 @@
+package com.xzl.web.service.impl;
+
+import com.xzl.system.domain.SysSpiderStructuredData;
+import com.xzl.web.mapper.SysSpiderStructuredDataMapper;
+import com.xzl.web.service.ISysSpiderStructuredDataService;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.stereotype.Service;
+
+import java.util.List;
+
+
+/**
+ * Structured-data service implementation; every operation is forwarded unchanged to
+ * {@link SysSpiderStructuredDataMapper}.
+ *
+ * @author xzl
+ * @date 2025-11-13
+ */
+@Service
+public class SysSpiderStructuredDataServiceImpl implements ISysSpiderStructuredDataService {
+
+    @Autowired
+    private SysSpiderStructuredDataMapper sysSpiderStructuredDataMapper;
+
+    /**
+     * Looks up one structured-data record by primary key.
+     *
+     * @param id primary key of the record
+     * @return the record returned by the mapper
+     */
+    @Override
+    public SysSpiderStructuredData selectSysSpiderStructuredDataById(Long id) {
+        final SysSpiderStructuredData found = sysSpiderStructuredDataMapper.selectSysSpiderStructuredDataById(id);
+        return found;
+    }
+
+    /**
+     * Lists structured-data records matching the given query object.
+     *
+     * @param sysSpiderStructuredData query object whose populated fields are used as filters
+     * @return the records returned by the mapper
+     */
+    @Override
+    public List<SysSpiderStructuredData> selectSysSpiderStructuredDataList(SysSpiderStructuredData sysSpiderStructuredData) {
+        final List<SysSpiderStructuredData> matches =
+                sysSpiderStructuredDataMapper.selectSysSpiderStructuredDataList(sysSpiderStructuredData);
+        return matches;
+    }
+
+    /**
+     * Stores a new structured-data record.
+     *
+     * @param sysSpiderStructuredData record to insert
+     * @return the int result reported by the mapper
+     */
+    @Override
+    public int insertSysSpiderStructuredData(SysSpiderStructuredData sysSpiderStructuredData) {
+        final int inserted = sysSpiderStructuredDataMapper.insertSysSpiderStructuredData(sysSpiderStructuredData);
+        return inserted;
+    }
+
+    /**
+     * Updates an existing structured-data record.
+     *
+     * @param sysSpiderStructuredData record carrying the new values
+     * @return the int result reported by the mapper
+     */
+    @Override
+    public int updateSysSpiderStructuredData(SysSpiderStructuredData sysSpiderStructuredData) {
+        final int updated = sysSpiderStructuredDataMapper.updateSysSpiderStructuredData(sysSpiderStructuredData);
+        return updated;
+    }
+
+    /**
+     * Deletes several structured-data records at once.
+     *
+     * @param ids primary keys of the records to delete
+     * @return the int result reported by the mapper
+     */
+    @Override
+    public int deleteSysSpiderStructuredDataByIds(Long[] ids) {
+        final int deleted = sysSpiderStructuredDataMapper.deleteSysSpiderStructuredDataByIds(ids);
+        return deleted;
+    }
+
+    /**
+     * Deletes a single structured-data record.
+     *
+     * @param id primary key of the record to delete
+     * @return the int result reported by the mapper
+     */
+    @Override
+    public int deleteSysSpiderStructuredDataById(Long id) {
+        final int deleted = sysSpiderStructuredDataMapper.deleteSysSpiderStructuredDataById(id);
+        return deleted;
+    }
+}

+ 6 - 6
xzl-admin/src/main/java/com/xzl/web/service/impl/UserPortraitServiceImpl.java

@@ -63,12 +63,12 @@ public class UserPortraitServiceImpl implements UserPortraitService {
       return null;
     }
     SysUser sysUser = userService.selectUserByUserName(username);
-    String nlpName = sysUser.getNlpName();
-    String nlpPwd = sysUser.getNlpPwd();
+//    String nlpName = sysUser.getNlpName();
+//    String nlpPwd = sysUser.getNlpPwd();
     // 调用登录
     Map loginForm = new HashMap();
-    loginForm.put("username", nlpName);
-    loginForm.put("password", nlpPwd);
+//    loginForm.put("username", nlpName);
+//    loginForm.put("password", nlpPwd);
     loginForm.put("userAgent", 1);
     loginForm.put("client", "zdwxxtyc");
     loginForm.put("saveUsername", true);
@@ -359,8 +359,8 @@ public class UserPortraitServiceImpl implements UserPortraitService {
     }
     SysUser user = new SysUser();
     user.setUserId(sysUser.getUserId());
-    user.setNlpName(nlpUserName);
-    user.setNlpPwd(nlpPassword);
+//    user.setNlpName(nlpUserName);
+//    user.setNlpPwd(nlpPassword);
     userService.updateUser(user);
     rs.put("success", 1);
     rs.put("msg", "密码修改成功");

+ 84 - 0
xzl-admin/src/main/java/com/xzl/web/utils/SeleniumUtils.java

@@ -0,0 +1,84 @@
+package com.xzl.web.utils;
+
+
+import lombok.extern.slf4j.Slf4j;
+import org.openqa.selenium.WebDriver;
+import org.openqa.selenium.chrome.ChromeDriver;
+import org.openqa.selenium.chrome.ChromeOptions;
+
+import java.util.concurrent.TimeUnit;
+
+/**
+ * Selenium 浏览器工具类(Chrome)
+ */
+@Slf4j
+public class SeleniumUtils {
+
+//    private static final Log log = LogFactory.get(SeleniumUtils.class);
+    private static WebDriver driver;
+
+    /**
+     * 初始化 Chrome 浏览器(无头模式)
+     */
+    public static void initDriver() {
+        try {
+            // 1. 手动指定 ChromeDriver 路径(Linux 部署路径)
+            System.setProperty("webdriver.chrome.driver", "D:\\chromedriver.exe");
+
+            // 浏览器配置(无头模式、禁用图片、设置超时)
+            ChromeOptions options = new ChromeOptions();
+            options.addArguments("--headless=new");
+            options.addArguments("--disable-gpu");
+            options.addArguments("--window-size=1920,1080");
+//            options.addArguments("--disable-images");
+            options.addArguments("--disable-javascript"); // 尝试禁用JS
+            options.addArguments("--no-sandbox");
+            options.addArguments("--disable-dev-shm-usage");
+
+            // 初始化驱动并设置超时
+            driver = new ChromeDriver(options);
+            driver.manage().timeouts().pageLoadTimeout(60, TimeUnit.SECONDS); // 页面加载超时30秒
+            driver.manage().timeouts().implicitlyWait(10, TimeUnit.SECONDS); // 元素查找超时10秒
+            log.info("Chrome 浏览器初始化成功");
+        } catch (Exception e) {
+            log.error("Chrome 浏览器初始化失败", e);
+            throw new RuntimeException("浏览器启动失败:" + e.getMessage());
+        }
+    }
+
+    /**
+     * 访问网页并获取 HTML 源码
+     * @param url 目标网页URL
+     * @return HTML 源码字符串
+     */
+    public static String getPageSource(String url) {
+        if (driver == null) {
+            initDriver();
+        }
+        try {
+            log.info("Selenium 访问网页:{}", url);
+            driver.get(url); // 模拟浏览器打开网页(支持动态渲染)
+            Thread.sleep(2000); // 等待2秒,确保动态内容加载完成(可按需调整)
+            return driver.getPageSource(); // 获取渲染后的完整HTML
+        } catch (Exception e) {
+            log.error("访问网页失败:{}", url, e);
+            throw new RuntimeException("网页访问失败:" + e.getMessage());
+        }
+    }
+
+    /**
+     * 关闭浏览器并释放资源
+     */
+    public static void closeDriver() {
+        if (driver != null) {
+            try {
+                driver.quit();
+                log.info("Chrome 浏览器已关闭");
+            } catch (Exception e) {
+                log.error("浏览器关闭失败", e);
+            } finally {
+                driver = null;
+            }
+        }
+    }
+}

+ 5 - 1
xzl-admin/src/main/resources/application-dev.yml

@@ -11,7 +11,7 @@ spring:
 #                username: jerry
  #               password: zjr38zjR@
                 username: root
-                password: 123456
+                password: root
 #                url: jdbc:mysql://localhost:3306/xtdb?allowMultiQueries=true&useUnicode=true&characterEncoding=UTF-8&useSSL=false
 #                username: root
 #                password: 1234
@@ -75,3 +75,7 @@ minio:
     access-key: minioadmin
     secret-key: minioadmin
     bucket-name: mc-kb
+
+chromeDriver:
+    driverPath: D:\chromedriver.exe
+

+ 91 - 0
xzl-admin/src/main/resources/mapper/SysSpiderAttachmentsMapper.xml

@@ -0,0 +1,91 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!DOCTYPE mapper
+PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN"
+"http://mybatis.org/dtd/mybatis-3-mapper.dtd">
+<mapper namespace="com.xzl.web.mapper.SysSpiderAttachmentsMapper">
+
+    <!-- Maps sys_spider_attachments columns onto SysSpiderAttachments properties. -->
+    <resultMap type="SysSpiderAttachments" id="SysSpiderAttachmentsResult">
+        <result property="id"    column="id"    />
+        <result property="rawDataId"    column="raw_data_id"    />
+        <result property="structuredDataId"    column="structured_data_id"    />
+        <result property="fileName"    column="file_name"    />
+        <result property="fileType"    column="file_type"    />
+        <result property="fileSize"    column="file_size"    />
+        <result property="originalUrl"    column="original_url"    />
+        <result property="localPath"    column="local_path"    />
+        <result property="downloadTime"    column="download_time"    />
+        <result property="taskId"    column="task_id"    />
+    </resultMap>
+
+    <!-- Shared column list reused by both select statements below. -->
+    <sql id="selectSysSpiderAttachmentsVo">
+        select id, raw_data_id, structured_data_id, file_name, file_type, file_size, original_url, local_path, download_time, task_id from sys_spider_attachments
+    </sql>
+
+    <!-- List query: file_name and original_url are substring matches, file_type and
+         task_id are exact matches; null or empty filters are ignored. -->
+    <select id="selectSysSpiderAttachmentsList" parameterType="SysSpiderAttachments" resultMap="SysSpiderAttachmentsResult">
+        <include refid="selectSysSpiderAttachmentsVo"/>
+        <where>
+            <if test="fileName != null  and fileName != ''"> and file_name like concat('%', #{fileName}, '%')</if>
+            <if test="fileType != null  and fileType != ''"> and file_type = #{fileType}</if>
+            <if test="originalUrl != null  and originalUrl != ''"> and original_url like concat('%', #{originalUrl}, '%')</if>
+            <if test="taskId != null  and taskId != ''"> and task_id = #{taskId}</if>
+        </where>
+    </select>
+
+    <!-- Single-row lookup by primary key. -->
+    <select id="selectSysSpiderAttachmentsById" parameterType="Long" resultMap="SysSpiderAttachmentsResult">
+        <include refid="selectSysSpiderAttachmentsVo"/>
+        where id = #{id}
+    </select>
+
+    <!-- Insert: only non-null properties are included; the generated key is written
+         back to the id property. -->
+    <insert id="insertSysSpiderAttachments" parameterType="SysSpiderAttachments" useGeneratedKeys="true" keyProperty="id">
+        insert into sys_spider_attachments
+        <trim prefix="(" suffix=")" suffixOverrides=",">
+            <if test="rawDataId != null">raw_data_id,</if>
+            <if test="structuredDataId != null">structured_data_id,</if>
+            <if test="fileName != null">file_name,</if>
+            <if test="fileType != null">file_type,</if>
+            <if test="fileSize != null">file_size,</if>
+            <if test="originalUrl != null">original_url,</if>
+            <if test="localPath != null">local_path,</if>
+            <if test="downloadTime != null">download_time,</if>
+            <if test="taskId != null">task_id,</if>
+         </trim>
+        <trim prefix="values (" suffix=")" suffixOverrides=",">
+            <if test="rawDataId != null">#{rawDataId},</if>
+            <if test="structuredDataId != null">#{structuredDataId},</if>
+            <if test="fileName != null">#{fileName},</if>
+            <if test="fileType != null">#{fileType},</if>
+            <if test="fileSize != null">#{fileSize},</if>
+            <if test="originalUrl != null">#{originalUrl},</if>
+            <if test="localPath != null">#{localPath},</if>
+            <if test="downloadTime != null">#{downloadTime},</if>
+            <if test="taskId != null">#{taskId},</if>
+         </trim>
+    </insert>
+
+    <!-- Update by primary key: only non-null properties are written.
+         NOTE(review): when every property is null the SET clause appears to come out
+         empty, which would be invalid SQL - confirm callers always set a field. -->
+    <update id="updateSysSpiderAttachments" parameterType="SysSpiderAttachments">
+        update sys_spider_attachments
+        <trim prefix="SET" suffixOverrides=",">
+            <if test="rawDataId != null">raw_data_id = #{rawDataId},</if>
+            <if test="structuredDataId != null">structured_data_id = #{structuredDataId},</if>
+            <if test="fileName != null">file_name = #{fileName},</if>
+            <if test="fileType != null">file_type = #{fileType},</if>
+            <if test="fileSize != null">file_size = #{fileSize},</if>
+            <if test="originalUrl != null">original_url = #{originalUrl},</if>
+            <if test="localPath != null">local_path = #{localPath},</if>
+            <if test="downloadTime != null">download_time = #{downloadTime},</if>
+            <if test="taskId != null">task_id = #{taskId},</if>
+        </trim>
+        where id = #{id}
+    </update>
+
+    <!-- Delete one row by primary key. -->
+    <delete id="deleteSysSpiderAttachmentsById" parameterType="Long">
+        delete from sys_spider_attachments where id = #{id}
+    </delete>
+
+    <!-- Delete several rows; iterates the array parameter into an IN list.
+         NOTE(review): parameterType says String although the service passes a Long[]. -->
+    <delete id="deleteSysSpiderAttachmentsByIds" parameterType="String">
+        delete from sys_spider_attachments where id in
+        <foreach item="id" collection="array" open="(" separator="," close=")">
+            #{id}
+        </foreach>
+    </delete>
+</mapper>

+ 76 - 0
xzl-admin/src/main/resources/mapper/SysSpiderSourceDataMapper.xml

@@ -0,0 +1,76 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!DOCTYPE mapper
+PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN"
+"http://mybatis.org/dtd/mybatis-3-mapper.dtd">
+<mapper namespace="com.xzl.web.mapper.SysSpiderSourceDataMapper">
+
+    <!-- Maps sys_spider_source_data columns onto SysSpiderSourceData properties. -->
+    <resultMap type="SysSpiderSourceData" id="SysSpiderSourceDataResult">
+        <result property="id"    column="id"    />
+        <result property="pageUrl"    column="page_url"    />
+        <result property="rawContent"    column="raw_content"    />
+        <result property="rawAttachments"    column="raw_attachments"    />
+        <result property="collectionTime"    column="collection_time"    />
+        <result property="taskId"    column="task_id"    />
+    </resultMap>
+
+    <!-- Shared column list reused by both select statements below. -->
+    <sql id="selectSysSpiderSourceDataVo">
+        select id, page_url, raw_content, raw_attachments, collection_time, task_id from sys_spider_source_data
+    </sql>
+
+    <!-- List query: page_url, raw_content and raw_attachments are substring matches,
+         task_id is an exact match, and collection_time is range-filtered only when both
+         params.beginCollectionTime and params.endCollectionTime are supplied. -->
+    <select id="selectSysSpiderSourceDataList" parameterType="SysSpiderSourceData" resultMap="SysSpiderSourceDataResult">
+        <include refid="selectSysSpiderSourceDataVo"/>
+        <where>
+            <if test="pageUrl != null  and pageUrl != ''"> and page_url like concat('%', #{pageUrl}, '%')</if>
+            <if test="rawContent != null  and rawContent != ''"> and raw_content like concat('%', #{rawContent}, '%')</if>
+            <if test="rawAttachments != null  and rawAttachments != ''"> and raw_attachments like concat('%', #{rawAttachments}, '%')</if>
+            <if test="params.beginCollectionTime != null and params.beginCollectionTime != '' and params.endCollectionTime != null and params.endCollectionTime != ''"> and collection_time between #{params.beginCollectionTime} and #{params.endCollectionTime}</if>
+            <if test="taskId != null  and taskId != ''"> and task_id = #{taskId}</if>
+        </where>
+    </select>
+
+    <!-- Single-row lookup by primary key. -->
+    <select id="selectSysSpiderSourceDataById" parameterType="Long" resultMap="SysSpiderSourceDataResult">
+        <include refid="selectSysSpiderSourceDataVo"/>
+        where id = #{id}
+    </select>
+
+    <!-- Insert: only non-null properties are included; the generated key is written
+         back to the id property. -->
+    <insert id="insertSysSpiderSourceData" parameterType="SysSpiderSourceData" useGeneratedKeys="true" keyProperty="id">
+        insert into sys_spider_source_data
+        <trim prefix="(" suffix=")" suffixOverrides=",">
+            <if test="pageUrl != null">page_url,</if>
+            <if test="rawContent != null">raw_content,</if>
+            <if test="rawAttachments != null">raw_attachments,</if>
+            <if test="collectionTime != null">collection_time,</if>
+            <if test="taskId != null">task_id,</if>
+         </trim>
+        <trim prefix="values (" suffix=")" suffixOverrides=",">
+            <if test="pageUrl != null">#{pageUrl},</if>
+            <if test="rawContent != null">#{rawContent},</if>
+            <if test="rawAttachments != null">#{rawAttachments},</if>
+            <if test="collectionTime != null">#{collectionTime},</if>
+            <if test="taskId != null">#{taskId},</if>
+         </trim>
+    </insert>
+
+    <!-- Update by primary key: only non-null properties are written.
+         NOTE(review): when every property is null the SET clause appears to come out
+         empty, which would be invalid SQL - confirm callers always set a field. -->
+    <update id="updateSysSpiderSourceData" parameterType="SysSpiderSourceData">
+        update sys_spider_source_data
+        <trim prefix="SET" suffixOverrides=",">
+            <if test="pageUrl != null">page_url = #{pageUrl},</if>
+            <if test="rawContent != null">raw_content = #{rawContent},</if>
+            <if test="rawAttachments != null">raw_attachments = #{rawAttachments},</if>
+            <if test="collectionTime != null">collection_time = #{collectionTime},</if>
+            <if test="taskId != null">task_id = #{taskId},</if>
+        </trim>
+        where id = #{id}
+    </update>
+
+    <!-- Delete one row by primary key. -->
+    <delete id="deleteSysSpiderSourceDataById" parameterType="Long">
+        delete from sys_spider_source_data where id = #{id}
+    </delete>
+
+    <!-- Delete several rows; iterates the array parameter into an IN list.
+         NOTE(review): parameterType says String although the mapper method takes a Long[]. -->
+    <delete id="deleteSysSpiderSourceDataByIds" parameterType="String">
+        delete from sys_spider_source_data where id in
+        <foreach item="id" collection="array" open="(" separator="," close=")">
+            #{id}
+        </foreach>
+    </delete>
+</mapper>

+ 93 - 0
xzl-admin/src/main/resources/mapper/SysSpiderStructuredDataMapper.xml

@@ -0,0 +1,93 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!DOCTYPE mapper
+PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN"
+"http://mybatis.org/dtd/mybatis-3-mapper.dtd">
+<mapper namespace="com.xzl.web.mapper.SysSpiderStructuredDataMapper">
+
+    <!-- Maps sys_spider_structured_data columns (snake_case) onto the camelCase
+         properties of SysSpiderStructuredData. -->
+    <resultMap type="SysSpiderStructuredData" id="SysSpiderStructuredDataResult">
+        <result property="id"    column="id"    />
+        <result property="rawDataId"    column="raw_data_id"    />
+        <result property="title"    column="title"    />
+        <result property="content"    column="content"    />
+        <result property="author"    column="author"    />
+        <result property="department"    column="department"    />
+        <result property="publishTime"    column="publish_time"    />
+        <result property="keywords"    column="keywords"    />
+        <result property="customFields"    column="custom_fields"    />
+        <result property="processTime"    column="process_time"    />
+    </resultMap>
+
+    <!-- Shared SELECT column list, included by the two select statements below. -->
+    <sql id="selectSysSpiderStructuredDataVo">
+        select id, raw_data_id, title, content, author, department, publish_time, keywords, custom_fields, process_time from sys_spider_structured_data
+    </sql>
+
+    <!-- Lists rows filtered by the non-empty properties of the parameter object. All text
+         filters are exact equality matches (not LIKE). The publish_time range filter is
+         applied only when both params.beginPublishTime and params.endPublishTime are set. -->
+    <select id="selectSysSpiderStructuredDataList" parameterType="SysSpiderStructuredData" resultMap="SysSpiderStructuredDataResult">
+        <include refid="selectSysSpiderStructuredDataVo"/>
+        <where>
+            <if test="title != null  and title != ''"> and title = #{title}</if>
+            <if test="content != null  and content != ''"> and content = #{content}</if>
+            <if test="author != null  and author != ''"> and author = #{author}</if>
+            <if test="department != null  and department != ''"> and department = #{department}</if>
+            <if test="params.beginPublishTime != null and params.beginPublishTime != '' and params.endPublishTime != null and params.endPublishTime != ''"> and publish_time between #{params.beginPublishTime} and #{params.endPublishTime}</if>
+            <if test="keywords != null  and keywords != ''"> and keywords = #{keywords}</if>
+        </where>
+    </select>
+
+    <!-- Fetches the row with the given id. -->
+    <select id="selectSysSpiderStructuredDataById" parameterType="Long" resultMap="SysSpiderStructuredDataResult">
+        <include refid="selectSysSpiderStructuredDataVo"/>
+        where id = #{id}
+    </select>
+
+    <!-- Inserts a row; a column is included only when its property is non-null, and the
+         generated key is requested back into the parameter's "id" property. -->
+    <insert id="insertSysSpiderStructuredData" parameterType="SysSpiderStructuredData" useGeneratedKeys="true" keyProperty="id">
+        insert into sys_spider_structured_data
+        <trim prefix="(" suffix=")" suffixOverrides=",">
+            <if test="rawDataId != null">raw_data_id,</if>
+            <if test="title != null">title,</if>
+            <if test="content != null">content,</if>
+            <if test="author != null">author,</if>
+            <if test="department != null">department,</if>
+            <if test="publishTime != null">publish_time,</if>
+            <if test="keywords != null">keywords,</if>
+            <if test="customFields != null">custom_fields,</if>
+            <if test="processTime != null">process_time,</if>
+         </trim>
+        <trim prefix="values (" suffix=")" suffixOverrides=",">
+            <if test="rawDataId != null">#{rawDataId},</if>
+            <if test="title != null">#{title},</if>
+            <if test="content != null">#{content},</if>
+            <if test="author != null">#{author},</if>
+            <if test="department != null">#{department},</if>
+            <if test="publishTime != null">#{publishTime},</if>
+            <if test="keywords != null">#{keywords},</if>
+            <if test="customFields != null">#{customFields},</if>
+            <if test="processTime != null">#{processTime},</if>
+         </trim>
+    </insert>
+
+    <!-- Updates the row matching the parameter's id, setting only non-null properties.
+         NOTE(review): if every optional property is null no SET clause is emitted, which
+         presumably yields invalid SQL; confirm callers always supply at least one field. -->
+    <update id="updateSysSpiderStructuredData" parameterType="SysSpiderStructuredData">
+        update sys_spider_structured_data
+        <trim prefix="SET" suffixOverrides=",">
+            <if test="rawDataId != null">raw_data_id = #{rawDataId},</if>
+            <if test="title != null">title = #{title},</if>
+            <if test="content != null">content = #{content},</if>
+            <if test="author != null">author = #{author},</if>
+            <if test="department != null">department = #{department},</if>
+            <if test="publishTime != null">publish_time = #{publishTime},</if>
+            <if test="keywords != null">keywords = #{keywords},</if>
+            <if test="customFields != null">custom_fields = #{customFields},</if>
+            <if test="processTime != null">process_time = #{processTime},</if>
+        </trim>
+        where id = #{id}
+    </update>
+
+    <!-- Deletes the single row with the given id. -->
+    <delete id="deleteSysSpiderStructuredDataById" parameterType="Long">
+        delete from sys_spider_structured_data where id = #{id}
+    </delete>
+
+    <!-- Deletes every row whose id appears in the supplied array (rendered as an IN list). -->
+    <delete id="deleteSysSpiderStructuredDataByIds" parameterType="String">
+        delete from sys_spider_structured_data where id in
+        <foreach item="id" collection="array" open="(" separator="," close=")">
+            #{id}
+        </foreach>
+    </delete>
+</mapper>

+ 6 - 7
xzl-admin/src/main/resources/mapper/UserPortraitMapper.xml

@@ -5,7 +5,6 @@
         select distinct goods_name from d_order
     </select>
 
-
     <select id="getUserByDingUnionId" parameterType="string" resultType="com.xzl.common.core.domain.entity.SysUser">
         select user_name as "userName",password from sys_user where ding_union_id=#{unionId}
     </select>
@@ -30,9 +29,9 @@
     where d1.dept_id =  d2.dept_id
   </update>
 
-    <update id="syncDeptForDingtalk">
-    update sys_user e, sys_dept d
-    set e.dept_id = d.dept_id
-    where e.ding_dept_id = d.ding_id
-  </update>
-</mapper>
+<!--    <update id="syncDeptForDingtalk">-->
+<!--    update sys_user e, sys_dept d-->
+<!--    set e.dept_id = d.dept_id-->
+<!--    where e.ding_dept_id = d.ding_id-->
+<!--  </update>-->
+</mapper>

+ 17 - 17
xzl-common/src/main/java/com/xzl/common/core/domain/entity/SysUser.java

@@ -128,8 +128,8 @@ public class SysUser extends BaseEntity {
   private Long dingDeptId;
   private String dingUnionId;
 
-  private String nlpName;
-  private String nlpPwd;
+//  private String nlpName;
+//  private String nlpPwd;
 
   public SysUser() {
 
@@ -315,21 +315,21 @@ public class SysUser extends BaseEntity {
     this.dingUnionId = dingUnionId;
   }
 
-  public String getNlpName() {
-    return nlpName;
-  }
-
-  public void setNlpName(String nlpName) {
-    this.nlpName = nlpName;
-  }
-
-  public String getNlpPwd() {
-    return nlpPwd;
-  }
-
-  public void setNlpPwd(String nlpPwd) {
-    this.nlpPwd = nlpPwd;
-  }
+//  public String getNlpName() {
+//    return nlpName;
+//  }
+//
+//  public void setNlpName(String nlpName) {
+//    this.nlpName = nlpName;
+//  }
+
+//  public String getNlpPwd() {
+//    return nlpPwd;
+//  }
+//
+//  public void setNlpPwd(String nlpPwd) {
+//    this.nlpPwd = nlpPwd;
+//  }
 
   @Override
   public String toString() {

+ 5 - 4
xzl-framework/src/main/java/com/xzl/framework/web/service/SysLoginService.java

@@ -33,7 +33,7 @@ import com.xzl.system.service.ISysUserService;
 
 /**
  * 登录校验方法
- * 
+ *
  * @author xzl
  */
 @Component
@@ -47,7 +47,7 @@ public class SysLoginService
 
     @Autowired
     private RedisCache redisCache;
-    
+
     @Autowired
     private ISysUserService userService;
 
@@ -59,7 +59,7 @@ public class SysLoginService
 
     /**
      * 登录验证
-     * 
+     *
      * @param username 用户名
      * @param password 密码
      * @param code 验证码
@@ -107,7 +107,7 @@ public class SysLoginService
 
     /**
      * 校验验证码
-     * 
+     *
      * @param username 用户名
      * @param code 验证码
      * @param uuid 唯一标识
@@ -154,6 +154,7 @@ public class SysLoginService
             AsyncManager.me().execute(AsyncFactory.recordLogininfor(username, Constants.LOGIN_FAIL, MessageUtils.message("user.password.not.match")));
             throw new UserPasswordNotMatchException();
         }
+
         // 用户名不在指定范围内 错误
         if (username.length() < UserConstants.USERNAME_MIN_LENGTH
                 || username.length() > UserConstants.USERNAME_MAX_LENGTH)

+ 166 - 0
xzl-system/src/main/java/com/xzl/system/domain/SysSpiderAttachments.java

@@ -0,0 +1,166 @@
+package com.xzl.system.domain;
+
+import java.util.Date;
+import com.fasterxml.jackson.annotation.JsonFormat;
+import com.xzl.common.annotation.Excel;
+import com.xzl.common.core.domain.BaseEntity;
+import org.apache.commons.lang3.builder.ToStringBuilder;
+import org.apache.commons.lang3.builder.ToStringStyle;
+
+/**
+ * Attachment entity for the sys_spider_attachments table.
+ *
+ * <p>The {@code @Excel} header names are runtime strings and are intentionally left in
+ * Chinese. The generator-produced {@code readConverterExp} values were removed: they were
+ * fragments of the field descriptions (e.g. "p=df/docx/xlsx等"), not real value-to-label
+ * mappings.
+ *
+ * @author xzl
+ * @date 2025-11-13
+ */
+public class SysSpiderAttachments extends BaseEntity
+{
+    private static final long serialVersionUID = 1L;
+
+    /** Attachment ID. */
+    private Long id;
+
+    /** ID of the associated raw (source) data record. */
+    @Excel(name = "关联原始数据ID")
+    private Long rawDataId;
+
+    /** ID of the associated structured data record. */
+    @Excel(name = "关联结构化数据ID")
+    private Long structuredDataId;
+
+    /** Attachment file name. */
+    @Excel(name = "附件文件名")
+    private String fileName;
+
+    /** File type (pdf/docx/xlsx, etc.). */
+    @Excel(name = "文件类型")
+    private String fileType;
+
+    /** File size in bytes. */
+    @Excel(name = "文件大小")
+    private Long fileSize;
+
+    /** Original URL of the attachment in the OA system. */
+    @Excel(name = "OA系统原始URL")
+    private String originalUrl;
+
+    /** Local storage path. */
+    @Excel(name = "本地存储路径")
+    private String localPath;
+
+    /**
+     * Download time.
+     * NOTE(review): serialized with a date-only pattern, so any time-of-day component is
+     * dropped in JSON and Excel output; confirm that is intended.
+     */
+    @JsonFormat(pattern = "yyyy-MM-dd")
+    @Excel(name = "下载时间", width = 30, dateFormat = "yyyy-MM-dd")
+    private Date downloadTime;
+
+    /** ID of the associated collection task. */
+    @Excel(name = "关联采集任务ID")
+    private String taskId;
+
+    public void setId(Long id)
+    {
+        this.id = id;
+    }
+
+    public Long getId()
+    {
+        return id;
+    }
+
+    public void setRawDataId(Long rawDataId)
+    {
+        this.rawDataId = rawDataId;
+    }
+
+    public Long getRawDataId()
+    {
+        return rawDataId;
+    }
+
+    public void setStructuredDataId(Long structuredDataId)
+    {
+        this.structuredDataId = structuredDataId;
+    }
+
+    public Long getStructuredDataId()
+    {
+        return structuredDataId;
+    }
+
+    public void setFileName(String fileName)
+    {
+        this.fileName = fileName;
+    }
+
+    public String getFileName()
+    {
+        return fileName;
+    }
+
+    public void setFileType(String fileType)
+    {
+        this.fileType = fileType;
+    }
+
+    public String getFileType()
+    {
+        return fileType;
+    }
+
+    public void setFileSize(Long fileSize)
+    {
+        this.fileSize = fileSize;
+    }
+
+    public Long getFileSize()
+    {
+        return fileSize;
+    }
+
+    public void setOriginalUrl(String originalUrl)
+    {
+        this.originalUrl = originalUrl;
+    }
+
+    public String getOriginalUrl()
+    {
+        return originalUrl;
+    }
+
+    public void setLocalPath(String localPath)
+    {
+        this.localPath = localPath;
+    }
+
+    public String getLocalPath()
+    {
+        return localPath;
+    }
+
+    public void setDownloadTime(Date downloadTime)
+    {
+        this.downloadTime = downloadTime;
+    }
+
+    public Date getDownloadTime()
+    {
+        return downloadTime;
+    }
+
+    public void setTaskId(String taskId)
+    {
+        this.taskId = taskId;
+    }
+
+    public String getTaskId()
+    {
+        return taskId;
+    }
+
+    /** Returns a multi-line dump of every field declared in this class. */
+    @Override
+    public String toString() {
+        return new ToStringBuilder(this, ToStringStyle.MULTI_LINE_STYLE)
+            .append("id", getId())
+            .append("rawDataId", getRawDataId())
+            .append("structuredDataId", getStructuredDataId())
+            .append("fileName", getFileName())
+            .append("fileType", getFileType())
+            .append("fileSize", getFileSize())
+            .append("originalUrl", getOriginalUrl())
+            .append("localPath", getLocalPath())
+            .append("downloadTime", getDownloadTime())
+            .append("taskId", getTaskId())
+            .toString();
+    }
+}

+ 112 - 0
xzl-system/src/main/java/com/xzl/system/domain/SysSpiderSourceData.java

@@ -0,0 +1,112 @@
+package com.xzl.system.domain;
+
+import com.fasterxml.jackson.annotation.JsonFormat;
+import com.xzl.common.annotation.Excel;
+import com.xzl.common.core.domain.BaseEntity;
+import org.apache.commons.lang3.builder.ToStringBuilder;
+import org.apache.commons.lang3.builder.ToStringStyle;
+
+import java.util.Date;
+
+/**
+ * 原始采集数据对象 sys_spider_source_data
+ *
+ * @author xzl
+ * @date 2025-11-13
+ */
+public class SysSpiderSourceData extends BaseEntity
+{
+    private static final long serialVersionUID = 1L;
+
+    /** 原始数据ID */
+    private Long id;
+
+    /** 采集页面URL */
+    @Excel(name = "采集页面URL")
+    private String pageUrl;
+
+    /** 原始文本内容(HTML/JSON等) */
+    @Excel(name = "原始文本内容", readConverterExp = "H=TML/JSON等")
+    private String rawContent;
+
+    /** 原始附件信息(JSON格式:文件名、URL等) */
+    @Excel(name = "原始附件信息", readConverterExp = "J=SON格式:文件名、URL等")
+    private String rawAttachments;
+
+    /** 采集时间 */
+    @JsonFormat(pattern = "yyyy-MM-dd")
+    @Excel(name = "采集时间", width = 30, dateFormat = "yyyy-MM-dd")
+    private Date collectionTime;
+
+    /** 关联采集任务ID(便于追溯) */
+    @Excel(name = "关联采集任务ID", readConverterExp = "便=于追溯")
+    private String taskId;
+
+    public void setId(Long id)
+    {
+        this.id = id;
+    }
+
+    public Long getId()
+    {
+        return id;
+    }
+    public void setPageUrl(String pageUrl)
+    {
+        this.pageUrl = pageUrl;
+    }
+
+    public String getPageUrl()
+    {
+        return pageUrl;
+    }
+    public void setRawContent(String rawContent)
+    {
+        this.rawContent = rawContent;
+    }
+
+    public String getRawContent()
+    {
+        return rawContent;
+    }
+    public void setRawAttachments(String rawAttachments)
+    {
+        this.rawAttachments = rawAttachments;
+    }
+
+    public String getRawAttachments()
+    {
+        return rawAttachments;
+    }
+    public void setCollectionTime(Date collectionTime)
+    {
+        this.collectionTime = collectionTime;
+    }
+
+    public Date getCollectionTime()
+    {
+        return collectionTime;
+    }
+    public void setTaskId(String taskId)
+    {
+        this.taskId = taskId;
+    }
+
+    public String getTaskId()
+    {
+        return taskId;
+    }
+
+    @Override
+    public String toString() {
+        return new ToStringBuilder(this, ToStringStyle.MULTI_LINE_STYLE)
+            .append("id", getId())
+            .append("pageUrl", getPageUrl())
+            .append("rawContent", getRawContent())
+            .append("rawAttachments", getRawAttachments())
+            .append("collectionTime", getCollectionTime())
+            .append("taskId", getTaskId())
+            .toString();
+    }
+
+}

+ 167 - 0
xzl-system/src/main/java/com/xzl/system/domain/SysSpiderStructuredData.java

@@ -0,0 +1,167 @@
+package com.xzl.system.domain;
+
+import java.util.Date;
+import com.fasterxml.jackson.annotation.JsonFormat;
+import com.xzl.common.annotation.Excel;
+import com.xzl.common.core.domain.BaseEntity;
+import org.apache.commons.lang3.builder.ToStringBuilder;
+import org.apache.commons.lang3.builder.ToStringStyle;
+
+/**
+ * Structured-data entity for the sys_spider_structured_data table.
+ *
+ * <p>The {@code @Excel} header names are runtime strings and are intentionally left in
+ * Chinese. The generator-produced {@code readConverterExp} values were removed: they were
+ * fragments of the field descriptions (e.g. "逗=号分隔"), not real value-to-label
+ * mappings.
+ *
+ * @author xzl
+ * @date 2025-11-13
+ */
+public class SysSpiderStructuredData extends BaseEntity
+{
+    private static final long serialVersionUID = 1L;
+
+    /** Structured data ID. */
+    private Long id;
+
+    /** ID of the associated raw (source) data record. */
+    @Excel(name = "关联原始数据ID")
+    private Long rawDataId;
+
+    /** Document title. */
+    @Excel(name = "文档标题")
+    private String title;
+
+    /** Structured text content. */
+    @Excel(name = "结构化文本内容")
+    private String content;
+
+    /** Author. */
+    @Excel(name = "作者")
+    private String author;
+
+    /** Owning department. */
+    @Excel(name = "所属部门")
+    private String department;
+
+    /**
+     * Publish time.
+     * NOTE(review): serialized with a date-only pattern, so any time-of-day component is
+     * dropped in JSON and Excel output; confirm that is intended.
+     */
+    @JsonFormat(pattern = "yyyy-MM-dd")
+    @Excel(name = "发布时间", width = 30, dateFormat = "yyyy-MM-dd")
+    private Date publishTime;
+
+    /** Keywords (comma-separated). */
+    @Excel(name = "关键词")
+    private String keywords;
+
+    /** Custom fields (JSON). */
+    @Excel(name = "自定义字段")
+    private String customFields;
+
+    /**
+     * Processing time.
+     * NOTE(review): date-only pattern, same caveat as publishTime.
+     */
+    @JsonFormat(pattern = "yyyy-MM-dd")
+    @Excel(name = "处理时间", width = 30, dateFormat = "yyyy-MM-dd")
+    private Date processTime;
+
+    public void setId(Long id)
+    {
+        this.id = id;
+    }
+
+    public Long getId()
+    {
+        return id;
+    }
+
+    public void setRawDataId(Long rawDataId)
+    {
+        this.rawDataId = rawDataId;
+    }
+
+    public Long getRawDataId()
+    {
+        return rawDataId;
+    }
+
+    public void setTitle(String title)
+    {
+        this.title = title;
+    }
+
+    public String getTitle()
+    {
+        return title;
+    }
+
+    public void setContent(String content)
+    {
+        this.content = content;
+    }
+
+    public String getContent()
+    {
+        return content;
+    }
+
+    public void setAuthor(String author)
+    {
+        this.author = author;
+    }
+
+    public String getAuthor()
+    {
+        return author;
+    }
+
+    public void setDepartment(String department)
+    {
+        this.department = department;
+    }
+
+    public String getDepartment()
+    {
+        return department;
+    }
+
+    public void setPublishTime(Date publishTime)
+    {
+        this.publishTime = publishTime;
+    }
+
+    public Date getPublishTime()
+    {
+        return publishTime;
+    }
+
+    public void setKeywords(String keywords)
+    {
+        this.keywords = keywords;
+    }
+
+    public String getKeywords()
+    {
+        return keywords;
+    }
+
+    public void setCustomFields(String customFields)
+    {
+        this.customFields = customFields;
+    }
+
+    public String getCustomFields()
+    {
+        return customFields;
+    }
+
+    public void setProcessTime(Date processTime)
+    {
+        this.processTime = processTime;
+    }
+
+    public Date getProcessTime()
+    {
+        return processTime;
+    }
+
+    /** Returns a multi-line dump of every field declared in this class. */
+    @Override
+    public String toString() {
+        return new ToStringBuilder(this, ToStringStyle.MULTI_LINE_STYLE)
+            .append("id", getId())
+            .append("rawDataId", getRawDataId())
+            .append("title", getTitle())
+            .append("content", getContent())
+            .append("author", getAuthor())
+            .append("department", getDepartment())
+            .append("publishTime", getPublishTime())
+            .append("keywords", getKeywords())
+            .append("customFields", getCustomFields())
+            .append("processTime", getProcessTime())
+            .toString();
+    }
+}

+ 17 - 18
xzl-system/src/main/resources/mapper/system/SysDeptMapper.xml

@@ -23,13 +23,12 @@ PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN"
 		<result property="dingId" column="ding_id" />
 		<result property="dingParentId" column="ding_parent_id" />
 	</resultMap>
-	
+
 	<sql id="selectDeptVo">
-        select d.dept_id, d.parent_id, d.ancestors, d.dept_name, d.order_num, d.leader, d.phone, d.email, d.status, d.del_flag, d.create_by, d.create_time ,
-        d.ding_id, d.ding_parent_id
+        select d.dept_id, d.parent_id, d.ancestors, d.dept_name, d.order_num, d.leader, d.phone, d.email, d.status, d.del_flag, d.create_by, d.create_time
         from sys_dept d
     </sql>
-    
+
 	<select id="selectDeptList" parameterType="SysDept" resultMap="SysDeptResult">
         <include refid="selectDeptVo"/>
         where d.del_flag = '0'
@@ -49,7 +48,7 @@ PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN"
 		${params.dataScope}
 		order by d.parent_id, d.order_num
     </select>
-    
+
     <select id="selectDeptListByRoleId" resultType="Long">
 		select d.dept_id
 		from sys_dept d
@@ -60,36 +59,36 @@ PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN"
             </if>
 		order by d.parent_id, d.order_num
 	</select>
-    
+
     <select id="selectDeptById" parameterType="Long" resultMap="SysDeptResult">
 		select d.dept_id, d.parent_id, d.ancestors, d.dept_name, d.order_num, d.leader, d.phone, d.email, d.status,
 			(select dept_name from sys_dept where dept_id = d.parent_id) parent_name
 		from sys_dept d
 		where d.dept_id = #{deptId}
 	</select>
-    
+
     <select id="checkDeptExistUser" parameterType="Long" resultType="int">
 		select count(1) from sys_user where dept_id = #{deptId} and del_flag = '0'
 	</select>
-	
+
 	<select id="hasChildByDeptId" parameterType="Long" resultType="int">
 		select count(1) from sys_dept
 		where del_flag = '0' and parent_id = #{deptId} limit 1
 	</select>
-	
+
 	<select id="selectChildrenDeptById" parameterType="Long" resultMap="SysDeptResult">
 		select * from sys_dept where find_in_set(#{deptId}, ancestors)
 	</select>
-	
+
 	<select id="selectNormalChildrenDeptById" parameterType="Long" resultType="int">
 		select count(*) from sys_dept where status = 0 and del_flag = '0' and find_in_set(#{deptId}, ancestors)
 	</select>
-	
+
 	<select id="checkDeptNameUnique" resultMap="SysDeptResult">
 	    <include refid="selectDeptVo"/>
 		where dept_name=#{deptName} and parent_id = #{parentId} and del_flag = '0' limit 1
 	</select>
-    
+
     <insert id="insertDept" parameterType="SysDept">
  		insert into sys_dept(
  			<if test="deptId != null and deptId != 0">dept_id,</if>
@@ -121,7 +120,7 @@ PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN"
  			sysdate()
  		)
 	</insert>
-	
+
 	<update id="updateDept" parameterType="SysDept">
  		update sys_dept
  		<set>
@@ -140,7 +139,7 @@ PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN"
  		</set>
  		where dept_id = #{deptId}
 	</update>
-	
+
 	<update id="updateDeptChildren" parameterType="java.util.List">
 	    update sys_dept set ancestors =
 	    <foreach collection="depts" item="item" index="index"
@@ -153,16 +152,16 @@ PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN"
 	        #{item.deptId}
 	    </foreach>
 	</update>
-	 
+
 	<update id="updateDeptStatusNormal" parameterType="Long">
- 	    update sys_dept set status = '0' where dept_id in 
+ 	    update sys_dept set status = '0' where dept_id in
  	    <foreach collection="array" item="deptId" open="(" separator="," close=")">
         	#{deptId}
         </foreach>
 	</update>
-	
+
 	<delete id="deleteDeptById" parameterType="Long">
 		update sys_dept set del_flag = '2' where dept_id = #{deptId}
 	</delete>
 
-</mapper> 
+</mapper>

+ 9 - 9
xzl-system/src/main/resources/mapper/system/SysUserMapper.xml

@@ -53,7 +53,7 @@ PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN"
 	<sql id="selectUserVo">
         select u.user_id, u.dept_id, u.user_name, u.nick_name, u.email, u.avatar, u.phonenumber, u.password, u.sex, u.status, u.del_flag, u.login_ip, u.login_date, u.create_by, u.create_time, u.remark,
         d.dept_id, d.parent_id, d.ancestors, d.dept_name, d.order_num, d.leader, d.status as dept_status,
-        r.role_id, r.role_name, r.role_key, r.role_sort, r.data_scope, r.status as role_status, u.nlp_name, u.nlp_pwd
+        r.role_id, r.role_name, r.role_key, r.role_sort, r.data_scope, r.status as role_status
         from sys_user u
 		    left join sys_dept d on u.dept_id = d.dept_id
 		    left join sys_user_role ur on u.user_id = ur.user_id
@@ -61,8 +61,8 @@ PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN"
     </sql>
 
     <select id="selectUserList" parameterType="SysUser" resultMap="SysUserResult">
-		select u.user_id, u.dept_id, u.nick_name, u.user_name, u.email, u.avatar, u.phonenumber, u.sex, u.status, u.del_flag, u.login_ip, u.login_date, u.create_by, u.create_time, u.remark, d.dept_name, d.leader ,
-		u.nlp_name, u.nlp_pwd
+		select u.user_id, u.dept_id, u.nick_name, u.user_name, u.email, u.avatar, u.phonenumber, u.sex, u.status, u.del_flag, u.login_ip, u.login_date, u.create_by, u.create_time, u.remark, d.dept_name, d.leader
+
 		from sys_user u
 		left join sys_dept d on u.dept_id = d.dept_id
 		where u.del_flag = '0'
@@ -164,8 +164,8 @@ PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN"
  			<if test="remark != null and remark != ''">remark,</if>
  			<if test="dingDeptId != null and dingDeptId != ''">ding_dept_id,</if>
  			<if test="dingUnionId != null and dingUnionId != ''">ding_union_id,</if>
- 			<if test="nlpName != null and nlpName != ''">nlp_name,</if>
- 			<if test="nlpPwd != null and nlpPwd != ''">nlp_pwd,</if>
+<!-- 			<if test="nlpName != null and nlpName != ''">nlp_name,</if>-->
+<!-- 			<if test="nlpPwd != null and nlpPwd != ''">nlp_pwd,</if>-->
  			create_time
  		)values(
  			<if test="userId != null and userId != ''">#{userId},</if>
@@ -182,8 +182,8 @@ PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN"
  			<if test="remark != null and remark != ''">#{remark},</if>
 			<if test="dingDeptId != null and dingDeptId != ''">#{dingDeptId},</if>
 			<if test="dingUnionId != null and dingUnionId != ''">#{dingUnionId},</if>
-		    <if test="nlpName != null and nlpName != ''">#{nlpName},</if>
-		    <if test="nlpPwd != null and nlpPwd != ''">#{nlpPwd},</if>
+<!--		    <if test="nlpName != null and nlpName != ''">#{nlpName},</if>-->
+<!--		    <if test="nlpPwd != null and nlpPwd != ''">#{nlpPwd},</if>-->
 		    sysdate()
  		)
 	</insert>
@@ -206,8 +206,8 @@ PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN"
  			<if test="remark != null">remark = #{remark},</if>
 			<if test="dingDeptId != null and dingDeptId != ''">ding_dept_id = #{dingDeptId},</if>
 			<if test="dingUnionId != null and dingUnionId != ''">ding_union_id = #{dingUnionId},</if>
-			<if test="nlpName != null and nlpName != ''">nlp_name = #{nlpName},</if>
-			<if test="nlpPwd != null and nlpPwd != ''">nlp_pwd = #{nlpPwd},</if>
+<!--			<if test="nlpName != null and nlpName != ''">nlp_name = #{nlpName},</if>-->
+<!--			<if test="nlpPwd != null and nlpPwd != ''">nlp_pwd = #{nlpPwd},</if>-->
 			update_time = sysdate()
  		</set>
  		where user_id = #{userId}

+ 44 - 0
xzl-ui/src/api/spiderData/attachments.js

@@ -0,0 +1,44 @@
+import request from '@/utils/request'
+
+// Query the attachment list: GET /spiderData/attachments/list with `query` passed as params.
+export function listAttachments(query) {
+  return request({
+    url: '/spiderData/attachments/list',
+    method: 'get',
+    params: query
+  })
+}
+
+// Fetch the details of one attachment: GET /spiderData/attachments/{id}.
+export function getAttachments(id) {
+  return request({
+    url: '/spiderData/attachments/' + id,
+    method: 'get'
+  })
+}
+
+// Create an attachment: POST /spiderData/attachments with `data` passed as the request data.
+export function addAttachments(data) {
+  return request({
+    url: '/spiderData/attachments',
+    method: 'post',
+    data: data
+  })
+}
+
+// Update an attachment: PUT /spiderData/attachments with `data` passed as the request data.
+export function updateAttachments(data) {
+  return request({
+    url: '/spiderData/attachments',
+    method: 'put',
+    data: data
+  })
+}
+
+// Delete an attachment: DELETE /spiderData/attachments/{id}.
+export function delAttachments(id) {
+  return request({
+    url: '/spiderData/attachments/' + id,
+    method: 'delete'
+  })
+}

+ 55 - 0
xzl-ui/src/api/spiderData/spiderData.js

@@ -0,0 +1,55 @@
+import request from '@/utils/request'
+
+// Query the raw collected-data list: GET /spiderData/sourceData/list with `query` passed as params.
+export function listSpiderData(query) {
+  return request({
+    url: '/spiderData/sourceData/list',
+    method: 'get',
+    params: query
+  })
+}
+
+// Fetch the details of one raw collected-data record: GET /spiderData/sourceData/{id}.
+export function getSpiderData(id) {
+  return request({
+    url: '/spiderData/sourceData/' + id,
+    method: 'get'
+  })
+}
+
+// Create a raw collected-data record: POST /spiderData/sourceData with `data` passed as the request data.
+export function addSpiderData(data) {
+  return request({
+    url: '/spiderData/sourceData',
+    method: 'post',
+    data: data
+  })
+}
+
+// Update a raw collected-data record: PUT /spiderData/sourceData with `data` passed as the request data.
+export function updateSpiderData(data) {
+  return request({
+    url: '/spiderData/sourceData',
+    method: 'put',
+    data: data
+  })
+}
+
+// Delete a raw collected-data record: DELETE /spiderData/sourceData/{id}.
+export function delSpiderData(id) {
+  return request({
+    url: '/spiderData/sourceData/' + id,
+    method: 'delete'
+  })
+}
+
+// Added: request a collection (crawl) run via POST /spiderData/sourceData/collect.
+export function spiderCollect(data) {
+  return request({
+    url: '/spiderData/sourceData/collect', // backend collect endpoint; path must be confirmed with the backend team
+    method: 'post',
+    data: data // carries pageUrl and other parameters
+  })
+}
+
+

+ 44 - 0
xzl-ui/src/api/spiderData/structured.js

@@ -0,0 +1,44 @@
+import request from '@/utils/request'
+
+// Query the structured-data list: GET /spiderData/structured/list with `query` passed as params.
+export function listStructured(query) {
+  return request({
+    url: '/spiderData/structured/list',
+    method: 'get',
+    params: query
+  })
+}
+
+// Fetch the details of one structured-data record: GET /spiderData/structured/{id}.
+export function getStructured(id) {
+  return request({
+    url: '/spiderData/structured/' + id,
+    method: 'get'
+  })
+}
+
+// Create a structured-data record: POST /spiderData/structured with `data` passed as the request data.
+export function addStructured(data) {
+  return request({
+    url: '/spiderData/structured',
+    method: 'post',
+    data: data
+  })
+}
+
+// Update a structured-data record: PUT /spiderData/structured with `data` passed as the request data.
+export function updateStructured(data) {
+  return request({
+    url: '/spiderData/structured',
+    method: 'put',
+    data: data
+  })
+}
+
+// Delete a structured-data record: DELETE /spiderData/structured/{id}.
+export function delStructured(id) {
+  return request({
+    url: '/spiderData/structured/' + id,
+    method: 'delete'
+  })
+}

+ 1 - 1
xzl-ui/src/router/index.js

@@ -216,7 +216,6 @@ export const dynamicRoutes = [
     ]
   },
 
-
 ]
 
 // 防止连续点击多次路由报错
@@ -236,3 +235,4 @@ export default new Router({
   scrollBehavior: () => ({ y: 0 }),
   routes: constantRoutes
 })
+

+ 1 - 1
xzl-ui/src/views/logistics4.vue

@@ -1,4 +1,4 @@
-<template>
+<template>
   <div id="screen" >
     <iframe frameborder="no"  src="http://10.70.192.123/webroot/decision/view/form?viewlet=大屏新版20240226/营销业务看板.frm" width="100%" height="100%"></iframe>
 

+ 313 - 0
xzl-ui/src/views/spiderData/attachments/index.vue

@@ -0,0 +1,313 @@
+<template>
+  <div class="app-container">
+    <el-form :model="queryParams" ref="queryForm" size="small" :inline="true" v-show="showSearch" label-width="68px">
+      <el-form-item label="附件文件名" prop="fileName">
+        <el-input
+          v-model="queryParams.fileName"
+          placeholder="请输入附件文件名"
+          clearable
+          @keyup.enter.native="handleQuery"
+        />
+      </el-form-item>
+      <el-form-item label="OA系统原始URL" prop="originalUrl">
+        <el-input
+          v-model="queryParams.originalUrl"
+          placeholder="请输入OA系统原始URL"
+          clearable
+          @keyup.enter.native="handleQuery"
+        />
+      </el-form-item>
+      <el-form-item label="关联采集任务ID" prop="taskId">
+        <el-input
+          v-model="queryParams.taskId"
+          placeholder="请输入关联采集任务ID"
+          clearable
+          @keyup.enter.native="handleQuery"
+        />
+      </el-form-item>
+      <el-form-item>
+        <el-button type="primary" icon="el-icon-search" size="mini" @click="handleQuery">搜索</el-button>
+        <el-button icon="el-icon-refresh" size="mini" @click="resetQuery">重置</el-button>
+      </el-form-item>
+    </el-form>
+
+    <el-row :gutter="10" class="mb8">
+      <el-col :span="1.5">
+        <el-button
+          type="primary"
+          plain
+          icon="el-icon-plus"
+          size="mini"
+          @click="handleAdd"
+          v-hasPermi="['spiderData:attachments:add']"
+        >新增</el-button>
+      </el-col>
+      <el-col :span="1.5">
+        <el-button
+          type="success"
+          plain
+          icon="el-icon-edit"
+          size="mini"
+          :disabled="single"
+          @click="handleUpdate"
+          v-hasPermi="['spiderData:attachments:edit']"
+        >修改</el-button>
+      </el-col>
+      <el-col :span="1.5">
+        <el-button
+          type="danger"
+          plain
+          icon="el-icon-delete"
+          size="mini"
+          :disabled="multiple"
+          @click="handleDelete"
+          v-hasPermi="['spiderData:attachments:remove']"
+        >删除</el-button>
+      </el-col>
+      <el-col :span="1.5">
+        <el-button
+          type="warning"
+          plain
+          icon="el-icon-download"
+          size="mini"
+          @click="handleExport"
+          v-hasPermi="['spiderData:attachments:export']"
+        >导出</el-button>
+      </el-col>
+      <right-toolbar :showSearch.sync="showSearch" @queryTable="getList"></right-toolbar>
+    </el-row>
+
+    <el-table v-loading="loading" :data="attachmentsList" @selection-change="handleSelectionChange">
+      <el-table-column type="selection" width="55" align="center" />
+      <el-table-column label="附件ID" align="center" prop="id" />
+      <el-table-column label="关联原始数据ID" align="center" prop="rawDataId" />
+      <el-table-column label="关联结构化数据ID" align="center" prop="structuredDataId" />
+      <el-table-column label="附件文件名" align="center" prop="fileName" />
+      <el-table-column label="文件类型" align="center" prop="fileType" />
+      <el-table-column label="文件大小" align="center" prop="fileSize" />
+      <el-table-column label="OA系统原始URL" align="center" prop="originalUrl" />
+      <el-table-column label="本地存储路径" align="center" prop="localPath" />
+      <el-table-column label="下载时间" align="center" prop="downloadTime" width="180">
+        <template slot-scope="scope">
+          <span>{{ parseTime(scope.row.downloadTime, '{y}-{m}-{d}') }}</span>
+        </template>
+      </el-table-column>
+      <el-table-column label="关联采集任务ID" align="center" prop="taskId" />
+      <el-table-column label="操作" align="center" class-name="small-padding fixed-width">
+        <template slot-scope="scope">
+          <el-button
+            size="mini"
+            type="text"
+            icon="el-icon-edit"
+            @click="handleUpdate(scope.row)"
+            v-hasPermi="['spiderData:attachments:edit']"
+          >修改</el-button>
+          <el-button
+            size="mini"
+            type="text"
+            icon="el-icon-delete"
+            @click="handleDelete(scope.row)"
+            v-hasPermi="['spiderData:attachments:remove']"
+          >删除</el-button>
+        </template>
+      </el-table-column>
+    </el-table>
+    
+    <pagination
+      v-show="total>0"
+      :total="total"
+      :page.sync="queryParams.pageNum"
+      :limit.sync="queryParams.pageSize"
+      @pagination="getList"
+    />
+
+    <!-- 添加或修改附件对话框 -->
+    <el-dialog :title="title" :visible.sync="open" width="500px" append-to-body>
+      <el-form ref="form" :model="form" :rules="rules" label-width="80px">
+        <el-form-item label="关联原始数据ID" prop="rawDataId">
+          <el-input v-model="form.rawDataId" placeholder="请输入关联原始数据ID" />
+        </el-form-item>
+        <el-form-item label="关联结构化数据ID" prop="structuredDataId">
+          <el-input v-model="form.structuredDataId" placeholder="请输入关联结构化数据ID" />
+        </el-form-item>
+        <el-form-item label="附件文件名" prop="fileName">
+          <el-input v-model="form.fileName" placeholder="请输入附件文件名" />
+        </el-form-item>
+        <el-form-item label="文件大小" prop="fileSize">
+          <el-input v-model="form.fileSize" placeholder="请输入文件大小" />
+        </el-form-item>
+        <el-form-item label="OA系统原始URL" prop="originalUrl">
+          <el-input v-model="form.originalUrl" placeholder="请输入OA系统原始URL" />
+        </el-form-item>
+        <el-form-item label="本地存储路径" prop="localPath">
+          <el-input v-model="form.localPath" placeholder="请输入本地存储路径" />
+        </el-form-item>
+        <el-form-item label="下载时间" prop="downloadTime">
+          <el-date-picker clearable
+            v-model="form.downloadTime"
+            type="date"
+            value-format="yyyy-MM-dd"
+            placeholder="请选择下载时间">
+          </el-date-picker>
+        </el-form-item>
+        <el-form-item label="关联采集任务ID" prop="taskId">
+          <el-input v-model="form.taskId" placeholder="请输入关联采集任务ID" />
+        </el-form-item>
+      </el-form>
+      <div slot="footer" class="dialog-footer">
+        <el-button type="primary" @click="submitForm">确 定</el-button>
+        <el-button @click="cancel">取 消</el-button>
+      </div>
+    </el-dialog>
+  </div>
+</template>
+
+<script>
+import { listAttachments, getAttachments, delAttachments, addAttachments, updateAttachments } from "@/api/spiderData/attachments";
+
+export default {
+  name: "Attachments",
+  data() {
+    return {
+      // 遮罩层
+      loading: true,
+      // 选中数组
+      ids: [],
+      // 非单个禁用
+      single: true,
+      // 非多个禁用
+      multiple: true,
+      // 显示搜索条件
+      showSearch: true,
+      // 总条数
+      total: 0,
+      // 附件表格数据
+      attachmentsList: [],
+      // 弹出层标题
+      title: "",
+      // 是否显示弹出层
+      open: false,
+      // 查询参数
+      queryParams: {
+        pageNum: 1,
+        pageSize: 10,
+        fileName: null,
+        fileType: null,
+        originalUrl: null,
+        taskId: null
+      },
+      // 表单参数
+      form: {},
+      // 表单校验
+      rules: {
+        rawDataId: [
+          { required: true, message: "关联原始数据ID不能为空", trigger: "blur" }
+        ],
+      }
+    };
+  },
+  created() {
+    this.getList();
+  },
+  methods: {
+    /** 查询附件列表 */
+    getList() {
+      this.loading = true;
+      listAttachments(this.queryParams).then(response => {
+        this.attachmentsList = response.rows;
+        this.total = response.total;
+        this.loading = false;
+      });
+    },
+    // 取消按钮
+    cancel() {
+      this.open = false;
+      this.reset();
+    },
+    // 表单重置
+    reset() {
+      this.form = {
+        id: null,
+        rawDataId: null,
+        structuredDataId: null,
+        fileName: null,
+        fileType: null,
+        fileSize: null,
+        originalUrl: null,
+        localPath: null,
+        downloadTime: null,
+        taskId: null
+      };
+      this.resetForm("form");
+    },
+    /** 搜索按钮操作 */
+    handleQuery() {
+      this.queryParams.pageNum = 1;
+      this.getList();
+    },
+    /** 重置按钮操作 */
+    resetQuery() {
+      this.resetForm("queryForm");
+      this.handleQuery();
+    },
+    // 多选框选中数据
+    handleSelectionChange(selection) {
+      this.ids = selection.map(item => item.id)
+      this.single = selection.length!==1
+      this.multiple = !selection.length
+    },
+    /** 新增按钮操作 */
+    handleAdd() {
+      this.reset();
+      this.open = true;
+      this.title = "添加附件";
+    },
+    /** 修改按钮操作 */
+    handleUpdate(row) {
+      this.reset();
+      const id = row.id || this.ids
+      getAttachments(id).then(response => {
+        this.form = response.data;
+        this.open = true;
+        this.title = "修改附件";
+      });
+    },
+    /** 提交按钮 */
+    submitForm() {
+      this.$refs["form"].validate(valid => {
+        if (valid) {
+          if (this.form.id != null) {
+            updateAttachments(this.form).then(response => {
+              this.$modal.msgSuccess("修改成功");
+              this.open = false;
+              this.getList();
+            });
+          } else {
+            addAttachments(this.form).then(response => {
+              this.$modal.msgSuccess("新增成功");
+              this.open = false;
+              this.getList();
+            });
+          }
+        }
+      });
+    },
+    /** 删除按钮操作 */
+    handleDelete(row) {
+      const ids = row.id || this.ids;
+      this.$modal.confirm('是否确认删除附件编号为"' + ids + '"的数据项?').then(function() {
+        return delAttachments(ids);
+      }).then(() => {
+        this.getList();
+        this.$modal.msgSuccess("删除成功");
+      }).catch(() => {});
+    },
+    /** 导出按钮操作 */
+    handleExport() {
+      this.download('spiderData/attachments/export', {
+        ...this.queryParams
+      }, `attachments_${new Date().getTime()}.xlsx`)
+    }
+  }
+};
+</script>

+ 336 - 0
xzl-ui/src/views/spiderData/sourceData/index.vue

@@ -0,0 +1,336 @@
+<template>
+  <div class="app-container">
+    <el-form :model="queryParams" ref="queryForm" size="small" :inline="true" v-show="showSearch" label-width="68px">
+      <el-form-item label="采集页面URL" prop="pageUrl">
+        <el-input
+          v-model="queryParams.pageUrl"
+          placeholder="请输入采集页面URL"
+          clearable
+          @keyup.enter.native="handleQuery"
+        />
+      </el-form-item>
+      <el-form-item label="采集时间">
+        <el-date-picker
+          v-model="daterangeCollectionTime"
+          style="width: 240px"
+          value-format="yyyy-MM-dd"
+          type="daterange"
+          range-separator="-"
+          start-placeholder="开始日期"
+          end-placeholder="结束日期"
+        ></el-date-picker>
+      </el-form-item>
+      <el-form-item label="关联采集任务ID" prop="taskId">
+        <el-input
+          v-model="queryParams.taskId"
+          placeholder="请输入关联采集任务ID"
+          clearable
+          @keyup.enter.native="handleQuery"
+        />
+      </el-form-item>
+      <el-form-item>
+        <el-button type="primary" icon="el-icon-search" size="mini" @click="handleQuery">搜索</el-button>
+        <el-button icon="el-icon-refresh" size="mini" @click="resetQuery">重置</el-button>
+        <el-button icon="el-icon-document" size="mini" @click="spiderQuery">采集</el-button>
+      </el-form-item>
+    </el-form>
+
+    <el-row :gutter="10" class="mb8">
+      <el-col :span="1.5">
+        <el-button
+          type="primary"
+          plain
+          icon="el-icon-plus"
+          size="mini"
+          @click="handleAdd"
+          v-hasPermi="['spiderData:spiderData:add']"
+        >新增</el-button>
+      </el-col>
+      <el-col :span="1.5">
+        <el-button
+          type="success"
+          plain
+          icon="el-icon-edit"
+          size="mini"
+          :disabled="single"
+          @click="handleUpdate"
+          v-hasPermi="['spiderData:spiderData:edit']"
+        >修改</el-button>
+      </el-col>
+      <el-col :span="1.5">
+        <el-button
+          type="danger"
+          plain
+          icon="el-icon-delete"
+          size="mini"
+          :disabled="multiple"
+          @click="handleDelete"
+          v-hasPermi="['spiderData:spiderData:remove']"
+        >删除</el-button>
+      </el-col>
+      <el-col :span="1.5">
+        <el-button
+          type="warning"
+          plain
+          icon="el-icon-download"
+          size="mini"
+          @click="handleExport"
+          v-hasPermi="['spiderData:spiderData:export']"
+        >导出</el-button>
+      </el-col>
+      <right-toolbar :showSearch.sync="showSearch" @queryTable="getList"></right-toolbar>
+    </el-row>
+
+    <el-table v-loading="loading" :data="spiderDataList" @selection-change="handleSelectionChange">
+      <el-table-column type="selection" width="55" align="center" />
+      <!-- <el-table-column label="原始数据ID" align="center" prop="id" /> -->
+      <el-table-column label="采集页面URL" align="center" prop="pageUrl" />
+<!--      <el-table-column label="原始文本内容" align="center" prop="rawContent" width="500" show-overflow-tooltip/>-->
+       <!-- Raw text column: shows a 50-character preview; clicking opens the full content.
+            rawContent may be empty, so it is checked before reading .length. -->
+       <el-table-column label="原始文本内容" align="center" prop="rawContent" width="300">
+        <template slot-scope="scope">
+          <el-link
+            type="primary"
+            @click="openRichTextPage(scope.row.rawContent)"
+            :underline="false"
+            class="text-ellipsis"
+          >
+            {{ scope.row.rawContent && scope.row.rawContent.length > 50 ? scope.row.rawContent.substring(0, 50) + '...' : (scope.row.rawContent || '无') }}
+          </el-link>
+        </template>
+      </el-table-column>
+      
+
+      <el-table-column label="原始附件信息" align="center" prop="rawAttachments" width="300" show-overflow-tooltip/>
+      <el-table-column label="采集时间" align="center" prop="collectionTime" width="180">
+        <template slot-scope="scope">
+          <span>{{ parseTime(scope.row.collectionTime, '{y}-{m}-{d}') }}</span>
+        </template>
+      </el-table-column>
+      <el-table-column label="关联采集任务ID" align="center" prop="taskId" />
+      <el-table-column label="操作" align="center" class-name="small-padding fixed-width">
+        <template slot-scope="scope">
+          <el-button
+            size="mini"
+            type="text"
+            icon="el-icon-edit"
+            @click="handleUpdate(scope.row)"
+            v-hasPermi="['spiderData:spiderData:edit']"
+          >修改</el-button>
+          <el-button
+            size="mini"
+            type="text"
+            icon="el-icon-delete"
+            @click="handleDelete(scope.row)"
+            v-hasPermi="['spiderData:spiderData:remove']"
+          >删除</el-button>
+        </template>
+      </el-table-column>
+    </el-table>
+
+    <pagination
+      v-show="total>0"
+      :total="total"
+      :page.sync="queryParams.pageNum"
+      :limit.sync="queryParams.pageSize"
+      @pagination="getList"
+    />
+
+    <!-- 添加或修改原始采集数据对话框 -->
+    <el-dialog :title="title" :visible.sync="open" width="1000px" append-to-body>
+      <el-form ref="form" :model="form" :rules="rules" label-width="80px">
+        <el-form-item label="采集页面URL" prop="pageUrl">
+          <el-input v-model="form.pageUrl" placeholder="请输入采集页面URL" />
+        </el-form-item>
+        <el-form-item label="原始文本内容">
+          <editor v-model="form.rawContent" :min-height="192"/>
+        </el-form-item>
+        <el-form-item label="原始附件信息" prop="rawAttachments">
+          <el-input v-model="form.rawAttachments" type="textarea" placeholder="请输入内容" />
+        </el-form-item>
+      </el-form>
+      <div slot="footer" class="dialog-footer">
+        <el-button type="primary" @click="submitForm">确 定</el-button>
+        <el-button @click="cancel">取 消</el-button>
+      </div>
+    </el-dialog>
+  </div>
+</template>
+
+<script>
+import { listSpiderData, getSpiderData, delSpiderData, addSpiderData, updateSpiderData,spiderCollect  } from "@/api/spiderData/spiderData";
+
+export default {
+  name: "SpiderData",
+  data() {
+    return {
+      // 遮罩层
+      loading: true,
+      // 选中数组
+      ids: [],
+      // 非单个禁用
+      single: true,
+      // 非多个禁用
+      multiple: true,
+      // 显示搜索条件
+      showSearch: true,
+      // 总条数
+      total: 0,
+      // 原始采集数据表格数据
+      spiderDataList: [],
+      // 弹出层标题
+      title: "",
+      // 是否显示弹出层
+      open: false,
+      // 原始附件信息时间范围
+      daterangeCollectionTime: [],
+      // 查询参数
+      queryParams: {
+        pageNum: 1,
+        pageSize: 10,
+        pageUrl: null,
+        rawContent: null,
+        rawAttachments: null,
+        collectionTime: null,
+        taskId: null
+      },
+      // 表单参数
+      form: {},
+      // 表单校验
+      rules: {
+      }
+    };
+  },
+  created() {
+    this.getList();
+  },
+  methods: {
+    /** 打开富文本新页面 */
+    openRichTextPage(rawContent) {
+      // 路由跳转,携带原始文本参数(通过query传递)
+      this.$router.push({
+        path: '/spiderData/richTextView', // 新页面路由路径
+        query: { content: rawContent || '' } // 传递原始文本
+      });
+    },
+    /** 查询原始采集数据列表 */
+    getList() {
+      this.loading = true;
+      this.queryParams.params = {};
+      if (null != this.daterangeCollectionTime && '' != this.daterangeCollectionTime) {
+        this.queryParams.params["beginCollectionTime"] = this.daterangeCollectionTime[0];
+        this.queryParams.params["endCollectionTime"] = this.daterangeCollectionTime[1];
+      }
+      listSpiderData(this.queryParams).then(response => {
+        this.spiderDataList = response.rows;
+        this.total = response.total;
+        this.loading = false;
+      });
+    },
+    /** 采集按钮操作 */
+    spiderQuery() {
+      // 校验:若pageUrl为空,给出提示
+      if (!this.queryParams.pageUrl) {
+        this.$modal.msgWarning("请输入采集页面URL");
+        return;
+      }
+
+      // 调用封装好的采集接口
+      spiderCollect({
+        pageUrl: this.queryParams.pageUrl // 传递采集页面URL
+      }).then(response => {
+        this.$modal.msgSuccess("采集请求已发送,处理中...");
+        // 若需要刷新列表,可调用getList()
+        this.getList();
+      }).catch(error => {
+        this.$modal.msgError("采集失败:" + (error.response?.data?.msg || "服务器异常"));
+      });
+    },
+    // 取消按钮
+    cancel() {
+      this.open = false;
+      this.reset();
+    },
+    // 表单重置
+    reset() {
+      this.form = {
+        id: null,
+        pageUrl: null,
+        rawContent: null,
+        rawAttachments: null,
+        collectionTime: null,
+        taskId: null
+      };
+      this.resetForm("form");
+    },
+    /** 搜索按钮操作 */
+    handleQuery() {
+      this.queryParams.pageNum = 1;
+      this.getList();
+    },
+    /** 重置按钮操作 */
+    resetQuery() {
+      this.daterangeCollectionTime = [];
+      this.resetForm("queryForm");
+      this.handleQuery();
+    },
+    // 多选框选中数据
+    handleSelectionChange(selection) {
+      this.ids = selection.map(item => item.id)
+      this.single = selection.length!==1
+      this.multiple = !selection.length
+    },
+    /** 新增按钮操作 */
+    handleAdd() {
+      this.reset();
+      this.open = true;
+      this.title = "添加原始采集数据";
+    },
+    /** 修改按钮操作 */
+    handleUpdate(row) {
+      this.reset();
+      const id = row.id || this.ids
+      getSpiderData(id).then(response => {
+        this.form = response.data;
+        this.open = true;
+        this.title = "修改原始采集数据";
+      });
+    },
+    /** 提交按钮 */
+    submitForm() {
+      this.$refs["form"].validate(valid => {
+        if (valid) {
+          if (this.form.id != null) {
+            updateSpiderData(this.form).then(response => {
+              this.$modal.msgSuccess("修改成功");
+              this.open = false;
+              this.getList();
+            });
+          } else {
+            addSpiderData(this.form).then(response => {
+              this.$modal.msgSuccess("新增成功");
+              this.open = false;
+              this.getList();
+            });
+          }
+        }
+      });
+    },
+    /** 删除按钮操作 */
+    handleDelete(row) {
+      const ids = row.id || this.ids;
+      this.$modal.confirm('是否确认删除原始采集数据编号为"' + ids + '"的数据项?').then(function() {
+        return delSpiderData(ids);
+      }).then(() => {
+        this.getList();
+        this.$modal.msgSuccess("删除成功");
+      }).catch(() => {});
+    },
+    /** 导出按钮操作 */
+    handleExport() {
+      this.download('spiderData/sourceData/export', {
+        ...this.queryParams
+      }, `spiderData_${new Date().getTime()}.xlsx`)
+    }
+  }
+};
+</script>

+ 357 - 0
xzl-ui/src/views/spiderData/structured/index.vue

@@ -0,0 +1,357 @@
+<template>
+  <div class="app-container">
+    <el-form :model="queryParams" ref="queryForm" size="small" :inline="true" v-show="showSearch" label-width="68px">
+      <el-form-item label="文档标题" prop="title">
+        <el-input
+          v-model="queryParams.title"
+          placeholder="请输入文档标题"
+          clearable
+          @keyup.enter.native="handleQuery"
+        />
+      </el-form-item>
+      <el-form-item label="作者" prop="author">
+        <el-input
+          v-model="queryParams.author"
+          placeholder="请输入作者"
+          clearable
+          @keyup.enter.native="handleQuery"
+        />
+      </el-form-item>
+      <el-form-item label="所属部门" prop="department">
+        <el-input
+          v-model="queryParams.department"
+          placeholder="请输入所属部门"
+          clearable
+          @keyup.enter.native="handleQuery"
+        />
+      </el-form-item>
+      <el-form-item label="发布时间">
+        <el-date-picker
+          v-model="daterangePublishTime"
+          style="width: 240px"
+          value-format="yyyy-MM-dd"
+          type="daterange"
+          range-separator="-"
+          start-placeholder="开始日期"
+          end-placeholder="结束日期"
+        ></el-date-picker>
+      </el-form-item>
+      <el-form-item label="关键词" prop="keywords">
+        <el-input
+          v-model="queryParams.keywords"
+          placeholder="请输入关键词"
+          clearable
+          @keyup.enter.native="handleQuery"
+        />
+      </el-form-item>
+      <el-form-item>
+        <el-button type="primary" icon="el-icon-search" size="mini" @click="handleQuery">搜索</el-button>
+        <el-button icon="el-icon-refresh" size="mini" @click="resetQuery">重置</el-button>
+      </el-form-item>
+    </el-form>
+
+    <el-row :gutter="10" class="mb8">
+      <el-col :span="1.5">
+        <el-button
+          type="primary"
+          plain
+          icon="el-icon-plus"
+          size="mini"
+          @click="handleAdd"
+          v-hasPermi="['spiderData:structured:add']"
+        >新增</el-button>
+      </el-col>
+      <el-col :span="1.5">
+        <el-button
+          type="success"
+          plain
+          icon="el-icon-edit"
+          size="mini"
+          :disabled="single"
+          @click="handleUpdate"
+          v-hasPermi="['spiderData:structured:edit']"
+        >修改</el-button>
+      </el-col>
+      <el-col :span="1.5">
+        <el-button
+          type="danger"
+          plain
+          icon="el-icon-delete"
+          size="mini"
+          :disabled="multiple"
+          @click="handleDelete"
+          v-hasPermi="['spiderData:structured:remove']"
+        >删除</el-button>
+      </el-col>
+      <el-col :span="1.5">
+        <el-button
+          type="warning"
+          plain
+          icon="el-icon-download"
+          size="mini"
+          @click="handleExport"
+          v-hasPermi="['spiderData:structured:export']"
+        >导出</el-button>
+      </el-col>
+      <right-toolbar :showSearch.sync="showSearch" @queryTable="getList"></right-toolbar>
+    </el-row>
+
+    <el-table v-loading="loading" :data="structuredList" @selection-change="handleSelectionChange">
+      <el-table-column type="selection" width="55" align="center" />
+      <el-table-column label="结构化数据ID" align="center" prop="id" />
+      <el-table-column label="关联原始数据ID" align="center" prop="rawDataId" />
+      <el-table-column label="文档标题" align="center" prop="title" />
+      <el-table-column label="结构化文本内容" align="center" prop="content" />
+      <el-table-column label="作者" align="center" prop="author" />
+      <el-table-column label="所属部门" align="center" prop="department" />
+      <el-table-column label="发布时间" align="center" prop="publishTime" width="180">
+        <template slot-scope="scope">
+          <span>{{ parseTime(scope.row.publishTime, '{y}-{m}-{d}') }}</span>
+        </template>
+      </el-table-column>
+      <el-table-column label="关键词" align="center" prop="keywords" />
+      <el-table-column label="自定义字段" align="center" prop="customFields" />
+      <el-table-column label="处理时间" align="center" prop="processTime" width="180">
+        <template slot-scope="scope">
+          <span>{{ parseTime(scope.row.processTime, '{y}-{m}-{d}') }}</span>
+        </template>
+      </el-table-column>
+      <el-table-column label="操作" align="center" class-name="small-padding fixed-width">
+        <template slot-scope="scope">
+          <el-button
+            size="mini"
+            type="text"
+            icon="el-icon-edit"
+            @click="handleUpdate(scope.row)"
+            v-hasPermi="['spiderData:structured:edit']"
+          >修改</el-button>
+          <el-button
+            size="mini"
+            type="text"
+            icon="el-icon-delete"
+            @click="handleDelete(scope.row)"
+            v-hasPermi="['spiderData:structured:remove']"
+          >删除</el-button>
+        </template>
+      </el-table-column>
+    </el-table>
+    
+    <pagination
+      v-show="total>0"
+      :total="total"
+      :page.sync="queryParams.pageNum"
+      :limit.sync="queryParams.pageSize"
+      @pagination="getList"
+    />
+
+    <!-- 添加或修改结构化数据对话框 -->
+    <el-dialog :title="title" :visible.sync="open" width="500px" append-to-body>
+      <el-form ref="form" :model="form" :rules="rules" label-width="80px">
+        <el-form-item label="关联原始数据ID" prop="rawDataId">
+          <el-input v-model="form.rawDataId" placeholder="请输入关联原始数据ID" />
+        </el-form-item>
+        <el-form-item label="文档标题" prop="title">
+          <el-input v-model="form.title" placeholder="请输入文档标题" />
+        </el-form-item>
+        <el-form-item label="结构化文本内容">
+          <editor v-model="form.content" :min-height="192"/>
+        </el-form-item>
+        <el-form-item label="作者" prop="author">
+          <el-input v-model="form.author" placeholder="请输入作者" />
+        </el-form-item>
+        <el-form-item label="所属部门" prop="department">
+          <el-input v-model="form.department" placeholder="请输入所属部门" />
+        </el-form-item>
+        <el-form-item label="发布时间" prop="publishTime">
+          <el-date-picker clearable
+            v-model="form.publishTime"
+            type="date"
+            value-format="yyyy-MM-dd"
+            placeholder="请选择发布时间">
+          </el-date-picker>
+        </el-form-item>
+        <el-form-item label="关键词" prop="keywords">
+          <el-input v-model="form.keywords" placeholder="请输入关键词" />
+        </el-form-item>
+        <el-form-item label="自定义字段" prop="customFields">
+          <el-input v-model="form.customFields" type="textarea" placeholder="请输入内容" />
+        </el-form-item>
+        <el-form-item label="处理时间" prop="processTime">
+          <el-date-picker clearable
+            v-model="form.processTime"
+            type="date"
+            value-format="yyyy-MM-dd"
+            placeholder="请选择处理时间">
+          </el-date-picker>
+        </el-form-item>
+      </el-form>
+      <div slot="footer" class="dialog-footer">
+        <el-button type="primary" @click="submitForm">确 定</el-button>
+        <el-button @click="cancel">取 消</el-button>
+      </div>
+    </el-dialog>
+  </div>
+</template>
+
+<script>
+import { listStructured, getStructured, delStructured, addStructured, updateStructured } from "@/api/spiderData/structured";
+
+export default {
+  name: "Structured",
+  data() {
+    return {
+      // 遮罩层
+      loading: true,
+      // 选中数组
+      ids: [],
+      // 非单个禁用
+      single: true,
+      // 非多个禁用
+      multiple: true,
+      // 显示搜索条件
+      showSearch: true,
+      // 总条数
+      total: 0,
+      // 结构化数据表格数据
+      structuredList: [],
+      // 弹出层标题
+      title: "",
+      // 是否显示弹出层
+      open: false,
+      // 处理时间时间范围
+      daterangePublishTime: [],
+      // 查询参数
+      queryParams: {
+        pageNum: 1,
+        pageSize: 10,
+        title: null,
+        content: null,
+        author: null,
+        department: null,
+        publishTime: null,
+        keywords: null,
+      },
+      // 表单参数
+      form: {},
+      // 表单校验
+      rules: {
+        rawDataId: [
+          { required: true, message: "关联原始数据ID不能为空", trigger: "blur" }
+        ],
+        processTime: [
+          { required: true, message: "处理时间不能为空", trigger: "blur" }
+        ]
+      }
+    };
+  },
+  created() {
+    this.getList();
+  },
+  methods: {
+    /** 查询结构化数据列表 */
+    getList() {
+      this.loading = true;
+      this.queryParams.params = {};
+      if (null != this.daterangePublishTime && '' != this.daterangePublishTime) {
+        this.queryParams.params["beginPublishTime"] = this.daterangePublishTime[0];
+        this.queryParams.params["endPublishTime"] = this.daterangePublishTime[1];
+      }
+      listStructured(this.queryParams).then(response => {
+        this.structuredList = response.rows;
+        this.total = response.total;
+        this.loading = false;
+      });
+    },
+    // 取消按钮
+    cancel() {
+      this.open = false;
+      this.reset();
+    },
+    // 表单重置
+    reset() {
+      this.form = {
+        id: null,
+        rawDataId: null,
+        title: null,
+        content: null,
+        author: null,
+        department: null,
+        publishTime: null,
+        keywords: null,
+        customFields: null,
+        processTime: null
+      };
+      this.resetForm("form");
+    },
+    /** 搜索按钮操作 */
+    handleQuery() {
+      this.queryParams.pageNum = 1;
+      this.getList();
+    },
+    /** 重置按钮操作 */
+    resetQuery() {
+      this.daterangePublishTime = [];
+      this.resetForm("queryForm");
+      this.handleQuery();
+    },
+    // 多选框选中数据
+    handleSelectionChange(selection) {
+      this.ids = selection.map(item => item.id)
+      this.single = selection.length!==1
+      this.multiple = !selection.length
+    },
+    /** 新增按钮操作 */
+    handleAdd() {
+      this.reset();
+      this.open = true;
+      this.title = "添加结构化数据";
+    },
+    /** 修改按钮操作 */
+    handleUpdate(row) {
+      this.reset();
+      const id = row.id || this.ids
+      getStructured(id).then(response => {
+        this.form = response.data;
+        this.open = true;
+        this.title = "修改结构化数据";
+      });
+    },
+    /** 提交按钮 */
+    submitForm() {
+      this.$refs["form"].validate(valid => {
+        if (valid) {
+          if (this.form.id != null) {
+            updateStructured(this.form).then(response => {
+              this.$modal.msgSuccess("修改成功");
+              this.open = false;
+              this.getList();
+            });
+          } else {
+            addStructured(this.form).then(response => {
+              this.$modal.msgSuccess("新增成功");
+              this.open = false;
+              this.getList();
+            });
+          }
+        }
+      });
+    },
+    /** 删除按钮操作 */
+    handleDelete(row) {
+      const ids = row.id || this.ids;
+      this.$modal.confirm('是否确认删除结构化数据编号为"' + ids + '"的数据项?').then(function() {
+        return delStructured(ids);
+      }).then(() => {
+        this.getList();
+        this.$modal.msgSuccess("删除成功");
+      }).catch(() => {});
+    },
+    /** 导出按钮操作 */
+    handleExport() {
+      this.download('spiderData/structured/export', {
+        ...this.queryParams
+      }, `structured_${new Date().getTime()}.xlsx`)
+    }
+  }
+};
+</script>