项目整合架构及运行(三)

一、主要框架

1、框架介绍

  WebMagic+ElasticSearch+Spring-Data-ES+Spring-Test+SSM

  • WebMagic:版本:0.7.3,webmagic是一个开源的Java垂直爬虫框架,目标是简化爬虫的开发流程,让开发者专注于逻辑功能的开发。
  • ElasticSearch:版本:5.6.8,ElasticSearch是一个基于Lucene的搜索服务器。
  • Spring-Data-ES:版本:3.0.5,用于简化ElasticSearch的操作,便于使用。
  • Spring-Test:版本:5.1.0,整合Junit,针对Spring做单元测试。
  • SSM:即Spring + Spring MVC + MyBatis,该框架集由Spring、MyBatis两个开源框架整合而成(Spring MVC是Spring的一部分),常作为数据源较简单的Web项目的框架。

2、Maven坐标,配置依赖

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
	<!-- Maven build descriptor for the WebMagic + Elasticsearch + SSM demo web application (packaged as a WAR). -->
	<modelVersion>4.0.0</modelVersion>
	<groupId>com.couragehe</groupId>
	<artifactId>2-ElasticSearchDemo</artifactId>
	<version>0.0.1-SNAPSHOT</version>
	<packaging>war</packaging>
	<dependencies>
		<!-- Elasticsearch transport client -->
		<dependency>
			<groupId>org.elasticsearch.client</groupId>
			<artifactId>transport</artifactId>
			<version>5.6.8</version>
		</dependency>
		<!-- Elasticsearch core; kept at the same version as the transport client -->
		<dependency>
			<groupId>org.elasticsearch</groupId>
			<artifactId>elasticsearch</artifactId>
			<version>5.6.8</version>
		</dependency>
		<!-- Spring Data Elasticsearch: repository-style programming model on top of Elasticsearch -->
		<dependency>
			<groupId>org.springframework.data</groupId>
			<artifactId>spring-data-elasticsearch</artifactId>
			<version>3.0.5.RELEASE</version>
		</dependency>
		<!-- Spring test support (JUnit integration) -->
		<!-- NOTE(review): 5.1.0 differs from the 5.0.4 used by spring-webmvc below; consider aligning the Spring versions. -->
		<dependency>
			<groupId>org.springframework</groupId>
			<artifactId>spring-test</artifactId>
			<version>5.1.0.RELEASE</version>
		</dependency>
		<!-- MySQL JDBC driver (artifactId is case-sensitive and all lower case) -->
		<dependency>
			<groupId>mysql</groupId>
			<artifactId>mysql-connector-java</artifactId>
			<version>5.1.3</version>
		</dependency>
		<!-- JDBC connection pool -->
		<dependency>
			<groupId>commons-dbcp</groupId>
			<artifactId>commons-dbcp</artifactId>
			<version>1.4</version>
		</dependency>
		<!-- Spring MVC -->
		<dependency>
			<groupId>org.springframework</groupId>
			<artifactId>spring-webmvc</artifactId>
			<version>5.0.4.RELEASE</version>
			<!--<version>5.2.0.RELEASE</version> -->
		</dependency>
		<!-- MyBatis and its Spring integration -->
		<dependency>
			<groupId>org.mybatis</groupId>
			<artifactId>mybatis</artifactId>
			<version>3.3.0</version>
		</dependency>
		<dependency>
			<groupId>org.mybatis</groupId>
			<artifactId>mybatis-spring</artifactId>
			<version>1.2.3</version>
		</dependency>
		<!-- Jackson: lets Spring MVC serialize returned objects to JSON -->
		<dependency>
			<groupId>com.fasterxml.jackson.core</groupId>
			<artifactId>jackson-databind</artifactId>
			<version>2.9.5</version>
		</dependency>
		<dependency>
			<groupId>com.fasterxml.jackson.core</groupId>
			<artifactId>jackson-annotations</artifactId>
			<version>2.9.5</version>
		</dependency>
		<!-- WebMagic crawler: core and extension modules -->
		<dependency>
			<groupId>us.codecraft</groupId>
			<artifactId>webmagic-core</artifactId>
			<version>0.7.3</version>
		</dependency>
		<dependency>
			<groupId>us.codecraft</groupId>
			<artifactId>webmagic-extension</artifactId>
			<version>0.7.3</version>
		</dependency>
		<!-- Legacy Commons HttpClient 3.x -->
		<dependency>
			<groupId>apache-httpclient</groupId>
			<artifactId>commons-httpclient</artifactId>
			<version>3.1</version>
		</dependency>
	</dependencies>
</project>

二、项目配置

1、ElasticSearch客户端

<?xml version="1.0" encoding="UTF-8"?>
<!-- Spring context fragment that wires the Elasticsearch client, template and repositories. -->
<beans xmlns="http://www.springframework.org/schema/beans"
       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
       xmlns:util="http://www.springframework.org/schema/util"
       xmlns:context="http://www.springframework.org/schema/context"
       xmlns:elasticsearch="http://www.springframework.org/schema/data/elasticsearch"
       xsi:schemaLocation="
           http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans.xsd
           http://www.springframework.org/schema/util http://www.springframework.org/schema/util/spring-util-3.2.xsd
           http://www.springframework.org/schema/context http://www.springframework.org/schema/context/spring-context-3.2.xsd
           http://www.springframework.org/schema/data/elasticsearch http://www.springframework.org/schema/data/elasticsearch/spring-elasticsearch.xsd">

    <!-- The Elasticsearch schema location above is deliberately versionless so that it is
         resolved from the spring-data-elasticsearch jar on the classpath rather than by
         a version-specific URL that the jar in use may not bundle. -->

    <!-- Expose every *.properties file at the classpath root as the "config" Properties bean -->
    <util:properties id="config" location="classpath:*.properties"/>

    <!-- Scan this package for Spring Data Elasticsearch repository interfaces -->
    <elasticsearch:repositories base-package="com.couragehe.dao" />

    <!-- Transport client connecting to the local Elasticsearch node (transport port 9300) -->
    <elasticsearch:transport-client id="client" cluster-nodes="127.0.0.1:9300"/>

    <!-- Elasticsearch template built on the transport client above -->
    <bean id="elasticsearchTemplate" class="org.springframework.data.elasticsearch.core.ElasticsearchTemplate">
        <constructor-arg name="client" ref="client" />
    </bean>
</beans>

2、主配置文件

<?xml version="1.0" encoding="UTF-8"?>
<!-- Main Spring configuration: imports the Elasticsearch context, scans application components and enables annotation-driven MVC. -->
<beans xmlns="http://www.springframework.org/schema/beans" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:context="http://www.springframework.org/schema/context" xmlns:jdbc="http://www.springframework.org/schema/jdbc" xmlns:jee="http://www.springframework.org/schema/jee" xmlns:tx="http://www.springframework.org/schema/tx" xmlns:jpa="http://www.springframework.org/schema/data/jpa" xmlns:util="http://www.springframework.org/schema/util" xmlns:mvc="http://www.springframework.org/schema/mvc" xsi:schemaLocation=" http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans-3.2.xsd http://www.springframework.org/schema/context http://www.springframework.org/schema/context/spring-context-3.2.xsd http://www.springframework.org/schema/jdbc http://www.springframework.org/schema/jdbc/spring-jdbc-3.2.xsd http://www.springframework.org/schema/jee http://www.springframework.org/schema/jee/spring-jee-3.2.xsd http://www.springframework.org/schema/tx http://www.springframework.org/schema/tx/spring-tx-3.2.xsd http://www.springframework.org/schema/data/jpa http://www.springframework.org/schema/data/jpa/spring-jpa-1.3.xsd http://www.springframework.org/schema/util http://www.springframework.org/schema/util/spring-util-3.2.xsd http://www.springframework.org/schema/mvc http://www.springframework.org/schema/mvc/spring-mvc-3.2.xsd">
	<!-- Import the Elasticsearch configuration file (path is relative to this file) -->
	
	 <import resource="spring-elastic.xml"/>
	<!-- Component scan over the whole com.couragehe package -->
    <context:component-scan base-package="com.couragehe"/>
    
    	<!-- Enable annotation-driven Spring MVC -->
	<mvc:annotation-driven/>
</beans>

3、web.xml配置文件

<?xml version="1.0" encoding="UTF-8"?>
<!-- Deployment descriptor: registers the Spring DispatcherServlet and a UTF-8 encoding filter. -->
<!-- Servlet 3.0 is declared because it is a version that exists for the java.sun.com/xml/ns/javaee namespace. -->
<web-app xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://java.sun.com/xml/ns/javaee" xsi:schemaLocation="http://java.sun.com/xml/ns/javaee http://java.sun.com/xml/ns/javaee/web-app_3_0.xsd" version="3.0">
  <display-name>2-ElasticSearchDemo</display-name>
  <welcome-file-list>
    <welcome-file>index.html</welcome-file>
    <welcome-file>index.htm</welcome-file>
    <welcome-file>index.jsp</welcome-file>
    <welcome-file>default.html</welcome-file>
    <welcome-file>default.htm</welcome-file>
    <welcome-file>default.jsp</welcome-file>
  </welcome-file-list>

	<!-- Spring MVC front controller; its context is loaded from applicationContext.xml on the classpath -->
	<servlet>
		<servlet-name>DispatcherServlet</servlet-name>
		<servlet-class>org.springframework.web.servlet.DispatcherServlet</servlet-class>
		<init-param>
			<param-name>contextConfigLocation</param-name>
			<param-value>classpath:applicationContext.xml</param-value>
		</init-param>
		<!-- Initialize the servlet when the application starts instead of on first request -->
		<load-on-startup>1</load-on-startup>
	</servlet>
	<!-- Only requests ending in .do are routed to Spring MVC -->
	<servlet-mapping>
		<servlet-name>DispatcherServlet</servlet-name>
		<url-pattern>*.do</url-pattern>
	</servlet-mapping>

	<!-- Request filter that forces UTF-8 encoding to avoid garbled Chinese text -->
	<filter>
		<filter-name>springUtf8Encoding</filter-name>
		<filter-class>org.springframework.web.filter.CharacterEncodingFilter</filter-class>
		<init-param>
			<param-name>encoding</param-name>
			<param-value>UTF-8</param-value>
		</init-param>
		<init-param>
			<param-name>forceEncoding</param-name>
			<param-value>true</param-value>
		</init-param>
	</filter>
	<!-- "/*" is the portable Servlet-spec pattern for "every request" -->
	<filter-mapping>
		<filter-name>springUtf8Encoding</filter-name>
		<url-pattern>/*</url-pattern>
	</filter-mapping>
</web-app>

三、测试文件

  三个测试函数应按顺序依次运行(JUnit不保证同一测试类中各方法的执行顺序,建议逐个手动运行):先创建索引和映射,再导入数据,最后执行查询。第三个函数成功运行后,控制台输出相应数据即代表运行成功。随后将项目部署到Tomcat服务器上运行,大概需要十秒左右的加载时间,加载完毕后在浏览器中打开http://localhost:8000/2-ElasticSearchDemo/index.html,即可进行搜索查询。

package com.couragehe.test;

import java.util.ArrayList;
import java.util.List;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.beans.BeanUtils;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.ApplicationContext;
import org.springframework.context.support.ClassPathXmlApplicationContext;
import org.springframework.data.elasticsearch.core.ElasticsearchTemplate;
import org.springframework.test.context.ContextConfiguration;
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;

import com.couragehe.controller.SearchController;
import com.couragehe.entity.JobInfo;
import com.couragehe.entity.JobInfoField;
import com.couragehe.entity.JobResult;
import com.couragehe.service.JobInfoService;
import com.couragehe.service.JobRepositoryService;

@RunWith(SpringJUnit4ClassRunner.class)
@ContextConfiguration("classpath:applicationContext.xml")
public class ESTest {
	
	@Autowired
	private ElasticsearchTemplate elasticsearchTemplate;
	@Autowired
	private JobRepositoryService jobRepositoryService;
	@Autowired
	private JobInfoService jobInfoService;
	@Autowired
	private SearchController searchController;
	//创建索引和映射
	@Test
	public void CreateIndex() {
		this.elasticsearchTemplate.createIndex(JobInfoField.class);
		this.elasticsearchTemplate.putMapping(JobInfoField.class);
	}
	//导入数据
	@Test
	public void jobInfoData() {
// ApplicationContext ac = new ClassPathXmlApplicationContext("applicationContext.xml");
		//声明页码数,从1开始
		int p = 0;
		//声明查询的数据条数
		int pageSize = 0;
		
		do {			
			//从数据库种查询数据
			List<JobInfo> jobInfos = this.jobInfoService.findJobInfoByPage(p,2000);
			
			//声明器存放jobinfoField
			List<JobInfoField> list = new ArrayList<JobInfoField>();
			
			//把查询到的数据封装为jobInfoField
			for(JobInfo jobInfo : jobInfos) {
				//声明一个jobInfoField对象
				JobInfoField jobInfoField = new JobInfoField();
				//封装数据(Bean工具复制)
				BeanUtils.copyProperties(jobInfo,jobInfoField);
				//把准备好数据的对象放到list容器中
				list.add(jobInfoField);
				//输出测试
// System.out.println(jobInfo);
			}
			//把封装好的是护具保存到索引库中
			this.jobRepositoryService.saveAll(list);
			
 			//获取查询结果集的数据条数(不满500即代表到最后一页)
			pageSize = list.size();
			//起点向后递增
			p+= pageSize;
			System.out.println("查询起点:"+p);
		}while(pageSize == 2000);
		
	}
	//查询
	@Test
	public void TestController() {
		JobResult list= this.searchController.search("", "*", "java", 1);
		or(JobInfoField job: list.getRows()) {
			System.out.println(job);
			
		}
	}
}

该项目源码地址:项目地址