Nginx安装

wget https://nginx.org/download/nginx-1.14.0.tar.gz

tar -zxvf nginx-1.14.0.tar.gz

cd nginx-1.14.0

安装依赖库:

yum install gcc-c++

yum install -y pcre pcre-devel

yum install -y zlib zlib-devel

yum install -y openssl openssl-devel

编译安装:

./configure

make

make install

ln -s /usr/local/nginx/sbin/nginx /usr/bin/nginx

 

nginx命令:
start nginx(Windows 下的启动命令;Linux 下直接执行 nginx 即可启动)
nginx -s stop(快速关闭;如需平滑退出请使用 nginx -s quit)
nginx -s reload(重新加载配置文件,平滑重启 worker 进程)

nginx负载均衡配置:

  1 #user  nobody;        #全局块
  2 worker_processes  1;
  3 
  4 #error_log  logs/error.log;
  5 #error_log  logs/error.log  notice;
  6 #error_log  logs/error.log  info;
  7 
  8 #pid        logs/nginx.pid;
  9 
 10 
 11 events {        # events块
 12     worker_connections  1024;
 13 }
 14 
 15 
 16 http {         # http块
 17     include       mime.types;  # http全局块
 18     default_type  application/octet-stream;
 19 
 20     #log_format  main  '$remote_addr - $remote_user [$time_local] "$request" '
 21     #                  '$status $body_bytes_sent "$http_referer" '
 22     #                  '"$http_user_agent" "$http_x_forwarded_for"';
 23 
 24     #access_log  logs/access.log  main;
 25 
 26     sendfile        on;
 27     #tcp_nopush     on;
 28 
 29     #keepalive_timeout  0;
 30     keepalive_timeout  65;
 31 
 32     #gzip  on;
 33     
 34     upstream myserver.com{     # 负载均衡配置
 35         server 127.0.0.1:8080;   # 可以在此添加weight(权重),配置每个服务器流量的权重;例如:server 127.0.0.1:8080 weight=1;
 36     }
 37 
 38     server {    # server块
 39         # 反爬虫    # server 全局块
 40         include anti_spider.conf;  # 加载反爬虫配置
 41         
 42         listen       80;
 43         server_name  211.67.160.21;
 44 
 45         #charset koi8-r;
 46 
 47         #access_log  logs/host.access.log  main;
 48 
 49         location ~*^.+$ {  # location 块
 50             proxy_pass http://myserver.com;
 51             allow all;
 52         }
 53         
 54         error_page 404 https://www.baidu.com;
 55 
 56         #error_page  404              /404.html;
 57 
 58         # redirect server error pages to the static page /50x.html
 59         #
 60          error_page   500 502 503 504  /50x.html;
 61         location = /50x.html {
 62             root   html;
 63         }
 64 
 65         # proxy the PHP scripts to Apache listening on 127.0.0.1:80
 66         #
 67         #location ~ \.php$ {
 68         #    proxy_pass   http://127.0.0.1;
 69         #}
 70 
 71         # pass the PHP scripts to FastCGI server listening on 127.0.0.1:9000
 72         #
 73         #location ~ \.php$ {
 74         #    root           html;
 75         #    fastcgi_pass   127.0.0.1:9000;
 76         #    fastcgi_index  index.php;
 77         #    fastcgi_param  SCRIPT_FILENAME  /scripts$fastcgi_script_name;
 78         #    include        fastcgi_params;
 79         #}
 80 
 81         # deny access to .htaccess files, if Apache's document root
 82         # concurs with nginx's one
 83         #
 84         #location ~ /\.ht {
 85         #    deny  all;
 86         #}
 87                 
 88         location ~* ^.+\.(html|jpg|jpeg|gif|png|ico|css|js)$  
 89         {  
 90             root D:/register;  
 91             expires 30d;  
 92             break;  
 93         }  
 94   
 95         location ~ ^/static/ {  
 96             root D:/register;  
 97             expires 30d;  
 98             break;  
 99         }   
100   
101         location ~ ^/ {  
102             fastcgi_pass 127.0.0.1:80;  
103             fastcgi_param PATH_INFO $fastcgi_script_name;  
104             fastcgi_param REQUEST_METHOD $request_method;  
105             fastcgi_param QUERY_STRING $query_string;  
106             fastcgi_param CONTENT_TYPE $content_type;  
107             fastcgi_param CONTENT_LENGTH $content_length;  
108             fastcgi_param SERVER_PROTOCOL $server_protocol;  
109             fastcgi_param SERVER_PORT $server_port;  
110             fastcgi_param SERVER_NAME $server_name;  
111             fastcgi_pass_header Authorization;  
112             fastcgi_intercept_errors off;  
113         }  
114     }
115 
116 
117     # another virtual host using mix of IP-, name-, and port-based configuration
118     #
119     #server {
120     #    listen       8000;
121     #    listen       somename:8080;
122     #    server_name  somename  alias  another.alias;
123 
124     #    location / {
125     #        root   html;
126     #        index  index.html index.htm;
127     #    }
128     #}
129 
130 
131     # HTTPS server
132     #
133     #server {
134     #    listen       443 ssl;
135     #    server_name  localhost;
136 
137     #    ssl_certificate      cert.pem;
138     #    ssl_certificate_key  cert.key;
139 
140     #    ssl_session_cache    shared:SSL:1m;
141     #    ssl_session_timeout  5m;
142 
143     #    ssl_ciphers  HIGH:!aNULL:!MD5;
144     #    ssl_prefer_server_ciphers  on;
145 
146     #    location / {
147     #        root   html;
148     #        index  index.html index.htm;
149     #    }
150     #}
151 
152 }
  • 1、全局块:配置影响nginx全局的指令。一般有运行nginx服务器的用户组,nginx进程pid存放路径,日志存放路径,配置文件引入,允许生成worker process数等。
  • 2、events块:配置影响nginx服务器或与用户的网络连接。有每个进程的最大连接数,选取哪种事件驱动模型处理连接请求,是否允许同时接受多个网络连接,开启多个网络连接序列化等。
  • 3、http块:可以嵌套多个server,配置代理,缓存,日志定义等绝大多数功能和第三方模块的配置。如文件引入,mime-type定义,日志自定义,是否使用sendfile传输文件,连接超时时间,单连接请求数等。
  • 4、server块:配置虚拟主机的相关参数,一个http中可以有多个server。
  • 5、location块:配置请求的路由,以及各种页面的处理情况。 

 

 

nginx反爬虫:

 1 #禁止Scrapy等工具的抓取  
 2 if ($http_user_agent ~* (Scrapy|Curl|HttpClient)) {  
 3      return 403;  
 4 }  
 5  
 6 #禁止指定UA及UA为空的访问  
 7 if ($http_user_agent ~ "WinHttp|WebZIP|FetchURL|node-superagent|java/|FeedDemon|Jullo|JikeSpider|Indy Library|Alexa Toolbar|AskTbFXTV|AhrefsBot|CrawlDaddy|Java|Feedly|Apache-HttpAsyncClient|UniversalFeedParser|ApacheBench|Microsoft URL Control|Swiftbot|ZmEu|oBot|jaunty|Python-urllib|lightDeckReports Bot|YYSpider|DigExt|HttpClient|MJ12bot|heritrix|EasouSpider|Ezooms|BOT/0.1|YandexBot|FlightDeckReports|Linguee Bot|^$" ) {  
 8      return 403;               
 9 }  
10  
11 #禁止非GET|HEAD|POST方式的抓取  
12 if ($request_method !~ ^(GET|HEAD|POST)$) {  
13     return 403;  
14 }  
15  
16 #屏蔽单个IP的命令是
17 #deny 123.45.6.7;
18 #封整个段即从123.0.0.1到123.255.255.254的命令
19 #deny 123.0.0.0/8;
20 #封IP段即从123.45.0.1到123.45.255.254的命令
21 #deny 123.45.0.0/16;
22 #封IP段即从123.45.6.1到123.45.6.254的命令是
23 #deny 123.45.6.0/24;
24  
25 # 以下IP皆为流氓
26 #deny 58.95.66.0/24;

 

常见垃圾UA列表:

> FeedDemon 内容采集
> BOT/0.1 (BOT for JCE) sql注入
> CrawlDaddy sql注入
> Java 内容采集
> Jullo 内容采集
> Feedly 内容采集
> UniversalFeedParser 内容采集
> ApacheBench cc攻击器
> Swiftbot 无用爬虫
> YandexBot 无用爬虫
> AhrefsBot 无用爬虫
> YisouSpider 无用爬虫
> jikeSpider 无用爬虫
> MJ12bot 无用爬虫
> ZmEu phpmyadmin 漏洞扫描
> WinHttp 采集cc攻击
> EasouSpider 无用爬虫
> HttpClient tcp攻击
> Microsoft URL Control 扫描
> YYSpider 无用爬虫
> jaunty wordpress爆破扫描器
> oBot 无用爬虫
> Python-urllib 内容采集
> Indy Library 扫描
> FlightDeckReports Bot 无用爬虫
> Linguee Bot 无用爬虫