爬取结果

完整代码

主要代码:
在node环境下直接运行此代码

const request = require('request')
const fs = require('fs')
const {
   
  fsRead,
  fsWrite,
  fsDir
} = require('./lcfs')
// Listing page on 1905.com where the crawl starts; it is handed to getClassUrl at the bottom of that function's definition.
let httpUrl = "https://www.1905.com/vod/list/n_1/o3p1.html"

/**
 * Fetches a page with the `request` library and exposes the result as a promise.
 *
 * @param {string} url - Address to GET.
 * @returns {Promise<{response: object, body: *}>} Resolves with the response
 *   object and the body that `request.get` passed to its callback; rejects
 *   with the error reported by `request`.
 */
function req(url) {
  return new Promise(function (resolve, reject) {
    request.get(url, function (err, response, body) {
      if (err) {
        reject(err)
        return
      }
      // A promise settles with exactly one value: `resolve(response, body)`
      // would silently drop `body`, so both are wrapped in a single object
      // that callers can destructure.
      resolve({ response, body })
    })
  })
}

//爬取多页数据
// function getMorePageMovies(page) {
   
// for(let i = 1; i <= page;i ++) {
   
// let MoreHttpUrl = "https://www.1905.com/vod/list/n_1/o3p" + i + ".html"
// //i表示第几页
// getClassUrl(MoreHttpUrl,i);
// }
// }
// getMorePageMovies(80);
/**
 * Collects every category link from the start page and crawls each category.
 *
 * The category links are looked up in the part of the page between the
 * "类型" label and the following `grid-12x` container. For each category
 * (the "全部" entry is skipped) `fsDir` is called with `./movies/<name>`
 * (presumably it creates that directory; confirm against ./lcfs) and
 * `getMovies` is started for the category URL.
 *
 * @param {string} targetUrl - Listing page whose category filter is parsed.
 * @returns {Promise<Array<{className: string, url: string}>>} The categories
 *   found, resolved once every category crawl has settled.
 * @throws {Error} If the category section cannot be found in the page.
 */
async function getClassUrl(targetUrl) {
  const { body } = await req(targetUrl)
  const sectionReg = /<span class="search-index-L">类型(.*?)<div class="grid-12x">/is
  const section = sectionReg.exec(body)
  if (section === null) {
    // Fail with a readable message instead of a TypeError on `null[1]`.
    throw new Error(`category section not found in ${targetUrl}`)
  }

  const linkReg = /onclick="location\.href='(.*?)';return.*?>(.*?)<\/a>/isg
  const arrClass = []
  const crawls = []
  let res = null
  while ((res = linkReg.exec(section[1])) !== null) {
    const url = res[1]
    const className = res[2]
    if (className === '全部') {
      continue
    }
    arrClass.push({ className, url })
    await fsDir(`./movies/${className}`)
    // Keep the crawls running concurrently, but never leave a promise
    // floating: one failing category is logged and must not abort the rest.
    crawls.push(
      getMovies(url, className).catch(function (err) {
        console.error(`failed to crawl category ${className}:`, err)
      })
    )
  }
  await Promise.all(crawls)
  return arrClass
}
// Entry point: start the crawl. The returned promise gets a rejection handler
// so a network or parsing failure is reported instead of surfacing as an
// unhandled rejection.
getClassUrl(httpUrl).catch(function (err) {
  console.error('crawl failed:', err)
  process.exitCode = 1
})
/**
 * Extracts the movie links from one category listing page and processes each
 * linked movie page with `parsePage`.
 *
 * @param {string} url - Category listing page to scan.
 * @param {string} moviesType - Category name forwarded to `parsePage`.
 * @returns {Promise<string[]>} The movie page URLs found, resolved once every
 *   `parsePage` call has settled.
 */
async function getMovies(url, moviesType) {
  const { body } = await req(url)
  const reg = /<a class="pic-pack-outer" target="_blank" href="(.*?)" title="(.*?)"><img/igs
  const arrList = []
  const pages = []
  let res = null
  while ((res = reg.exec(body)) !== null) {
    const movieUrl = res[1]
    arrList.push(movieUrl)
    // Pages are processed concurrently; a failure on one page is logged so it
    // neither becomes an unhandled rejection nor stops the remaining pages.
    pages.push(
      parsePage(movieUrl, moviesType).catch(function (err) {
        console.error(`failed to parse ${movieUrl}:`, err)
      })
    )
  }
  await Promise.all(pages)
  return arrList
}

/**
 * Downloads one movie page, reads the movie title from its `<h1>` and stores
 * the record as JSON under `./movies/<category>/<title>.json`.
 *
 * @param {string} url - Movie page to fetch.
 * @param {string} mT - Category name; used as the `movieType` field and as the
 *   sub-directory of `./movies/` the file is written to.
 * @returns {Promise<{name: string, movieUrl: string, movieType: string}|null>}
 *   The stored record, or `null` when the page has no recognisable title.
 */
async function parsePage(url, mT) {
  const { body } = await req(url)
  const reg = /<h1 class="playerBox-info-name playerBox-info-cnName">(.*?)<\/h1>/
  const res = reg.exec(body)
  if (res === null) {
    // Pages without the expected heading are skipped rather than crashing
    // on `null[1]`.
    console.warn(`no movie title found at ${url}, skipped`)
    return null
  }
  // Movie title as shown on the page.
  const movie = {
    name: res[1],
    movieUrl: url,
    movieType: mT
  }
  // The title comes from scraped HTML: replace characters that are path
  // separators or invalid in file names so it cannot escape the category
  // directory or make the write fail.
  const fileName = movie.name.replace(/[\\\/:*?"<>|]/g, '_')
  await fsWrite(`./movies/${mT}/${fileName}.json`, JSON.stringify(movie))
  return movie
}

封装的文件操作代码

const fs = require('fs');

/**
 * Appends text to a file (the file is opened with flag `'a'`) as UTF-8.
 *
 * @param {string} path - File to write to.
 * @param {string} content - Text to append.
 * @returns {Promise<void>} Resolves when the write has finished; rejects with
 *   the error reported by `fs.writeFile`.
 */
function fsWrite(path, content) {
  return new Promise(function (resolve, reject) {
    fs.writeFile(path, content, {
      flag: 'a',
      encoding: 'utf-8'
    }, function (err) {
      if (err) {
        // Without this branch the promise would stay pending forever on a
        // failed write and the caller would never learn about the error.
        reject(err);
        return;
      }
      resolve();
    })
  })
}

/**
 * Reads a whole file as UTF-8 text.
 *
 * @param {string} path - File to read.
 * @returns {Promise<string>} Resolves with the file content; rejects with the
 *   error reported by `fs.readFile`.
 */
function fsRead(path) {
  const readOptions = { flag: 'r', encoding: 'utf-8' };
  return new Promise((resolve, reject) => {
    fs.readFile(path, readOptions, (error, text) => {
      if (error) {
        reject(error);
        return;
      }
      resolve(text);
    });
  });
}